Tools

Collection of essential tools for running SpectraFit.

PostProcessing

Post-processing of the dataframe.

Source code in spectrafit/tools.py
Python
class PostProcessing:
    """Post-processing of the dataframe."""

    def __init__(
        self, df: pd.DataFrame, args: Dict[str, Any], minimizer: Minimizer, result: Any
    ) -> None:
        """Initialize PostProcessing class.

        Args:
            df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
                 as well as the best fit and the corresponding residuum. Hence, it will
                 be extended by the single contribution of the model.
            args (Dict[str, Any]): The input file arguments as a dictionary with
                 additional information beyond the command line arguments.
            minimizer (Minimizer): The minimizer class.
            result (Any): The result of the minimization of the best fit.
        """
        self.args = args
        self.df = self.rename_columns(df=df)
        self.minimizer = minimizer
        self.result = result
        self.data_size = self.check_global_fitting()

    def __call__(self) -> Tuple[pd.DataFrame, Dict[str, Any]]:
        """Call the post-processing."""
        self.make_insight_report()
        self.make_residual_fit()
        self.make_fit_contributions()
        self.export_correlation2args()
        self.export_results2args()
        self.export_regression_metrics2args()
        self.export_desprective_statistic2args()
        return (self.df, self.args)

    def check_global_fitting(self) -> Optional[int]:
        """Check if the global fitting is performed.

        !!! note "About Global Fitting"
            In case of the global fitting, the data is extended by the single
            contribution of the model.

        Returns:
            Optional[int]: The number of spectra of the global fitting.
        """
        if self.args["global_"]:
            return max(
                int(self.result.params[i].name.split("_")[-1])
                for i in self.result.params
            )
        return None

    def rename_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Rename the columns of the dataframe.

        Rename the columns of the dataframe to the names defined in the input file.

        Args:
            df (pd.DataFrame): DataFrame containing the original input data, which are
                 individually pre-named.

        Returns:
            pd.DataFrame: DataFrame containing renamed columns. All column names are
                 lowercased. In case of a regular fitting, the columns are named `energy`
                 and `intensity`. In case of a global fitting, `energy` stays `energy`
                 and `intensity` is extended by a `_` and the column index; like: `energy`
                 and `intensity_1`, `intensity_2`, `intensity_...` depending on
                 the dataset size.
        """
        if self.args["global_"]:
            return df.rename(
                columns={
                    col: (
                        ColumnNamesAPI().energy
                        if i == 0
                        else f"{ColumnNamesAPI().intensity}_{i}"
                    )
                    for i, col in enumerate(df.columns)
                }
            )
        return df.rename(
            columns={
                df.columns[0]: ColumnNamesAPI().energy,
                df.columns[1]: ColumnNamesAPI().intensity,
            }
        )

    def make_insight_report(self) -> None:
        """Make an insight-report of the fit statistic.

        !!! note "About Insight Report"

            The insight report is based on:

                1. Configurations
                2. Statistics
                3. Variables
                4. Error-bars
                5. Correlations
                6. Covariance Matrix
                7. _Optional_: Confidence Interval

            All of the above are included in the report as a dictionary in `args`.

        """
        self.args["fit_insights"] = fit_report_as_dict(
            inpars=self.result, settings=self.minimizer, modelpars=self.result.params
        )
        if self.args["conf_interval"]:
            try:
                _min_rel_change = self.args["conf_interval"].pop("min_rel_change", None)
                ci = ConfidenceInterval(
                    self.minimizer, self.result, **self.args["conf_interval"]
                )
                if _min_rel_change is not None:
                    ci.min_rel_change = _min_rel_change
                    self.args["conf_interval"]["min_rel_change"] = _min_rel_change

                trace = self.args["conf_interval"].get("trace")

                if trace is True:
                    self.args["confidence_interval"] = (ci.calc_all_ci(), ci.trace_dict)
                else:
                    self.args["confidence_interval"] = ci.calc_all_ci()

            except (MinimizerException, ValueError, KeyError) as exc:
                print(f"Error: {exc} -> No confidence interval could be calculated!")
                self.args["confidence_interval"] = {}

    def make_residual_fit(self) -> None:
        r"""Make the residuals of the model and the fit.

        !!! note "About Residual and Fit"

            The residual is calculated as the difference between the best fit `model`
            and the reference `data`. In case of a global fitting, the residuals are
            calculated for each `spectra` separately plus an averaged global residual.

            $$
            \mathrm{residual} = \mathrm{model} - \mathrm{data}
            $$
            $$
            \mathrm{residual}_{i} = \mathrm{model}_{i} - \mathrm{data}_{i}
            $$
            $$
            \mathrm{residual}_{avg} = \frac{\sum_{i=1}^{N}
                \left(\mathrm{model}_{i} - \mathrm{data}_{i}\right)}{N}
            $$

            Here, $N$ is the number of spectra. The fit itself is reconstructed by
            adding the residual to the reference data. In case of a global fitting,
            the residuals are calculated for each `spectra` separately.
        """
        df_copy: pd.DataFrame = self.df.copy()
        if self.args["global_"]:
            residual = self.result.residual.reshape((-1, self.data_size)).T
            for i, _residual in enumerate(residual, start=1):
                df_copy[f"{ColumnNamesAPI().residual}_{i}"] = _residual
                df_copy[f"{ColumnNamesAPI().fit}_{i}"] = (
                    self.df[f"{ColumnNamesAPI().intensity}_{i}"].to_numpy() + _residual
                )
            df_copy[f"{ColumnNamesAPI().residual}_avg"] = np.mean(residual, axis=0)
        else:
            residual = self.result.residual
            df_copy[ColumnNamesAPI().residual] = residual
            df_copy[ColumnNamesAPI().fit] = (
                self.df[ColumnNamesAPI().intensity].to_numpy() + residual
            )
        self.df = df_copy

    def make_fit_contributions(self) -> None:
        """Make the fit contributions of the best fit model.

        !!! info "About Fit Contributions"
            The fit contributions are made independently of the local or global fitting.
        """
        self.df = calculated_model(
            params=self.result.params,
            x=self.df.iloc[:, 0].to_numpy(),
            df=self.df,
            global_fit=self.args["global_"],
        )

    def export_correlation2args(self) -> None:
        """Export the correlation matrix to the input file arguments.

        !!! note "About Correlation Matrix"

            The linear correlation matrix is calculated from and for the pandas
            dataframe and divided into two parts:

            1. Linear correlation matrix
            2. Non-linear correlation matrix (coming later ...)

        !!! note "About reading the correlation matrix"

            The correlation matrix is stored in the `args` as a dictionary with the
            following keys:

            * `index`
            * `columns`
            * `data`

            For re-reading the data, it is important to use the following code:

            >>> import pandas as pd
            >>> pd.DataFrame(**args["linear_correlation"])

            The dictionary must be unpacked with `**` so that all three keys and
            their values are passed to the `DataFrame` constructor.
        """
        self.args["linear_correlation"] = self.df.corr().to_dict(orient="split")

    def export_results2args(self) -> None:
        """Export the results of the fit to the input file arguments."""
        self.args["fit_result"] = self.df.to_dict(orient="split")

    def export_regression_metrics2args(self) -> None:
        """Export the regression metrics of the fit to the input file arguments.

        !!! note "About Regression Metrics"
            The regression metrics are calculated by the `statsmodels.stats.diagnostic`
            module.
        """
        self.args["regression_metrics"] = RegressionMetrics(self.df)()

    def export_desprective_statistic2args(self) -> None:
        """Export the descriptive statistic of the spectra, fit, and contributions."""
        self.args["descriptive_statistic"] = self.df.describe(
            percentiles=np.arange(0.1, 1, 0.1).tolist()
        ).to_dict(orient="split")

__call__()

Call the post-processing.

Source code in spectrafit/tools.py
Python
def __call__(self) -> Tuple[pd.DataFrame, Dict[str, Any]]:
    """Call the post-processing."""
    self.make_insight_report()
    self.make_residual_fit()
    self.make_fit_contributions()
    self.export_correlation2args()
    self.export_results2args()
    self.export_regression_metrics2args()
    self.export_desprective_statistic2args()
    return (self.df, self.args)

__init__(df, args, minimizer, result)

Initialize PostProcessing class.

Parameters:

• df (pd.DataFrame): DataFrame containing the input data (x and data), as well as the best fit and the corresponding residuum. Hence, it will be extended by the single contribution of the model. Required.
• args (Dict[str, Any]): The input file arguments as a dictionary with additional information beyond the command line arguments. Required.
• minimizer (Minimizer): The minimizer class. Required.
• result (Any): The result of the minimization of the best fit. Required.
Source code in spectrafit/tools.py
Python
def __init__(
    self, df: pd.DataFrame, args: Dict[str, Any], minimizer: Minimizer, result: Any
) -> None:
    """Initialize PostProcessing class.

    Args:
        df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
             as well as the best fit and the corresponding residuum. Hence, it will
             be extended by the single contribution of the model.
        args (Dict[str, Any]): The input file arguments as a dictionary with
             additional information beyond the command line arguments.
        minimizer (Minimizer): The minimizer class.
        result (Any): The result of the minimization of the best fit.
    """
    self.args = args
    self.df = self.rename_columns(df=df)
    self.minimizer = minimizer
    self.result = result
    self.data_size = self.check_global_fitting()

check_global_fitting()

Check if the global fitting is performed.

About Global Fitting

In case of the global fitting, the data is extended by the single contribution of the model.

Returns:

• Optional[int]: The number of spectra of the global fitting.
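
The spectra count is recovered from the trailing index of the parameter names. A minimal sketch, assuming a naming scheme that ends in the spectrum index (the names below are hypothetical, for illustration only):

Python
# Hypothetical lmfit-style parameter names; only the trailing "_<spectrum>"
# index matters for the split on the last underscore.
param_names = [
    "pseudovoigt_amplitude_1_1",
    "pseudovoigt_amplitude_1_2",
    "pseudovoigt_center_1_2",
]
n_spectra = max(int(name.split("_")[-1]) for name in param_names)
print(n_spectra)  # 2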

Source code in spectrafit/tools.py
Python
def check_global_fitting(self) -> Optional[int]:
    """Check if the global fitting is performed.

    !!! note "About Global Fitting"
        In case of the global fitting, the data is extended by the single
        contribution of the model.

    Returns:
        Optional[int]: The number of spectra of the global fitting.
    """
    if self.args["global_"]:
        return max(
            int(self.result.params[i].name.split("_")[-1])
            for i in self.result.params
        )
    return None

export_correlation2args()

Export the correlation matrix to the input file arguments.

About Correlation Matrix

The linear correlation matrix is calculated from and for the pandas dataframe and divided into two parts:

  1. Linear correlation matrix
  2. Non-linear correlation matrix (coming later ...)

About reading the correlation matrix

The correlation matrix is stored in the args as a dictionary with the following keys:

  • index
  • columns
  • data

For re-reading the data, it is important to use the following code:

>>> import pandas as pd
>>> pd.DataFrame(**args["linear_correlation"])

The dictionary must be unpacked with ** so that all three keys and their values are passed to the DataFrame constructor.
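
A minimal round-trip sketch of the split orientation, using a toy dataframe in place of the real fit results:

Python
import pandas as pd

# Toy data standing in for the fit results dataframe.
df = pd.DataFrame({"energy": [1.0, 2.0, 3.0], "intensity": [0.1, 0.4, 0.2]})

# Store the correlation matrix as a dictionary with the keys
# index, columns, and data ...
args = {"linear_correlation": df.corr().to_dict(orient="split")}

# ... and rebuild the DataFrame later by unpacking all three keys.
restored = pd.DataFrame(**args["linear_correlation"])
print(restored)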

Source code in spectrafit/tools.py
Python
def export_correlation2args(self) -> None:
    """Export the correlation matrix to the input file arguments.

    !!! note "About Correlation Matrix"

        The linear correlation matrix is calculated from and for the pandas
        dataframe and divided into two parts:

        1. Linear correlation matrix
        2. Non-linear correlation matrix (coming later ...)

    !!! note "About reading the correlation matrix"

        The correlation matrix is stored in the `args` as a dictionary with the
        following keys:

        * `index`
        * `columns`
        * `data`

        For re-reading the data, it is important to use the following code:

        >>> import pandas as pd
        >>> pd.DataFrame(**args["linear_correlation"])

        Important is to use the generator function for access the three keys and
        their values.
    """
    self.args["linear_correlation"] = self.df.corr().to_dict(orient="split")

export_desprective_statistic2args()

Export the descriptive statistic of the spectra, fit, and contributions.

Source code in spectrafit/tools.py
Python
def export_desprective_statistic2args(self) -> None:
    """Export the descriptive statistic of the spectra, fit, and contributions."""
    self.args["descriptive_statistic"] = self.df.describe(
        percentiles=np.arange(0.1, 1, 0.1).tolist()
    ).to_dict(orient="split")

export_regression_metrics2args()

Export the regression metrics of the fit to the input file arguments.

About Regression Metrics

The regression metrics are calculated by the statsmodels.stats.diagnostic module.

Source code in spectrafit/tools.py
Python
def export_regression_metrics2args(self) -> None:
    """Export the regression metrics of the fit to the input file arguments.

    !!! note "About Regression Metrics"
        The regression metrics are calculated by the `statsmodels.stats.diagnostic`
        module.
    """
    self.args["regression_metrics"] = RegressionMetrics(self.df)()

export_results2args()

Export the results of the fit to the input file arguments.

Source code in spectrafit/tools.py
Python
def export_results2args(self) -> None:
    """Export the results of the fit to the input file arguments."""
    self.args["fit_result"] = self.df.to_dict(orient="split")

make_fit_contributions()

Make the fit contributions of the best fit model.

About Fit Contributions

The fit contributions are made independently of the local or global fitting.

Source code in spectrafit/tools.py
Python
def make_fit_contributions(self) -> None:
    """Make the fit contributions of the best fit model.

    !!! info "About Fit Contributions"
        The fit contributions are made independently of the local or global fitting.
    """
    self.df = calculated_model(
        params=self.result.params,
        x=self.df.iloc[:, 0].to_numpy(),
        df=self.df,
        global_fit=self.args["global_"],
    )

make_insight_report()

Make an insight-report of the fit statistic.

About Insight Report

The insight report is based on:

1. Configurations
2. Statistics
3. Variables
4. Error-bars
5. Correlations
6. Covariance Matrix
7. Optional: Confidence Interval

All of the above are included in the report as a dictionary in args.

Source code in spectrafit/tools.py
Python
def make_insight_report(self) -> None:
    """Make an insight-report of the fit statistic.

    !!! note "About Insight Report"

        The insight report is based on:

            1. Configurations
            2. Statistics
            3. Variables
            4. Error-bars
            5. Correlations
            6. Covariance Matrix
            7. _Optional_: Confidence Interval

        All of the above are included in the report as a dictionary in `args`.

    """
    self.args["fit_insights"] = fit_report_as_dict(
        inpars=self.result, settings=self.minimizer, modelpars=self.result.params
    )
    if self.args["conf_interval"]:
        try:
            _min_rel_change = self.args["conf_interval"].pop("min_rel_change", None)
            ci = ConfidenceInterval(
                self.minimizer, self.result, **self.args["conf_interval"]
            )
            if _min_rel_change is not None:
                ci.min_rel_change = _min_rel_change
                self.args["conf_interval"]["min_rel_change"] = _min_rel_change

            trace = self.args["conf_interval"].get("trace")

            if trace is True:
                self.args["confidence_interval"] = (ci.calc_all_ci(), ci.trace_dict)
            else:
                self.args["confidence_interval"] = ci.calc_all_ci()

        except (MinimizerException, ValueError, KeyError) as exc:
            print(f"Error: {exc} -> No confidence interval could be calculated!")
            self.args["confidence_interval"] = {}

make_residual_fit()

Make the residuals of the model and the fit.

About Residual and Fit

The residual is calculated as the difference between the best fit model and the reference data. In case of a global fitting, the residuals are calculated for each spectra separately, plus an averaged global residual.

$$
\mathrm{residual} = \mathrm{model} - \mathrm{data}
$$

$$
\mathrm{residual}_{i} = \mathrm{model}_{i} - \mathrm{data}_{i}
$$

$$
\mathrm{residual}_{avg} = \frac{\sum_{i=1}^{N} \left(\mathrm{model}_{i} - \mathrm{data}_{i}\right)}{N}
$$

Here, N is the number of spectra. The fit itself is reconstructed by adding the residual to the reference data. In case of a global fitting, the residuals are calculated for each spectra separately.
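
A NumPy sketch of this bookkeeping for a global fit, assuming the flat residual vector returned by the minimizer is reshaped to one row per spectrum:

Python
import numpy as np

n_points, n_spectra = 5, 3
rng = np.random.default_rng(42)
flat_residual = rng.normal(size=n_points * n_spectra)  # stand-in for result.residual

# One residual row per spectrum ...
residual = flat_residual.reshape((-1, n_spectra)).T  # shape (n_spectra, n_points)

# ... plus the averaged global residual over all spectra.
residual_avg = np.mean(residual, axis=0)  # shape (n_points,)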

Source code in spectrafit/tools.py
Python
def make_residual_fit(self) -> None:
    r"""Make the residuals of the model and the fit.

    !!! note "About Residual and Fit"

        The residual is calculated as the difference between the best fit `model`
        and the reference `data`. In case of a global fitting, the residuals are
        calculated for each `spectra` separately plus an averaged global residual.

        $$
        \mathrm{residual} = \mathrm{model} - \mathrm{data}
        $$
        $$
        \mathrm{residual}_{i} = \mathrm{model}_{i} - \mathrm{data}_{i}
        $$
        $$
        \mathrm{residual}_{avg} = \frac{\sum_{i=1}^{N}
            \left(\mathrm{model}_{i} - \mathrm{data}_{i}\right)}{N}
        $$

        Here, $N$ is the number of spectra. The fit itself is reconstructed by
        adding the residual to the reference data. In case of a global fitting,
        the residuals are calculated for each `spectra` separately.
    """
    df_copy: pd.DataFrame = self.df.copy()
    if self.args["global_"]:
        residual = self.result.residual.reshape((-1, self.data_size)).T
        for i, _residual in enumerate(residual, start=1):
            df_copy[f"{ColumnNamesAPI().residual}_{i}"] = _residual
            df_copy[f"{ColumnNamesAPI().fit}_{i}"] = (
                self.df[f"{ColumnNamesAPI().intensity}_{i}"].to_numpy() + _residual
            )
        df_copy[f"{ColumnNamesAPI().residual}_avg"] = np.mean(residual, axis=0)
    else:
        residual = self.result.residual
        df_copy[ColumnNamesAPI().residual] = residual
        df_copy[ColumnNamesAPI().fit] = (
            self.df[ColumnNamesAPI().intensity].to_numpy() + residual
        )
    self.df = df_copy

rename_columns(df)

Rename the columns of the dataframe.

Rename the columns of the dataframe to the names defined in the input file.

Parameters:

• df (pd.DataFrame): DataFrame containing the original input data, which are individually pre-named. Required.

Returns:

• pd.DataFrame: DataFrame containing renamed columns. All column names are lowercased. In case of a regular fitting, the columns are named energy and intensity. In case of a global fitting, energy stays energy and intensity is extended by a _ and the column index, like: energy and intensity_1, intensity_2, intensity_... depending on the dataset size.
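
A hedged sketch of both renaming branches, using the literal names energy and intensity in place of ColumnNamesAPI:

Python
import pandas as pd

raw = pd.DataFrame({"E / eV": [1.0, 2.0], "XAS": [0.3, 0.7], "XES": [0.1, 0.9]})

# Regular fitting: first column -> energy, second column -> intensity.
regular = raw.iloc[:, :2].rename(
    columns={raw.columns[0]: "energy", raw.columns[1]: "intensity"}
)

# Global fitting: first column -> energy, all others -> intensity_<i>.
global_fit = raw.rename(
    columns={
        col: "energy" if i == 0 else f"intensity_{i}"
        for i, col in enumerate(raw.columns)
    }
)
print(list(regular.columns))     # ['energy', 'intensity']
print(list(global_fit.columns))  # ['energy', 'intensity_1', 'intensity_2']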

Source code in spectrafit/tools.py
Python
def rename_columns(self, df: pd.DataFrame) -> pd.DataFrame:
    """Rename the columns of the dataframe.

    Rename the columns of the dataframe to the names defined in the input file.

    Args:
        df (pd.DataFrame): DataFrame containing the original input data, which are
             individually pre-named.

    Returns:
        pd.DataFrame: DataFrame containing renamed columns. All column names are
             lowercased. In case of a regular fitting, the columns are named `energy`
             and `intensity`. In case of a global fitting, `energy` stays `energy`
             and `intensity` is extended by a `_` and the column index; like: `energy`
             and `intensity_1`, `intensity_2`, `intensity_...` depending on
             the dataset size.
    """
    if self.args["global_"]:
        return df.rename(
            columns={
                col: (
                    ColumnNamesAPI().energy
                    if i == 0
                    else f"{ColumnNamesAPI().intensity}_{i}"
                )
                for i, col in enumerate(df.columns)
            }
        )
    return df.rename(
        columns={
            df.columns[0]: ColumnNamesAPI().energy,
            df.columns[1]: ColumnNamesAPI().intensity,
        }
    )

PreProcessing

Summarize all pre-processing filters together.

Source code in spectrafit/tools.py
Python
class PreProcessing:
    """Summarized all pre-processing-filters  together."""

    def __init__(self, df: pd.DataFrame, args: Dict[str, Any]) -> None:
        """Initialize PreProcessing class.

        Args:
            df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
                 as well as the best fit and the corresponding residuum. Hence, it will
                 be extended by the single contribution of the model.
            args (Dict[str,Any]): The input file arguments as a dictionary with
                 additional information beyond the command line arguments.
        """
        self.df = df
        self.args = args

    def __call__(self) -> Tuple[pd.DataFrame, Dict[str, Any]]:
        """Apply all pre-processing-filters.

        Returns:
            pd.DataFrame: DataFrame containing the input data (`x` and `data`), which
                 are optionally:

                    1. shrunk to a given range
                    2. shifted
                    3. linearly oversampled
                    4. smoothed
            Dict[str,Any]: The input dictionary extended by descriptive statistics.
        """
        df_copy: pd.DataFrame = self.df.copy()
        self.args["data_statistic"] = df_copy.describe(
            percentiles=np.arange(0.1, 1.0, 0.1).tolist()
        ).to_dict(orient="split")
        try:
            if isinstance(self.args["energy_start"], (int, float)) or isinstance(
                self.args["energy_stop"], (int, float)
            ):
                df_copy = self.energy_range(df_copy, self.args)
            if self.args["shift"]:
                df_copy = self.energy_shift(df_copy, self.args)
            if self.args["oversampling"]:
                df_copy = self.oversampling(df_copy, self.args)
            if self.args["smooth"]:
                df_copy = self.smooth_signal(df_copy, self.args)
        except KeyError as exc:
            print(f"KeyError: {exc} is not part of the dataframe!")
            sys.exit(1)
        return (df_copy, self.args)

    @staticmethod
    def energy_range(df: pd.DataFrame, args: Dict[str, Any]) -> pd.DataFrame:
        """Select the energy range for fitting.

        Args:
            df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
                 as well as the best fit and the corresponding residuum. Hence, it will
                 be extended by the single contribution of the model.
            args (Dict[str,Any]): The input file arguments as a dictionary with
                 additional information beyond the command line arguments.

        Returns:
            pd.DataFrame: DataFrame containing the `optimized` input data
                 (`x` and `data`), which are shrunk according to the energy range.
        """
        energy_start: Union[int, float] = args["energy_start"]
        energy_stop: Union[int, float] = args["energy_stop"]

        df_copy = df.copy()
        if isinstance(energy_start, (int, float)) and isinstance(
            energy_stop, (int, float)
        ):
            return df_copy.loc[
                (df[args["column"][0]] >= energy_start)
                & (df[args["column"][0]] <= energy_stop)
            ]
        elif isinstance(energy_start, (int, float)):
            return df_copy.loc[df[args["column"][0]] >= energy_start]
        elif isinstance(energy_stop, (int, float)):
            return df_copy.loc[df[args["column"][0]] <= energy_stop]

    @staticmethod
    def energy_shift(df: pd.DataFrame, args: Dict[str, Any]) -> pd.DataFrame:
        """Shift the energy axis by a given value.

        Args:
            df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
                 as well as the best fit and the corresponding residuum. Hence, it will
                 be extended by the single contribution of the model.
            args (Dict[str,Any]): The input file arguments as a dictionary with
                 additional information beyond the command line arguments.

        Returns:
            pd.DataFrame: DataFrame containing the `optimized` input data
                 (`x` and `data`), which are energy-shifted by the given value.
        """
        df_copy: pd.DataFrame = df.copy()
        df_copy.loc[:, args["column"][0]] = (
            df[args["column"][0]].to_numpy() + args["shift"]
        )
        return df_copy

    @staticmethod
    def oversampling(df: pd.DataFrame, args: Dict[str, Any]) -> pd.DataFrame:
        """Oversampling the data to increase the resolution of the data.

        !!! note "About Oversampling"
            In this implementation of oversampling, the data is oversampled by a
            factor of 5. In case of data with only a few points, the increased
            resolution should make the optimization problem easier to solve. The
            oversampling is based on simple linear interpolation.

        Args:
            df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
                 as well as the best fit and the corresponding residuum. Hence, it will
                 be extended by the single contribution of the model.
            args (Dict[str,Any]): The input file arguments as a dictionary with
                 additional information beyond the command line arguments.

        Returns:
            pd.DataFrame: DataFrame containing the `optimized` input data
                 (`x` and `data`), which are oversampled by a factor of 5.
        """
        x_values = np.linspace(
            df[args["column"][0]].min(),
            df[args["column"][0]].max(),
            5 * df.shape[0],
        )
        y_values = np.interp(
            x_values,
            df[args["column"][0]].to_numpy(),
            df[args["column"][1]].to_numpy(),
        )
        return pd.DataFrame({args["column"][0]: x_values, args["column"][1]: y_values})

    @staticmethod
    def smooth_signal(df: pd.DataFrame, args: Dict[str, Any]) -> pd.DataFrame:
        """Smooth the intensity values.

        Args:
            df (pd.DataFrame): DataFrame containing the input data (`x` and `data`).
            args (Dict[str,Any]): The input file arguments as a dictionary with
                 additional information beyond the command line arguments.

        Returns:
            pd.DataFrame: DataFrame containing the `optimized` input data
                 (`x` and `data`), which are smoothed by the given value.
        """
        box = np.ones(args["smooth"]) / args["smooth"]
        df_copy: pd.DataFrame = df.copy()
        df_copy.loc[:, args["column"][1]] = np.convolve(
            df[args["column"][1]].to_numpy(), box, mode="same"
        )
        return df_copy

__call__()

Apply all pre-processing-filters.

Returns:

• pd.DataFrame: DataFrame containing the input data (x and data), which are optionally:

    1. shrunk to a given range
    2. shifted
    3. linearly oversampled
    4. smoothed

• Dict[str, Any]: The input dictionary extended by descriptive statistics.

Source code in spectrafit/tools.py
Python
def __call__(self) -> Tuple[pd.DataFrame, Dict[str, Any]]:
    """Apply all pre-processing-filters.

    Returns:
        pd.DataFrame: DataFrame containing the input data (`x` and `data`), which
             are optionally:

                1. shrunk to a given range
                2. shifted
                3. linearly oversampled
                4. smoothed
        Dict[str,Any]: The input dictionary extended by descriptive statistics.
    """
    df_copy: pd.DataFrame = self.df.copy()
    self.args["data_statistic"] = df_copy.describe(
        percentiles=np.arange(0.1, 1.0, 0.1).tolist()
    ).to_dict(orient="split")
    try:
        if isinstance(self.args["energy_start"], (int, float)) or isinstance(
            self.args["energy_stop"], (int, float)
        ):
            df_copy = self.energy_range(df_copy, self.args)
        if self.args["shift"]:
            df_copy = self.energy_shift(df_copy, self.args)
        if self.args["oversampling"]:
            df_copy = self.oversampling(df_copy, self.args)
        if self.args["smooth"]:
            df_copy = self.smooth_signal(df_copy, self.args)
    except KeyError as exc:
        print(f"KeyError: {exc} is not part of the dataframe!")
        sys.exit(1)
    return (df_copy, self.args)

__init__(df, args)

Initialize PreProcessing class.

Parameters:

• df (pd.DataFrame): DataFrame containing the input data (x and data), as well as the best fit and the corresponding residuum. Hence, it will be extended by the single contribution of the model. Required.
• args (Dict[str, Any]): The input file arguments as a dictionary with additional information beyond the command line arguments. Required.
Source code in spectrafit/tools.py
Python
def __init__(self, df: pd.DataFrame, args: Dict[str, Any]) -> None:
    """Initialize PreProcessing class.

    Args:
        df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
             as well as the best fit and the corresponding residuum. Hence, it will
             be extended by the single contribution of the model.
        args (Dict[str,Any]): The input file arguments as a dictionary with
             additional information beyond the command line arguments.
    """
    self.df = df
    self.args = args

energy_range(df, args) staticmethod

Select the energy range for fitting.

Parameters:

• df (pd.DataFrame): DataFrame containing the input data (x and data), as well as the best fit and the corresponding residuum. Hence, it will be extended by the single contribution of the model. Required.
• args (Dict[str, Any]): The input file arguments as a dictionary with additional information beyond the command line arguments. Required.

Returns:

• pd.DataFrame: DataFrame containing the optimized input data (x and data), which are shrunk according to the energy range.
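
A usage sketch with a hypothetical args dictionary; only rows inside the closed interval [energy_start, energy_stop] survive:

Python
import pandas as pd

from spectrafit.tools import PreProcessing

df = pd.DataFrame(
    {"energy": [0.0, 1.0, 2.0, 3.0], "intensity": [1.0, 2.0, 3.0, 4.0]}
)
args = {"energy_start": 1.0, "energy_stop": 2.0, "column": ["energy", "intensity"]}

print(PreProcessing.energy_range(df, args))  # keeps the rows at 1.0 and 2.0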

Source code in spectrafit/tools.py
Python
@staticmethod
def energy_range(df: pd.DataFrame, args: Dict[str, Any]) -> pd.DataFrame:
    """Select the energy range for fitting.

    Args:
        df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
             as well as the best fit and the corresponding residuum. Hence, it will
             be extended by the single contribution of the model.
        args (Dict[str,Any]): The input file arguments as a dictionary with
             additional information beyond the command line arguments.

    Returns:
        pd.DataFrame: DataFrame containing the `optimized` input data
             (`x` and `data`), which are shrunk according to the energy range.
    """
    energy_start: Union[int, float] = args["energy_start"]
    energy_stop: Union[int, float] = args["energy_stop"]

    df_copy = df.copy()
    if isinstance(energy_start, (int, float)) and isinstance(
        energy_stop, (int, float)
    ):
        return df_copy.loc[
            (df[args["column"][0]] >= energy_start)
            & (df[args["column"][0]] <= energy_stop)
        ]
    elif isinstance(energy_start, (int, float)):
        return df_copy.loc[df[args["column"][0]] >= energy_start]
    elif isinstance(energy_stop, (int, float)):
        return df_copy.loc[df[args["column"][0]] <= energy_stop]

energy_shift(df, args) staticmethod

Shift the energy axis by a given value.

Parameters:

• df (pd.DataFrame): DataFrame containing the input data (x and data), as well as the best fit and the corresponding residuum. Hence, it will be extended by the single contribution of the model. Required.
• args (Dict[str, Any]): The input file arguments as a dictionary with additional information beyond the command line arguments. Required.

Returns:

• pd.DataFrame: DataFrame containing the optimized input data (x and data), which are energy-shifted by the given value.

Source code in spectrafit/tools.py
Python
@staticmethod
def energy_shift(df: pd.DataFrame, args: Dict[str, Any]) -> pd.DataFrame:
    """Shift the energy axis by a given value.

    Args:
        df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
             as well as the best fit and the corresponding residuum. Hence, it will
             be extended by the single contribution of the model.
        args (Dict[str,Any]): The input file arguments as a dictionary with
             additional information beyond the command line arguments.

    Returns:
        pd.DataFrame: DataFrame containing the `optimized` input data
             (`x` and `data`), which are energy-shifted by the given value.
    """
    df_copy: pd.DataFrame = df.copy()
    df_copy.loc[:, args["column"][0]] = (
        df[args["column"][0]].to_numpy() + args["shift"]
    )
    return df_copy

oversampling(df, args) staticmethod

Oversampling the data to increase the resolution of the data.

About Oversampling

In this implementation of oversampling, the data is oversampled by a factor of 5. In case of data with only a few points, the increased resolution should make the optimization problem easier to solve. The oversampling is based on simple linear interpolation.

Parameters:

• df (pd.DataFrame): DataFrame containing the input data (x and data), as well as the best fit and the corresponding residuum. Hence, it will be extended by the single contribution of the model. Required.
• args (Dict[str, Any]): The input file arguments as a dictionary with additional information beyond the command line arguments. Required.

Returns:

• pd.DataFrame: DataFrame containing the optimized input data (x and data), which are oversampled by a factor of 5.
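
A standalone sketch of the interpolation step, assuming hypothetical column names in args["column"]:

Python
import numpy as np
import pandas as pd

df = pd.DataFrame({"energy": [0.0, 1.0, 2.0], "intensity": [0.0, 1.0, 0.0]})
args = {"column": ["energy", "intensity"]}  # hypothetical column mapping

x_col, y_col = args["column"]
x_new = np.linspace(df[x_col].min(), df[x_col].max(), 5 * df.shape[0])
y_new = np.interp(x_new, df[x_col].to_numpy(), df[y_col].to_numpy())

df_oversampled = pd.DataFrame({x_col: x_new, y_col: y_new})
print(df_oversampled.shape)  # (15, 2): five times the original 3 rows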

Source code in spectrafit/tools.py
Python
@staticmethod
def oversampling(df: pd.DataFrame, args: Dict[str, Any]) -> pd.DataFrame:
    """Oversampling the data to increase the resolution of the data.

    !!! note "About Oversampling"
        In this implementation of oversampling, the data is oversampled by a
        factor of 5. In case of data with only a few points, the increased
        resolution should make the optimization problem easier to solve. The
        oversampling is based on simple linear interpolation.

    Args:
        df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
             as well as the best fit and the corresponding residuum. Hence, it will
             be extended by the single contribution of the model.
        args (Dict[str,Any]): The input file arguments as a dictionary with
             additional information beyond the command line arguments.

    Returns:
        pd.DataFrame: DataFrame containing the `optimized` input data
             (`x` and `data`), which are oversampled by a factor of 5.
    """
    x_values = np.linspace(
        df[args["column"][0]].min(),
        df[args["column"][0]].max(),
        5 * df.shape[0],
    )
    y_values = np.interp(
        x_values,
        df[args["column"][0]].to_numpy(),
        df[args["column"][1]].to_numpy(),
    )
    return pd.DataFrame({args["column"][0]: x_values, args["column"][1]: y_values})

smooth_signal(df, args) staticmethod

Smooth the intensity values.

Parameters:

• df (pd.DataFrame): DataFrame containing the input data (x and data). Required.
• args (Dict[str, Any]): The input file arguments as a dictionary with additional information beyond the command line arguments. Required.

Returns:

• pd.DataFrame: DataFrame containing the optimized input data (x and data), which are smoothed by the given value.
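
A minimal sketch of the box-filter smoothing, assuming args["smooth"] = 3:

Python
import numpy as np

smooth = 3  # hypothetical value of args["smooth"]
intensity = np.array([0.0, 1.0, 0.0, 1.0, 0.0])

box = np.ones(smooth) / smooth  # normalized boxcar kernel
smoothed = np.convolve(intensity, box, mode="same")
print(smoothed)  # moving average; mode="same" keeps the original length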

Source code in spectrafit/tools.py
Python
@staticmethod
def smooth_signal(df: pd.DataFrame, args: Dict[str, Any]) -> pd.DataFrame:
    """Smooth the intensity values.

    Args:
        df (pd.DataFrame): DataFrame containing the input data (`x` and `data`).
        args (Dict[str,Any]): The input file arguments as a dictionary with
             additional information beyond the command line arguments.

    Returns:
        pd.DataFrame: DataFrame containing the `optimized` input data
             (`x` and `data`), which are smoothed by the given value.
    """
    box = np.ones(args["smooth"]) / args["smooth"]
    df_copy: pd.DataFrame = df.copy()
    df_copy.loc[:, args["column"][1]] = np.convolve(
        df[args["column"][1]].to_numpy(), box, mode="same"
    )
    return df_copy

SaveResult

Saving the result of the fitting process.

Source code in spectrafit/tools.py
Python
class SaveResult:
    """Saving the result of the fitting process."""

    def __init__(self, df: pd.DataFrame, args: Dict[str, Any]) -> None:
        """Initialize SaveResult class.

        !!! note "About SaveResult"

            The SaveResult class is responsible for saving the results of the
            optimization process. The results are saved in the following formats:

            1. JSON (default) for all results and meta data of the fitting process.
            2. CSV for the results of the optimization process.

        !!! note "About the output `CSV`-file"

            The output files are separated into three classes:

                1. The `results` of the optimization process.
                2. The `correlation analysis` of the optimization process.
                3. The `error analysis` of the optimization process.

            The result output file contains the following information:

                1. The column names of the energy axis (`x`) and the intensity values
                (`data`)
                2. The name of the column containing the energy axis (`x`)
                3. The name of the column containing the intensity values (`data`)
                4. The name of the column containing the best fit (`best_fit`)
                5. The name of the column containing the residuum (`residuum`)
                6. The name of the column containing the model contribution (`model`)
                7. The name of the column containing the error of the model
                    contribution (`model_error`)
                8. The name of the column containing the error of the best fit
                    (`best_fit_error`)
                9. The name of the column containing the error of the residuum
                    (`residuum_error`)

            The `correlation analysis` file contains the following information about all
            attributes of the model:

                1. Energy
                2. Intensity or Intensities (global fitting)
                3. Residuum
                4. Best fit
                5. Model contribution(s)

            The `error analysis` file contains the following information about all model
            attributes vs:

                1. Initial model values
                2. Current model values
                3. Best model values
                4. Residuum / error relative to the best fit
                5. Residuum / error relative to the absolute fit

        Args:
            df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
                 as well as the best fit and the corresponding residuum. Hence, it will
                 be extended by the single contribution of the model.
            args (Dict[str,Any]): The input file arguments as a dictionary with
                 additional information beyond the command line arguments.
        """
        self.df = df
        self.args = transform_nested_types(args)

    def __call__(self) -> None:
        """Call the SaveResult class."""
        self.save_as_json()
        self.save_as_csv()

    def save_as_csv(self) -> None:
        """Save the the fit results to csv files.

        !!! note "About saving the fit results"
            The fit results are saved to csv files and are divided into three different
            categories:

                1. The `results` of the optimization process.
                2. The `correlation analysis` of the optimization process.
                3. The `error analysis` of the optimization process.
        """
        _fname = Path(f"{self.args['outfile']}_fit.csv")
        self.df.to_csv(_fname, index=False)
        pd.DataFrame(**self.args["linear_correlation"]).to_csv(
            Path(f"{self.args['outfile']}_correlation.csv"),
            index=True,
            index_label="attributes",
        )
        pd.DataFrame.from_dict(self.args["fit_insights"]["variables"]).to_csv(
            Path(f"{self.args['outfile']}_components.csv"),
            index=True,
            index_label="attributes",
        )

    def save_as_json(self) -> None:
        """Save the fitting result as json file."""
        if self.args["outfile"]:
            with Path(f"{self.args['outfile']}_summary.json").open(
                "w", encoding="utf-8"
            ) as f:
                json.dump(transform_nested_types(self.args), f, indent=4)
        else:
            raise FileNotFoundError("No output file provided!")

__call__()

Call the SaveResult class.

Source code in spectrafit/tools.py
Python
def __call__(self) -> None:
    """Call the SaveResult class."""
    self.save_as_json()
    self.save_as_csv()

__init__(df, args)

Initialize SaveResult class.

About SaveResult

The SaveResult class is responsible for saving the results of the optimization process. The results are saved in the following formats:

  1. JSON (default) for all results and meta data of the fitting process.
  2. CSV for the results of the optimization process.

About the output CSV-file

The output files are separated into three classes:

1. The `results` of the optimization process.
2. The `correlation analysis` of the optimization process.
3. The `error analysis` of the optimization process.

The result output file contains the following information:

1. The column names of the energy axis (`x`) and the intensity values
(`data`)
2. The name of the column containing the energy axis (`x`)
3. The name of the column containing the intensity values (`data`)
4. The name of the column containing the best fit (`best_fit`)
5. The name of the column containing the residuum (`residuum`)
6. The name of the column containing the model contribution (`model`)
7. The name of the column containing the error of the model
    contribution (`model_error`)
8. The name of the column containing the error of the best fit
    (`best_fit_error`)
9. The name of the column containing the error of the residuum
    (`residuum_error`)

The correlation analysis file contains the following information about all attributes of the model:

1. Energy
2. Intensity or Intensities (global fitting)
3. Residuum
4. Best fit
5. Model contribution(s)

The error analysis file contains the following information about all model attributes vs:

1. Initial model values
2. Current model values
3. Best model values
4. Residuum / error relative to the best fit
5. Residuum / error relative to the absolute fit

Parameters:

• df (pd.DataFrame): DataFrame containing the input data (x and data), as well as the best fit and the corresponding residuum. Hence, it will be extended by the single contribution of the model. Required.
• args (Dict[str, Any]): The input file arguments as a dictionary with additional information beyond the command line arguments. Required.
Source code in spectrafit/tools.py
Python
def __init__(self, df: pd.DataFrame, args: Dict[str, Any]) -> None:
    """Initialize SaveResult class.

    !!! note "About SaveResult"

        The SaveResult class is responsible for saving the results of the
        optimization process. The results are saved in the following formats:

        1. JSON (default) for all results and meta data of the fitting process.
        2. CSV for the results of the optimization process.

    !!! note "About the output `CSV`-file"

        The output files are separated into three classes:

            1. The `results` of the optimization process.
            2. The `correlation analysis` of the optimization process.
            3. The `error analysis` of the optimization process.

        The result output file contains the following information:

            1. The column names of the energy axis (`x`) and the intensity values
            (`data`)
            2. The name of the column containing the energy axis (`x`)
            3. The name of the column containing the intensity values (`data`)
            4. The name of the column containing the best fit (`best_fit`)
            5. The name of the column containing the residuum (`residuum`)
            6. The name of the column containing the model contribution (`model`)
            7. The name of the column containing the error of the model
                contribution (`model_error`)
            8. The name of the column containing the error of the best fit
                (`best_fit_error`)
            9. The name of the column containing the error of the residuum
                (`residuum_error`)

        The `correlation analysis` file contains the following information about all
        attributes of the model:

            1. Energy
            2. Intensity or Intensities (global fitting)
            3. Residuum
            4. Best fit
            5. Model contribution(s)

        The `error analysis` file contains the following information about all model
        attributes vs:

            1. Initial model values
            2. Current model values
            3. Best model values
            4. Residuum / error relative to the best fit
            5. Residuum / error relative to the absolute fit

    Args:
        df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
             as well as the best fit and the corresponding residuum. Hence, it will
             be extended by the single contribution of the model.
        args (Dict[str,Any]): The input file arguments as a dictionary with
             additional information beyond the command line arguments.
    """
    self.df = df
    self.args = transform_nested_types(args)

save_as_csv()

Save the fit results to csv files.

About saving the fit results

The fit results are saved to csv files and are divided into three different categories:

1. The `results` of the optimization process.
2. The `correlation analysis` of the optimization process.
3. The `error analysis` of the optimization process.
Source code in spectrafit/tools.py
Python
def save_as_csv(self) -> None:
    """Save the the fit results to csv files.

    !!! note "About saving the fit results"
        The fit results are saved to csv files and are divided into three different
        categories:

            1. The `results` of the optimization process.
            2. The `correlation analysis` of the optimization process.
            3. The `error analysis` of the optimization process.
    """
    _fname = Path(f"{self.args['outfile']}_fit.csv")
    self.df.to_csv(_fname, index=False)
    pd.DataFrame(**self.args["linear_correlation"]).to_csv(
        Path(f"{self.args['outfile']}_correlation.csv"),
        index=True,
        index_label="attributes",
    )
    pd.DataFrame.from_dict(self.args["fit_insights"]["variables"]).to_csv(
        Path(f"{self.args['outfile']}_components.csv"),
        index=True,
        index_label="attributes",
    )

save_as_json()

Save the fitting result as json file.

Source code in spectrafit/tools.py
Python
def save_as_json(self) -> None:
    """Save the fitting result as json file."""
    if self.args["outfile"]:
        with Path(f"{self.args['outfile']}_summary.json").open(
            "w", encoding="utf-8"
        ) as f:
            json.dump(transform_nested_types(self.args), f, indent=4)
    else:
        raise FileNotFoundError("No output file provided!")

check_keywords_consistency(check_args, ref_args)

Check if the keywords are consistent.

Check if the keywords are consistent between two dictionaries. The two dictionaries are the reference keywords of the cmd_line_args and the args of the input_file.

Parameters:

• check_args (MutableMapping[str, Any]): First dictionary to be checked. Required.
• ref_args (Dict[str, Any]): Second dictionary to be checked. Required.

Raises:

• KeyError: If the keywords are not consistent.
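
A short usage sketch; the keyword names below are assumptions for illustration:

Python
from spectrafit.tools import check_keywords_consistency

ref_args = {"infile": None, "outfile": None, "global_": False}

check_keywords_consistency({"infile": "data.csv"}, ref_args)  # passes silently
check_keywords_consistency({"inflie": "data.csv"}, ref_args)  # raises KeyError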

Source code in spectrafit/tools.py
Python
def check_keywords_consistency(
    check_args: MutableMapping[str, Any], ref_args: Dict[str, Any]
) -> None:
    """Check if the keywords are consistent.

    Check if the keywords are consistent between two dictionaries. The two dictionaries
    are the reference keywords of the `cmd_line_args` and the `args` of the `input_file`.

    Args:
        check_args (MutableMapping[str, Any]): First dictionary to be checked.
        ref_args (Dict[str,Any]): Second dictionary to be checked.

    Raises:
        KeyError: If the keywords are not consistent.
    """
    for key in check_args:
        if key not in ref_args.keys():
            raise KeyError(f"ERROR: The {key} is not parameter of the `cmd-input`!")

exclude_none_dictionary(value)

Exclude None values from the dictionary.

Parameters:

• value (Dict[str, Any]): Dictionary to be processed to exclude None values. Required.

Returns:

• Dict[str, Any]: Dictionary without None values.
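
A usage sketch; None values are dropped recursively, including inside nested dictionaries and lists:

Python
from spectrafit.tools import exclude_none_dictionary

raw = {"a": 1, "b": None, "c": {"d": None, "e": 2}, "f": [1, None, 3]}
print(exclude_none_dictionary(raw))
# {'a': 1, 'c': {'e': 2}, 'f': [1, 3]}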

Source code in spectrafit/tools.py
Python
def exclude_none_dictionary(value: Dict[str, Any]) -> Dict[str, Any]:
    """Exclude `None` values from the dictionary.

    Args:
        value (Dict[str, Any]): Dictionary to be processed to
            exclude `None` values.

    Returns:
        Dict[str, Any]: Dictionary without `None` values.
    """
    if isinstance(value, list):
        return [exclude_none_dictionary(v) for v in value if v is not None]
    elif isinstance(value, dict):
        return {
            k: exclude_none_dictionary(v) for k, v in value.items() if v is not None
        }
    else:
        return value

load_data(args)

Load the data from a txt file.

About the data format

Load data from a text file, which can be an ASCII file such as txt, csv, or a user-specific but regularly structured file. The file can be separated by a delimiter.

In case of 2D data, the columns have to be defined. In case of 3D data, all columns are considered as data.

Parameters:

• args (Dict[str, str]): The input file arguments as a dictionary with additional information beyond the command line arguments. Required.

Returns:

• pd.DataFrame: DataFrame containing the input data (x and data), as well as the best fit and the corresponding residuum. Hence, it will be extended by the single contribution of the model.
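
A runnable usage sketch for a local (non-global) fit; the file content and key values are assumptions, but the key names follow the pd.read_csv parameters used in the source below:

Python
from pathlib import Path

from spectrafit.tools import load_data

# Hypothetical two-column input file.
Path("spectrum.txt").write_text("# energy,intensity\n1.0,0.2\n2.0,0.8\n")

args = {
    "global_": False,
    "infile": "spectrum.txt",
    "separator": ",",
    "header": None,
    "column": [0, 1],
    "decimal": ".",
    "comment": "#",
}
df = load_data(args)  # DataFrame with the two selected numeric columns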

Source code in spectrafit/tools.py
Python
def load_data(args: Dict[str, str]) -> pd.DataFrame:
    """Load the data from a txt file.

    !!! note "About the data format"

        Load data from a text file, which can be an ASCII file such as txt, csv,
        or a user-specific but regularly structured file. The file can be
        separated by a delimiter.

        In case of 2D data, the columns have to be defined. In case of 3D data,
        all columns are considered as data.

    Args:
        args (Dict[str,str]): The input file arguments as a dictionary with additional
             information beyond the command line arguments.

    Returns:
        pd.DataFrame: DataFrame containing the input data (`x` and `data`),
             as well as the best fit and the corresponding residuum. Hence, it will be
             extended by the single contribution of the model.
    """
    try:
        if args["global_"]:
            return pd.read_csv(
                args["infile"],
                sep=args["separator"],
                header=args["header"],
                dtype=np.float64,
                decimal=args["decimal"],
                comment=args["comment"],
            )
        return pd.read_csv(
            args["infile"],
            sep=args["separator"],
            header=args["header"],
            usecols=args["column"],
            dtype=np.float64,
            decimal=args["decimal"],
            comment=args["comment"],
        )
    except ValueError as exc:
        print(f"Error: {exc} -> Dataframe contains non numeric data!")
        sys.exit(1)

pkl2any(pkl_fname, encoding='latin1')

Load a pkl file and return the data as any type of data or object.

Parameters:

• pkl_fname (Path): The pkl file to load. Required.
• encoding (str, optional): The encoding to use. Defaults to "latin1".

Raises:

• ValueError: If the file format is not supported.

Returns:

• Any: Data or objects, which can contain various data types supported by pickle.

Source code in spectrafit/tools.py
Python
def pkl2any(pkl_fname: Path, encoding: str = "latin1") -> Any:
    """Load a pkl file and return the data as a any type of data or object.

    Args:
        pkl_fname (Path): The pkl file to load.
        encoding (str, optional): The encoding to use. Defaults to "latin1".

    Raises:
        ValueError: If the file format is not supported.

    Returns:
        Any: Data or objects, which can contain various data types supported by pickle.
    """
    if pkl_fname.suffix == ".gz":
        with gzip.open(pkl_fname, "rb") as f:
            return unicode_check(f, encoding=encoding)
    elif pkl_fname.suffix == ".pkl":
        with pkl_fname.open("rb") as f:
            return unicode_check(f, encoding=encoding)
    else:
        choices = [".pkl", ".pkl.gz"]
        raise ValueError(
            f"File format '{pkl_fname.suffix}' is not supported. "
            f"Supported file formats are: {choices}"
        )

pure_fname(fname)

Return the filename without the suffix.

Pure filename without the suffix is implemented to avoid the problem with multiple dots in filenames like test.pkl.gz or test.tar.gz. The stem attribute of the Path class returns the filename without the suffix, but it removes only the last suffix; hence, test.pkl.gz would be returned as test.pkl and not as test. This function therefore strips all suffixes recursively and returns the pure filename.

Parameters:

• fname (Path): The filename to be processed. Required.

Returns:

• Path: The filename without the suffix.
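
A short demo of why the plain stem attribute is not enough:

Python
from pathlib import Path

from spectrafit.tools import pure_fname

print(Path("data/test.pkl.gz").stem)         # 'test.pkl' -> one suffix removed
print(pure_fname(Path("data/test.pkl.gz")))  # 'data/test' -> all suffixes removed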

Source code in spectrafit/tools.py
Python
def pure_fname(fname: Path) -> Path:
    """Return the filename without the suffix.

    Pure filename without the suffix is implemented to avoid the problem with
    multiple dots in the filename like `test.pkl.gz` or `test.tar.gz`.
    The `stem` attribute of the `Path` class returns the filename without the
    suffix, but it removes only the last suffix. Hence, the `test.pkl.gz`
    will be returned as `test.pkl` and not as `test`. This function returns
    the filename without the suffix. It is implemented recursively to remove
    all suffixes.

    Args:
        fname (Path): The filename to be processed.

    Returns:
        Path: The filename without the suffix.
    """
    _fname = fname.parent / fname.stem
    return pure_fname(_fname) if _fname.suffix else _fname

read_input_file(fname)

Read the input file.

Read the input file as toml, json, or yaml files and return as a dictionary.

Parameters:

• fname (Path): Name of the input file. Required.

Raises:

• OSError: If the input file is not supported.

Returns:

• MutableMapping[str, Any]: The input file arguments as a dictionary with additional information beyond the command line arguments.

Source code in spectrafit/tools.py
Python
def read_input_file(fname: Path) -> MutableMapping[str, Any]:
    """Read the input file.

    Read the input file as `toml`, `json`, or `yaml` files and return as a dictionary.

    Args:
        fname (Path): Name of the input file.

    Raises:
        OSError: If the input file is not supported.

    Returns:
        dict: Return the input file arguments as a dictionary with additional
             information beyond the command line arguments.

    """
    fname = Path(fname)

    if fname.suffix == ".toml":
        with fname.open("rb") as f:
            args = tomli.load(f)
    elif fname.suffix == ".json":
        with fname.open(encoding="utf-8") as f:
            args = json.load(f)
    elif fname.suffix in {".yaml", ".yml"}:
        with fname.open(encoding="utf-8") as f:
            args = yaml.load(f, Loader=yaml.FullLoader)
    else:
        raise OSError(
            f"ERROR: Input file {fname} has not supported file format.\n"
            "Supported fileformats are: '*.json', '*.yaml', and '*.toml'"
        )
    return args

transform_nested_types(value)

Transform nested numpy values to python values.

Parameters:

• value (Dict[str, Any]): Dictionary to be processed to transform numpy values to python values. Required.

Returns:

• Dict[str, Any]: Dictionary with python values.
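
A usage sketch showing why this matters for the JSON export: json.dumps rejects np.int64 and np.ndarray values, while the transformed plain-Python types serialize cleanly:

Python
import json

import numpy as np

from spectrafit.tools import transform_nested_types

payload = {"n": np.int64(3), "fit": np.array([1.0, 2.0]), "ok": np.bool_(True)}
print(json.dumps(transform_nested_types(payload)))
# {"n": 3, "fit": [1.0, 2.0], "ok": true}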

Source code in spectrafit/tools.py
Python
def transform_nested_types(value: Dict[str, Any]) -> Dict[str, Any]:
    """Transform nested types numpy values to python values.

    Args:
        value (Dict[str, Any]): Dictionary to be processed to
            transform numpy values to python values.

    Returns:
        Dict[str, Any]: Dictionary with python values.
    """
    if isinstance(value, list):
        return [transform_nested_types(v) for v in value]
    elif isinstance(value, tuple):
        return tuple(transform_nested_types(v) for v in value)
    elif isinstance(value, dict):
        return {k: transform_nested_types(v) for k, v in value.items()}
    elif isinstance(value, np.ndarray):
        return transform_nested_types(value.tolist())
    elif isinstance(value, np.int32):
        return int(value)
    elif isinstance(value, np.int64):
        return int(value)
    elif isinstance(value, np.bool_):
        return bool(value)
    elif isinstance(value, np.float64):
        return float(value)
    else:
        return value

unicode_check(f, encoding='latin1')

Check if the pkl file is encoded in unicode.

Parameters:

• f (Any): The pkl file to load. Required.
• encoding (str, optional): The encoding to use. Defaults to "latin1".

Returns:

• Any: The pkl file, which can be a nested dictionary containing raw data, metadata, and other information.

Source code in spectrafit/tools.py
Python
def unicode_check(f: Any, encoding: str = "latin1") -> Any:
    """Check if the pkl file is encoded in unicode.

    Args:
        f (Any): The pkl file to load.
        encoding (str, optional): The encoding to use. Defaults to "latin1".

    Returns:
        Any: The pkl file, which can be a nested dictionary containing raw data,
            metadata, and other information.
    """
    try:
        data_dict = pickle.load(f)
    except UnicodeDecodeError:  # pragma: no cover
        data_dict = pickle.load(f, encoding=encoding)
    return data_dict