Tools

Collection of essential tools for running SpectraFit.

PostProcessing

Post-processing of the dataframe.

Source code in spectrafit/tools.py
class PostProcessing:
    """Post-processing of the dataframe."""

    def __init__(
        self, df: pd.DataFrame, args: Dict[str, Any], minimizer: Minimizer, result: Any
    ) -> None:
        """Initialize PostProcessing class.

        Args:
            df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
                 as well as the best fit and the corresponding residuum. Hence, it will
                 be extended by the single contribution of the model.
            args (Dict[str, Any]): The input file arguments as a dictionary with
                 additional information beyond the command line arguments.
            minimizer (Minimizer): The minimizer class.
            result (Any): The result of the minimization of the best fit.
        """
        self.args = args
        self.df = self.rename_columns(df=df)
        self.minimizer = minimizer
        self.result = result
        self.data_size = self.check_global_fitting()

    def __call__(self) -> Tuple[pd.DataFrame, Dict[str, Any]]:
        """Call the post-processing."""
        self.make_insight_report()
        self.make_residual_fit()
        self.make_fit_contributions()
        self.export_correlation2args()
        self.export_results2args()
        self.export_regression_metrics2args()
        self.export_desprective_statistic2args()
        return (self.df, self.args)

    def check_global_fitting(self) -> Optional[int]:
        """Check if the global fitting is performed.

        !!! note "About Global Fitting"
            In case of the global fitting, the data is extended by the single
            contribution of the model.

        Returns:
            Optional[int]: The number of spectra of the global fitting.
        """
        if self.args["global_"]:
            return max(
                int(self.result.params[i].name.split("_")[-1])
                for i in self.result.params
            )
        return None

    def rename_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Rename the columns of the dataframe.

        Rename the columns of the dataframe to the names defined in the input file.

        Args:
            df (pd.DataFrame): DataFrame containing the original input data, which are
                 individually pre-named.

        Returns:
            pd.DataFrame: DataFrame containing renamed columns. All column-names are
                 lowered. In case of a regular fitting, the columns are named `energy`
                 and `intensity`. In case of a global fitting, `energy` stays `energy`
                 and `intensity` is extended by a `_`  and column index; like: `energy`
                 and `intensity_1`, `intensity_2`, `intensity_...` depending on
                 the dataset size.
        """
        if self.args["global_"]:
            return df.rename(
                columns={
                    col: ColumnNamesAPI().energy
                    if i == 0
                    else f"{ColumnNamesAPI().intensity}_{i}"
                    for i, col in enumerate(df.columns)
                }
            )
        return df.rename(
            columns={
                df.columns[0]: ColumnNamesAPI().energy,
                df.columns[1]: ColumnNamesAPI().intensity,
            }
        )

    def make_insight_report(self) -> None:
        """Make an insight-report of the fit statistic.

        !!! note "About Insight Report"

            The insight report is based on:

                1. Configurations
                2. Statistics
                3. Variables
                4. Error-bars
                5. Correlations
                6. Covariance Matrix
                7. _Optional_: Confidence Interval

            All of the above are included in the report as a dictionary in `args`.

        """
        self.args["fit_insights"] = fit_report_as_dict(
            self.result, modelpars=self.result.params
        )
        if self.args["conf_interval"]:
            try:
                self.args["confidence_interval"] = conf_interval(
                    self.minimizer, self.result, **self.args["conf_interval"]
                )
            except (MinimizerException, ValueError, KeyError) as exc:
                print(f"Error: {exc} -> No confidence interval could be calculated!")
                self.args["confidence_interval"] = {}

    def make_residual_fit(self) -> None:
        r"""Make the residuals of the model and the fit.

        !!! note "About Residual and Fit"

            The residual is calculated by the difference of the best fit `model` and
            the reference `data`. In case of a global fitting, the residuals are
            calculated for each `spectrum` separately, plus an averaged global residual.

            $$
            \mathrm{residual} = \mathrm{model} - \mathrm{data}
            $$
            $$
            \mathrm{residual}_{i} = \mathrm{model}_{i} - \mathrm{data}_{i}
            $$
            $$
            \mathrm{residual}_{avg} = \frac{1}{N} \sum_{i=1}^{N}
                \left( \mathrm{model}_{i} - \mathrm{data}_{i} \right)
            $$

            The fit is reconstructed as the sum of the reference `data` and the
            residual. In case of a global fitting, the fits are calculated for each
            `spectrum` separately.
        """
        df_copy: pd.DataFrame = self.df.copy()
        if self.args["global_"]:
            residual = self.result.residual.reshape((-1, self.data_size)).T
            for i, _residual in enumerate(residual, start=1):
                df_copy[f"{ColumnNamesAPI().residual}_{i}"] = _residual
                df_copy[f"{ColumnNamesAPI().fit}_{i}"] = (
                    self.df[f"{ColumnNamesAPI().intensity}_{i}"].to_numpy() + _residual
                )
            df_copy[f"{ColumnNamesAPI().residual}_avg"] = np.mean(residual, axis=0)
        else:
            residual = self.result.residual
            df_copy[ColumnNamesAPI().residual] = residual
            df_copy[ColumnNamesAPI().fit] = (
                self.df[ColumnNamesAPI().intensity].to_numpy() + residual
            )
        self.df = df_copy

    def make_fit_contributions(self) -> None:
        """Make the fit contributions of the best fit model.

        !!! info "About Fit Contributions"
            The fit contributions are made independently of the local or global fitting.
        """
        self.df = calculated_model(
            params=self.result.params,
            x=self.df.iloc[:, 0].to_numpy(),
            df=self.df,
            global_fit=self.args["global_"],
        )

    def export_correlation2args(self) -> None:
        """Export the correlation matrix to the input file arguments.

        !!! note "About Correlation Matrix"

            The linear correlation matrix is calculated from and for the pandas
            dataframe and divided into two parts:

            1. Linear correlation matrix
            2. Non-linear correlation matrix (coming later ...)

        !!! note "About reading the correlation matrix"

            The correlation matrix is stored in the `args` as a dictionary with the
            following keys:

            * `index`
            * `columns`
            * `data`

            For re-reading the data, it is important to use the following code:

            >>> import pandas as pd
            >>> pd.DataFrame(**args["linear_correlation"])

            It is important to unpack the dictionary (`**`) so that all three keys
            and their values are passed to `pd.DataFrame`.
        """
        self.args["linear_correlation"] = self.df.corr().to_dict(orient="split")

    def export_results2args(self) -> None:
        """Export the results of the fit to the input file arguments."""
        self.args["fit_result"] = self.df.to_dict(orient="split")

    def export_regression_metrics2args(self) -> None:
        """Export the regression metrics of the fit to the input file arguments.

        !!! note "About Regression Metrics"
            The regression metrics are calculated by the `statsmodels.stats.diagnostic`
            module.
        """
        self.args["regression_metrics"] = RegressionMetrics(self.df)()

    def export_desprective_statistic2args(self) -> None:
        """Export the descriptive statistic of the spectra, fit, and contributions."""
        self.args["descriptive_statistic"] = self.df.describe(
            percentiles=np.arange(0.1, 1, 0.1)
        ).to_dict(orient="split")

__call__()

Call the post-processing.

Source code in spectrafit/tools.py
def __call__(self) -> Tuple[pd.DataFrame, Dict[str, Any]]:
    """Call the post-processing."""
    self.make_insight_report()
    self.make_residual_fit()
    self.make_fit_contributions()
    self.export_correlation2args()
    self.export_results2args()
    self.export_regression_metrics2args()
    self.export_desprective_statistic2args()
    return (self.df, self.args)
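
A minimal usage sketch (assuming `df`, `args`, and an `lmfit` `Minimizer`/result pair from a previous optimization already exist; the names are illustrative):

>>> post = PostProcessing(df=df, args=args, minimizer=minimizer, result=result)
>>> df, args = post()  # df is extended by residual, fit, and model contributions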

__init__(df, args, minimizer, result)

Initialize PostProcessing class.

Parameters:

- `df` (`pd.DataFrame`, required): DataFrame containing the input data (`x` and `data`), as well as the best fit and the corresponding residuum. Hence, it will be extended by the single contribution of the model.
- `args` (`Dict[str, Any]`, required): The input file arguments as a dictionary with additional information beyond the command line arguments.
- `minimizer` (`Minimizer`, required): The minimizer class.
- `result` (`Any`, required): The result of the minimization of the best fit.
Source code in spectrafit/tools.py
def __init__(
    self, df: pd.DataFrame, args: Dict[str, Any], minimizer: Minimizer, result: Any
) -> None:
    """Initialize PostProcessing class.

    Args:
        df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
             as well as the best fit and the corresponding residuum. Hence, it will
             be extended by the single contribution of the model.
        args (Dict[str, Any]): The input file arguments as a dictionary with
             additional information beyond the command line arguments.
        minimizer (Minimizer): The minimizer class.
        result (Any): The result of the minimization of the best fit.
    """
    self.args = args
    self.df = self.rename_columns(df=df)
    self.minimizer = minimizer
    self.result = result
    self.data_size = self.check_global_fitting()

check_global_fitting()

Check if the global fitting is performed.

About Global Fitting

In case of the global fitting, the data is extended by the single contribution of the model.

Returns:

- `Optional[int]`: The number of spectra of the global fitting.

Source code in spectrafit/tools.py
def check_global_fitting(self) -> Optional[int]:
    """Check if the global fitting is performed.

    !!! note "About Global Fitting"
        In case of the global fitting, the data is extended by the single
        contribution of the model.

    Returns:
        Optional[int]: The number of spectra of the global fitting.
    """
    if self.args["global_"]:
        return max(
            int(self.result.params[i].name.split("_")[-1])
            for i in self.result.params
        )
    return None
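
The number of spectra is recovered from the numeric suffix of the parameter names; a minimal sketch of the same logic with made-up names:

>>> names = ["peak1_amplitude_1", "peak1_amplitude_2", "peak1_center_2"]
>>> max(int(name.split("_")[-1]) for name in names)
2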

export_correlation2args()

Export the correlation matrix to the input file arguments.

About Correlation Matrix

The linear correlation matrix is calculated from and for the pandas dataframe and divided into two parts:

  1. Linear correlation matrix
  2. Non-linear correlation matrix (coming later ...)

About reading the correlation matrix

The correlation matrix is stored in the args as a dictionary with the following keys:

  • index
  • columns
  • data

For re-reading the data, use the following code:

>>> import pandas as pd
>>> pd.DataFrame(**args["linear_correlation"])

It is important to unpack the dictionary (`**`) so that all three keys and their values are passed to `pd.DataFrame`.
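
A self-contained round trip of the `orient="split"` export with toy data:

>>> import pandas as pd
>>> df = pd.DataFrame({"energy": [1.0, 2.0, 3.0], "intensity": [0.1, 0.4, 0.9]})
>>> corr = df.corr().to_dict(orient="split")
>>> sorted(corr.keys())
['columns', 'data', 'index']
>>> pd.DataFrame(**corr).shape
(2, 2)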

Source code in spectrafit/tools.py
def export_correlation2args(self) -> None:
    """Export the correlation matrix to the input file arguments.

    !!! note "About Correlation Matrix"

        The linear correlation matrix is calculated from and for the pandas
        dataframe and divided into two parts:

        1. Linear correlation matrix
        2. Non-linear correlation matrix (coming later ...)

    !!! note "About reading the correlation matrix"

        The correlation matrix is stored in the `args` as a dictionary with the
        following keys:

        * `index`
        * `columns`
        * `data`

        For re-reading the data, it is important to use the following code:

        >>> import pandas as pd
        >>> pd.DataFrame(**args["linear_correlation"])

        It is important to unpack the dictionary (`**`) so that all three keys
        and their values are passed to `pd.DataFrame`.
    """
    self.args["linear_correlation"] = self.df.corr().to_dict(orient="split")

export_desprective_statistic2args()

Export the descriptive statistic of the spectra, fit, and contributions.
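
The exported statistic includes deciles on top of pandas' defaults; a minimal sketch of the underlying call with toy data:

>>> import numpy as np
>>> import pandas as pd
>>> df = pd.DataFrame({"intensity": np.arange(10.0)})
>>> stat = df.describe(percentiles=np.arange(0.1, 1, 0.1))
>>> float(stat.loc["mean", "intensity"])
4.5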

Source code in spectrafit/tools.py
def export_desprective_statistic2args(self) -> None:
    """Export the descriptive statistic of the spectra, fit, and contributions."""
    self.args["descriptive_statistic"] = self.df.describe(
        percentiles=np.arange(0.1, 1, 0.1)
    ).to_dict(orient="split")

export_regression_metrics2args()

Export the regression metrics of the fit to the input file arguments.

About Regression Metrics

The regression metrics are calculated by the statsmodels.stats.diagnostic module.

Source code in spectrafit/tools.py
def export_regression_metrics2args(self) -> None:
    """Export the regression metrics of the fit to the input file arguments.

    !!! note "About Regression Metrics"
        The regression metrics are calculated by the `statsmodels.stats.diagnostic`
        module.
    """
    self.args["regression_metrics"] = RegressionMetrics(self.df)()

export_results2args()

Export the results of the fit to the input file arguments.

Source code in spectrafit/tools.py
def export_results2args(self) -> None:
    """Export the results of the fit to the input file arguments."""
    self.args["fit_result"] = self.df.to_dict(orient="split")

make_fit_contributions()

Make the fit contributions of the best fit model.

About Fit Contributions

The fit contributions are made independently of the local or global fitting.

Source code in spectrafit/tools.py
def make_fit_contributions(self) -> None:
    """Make the fit contributions of the best fit model.

    !!! info "About Fit Contributions"
        The fit contributions are made independently of the local or global fitting.
    """
    self.df = calculated_model(
        params=self.result.params,
        x=self.df.iloc[:, 0].to_numpy(),
        df=self.df,
        global_fit=self.args["global_"],
    )

make_insight_report()

Make an insight-report of the fit statistic.

About Insight Report

The insight report is based on:

1. Configurations
2. Statistics
3. Variables
4. Error-bars
5. Correlations
6. Covariance Matrix
7. _Optional_: Confidence Interval

All of the above are included in the report as a dictionary in args.

Source code in spectrafit/tools.py
def make_insight_report(self) -> None:
    """Make an insight-report of the fit statistic.

    !!! note "About Insight Report"

        The insight report is based on:

            1. Configurations
            2. Statistics
            3. Variables
            4. Error-bars
            5. Correlations
            6. Covariance Matrix
            7. _Optional_: Confidence Interval

        All of the above are included in the report as a dictionary in `args`.

    """
    self.args["fit_insights"] = fit_report_as_dict(
        self.result, modelpars=self.result.params
    )
    if self.args["conf_interval"]:
        try:
            self.args["confidence_interval"] = conf_interval(
                self.minimizer, self.result, **self.args["conf_interval"]
            )
        except (MinimizerException, ValueError, KeyError) as exc:
            print(f"Error: {exc} -> No confidence interval could be calculated!")
            self.args["confidence_interval"] = {}

make_residual_fit()

Make the residuals of the model and the fit.

About Residual and Fit

The residual is calculated by the difference of the best fit model and the reference data. In case of a global fitting, the residuals are calculated for each spectrum separately, plus an averaged global residual.

$$
\mathrm{residual} = \mathrm{model} - \mathrm{data}
$$

$$
\mathrm{residual}_{i} = \mathrm{model}_{i} - \mathrm{data}_{i}
$$

$$
\mathrm{residual}_{avg} = \frac{1}{N} \sum_{i=1}^{N} \left( \mathrm{model}_{i} - \mathrm{data}_{i} \right)
$$

The fit is reconstructed as the sum of the reference data and the residual. In case of a global fitting, the fits are calculated for each spectrum separately.
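
In other words, adding the stored residual back onto the measured data reproduces the best-fit curve; a toy check:

>>> import numpy as np
>>> data = np.array([1.0, 2.0, 3.0])
>>> residual = np.array([0.1, -0.2, 0.05])  # model - data
>>> np.allclose(data + residual, [1.1, 1.8, 3.05])  # the best fit
True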

Source code in spectrafit/tools.py
def make_residual_fit(self) -> None:
    r"""Make the residuals of the model and the fit.

    !!! note "About Residual and Fit"

        The residual is calculated by the difference of the best fit `model` and
        the reference `data`. In case of a global fitting, the residuals are
        calculated for each `spectrum` separately, plus an averaged global residual.

        $$
        \mathrm{residual} = \mathrm{model} - \mathrm{data}
        $$
        $$
        \mathrm{residual}_{i} = \mathrm{model}_{i} - \mathrm{data}_{i}
        $$
        $$
        \mathrm{residual}_{avg} = \frac{1}{N} \sum_{i=1}^{N}
            \left( \mathrm{model}_{i} - \mathrm{data}_{i} \right)
        $$

        The fit is reconstructed as the sum of the reference `data` and the
        residual. In case of a global fitting, the fits are calculated for each
        `spectrum` separately.
    """
    df_copy: pd.DataFrame = self.df.copy()
    if self.args["global_"]:
        residual = self.result.residual.reshape((-1, self.data_size)).T
        for i, _residual in enumerate(residual, start=1):
            df_copy[f"{ColumnNamesAPI().residual}_{i}"] = _residual
            df_copy[f"{ColumnNamesAPI().fit}_{i}"] = (
                self.df[f"{ColumnNamesAPI().intensity}_{i}"].to_numpy() + _residual
            )
        df_copy[f"{ColumnNamesAPI().residual}_avg"] = np.mean(residual, axis=0)
    else:
        residual = self.result.residual
        df_copy[ColumnNamesAPI().residual] = residual
        df_copy[ColumnNamesAPI().fit] = (
            self.df[ColumnNamesAPI().intensity].to_numpy() + residual
        )
    self.df = df_copy

rename_columns(df)

Rename the columns of the dataframe.

Rename the columns of the dataframe to the names defined in the input file.

Parameters:

- `df` (`pd.DataFrame`, required): DataFrame containing the original input data, which are individually pre-named.

Returns:

- `pd.DataFrame`: DataFrame containing renamed columns. All column names are lowered. In case of a regular fitting, the columns are named `energy` and `intensity`. In case of a global fitting, `energy` stays `energy` and `intensity` is extended by a `_` and the column index, e.g. `energy` and `intensity_1`, `intensity_2`, `intensity_...` depending on the dataset size.
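
A minimal sketch of the global-fit mapping with toy column names:

>>> import pandas as pd
>>> df = pd.DataFrame([[0.0, 1.0, 2.0]], columns=["E", "I_a", "I_b"])
>>> df.rename(columns={col: "energy" if i == 0 else f"intensity_{i}"
...                    for i, col in enumerate(df.columns)}).columns.tolist()
['energy', 'intensity_1', 'intensity_2']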

Source code in spectrafit/tools.py
def rename_columns(self, df: pd.DataFrame) -> pd.DataFrame:
    """Rename the columns of the dataframe.

    Rename the columns of the dataframe to the names defined in the input file.

    Args:
        df (pd.DataFrame): DataFrame containing the original input data, which are
             individually pre-named.

    Returns:
        pd.DataFrame: DataFrame containing renamed columns. All column-names are
             lowered. In case of a regular fitting, the columns are named `energy`
             and `intensity`. In case of a global fitting, `energy` stays `energy`
             and `intensity` is extended by a `_`  and column index; like: `energy`
             and `intensity_1`, `intensity_2`, `intensity_...` depending on
             the dataset size.
    """
    if self.args["global_"]:
        return df.rename(
            columns={
                col: ColumnNamesAPI().energy
                if i == 0
                else f"{ColumnNamesAPI().intensity}_{i}"
                for i, col in enumerate(df.columns)
            }
        )
    return df.rename(
        columns={
            df.columns[0]: ColumnNamesAPI().energy,
            df.columns[1]: ColumnNamesAPI().intensity,
        }
    )

PreProcessing

Summarize all pre-processing filters together.

Source code in spectrafit/tools.py
class PreProcessing:
    """Summarized all pre-processing-filters  together."""

    def __init__(self, df: pd.DataFrame, args: Dict[str, Any]) -> None:
        """Initialize PreProcessing class.

        Args:
            df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
                 as well as the best fit and the corresponding residuum. Hence, it will
                 be extended by the single contribution of the model.
            args (Dict[str,Any]): The input file arguments as a dictionary with
                 additional information beyond the command line arguments.
        """
        self.df = df
        self.args = args

    def __call__(self) -> Tuple[pd.DataFrame, Dict[str, Any]]:
        """Apply all pre-processing-filters.

        Returns:
            pd.DataFrame: DataFrame containing the input data (`x` and `data`), which
                 are optionally:

                    1. shrunk to a given range
                    2. shifted
                    3. linearly oversampled
                    4. smoothed
            Dict[str,Any]: Adds a descriptive statistic to the input dictionary.
        """
        df_copy: pd.DataFrame = self.df.copy()
        self.args["data_statistic"] = df_copy.describe(
            percentiles=np.arange(0.1, 1.0, 0.1)
        ).to_dict(orient="split")
        try:
            if isinstance(self.args["energy_start"], (int, float)) or isinstance(
                self.args["energy_stop"], (int, float)
            ):
                df_copy = self.energy_range(df_copy, self.args)
            if self.args["shift"]:
                df_copy = self.energy_shift(df_copy, self.args)
            if self.args["oversampling"]:
                df_copy = self.oversampling(df_copy, self.args)
            if self.args["smooth"]:
                df_copy = self.smooth_signal(df_copy, self.args)
        except KeyError as exc:
            print(f"KeyError: {exc} is not part of the dataframe!")
            sys.exit(1)
        return (df_copy, self.args)

    @staticmethod
    def energy_range(df: pd.DataFrame, args: Dict[str, Any]) -> pd.DataFrame:
        """Select the energy range for fitting.

        Args:
            df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
                 as well as the best fit and the corresponding residuum. Hence, it will
                 be extended by the single contribution of the model.
            args (Dict[str,Any]): The input file arguments as a dictionary with
                 additional information beyond the command line arguments.

        Returns:
            pd.DataFrame: DataFrame containing the `optimized` input data
                 (`x` and `data`), which are shrunk according to the energy range.
        """
        energy_start: Union[int, float] = args["energy_start"]
        energy_stop: Union[int, float] = args["energy_stop"]

        df_copy: pd.DataFrame = df.copy()
        if isinstance(energy_start, (int, float)) and isinstance(
            energy_stop, (int, float)
        ):
            return df_copy.loc[
                (df[args["column"][0]] >= energy_start)
                & (df[args["column"][0]] <= energy_stop)
            ]
        elif isinstance(energy_start, (int, float)):
            return df_copy.loc[df[args["column"][0]] >= energy_start]
        elif isinstance(energy_stop, (int, float)):
            return df_copy.loc[df[args["column"][0]] <= energy_stop]

    @staticmethod
    def energy_shift(df: pd.DataFrame, args: Dict[str, Any]) -> pd.DataFrame:
        """Shift the energy axis by a given value.

        Args:
            df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
                 as well as the best fit and the corresponding residuum. Hence, it will
                 be extended by the single contribution of the model.
            args (Dict[str,Any]): The input file arguments as a dictionary with
                 additional information beyond the command line arguments.

        Returns:
            pd.DataFrame: DataFrame containing the `optimized` input data
                 (`x` and `data`), which are energy-shifted by the given value.
        """
        df_copy: pd.DataFrame = df.copy()
        df_copy.loc[:, args["column"][0]] = (
            df[args["column"][0]].to_numpy() + args["shift"]
        )
        return df_copy

    @staticmethod
    def oversampling(df: pd.DataFrame, args: Dict[str, Any]) -> pd.DataFrame:
        """Oversampling the data to increase the resolution of the data.

        !!! note "About Oversampling"
            In this implementation of oversampling, the data is oversampled by a
             factor of 5. In case of data with only a few points, the increased
             resolution should make the optimization problem easier to solve. The
             oversampling is based on simple linear interpolation.

        Args:
            df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
                 as well as the best fit and the corresponding residuum. Hence, it will
                 be extended by the single contribution of the model.
            args (Dict[str,Any]): The input file arguments as a dictionary with
                 additional information beyond the command line arguments.

        Returns:
            pd.DataFrame: DataFrame containing the `optimized` input data
                 (`x` and `data`), which are oversampled by a factor of 5.
        """
        x_values = np.linspace(
            df[args["column"][0]].min(),
            df[args["column"][0]].max(),
            5 * df.shape[0],
        )
        y_values = np.interp(
            x_values,
            df[args["column"][0]].to_numpy(),
            df[args["column"][1]].to_numpy(),
        )
        return pd.DataFrame({args["column"][0]: x_values, args["column"][1]: y_values})

    @staticmethod
    def smooth_signal(df: pd.DataFrame, args: Dict[str, Any]) -> pd.DataFrame:
        """Smooth the intensity values.

        Args:
            df (pd.DataFrame): DataFrame containing the input data (`x` and `data`).
            args (Dict[str,Any]): The input file arguments as a dictionary with
                 additional information beyond the command line arguments.

        Returns:
            pd.DataFrame: DataFrame containing the `optimized` input data
                 (`x` and `data`), which are smoothed by the given value.
        """
        box = np.ones(args["smooth"]) / args["smooth"]
        df_copy: pd.DataFrame = df.copy()
        df_copy.loc[:, args["column"][1]] = np.convolve(
            df[args["column"][1]].to_numpy(), box, mode="same"
        )
        return df_copy

__call__()

Apply all pre-processing-filters.

Returns:

- `pd.DataFrame`: DataFrame containing the input data (`x` and `data`), which are optionally:
    1. shrunk to a given range
    2. shifted
    3. linearly oversampled
    4. smoothed
- `Dict[str, Any]`: Adds a descriptive statistic to the input dictionary.
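
A minimal usage sketch (the `args` keys mirror the command-line options; the values shown are illustrative, and `PreProcessing` is imported from `spectrafit.tools`):

>>> import pandas as pd
>>> df = pd.DataFrame({"energy": [1.0, 2.0, 3.0, 4.0], "intensity": [0.0, 1.0, 0.5, 0.1]})
>>> args = {"energy_start": 1.5, "energy_stop": None, "shift": None,
...         "oversampling": False, "smooth": 0, "column": ["energy", "intensity"]}
>>> df_pre, args = PreProcessing(df=df, args=args)()
>>> float(df_pre["energy"].min())
2.0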

Source code in spectrafit/tools.py
def __call__(self) -> Tuple[pd.DataFrame, Dict[str, Any]]:
    """Apply all pre-processing-filters.

    Returns:
        pd.DataFrame: DataFrame containing the input data (`x` and `data`), which
             are optionally:

                1. shrunk to a given range
                2. shifted
                3. linearly oversampled
                4. smoothed
        Dict[str,Any]: Adds a descriptive statistic to the input dictionary.
    """
    df_copy: pd.DataFrame = self.df.copy()
    self.args["data_statistic"] = df_copy.describe(
        percentiles=np.arange(0.1, 1.0, 0.1)
    ).to_dict(orient="split")
    try:
        if isinstance(self.args["energy_start"], (int, float)) or isinstance(
            self.args["energy_stop"], (int, float)
        ):
            df_copy = self.energy_range(df_copy, self.args)
        if self.args["shift"]:
            df_copy = self.energy_shift(df_copy, self.args)
        if self.args["oversampling"]:
            df_copy = self.oversampling(df_copy, self.args)
        if self.args["smooth"]:
            df_copy = self.smooth_signal(df_copy, self.args)
    except KeyError as exc:
        print(f"KeyError: {exc} is not part of the dataframe!")
        sys.exit(1)
    return (df_copy, self.args)

__init__(df, args)

Initialize PreProcessing class.

Parameters:

- `df` (`pd.DataFrame`, required): DataFrame containing the input data (`x` and `data`), as well as the best fit and the corresponding residuum. Hence, it will be extended by the single contribution of the model.
- `args` (`Dict[str, Any]`, required): The input file arguments as a dictionary with additional information beyond the command line arguments.
Source code in spectrafit/tools.py
def __init__(self, df: pd.DataFrame, args: Dict[str, Any]) -> None:
    """Initialize PreProcessing class.

    Args:
        df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
             as well as the best fit and the corresponding residuum. Hence, it will
             be extended by the single contribution of the model.
        args (Dict[str,Any]): The input file arguments as a dictionary with
             additional information beyond the command line arguments.
    """
    self.df = df
    self.args = args

energy_range(df, args) staticmethod

Select the energy range for fitting.

Parameters:

- `df` (`pd.DataFrame`, required): DataFrame containing the input data (`x` and `data`), as well as the best fit and the corresponding residuum. Hence, it will be extended by the single contribution of the model.
- `args` (`Dict[str, Any]`, required): The input file arguments as a dictionary with additional information beyond the command line arguments.

Returns:

- `pd.DataFrame`: DataFrame containing the `optimized` input data (`x` and `data`), which are shrunk according to the energy range.
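
The selection is a plain boolean mask on the energy column; a toy example (with `PreProcessing` imported from `spectrafit.tools`):

>>> import pandas as pd
>>> df = pd.DataFrame({"energy": [1.0, 2.0, 3.0], "intensity": [0.1, 0.2, 0.3]})
>>> args = {"energy_start": 1.5, "energy_stop": 2.5, "column": ["energy", "intensity"]}
>>> PreProcessing.energy_range(df, args)["energy"].tolist()
[2.0]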

Source code in spectrafit/tools.py
@staticmethod
def energy_range(df: pd.DataFrame, args: Dict[str, Any]) -> pd.DataFrame:
    """Select the energy range for fitting.

    Args:
        df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
             as well as the best fit and the corresponding residuum. Hence, it will
             be extended by the single contribution of the model.
        args (Dict[str,Any]): The input file arguments as a dictionary with
             additional information beyond the command line arguments.

    Returns:
        pd.DataFrame: DataFrame containing the `optimized` input data
             (`x` and `data`), which are shrunk according to the energy range.
    """
    energy_start: Union[int, float] = args["energy_start"]
    energy_stop: Union[int, float] = args["energy_stop"]

    df_copy: pd.DataFrame = df.copy()
    if isinstance(energy_start, (int, float)) and isinstance(
        energy_stop, (int, float)
    ):
        return df_copy.loc[
            (df[args["column"][0]] >= energy_start)
            & (df[args["column"][0]] <= energy_stop)
        ]
    elif isinstance(energy_start, (int, float)):
        return df_copy.loc[df[args["column"][0]] >= energy_start]
    elif isinstance(energy_stop, (int, float)):
        return df_copy.loc[df[args["column"][0]] <= energy_stop]

energy_shift(df, args) staticmethod

Shift the energy axis by a given value.

Parameters:

- `df` (`pd.DataFrame`, required): DataFrame containing the input data (`x` and `data`), as well as the best fit and the corresponding residuum. Hence, it will be extended by the single contribution of the model.
- `args` (`Dict[str, Any]`, required): The input file arguments as a dictionary with additional information beyond the command line arguments.

Returns:

- `pd.DataFrame`: DataFrame containing the `optimized` input data (`x` and `data`), which are energy-shifted by the given value.

Source code in spectrafit/tools.py
@staticmethod
def energy_shift(df: pd.DataFrame, args: Dict[str, Any]) -> pd.DataFrame:
    """Shift the energy axis by a given value.

    Args:
        df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
             as well as the best fit and the corresponding residuum. Hence, it will
             be extended by the single contribution of the model.
        args (Dict[str,Any]): The input file arguments as a dictionary with
             additional information beyond the command line arguments.

    Returns:
        pd.DataFrame: DataFrame containing the `optimized` input data
             (`x` and `data`), which are energy-shifted by the given value.
    """
    df_copy: pd.DataFrame = df.copy()
    df_copy.loc[:, args["column"][0]] = (
        df[args["column"][0]].to_numpy() + args["shift"]
    )
    return df_copy

oversampling(df, args) staticmethod

Oversampling the data to increase the resolution of the data.

About Oversampling

In this implementation of oversampling, the data is oversampled by a factor of 5. In case of data with only a few points, the increased resolution should make the optimization problem easier to solve. The oversampling is based on simple linear interpolation.

Parameters:

- `df` (`pd.DataFrame`, required): DataFrame containing the input data (`x` and `data`), as well as the best fit and the corresponding residuum. Hence, it will be extended by the single contribution of the model.
- `args` (`Dict[str, Any]`, required): The input file arguments as a dictionary with additional information beyond the command line arguments.

Returns:

- `pd.DataFrame`: DataFrame containing the `optimized` input data (`x` and `data`), which are oversampled by a factor of 5.
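
The resampling is plain linear interpolation (`np.interp`) onto a five-times denser grid; a stand-alone sketch of the same idea:

>>> import numpy as np
>>> x = np.array([0.0, 1.0, 2.0])
>>> y = np.array([0.0, 1.0, 0.0])
>>> x_new = np.linspace(x.min(), x.max(), 5 * x.size)
>>> y_new = np.interp(x_new, x, y)
>>> x_new.size, float(y_new.max())
(15, 1.0)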

Source code in spectrafit/tools.py
@staticmethod
def oversampling(df: pd.DataFrame, args: Dict[str, Any]) -> pd.DataFrame:
    """Oversampling the data to increase the resolution of the data.

    !!! note "About Oversampling"
        In this implementation of oversampling, the data is oversampled by a
         factor of 5. In case of data with only a few points, the increased
         resolution should make the optimization problem easier to solve. The
         oversampling is based on simple linear interpolation.

    Args:
        df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
             as well as the best fit and the corresponding residuum. Hence, it will
             be extended by the single contribution of the model.
        args (Dict[str,Any]): The input file arguments as a dictionary with
             additional information beyond the command line arguments.

    Returns:
        pd.DataFrame: DataFrame containing the `optimized` input data
             (`x` and `data`), which are oversampled by a factor of 5.
    """
    x_values = np.linspace(
        df[args["column"][0]].min(),
        df[args["column"][0]].max(),
        5 * df.shape[0],
    )
    y_values = np.interp(
        x_values,
        df[args["column"][0]].to_numpy(),
        df[args["column"][1]].to_numpy(),
    )
    return pd.DataFrame({args["column"][0]: x_values, args["column"][1]: y_values})

smooth_signal(df, args) staticmethod

Smooth the intensity values.

Parameters:

- `df` (`pd.DataFrame`, required): DataFrame containing the input data (`x` and `data`).
- `args` (`Dict[str, Any]`, required): The input file arguments as a dictionary with additional information beyond the command line arguments.

Returns:

- `pd.DataFrame`: DataFrame containing the `optimized` input data (`x` and `data`), which are smoothed by the given value.
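
The smoothing is a moving-average (box-car) convolution; a stand-alone sketch with a window of 3:

>>> import numpy as np
>>> intensity = np.array([0.0, 0.0, 3.0, 0.0, 0.0])
>>> box = np.ones(3) / 3
>>> np.convolve(intensity, box, mode="same").round(2).tolist()
[0.0, 1.0, 1.0, 1.0, 0.0]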

Source code in spectrafit/tools.py
@staticmethod
def smooth_signal(df: pd.DataFrame, args: Dict[str, Any]) -> pd.DataFrame:
    """Smooth the intensity values.

    Args:
        df (pd.DataFrame): DataFrame containing the input data (`x` and `data`).
        args (Dict[str,Any]): The input file arguments as a dictionary with
             additional information beyond the command line arguments.

    Returns:
        pd.DataFrame: DataFrame containing the `optimized` input data
             (`x` and `data`), which are smoothed by the given value.
    """
    box = np.ones(args["smooth"]) / args["smooth"]
    df_copy: pd.DataFrame = df.copy()
    df_copy.loc[:, args["column"][1]] = np.convolve(
        df[args["column"][1]].to_numpy(), box, mode="same"
    )
    return df_copy

SaveResult

Saving the result of the fitting process.

Source code in spectrafit/tools.py
class SaveResult:
    """Saving the result of the fitting process."""

    def __init__(self, df: pd.DataFrame, args: Dict[str, Any]) -> None:
        """Initialize SaveResult class.

        !!! note "About SaveResult"

            The SaveResult class is responsible for saving the results of the
            optimization process. The results are saved in the following formats:

            1. JSON (default) for all results and meta data of the fitting process.
            2. CSV for the results of the optimization process.

        !!! note "About the output `CSV`-file"

            The output files are separated into three classes:

                1. The `results` of the optimization process.
                2. The `correlation analysis` of the optimization process.
                3. The `error analysis` of the optimization process.

            The result output file contains the following information:

                1. The column names of the energy axis (`x`) and the intensity values
                (`data`)
                2. The name of the column containing the energy axis (`x`)
                3. The name of the column containing the intensity values (`data`)
                4. The name of the column containing the best fit (`best_fit`)
                5. The name of the column containing the residuum (`residuum`)
                6. The name of the column containing the model contribution (`model`)
                7. The name of the column containing the error of the model
                    contribution (`model_error`)
                8. The name of the column containing the error of the best fit
                    (`best_fit_error`)
                9. The name of the column containing the error of the residuum
                    (`residuum_error`)

            The `correlation analysis` file contains the following information about all
            attributes of the model:

                1. Energy
                2. Intensity or Intensities (global fitting)
                3. Residuum
                4. Best fit
                5. Model contribution(s)

            The `error analysis` file contains the following information about all model
            attributes vs:

                1. Initial model values
                2. Current model values
                3. Best model values
                4. Residuum / error relative to the best fit
                5. Residuum / error relative to the absolute fit

        Args:
            df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
                 as well as the best fit and the corresponding residuum. Hence, it will
                 be extended by the single contribution of the model.
            args (Dict[str,Any]): The input file arguments as a dictionary with
                 additional information beyond the command line arguments.
        """
        self.df = df
        self.args = args

    def __call__(self) -> None:
        """Call the SaveResult class."""
        self.save_as_json()
        self.save_as_csv()

    def save_as_csv(self) -> None:
        """Save the the fit results to csv files.

        !!! note "About saving the fit results"
            The fit results are saved to csv files and are divided into three different
            categories:

                1. The `results` of the optimization process.
                2. The `correlation analysis` of the optimization process.
                3. The `error analysis` of the optimization process.
        """
        self.df.to_csv(Path(f"{self.args['outfile']}_fit.csv"), index=False)
        pd.DataFrame(**self.args["linear_correlation"]).to_csv(
            Path(f"{self.args['outfile']}_correlation.csv"),
            index=True,
            index_label="attributes",
        )
        pd.DataFrame.from_dict(self.args["fit_insights"]["variables"]).to_csv(
            Path(f"{self.args['outfile']}_errors.csv"),
            index=True,
            index_label="attributes",
        )

    def save_as_json(self) -> None:
        """Save the fitting result as json file."""
        if self.args["outfile"]:
            with open(
                Path(f"{self.args['outfile']}_summary.json"), "w", encoding="utf-8"
            ) as f:
                json.dump(self.args, f, indent=4)
        else:
            raise FileNotFoundError("No output file provided!")

__call__()

Call the SaveResult class.
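
A minimal usage sketch (assuming `df` and `args` come from a completed `PostProcessing` run; the prefix `example` is illustrative):

>>> args["outfile"] = "example"
>>> SaveResult(df=df, args=args)()

This writes `example_fit.csv`, `example_correlation.csv`, `example_errors.csv`, and `example_summary.json`.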

Source code in spectrafit/tools.py
def __call__(self) -> None:
    """Call the SaveResult class."""
    self.save_as_json()
    self.save_as_csv()

__init__(df, args)

Initialize SaveResult class.

About SaveResult

The SaveResult class is responsible for saving the results of the optimization process. The results are saved in the following formats:

  1. JSON (default) for all results and meta data of the fitting process.
  2. CSV for the results of the optimization process.

About the output CSV-file

The output files are separated into three classes:

1. The `results` of the optimization process.
2. The `correlation analysis` of the optimization process.
3. The `error analysis` of the optimization process.

The result output file contains the following information:

1. The column names of the energy axis (`x`) and the intensity values
(`data`)
2. The name of the column containing the energy axis (`x`)
3. The name of the column containing the intensity values (`data`)
4. The name of the column containing the best fit (`best_fit`)
5. The name of the column containing the residuum (`residuum`)
6. The name of the column containing the model contribution (`model`)
7. The name of the column containing the error of the model
    contribution (`model_error`)
8. The name of the column containing the error of the best fit
    (`best_fit_error`)
9. The name of the column containing the error of the residuum
    (`residuum_error`)

The correlation analysis file contains the following information about all attributes of the model:

1. Energy
2. Intensity or Intensities (global fitting)
3. Residuum
4. Best fit
5. Model contribution(s)

The error analysis file contains the following information about all model attributes vs:

1. Initial model values
2. Current model values
3. Best model values
4. Residuum / error relative to the best fit
5. Residuum / error relative to the absolute fit

Parameters:

- `df` (`pd.DataFrame`, required): DataFrame containing the input data (`x` and `data`), as well as the best fit and the corresponding residuum. Hence, it will be extended by the single contribution of the model.
- `args` (`Dict[str, Any]`, required): The input file arguments as a dictionary with additional information beyond the command line arguments.
Source code in spectrafit/tools.py
def __init__(self, df: pd.DataFrame, args: Dict[str, Any]) -> None:
    """Initialize SaveResult class.

    !!! note "About SaveResult"

        The SaveResult class is responsible for saving the results of the
        optimization process. The results are saved in the following formats:

        1. JSON (default) for all results and meta data of the fitting process.
        2. CSV for the results of the optimization process.

    !!! note "About the output `CSV`-file"

        The output files are separated into three classes:

            1. The `results` of the optimization process.
            2. The `correlation analysis` of the optimization process.
            3. The `error analysis` of the optimization process.

        The result output file contains the following information:

            1. The column names of the energy axis (`x`) and the intensity values
            (`data`)
            2. The name of the column containing the energy axis (`x`)
            3. The name of the column containing the intensity values (`data`)
            4. The name of the column containing the best fit (`best_fit`)
            5. The name of the column containing the residuum (`residuum`)
            6. The name of the column containing the model contribution (`model`)
            7. The name of the column containing the error of the model
                contribution (`model_error`)
            8. The name of the column containing the error of the best fit
                (`best_fit_error`)
            9. The name of the column containing the error of the residuum
                (`residuum_error`)

        The `correlation analysis` file contains the following information about all
        attributes of the model:

            1. Energy
            2. Intensity or Intensities (global fitting)
            3. Residuum
            4. Best fit
            5. Model contribution(s)

        The `error analysis` file contains the following information about all model
        attributes vs:

            1. Initial model values
            2. Current model values
            3. Best model values
            4. Residuum / error relative to the best fit
            5. Residuum / error relative to the absolute fit

    Args:
        df (pd.DataFrame): DataFrame containing the input data (`x` and `data`),
             as well as the best fit and the corresponding residuum. Hence, it will
             be extended by the single contribution of the model.
        args (Dict[str,Any]): The input file arguments as a dictionary with
             additional information beyond the command line arguments.
    """
    self.df = df
    self.args = args

save_as_csv()

Save the fit results to csv files.

About saving the fit results

The fit results are saved to csv files and are divided into three different categories:

1. The `results` of the optimization process.
2. The `correlation analysis` of the optimization process.
3. The `error analysis` of the optimization process.
Source code in spectrafit/tools.py
def save_as_csv(self) -> None:
    """Save the the fit results to csv files.

    !!! note "About saving the fit results"
        The fit results are saved to csv files and are divided into three different
        categories:

            1. The `results` of the optimization process.
            2. The `correlation analysis` of the optimization process.
            3. The `error analysis` of the optimization process.
    """
    self.df.to_csv(Path(f"{self.args['outfile']}_fit.csv"), index=False)
    pd.DataFrame(**self.args["linear_correlation"]).to_csv(
        Path(f"{self.args['outfile']}_correlation.csv"),
        index=True,
        index_label="attributes",
    )
    pd.DataFrame.from_dict(self.args["fit_insights"]["variables"]).to_csv(
        Path(f"{self.args['outfile']}_errors.csv"),
        index=True,
        index_label="attributes",
    )

save_as_json()

Save the fitting result as json file.

Source code in spectrafit/tools.py
def save_as_json(self) -> None:
    """Save the fitting result as json file."""
    if self.args["outfile"]:
        with open(
            Path(f"{self.args['outfile']}_summary.json"), "w", encoding="utf-8"
        ) as f:
            json.dump(self.args, f, indent=4)
    else:
        raise FileNotFoundError("No output file provided!")

check_keywords_consistency(check_args, ref_args)

Check if the keywords are consistent.

Check if the keywords are consistent between two dictionaries. The two dictionaries are reference keywords of the cmd_line_args and the args of the input_file.

Parameters:

- `check_args` (`MutableMapping[str, Any]`, required): First dictionary to be checked.
- `ref_args` (`Dict[str, Any]`, required): Second dictionary to be checked.

Raises:

- `KeyError`: If the keywords are not consistent.
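
A toy example with made-up keys; a consistent pair passes silently, while an unknown key would raise the `KeyError`:

>>> check_keywords_consistency({"infile": "a.txt"}, {"infile": None, "outfile": None})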

Source code in spectrafit/tools.py
def check_keywords_consistency(
    check_args: MutableMapping[str, Any], ref_args: Dict[str, Any]
) -> None:
    """Check if the keywords are consistent.

    Check if the keywords are consistent between two dictionaries. The two dictionaries
    are reference keywords of the `cmd_line_args` and the `args` of the `input_file`.

    Args:
        check_args (MutableMapping[str, Any]): First dictionary to be checked.
        ref_args (Dict[str,Any]): Second dictionary to be checked.

    Raises:
        KeyError: If the keywords are not consistent.
    """
    for key in check_args:
        if key not in ref_args.keys():
            raise KeyError(f"ERROR: The {key} is not a parameter of the `cmd-input`!")

load_data(args)

Load the data from a txt file.

About the data format

Load data from a text file, which can be an ASCII file such as txt, csv, or a user-specific but regular file. The columns can be separated by a delimiter.

In case of 2D data, the columns have to be defined. In case of 3D data, all columns are considered as data.

Parameters:

- `args` (`Dict[str, str]`, required): The input file arguments as a dictionary with additional information beyond the command line arguments.

Returns:

- `pd.DataFrame`: DataFrame containing the input data (`x` and `data`), as well as the best fit and the corresponding residuum. Hence, it will be extended by the single contribution of the model.
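
A minimal call for a two-column, tab-separated ASCII file (the file name is illustrative; the keys shown are the ones the function reads):

>>> args = {"global_": 0, "infile": "spectrum.txt", "separator": "\t",
...         "header": None, "column": [0, 1], "decimal": ".", "comment": "#"}
>>> df = load_data(args)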

Source code in spectrafit/tools.py
def load_data(args: Dict[str, str]) -> pd.DataFrame:
    """Load the data from a txt file.

    !!! note "About the data format"

        Load data from a text file, which can be an ASCII file such as txt, csv, or
        a user-specific but regular file. The columns can be separated by a delimiter.

        In case of 2D data, the columns have to be defined. In case of 3D data, all
        columns are considered as data.

    Args:
        args (Dict[str,str]): The input file arguments as a dictionary with additional
             information beyond the command line arguments.

    Returns:
        pd.DataFrame: DataFrame containing the input data (`x` and `data`),
             as well as the best fit and the corresponding residuum. Hence, it will be
             extended by the single contribution of the model.
    """
    try:
        if args["global_"]:
            return pd.read_csv(
                args["infile"],
                sep=args["separator"],
                header=args["header"],
                dtype=np.float64,
                decimal=args["decimal"],
                comment=args["comment"],
            )
        return pd.read_csv(
            args["infile"],
            sep=args["separator"],
            header=args["header"],
            usecols=args["column"],
            dtype=np.float64,
            decimal=args["decimal"],
            comment=args["comment"],
        )
    except ValueError as exc:
        print(f"Error: {exc} -> Dataframe contains non numeric data!")
        sys.exit(1)

pkl2any(pkl_fname, encoding='latin1')

Load a pkl file and return the data as any type of data or object.

Parameters:

- `pkl_fname` (`Path`, required): The pkl file to load.
- `encoding` (`str`, optional): The encoding to use. Defaults to `'latin1'`.

Raises:

- `ValueError`: If the file format is not supported.

Returns:

- `Any`: Data or objects, which can contain various data types supported by pickle.
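
Typical usage (the file name is illustrative):

>>> from pathlib import Path
>>> data = pkl2any(Path("raw_spectra.pkl.gz"))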

Source code in spectrafit/tools.py
def pkl2any(pkl_fname: Path, encoding: str = "latin1") -> Any:
    """Load a pkl file and return the data as a any type of data or object.

    Args:
        pkl_fname (Path): The pkl file to load.
        encoding (str, optional): The encoding to use. Defaults to "latin1".

    Raises:
        ValueError: If the file format is not supported.

    Returns:
        Any: Data or objects, which can contain various data types supported by pickle.
    """
    if pkl_fname.suffix == ".gz":
        with gzip.open(pkl_fname, "rb") as f:
            return unicode_check(f, encoding=encoding)
    elif pkl_fname.suffix == ".pkl":
        with open(pkl_fname, "rb") as f:
            return unicode_check(f, encoding=encoding)
    else:
        choices = [".pkl", ".pkl.gz"]
        raise ValueError(
            f"File format '{pkl_fname.suffix}' is not supported. "
            f"Supported file formats are: {choices}"
        )

pure_fname(fname)

Return the filename without the suffix.

Pure filename without the suffix is implemented to avoid the problem with multiple dots in the filename like test.pkl.gz or test.tar.gz. The stem attribute of the Path class returns the filename without the suffix, but it also removes only the last suffix. Hence, the test.pkl.gz will be returned as test.pkl and not as test. This function returns the filename without the suffix. It is implemented recursively to remove all suffixes.

Parameters:

- `fname` (`Path`, required): The filename to be processed.

Returns:

- `Path`: The filename without the suffix.
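
For example (POSIX paths shown):

>>> from pathlib import Path
>>> pure_fname(Path("data/test.pkl.gz"))
PosixPath('data/test')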

Source code in spectrafit/tools.py
def pure_fname(fname: Path) -> Path:
    """Return the filename without the suffix.

    Pure filename without the suffix is implemented to avoid the problem with
    multiple dots in the filename like `test.pkl.gz` or `test.tar.gz`.
    The `stem` attribute of the `Path` class returns the filename without the
    suffix, but it also removes only the last suffix. Hence, the `test.pkl.gz`
    will be returned as `test.pkl` and not as `test`. This function returns
    the filename without the suffix. It is implemented recursively to remove
    all suffixes.

    Args:
        fname (Path): The filename to be processed.

    Returns:
        Path: The filename without the suffix.
    """
    _fname = fname.parent / fname.stem
    return pure_fname(_fname) if _fname.suffix else _fname

read_input_file(fname)

Read the input file.

Read the input file as toml, json, or yaml files and return as a dictionary.

Parameters:

- `fname` (`Path`, required): Name of the input file.

Raises:

- `OSError`: If the input file is not supported.

Returns:

- `MutableMapping[str, Any]`: The input file arguments as a dictionary with additional information beyond the command line arguments.
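
Typical usage (the file name is illustrative):

>>> from pathlib import Path
>>> args = read_input_file(Path("fitting_input.toml"))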

Source code in spectrafit/tools.py
def read_input_file(fname: Path) -> MutableMapping[str, Any]:
    """Read the input file.

    Read the input file as `toml`, `json`, or `yaml` files and return as a dictionary.

    Args:
        fname (Path): Name of the input file.

    Raises:
        OSError: If the input file is not supported.

    Returns:
        dict: Return the input file arguments as a dictionary with additional
             information beyond the command line arguments.

    """
    fname = Path(fname)

    if fname.suffix == ".toml":
        with open(fname, "rb") as f:
            args = tomli.load(f)
    elif fname.suffix == ".json":
        with open(fname, encoding="utf-8") as f:
            args = json.load(f)
    elif fname.suffix in [".yaml", ".yml"]:
        with open(fname, encoding="utf-8") as f:
            args = yaml.load(f, Loader=yaml.FullLoader)
    else:
        raise OSError(
            f"ERROR: Input file {fname} has not supported file format.\n"
            "Supported fileformats are: '*.json', '*.yaml', and '*.toml'"
        )
    return args

unicode_check(f, encoding='latin1')

Check if the pkl file is encoded in unicode.

Parameters:

- `f` (`Any`, required): The pkl file to load.
- `encoding` (`str`, optional): The encoding to use. Defaults to `'latin1'`.

Returns:

- `Any`: The pkl file, which can be a nested dictionary containing raw data, metadata, and other information.

Source code in spectrafit/tools.py
def unicode_check(f: Any, encoding: str = "latin1") -> Any:
    """Check if the pkl file is encoded in unicode.

    Args:
        f (Any): The pkl file to load.
        encoding (str, optional): The encoding to use. Defaults to "latin1".

    Returns:
        Any: The pkl file, which can be a nested dictionary containing raw data,
            metadata, and other information.
    """
    try:
        data_dict = pickle.load(f)
    except UnicodeDecodeError:  # pragma: no cover
        data_dict = pickle.load(f, encoding=encoding)
    return data_dict