Source code for mlreflect.curve_fitter.curve_fitter

import warnings

import numpy as np
import pandas as pd
from numpy import ndarray

from .minimizer import least_log_mean_squares_fit, q_shift_variants, curve_variant_log_mse, curve_scaling_variants
from ..data_generation import ReflectivityGenerator, interp_reflectivity
from ..models import TrainedModel
from ..training import InputPreprocessor, OutputPreprocessor


class CurveFitter:
    """Make a prediction on specular reflectivity data based on the trained model.

    Args:
        trained_model: :class:`TrainedModel` object that contains the trained Keras model, the trained
            q values, the standardization values and the sample structure.
    """

    def __init__(self, trained_model: 'TrainedModel'):
        self.trained_model = trained_model

        self.generator = None

        # Re-use the input standardization statistics that are stored with the trained model.
        self.ip = InputPreprocessor()
        self.ip._standard_std = trained_model.ip_std
        self.ip._standard_mean = trained_model.ip_mean

        self.op = OutputPreprocessor(trained_model.sample, 'min_to_zero')

    def fit_curve(self, corrected_curve: ndarray, q_values: ndarray, dq: float = 0, factor: float = 1,
                  polish=False, fraction_bounds: tuple = (0.5, 0.5, 0.1), optimize_q=True,
                  n_q_samples: int = 1000, optimize_scaling=False, n_scale_samples: int = 300,
                  simulate_reflectivity=True) -> dict:
        """Return predicted reflectivity and thin film properties based on footprint-corrected data.

        Args:
            corrected_curve: "Ideal" reflectivity curve that has already been treated with footprint
                correction and other intensity corrections and is normalized to 1.
            q_values: Corresponding q values for each of the intensity values in units of 1/A.
            dq: Q-shift that is applied before interpolation of the data to the trained q values. Can
                sometimes improve the results if the total reflection edge is not perfectly aligned.
                Only affects the result if both ``optimize_q`` and ``optimize_scaling`` are ``False``.
            factor: Multiplicative factor that is applied to the data before interpolation. Can
                sometimes improve the results if the total reflection edge is not perfectly aligned.
                Only affects the result if both ``optimize_q`` and ``optimize_scaling`` are ``False``.
            polish: If ``True``, the predictions will be refined with a simple least log mean squares
                minimization via ``scipy.optimize.minimize``. This can often improve the "fit" of the
                model curve to the data at the expense of higher prediction times.
            fraction_bounds: The relative fitting bounds of the LMS fit for thickness, roughness and
                SLD, respectively. E.g. if the predicted thickness was 150 A, then a value of 0.5 would
                mean the fit bounds are ``(75, 225)``.
            optimize_q: If ``True``, the q interpolation will be resampled with small q shifts in a
                range of about +-0.003 1/A and the neural network prediction with the smallest MSE will
                be selected. If ``polish=True``, this step will happen before the LMS fit.
            n_q_samples: Number of q shift samples that will be generated. More samples can lead to a
                better result, but will increase the prediction time.
            optimize_scaling: If ``True``, the interpolated input curve is randomly rescaled by a factor
                between 0.9 and 1.1 and the neural network prediction with the smallest MSE will be
                selected. If ``polish=True``, this step will happen before the LMS fit. If
                ``optimize_q=True``, this step will happen after the q shift optimization.
            n_scale_samples: Number of curve scaling samples that will be generated. More samples can
                lead to a better result, but will increase the prediction time.
            simulate_reflectivity: If ``True`` (default), the reflectivity according to the predicted
                parameter values will be simulated as well. This might slow down the prediction times.

        Returns:
            :class:`dict`: A dictionary containing the fit results:
                ``'predicted_reflectivity'``: Numpy :class:`ndarray` of the predicted intensity. Is
                    ``None`` if ``simulate_reflectivity=False``.
                ``'predicted_parameters'``: Pandas :class:`DataFrame` of the predicted thin film
                    parameters.
                ``'best_q_shift'``: Q shift that led to the prediction with the lowest MSE. Is ``None``
                    if ``optimize_q=False``.
                ``'best_scaling'``: Curve scaling factor that led to the prediction with the lowest MSE.
                    Is ``None`` if ``optimize_scaling=False``.
        """
        # Indices of the input q values that lie closest to the limits of the trained q range.
        max_q_idx = abs(q_values - self.trained_model.q_values.max()).argmin() + 1
        min_q_idx = abs(q_values - self.trained_model.q_values.min()).argmin()

        corrected_curve = np.atleast_2d(corrected_curve)
        generator = ReflectivityGenerator(q_values, self.trained_model.sample)
        n_curves = len(corrected_curve)

        best_q_shift = None
        best_scaling = None

        if optimize_q:
            best_q_shift = np.empty(n_curves)
            q_optimized = []
            for i, curve in enumerate(corrected_curve):
                best_q_output = self._optimize_q(self.trained_model.q_values, q_values, curve, generator,
                                                 n_q_samples)
                q_optimized.append(best_q_output['best_prediction'])
                best_q_shift[i] = best_q_output['best_shift']
            restored_predicted_parameters = pd.concat(q_optimized).reset_index(drop=True)

        if optimize_scaling:
            best_scaling = np.empty(n_curves)
            scaling_optimized = []
            for i, curve in enumerate(corrected_curve):
                # Build on the optimized q shift if there is one, otherwise use no shift at all.
                scaling_dq = 0 if best_q_shift is None else best_q_shift[i]
                best_scaling_output = self._optimize_scaling(q_values, min_q_idx, max_q_idx, curve,
                                                             n_scale_samples, dq=scaling_dq)
                scaling_optimized.append(best_scaling_output['best_prediction'])
                best_scaling[i] = best_scaling_output['best_scaling']
            restored_predicted_parameters = pd.concat(scaling_optimized).reset_index(drop=True)

        # Fall back to the plain network prediction only if NO optimization was requested. Using
        # ``and`` here would throw away the optimized parameters whenever just one of the two
        # optimizations is enabled.
        if not (optimize_scaling or optimize_q):
            interpolated_curve = self._interpolate_intensity(corrected_curve * factor, q_values + dq)
            predicted_parameters = self.trained_model.keras_model.predict(
                self.ip.standardize(np.atleast_2d(interpolated_curve)))
            restored_predicted_parameters = self.op.restore_labels(predicted_parameters)

        self._ensure_positive_parameters(restored_predicted_parameters)

        if polish:
            polished = [
                least_log_mean_squares_fit(q_values[min_q_idx:max_q_idx],
                                           curve[min_q_idx:max_q_idx],
                                           restored_predicted_parameters[i:(i + 1)],
                                           self.trained_model.sample,
                                           self.op,
                                           fraction_bounds)
                for i, curve in enumerate(corrected_curve)
            ]
            final_parameters = pd.concat(polished).reset_index(drop=True)
            self._ensure_positive_parameters(final_parameters)
        else:
            final_parameters = restored_predicted_parameters

        if simulate_reflectivity:
            predicted_refl = generator.simulate_reflectivity(final_parameters, progress_bar=False)
        else:
            predicted_refl = None

        return {'predicted_reflectivity': predicted_refl, 'predicted_parameters': final_parameters,
                'best_q_shift': best_q_shift, 'best_scaling': best_scaling}

    def _optimize_q(self, q_values_prediction, q_values_input, corrected_reflectivity, generator,
                    n_variants=300, scale=0.001):
        """Select the q-shifted variant of one curve whose prediction has the lowest log MSE.

        Args:
            q_values_prediction: q values on which the shifted curve variants are predicted.
            q_values_input: q values belonging to ``corrected_reflectivity``.
            corrected_reflectivity: Single measured reflectivity curve.
            generator: Object whose ``simulate_reflectivity`` method is used to simulate the curve of
                the selected prediction.
            n_variants: Number of q shift variants passed to ``q_shift_variants``.
            scale: Scale of the q shifts passed to ``q_shift_variants``.

        Returns:
            :class:`dict` with the keys ``'best_shift'``, ``'best_prediction'`` (one-row slice of the
            predicted parameters) and ``'best_predicted_curve'``.
        """
        q_shift_curves, shifts = q_shift_variants(q_values_prediction, q_values_input, corrected_reflectivity,
                                                  n_variants, scale=scale)
        # Predict all variants in one go; nested optimization is disabled to avoid recursion.
        shift_predictions = self.fit_curve(q_shift_curves, q_values_prediction, polish=False, optimize_q=False,
                                           optimize_scaling=False)

        # The unshifted input, interpolated to the trained q values, is the reference for the MSE.
        interpolated_reflectivity = self._interpolate_intensity(corrected_reflectivity, q_values_input)
        shift_mse = curve_variant_log_mse(interpolated_reflectivity, shift_predictions['predicted_reflectivity'])
        min_mse_idx = shift_mse.argmin()

        best_prediction = shift_predictions['predicted_parameters'][min_mse_idx:min_mse_idx + 1]
        best_predicted_curve = generator.simulate_reflectivity(best_prediction, progress_bar=False)[0]

        return {'best_shift': shifts[min_mse_idx][0], 'best_prediction': best_prediction,
                'best_predicted_curve': best_predicted_curve}

    def _optimize_scaling(self, q_values, min_q_idx, max_q_idx, corrected_intensity, n_variants=300, scale=0.1,
                          dq=0):
        """Select the rescaled variant of one curve whose prediction has the lowest log MSE.

        Args:
            q_values: q values belonging to ``corrected_intensity``.
            min_q_idx: First index of the q window that is used for the MSE comparison.
            max_q_idx: End index (exclusive) of the q window that is used for the MSE comparison.
            corrected_intensity: Single measured reflectivity curve.
            n_variants: Number of scaling variants passed to ``curve_scaling_variants``.
            scale: Scale of the scaling factors passed to ``curve_scaling_variants``.
            dq: Q-shift that is forwarded to :meth:`fit_curve` for the prediction of the variants.

        Returns:
            :class:`dict` with the keys ``'best_scaling'``, ``'best_prediction'`` (one-row slice of the
            predicted parameters) and ``'best_predicted_curve'``.
        """
        scaled_curve_variants, scalings = curve_scaling_variants(corrected_intensity, n_variants, scale)
        # Predict all variants in one go; nested optimization is disabled to avoid recursion.
        scaled_predictions = self.fit_curve(scaled_curve_variants, q_values, polish=False, optimize_q=False,
                                            optimize_scaling=False, dq=dq)

        scaling_mse = curve_variant_log_mse(corrected_intensity[min_q_idx:max_q_idx],
                                            scaled_predictions['predicted_reflectivity'][:, min_q_idx:max_q_idx])
        min_mse_idx = scaling_mse.argmin()

        return {'best_scaling': scalings[min_mse_idx][0],
                'best_prediction': scaled_predictions['predicted_parameters'][min_mse_idx:min_mse_idx + 1],
                'best_predicted_curve': scaled_predictions['predicted_reflectivity'][min_mse_idx]}

    def _interpolate_intensity(self, intensity: ndarray, q_values: ndarray):
        """Interpolate one or several intensity curves to the q values of the trained model.

        Args:
            intensity: Single curve or 2D array with one curve per row.
            q_values: q values belonging to the intensity values.

        Returns:
            2D :class:`ndarray` with one interpolated curve per row and one column per trained q value.
        """
        intensity = np.atleast_2d(intensity)
        interp_intensity = np.empty((len(intensity), len(self.trained_model.q_values)))
        # Suppress warnings only while interpolating instead of silencing them for the whole process.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            for i, curve in enumerate(intensity):
                interp_intensity[i] = interp_reflectivity(self.trained_model.q_values, q_values, curve)
        return interp_intensity

    @staticmethod
    def _ensure_positive_parameters(parameters):
        """Replace all thickness and roughness columns of ``parameters`` by their absolute values.

        The columns are selected by name and the :class:`DataFrame` is modified in place.
        """
        for parameter_name in parameters.columns:
            if 'thickness' in parameter_name or 'roughness' in parameter_name:
                parameters[parameter_name] = abs(parameters[parameter_name])