from typing import Union, Tuple
from warnings import warn
import numpy as np
from numpy import ndarray
from pandas import DataFrame
from ..data_generation import MultilayerStructure, ConstantParameter, Parameter
from ..utils.label_helpers import convert_to_dataframe
class OutputPreprocessor:
    """Class for preprocessing reflectivity labels for training and validation.

    Constant parameters of the sample are removed from the labels and the remaining labels are normalized.
    :meth:`restore_labels` reverses both steps for predicted labels.

    Args:
        sample: :class:`MultilayerStructure` object where the sample layers and their names and parameter ranges
            are defined.
        normalization: Defines how the output labels are normalized.
            "min_to_zero" (default): shifts minimum value to ``0`` and scales maximum value to ``1``.
            "absolute_max": divides each label by the absolute value of its parameter maximum.

    Raises:
        ValueError: If ``normalization`` is not one of the supported types.
    """

    def __init__(self, sample: 'MultilayerStructure', normalization: str = 'min_to_zero'):
        allowed_normalizations = ['min_to_zero', 'absolute_max']
        if normalization not in allowed_normalizations:
            raise ValueError(f'normalization type "{normalization}" not supported')
        self.normalization = normalization
        self.sample = sample
        self._labels_removal_list = []

    @property
    def all_label_names(self):
        """Label names as reported by the sample."""
        return self.sample.label_names

    @property
    def all_label_parameters(self):
        """All thickness, roughness and SLD parameters of the sample, concatenated in that order."""
        return np.concatenate((self.sample.thicknesses, self.sample.roughnesses, self.sample.slds), axis=0)

    @property
    def number_of_labels(self):
        """Total number of label parameters (constant and non-constant)."""
        return len(self.all_label_parameters)

    @property
    def number_of_layers(self):
        """Number of layers, taken from the number of thickness parameters."""
        return len(self.sample.thicknesses)

    @property
    def constant_labels(self):
        """List of all label parameters that are :class:`ConstantParameter` instances."""
        return [parameter for parameter in self.all_label_parameters if isinstance(parameter, ConstantParameter)]

    @property
    def used_labels(self):
        """List of all label parameters that are :class:`Parameter` but not :class:`ConstantParameter` instances."""
        return [
            parameter for parameter in self.all_label_parameters
            if isinstance(parameter, Parameter) and not isinstance(parameter, ConstantParameter)
        ]

    def _normalize_values(self, values, parameter):
        """Applies the configured normalization to the ``values`` of a single ``parameter``."""
        # Compare by value: ``is`` only matches when both strings happen to be the same object, which is not
        # guaranteed for names created at runtime (e.g. read from a config file).
        if self.normalization == 'min_to_zero':
            return (values - parameter.min) / (parameter.max - parameter.min)
        if self.normalization == 'absolute_max':
            # NOTE(review): only ``parameter.max`` is used here; if ``abs(parameter.min)`` is larger, the
            # normalized values exceed 1 in magnitude -- confirm this is intended.
            return values / np.abs(parameter.max)
        return values

    def _restore_values(self, values, parameter):
        """Inverse of :meth:`_normalize_values` for the ``values`` of a single ``parameter``."""
        if self.normalization == 'min_to_zero':
            return values * (parameter.max - parameter.min) + parameter.min
        if self.normalization == 'absolute_max':
            return values * np.abs(parameter.max)
        return values

    def apply_preprocessing(self, labels: Union[DataFrame, ndarray]) -> Tuple[DataFrame, DataFrame]:
        """Removes all constant labels and applies normalization to the non-constant labels.

        Args:
            labels: Pandas :class:`DataFrame` or ndarray of randomly generated labels.

        Returns:
            normalized_labels: DataFrame
            constant_labels: DataFrame
        """
        label_df = convert_to_dataframe(labels, self.all_label_names)
        preprocessed_labels = self.normalize_labels(self.remove_labels(label_df))
        removed_labels_df = label_df[[param.name for param in self.constant_labels]]
        return preprocessed_labels, removed_labels_df

    def normalize_labels(self, label_df: DataFrame) -> DataFrame:
        """Normalizes all non-constant labels and returns normalized :class:`DataFrame`.

        The input is not modified; a copy is normalized and returned.
        """
        label_df = label_df.copy()
        for parameter in self.all_label_parameters:
            if isinstance(parameter, ConstantParameter):
                continue
            label_df[parameter.name] = self._normalize_values(label_df[parameter.name], parameter)
        return label_df

    def remove_labels(self, label_df: DataFrame) -> DataFrame:
        """Removes labels in ``constant_labels`` from ``label_df`` and returns :class:`DataFrame`.

        A warning is issued for every constant label that is not a column of ``label_df``.
        """
        label_df = label_df.copy()
        for param in self.constant_labels:
            if param.name not in label_df.columns:
                warn(f'Label "{param.name}" not in the list of labels (maybe already removed). Skipping "'
                     f'{param.name}".')
            else:
                del label_df[param.name]
        return label_df

    def restore_labels(self, predicted_labels: Union[DataFrame, ndarray]) -> DataFrame:
        """Takes the predicted labels, reverts normalization and adds constant labels and returns those as DataFrame.

        The columns of the result are ordered like ``all_label_names``.
        """
        predicted_labels_df = convert_to_dataframe(predicted_labels, [param.name for param in self.used_labels])
        restored_labels_df = self.renormalize_labels(predicted_labels_df)
        restored_labels_df = self.add_constant_labels(restored_labels_df)
        return restored_labels_df[self.all_label_names]

    def renormalize_labels(self, label_df: DataFrame) -> DataFrame:
        """Removes normalization from all labels in ``label_df``.

        The input is not modified; a copy is rescaled and returned.
        """
        label_df = label_df.copy()
        # ``used_labels`` already contains only the non-constant parameters.
        for param in self.used_labels:
            label_df[param.name] = self._restore_values(label_df[param.name], param)
        return label_df

    def add_constant_labels(self, predicted_labels_df: DataFrame) -> DataFrame:
        """Adds all labels in ``constant_labels`` to ``predicted_labels_df``.

        Every constant label becomes a column filled with the parameter's ``value``.
        """
        predicted_labels_df = predicted_labels_df.copy()
        for param in self.constant_labels:
            predicted_labels_df[param.name] = param.value
        return predicted_labels_df