# Source code for mlreflect.training.noise_generator

import numpy as np
from numpy import ndarray
from tensorflow import keras

from .preprocessing import InputPreprocessor
from ..data_generation import noise
from ..data_generation.distributions import random_logarithmic_distribution


class BaseGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that serves mini batches of reflectivity curves and their labels.

    Every mini batch is passed through the callables in ``batch_functions`` (in order) before it is returned.

    Args:
        reflectivity: Array of training curves, indexed by sample along the first axis
        labels: Training labels in the same order as ``reflectivity``
        batch_functions: Iterable of callables ``f(batch_input, batch_output) -> (batch_input, batch_output)`` that
            are applied to every mini batch, or ``None`` to return the batches unmodified
        batch_size: Number of samples per mini batch
        shuffle: If ``True``, the sample order is shuffled on creation and after every epoch

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or larger than the number of samples
    """

    def __init__(self, reflectivity, labels, batch_functions, batch_size=32, shuffle=True):
        self.n_samples = len(reflectivity)
        if batch_size > self.n_samples:
            raise ValueError('batch size cannot be larger than input length')
        # A zero or negative batch size would otherwise only surface later as a ZeroDivisionError or a negative
        # length in __len__.
        if batch_size < 1:
            raise ValueError('batch size must be at least 1')
        self.batch_functions = batch_functions
        self.batch_size = batch_size
        self.n_input = reflectivity.shape[1]
        self.n_output = labels.shape[1]
        self.labels = labels
        self.reflectivity = reflectivity
        self.shuffle = shuffle
        self.indexes = np.arange(self.n_samples)
        self.__shuffle()

    def __len__(self):
        """Return the number of complete mini batches per epoch (a trailing partial batch is dropped)."""
        return int(self.n_samples // self.batch_size)

    def __getitem__(self, index):
        """Return the mini batch number ``index`` as the tuple ``(x, y, [None])``."""
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        x, y = self.__data_generation(indexes)

        # NOTE(review): the third element presumably fills the sample-weight slot of a Keras batch -- confirm.
        return x, y, [None]

    def on_epoch_end(self):
        """Reshuffle the sample order (only if ``shuffle`` is enabled)."""
        self.__shuffle()

    def __shuffle(self):
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, indexes):
        """Select the samples at ``indexes`` and run them through all batch functions."""
        batch_input = self.reflectivity[indexes]
        batch_output = np.array(self.labels)[indexes]
        # ``None`` means "no batch functions" (the subclasses in this module pass ``None``).
        if self.batch_functions is not None:
            for function in self.batch_functions:
                batch_input, batch_output = function(batch_input, batch_output)
        return batch_input, batch_output


class NoiseGenerator(BaseGenerator):
    """Generator object that returns a standardized batch of reflectivity and labels with random noise and background.

    Args:
        reflectivity: Training reflectivity curves
        labels: Training labels in the same order as reflectivity
        input_preprocessor: :class:`InputPreprocessor` object with stored standardization values (a ``ValueError``
            is raised when a batch is requested and no standardization has been saved)
        batch_size: Number of samples per mini batch
        shuffle: If ``True``, shuffles reflectivity and labels after every epoch
        noise_range: Tuple ``(min, max)`` between which the shot noise levels are randomly generated. If ``None``,
            no shot noise is applied.
        background_range: Tuple ``(min, max)`` between which the background levels are randomly generated. If
            ``None``, no background is added.
        mode:
            'single': random noise and background levels are generated for every curve of a mini batch
            'batch': random noise and background levels are generated for each mini batch
        relative_background_spread: Relative standard deviation of the normal distribution (e.g. a value of ``0.1``
                means the standard deviation is 10% of the mean)

    """

    def __init__(self, reflectivity: ndarray, labels: ndarray, input_preprocessor: InputPreprocessor, batch_size=32,
                 shuffle=True, mode='single', noise_range=None, background_range=None,
                 relative_background_spread: float = 0.1):

        super().__init__(reflectivity, labels, None, batch_size, shuffle)

        self.input_preprocessor = input_preprocessor
        self.mode = mode
        self.noise_range = noise_range
        self.background_range = background_range
        self.relative_background_spread = relative_background_spread

    def __getitem__(self, index):
        """Return the mini batch number ``index`` as the tuple ``(x, y, [None])``."""
        # Re-implemented here because ``__data_generation`` is name-mangled per class: the inherited
        # ``__getitem__`` would call the base class version instead of the one defined below.
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        x, y = self.__data_generation(indexes)

        return x, y, [None]

    def __data_generation(self, indexes):
        """Return the standardized, noisy curves and the labels for the samples at ``indexes``.

        Raises:
            ValueError: If the input preprocessor has no saved standardization, or if ``mode`` is neither
                ``'single'`` nor ``'batch'`` while noise or background is requested.
        """
        if not self.input_preprocessor.has_saved_standardization:
            raise ValueError('input_preprocessor must have saved standardization')
        refl = self.reflectivity[indexes]
        # Strings must be compared with ``==``: ``is`` tests object identity and only works by accident when both
        # strings happen to be interned.
        if self.noise_range is not None:
            if self.mode == 'single':
                refl = noise.apply_shot_noise(refl, self.noise_range)[0]
            elif self.mode == 'batch':
                # Draw one noise level and use it for the whole mini batch.
                batch_noise_level = random_logarithmic_distribution(*self.noise_range, 1)
                refl = noise.apply_shot_noise(refl, batch_noise_level)[0]
            else:
                raise ValueError('not a valid mode')
        if self.background_range is not None:
            if self.mode == 'single':
                refl += noise.generate_background(len(refl), self.n_input, self.background_range,
                                                  self.relative_background_spread)[0]
            elif self.mode == 'batch':
                # Draw one background level and use it for the whole mini batch.
                batch_bg_level = random_logarithmic_distribution(*self.background_range, 1)
                refl += noise.generate_background(len(refl), self.n_input, batch_bg_level,
                                                  self.relative_background_spread)[0]
            else:
                raise ValueError('not a valid mode')

        return self.input_preprocessor.standardize(refl), np.array(self.labels)[indexes]


class UniformNoiseGenerator(NoiseGenerator):
    """Generator that applies uniform noise and a scaling factor to every mini batch before standardizing it.

    The shot noise and background settings of :class:`NoiseGenerator` are switched off; each batch is instead passed
    through ``noise.apply_uniform_noise`` and then ``noise.apply_scaling_factor``.

    Args:
        reflectivity: Training reflectivity curves
        labels: Training labels in the same order as reflectivity
        ip: Input preprocessor whose ``standardize`` method is applied to the noisy curves
        batch_size: Number of samples per mini batch
        mode: Passed on to :class:`NoiseGenerator`; not used by this class' own batch generation
        shuffle: If ``True``, shuffles reflectivity and labels after every epoch
        uniform_noise_range: Range handed to ``noise.apply_uniform_noise``
        scaling_range: Range handed to ``noise.apply_scaling_factor``
    """

    def __init__(self, reflectivity, labels, ip, batch_size=32, mode='single', shuffle=True, uniform_noise_range=(1, 1),
                 scaling_range=(1, 1)):
        super().__init__(reflectivity, labels, ip, batch_size=batch_size, mode=mode, shuffle=shuffle,
                         noise_range=None, background_range=None,
                         relative_background_spread=0)
        self.uniform_noise_range = uniform_noise_range
        self.scaling_range = scaling_range
        # Same object as ``self.input_preprocessor`` (set by the parent constructor); kept under this name as well.
        self.ip = ip

    def __data_generation(self, indexes):
        """Return the standardized, noisy and rescaled curves and the labels for the samples at ``indexes``."""
        refl = self.reflectivity[indexes]
        refl = noise.apply_scaling_factor(noise.apply_uniform_noise(refl, self.uniform_noise_range), self.scaling_range)

        return self.ip.standardize(refl), np.array(self.labels)[indexes]

    def __getitem__(self, index):
        """Return the mini batch number ``index`` as the tuple ``(x, y, [None])``."""
        # Re-implemented here because ``__data_generation`` is name-mangled per class: the inherited
        # ``__getitem__`` would call the parent class version instead of the one defined above.
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        x, y = self.__data_generation(indexes)

        return x, y, [None]


class NoiseGeneratorLog(NoiseGenerator):
    """Generator that returns noisy reflectivity batches on a scaled logarithmic axis instead of standardizing them.

    Shot noise and background are applied in the same way as in :class:`NoiseGenerator`, but no input preprocessor
    is used: the curves are returned as ``abs(log10(reflectivity) / 10)``.

    Args:
        reflectivity: Training reflectivity curves
        labels: Training labels in the same order as reflectivity
        batch_size: Number of samples per mini batch
        mode:
            'single': random noise and background levels are generated for every curve of a mini batch
            'batch': random noise and background levels are generated for each mini batch
        shuffle: If ``True``, shuffles reflectivity and labels after every epoch
        noise_range: Tuple ``(min, max)`` between which the shot noise levels are randomly generated. If ``None``,
            no shot noise is applied.
        background_range: Tuple ``(min, max)`` between which the background levels are randomly generated. If
            ``None``, no background is added.
        relative_background_spread: Relative spread forwarded to ``noise.generate_background``
    """

    def __init__(self, reflectivity, labels, batch_size=32, mode='single', shuffle=True, noise_range=None,
                 background_range=None, relative_background_spread: float = 0.1):
        # No input preprocessor is needed, hence ``None`` is handed to the parent constructor.
        super().__init__(reflectivity, labels, None, batch_size=batch_size, mode=mode, shuffle=shuffle,
                         noise_range=noise_range, background_range=background_range,
                         relative_background_spread=relative_background_spread)

    def __data_generation(self, indexes):
        """Return the log-scaled noisy curves and the labels for the samples at ``indexes``.

        Raises:
            ValueError: If ``mode`` is neither ``'single'`` nor ``'batch'`` while noise or background is requested.
        """
        refl = self.reflectivity[indexes]
        # Strings must be compared with ``==``: ``is`` tests object identity and only works by accident when both
        # strings happen to be interned.
        if self.noise_range is not None:
            if self.mode == 'single':
                refl = noise.apply_shot_noise(refl, self.noise_range)[0]
            elif self.mode == 'batch':
                # Draw one noise level and use it for the whole mini batch.
                batch_noise_level = random_logarithmic_distribution(*self.noise_range, 1)
                refl = noise.apply_shot_noise(refl, batch_noise_level)[0]
            else:
                raise ValueError('not a valid mode')
        if self.background_range is not None:
            if self.mode == 'single':
                refl += noise.generate_background(len(refl), self.n_input, self.background_range,
                                                  self.relative_background_spread)[0]
            elif self.mode == 'batch':
                # Draw one background level and use it for the whole mini batch.
                batch_bg_level = random_logarithmic_distribution(*self.background_range, 1)
                refl += noise.generate_background(len(refl), self.n_input, batch_bg_level,
                                                  self.relative_background_spread)[0]
            else:
                raise ValueError('not a valid mode')

        # NOTE(review): log10 yields -inf/NaN for zero or negative values -- confirm the inputs stay positive.
        return abs(np.log10(refl) / 10), np.array(self.labels)[indexes]

    def __getitem__(self, index):
        """Return the mini batch number ``index`` as the tuple ``(x, y, [None])``."""
        # Re-implemented here because ``__data_generation`` is name-mangled per class: the inherited
        # ``__getitem__`` would call the parent class version instead of the one defined above.
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        x, y = self.__data_generation(indexes)

        return x, y, [None]