# Source code for profit.al.active_learning

"""
For computationally expensive simulations or experiments it is crucial to get the most information out of every
training point. This is not the case in the standard procedure of randomly selecting the training points.
In order to get the most out of the least number of training points, the next point is inferred by calculating an
acquisition function like the minimization of local variance or expected improvement.
"""
import numpy as np
from abc import abstractmethod
from warnings import warn

from profit.util.base_class import CustomABC
from profit.defaults import active_learning as defaults


class ActiveLearning(CustomABC):
    """Active learning base class.

    Parameters:
        runner (profit.run.Runner): Runner to dynamically start runs.
        variables (profit.util.variable.VariableGroup): Variables.
        ntrain (int): Total number of training points.
        nwarmup (int): Number of warmup (random) initialization points. If larger than ``ntrain``, a warning is
            issued and ``ntrain`` is used instead.
        batch_size (int): Number of training samples learned in parallel. Must evenly divide the number of
            learning points, i.e. ``ntrain`` minus the (clamped) number of warmup points.
        convergence_criterion (float): AL is stopped when the loss of the acquisition function is lower than this
            criterion. Not implemented yet.
        nsearch (int): Number of possible candidate points in each dimension. Not stored by this base class.
        make_plot (bool): Flag indicating if the AL progress is plotted.

    Attributes:
        krun (int): Current training cycle.

    Raises:
        RuntimeError: If the number of learning points is not a multiple of ``batch_size``.
    """

    # NOTE(review): presumably the registry that CustomABC uses for the ``cls[...]`` lookup in
    # ``from_config`` -- confirm against profit.util.base_class.
    labels = {}

    def __init__(
        self,
        runner,
        variables,
        ntrain,
        nwarmup=defaults["nwarmup"],
        batch_size=defaults["batch_size"],
        convergence_criterion=defaults["convergence_criterion"],
        nsearch=defaults["nsearch"],
        make_plot=defaults["make_plot"],
    ):
        self.runner = runner
        self.variables = variables

        self.ntrain = ntrain
        if nwarmup > ntrain:
            message = "nwarmup > ntrain. Setting nwarmup=ntrain."
            warn(message)
        self.nwarmup = min(nwarmup, ntrain)

        self.batch_size = batch_size
        # Use the clamped warmup count: with the raw value, ``ntrain - nwarmup`` can be negative and the
        # modulo of a negative number is non-negative in Python, which would reject a valid configuration.
        nlearn = ntrain - self.nwarmup
        if nlearn % batch_size:
            raise RuntimeError(
                "Number of learning points ({}) and batch size ({}) for AL not compatible!".format(
                    nlearn, batch_size
                )
            )

        self.convergence_criterion = convergence_criterion
        self.make_plot = make_plot
        self.krun = 0

    @abstractmethod
    def warmup(self, save_intermediate=defaults["save_intermediate"]):
        """Warmup cycle before the actual learning starts."""

    @abstractmethod
    def learn(
        self,
        resume_from=defaults["resume_from"],
        save_intermediate=defaults["save_intermediate"],
    ):
        """Main loop for active learning."""

    def update_run(self, candidates):
        """Run a batch of simulations with the new candidates.

        One parameter dictionary is built per batch slot, mapping each input variable name to the corresponding
        candidate value, and the whole batch is handed to the runner, which is asked to wait for completion.

        Parameters:
            candidates (np.array): Input points to run the simulation on. Iterated column-wise via
                ``candidates.T``, so columns are matched to the input variable names and rows to batch slots.
        """

        params_array = [{} for _ in range(self.batch_size)]

        # Each column of ``candidates`` holds the values of one input variable for all batch slots.
        for key, values in zip(self.variables.named_input.dtype.names, candidates.T):
            for idx, value in enumerate(values):
                params_array[idx][key] = value
        # Start batch
        self.runner.spawn_array(params_array, wait=True)

    def update_data(self):
        """Update the variables with the runner data.

        For every field name of the runner's input and output data, the matching variable's value is replaced
        by the data read from the runner interface, passed through ``check_ndim``.
        """
        from profit.util import check_ndim

        for key in self.runner.input_data.dtype.names:
            self.variables[key].value = check_ndim(self.runner.interface.input[key])
        for key in self.runner.output_data.dtype.names:
            self.variables[key].value = check_ndim(self.runner.interface.output[key])

    @abstractmethod
    def save(self, path):
        """Save the AL model.

        Parameters:
            path (str): Path where the model is saved.
        """

    def save_intermediate(self, model_path=None, input_path=None, output_path=None):
        """Save the current model and/or data; each part is skipped if its path is falsy.

        Parameters:
            model_path (str): Path passed to :meth:`save` for the model.
            input_path (str): Path where the named input data is saved.
            output_path (str): Path where the output data is saved. For paths ending in ``.txt`` the
                formatted output is written, otherwise the named output.
        """
        from profit.util.file_handler import FileHandler

        if model_path:
            self.save(model_path)
        if input_path:
            FileHandler.save(input_path, self.variables.named_input)
        if output_path:
            formatted_output_data = (
                self.variables.formatted_output
                if output_path.endswith(".txt")
                else self.variables.named_output
            )
            FileHandler.save(output_path, formatted_output_data)
        print("Saved intermediate results.")

    @abstractmethod
    def plot(self):
        """Plot the progress of the AL learning."""

    @classmethod
    def from_config(cls, runner, variables, config, base_config):
        """Instantiates an ActiveLearning object from the configuration parameters.

        The concrete class is selected by ``config["algorithm"]["class"]`` and its own ``from_config`` is
        called with the same arguments.

        Parameters:
            runner (profit.run.runner.Runner): Runner instance.
            variables (profit.util.variable.VariableGroup): Variables.
            config (dict): Only the 'active_learning' part of the base_config.
            base_config (dict): The whole configuration parameters.

        Returns:
            profit.al.active_learning.ActiveLearning: AL instance.
        """

        # NOTE(review): subscripting the class relies on behaviour inherited from CustomABC, which is
        # not visible in this module -- presumably a label -> subclass lookup; confirm.
        child = cls[config["algorithm"]["class"]]
        child_instance = child.from_config(runner, variables, config, base_config)
        return child_instance