Source code for profit.run.local

""" Local Runner & Memory-map Interface

 * LocalRunner: start Workers locally via the shell (subprocess.Popen)
 * ForkRunner: start Workers locally with forking (multiprocessing.Process)
 * MemmapInterface: share data using a memory-mapped, structured array (using numpy)
"""

import subprocess
from multiprocessing import Process
import logging
import numpy as np
import os
import json

from .interface import RunnerInterface, WorkerInterface
from .runner import Runner
from .worker import Worker


# === Local Runner === #


class LocalRunner(Runner, label="local"):
    """start Workers locally via the shell"""

    def __init__(self, command="profit-worker", parallel="all", **kwargs):
        if parallel == "all":
            # parallel: 'all' infers the number of available CPUs
            parallel = len(os.sched_getaffinity(0))
        self.command = command
        super().__init__(parallel=parallel, **kwargs)
    def __repr__(self):
        # parentheses around the conditional expressions are required:
        # without them, operator precedence turns the concatenation into
        # nested conditionals and produces the wrong string
        return (
            f"<{self.__class__.__name__} ("
            + (", debug" if self.debug else "")
            + (f", {self.command}" if self.command != "profit-worker" else "")
            + ")>"
        )
    @property
    def config(self):
        config = {
            "command": self.command,
        }
        return {**super().config, **config}  # super().config | config in python3.9
    def spawn(self, params=None, wait=False):
        super().spawn(params, wait)
        env = os.environ.copy()
        env["PROFIT_RUN_ID"] = str(self.next_run_id)
        env["PROFIT_WORKER"] = json.dumps(self.worker)
        env["PROFIT_INTERFACE"] = json.dumps(self.interface.config)
        self.runs[self.next_run_id] = subprocess.Popen(
            self.command, shell=True, env=env, cwd=self.work_dir
        )
        if wait:
            self.wait(self.next_run_id)
        self.next_run_id += 1
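
    # The spawned process reads its configuration back from these environment
    # variables. A rough sketch of the worker-side handshake (illustrative
    # only; the actual decoding lives in profit.run.worker):
    #
    #     run_id = int(os.environ["PROFIT_RUN_ID"])
    #     worker_config = json.loads(os.environ["PROFIT_WORKER"])
    #     interface_config = json.loads(os.environ["PROFIT_INTERFACE"])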
    def poll(self, run_id):
        if self.runs[run_id].poll() is not None:
            self.logger.info(f"run {run_id} failed")
            self.failed[run_id] = self.runs.pop(run_id)
    def cancel(self, run_id):
        self.runs[run_id].terminate()
        self.failed[run_id] = self.runs.pop(run_id)

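
# A minimal usage sketch for LocalRunner (the keyword arguments accepted by
# Runner.__init__ and the shape of `params` depend on the profit
# configuration; the values below are only illustrative):
#
#     runner = LocalRunner(command="profit-worker", parallel=2)
#     runner.spawn(params={"u": 0.5})   # non-blocking; assuming run ids start at 0
#     runner.poll(0)                    # check whether the process has ended
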
# === Fork Runner === #

class ForkRunner(Runner, label="fork"):
    """start Workers locally using forking (multiprocessing.Process)"""

    def __init__(self, parallel="all", **kwargs):
        if parallel == "all":
            # parallel: 'all' infers the number of available CPUs
            parallel = len(os.sched_getaffinity(0))
        super().__init__(parallel=parallel, **kwargs)
    def spawn(self, params=None, wait=False):
        super().spawn(params, wait)

        def work():
            with self.change_work_dir():
                worker = Worker.from_config(
                    self.worker, self.interface.config, self.next_run_id
                )
                worker.work()
                worker.clean()

        process = Process(target=work)
        self.runs[self.next_run_id] = process
        process.start()
        if wait:
            self.wait(self.next_run_id)
        self.next_run_id += 1
    def poll(self, run_id):
        if self.runs[run_id].exitcode is not None:
            self.logger.info(f"run {run_id} failed")
            self.failed[run_id] = self.runs.pop(run_id)
    def cancel(self, run_id):
        self.runs[run_id].terminate()
        self.failed[run_id] = self.runs.pop(run_id)

# === Numpy Memmap Interface === #

class MemmapRunnerInterface(RunnerInterface, label="memmap"):
    """Runner-Worker Interface using a memory mapped numpy array

    - expected to be very fast with the *local* Runner as each Worker can
      access the array directly (unverified)
    - expected to be inefficient if used on a cluster with a shared
      filesystem (unverified)
    - reliable
    - known issue: resizing the array (to add more runs) is dangerous, needs
      a workaround (e.g. several arrays in the same file)
    """

    def __init__(
        self,
        size,
        input_config,
        output_config,
        *,
        path: str = "interface.npy",
        logger_parent: logging.Logger = None,
    ):
        super().__init__(size, input_config, output_config, logger_parent=logger_parent)
        self.path = path

        init_data = np.zeros(
            size, dtype=self.input_vars + self.internal_vars + self.output_vars
        )
        np.save(self.path, init_data)
        self.logger.debug(f"init memmap <{self.path}, size {size}, {init_data.dtype}>")

        try:
            self._memmap = np.load(self.path, mmap_mode="r+")
        except FileNotFoundError:
            self.runner.logger.error(
                f"{self.__class__.__name__} could not load {self.path} (cwd: {os.getcwd()})"
            )
            raise

        # should return views on memmap
        self.input = self._memmap[[v[0] for v in self.input_vars]]
        self.output = self._memmap[[v[0] for v in self.output_vars]]
        self.internal = self._memmap[[v[0] for v in self.internal_vars]]

    @property
    def config(self):
        return {
            **super().config,
            "path": self.path,
        }  # super().config | config in python3.9
    def resize(self, size):
        """Resizing the Interface

        Attention: this is dangerous and may lead to unexpected errors!
        The problem is that the memory mapped file is overwritten.
        Any Workers which have this file mapped will run into severe problems.
        Possible future workarounds: multiple files or multiple headers in one file.
        """
        if size <= self.size:
            self.logger.warning("shrinking RunnerInterface is not supported")
            return

        self.logger.warning("resizing MemmapRunnerInterface is dangerous")
        self.clean()
        init_data = np.zeros(
            size, dtype=self.input_vars + self.internal_vars + self.output_vars
        )
        np.save(self.path, init_data)

        try:
            self._memmap = np.load(self.path, mmap_mode="r+")
        except FileNotFoundError:
            self.runner.logger.error(
                f"{self.__class__.__name__} could not load {self.path} (cwd: {os.getcwd()})"
            )
            raise

        self.input = self._memmap[[v[0] for v in self.input_vars]]
        self.output = self._memmap[[v[0] for v in self.output_vars]]
        self.internal = self._memmap[[v[0] for v in self.internal_vars]]
    def clean(self):
        if os.path.exists(self.path):
            os.remove(self.path)

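
# A standalone sketch of the memmap technique used above (the dtype and field
# names are illustrative, not profit's actual variable configuration):
#
#     import numpy as np
#     dtype = [("x", float), ("DONE", bool), ("TIME", int), ("f", float)]
#     np.save("interface.npy", np.zeros(3, dtype=dtype))   # one row per run
#     shared = np.load("interface.npy", mmap_mode="r+")    # writable memmap
#     shared["x"][0] = 0.5   # the runner writes an input value
#     shared.flush()         # workers mapping the same file see the change
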

class MemmapWorkerInterface(WorkerInterface, label="memmap"):
    """Runner-Worker Interface using a memory mapped numpy array

    counterpart to :py:class:`MemmapRunnerInterface`
    """

    def __init__(
        self, run_id: int, *, path="interface.npy", logger_parent: logging.Logger = None
    ):
        self.path = path
        self._memmap = None
        super().__init__(run_id, logger_parent=logger_parent)

    @property
    def config(self):
        return {
            **super().config,
            "path": self.path,
        }  # super().config | config in python3.9

    @property
    def time(self):
        if self._memmap is None:
            return None
        return self._data["TIME"]

    @time.setter
    def time(self, value):
        if self._memmap is not None:
            self._data["TIME"] = value
    def retrieve(self):
        try:
            self._memmap = np.load(self.path, mmap_mode="r+")
        except FileNotFoundError:
            self.logger.error(
                f"{self.__class__.__name__} could not load {self.path} (cwd: {os.getcwd()})"
            )
            raise

        # should return views on memmap
        inputs, outputs = [], []
        k = 0
        for k, key in enumerate(self._memmap.dtype.names):
            if key == "DONE":
                break
            inputs.append(key)
        for key in self._memmap.dtype.names[k:]:
            if key not in ["DONE", "TIME"]:
                outputs.append(key)
        self.input = self._memmap[inputs][self.run_id]
        self.output = self._memmap[outputs][self.run_id]
        self._data = self._memmap[self.run_id]
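
    # Note: retrieve() relies on the field order established by the Runner
    # side, dtype = [*input_vars, *internal_vars, *output_vars], with "DONE"
    # as the first internal field: every field before "DONE" is treated as an
    # input, every later field except "DONE" and "TIME" as an output.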

    def transmit(self):
        # signal the Worker has completed
        self._data["DONE"] = True
        # ensure the data is written to disk
        self._memmap.flush()

    def clean(self):
        if self._memmap is not None:
            # ensure the data is written to disk
            self._memmap.flush()
            # close the connection
            self._memmap = None
            del self._data
            del self.input
            del self.output
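
# A sketch of the worker-side lifecycle against this interface (run_id and the
# computation are illustrative; a real Worker drives these calls):
#
#     iface = MemmapWorkerInterface(0)
#     iface.retrieve()     # map the shared file and take views on row 0
#     # ... compute iface.output from iface.input ...
#     iface.time = 1       # optional progress marker, stored in "TIME"
#     iface.transmit()     # set "DONE" and flush the mapping to disk
#     iface.clean()        # flush again and drop the mapping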