# Source code for profit.util.file_handler

from profit.util.base_class import CustomABC
from numpy import ndarray
from typing import Union


class FileHandler(CustomABC):
    """Entry point that forwards ``save``/``load`` to a handler chosen by file ending.

    The ending is the text after the last ``.`` of the filename (or the whole
    filename if it contains no dot). It is looked up in ``labels`` first and,
    if absent, translated through ``associated_types``.
    """

    # Maps a file ending to the handler class responsible for it.
    # Presumably filled by the ``register`` decorator inherited from CustomABC
    # (its implementation is not part of this module).
    labels = {}
    # Endings without a handler of their own, mapped to the ending whose handler is used.
    associated_types = {"in": "txt", "out": "txt"}

    @classmethod
    def _resolve_handler(cls, filename):
        """Return the handler class responsible for ``filename``.

        Parameters:
            filename (str or os.PathLike)

        Raises:
            KeyError: If neither ``labels`` nor ``associated_types`` knows the ending.
        """
        from os import fspath

        # fspath leaves str untouched and converts pathlib.Path & co. to str.
        ending = fspath(filename).split(".")[-1]
        if ending in cls.labels:
            return cls.labels[ending]
        try:
            return cls.labels[cls.associated_types[ending]]
        except KeyError:
            # Same exception type as a raw dict lookup, but with a usable message.
            raise KeyError(
                f"No file handler available for ending '{ending}' of {filename!r}. "
                f"Registered endings: {list(cls.labels)}, "
                f"associated endings: {list(cls.associated_types)}"
            ) from None

    @classmethod
    def save(cls, filename, data, **kwargs):
        """Write ``data`` to ``filename`` using the handler matching its ending.

        Parameters:
            filename (str or os.PathLike)
            data (ndarray, dict)
            kwargs: Options like header and format for specific child classes.

        Raises:
            KeyError: If no handler is available for the file ending.
        """
        cls._resolve_handler(filename).save(filename, data, **kwargs)

    @classmethod
    def load(cls, filename, as_type="dtype"):
        """Read ``filename`` using the handler matching its ending.

        Parameters:
            filename (str or os.PathLike)
            as_type (str): Identifier in which format the data should be returned.
                Options: dtype (structured array), dict

        Returns:
            Whatever the selected handler's ``load`` returns for ``as_type``.

        Raises:
            KeyError: If no handler is available for the file ending.
        """
        return cls._resolve_handler(filename).load(filename, as_type)


@FileHandler.register("txt")
class TxtHandler(FileHandler):
    """Handler for plain text files, built on numpy ``savetxt`` and ``genfromtxt``."""

    @classmethod
    def save(cls, filename, data, header=None, fmt=None):
        """Write ``data`` to a text file.

        Structured arrays are flattened by horizontally stacking their fields;
        the field names become the header unless ``header`` is given.
        Anything else is handed to ``savetxt`` unchanged.

        Parameters:
            filename (str)
            data (ndarray): Structured or plain array (or anything ``savetxt`` accepts).
            header (str): Header line. Defaults to the space separated field
                names for structured arrays and to no header otherwise.
            fmt (str): Format specifier passed on to ``savetxt``. If omitted,
                the ``savetxt`` default is used.
        """
        from numpy import hstack, savetxt

        # Only forward options that were actually supplied so that numpy's
        # own defaults stay in effect otherwise.
        options = {}
        if fmt:
            options["fmt"] = fmt

        # Decide explicitly whether the input is a structured array instead of
        # relying on a TypeError, which would also hide genuine savetxt errors
        # and silently discard header/fmt.
        names = getattr(getattr(data, "dtype", None), "names", None)
        if names is not None:
            options["header"] = header if header else " ".join(names)
            data = hstack([data[key] for key in names])
        elif header:
            options["header"] = header

        savetxt(filename, data, **options)

    @classmethod
    def load(cls, filename, as_type="dtype"):
        """Read a text file with ``genfromtxt``.

        Parameters:
            filename (str)
            as_type (str): With "dtype" the first line is interpreted as field
                names (structured array); any other value reads a plain array.

        Returns:
            The loaded array after being passed through
            ``profit.util.util.check_ndim``.
        """
        from numpy import genfromtxt
        from profit.util.util import check_ndim

        names = True if as_type == "dtype" else None
        return check_ndim(genfromtxt(filename, names=names))


@FileHandler.register("hdf5")
class HDF5Handler(FileHandler):
    """Handler for HDF5 files, built on ``h5py``."""

    @classmethod
    def save(cls, filename, data, **kwargs):
        """Write ``data`` to an HDF5 file, replacing any existing file.

        Parameters:
            filename (str)
            data (ndarray, dict): Structured arrays are stored with one entry
                per field name, dicts with one entry per (nested) key, and
                everything else under the single entry "data".
            kwargs: Accepted for interface compatibility; not used.
        """
        from h5py import File

        # ``dtype.names`` is None for plain arrays; those must go to the
        # generic "data" entry instead of being iterated field by field.
        field_names = getattr(getattr(data, "dtype", None), "names", None)

        with File(filename, "w") as h5f:
            if field_names is not None:
                # Save to numpy dtype names
                for key in field_names:
                    h5f[key] = data[key]
            elif isinstance(data, dict):
                # Save to dict key
                cls._recursive_dict2hdf(h5f, "", data)
            else:
                # Save to general data entry
                h5f["data"] = data

    @classmethod
    def load(cls, filename, as_type="dtype"):
        """Read an HDF5 file.

        Parameters:
            filename (str)
            as_type (str): "dtype" returns a structured array built from the
                top-level entries, "dict" a (nested) dict, and any other value
                the content of the "data" entry as an array.
        """
        from h5py import File
        from numpy import array

        # All branches copy the content into memory before the file is closed.
        with File(filename, "r") as h5f:
            if as_type == "dtype":
                return cls.hdf2numpy(h5f)
            if as_type == "dict":
                return cls.hdf2dict(h5f)
            return array(h5f["data"])

    @classmethod
    def _recursive_dict2hdf(cls, file, path, _dict):
        """Store a (nested) dict below ``path``; nested dicts become sub-paths.

        Parameters:
            file: Open, writable h5py file or group.
            path (str): Prefix of the entries, either "" or ending in "/".
            _dict (dict): Values to store.
        """
        for key, value in _dict.items():
            # Keys are converted with str() on both branches so that
            # non-string keys work at leaf level as well.
            target = path + str(key)
            if isinstance(value, dict):
                cls._recursive_dict2hdf(file, target + "/", value)
            else:
                file[target] = value

    @staticmethod
    def hdf2numpy(dataset):
        """Build a structured float array with one field per top-level entry.

        The shape is taken from the first entry.

        Parameters:
            dataset: Open h5py file or group.

        Returns:
            The structured array after being passed through
            ``profit.util.util.check_ndim``.

        Raises:
            IndexError: If ``dataset`` has no entries.
        """
        from numpy import zeros_like, array
        from profit.util.util import check_ndim

        keys = list(dataset.keys())
        dtypes = [(key, float) for key in keys]
        data = zeros_like(dataset[keys[0]], dtype=dtypes)
        for key in keys:
            data[key] = array(dataset[key])
        return check_ndim(data)

    @staticmethod
    def hdf2dict(dataset):
        """Convert an h5py file or group into a (nested) dict.

        Datasets become values (arrays are at least one-dimensional, bytes are
        decoded as UTF-8); everything else is treated as a group and becomes a
        nested dict.

        Parameters:
            dataset: Open h5py file or group.
        """
        from numpy import array, ndarray, atleast_1d
        from h5py import Dataset

        load_dict = {}

        def recursive_hdf2dict(_data, _dict):
            for key in _data.keys():
                # Look the entry up once instead of once per use.
                item = _data[key]
                if isinstance(item, Dataset):
                    val = item[()]
                    # Newer h5py versions return stored strings as bytes.
                    if isinstance(val, bytes):
                        val = val.decode("utf-8")
                    _dict[key] = (
                        atleast_1d(array(val)) if isinstance(val, ndarray) else val
                    )
                else:
                    _dict[key] = {}
                    recursive_hdf2dict(item, _dict[key])

        recursive_hdf2dict(dataset, load_dict)
        return load_dict


@FileHandler.register("pkl")
class PickleHandler(FileHandler):
    """Handler that stores arbitrary Python objects with ``pickle``."""

    @classmethod
    def save(cls, filename, data, **kwargs):
        """Pickle ``data`` into ``filename``.

        Parameters:
            filename (str)
            data: Any picklable object.
            kwargs: ``method`` (str) selects the mode the file is opened with.
                Default: "wb".
        """
        from pickle import dump

        write_method = kwargs.get("method", "wb")
        # The context manager guarantees the file is flushed and closed.
        with open(filename, write_method) as file:
            dump(data, file)

    @classmethod
    def load(cls, filename, as_type="raw", read_method="rb"):
        """Unpickle the object stored in ``filename``.

        Parameters:
            filename (str)
            as_type (str): Only "raw" is supported.
            read_method (str): Mode the file is opened with.

        Returns:
            The unpickled object, or ``NotImplemented`` if ``as_type`` is not "raw".
        """
        from pickle import load

        if as_type != "raw":
            # Kept as a return value (not an exception) for backward compatibility.
            return NotImplemented
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(filename, read_method) as file:
            return load(file)