Source code for pybrops.core.util.h5py

"""
Module containing utility functions for handling HDF5 files.
"""

import numpy
import h5py

from pybrops.core.error.error_type_h5py import check_is_h5py_File
from pybrops.core.error.error_type_python import check_is_str
from pybrops.core.error.error_value_h5py import check_h5py_File_is_readable
from pybrops.core.error.error_value_h5py import check_h5py_File_is_writable

# Names exported by ``from pybrops.core.util.h5py import *``.
# NOTE(review): only ``h5py_File_write_dict`` is listed although this module
# defines several other public helpers (readers, mode checks) — confirm whether
# leaving them out of the star-import surface is intentional.
__all__ = [
    "h5py_File_write_dict",
]

# Types whose instances ``h5py_File_write_dict`` stores directly as an HDF5
# dataset; used as the second argument of ``isinstance``.
writable_classes = (
    # array data
    numpy.ndarray,
    # raw and text strings
    bytes,
    str,
    # native Python numbers
    int,
    float,
    # NumPy scalar types
    numpy.floating,
    numpy.integer,
    numpy.bool_,
)

def h5py_File_is_writable(h5file: h5py.File) -> bool:
    """
    Check whether a ``h5py.File`` reports a mode that permits writing.

    Parameters
    ----------
    h5file : h5py.File
        HDF5 file stream to check.

    Returns
    -------
    out : bool
        ``True`` if the mode of the underlying file is one of ``"r+"``,
        ``"w"``, ``"w-"``, ``"x"`` or ``"a"``; ``False`` otherwise.
    """
    # every mode string that is treated as allowing writes
    writable_modes = ("r+", "w", "w-", "x", "a")

    # the mode is read from the file object that owns this handle
    current_mode = h5file.file.mode

    return current_mode in writable_modes
def h5py_File_is_readable(stream: h5py.File) -> bool:
    """
    Check whether a ``h5py.File`` reports a mode that permits reading.

    Parameters
    ----------
    stream : h5py.File
        HDF5 file stream to check.

    Returns
    -------
    out : bool
        ``True`` if the mode of the underlying file is one of ``"r"``,
        ``"r+"`` or ``"a"``; ``False`` otherwise.
    """
    # every mode string that is treated as allowing reads
    readable_modes = ("r", "r+", "a")

    # the mode is read from the file object that owns this handle
    current_mode = stream.file.mode

    return current_mode in readable_modes
def h5py_File_has_group(h5file: h5py.File, groupname: str) -> bool:
    """
    Test whether a name is present in an ``h5py.File``.

    Parameters
    ----------
    h5file : h5py.File
        An HDF5 file stream to check.
    groupname : str
        Name of the group to look for in ``h5file``.

    Returns
    -------
    out : bool
        Result of the membership test ``groupname in h5file``.
    """
    # plain membership test; no check is made on the kind of object found
    is_present = groupname in h5file
    return is_present
#######################
### Write functions ###

# ruthlessly stolen/based on:
# https://codereview.stackexchange.com/questions/120802/recursively-save-python-dictionaries-to-hdf5-files-using-h5py/121308
def h5py_File_write_dict(
        h5file: h5py.File,
        groupname: str,
        in_dict: dict,
        overwrite: bool = True
    ) -> None:
    """
    Recursively save dictionary contents to an open HDF5 file.

    Each key of ``in_dict`` is appended to ``groupname`` to form a field name.
    Values that are instances of the module's writable classes are stored as
    datasets; values that are dictionaries are written recursively under
    ``groupname + key + '/'``.

    Parameters
    ----------
    h5file : h5py.File
        An open, writable HDF5 file stream.
    groupname : str
        String representation of group name. Must be terminated by '/'.
    in_dict : dict
        Input dictionary to save to HDF5 file.
        If a field in ``in_dict`` is ``None``, skip the field; do not create a
        group for key associated with ``None`` item.
    overwrite : bool
        Whether to overwrite fields. If ``True``, a field that already exists
        is deleted before the new dataset is created. If ``False``, existing
        fields are left in place and ``create_dataset`` is called regardless.
        This setting is applied at every level of nesting.

    Raises
    ------
    ValueError
        If a value in ``in_dict`` is not ``None``, not a ``dict``, and not an
        instance of one of the writable classes.
    """
    # type checks
    check_is_h5py_File(h5file, "h5file")
    check_h5py_File_is_writable(h5file)
    check_is_str(groupname, "groupname")

    # for each item in dictionary
    for key, item in in_dict.items():
        # a None value means "no field": skip without creating anything
        if item is None:
            continue

        # create field name
        fieldname = groupname + key

        if isinstance(item, writable_classes):
            # remove a pre-existing field first when overwriting is requested
            if overwrite and (fieldname in h5file):
                del h5file[fieldname]
            h5file.create_dataset(fieldname, data = item)
        elif isinstance(item, dict):
            # forward ``overwrite`` so nested dictionaries honor the caller's
            # choice instead of silently falling back to the default
            h5py_File_write_dict(h5file, fieldname + '/', item, overwrite)
        else:
            raise ValueError("Cannot save {0}: {1} type".format(key, type(item)))
######################
### Read functions ###
def h5py_File_read_int(h5file: h5py.File, fieldname: str) -> int:
    """
    Load a field from an HDF5 file and convert it to a Python ``int``.

    Parameters
    ----------
    h5file : h5py.File
        An open, readable HDF5 file stream.
    fieldname : str
        Name of the field from which to read.

    Returns
    -------
    out : int
        The value stored at ``fieldname``, passed through ``int()``.
    """
    # validate the inputs before touching the file
    check_is_h5py_File(h5file, "h5file")
    check_h5py_File_is_readable(h5file)
    check_is_str(fieldname, "fieldname")

    # look up the field, then pull its contents out with an empty-tuple index
    dataset = h5file[fieldname]
    value = dataset[()]

    return int(value)
def h5py_File_read_ndarray(h5file: h5py.File, fieldname: str) -> numpy.ndarray:
    """
    Load a field from an HDF5 file and return it unchanged.

    No dtype conversion is applied to the data that is read.

    Parameters
    ----------
    h5file : h5py.File
        An open, readable HDF5 file stream.
    fieldname : str
        Name of the field from which to read.

    Returns
    -------
    out : numpy.ndarray
        An array read from the HDF5 file.
    """
    # validate the inputs before touching the file
    check_is_h5py_File(h5file, "h5file")
    check_h5py_File_is_readable(h5file)
    check_is_str(fieldname, "fieldname")

    # look up the field, then pull its contents out with an empty-tuple index
    dataset = h5file[fieldname]
    return dataset[()]
def h5py_File_read_ndarray_int(h5file: h5py.File, fieldname: str) -> numpy.ndarray:
    """
    Load a field from an HDF5 file as an ``int`` array.

    The data are converted with ``astype(int)`` only when their dtype is not
    already ``int``.

    Parameters
    ----------
    h5file : h5py.File
        An open, readable HDF5 file stream.
    fieldname : str
        Name of the field from which to read.

    Returns
    -------
    out : numpy.ndarray
        An ``int`` array read from the HDF5 file.
    """
    # validate the inputs before touching the file
    check_is_h5py_File(h5file, "h5file")
    check_h5py_File_is_readable(h5file)
    check_is_str(fieldname, "fieldname")

    # pull the stored data out of the file
    data = h5file[fieldname][()]

    # data that already have the target dtype are returned without conversion
    if data.dtype == int:
        return data

    return data.astype(int)
def h5py_File_read_ndarray_int8(h5file: h5py.File, fieldname: str) -> numpy.ndarray:
    """
    Load a field from an HDF5 file as an ``int8`` array.

    The data are converted with ``astype("int8")`` only when their dtype is
    not already ``int8``.

    Parameters
    ----------
    h5file : h5py.File
        An open, readable HDF5 file stream.
    fieldname : str
        Name of the field from which to read.

    Returns
    -------
    out : numpy.ndarray
        An ``int8`` array read from the HDF5 file.
    """
    # validate the inputs before touching the file
    check_is_h5py_File(h5file, "h5file")
    check_h5py_File_is_readable(h5file)
    check_is_str(fieldname, "fieldname")

    # pull the stored data out of the file
    data = h5file[fieldname][()]

    # data that already have the target dtype are returned without conversion
    if data.dtype == "int8":
        return data

    return data.astype("int8")
def h5py_File_read_ndarray_utf8(h5file: h5py.File, fieldname: str) -> numpy.ndarray:
    """
    Load a field from an HDF5 file as an ``object`` array of strings.

    Every element that is a ``bytes`` instance is decoded as UTF-8 into a
    ``str``; every other element is kept as is.

    Parameters
    ----------
    h5file : h5py.File
        An open, readable HDF5 file stream.
    fieldname : str
        Name of the field from which to read.

    Returns
    -------
    out : numpy.ndarray
        An ``object`` array containing strings read from the HDF5 file.
    """
    # validate the inputs before touching the file
    check_is_h5py_File(h5file, "h5file")
    check_h5py_File_is_readable(h5file)
    check_is_str(fieldname, "fieldname")

    # pull the stored data out of the file
    raw = h5file[fieldname][()]

    # decode bytes elements one by one; leave everything else untouched
    decoded = []
    for element in raw:
        if isinstance(element, bytes):
            element = element.decode("utf-8")
        decoded.append(element)

    return numpy.array(decoded, dtype = object)
def h5py_File_read_utf8(h5file: h5py.File, fieldname: str) -> str:
    """
    Read a ``str`` from a file.

    Parameters
    ----------
    h5file : h5py.File
        An open, readable HDF5 file stream.
    fieldname : str
        Name of the field from which to read.

    Returns
    -------
    out : str
        A ``str`` read from the HDF5 file. A value read as ``bytes`` is
        decoded as UTF-8; a value that is already a ``str`` is returned as is.
    """
    # type checks
    check_is_h5py_File(h5file, "h5file")
    check_h5py_File_is_readable(h5file)
    check_is_str(fieldname, "fieldname")

    # extract field
    out = h5file[fieldname][()]

    # a value that is already text needs no decoding; calling ``.decode`` on it
    # would raise AttributeError (mirrors the bytes-only decoding done in
    # ``h5py_File_read_ndarray_utf8``)
    if isinstance(out, str):
        return out

    # convert bytes to utf-8 string format
    return out.decode("utf-8")
def _h5py_Group_read_dict(group: h5py.Group) -> dict:
    """Recursively convert the members of an HDF5 group into a ``dict``."""
    out = {}
    for key, item in group.items():
        if isinstance(item, h5py.Group):
            # a sub-group corresponds to a nested dictionary
            out[key] = _h5py_Group_read_dict(item)
        else:
            # anything else is read out in full with an empty-tuple index
            out[key] = item[()]
    return out


def h5py_File_read_dict(h5file: h5py.File, fieldname: str) -> dict:
    """
    Read a ``dict`` from a file.

    Each member of the group at ``fieldname`` becomes one dictionary entry.
    Members that are themselves groups are read recursively into nested
    dictionaries, so that nested dictionaries saved with
    ``h5py_File_write_dict`` can be read back.

    Parameters
    ----------
    h5file : h5py.File
        An open, readable HDF5 file stream.
    fieldname : str
        Name of the field from which to read.

    Returns
    -------
    out : dict
        A ``dict`` read from the HDF5 file. Values are returned as read from
        the file; no type conversion (e.g. bytes to ``str``) is applied.
    """
    # type checks
    check_is_h5py_File(h5file, "h5file")
    check_h5py_File_is_readable(h5file)
    check_is_str(fieldname, "fieldname")

    # get view of the group to convert
    view = h5file[fieldname]

    # walk the group, descending into sub-groups
    return _h5py_Group_read_dict(view)