Source code for nnsa.io.hdf5

"""
Module related to writing/reading HDF5 files.
"""
import warnings

import numpy as np

from nnsa.utils.dictionaries import unflatten_dict, flatten_dict
from nnsa.utils.other import convert_string

# Names exported by ``from nnsa.io.hdf5 import *``: the constant listing the
# supported attribute types and the two dict <-> HDF5 conversion functions.
__all__ = [
    'SUPPORTED_HDF5_ATTRIBUTE_TYPES',

    'read_dict_from_hdf5',
    'write_dict_to_hdf5',
]

# Type names (compared against ``type(value).__name__``) of the objects that
# can be written to, and restored from, HDF5 attributes by this module.
SUPPORTED_HDF5_ATTRIBUTE_TYPES = [
    'int',
    'float',
    'bool',
    'NoneType',
    'str',
    'list',
    'tuple',
    'ndarray',
    'dict',
    'Morlet',
]


def _bytes_to_str(value):
    """Return ``value`` decoded to ``str`` if it is a byte string, else return it unchanged."""
    if isinstance(value, bytes):
        return value.decode()
    return value


def _is_np_float_name(value):
    """Return True if ``value`` is a (byte) string containing 'np.float', e.g. b'np.float32'."""
    text = _bytes_to_str(value)
    return isinstance(text, str) and 'np.float' in text


def read_dict_from_hdf5(f, target):
    """
    Read a dictionary saved to an hdf5 file by write_dict_to_hdf5().

    The values are read from the attributes of the group `target`, and the name of
    the original type of each value is read from the attributes of the group
    '<target>_types'. The original type determines how each stored value is
    converted back before the flat key, value pairs are unflattened.

    Args:
        f (h5py._hl.files.File): file object to an open hdf5 file.
        target (str): path/group in the hdf5 file where the dict is saved.

    Returns:
        (dict): (unflattened) dictionary as read from the hdf5.

    Raises:
        NotImplementedError: if the original type of an attribute is not in
            SUPPORTED_HDF5_ATTRIBUTE_TYPES and the stored value is not a string
            naming a numpy float type (e.g. 'np.float32').
    """
    d_flat = dict()

    f_target = f[target]
    f_target_types = f['{}_types'.format(target)]

    for k, v in f_target.attrs.items():
        # Read original type of parameter value. Accept both byte strings and str
        # (isinstance(..., bytes) is used instead of the np.string_ alias, which
        # was removed in NumPy 2.0).
        original_type = _bytes_to_str(f_target_types.attrs[k])
        if 'float' in original_type:
            # Treat all float flavours (e.g. float64) as 'float'.
            original_type = 'float'

        # Check if the object type is supported (to prevent unexpected behaviour).
        # The only exception is a string naming a numpy float type.
        if original_type not in SUPPORTED_HDF5_ATTRIBUTE_TYPES and not _is_np_float_name(v):
            raise NotImplementedError(
                'Undefined behaviour for reading attribute with original type "{}" from HDF5.'
                ' (key: {!r}, value: {!r})'.format(original_type, k, v))

        if original_type == 'dict':
            # Dicts are saved as strings.
            v = convert_string(_bytes_to_str(v), 'dict')
        elif original_type == 'str':
            # Convert byte string to str object.
            v = _bytes_to_str(v)
        elif original_type == 'NoneType':
            # None is saved as the string 'None'.
            v = None
        elif original_type == 'list':
            # Convert to list.
            v = v.tolist()
        elif original_type == 'tuple':
            # Convert to tuple.
            v = tuple(v)
        elif original_type == 'Morlet':
            # Create Morlet wavelet. The import is needed so that eval() can resolve the name.
            from nnsa.cwt.mothers import Morlet  # noqa: F401
            # SECURITY NOTE: eval() executes text read from the file. Only read files
            # from trusted sources.
            v = eval(_bytes_to_str(v))
        elif _is_np_float_name(v):
            # Convert to numpy dtype by evaluating the string (e.g. np.float32).
            # SECURITY NOTE: eval() executes text read from the file. Only read files
            # from trusted sources.
            v = eval(_bytes_to_str(v))

        # Save key, value pair in flattened dict.
        d_flat[k] = v

    return unflatten_dict(d_flat)
def write_dict_to_hdf5(f, d, target):
    """
    Write a dictionary to a HDF5.

    1) Flatten the dictionary
    2) Create a hdf5 group, whose attributes will hold (flattened) key and value pairs.
    3) Create a hdf5 group, whose attributes will hold key and type(value).__name__ pairs.
    4) Save the values and the value types as attributes to the corresponding hdf5 group.

    Read a dictionary with read_dict_from_hdf5.

    Values whose type is not in SUPPORTED_HDF5_ATTRIBUTE_TYPES are saved as their
    str() representation and a warning is issued, except for numpy float types
    (e.g. np.float32), which are saved as a string like 'np.float32'. Values of type
    'MatlabEngine' are not saved (only their type name is recorded).

    Args:
        f (h5py._hl.files.File): file object to an open hdf5 file.
        d (dict): dict to save.
        target (str): path/group to be created in the hdf5 file where the dict will be saved.
            Note that the target group must not already exist, otherwise a ValueError is raised
            when attempting to (re)create it.
    """
    # Flatten dictionary (don't store a dict as a value in hdf5).
    d_flat = flatten_dict(d)

    # Create groups in the hdf5 file to store the key, value pairs and the key, types pairs.
    f_target = f.create_group(target)
    f_target_types = f.create_group('{}_types'.format(target))

    # Loop over dict items.
    for k, v in d_flat.items():
        # Save original type. np.bytes_ is used instead of the np.string_ alias,
        # which was removed in NumPy 2.0.
        original_type = type(v).__name__
        f_target_types.attrs[k] = np.bytes_(original_type.encode('utf-8'))

        if original_type == 'MatlabEngine':
            # Skip this.
            continue

        if 'float' in original_type:
            # Treat all float flavours (e.g. float64) as 'float'.
            original_type = 'float'

        # Check if the object type is supported (to prevent unexpected behaviour).
        if original_type not in SUPPORTED_HDF5_ATTRIBUTE_TYPES:
            if "<class 'numpy.float" not in str(v):
                msg = ('\nUndefined behaviour for saving attribute of type "{}" to HDF5.'
                       ' (key: {!r}, value: {!r})'.format(original_type, k, v))
                warnings.warn(msg)

                # Cast to string describing the object.
                v = str(v)
            else:
                # We have a type parameter specifying a numpy float type.
                # We convert it to a string, e.g. 'np.float32'.
                v = (str(v)
                     .replace('<class ', '')
                     .replace('>', '')
                     .replace("'", "")
                     .replace('numpy', 'np'))

        if isinstance(v, dict):
            # Although dicts are flattened, the value may be an empty dict. Save this as a string.
            v = str(v)

        if v is None:
            # NoneType objects can not be saved to hdf5, save as 'None' string.
            v = 'None'

        if isinstance(v, str):
            # Store text as a numpy byte string. Encode explicitly as UTF-8 so that
            # non-ASCII text can be stored too (read_dict_from_hdf5 decodes with the
            # default UTF-8 codec).
            v = np.bytes_(v.encode('utf-8'))

        # Save parameters key, value pair as attribute.
        f_target.attrs[k] = v
def test():
    """
    Round-trip a nested dict through write_dict_to_hdf5() and read_dict_from_hdf5().

    The HDF5 file is created inside a temporary directory, so no existing 'test.h5'
    in the working directory is overwritten or deleted, and the file is cleaned up
    even when the comparison fails.

    Raises:
        AssertionError: if the dict read back differs from the dict written.
    """
    import os
    import tempfile

    import h5py
    from numpy.testing import assert_equal

    pars = {'test1': 1,
            'test2': 'hello',
            'test3': {'test3_1': 1.5,
                      'test3_2': [1, 2, 3]}}

    with tempfile.TemporaryDirectory() as tmp_dir:
        filepath = os.path.join(tmp_dir, 'test.h5')

        with h5py.File(filepath, 'w') as f:
            # Write algorithm_parameters (is a dict-like object).
            write_dict_to_hdf5(f, pars, 'header/algorithm_parameters')

        with h5py.File(filepath, 'r') as f:
            # Read the dict back from the same group.
            pars_read = read_dict_from_hdf5(f, 'header/algorithm_parameters')

        assert_equal(actual=pars_read, desired=pars)

    print('Test passed!')