"""
Module related to writing/reading HDF5 files.
"""
import warnings
import numpy as np
from nnsa.utils.dictionaries import unflatten_dict, flatten_dict
from nnsa.utils.other import convert_string
# Public API of this module: the names exported by `from <module> import *`.
__all__ = [
    'SUPPORTED_HDF5_ATTRIBUTE_TYPES',
    'read_dict_from_hdf5',
    'write_dict_to_hdf5',
]
# Names (as given by type(obj).__name__) of the object types that can be
# saved/read as attributes to/from hdf5.
SUPPORTED_HDF5_ATTRIBUTE_TYPES = [
    'int',
    'float',
    'bool',
    'NoneType',
    'str',
    'list',
    'tuple',
    'ndarray',
    'dict',
    'Morlet',
]
def _is_numpy_float_specifier(value):
    """Return True if value is a bytes string whose decoded text contains 'np.float'."""
    # np.bytes_ is a subclass of bytes, so this also matches the fixed-length strings
    # that were written as numpy bytes strings.
    return isinstance(value, bytes) and 'np.float' in value.decode()


def read_dict_from_hdf5(f, target):
    """
    Read a dictionary saved to an hdf5 file by write_dict_to_hdf5().

    The values are read from the attributes of the group `target`, and the name of the original
    type of each value is read from the attribute with the same key in the group '<target>_types'.
    Where needed, the value is converted back based on that type name.

    Args:
        f (h5py._hl.files.File): file object to an open hdf5 file.
        target (str): path/group in the hdf5 file where the dict is saved.

    Returns:
        (dict): (unflattened) dictionary as read from the hdf5.

    Raises:
        NotImplementedError: if the original type of an attribute is not in
            SUPPORTED_HDF5_ATTRIBUTE_TYPES and the stored value is not a bytes string
            containing 'np.float'.
    """
    d_flat = dict()
    f_target = f[target]
    f_target_types = f['{}_types'.format(target)]

    for k, v in f_target.attrs.items():
        # Read original type of parameter value.
        original_type = f_target_types.attrs[k].decode()

        # Treat every type whose name contains 'float' (e.g. 'float64') as a plain float.
        if 'float' in original_type:
            original_type = 'float'

        # Check if the object type is supported (to prevent unexpected behaviour).
        if original_type not in SUPPORTED_HDF5_ATTRIBUTE_TYPES and not _is_numpy_float_specifier(v):
            # Report the offending key and value in the exception itself instead of printing them.
            raise NotImplementedError(
                'Undefined behaviour for reading attribute with original type "{}" from HDF5.'
                ' (key: {!r}, value: {!r})'.format(original_type, k, v))

        if original_type == 'dict':
            # Dicts are saved as strings.
            v = convert_string(v.decode(), 'dict')

        if isinstance(v, bytes) and original_type == 'str':
            # Convert bytes string to str object.
            v = v.decode()

        if original_type == 'NoneType':
            # Convert to None.
            v = None

        if original_type == 'list':
            # Convert to list.
            v = v.tolist()

        if original_type == 'tuple':
            # Convert to tuple.
            v = tuple(v)

        # This check is done on the (possibly already converted) value on purpose: a value of
        # original type 'str' has been decoded to str above and is therefore never evaluated.
        if _is_numpy_float_specifier(v):
            # Convert to numpy dtype by evaluating the string (e.g. np.float32).
            # NOTE(security): eval() executes text read from the file. Only read trusted files.
            v = eval(v.decode())

        if original_type == 'Morlet':
            # Create Morlet wavelet. The import brings the name Morlet into scope for eval().
            from nnsa.cwt.mothers import Morlet  # noqa: F401
            # NOTE(security): eval() executes text read from the file. Only read trusted files.
            v = eval(v.decode())

        # Save key, value pair in flattened dict.
        d_flat[k] = v

    return unflatten_dict(d_flat)
def write_dict_to_hdf5(f, d, target):
    """
    Write a dictionary to a HDF5.

    1) Flatten the dictionary.
    2) Create a hdf5 group, whose attributes will hold (flattened) key and value pairs.
    3) Create a hdf5 group, whose attributes will hold key and type(value).__name__ pairs.
    4) Save the values and the value types as attributes to the corresponding hdf5 group.

    Read a dictionary with read_dict_from_hdf5.

    Args:
        f (h5py._hl.files.File): file object to an open hdf5 file.
        d (dict): dict to save.
        target (str): path/group to be created in the hdf5 file where the dict will be saved.
            Note that the target group must not already exist, otherwise a ValueError is raised when
            attempting to (re)create it. A second group named '<target>_types' is created as well.
    """
    # Flatten dictionary (don't store a dict as a value in hdf5).
    d_flat = flatten_dict(d)

    # Create groups in the hdf5 file to store the key, value pairs and the key, types pairs.
    f_target = f.create_group(target)
    f_target_types = f.create_group('{}_types'.format(target))

    # Loop over dict items.
    for k, v in d_flat.items():
        # Save original type. np.bytes_ replaces the np.string_ alias, which no longer exists in
        # NumPy 2.0. The explicit UTF-8 encoding matches the .decode() done when reading.
        original_type = type(v).__name__
        f_target_types.attrs[k] = np.bytes_(original_type.encode('utf-8'))

        if original_type == 'MatlabEngine':
            # Skip this: only its type name is stored, not the value.
            continue

        # Treat every type whose name contains 'float' (e.g. 'float64') as a plain float.
        if 'float' in original_type:
            original_type = 'float'

        # Check if the object type is supported (to prevent unexpected behaviour).
        if original_type not in SUPPORTED_HDF5_ATTRIBUTE_TYPES:
            if "<class 'numpy.float" not in str(v):
                # Report the offending key and value in the warning itself instead of printing them.
                msg = ('\nUndefined behaviour for saving attribute of type "{}" to HDF5.'.format(original_type)
                       + ' (key: {!r}, value: {!r})'.format(k, v))
                warnings.warn(msg)

                # Cast to string describing the object.
                v = str(v)
            else:
                # We have a type parameter specifying a numpy float type. We convert it to a string,
                # e.g. 'np.float32'.
                v = str(v).replace('<class ', '').replace('>', '').replace("'", "").replace('numpy', 'np')

        if isinstance(v, dict):
            # Although dicts are flattened, the value may be an empty dict. Save this as a string.
            v = str(v)

        if v is None:
            # NoneType objects can not be saved to hdf5, save as 'None' string.
            v = 'None'

        if isinstance(v, str):
            # Convert to a numpy bytes string as recommended for compatibility. Encode explicitly,
            # because np.bytes_(str) only accepts ASCII text.
            v = np.bytes_(v.encode('utf-8'))

        # Save parameters key, value pair as attribute.
        f_target.attrs[k] = v
def test():
    """
    Write a nested dict to an HDF5 file, read it back and check that both dicts are equal.

    Prints 'Test passed!' on success.

    Raises:
        AssertionError: if the dict that is read back differs from the dict that was written.
    """
    import os
    import tempfile

    import h5py
    from numpy.testing import assert_equal

    pars = {'test1': 1,
            'test2': 'hello',
            'test3': {'test3_1': 1.5,
                      'test3_2': [1, 2, 3]}}

    # Use a temporary directory, so that the file is also removed when the assertion fails and
    # an existing 'test.h5' in the working directory is never overwritten.
    with tempfile.TemporaryDirectory() as tmp_dir:
        filepath = os.path.join(tmp_dir, 'test.h5')

        with h5py.File(filepath, 'w') as f:
            # Write the dict.
            write_dict_to_hdf5(f, pars, 'header/algorithm_parameters')

        with h5py.File(filepath, 'r') as f:
            # Read the dict back.
            pars_read = read_dict_from_hdf5(f, 'header/algorithm_parameters')

        assert_equal(actual=pars_read, desired=pars)

    print('Test passed!')