# Source code for nnsa.utils.dictionaries

"""
This module contains functions dealing with dictionaries.
"""

from collections.abc import Mapping
import csv


# Names exported by a wildcard import of this module (``from <module> import *``).
# NOTE(review): add_postfix() and removekey() are defined in this module but are not listed here - confirm
# whether they are intentionally kept out of the exported names.
__all__ = [
    'add_nested_dict',
    'flatten_dict',
    'itemize_items',
    'nested_update',
    'unflatten_dict',
    'write_dict_to_csv_as_table',
]


def add_nested_dict(d, keys, value):
    """
    Put a value in dictionary d under the nested keys given by the keys list.

    Sets d['a']['b']['c'] = value when keys = ['a', 'b', 'c']. Intermediate levels that do not exist yet are
    created as empty dicts, existing intermediate levels are reused. The dictionary d is modified in place and
    nothing is returned.

    Args:
        d (dict): dictionary to add the value with nested keys to.
        keys (list): non-empty list of nested keys, ordered from the outermost to the innermost level.
        value: the value to put in the dictionary.

    Raises:
        IndexError: if keys is empty (there is no key to store the value under).
    """
    if len(keys) == 0:
        raise IndexError('keys must contain at least one key.')

    # Walk down to the dict that must hold the last key, creating missing levels on the way.
    node = d
    for key in keys[:-1]:
        node = node.setdefault(key, {})
    node[keys[-1]] = value


def add_postfix(d, postfix):
    """
    Build a new dict in which every key of d has the postfix appended.

    Keys are rendered to text together with the postfix, so the keys of the result are always strings. The
    values are not copied: the result refers to the same value objects as d.

    Args:
        d (dict): the dict whose keys get the postfix.
        postfix (str): the text to append to each key.

    Returns:
        d_out (dict): (shallow) copy of the dict with the postfix appended to each key.
    """
    d_out = {}
    for key, val in d.items():
        d_out["{}{}".format(key, postfix)] = val
    return d_out


def flatten_dict(d, path='', d_out=None, sep='/'):
    """
    Traverse a nested/multi-level dictionary and create a new one-level dictionary.

    Item d['a']['b'] is mapped to key 'a/b' in the output dictionary (with the default separator).
    The reverse operation is achieved by the unflatten_dict() function.

    Keys are converted to text when the flattened key is built, so the keys of the output are always strings.
    Empty nested mappings are kept as values, since they have no items that could be flattened.

    Args:
        d (dict): dictionary to flatten.
        path (str, optional): prefix for the flattened keys. Only needed for recursive calls. The user does not
            need to specify this, i.e. specify ''.
            Defaults to ''.
        d_out (dict or None, optional): if a dict is specified, this dict is updated (in place) with the flattened
            key, value pairs. Needed for recursive calls.
            If None, the output dictionary is a new empty dictionary.
            Defaults to None.
        sep (str, optional): separator placed between the key names in the flattened keys.
            Defaults to '/'.

    Returns:
        d_out (dict): one-level dictionary with same values as input d, but with flattened keys.

    Examples:
        >>> d = {'A': {'a': 2, 'b': True}, 'B': {'a': 10, 'b': False}}
        >>> flatten_dict(d)
        {'A/a': 2, 'A/b': True, 'B/a': 10, 'B/b': False}

    """
    # Create new dict.
    if d_out is None:
        d_out = {}

    for key, value in d.items():
        # Add current key to path (path_new will be the flattened key in the output dict).
        path_new = '{}{}{}'.format(path, sep, key) if path else '{}'.format(key)

        if isinstance(value, Mapping) and len(value) > 0:
            # Deepest level not reached yet: the recursive call fills the same output dict in place.
            flatten_dict(value, path=path_new, d_out=d_out, sep=sep)
        else:
            # Leaf value (or empty mapping): store it under the flattened key.
            d_out[path_new] = value

    return d_out


def itemize_items(items):
    """
    Return a string that prints a list of items, where each item is a pair of objects.

    Handy for printing dictionaries, e.g. if d is some dict:
    print(itemize_items(d.items()))

    Every pair is rendered as 'first: second' on its own line, indented by 7 spaces. If the rendered pair
    spans multiple lines, the continuation lines get the same indentation.

    Args:
        items (iterable): iterable yielding two values.

    Returns:
        (str): string in which the items are printed underneath each other with indentation. Empty string if
            items yields nothing.
    """
    # Specify indentation.
    indent = ' ' * 7

    # Replacement for newlines inside an item, so that multiline items stay indented on every line.
    indented_newline = '\n{}'.format(indent)

    return '\n'.join(
        '{}{}: {}'.format(indent, k, v).replace('\n', indented_newline)
        for k, v in items
    )


def nested_update(d, other=None, accept_new_key=True, **kwargs):
    """
    Update a nested dictionary d with the contents of other, maintaining deeper levels of d that are not in other.

    Adapted from https://stackoverflow.com/questions/3232943/update-value-of-a-nested-dictionary-of-varying-depth

    Values in other that are mappings are merged recursively into the corresponding value of d (a missing or None
    value in d is treated as an empty dict). All other values replace the value in d.

    Args:
        d (dict): (nested) dictionary to update. Updated in place.
        other (dict or iterable, optional): mapping or iterable of key, value pairs with which to update.
        accept_new_key (bool, str, optional): If True, accepts new keys (keys that do not exist in d).
            If False, raises an error if attempting to update a key that does not exist in d (including deeper
            levels).
            If 'parameters_mode', do not accept new keys when updating a nnsa.Parameters object, but accept new keys
            in ordinary dict objects. This also holds for deeper levels, both for other and for **kwargs.
            Defaults to True (this is the default behaviour of Python dict.update()).
        **kwargs (optional): keyword arguments with which to update (in which the keyword is the key). Applied
            after other.

    Returns:
        d (dict): updated dictionary (in place, so return is in fact redundant).

    Raises:
        KeyError: if new keys are not accepted and a key to update does not exist in d.
        TypeError: if accept_new_key is neither a bool nor a string.
        ValueError: if accept_new_key is a string other than 'parameters_mode' (case-insensitive).
    """
    if isinstance(other, Mapping):
        # Convert any mapping (not only dict) to an iterable with key, value pairs, consistent with the Mapping
        # check that is used for nested values below.
        other = other.items()

    # Check if accept_new_key == 'parameters_mode'.
    parameters_mode = False
    if isinstance(accept_new_key, str):
        if accept_new_key.lower() != 'parameters_mode':
            raise ValueError('Invalid string argument for accept_new_key="{}".'.format(accept_new_key))
        parameters_mode = True
        # The check is done on the class name, so the Parameters class does not need to be imported here.
        accept_new_key = type(d).__name__ != 'Parameters'

    if not isinstance(accept_new_key, bool):
        raise TypeError('accept_new_key should be a bool. Got a {} instead.'.format(type(accept_new_key)))

    # Mode to hand down to deeper levels: in parameters mode each level must decide for itself (based on its own
    # type) whether new keys are accepted, so the resolved boolean of this level must not be passed on.
    accept_new_key_nested = 'parameters_mode' if parameters_mode else accept_new_key

    # For each key, value pair in other, update the key, value in d.
    if other is not None:
        for k, v in other:
            # Raise error if key does not exist in d.
            if not accept_new_key and k not in d:
                raise KeyError("'{}' not in collection with keys {}.".format(k, list(d.keys())))

            if isinstance(v, Mapping):
                # Value is a dict/Parameters: recursively call this update function on the current value of d[k],
                # starting from an empty dict if d[k] is missing or None.
                d_k = d.get(k)
                if d_k is None:
                    d_k = {}
                d[k] = nested_update(d_k, other=v, accept_new_key=accept_new_key_nested)
            else:
                # Replace d[k] with v.
                d[k] = v

    # Handle **kwargs: kwargs is a dictionary.
    if kwargs:
        nested_update(d, other=kwargs, accept_new_key=accept_new_key_nested)

    return d


def removekey(d, key):
    """
    Return a shallow copy of dict d from which the given key is removed.

    The input dict d itself is left untouched.

    Args:
        d (dict): the dict to copy.
        key: the key to leave out of the copy.

    Returns:
        (dict): shallow copy of d without the key.

    Raises:
        KeyError: if the key is not in d.
    """
    trimmed = dict(d)
    trimmed.pop(key)
    return trimmed


def unflatten_dict(d, sep='/'):
    """
    Restore the original dictionary after flattening the dict with flatten_dict().

    Item d['a/b'] is mapped to ['a']['b'] in the output dictionary (with the default separator).
    The reverse operation is achieved by the flatten_dict() function.

    Args:
        d (dict): one-level dictionary with string keys representing nested dictionary keys, separated by sep.
        sep (str, optional): separator of the key names in the flattened keys.
            Defaults to '/'.

    Returns:
        unflat_dict (dict): unflattened, i.e. nested, dictionary with same values as input d.

    Examples:
       >>> d = {'A': {'a': 2, 'b': True}, 'B': {'a': 10, 'b': False}}
       >>> d_flat = flatten_dict(d)
       >>> print(d == unflatten_dict(d_flat))
       True
    """
    # Loop over key, value pairs and add nested version to the output dict.
    unflat_dict = {}
    for key_flat, value in d.items():
        # Each part of the flattened key is one nesting level in the output.
        add_nested_dict(unflat_dict, keys=key_flat.split(sep), value=value)

    return unflat_dict


def write_dict_to_csv_as_table(filepath, table_dict):
    """
    Write a dictionary to a csv, structuring it as a table with the keys of the dict as column headers.

    The first row of the file contains the keys of the dict. Columns that have fewer elements than the longest
    column are padded with empty strings. An empty dict results in a file with only an (empty) header row.

    Args:
        filepath (str): filepath to save the csv to. An existing file is overwritten.
        table_dict (dict): dictionary that contains the table data. The values of the dictionary must be a list, and
            each element of the list will be put on a new row. The number of elements in a list may vary between the
            columns, i.e. under each column a varying number of elements may be put.
    """
    columns = list(table_dict.values())

    # Number of rows to write: length of the longest column (0 if there are no columns at all). Computed before
    # the file is opened, so that invalid column values do not leave a partially written file behind.
    n_rows = max((len(column) for column in columns), default=0)

    with open(filepath, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)

        # Write header (the keys of the dict).
        writer.writerow(list(table_dict.keys()))

        for i in range(n_rows):
            # Take the i-th element of each column, or an empty string if all elements of that column have
            # already been written to previous rows.
            writer.writerow([column[i] if i < len(column) else '' for column in columns])