Source code for nnsa.utils.paths

"""
This module contains functions dealing with file and directory paths.
"""
import fnmatch
import glob
import os
from datetime import datetime
from inspect import stack


# Public names exported by a star-import of this module.
# NOTE(review): `check_filepath_exists` and `get_directories` are defined in this module
# but are not listed here -- confirm whether that omission is intentional.
__all__ = [
    'check_directory_exists',
    'check_filename_exists',
    'check_file_extension',
    'get_filename',
    'get_filepaths',
    'get_output_dir',
    'select_path',
    'split_path',
]


def check_directory_exists(directory=None, filepath=None):
    """
    Make sure a directory exists, creating it (including missing parents) if it does not.

    Specify either `directory` or `filepath`. If both are given, `directory` is used.

    Args:
        directory (str, optional): path to a directory.
        filepath (str, optional): path of a file. The directory part of this path is checked.

    Raises:
        ValueError: if neither `directory` nor `filepath` is specified.
    """
    if directory is None:
        if filepath is None:
            raise ValueError('Specify `directory` or `filepath`.')
        directory = os.path.split(filepath)[0]

    # An empty directory part refers to the current working directory: nothing to create.
    if directory != '' and not os.path.exists(directory):
        # exist_ok guards against the directory being created between the check and this call.
        os.makedirs(directory, exist_ok=True)
def check_filename_exists(filepath):
    """
    Guard against overwriting: raise an error when `filepath` already exists.

    Args:
        filepath (str): file path to check.

    Raises:
        FileExistsError: if something already exists at `filepath`.
    """
    if not os.path.exists(filepath):
        return

    message = 'File "{}" already exists.\nOverwriting is not permitted.'.format(filepath)
    raise FileExistsError(message)
def check_filepath_exists(filepath):
    """
    Raise an error if `filepath` already exists.

    Alias for check_filename_exists(): the call is forwarded unchanged and its result is returned.

    Args:
        filepath (str): file path of a file.
    """
    outcome = check_filename_exists(filepath)
    return outcome
def check_file_extension(filepath, valid_extensions):
    """
    Check that the extension of a file path is one of the allowed extensions.

    Args:
        filepath (str): file path of a file.
        valid_extensions (str or list of str): a single valid extension, or a list of valid extensions.
            Extensions are case sensitive and given without leading dot.

    Returns:
        (str): file extension of `filepath` (without leading dot).

    Raises:
        ValueError: if `filepath` has no extension, or if its extension is not in `valid_extensions`.
    """
    # Use isinstance rather than an exact type comparison, so that str subclasses are also wrapped.
    # A bare string would otherwise be matched by substring ('cs' in 'csv') and joined per character.
    if isinstance(valid_extensions, str):
        valid_extensions = [valid_extensions]

    # Get file extension (includes the leading dot, or is empty if there is none).
    file_extension = os.path.splitext(filepath)[1]
    if not file_extension:
        raise ValueError('No file extension in filepath "{}"'.format(filepath))

    # Remove leading dot and check if it is in the list of valid extensions.
    extension = file_extension[1:]
    if extension not in valid_extensions:
        raise ValueError('Invalid extension in filepath "{}". Use one of: .{}'.
                         format(filepath, ' .'.join(valid_extensions)))

    return extension
def get_filename(filepath):
    """
    Return the base name of `filepath` without its extension, with every space replaced by '_'.

    Only the last extension is removed (e.g. 'a.tar.gz' becomes 'a.tar').

    Args:
        filepath (str): filepath.

    Returns:
        filename (str): base name without file extension and with spaces replaced by underscores.
    """
    basename = os.path.basename(filepath)
    stem, _ = os.path.splitext(basename)
    return stem.replace(' ', '_')
def get_filepaths(directory, pattern, case_sensitive=False, subdirectories=False, raise_error=False):
    """
    Collect the paths of the files in `directory` whose filename matches `pattern`.

    Matching is done on the filename only (not the full path) with fnmatch.fnmatchcase(); use * for
    wildcards. Returned paths are built by joining onto `directory`, so they are absolute if `directory`
    is absolute and relative if `directory` is relative.

    Args:
        directory (str): path to directory in which the files reside.
        pattern (str): fnmatch-style pattern that the filenames must match.
        case_sensitive (bool, optional): if True, pattern is case-sensitive.
            If False, both filename and pattern are lower-cased before matching.
            Defaults to False.
        subdirectories (bool): if True, also look for files in subdirectories (via os.walk).
            If False, only look for files directly in the provided directory.
            Defaults to False.
        raise_error (bool): set to True to raise an error if there were no files found.

    Returns:
        matching_filepaths (list): filepaths of files in `directory` that match `pattern`.

    Raises:
        ValueError: if `raise_error` is True and no matching file was found.
    """
    def _is_match(name):
        # fnmatch.fnmatch is avoided on purpose: it normalizes case differently per operating system.
        if case_sensitive:
            return fnmatch.fnmatchcase(name, pattern)
        return fnmatch.fnmatchcase(name.lower(), pattern.lower())

    # Lazily enumerate (parent directory, entry name) pairs to inspect.
    if subdirectories:
        candidates = ((root, name) for root, _, names in os.walk(directory) for name in names)
    else:
        candidates = ((directory, name) for name in os.listdir(directory))

    matching_filepaths = []
    for root, name in candidates:
        if not _is_match(name):
            continue
        candidate = os.path.join(root, name)
        # os.listdir also returns directories; keep regular files only.
        if os.path.isfile(candidate):
            matching_filepaths.append(candidate)

    if raise_error and not matching_filepaths:
        raise ValueError('No files found in {} with pattern {}.'.format(directory, pattern))

    return matching_filepaths
def get_directories(directory, pattern, case_sensitive=False, subdirectories=False, raise_error=False):
    """
    Collect the paths of the directories in `directory` whose name matches `pattern`.

    Matching is done on the directory name only (not the full path) with fnmatch.fnmatchcase(); use *
    for wildcards. Returned paths are built by joining onto `directory`, so they are absolute if
    `directory` is absolute and relative if `directory` is relative.

    Args:
        directory (str): path to directory in which to look for directories.
        pattern (str): fnmatch-style pattern that the directory names must match.
        case_sensitive (bool, optional): if True, pattern is case-sensitive.
            If False, both name and pattern are lower-cased before matching.
            Defaults to False.
        subdirectories (bool): if True, also look for directories in subdirectories (recursively, via
            os.walk). If False, only look directly in the provided directory.
            Defaults to False.
        raise_error (bool): set to True to raise an error if there were no directories found.

    Returns:
        matching_directories (list): paths of directories in `directory` that match `pattern`.

    Raises:
        ValueError: if `raise_error` is True and no matching directory was found.
    """
    def _name_matches(name):
        # fnmatch.fnmatch is avoided on purpose: it normalizes case differently per operating system.
        if case_sensitive:
            return fnmatch.fnmatchcase(name, pattern)
        return fnmatch.fnmatchcase(name.lower(), pattern.lower())

    # Lazily enumerate (parent directory, entry name) pairs to inspect.
    if subdirectories:
        entries = ((parent, name) for parent, dirnames, _ in os.walk(directory) for name in dirnames)
    else:
        entries = ((directory, name) for name in os.listdir(directory))

    matching_directories = []
    for parent, name in entries:
        if not _name_matches(name):
            continue
        dirpath = os.path.join(parent, name)
        # os.listdir also returns regular files; keep directories only.
        if os.path.isdir(dirpath):
            matching_directories.append(dirpath)

    if raise_error and not matching_directories:
        raise ValueError('No directories found in {} with pattern {}.'.format(directory, pattern))

    return matching_directories
def get_output_dir(output_root, create_unique=False):
    """
    Return a directory path for saving the output that a script generates.

    A directory under `output_root` is created using the path of the script that calls this function.
    The output directory is created such that a similar structure is maintained in the output as in the
    code that generates the output. E.g.: if a file nnsa/python/scripts/example.py calls this function,
    and the output root is nnsa/output, then the returned directory path is
    nnsa/output/python/scripts/example.

    If the directory does not exist, the directory is created.

    Args:
        output_root (str): path to the output root directory (absolute or relative). The output dir will
            be located under this root directory.
        create_unique (bool, optional): if True, adds a subdirectory named after the current date and
            time (resolution of one second).
            If False, does not create this additional directory.
            Defaults to False.

    Returns:
        dir_out (str): path to a directory for saving outputs of the script that calls this function.
            The path is formed by joining onto `output_root` as given.
    """
    # Get the (absolute) filepath of the script that called this function.
    filepath = os.path.abspath(stack()[1][1])

    if 'ipython-input' in filepath:
        # If we call the function from the interactive console, use a temporary output folder.
        dirname = 'temp'
    else:
        # Get the common part of the filepath and the output directory.
        # Compare absolute paths: os.path.commonpath raises a ValueError when absolute and relative
        # paths are mixed, which would otherwise happen for every relative output_root.
        common_path = os.path.commonpath([filepath, os.path.abspath(output_root)])

        # Get the relative path of the filepath starting from the common path.
        rel_path = os.path.relpath(filepath, start=common_path)

        # Remove the extension from the relative filepath to form a directory name.
        dirname = os.path.splitext(rel_path)[0]

    if create_unique:
        # Current date and time in YYYYmmdd-HHMMSS format.
        now = datetime.now().strftime("%Y%m%d-%H%M%S")
        dirname = os.path.join(dirname, now)

    # Create a path for the output under output_root.
    dir_out = os.path.join(output_root, dirname)

    # Check if path exists and create if not.
    if not os.path.exists(dir_out):
        # exist_ok guards against the directory being created between the check and this call.
        os.makedirs(dir_out, exist_ok=True)

    return dir_out
def select_path(dialog_type, iconbitmap=None, **kwargs):
    """
    Open a file dialog and let the user select a (new) file or directory.

    Args:
        dialog_type (str): if 'select_file', lets the user select an existing file.
            If 'select_files', lets the user select multiple existing files.
            If 'select_directory', lets the user select a directory.
            If 'saveas_file', lets the user create a new file(name).
        iconbitmap (optional): value passed to the iconbitmap() method of the (invisible) Tk root window.
            Defaults to None.
        **kwargs (optional): optional keyword arguments for tkinter's filedialog functions. E.g.:
            filetypes (list): sequence of (label, pattern) tuples. The same label may occur with several
                patterns.
            initialdir (str): initial directory.
            title (str): message box title.

    Returns:
        path: the value returned by the tkinter dialog, i.e. the selected path (for 'select_files', the
            collection of selected paths as returned by askopenfilenames).

    Raises:
        ValueError: if `dialog_type` is not one of the supported values.
    """
    # Validate before creating any Tk object, so an invalid request does not leave a window behind.
    valid_types = ['select_file', 'select_files', 'select_directory', 'saveas_file']
    if dialog_type not in valid_types:
        raise ValueError('Invalid dialog_type "{}". Choose from {}.'
                         .format(dialog_type, valid_types))

    import tkinter as tk
    import tkinter.filedialog as tkfiledialog

    # Make a top-level instance.
    root = tk.Tk()
    try:
        # Make it invisible.
        root.attributes('-alpha', 0.0)

        # Lift it to top so it can get focus,
        # otherwise dialogs will end up behind the terminal.
        root.lift()
        root.focus_force()
        root.iconbitmap(iconbitmap)

        if dialog_type == 'select_file':
            path = tkfiledialog.askopenfilename(parent=root, **kwargs)
        elif dialog_type == 'select_files':
            path = tkfiledialog.askopenfilenames(parent=root, **kwargs)
        elif dialog_type == 'select_directory':
            path = tkfiledialog.askdirectory(parent=root, **kwargs)
        else:
            # 'saveas_file'.
            # Only when defaultextension is specified, the extension of the selected file type is
            # automatically added to the filename.
            kwargs.setdefault('defaultextension', '')
            path = tkfiledialog.asksaveasfilename(parent=root, **kwargs)
    finally:
        # Always get rid of the top-level instance, also when a dialog raises.
        root.destroy()

    return path
def split_path(path):
    """
    Split a filepath into all of its components (ignores the drive / root) and return them in a tuple.

    Trailing path separators are ignored, so 'a/b/' yields the same parts as 'a/b'.

    Args:
        path (str): path to split.

    Returns:
        (tuple): the parts (directories and filename) of the path, in order from left to right.
    """
    reversed_parts = []
    while True:
        head, tail = os.path.split(path)
        if tail:
            reversed_parts.append(tail)
        elif head == path:
            # No progress: only the root / drive (or nothing) is left.
            break
        # Otherwise an empty tail just means a trailing separator was stripped: keep splitting.
        path = head

    # Parts were collected from right to left.
    reversed_parts.reverse()
    return tuple(reversed_parts)