"""
This module contains functions dealing with file and directory paths.
"""
import fnmatch
import glob
import os
from datetime import datetime
from inspect import stack
# Public API of this module: the names exported by `from <module> import *`.
# NOTE(review): `check_filepath_exists` and `get_directories` are defined in this module but are
# not listed here - confirm whether that omission is intentional.
__all__ = [
'check_directory_exists',
'check_filename_exists',
'check_file_extension',
'get_filename',
'get_filepaths',
'get_output_dir',
'select_path',
'split_path',
]
def check_directory_exists(directory=None, filepath=None):
    """
    Check if a directory exists and create it (including parents) if it does not.

    Specify either `directory` or `filepath`. If `directory` is given, `filepath` is ignored.

    Args:
        directory (str, optional): path to a directory.
        filepath (str, optional): path of a file. The directory part of this path is checked.

    Raises:
        ValueError: if neither `directory` nor `filepath` is specified.
    """
    if directory is None:
        if filepath is None:
            raise ValueError('Specify `directory` or `filepath`.')
        directory = os.path.dirname(filepath)

    # An empty directory part means "current working directory": nothing to create.
    if directory != '' and not os.path.exists(directory):
        # exist_ok=True keeps this safe if the directory appears between the check and the creation.
        os.makedirs(directory, exist_ok=True)
def check_filename_exists(filepath):
    """
    Check if filepath already exists and raise an error if it does.

    Args:
        filepath (str): path to check. Any existing path (file or directory) triggers the error.

    Raises:
        FileExistsError: if `filepath` already exists.
    """
    if os.path.exists(filepath):
        raise FileExistsError('File "{}" already exists.\nOverwriting is not permitted.'
                              .format(filepath))
def check_filepath_exists(filepath):
    """
    Raise an error if `filepath` already exists.

    Alias that forwards its argument to check_filename_exists() and returns that call's result.

    Args:
        filepath (str): path to check.
    """
    result = check_filename_exists(filepath)
    return result
def check_file_extension(filepath, valid_extensions):
    """
    Check validity of extension of a filename for writing data file.

    Args:
        filepath (str): file path of a file.
        valid_extensions (str or list of str): string specifying the valid extension or a list of (case sensitive)
            extensions that are valid (without leading dot).

    Returns:
        (str): file extension (without leading dot).

    Raises:
        ValueError: if `filepath` has no extension or its extension is not in `valid_extensions`.
    """
    # isinstance (not an exact type check) so that str subclasses are wrapped too; otherwise the
    # membership test below would silently become a substring test on the string.
    if isinstance(valid_extensions, str):
        valid_extensions = [valid_extensions]

    # Extension including the leading dot ('' if there is none).
    dotted_extension = os.path.splitext(filepath)[1]
    if not dotted_extension:
        raise ValueError('No file extension in filepath "{}"'.format(filepath))

    # Remove leading dot and check if it is in the list of valid extensions.
    extension = dotted_extension[1:]
    if extension not in valid_extensions:
        raise ValueError('Invalid extension in filepath "{}". Use one of: .{}'.
                         format(filepath, ' .'.join(valid_extensions)))
    return extension
def get_filename(filepath):
    """
    Get the filename of a filepath, without extension and with spaces replaced by '_'.

    Only the last extension is removed (e.g. 'a.tar.gz' becomes 'a.tar') and only space
    characters are replaced; other whitespace such as tabs is left untouched.

    Args:
        filepath (str): filepath.

    Returns:
        filename (str): filename without file extension and with spaces replaced by underscores.
    """
    basename = os.path.basename(filepath)
    stem = os.path.splitext(basename)[0]
    return stem.replace(' ', '_')
def get_filepaths(directory, pattern, case_sensitive=False,
                  subdirectories=False, raise_error=False):
    """
    Return a list with paths of files living in `directory` whose filename matches `pattern`.

    The pattern is matched against the entire filename (not the full path) using the rules of
    fnmatch.fnmatchcase(). Use * for wildcards, e.g. '*.csv' or '*report*'.
    The paths are absolute if `directory` is absolute and relative if `directory` is relative.

    Args:
        directory (str): path to directory in which the files reside.
        pattern (str): pattern that the returned filenames must match.
        case_sensitive (bool, optional): if True, pattern is case-sensitive.
            If False, pattern is not case-sensitive.
            Defaults to False.
        subdirectories (bool): if True, also look for files in subdirectories. If False, only look
            for files directly in the provided directory.
            Defaults to False.
        raise_error (bool): set to True to raise an error if there were no files found.

    Returns:
        matching_filepaths (list): filepaths of files in `directory` that match `pattern`.

    Raises:
        ValueError: if `raise_error` is True and no matching files were found.
    """
    # Normalize the pattern once instead of once per candidate file.
    match_pattern = pattern if case_sensitive else pattern.lower()

    def _iter_candidates():
        # Yield (parent directory, filename) pairs for every entry to consider.
        if subdirectories:
            for path, _subdirs, files in os.walk(directory):
                for filename in files:
                    yield path, filename
        else:
            for filename in os.listdir(directory):
                yield directory, filename

    matching_filepaths = []
    for path, filename in _iter_candidates():
        name = filename if case_sensitive else filename.lower()
        # Do not use fnmatch.fnmatch, since it normalizes the strings differently on different operating systems.
        if not fnmatch.fnmatchcase(name, match_pattern):
            continue
        filepath = os.path.join(path, filename)
        # os.listdir() also returns directories, so keep only actual files.
        if os.path.isfile(filepath):
            matching_filepaths.append(filepath)

    if raise_error and not matching_filepaths:
        raise ValueError('No files found in {} with pattern {}.'.format(directory, pattern))
    return matching_filepaths
def get_directories(directory, pattern, case_sensitive=False,
                    subdirectories=False, raise_error=False):
    """
    Return a list with paths of directories living in `directory` whose name matches `pattern`.

    The pattern is matched against the entire directory name (not the full path) using the rules
    of fnmatch.fnmatchcase(). Use * for wildcards.
    The paths are absolute if `directory` is absolute and relative if `directory` is relative.

    Args:
        directory (str): path to directory in which the directories reside.
        pattern (str): pattern that the returned directory names must match.
        case_sensitive (bool, optional): if True, pattern is case-sensitive.
            If False, pattern is not case-sensitive.
            Defaults to False.
        subdirectories (bool): if True, also look for dirs in subdirectories (recursively). If False, only look
            for directories directly in the provided directory.
            Defaults to False.
        raise_error (bool): set to True to raise an error if there were no directories found.

    Returns:
        matching_directories (list): paths of directories in `directory` that match `pattern`.

    Raises:
        ValueError: if `raise_error` is True and no matching directories were found.
    """
    # Normalize the pattern once instead of once per candidate entry.
    match_pattern = pattern if case_sensitive else pattern.lower()

    def _iter_candidates():
        # Yield (parent directory, entry name) pairs for every entry to consider.
        if subdirectories:
            for path, subdirs, _files in os.walk(directory):
                for dirname in subdirs:
                    yield path, dirname
        else:
            for element in os.listdir(directory):
                yield directory, element

    matching_directories = []
    for path, dirname in _iter_candidates():
        name = dirname if case_sensitive else dirname.lower()
        # Do not use fnmatch.fnmatch, since it normalizes the strings differently on different operating systems.
        if not fnmatch.fnmatchcase(name, match_pattern):
            continue
        dirpath = os.path.join(path, dirname)
        # os.listdir() also returns files, so keep only actual directories.
        if os.path.isdir(dirpath):
            matching_directories.append(dirpath)

    if raise_error and not matching_directories:
        raise ValueError('No directories found in {} with pattern {}.'.format(directory, pattern))
    return matching_directories
def get_output_dir(output_root, create_unique=False):
    """
    Return a directory path for saving the output that a script generates.

    A directory under output_root is created using the path of the script that calls this function. The output
    directory is created such that a similar structure is maintained in output as the code that generates the output.
    E.g.: if a file nnsa/python/scripts/example.py calls this function, and the output
    is nnsa/output, then the returned directory path is nnsa/output/python/scripts/example.
    If the directory does not exist, the directory is created.

    Args:
        output_root (str): path to the output root directory (absolute or relative to the current working
            directory). The output dir will be located under this root directory.
        create_unique (bool, optional): if True, creates a unique output directory with a name based on the current
            date and time. If False, does not create this additional unique directory.
            Defaults to False.

    Returns:
        dir_out (str): path to a directory for saving outputs of the script that calls this function.
    """
    # Get the (absolute) filepath of the script that called this function.
    filepath = os.path.abspath(stack()[1].filename)

    if 'ipython-input' in filepath:
        # If we call the function from the interactive console, use a temporary output folder.
        dirname = 'temp'
    else:
        try:
            # Compare absolute paths: os.path.commonpath() raises ValueError when absolute and relative
            # paths are mixed, which would happen for a relative output_root.
            common_path = os.path.commonpath([filepath, os.path.abspath(output_root)])
        except ValueError:
            # No common path (e.g. script and output root on different drives): fall back to the script name only.
            rel_path = os.path.basename(filepath)
        else:
            # Get the relative path of the filepath starting from the common path.
            rel_path = os.path.relpath(filepath, start=common_path)

        # Remove the extension from the relative filepath to form a directory name.
        dirname = os.path.splitext(rel_path)[0]

    if create_unique:
        # Current date and time in YYYYmmdd-HHMMSS format.
        now = datetime.now().strftime("%Y%m%d-%H%M%S")
        dirname = os.path.join(dirname, now)

    # Create a path for the output under output_root (as given by the caller).
    dir_out = os.path.join(output_root, dirname)

    # Create the directory if needed; exist_ok avoids a race between checking and creating.
    os.makedirs(dir_out, exist_ok=True)
    return dir_out
def select_path(dialog_type, iconbitmap=None, **kwargs):
    """
    Open a file dialog and let the user select a (new) file or directory.

    Args:
        dialog_type (str):
            If 'select_file', lets the user select an existing file.
            If 'select_files', lets the user select multiple existing files.
            If 'select_directory', lets the user select a directory.
            If 'saveas_file', lets the user create a new file(name).
        iconbitmap (str, optional): bitmap passed to the `iconbitmap` method of the (invisible) top-level
            window that owns the dialog. If None, no icon is set.
            Defaults to None.
        **kwargs (optional): optional keyword arguments for tkinter's filedialog functions. E.g.:
            filetypes (list): sequence of (label, pattern) tuples. The same label may occur with several patterns.
            initialdir (str): initial directory.
            title (str): message box title.

    Returns:
        path (str or tuple of str): the selected path (a tuple of paths for 'select_files').

    Raises:
        ValueError: if `dialog_type` is not one of the supported dialog types.
    """
    # Name of the tkinter.filedialog function that implements each dialog type.
    dialog_functions = {
        'select_file': 'askopenfilename',
        'select_files': 'askopenfilenames',
        'select_directory': 'askdirectory',
        'saveas_file': 'asksaveasfilename',
    }

    # Validate before touching tkinter, so an invalid type never creates (and leaks) a window.
    valid_types = list(dialog_functions)
    if dialog_type not in valid_types:
        raise ValueError('Invalid dialog_type "{}". Choose from {}.'
                         .format(dialog_type, valid_types))

    import tkinter as tk
    import tkinter.filedialog as tkfiledialog

    if dialog_type == 'saveas_file' and 'defaultextension' not in kwargs:
        # Only when defaultextension is specified, the extension of the selected file type is automatically added
        # to the filename.
        kwargs['defaultextension'] = ''

    # Make a top-level instance.
    root = tk.Tk()
    try:
        # Make it invisible.
        root.attributes('-alpha', 0.0)

        # Lift it to top so it can get focus,
        # otherwise dialogs will end up behind the terminal.
        root.lift()
        root.focus_force()
        if iconbitmap is not None:
            root.iconbitmap(iconbitmap)

        ask = getattr(tkfiledialog, dialog_functions[dialog_type])
        path = ask(parent=root, **kwargs)
    finally:
        # Always get rid of the top-level instance, also when the dialog raised an error.
        root.destroy()
    return path
def split_path(path):
    """
    Split a filepath into all of its components (ignores the drive/root) and return them in a tuple.

    Trailing path separators are ignored, so 'a/b/' and 'a/b' give the same result.

    Args:
        path (str): path to split.

    Returns:
        (tuple): the parts (directories and filename) of the path, in order from left to right.
    """
    # Collect parts from right to left and reverse once at the end.
    parts = []
    while True:
        head, tail = os.path.split(path)
        if tail:
            parts.append(tail)
        elif head == path:
            # Nothing left to strip: reached the root, the drive or an empty string.
            break
        # An empty tail with a shorter head means the path ended with a separator: keep going.
        path = head
    parts.reverse()
    return tuple(parts)