# Source code for fastwater.general.fileTools
# -*- coding: utf-8 -*-
"""
Generic file manipulation tools.
"""
# ----------------------------------------------------------------------------
# IMPORTS
# ----------------------------------------------------------------------------
# Standard Python Dependencies
import os
import fnmatch
import io
import datetime
import numpy as np
import scipy.io as sio
# Non-Standard Python Dependencies
# Local Module Dependencies
# Other Dependencies
#--------------------------------------------------------------------------
# GLOBAL CONSTANTS
#--------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# CLASS DEFINITIONS
# ----------------------------------------------------------------------------
#--------------------------------------------------------------------------
# FUNCTION DEFINITIONS
#--------------------------------------------------------------------------
def getListOfFiles(filePath, searchStr=""):
    """
    Get a sorted list of directory entries whose names contain a search string.

    Parameters
    ----------
    filePath : str
        path string to directory to be searched for files.
    searchStr : str
        sub-string fragment that can be used to identify a file or set of
        files. It is embedded in an ``fnmatch`` pattern, so shell-style
        wildcards (``*``, ``?``, ``[seq]``) inside it are honoured. An empty
        string (the default) matches every entry.

    Returns
    -------
    files : list, str
        a sorted list of the entry names (as returned by ``os.listdir``)
        that contain the search string.
    """
    # An empty search string means "everything"; otherwise match the fragment
    # anywhere in the name. Building the pattern in one expression guarantees
    # it is always defined (the default call used to hit an unbound name).
    pattern = '*' + searchStr + '*' if searchStr else '*'
    return sorted(fnmatch.filter(os.listdir(filePath), pattern))
def loadMatFile(fileName):
    """
    Load a matlab data structure from an *.mat file.

    The file is read fully into memory and then parsed with
    ``scipy.io.loadmat`` using its default options.

    Parameters
    ----------
    fileName : str
        path string to the mat file to be loaded into a python dictionary.

    Returns
    -------
    matData : dict
        python dictionary containing the matlab data structure.
    """
    # The context manager closes the handle even when reading fails; the
    # bytes are buffered first so parsing does not need the file open.
    with open(fileName, 'rb') as fh:
        buf = io.BytesIO(fh.read())
    return sio.loadmat(buf)
def mat2dict(fileName):
    """
    Load an *.mat file into a dictionary structure.

    Plain variables are squeezed and, when they hold a single element,
    converted to a Python scalar. Struct variables become a nested dictionary
    keyed by field name, with each field value unwrapped from the object
    container that ``scipy.io.loadmat`` places around it.

    Parameters
    ----------
    fileName : str
        path string to the mat file to be loaded into a python dictionary.

    Returns
    -------
    dataDict : dict
        python dictionary containing the matlab formatted data structure.
    """
    matData = loadMatFile(fileName)
    dataDict = {}
    for field, value in matData.items():
        # Keys starting with an underscore are skipped (loadmat's metadata
        # entries such as '__header__' are named this way).
        if field.startswith('_'):
            continue
        if len(value.dtype) < 1:
            # Not a structured dtype: an ordinary array variable.
            data = np.squeeze(value)
            if data.size == 1:
                data = data.tolist()
            dataDict[field] = data
        else:
            # Structured dtype: one dictionary entry per struct field.
            dataDict[field] = {
                subField: _unwrapStructField(np.squeeze(value[subField]))
                for subField in value.dtype.names
            }
    return dataDict


def _unwrapStructField(data):
    """Reduce one squeezed struct field to its plainest value."""
    if data.size == 1 and 'O' not in data.dtype.char:
        return data.tolist()
    # For a single struct this yields the wrapped inner array; for a struct
    # array (or an empty one) it yields a list, which previously crashed on
    # the ``.dtype`` access and is now unwrapped element by element.
    return _unwrapCell(data.tolist())


def _unwrapCell(cell):
    """Unwrap one value taken out of an object array (recursing into lists)."""
    if isinstance(cell, list):
        return [_unwrapCell(item) for item in cell]
    if not isinstance(cell, np.ndarray):
        return cell
    if 'U' in cell.dtype.char:
        # Unicode (char) arrays become plain Python strings / lists of them.
        cell = cell.tolist()
    # A lone element is pulled out of its outermost dimension; the ndim guard
    # avoids indexing into 0-d values or characters of a bare string.
    if np.ndim(cell) >= 1 and np.size(cell) == 1:
        cell = cell[0]
    return cell
def loadmat(filename):
    """
    Load a *.mat file, converting matlab structs to nested dictionaries.

    This function should be called instead of calling ``scipy.io.loadmat``
    directly, as it cures the problem of not properly recovering python
    dictionaries from mat files. It calls ``_check_keys`` to convert every
    top-level entry that is still a mat-object.

    Only values that are themselves mat-objects are converted; mat-objects
    stored inside arrays (e.g. struct arrays) are left untouched.

    from: `StackOverflow <http://stackoverflow.com/questions/7008608/scipy-io-loadmat-nested-structures-i-e-dictionaries>`_

    Parameters
    ----------
    filename : str
        path string to the mat file to be loaded.

    Returns
    -------
    data : dict
        dictionary returned by ``scipy.io.loadmat`` (loaded with
        ``struct_as_record=False, squeeze_me=True``) with mat-objects
        replaced by nested dictionaries.
    """
    data = sio.loadmat(filename, struct_as_record=False, squeeze_me=True)
    return _check_keys(data)


def _mat_struct_type():
    """
    Return SciPy's ``mat_struct`` class from wherever this version exposes it.

    Newer SciPy releases publish it as ``scipy.io.matlab.mat_struct`` and
    deprecate the ``mio5_params`` module path; older releases only have the
    latter, so it is kept as a fallback.
    """
    publicType = getattr(sio.matlab, 'mat_struct', None)
    if publicType is not None:
        return publicType
    return sio.matlab.mio5_params.mat_struct


def _check_keys(data):
    """
    Check if entries in the dictionary are mat-objects. If yes, ``_todict``
    is called to change them to nested dictionaries.

    The dictionary is modified in place and also returned.
    """
    structType = _mat_struct_type()
    for key in data:
        if isinstance(data[key], structType):
            data[key] = _todict(data[key])
    return data


def _todict(matobj):
    """
    A recursive function which constructs nested dictionaries from
    mat-objects.
    """
    structType = _mat_struct_type()
    result = {}
    for name in matobj._fieldnames:
        elem = matobj.__dict__[name]
        if isinstance(elem, structType):
            result[name] = _todict(elem)
        else:
            result[name] = elem
    return result
def print_mat_nested(d, indent=0):
    """
    Pretty print nested structures from .mat files.

    Dictionaries are listed as ``Key: <name>`` lines and numpy structured
    arrays as ``Field: <name>`` lines; each nesting level is indented by one
    additional tab. Any other value prints nothing.

    Inspired by: `StackOverflow <http://stackoverflow.com/questions/3229419/pretty-printing-nested-dictionaries-in-python>`_

    Parameters
    ----------
    d : dict or numpy.ndarray or object
        structure to be printed.
    indent : int
        number of leading tabs for the current nesting level.
    """
    lead = '\t' * indent
    deeper = indent + 1

    if isinstance(d, dict):
        for name, child in d.items():
            print(lead + 'Key: ' + str(name))
            print_mat_nested(child, deeper)

    # A structured dtype (non-None ``names``) is how a struct shows up as an
    # array; plain arrays fall through without output.
    hasNamedFields = isinstance(d, np.ndarray) and d.dtype.names is not None
    if hasNamedFields:
        for name in d.dtype.names:
            print(lead + 'Field: ' + str(name))
            print_mat_nested(d[name], deeper)
def generateGlobalAttributes(titleStr):
    """
    Build a generic global attributes dictionary for attaching meta data to a
    data dictionary.

    Parameters
    ----------
    titleStr : str
        a string defining the data set being attributed.

    Returns
    -------
    globalAttr : dict
        a dictionary with the keys ``title``, ``creation_date`` (current
        local time formatted as ``DD-Mon-YYYY HH:MM:SS``), ``contact`` and
        ``institution``.
    """
    createdAt = datetime.datetime.now().strftime("%d-%b-%Y %H:%M:%S")
    return {
        'title': titleStr,
        'creation_date': createdAt,
        'contact': ' ',
        'institution': 'School of Engineering, The University of Edinburgh, UK',
    }