# -*- coding: utf-8 -*-
"""
Tools to calculate statisitcal parameters for model validation.
"""
# ----------------------------------------------------------------------------
# IMPORTS
# ----------------------------------------------------------------------------
# Standard Python Dependencies
import os
import numpy as np
# Non-Standard Python Dependencies
import scipy.io as sio
# Local Module Dependencies
from fastwater.general.geometry import pol2cmplx
# Other Dependencies
#-----------------------------------------------------------------------------
# GLOBAL CONSTANTS
#-----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# CLASS DEFINITIONS
# ----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# FUNCTION DEFINITIONS
#-----------------------------------------------------------------------------
# == Standard Variables ======================================================
[docs]def corr_coef(model, obs):
"""
Validation Metric: Correlation Coefficient
:param numpy.array, float model: model data time series
:param numpy.array, floatobs: observation time series mapped onto model time stamps
:return: correlation coefficient (R)
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
num = np.nansum((m-np.nanmean(m))*(o-np.nanmean(o)))
msqr = np.nansum(np.square(m-np.nanmean(m)))
osqr = np.nansum(np.square(o-np.nanmean(o)))
denom = np.sqrt(msqr*osqr)
R = num/denom
return R
[docs]def bias(model, obs):
"""
Validation Metric: Bias
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: bias (b)
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
b = np.nanmean(m)-np.nanmean(o)
return b
[docs]def mean_bias(model, obs):
"""
Validation Metric: Mean Bias
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: mb
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
mb = np.nanmean(m-o)
return mb
[docs]def mean_absolute_error(model, obs):
"""
Validation Metric: Mean absolute error
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: mae
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
mae = np.nanmean(np.abs(m-o))
return mae
[docs]def norm_ma_error(model, obs):
"""
Validation Metric: Normalised mean absolute error
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: nmae
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
nmae = np.nansum(np.abs(m-o))/np.nansum(o)
nmae = nmae * 100.0
return nmae
[docs]def norm_mean_bias(model, obs):
"""
Validation Metric: Normalised mean bias
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: nmb
:rtype: float
"""
# Returns value as percentage
m = np.squeeze(model)
o = np.squeeze(obs)
nmb = np.nansum(m-o)/np.nansum(o)
nmb = nmb * 100.0
return nmb
[docs]def rms_error(model, obs):
"""
Validation Metric: Root-mean-square error
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: rmse
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
N = min(len(model != np.nan), len(obs != np.nan))
rmse = np.sqrt(np.nansum(np.square(m-o))/N)
return rmse
[docs]def norm_rmse(model, obs):
"""
Validation Metric: Normalised root-mean-square error
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: nrmse
:rtype: float
"""
# Returns value as percentage
o = np.squeeze(obs)
nrmse = rms_error(model, obs) / np.sqrt(np.nanmean(np.square(o)))
nrmse = nrmse * 100.0
return nrmse
[docs]def goodness_of_fit(model,obs):
"""
Validation Metric: Goodness-of-fit
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: R2
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
o_mean = np.nanmean(o)
SS_res = np.nansum(np.square(m-o))
SS_tot = np.nansum(np.square(m-o_mean))
R2 = 1.0 - SS_res/SS_tot
return R2
[docs]def scatter_index(model, obs):
"""
Validation Metric: Scatter index
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: si
:rtype: float
"""
# Returns value as percentage
m = np.squeeze(model)
o = np.squeeze(obs)
num = np.nansum(np.square((m-np.nanmean(m))-(o-np.nanmean(o))))
denom = np.nansum(np.square(o))
si = np.sqrt(num/denom)
si = si * 100.0
return si
[docs]def reliability_index(model, obs):
"""
Validation Metric: Reliability index
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: ri
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
N = min(len(model != np.nan), len(obs != np.nan))
ri = np.exp(np.sqrt(np.nansum(np.square(np.log(o/m)))/N))
return ri
[docs]def skill_score(model, obs):
"""
Validation Metric: Skill score
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: ss
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
num = np.nansum(np.square(np.abs(m-o)))
mean_o = np.nanmean(o)
denom = np.nansum(np.square(np.abs(m-mean_o)+np.abs(o-mean_o)))
ss = 1.0 - np.sqrt(num/denom)
return ss
[docs]def model_efficiency(model, obs):
"""
Validation Metric: Model efficiency
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: mef
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
mean_m = np.nanmean(m)
mean_o = np.nanmean(o)
denom = np.nansum(np.square(o-mean_o))
num = denom-np.nansum(np.square(m-mean_m))
mef = num/denom
return mef
# == Circular Variables ======================================================
[docs]def circ_mod_pi(theta):
"""
Fix angular range in radians to -\\pi <= \\theta <= \\pi
:param numpy.array, float theta: vector of angular values in radians
:return: theta in range -\\pi to \\pi
:rtype: numpy.array, float
"""
# This function requires theta in radians
if theta.size > 1:
indx = np.where(theta < -np.pi)[0]
theta[indx] = theta[indx]+2.0*np.pi
indx = np.where(theta > np.pi)[0]
theta[indx] = theta[indx]-2.0*np.pi
else:
if theta < -np.pi:
theta += 2.0*np.pi
elif theta > np.pi:
theta -= 2.0*np.pi
return theta
[docs]def circ_mean(theta, degrees=True):
"""
Calculate mean of angular data for range (-180 to 180)
Parameters
----------
theta : numpy.array, float
vector of angular values.
degrees : bool, optional
angular data units flag. The default is degrees = True.
Returns
-------
mean_theta : float
mean of the angular data in same units as passed in.
"""
if degrees:
thetar = circ_mod_pi(np.radians(np.squeeze(theta)))
else:
thetar = circ_mod_pi(np.squeeze(theta))
mean_theta = np.nanmean(thetar)
if degrees:
mean_theta = np.degrees(mean_theta)
return mean_theta
[docs]def circ_std(theta, degrees=True):
"""
Calculate standard deviation of angular data for range (-180 to 180)
Parameters
----------
theta : numpy.array, float
vector of angular values.
degrees : bool, optional
angular data units flag. The default is degrees = True.
Returns
-------
std_theta : float
stanadrd deviation of the angular data in same units as passed in.
"""
if degrees:
thetar = circ_mod_pi(np.radians(np.squeeze(theta)))
else:
thetar = circ_mod_pi(np.squeeze(theta))
std_theta = np.nanstd(thetar)
if degrees:
std_theta = np.degrees(std_theta)
return std_theta
[docs]def circ_corr(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Correlation Coefficient
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular correlation coefficient (c_corr)
:rtype: float
"""
if degrees:
theta1r = np.radians(np.squeeze(theta1), dtype='double')
theta2r = np.radians(np.squeeze(theta2), dtype='double')
else:
theta1r = np.double(np.squeeze(theta1))
theta2r = np.double(np.squeeze(theta2))
cmplx1 = pol2cmplx(np.ones_like(theta1r, dtype=np.double), theta1r)
cmplx2 = pol2cmplx(np.ones_like(theta2r, dtype=np.double), theta2r)
c_corr = np.abs(np.cov(cmplx1,cmplx2)[0,1])/(np.std(cmplx1)*np.std(cmplx2))
return c_corr
[docs]def circ_mean_bias(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Mean Bias
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular mean bias (mb)
:rtype: float
"""
theta1 = np.double(np.squeeze(theta1))
theta2 = np.double(np.squeeze(theta2))
if degrees:
dtheta = circ_mod_pi(np.radians(theta1 - theta2))
else:
dtheta = circ_mod_pi(theta1 - theta2)
circ_mb = np.nanmean(dtheta)
if degrees:
circ_mb = np.degrees(circ_mb)
return circ_mb
[docs]def circ_norm_mean_bias(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Normalised mean Bias
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular normalised mean bias (nmb) as a percentage
:rtype: float
"""
# Returns value as percentage
theta1 = np.double(np.squeeze(theta1))
theta2 = np.double(np.squeeze(theta2))
if degrees:
circ_nmb = circ_mean_bias(theta1, theta2, degrees)/np.double(180.0)
else:
circ_nmb = circ_mean_bias(theta1, theta2, degrees)/(np.pi)
circ_nmb = circ_nmb * 100.0
return circ_nmb
[docs]def circ_mean_abs_error(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Mean absolute error
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular mean absolute error (mae)
:rtype: float
"""
theta1 = np.double(np.squeeze(theta1))
theta2 = np.double(np.squeeze(theta2))
if degrees:
dtheta = circ_mod_pi(np.radians(theta1 - theta2))
else:
dtheta = circ_mod_pi(theta1 - theta2)
circ_mae = np.nanmean(np.abs(dtheta))
if degrees:
circ_mae = np.degrees(circ_mae)
return circ_mae
[docs]def circ_norm_ma_error(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Normalised mean absolute error
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular normalised mean absolute error (mae) as a percentage.
:rtype: float
"""
# Returns value as percentage
theta1 = np.double(np.squeeze(theta1))
theta2 = np.double(np.squeeze(theta2))
if degrees:
circ_nmae = circ_mean_abs_error(theta1, theta2, degrees)/np.double(180.0)
else:
circ_nmae = circ_mean_abs_error(theta1, theta2, degrees)/(np.pi)
circ_nmae = circ_nmae * 100.0
return circ_nmae
[docs]def circ_rms_error(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Root-mean-square error
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular RMS error (circ_rmse)
:rtype: float
"""
theta1 = np.squeeze(theta1)
theta2 = np.squeeze(theta2)
if degrees:
dtheta = circ_mod_pi(np.radians(theta1 - theta2))
else:
dtheta = circ_mod_pi(theta1 - theta2)
circ_rmse = np.sqrt(np.nanmean(np.square(dtheta)))
if degrees:
circ_rmse = np.degrees(circ_rmse)
return circ_rmse
[docs]def circ_norm_rmse(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Normalised root-mean-square error
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular normalised RMS error (circ)nrmse as a percentage
:rtype: float
"""
# Returns value as percentage
theta1 = np.squeeze(theta1)
theta2 = np.squeeze(theta2)
if degrees:
dtheta = circ_mod_pi(np.radians(theta1 - theta2))
else:
dtheta = circ_mod_pi(theta1 - theta2)
num = np.sqrt(np.nanmean(np.square(dtheta)))
denom = np.pi
circ_nrmse = (num / denom) * 100.0
return circ_nrmse
[docs]def circ_goodness_of_fit(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Goodness-of-fit
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular goodness-of-fit R2
:rtype: float
"""
theta1 = np.squeeze(theta1)
theta2 = np.squeeze(theta2)
if degrees:
m = circ_mod_pi(np.radians(theta1))
o = circ_mod_pi(np.radians(theta2))
else:
m = circ_mod_pi(theta1)
o = circ_mod_pi(theta2)
o_mean = np.nanmean(o)
SS_res = np.nansum(np.square(m-o))
SS_tot = np.nansum(np.square(m-o_mean))
circ_R2 = 1.0 - SS_res/SS_tot
return circ_R2
[docs]def circ_scatter_index(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Scatter index
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular scatter index (circ_si)
:rtype: float
"""
# Returns value as percentage
theta1 = np.double(np.squeeze(theta1))
theta2 = np.double(np.squeeze(theta2))
if degrees:
theta1 = np.radians(theta1)
theta2 = np.radians(theta2)
theta1 = circ_mod_pi(theta1)
theta2 = circ_mod_pi(theta2)
mtheta1 = np.nanmean(theta1)
mtheta2 = np.nanmean(theta2)
dtheta1 = circ_mod_pi(theta1-mtheta1)
dtheta2 = circ_mod_pi(theta2-mtheta2)
num = np.nanmean(np.square(circ_mod_pi(dtheta1-dtheta2)))
denom = np.square(np.pi)
circ_si = np.sqrt(num/denom) * 100.0
return circ_si
[docs]def stats_metrics_circular(model, obs, varName, degrees=True):
"""
Calculate circlar validation metrics for a pair of modelled and observation time series
"""
R = circ_corr(model, obs, degrees)
R2 = circ_goodness_of_fit(model, obs, degrees)
mb = circ_mean_bias(model, obs, degrees)
nmb = circ_norm_mean_bias(model, obs, degrees)
mae = circ_mean_abs_error(model, obs, degrees)
nmae = circ_norm_ma_error(model, obs, degrees)
rmse = circ_rms_error(model, obs, degrees)
nrmse = circ_norm_rmse(model, obs, degrees)
si = circ_scatter_index(model, obs, degrees)
metrics = {}
metrics['Variable'] = varName
metrics['R'] = R
metrics['R2'] = R2
metrics['MB'] = mb
metrics['NMB'] = nmb
metrics['MAE'] = mae
metrics['NMAE'] = nmae
metrics['RMSE'] = rmse
metrics['NRMSE'] = nrmse
metrics['SI'] = si
metrics['NSMPL'] = np.float64(len(model))
return metrics
[docs]def stats_metrics(model, obs, varName):
"""
Calculate standard validation metrics for a pair of modelled and observation time series
"""
R = corr_coef(model, obs)
R2 = goodness_of_fit(model, obs)
mb = mean_bias(model, obs)
nmb = norm_mean_bias(model, obs)
mae = mean_absolute_error(model, obs)
nmae = norm_ma_error(model, obs)
rmse = rms_error(model, obs)
nrmse = norm_rmse(model, obs)
si = scatter_index(model, obs)
metrics = {}
metrics['Variable'] = varName
metrics['R'] = R
metrics['R2'] = R2
metrics['MB'] = mb
metrics['NMB'] = nmb
metrics['MAE'] = mae
metrics['NMAE'] = nmae
metrics['RMSE'] = rmse
metrics['NRMSE'] = nrmse
metrics['SI'] = si
metrics['NSMPL'] = np.float64(len(model))
return metrics
[docs]def initialize_results():
"""
Initialise a dictionary for collecting validation results
"""
results = {}
results.update({'Variable': []})
results.update({'R': []})
results.update({'R2': []})
results.update({'MB': []})
results.update({'NMB': []})
results.update({'MAE': []})
results.update({'NMAE': []})
results.update({'RMSE': []})
results.update({'NRMSE': []})
results.update({'SI': []})
results.update({'NSMPL': []})
return results
[docs]def store_results(store, results):
"""
Store validation results record in the results dictionary
"""
store['Variable'].append(results['Variable'])
store['R'].append(results['R'])
store['R2'].append(results['R2'])
store['MB'].append(results['MB'])
store['NMB'].append(results['NMB'])
store['MAE'].append(results['MAE'])
store['NMAE'].append(results['NMAE'])
store['RMSE'].append(results['RMSE'])
store['NRMSE'].append(results['NRMSE'])
store['SI'].append(results['SI'])
store['NSMPL'].append(results['NSMPL'])
return
[docs]def save_results(store, prefix, paramName, outpath):
"""
Save tabulated validation results as an ASCII file
"""
outtxtname = prefix+'_STATS_'+paramName+'.txt'
outtxtfile = os.path.join(outpath,outtxtname)
with open(outtxtfile,'w') as ofile:
ofile.write('Validation Statistics for '+paramName+'\n')
fmtstr = "{:<25}{:^9.3}{:^9.3}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9}"
for row in zip(*([key] + (value) for key, value in store.items())):
ofile.write(fmtstr.format(*row)+'\n')
outmatname = prefix+'_STATS_'+paramName+'.mat'
outmatfile = os.path.join(outpath,outmatname)
saveFlg = sio.savemat(outmatfile,store)
return saveFlg
[docs]def display_metrics(metrics, paramName, testName=None):
"""
Display validation results as a table on the standard output
"""
if testName is not None:
print('Validation Statistics for', testName, '\n')
print('Validation Parameter = '+paramName)
fmtstr = "{:<25}{:^9.3}{:^9.3}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9}"
for row in zip(*([key] + [value] for key, value in metrics.items())):
print(fmtstr.format(*row))
print('')
return