Source code for fastwater.general.statsTools

# -*- coding: utf-8 -*-
"""
Tools to calculate statisitcal parameters for model validation.
"""

# ----------------------------------------------------------------------------
#   IMPORTS
# ----------------------------------------------------------------------------
# Standard Python Dependencies
import os
import numpy as np
# Non-Standard Python Dependencies
import scipy.io as sio
# Local Module Dependencies
from fastwater.general.geometry import pol2cmplx
# Other Dependencies

#-----------------------------------------------------------------------------
# GLOBAL CONSTANTS
#-----------------------------------------------------------------------------


# ----------------------------------------------------------------------------
#   CLASS DEFINITIONS
# ----------------------------------------------------------------------------


#-----------------------------------------------------------------------------
# FUNCTION DEFINITIONS
#-----------------------------------------------------------------------------

# == Standard Variables ======================================================
[docs]def corr_coef(model, obs): """ Validation Metric: Correlation Coefficient :param numpy.array, float model: model data time series :param numpy.array, floatobs: observation time series mapped onto model time stamps :return: correlation coefficient (R) :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) num = np.nansum((m-np.nanmean(m))*(o-np.nanmean(o))) msqr = np.nansum(np.square(m-np.nanmean(m))) osqr = np.nansum(np.square(o-np.nanmean(o))) denom = np.sqrt(msqr*osqr) R = num/denom return R
[docs]def bias(model, obs): """ Validation Metric: Bias :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: bias (b) :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) b = np.nanmean(m)-np.nanmean(o) return b
[docs]def mean_bias(model, obs): """ Validation Metric: Mean Bias :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: mb :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) mb = np.nanmean(m-o) return mb
[docs]def mean_absolute_error(model, obs): """ Validation Metric: Mean absolute error :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: mae :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) mae = np.nanmean(np.abs(m-o)) return mae
[docs]def norm_ma_error(model, obs): """ Validation Metric: Normalised mean absolute error :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: nmae :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) nmae = np.nansum(np.abs(m-o))/np.nansum(o) nmae = nmae * 100.0 return nmae
[docs]def norm_mean_bias(model, obs): """ Validation Metric: Normalised mean bias :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: nmb :rtype: float """ # Returns value as percentage m = np.squeeze(model) o = np.squeeze(obs) nmb = np.nansum(m-o)/np.nansum(o) nmb = nmb * 100.0 return nmb
[docs]def rms_error(model, obs): """ Validation Metric: Root-mean-square error :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: rmse :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) N = min(len(model != np.nan), len(obs != np.nan)) rmse = np.sqrt(np.nansum(np.square(m-o))/N) return rmse
[docs]def norm_rmse(model, obs): """ Validation Metric: Normalised root-mean-square error :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: nrmse :rtype: float """ # Returns value as percentage o = np.squeeze(obs) nrmse = rms_error(model, obs) / np.sqrt(np.nanmean(np.square(o))) nrmse = nrmse * 100.0 return nrmse
[docs]def goodness_of_fit(model,obs): """ Validation Metric: Goodness-of-fit :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: R2 :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) o_mean = np.nanmean(o) SS_res = np.nansum(np.square(m-o)) SS_tot = np.nansum(np.square(m-o_mean)) R2 = 1.0 - SS_res/SS_tot return R2
[docs]def scatter_index(model, obs): """ Validation Metric: Scatter index :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: si :rtype: float """ # Returns value as percentage m = np.squeeze(model) o = np.squeeze(obs) num = np.nansum(np.square((m-np.nanmean(m))-(o-np.nanmean(o)))) denom = np.nansum(np.square(o)) si = np.sqrt(num/denom) si = si * 100.0 return si
[docs]def reliability_index(model, obs): """ Validation Metric: Reliability index :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: ri :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) N = min(len(model != np.nan), len(obs != np.nan)) ri = np.exp(np.sqrt(np.nansum(np.square(np.log(o/m)))/N)) return ri
[docs]def skill_score(model, obs): """ Validation Metric: Skill score :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: ss :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) num = np.nansum(np.square(np.abs(m-o))) mean_o = np.nanmean(o) denom = np.nansum(np.square(np.abs(m-mean_o)+np.abs(o-mean_o))) ss = 1.0 - np.sqrt(num/denom) return ss
[docs]def model_efficiency(model, obs): """ Validation Metric: Model efficiency :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: mef :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) mean_m = np.nanmean(m) mean_o = np.nanmean(o) denom = np.nansum(np.square(o-mean_o)) num = denom-np.nansum(np.square(m-mean_m)) mef = num/denom return mef
# == Circular Variables ======================================================
[docs]def circ_mod_pi(theta): """ Fix angular range in radians to -\\pi <= \\theta <= \\pi :param numpy.array, float theta: vector of angular values in radians :return: theta in range -\\pi to \\pi :rtype: numpy.array, float """ # This function requires theta in radians if theta.size > 1: indx = np.where(theta < -np.pi)[0] theta[indx] = theta[indx]+2.0*np.pi indx = np.where(theta > np.pi)[0] theta[indx] = theta[indx]-2.0*np.pi else: if theta < -np.pi: theta += 2.0*np.pi elif theta > np.pi: theta -= 2.0*np.pi return theta
[docs]def circ_mean(theta, degrees=True): """ Calculate mean of angular data for range (-180 to 180) Parameters ---------- theta : numpy.array, float vector of angular values. degrees : bool, optional angular data units flag. The default is degrees = True. Returns ------- mean_theta : float mean of the angular data in same units as passed in. """ if degrees: thetar = circ_mod_pi(np.radians(np.squeeze(theta))) else: thetar = circ_mod_pi(np.squeeze(theta)) mean_theta = np.nanmean(thetar) if degrees: mean_theta = np.degrees(mean_theta) return mean_theta
[docs]def circ_std(theta, degrees=True): """ Calculate standard deviation of angular data for range (-180 to 180) Parameters ---------- theta : numpy.array, float vector of angular values. degrees : bool, optional angular data units flag. The default is degrees = True. Returns ------- std_theta : float stanadrd deviation of the angular data in same units as passed in. """ if degrees: thetar = circ_mod_pi(np.radians(np.squeeze(theta))) else: thetar = circ_mod_pi(np.squeeze(theta)) std_theta = np.nanstd(thetar) if degrees: std_theta = np.degrees(std_theta) return std_theta
[docs]def circ_corr(theta1, theta2, degrees=True): """ Validation Metric (Circular): Correlation Coefficient :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular correlation coefficient (c_corr) :rtype: float """ if degrees: theta1r = np.radians(np.squeeze(theta1), dtype='double') theta2r = np.radians(np.squeeze(theta2), dtype='double') else: theta1r = np.double(np.squeeze(theta1)) theta2r = np.double(np.squeeze(theta2)) cmplx1 = pol2cmplx(np.ones_like(theta1r, dtype=np.double), theta1r) cmplx2 = pol2cmplx(np.ones_like(theta2r, dtype=np.double), theta2r) c_corr = np.abs(np.cov(cmplx1,cmplx2)[0,1])/(np.std(cmplx1)*np.std(cmplx2)) return c_corr
[docs]def circ_mean_bias(theta1, theta2, degrees=True): """ Validation Metric (Circular): Mean Bias :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular mean bias (mb) :rtype: float """ theta1 = np.double(np.squeeze(theta1)) theta2 = np.double(np.squeeze(theta2)) if degrees: dtheta = circ_mod_pi(np.radians(theta1 - theta2)) else: dtheta = circ_mod_pi(theta1 - theta2) circ_mb = np.nanmean(dtheta) if degrees: circ_mb = np.degrees(circ_mb) return circ_mb
[docs]def circ_norm_mean_bias(theta1, theta2, degrees=True): """ Validation Metric (Circular): Normalised mean Bias :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular normalised mean bias (nmb) as a percentage :rtype: float """ # Returns value as percentage theta1 = np.double(np.squeeze(theta1)) theta2 = np.double(np.squeeze(theta2)) if degrees: circ_nmb = circ_mean_bias(theta1, theta2, degrees)/np.double(180.0) else: circ_nmb = circ_mean_bias(theta1, theta2, degrees)/(np.pi) circ_nmb = circ_nmb * 100.0 return circ_nmb
[docs]def circ_mean_abs_error(theta1, theta2, degrees=True): """ Validation Metric (Circular): Mean absolute error :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular mean absolute error (mae) :rtype: float """ theta1 = np.double(np.squeeze(theta1)) theta2 = np.double(np.squeeze(theta2)) if degrees: dtheta = circ_mod_pi(np.radians(theta1 - theta2)) else: dtheta = circ_mod_pi(theta1 - theta2) circ_mae = np.nanmean(np.abs(dtheta)) if degrees: circ_mae = np.degrees(circ_mae) return circ_mae
[docs]def circ_norm_ma_error(theta1, theta2, degrees=True): """ Validation Metric (Circular): Normalised mean absolute error :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular normalised mean absolute error (mae) as a percentage. :rtype: float """ # Returns value as percentage theta1 = np.double(np.squeeze(theta1)) theta2 = np.double(np.squeeze(theta2)) if degrees: circ_nmae = circ_mean_abs_error(theta1, theta2, degrees)/np.double(180.0) else: circ_nmae = circ_mean_abs_error(theta1, theta2, degrees)/(np.pi) circ_nmae = circ_nmae * 100.0 return circ_nmae
[docs]def circ_rms_error(theta1, theta2, degrees=True): """ Validation Metric (Circular): Root-mean-square error :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular RMS error (circ_rmse) :rtype: float """ theta1 = np.squeeze(theta1) theta2 = np.squeeze(theta2) if degrees: dtheta = circ_mod_pi(np.radians(theta1 - theta2)) else: dtheta = circ_mod_pi(theta1 - theta2) circ_rmse = np.sqrt(np.nanmean(np.square(dtheta))) if degrees: circ_rmse = np.degrees(circ_rmse) return circ_rmse
[docs]def circ_norm_rmse(theta1, theta2, degrees=True): """ Validation Metric (Circular): Normalised root-mean-square error :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular normalised RMS error (circ)nrmse as a percentage :rtype: float """ # Returns value as percentage theta1 = np.squeeze(theta1) theta2 = np.squeeze(theta2) if degrees: dtheta = circ_mod_pi(np.radians(theta1 - theta2)) else: dtheta = circ_mod_pi(theta1 - theta2) num = np.sqrt(np.nanmean(np.square(dtheta))) denom = np.pi circ_nrmse = (num / denom) * 100.0 return circ_nrmse
[docs]def circ_goodness_of_fit(theta1, theta2, degrees=True): """ Validation Metric (Circular): Goodness-of-fit :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular goodness-of-fit R2 :rtype: float """ theta1 = np.squeeze(theta1) theta2 = np.squeeze(theta2) if degrees: m = circ_mod_pi(np.radians(theta1)) o = circ_mod_pi(np.radians(theta2)) else: m = circ_mod_pi(theta1) o = circ_mod_pi(theta2) o_mean = np.nanmean(o) SS_res = np.nansum(np.square(m-o)) SS_tot = np.nansum(np.square(m-o_mean)) circ_R2 = 1.0 - SS_res/SS_tot return circ_R2
[docs]def circ_scatter_index(theta1, theta2, degrees=True): """ Validation Metric (Circular): Scatter index :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular scatter index (circ_si) :rtype: float """ # Returns value as percentage theta1 = np.double(np.squeeze(theta1)) theta2 = np.double(np.squeeze(theta2)) if degrees: theta1 = np.radians(theta1) theta2 = np.radians(theta2) theta1 = circ_mod_pi(theta1) theta2 = circ_mod_pi(theta2) mtheta1 = np.nanmean(theta1) mtheta2 = np.nanmean(theta2) dtheta1 = circ_mod_pi(theta1-mtheta1) dtheta2 = circ_mod_pi(theta2-mtheta2) num = np.nanmean(np.square(circ_mod_pi(dtheta1-dtheta2))) denom = np.square(np.pi) circ_si = np.sqrt(num/denom) * 100.0 return circ_si
[docs]def stats_metrics_circular(model, obs, varName, degrees=True): """ Calculate circlar validation metrics for a pair of modelled and observation time series """ R = circ_corr(model, obs, degrees) R2 = circ_goodness_of_fit(model, obs, degrees) mb = circ_mean_bias(model, obs, degrees) nmb = circ_norm_mean_bias(model, obs, degrees) mae = circ_mean_abs_error(model, obs, degrees) nmae = circ_norm_ma_error(model, obs, degrees) rmse = circ_rms_error(model, obs, degrees) nrmse = circ_norm_rmse(model, obs, degrees) si = circ_scatter_index(model, obs, degrees) metrics = {} metrics['Variable'] = varName metrics['R'] = R metrics['R2'] = R2 metrics['MB'] = mb metrics['NMB'] = nmb metrics['MAE'] = mae metrics['NMAE'] = nmae metrics['RMSE'] = rmse metrics['NRMSE'] = nrmse metrics['SI'] = si metrics['NSMPL'] = np.float64(len(model)) return metrics
[docs]def stats_metrics(model, obs, varName): """ Calculate standard validation metrics for a pair of modelled and observation time series """ R = corr_coef(model, obs) R2 = goodness_of_fit(model, obs) mb = mean_bias(model, obs) nmb = norm_mean_bias(model, obs) mae = mean_absolute_error(model, obs) nmae = norm_ma_error(model, obs) rmse = rms_error(model, obs) nrmse = norm_rmse(model, obs) si = scatter_index(model, obs) metrics = {} metrics['Variable'] = varName metrics['R'] = R metrics['R2'] = R2 metrics['MB'] = mb metrics['NMB'] = nmb metrics['MAE'] = mae metrics['NMAE'] = nmae metrics['RMSE'] = rmse metrics['NRMSE'] = nrmse metrics['SI'] = si metrics['NSMPL'] = np.float64(len(model)) return metrics
[docs]def initialize_results(): """ Initialise a dictionary for collecting validation results """ results = {} results.update({'Variable': []}) results.update({'R': []}) results.update({'R2': []}) results.update({'MB': []}) results.update({'NMB': []}) results.update({'MAE': []}) results.update({'NMAE': []}) results.update({'RMSE': []}) results.update({'NRMSE': []}) results.update({'SI': []}) results.update({'NSMPL': []}) return results
[docs]def store_results(store, results): """ Store validation results record in the results dictionary """ store['Variable'].append(results['Variable']) store['R'].append(results['R']) store['R2'].append(results['R2']) store['MB'].append(results['MB']) store['NMB'].append(results['NMB']) store['MAE'].append(results['MAE']) store['NMAE'].append(results['NMAE']) store['RMSE'].append(results['RMSE']) store['NRMSE'].append(results['NRMSE']) store['SI'].append(results['SI']) store['NSMPL'].append(results['NSMPL']) return
[docs]def save_results(store, prefix, paramName, outpath): """ Save tabulated validation results as an ASCII file """ outtxtname = prefix+'_STATS_'+paramName+'.txt' outtxtfile = os.path.join(outpath,outtxtname) with open(outtxtfile,'w') as ofile: ofile.write('Validation Statistics for '+paramName+'\n') fmtstr = "{:<25}{:^9.3}{:^9.3}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9}" for row in zip(*([key] + (value) for key, value in store.items())): ofile.write(fmtstr.format(*row)+'\n') outmatname = prefix+'_STATS_'+paramName+'.mat' outmatfile = os.path.join(outpath,outmatname) saveFlg = sio.savemat(outmatfile,store) return saveFlg
[docs]def display_metrics(metrics, paramName, testName=None): """ Display validation results as a table on the standard output """ if testName is not None: print('Validation Statistics for', testName, '\n') print('Validation Parameter = '+paramName) fmtstr = "{:<25}{:^9.3}{:^9.3}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9}" for row in zip(*([key] + [value] for key, value in metrics.items())): print(fmtstr.format(*row)) print('') return