Source code for roger.tools.evaluation

import re
import pandas as pd
import numpy as onp
import scipy as sp
import matplotlib.dates as mdates
import matplotlib as mpl
import seaborn as sns

mpl.use("agg")
import matplotlib.pyplot as plt  # noqa: E402

sns.set_style("ticks")


def filter_headers(string, substr):
    return [str for str in string if re.match(r"[^\d]+|^", str).group(0) in substr]


[docs]def join_obs_on_sim(idx, sim_vals, df_obs, rm_na=False): """Join observed values on simulated values. Args ---------- idx : pd.DatetimeIndex time index sim_vals : onp.ndarray simulated values df_obs : pd.DataFrame observed values rm_na : boolean, optional whether NaNs are removed. default is False. Returns ---------- df : pd.DataFrame DataFrame containing simulated and observed values. """ # dataframe with simulated values if sim_vals.ndim <= 1: df_sim = pd.DataFrame(data=sim_vals, index=idx, columns=["sim"]) else: sim_header = [f"sim{i}" for i in range(sim_vals.shape[-1])] df_sim = pd.DataFrame(data=sim_vals, index=idx, columns=sim_header) # dataframe with observed values df = pd.DataFrame(index=idx) df_obs = df.join(df_obs) df_obs.columns = ["obs"] df = df_sim.join(df_obs) if rm_na: df = df.dropna() return df
[docs]def plot_sim(df, y_lab="", ls_obs="line", x_lab="Time", ylim=None): """Plot simulated values. Args ---------- df : pd.DataFrame Dataframe with simulated and observed values y_lab : str label of y-axis ls_obs : str, optional linestyle of observations x_lab : str, optional label of x-axis ylim : tuple, optional y-axis limits Returns ---------- fig : Figure Plot for observed and simulated values """ # plot observed and simulated values fig, axs = plt.subplots(figsize=(6, 2)) axs.plot(df.index, df.iloc[:, 0], lw=1, ls="-", color="black") axs.set_xlim((df.index[0], df.index[-1])) if ylim: axs.set_ylim(ylim) axs.set_ylabel(y_lab) axs.set_xlabel(x_lab) fig.tight_layout() return fig
[docs]def plot_sim_cum(df, y_lab="", ls_obs="line", x_lab="Time", ylim=None): """Plot simulated values. Args ---------- df : pd.DataFrame Dataframe with simulated and observed values y_lab : str label of y-axis ls_obs : str, optional linestyle of observations x_lab : str, optional label of x-axis ylim : tuple, optional y-axis limits Returns ---------- fig : Figure Plot for observed and simulated values """ # plot observed and simulated values fig, axs = plt.subplots(figsize=(6, 2)) axs.plot(df.index, df.iloc[:, 0].cumsum(), lw=1, ls="-", color="black") axs.set_xlim((df.index[0], df.index[-1])) if ylim: axs.set_ylim(ylim) axs.set_ylabel(y_lab) axs.set_xlabel(x_lab) fig.tight_layout() return fig
[docs]def plot_obs_sim(df, y_lab="", ls_obs="line", x_lab="Time", ylim=None): """Plot observed and simulated values. Args ---------- df : pd.DataFrame Dataframe with simulated and observed values y_lab : str label of y-axis fmt_x : str, optional Format of x-axis. Default is numerical ('num'). Alternatively, date format can be used ('date'). ls_obs : str, optional linestyle of observations x_lab : str, optional label of x-axis ylim : tuple, optional y-axis limits Returns ---------- fig : Figure Plot for observed and simulated values """ # plot observed and simulated values sim_headers = filter_headers(df.columns, ["sim"]) bench_headers = filter_headers(df.columns, ["bench"]) fig, axs = plt.subplots(figsize=(6, 2)) for sim_header in sim_headers: axs.plot(df.index, df.loc[:, sim_header], lw=1, ls="-.", color="red") if bench_headers: for bench_header in bench_headers: axs.plot(df.index, df.loc[:, bench_header], lw=1, ls="-.", color="grey") if ls_obs == "line": axs.plot(df.index, df.loc[:, "obs"], lw=1.2, color="blue", alpha=0.5) axs.scatter(df.index, df.loc[:, "obs"], color="blue", s=1, alpha=0.5) axs.set_xlim((df.index[0], df.index[-1])) if ylim: axs.set_ylim(ylim) axs.set_ylabel(y_lab) axs.set_xlabel(x_lab) fig.tight_layout() return fig
[docs]def plot_obs_sim_year(df, y_lab, start_month_hyd_year=10, ls_obs="line", x_lab="Time", ylim=None): """Plot observed and simulated values. Args ---------- df : pd.DataFrame Dataframe with simulated and observed values y_lab : str label of y-axis start_month_hyd_year : int, optional starting month of hydrologic year ls_obs : str, optional linestyle of observations x_lab : str, optional label of x-axis ylim : tuple, optional y-axis limits Returns ---------- figs : list list with figures """ figs = [] sim_headers = filter_headers(df.columns, ["sim"]) bench_headers = filter_headers(df.columns, ["bench"]) df = assign_hyd_year(df.copy(), start_month_hyd_year=start_month_hyd_year) years = pd.unique(df.hyd_year) for year in years: df_year = df.loc[df.hyd_year == year].copy() df_year.loc[df_year.isnull().any(axis=1)] = 0 # plot observed and simulated values fig, axs = plt.subplots(figsize=(6, 2)) for sim_header in sim_headers: axs.plot(df.index, df.loc[:, sim_header], lw=1, ls="-.", color="red") if bench_headers: for bench_header in bench_headers: axs.plot(df.index, df.loc[:, bench_header], lw=1, ls="-.", color="grey") if ls_obs == "line": axs.plot(df_year.index, df_year.loc[:, "obs"], lw=1.2, color="blue", alpha=0.5) axs.scatter(df_year.index, df_year.iloc[:, "obs"], color="blue", s=1, alpha=0.5) axs.set_xlim((df_year.index[0], df_year.index[-1])) if ylim: axs.set_ylim(ylim) axs.set_ylabel(y_lab) axs.set_xlabel(str(year)) if len(df_year.index) > 120: axs.xaxis.set_major_formatter(mdates.DateFormatter("%m")) elif len(df_year.index) < 120: axs.xaxis.set_major_formatter(mdates.DateFormatter("%d-%m")) fig.tight_layout() figs.append(fig) return figs
[docs]def plot_obs_sim_cum(df, y_lab, x_lab="Time"): """Plot cumulated observed and simulated values. Args ---------- df : pd.DataFrame Dataframe with simulated and observed values y_lab : str label of y-axis x_lab : str, optional label of x-axis Returns ---------- fig : Figure Plot for observed and simulated values """ df.loc[df.isna().any(axis=1)] = 0 # plot observed and simulated values sim_headers = filter_headers(df.columns, ["sim"]) bench_headers = filter_headers(df.columns, ["bench"]) fig, axs = plt.subplots(figsize=(6, 2)) for sim_header in sim_headers: axs.plot(df.index, df.loc[:, sim_header].cumsum(), lw=1, ls="-.", color="red") if bench_headers: for bench_header in bench_headers: axs.plot(df.index, df.loc[:, bench_header].cumsum(), lw=1, ls="-.", color="grey") axs.plot(df.index, df.loc[:, "obs"].cumsum(), lw=1.5, color="blue", alpha=0.5) axs.set_xlim((df.index[0], df.index[-1])) axs.set_ylabel(y_lab) axs.set_xlabel(x_lab) fig.tight_layout() return fig
[docs]def plot_obs_sim_cum_year(df, y_lab, start_month_hyd_year=10, x_lab="Time"): """Plot cumulated observed and simulated values for each hydrologic year. Args ---------- df : pd.DataFrame Dataframe with simulated and observed values y_lab : str label of y-axis start_month_hyd_year : int, optional starting month of hydrologic year x_lab : str, optional label of x-axis Returns ---------- figs : list list with figures """ figs = [] sim_headers = filter_headers(df.columns, ["sim"]) bench_headers = filter_headers(df.columns, ["bench"]) df = assign_hyd_year(df.copy(), start_month_hyd_year=start_month_hyd_year) years = pd.unique(df.hyd_year) for year in years: df_year = df.loc[df.hyd_year == year].copy() df_year.loc[df_year.isnull().any(axis=1)] = 0 # plot observed and simulated values fig, axs = plt.subplots(figsize=(6, 3)) for sim_header in sim_headers: axs.plot(df_year.index, df_year.loc[:, sim_header].cumsum(), lw=1, ls="-.", color="red") if bench_headers: for bench_header in bench_headers: axs.plot(df.index, df.loc[:, bench_header].cumsum(), lw=1, ls="-.", color="grey") axs.plot(df_year.index, df_year.loc[:, "obs"].cumsum(), lw=1, color="blue") axs.set_xlim((df_year.index[0], df_year.index[-1])) axs.set_ylabel(y_lab) axs.set_xlabel(str(year)) if len(df_year.index) > 120: axs.xaxis.set_major_formatter(mdates.DateFormatter("%m")) elif len(df_year.index) < 120: axs.xaxis.set_major_formatter(mdates.DateFormatter("%d-%m")) fig.tight_layout() figs.append(fig) return figs
[docs]def plot_obs_sim_cum_year_facet(df, y_lab, start_month_hyd_year=10, x_lab="Time"): """Plot cumulated observed and simulated values for each hydrologic year. Args ---------- df : pd.DataFrame Dataframe with simulated and observed values y_lab : str label of y-axis start_month_hyd_year : int, optional starting month of hydrologic year x_lab : str, optional label of x-axis Returns ---------- fig : Figure """ headers = df.columns sim_headers = filter_headers(df.columns, ["sim"]) bench_headers = filter_headers(df.columns, ["bench"]) df_cs = pd.DataFrame(index=df.index, columns=headers) df = assign_hyd_year(df.copy(), start_month_hyd_year=start_month_hyd_year) years = pd.unique(df.hyd_year) for year in years: df_cs.loc[(df.hyd_year == year), :] = onp.cumsum(df.loc[(df.hyd_year == year), headers].values, axis=0) df_cs.loc[df_cs.isnull().any(axis=1)] = 0 # DataFrame from wide to long format ll_dfs = [] palette = [] for i, sim_header in enumerate(sim_headers): df_sim = df_cs.loc[:, sim_header].to_frame() df_sim.columns = ["sim_obs"] df_sim["type"] = f"sim{i}" ll_dfs.append(df_sim) palette.append("r") if bench_headers: for i, bench_header in enumerate(bench_headers): df_bench = df_cs.loc[:, bench_header].to_frame() df_bench.columns = ["sim_obs"] df_bench["type"] = f"bench{i}" ll_dfs.append(df_bench) palette.append("grey") df_obs = df_cs.loc[:, "obs"].to_frame() df_obs.columns = ["sim_obs"] df_obs["type"] = "obs" ll_dfs.append(df_obs) palette.append("b") df_sim_obs = pd.concat(ll_dfs) df_sim_obs_long = pd.melt(df_sim_obs, id_vars=["type"], value_vars=["sim_obs"], ignore_index=False) df_sim_obs_long = assign_hyd_year(df_sim_obs_long.copy(), start_month_hyd_year=start_month_hyd_year) df_sim_obs_long["time"] = df_sim_obs_long.index df_sim_obs_long.loc[df_sim_obs_long.isnull().any(axis=1)] = 0 df_sim_obs_long = df_sim_obs_long.drop(columns=["variable"]) df_sim_obs_long = df_sim_obs_long.astype( dtype={"type": str, "value": onp.float64, "hyd_year": onp.int64, "time": "datetime64[ns]"} ) df_sim_obs_long.index = range(len(df_sim_obs_long.index)) # Plot the lines on facets g = sns.relplot( data=df_sim_obs_long, x="time", y="value", hue="type", col="hyd_year", kind="line", palette=palette, facet_kws=dict(sharex=False), height=1.5, aspect=1, col_wrap=4, ) g.set_ylabels(y_lab) g.set_xlabels(x_lab) g.set_xticklabels(rotation=90) for axs in g.axes.flatten(): axs.xaxis.set_major_formatter(mdates.DateFormatter("%d-%m")) g.set_titles(template="{col_name}") g._legend.remove() g.tight_layout() fig = g.fig return fig
[docs]def plot_sim_cum_year_facet(df, y_lab, start_month_hyd_year=10, x_lab="Time"): """Plot cumulated observed and simulated values for each hydrologic year. Args ---------- df : pd.DataFrame Dataframe with simulated values y_lab : str label of y-axis start_month_hyd_year : int, optional starting month of hydrologic year x_lab : str, optional label of x-axis Returns ---------- fig : Figure """ headers = df.columns sim_headers = filter_headers(df.columns, ["sim"]) bench_headers = filter_headers(df.columns, ["bench"]) df_cs = pd.DataFrame(index=df.index, columns=headers) df = assign_hyd_year(df.copy(), start_month_hyd_year=start_month_hyd_year) years = pd.unique(df.hyd_year) for year in years: df_cs.loc[(df.hyd_year == year), :] = onp.cumsum(df.loc[(df.hyd_year == year), headers].values, axis=0) df_cs.loc[df_cs.isnull().any(axis=1)] = 0 # DataFrame from wide to long format ll_dfs = [] palette = [] for i, sim_header in enumerate(sim_headers): df_sim = df_cs.loc[:, sim_header].to_frame() df_sim.columns = ["sim_obs"] df_sim["type"] = f"sim{i}" ll_dfs.append(df_sim) palette.append("r") if bench_headers: for i, bench_header in enumerate(bench_headers): df_bench = df_cs.loc[:, bench_header].to_frame() df_bench.columns = ["sim_obs"] df_bench["type"] = f"bench{i}" ll_dfs.append(df_bench) palette.append("grey") df_sim_obs = pd.concat(ll_dfs) df_sim_long = pd.melt(df_sim_obs, id_vars=["type"], value_vars=["sim_obs"], ignore_index=False) df_sim_long = assign_hyd_year(df_sim_long.copy(), start_month_hyd_year=start_month_hyd_year) df_sim_long["time"] = df_sim_long.index df_sim_long.loc[df_sim_long.isnull().any(axis=1)] = 0 df_sim_long = df_sim_long.drop(columns=["variable"]) df_sim_long = df_sim_long.astype( dtype={"type": str, "value": onp.float64, "hyd_year": onp.int64, "time": "datetime64[ns]"} ) df_sim_long.index = range(len(df_sim_long.index)) # Plot the lines on facets g = sns.relplot( data=df_sim_long, x="time", y="value", hue="type", col="hyd_year", kind="line", palette=palette, facet_kws=dict(sharex=False), height=1.5, aspect=1, col_wrap=4, ) g.set_ylabels(y_lab) g.set_xlabels(x_lab) g.set_xticklabels(rotation=90) for axs in g.axes.flatten(): axs.xaxis.set_major_formatter(mdates.DateFormatter("%d-%m")) g.set_titles(template="{col_name}") g._legend.remove() g.tight_layout() fig = g.fig return fig
[docs]def time_to_num(idx, time="days"): """Convert DatetimeIndex to numeric range. Conversion is based either on days or hours. Args ---------- idx : pd.DatetimeIndex variable time index time : str, optional time unit Returns ---------- idx_num : onp.array numerical date range """ if time == "days": idx_num = idx.to_series().diff().astype("timedelta64[m]").cumsum() / 1440 idx_num.iloc[0] = 0 idx_num = idx_num.values elif time == "hours": idx_num = idx.to_series().diff().astype("timedelta64[m]").cumsum() / 60 idx_num.iloc[0] = 0 idx_num = idx_num.values return idx_num
[docs]def assign_hyd_year(df, start_month_hyd_year=10): r""" Assign hydrologic year. Parameters ---------- df : DataFrame contains hydrologic values start_month_hyd_year : int, optional starting month of hydrologic year Returns ---------- DataFrame contains hydrologic values and a column with the assigned hydrologic year """ df.loc[:, "hyd_year"] = df.index.year df.loc[(df.index.month >= start_month_hyd_year), "hyd_year"] += 1 return df
[docs]def assign_seasons(df): r""" Assign seasons. Parameters ---------- df : DataFrame contains hydrologic values Returns ---------- DataFrame contains hydrologic values and a column with the assigned seasons """ idx_winter = (df.index.month == 12) | (df.index.month == 1) | (df.index.month == 2) idx_spring = (df.index.month == 3) | (df.index.month == 4) | (df.index.month == 5) idx_summer = (df.index.month == 6) | (df.index.month == 7) | (df.index.month == 8) idx_autumn = (df.index.month == 9) | (df.index.month == 10) | (df.index.month == 11) df.loc[idx_winter, "seas"] = "DJF" df.loc[idx_spring, "seas"] = "MAM" df.loc[idx_summer, "seas"] = "JJA" df.loc[idx_autumn, "seas"] = "SON" return df
[docs]def calc_api(prec, w, k): r""" Calculate antecedent precipitation index (API). Parameters ---------- prec : (N,)array_like precipitation values w : int window width k : float decay constant ranges between 0.8 and 0.98 Returns ---------- api : (N,)array_like antecedent precipitation index """ api = onp.empty(prec.shape) api.fill(onp.nan) weights = k ** onp.arange(1, w + 1)[::-1] for i in range(w + 1, api.shape[0]): api[i] = onp.sum(prec[(i - w) : i] * weights) return api
[docs]def calc_napi(prec, w, k): r""" Calculate normalized antecedent precipitation index (NAPI). Parameters ---------- prec : (N,)array_like precipitation values w : int window width k : float decay constant ranges between 0.8 and 0.98 Returns ---------- api : (N,)array_like antecedent precipitation index """ napi = onp.empty(prec.shape) napi.fill(onp.nan) weights = k ** onp.arange(0, w + 1)[::-1] weights_sum = onp.sum(k ** onp.arange(1, w + 1)[::-1]) for i in range(w + 1, napi.shape[0]): api = onp.sum(prec[(i - w) : i + 1] * weights) api_mean = onp.mean(prec[(i - w) : i]) * weights_sum napi[i] = api / api_mean return napi
[docs]def calc_rmse(obs, sim): """ Root mean square error (RMSE) Parameters ---------- obs : (N,)array_like observed time series sim : (N,)array_like simulated time series Returns ---------- eff : float Root mean square error (RMSE) """ eff = onp.sqrt(onp.mean((sim - obs) ** 2)) return eff
[docs]def calc_mae(obs, sim): """ Mean absolute error (MAE) Parameters ---------- obs : (N,)array_like observed time series sim : (N,)array_like simulated time series Returns ---------- eff : float Mean absolute error (MAE) """ abs_err = onp.abs(sim - obs) eff = onp.mean(abs_err) return eff
[docs]def calc_mre(obs, sim): """ Mean relative error (MRE) Parameters ---------- obs : (N,)array_like observed time series sim : (N,)array_like simulated time series Returns ---------- eff : float Mean relative error (MRE) """ rel_err = (sim - obs) / obs eff = onp.mean(rel_err) return eff
[docs]def calc_mare(obs, sim): """ Mean absolute relative error (MARE) Parameters ---------- obs : (N,)array_like observed time series sim : (N,)array_like simulated time series Returns ---------- eff : float Mean absolute relative error (MARE) """ abs_err = onp.abs(sim - obs) rel_err = abs_err / obs eff = onp.mean(rel_err) return eff
[docs]def calc_ve(obs, sim): """ Volumetric efficiency (VE) Parameters ---------- obs : (N,)array_like observed time series sim : (N,)array_like simulated time series Returns ---------- eff : float Volumetric efficiency (VE) """ abs_err = onp.abs(sim - obs) sum_abs_err = onp.sum(abs_err) sum_obs = onp.sum(obs) eff = sum_abs_err / sum_obs return eff
[docs]def calc_rbs(obs, sim): """ Relative bias of sums (RBS) Parameters ---------- obs : (N,)array_like observed time series sim : (N,)array_like simulated time series Returns ---------- eff : float relative bias of sums (RBS) """ eff = (onp.sum(sim) - onp.sum(obs)) / onp.sum(obs) return eff
[docs]def calc_temp_cor(obs, sim, r="pearson"): """ Calculate temporal correlation between observed and simulated time series. Parameters ---------- obs : (N,)array_like Observed time series as 1-D array sim : (N,)array_like Simulated time series r : str, optional Either Spearman correlation coefficient ('spearman') or Pearson correlation coefficient ('pearson') can be used to describe the temporalcorrelation. The default is to calculate the Pearson correlation. Returns ---------- temp_cor : float correlation between observed and simulated time series Examples -------- Provide arrays with equal length >>> from de import de >>> import numpy as np >>> obs = onp.array([1.5, 1, 0.8, 0.85, 1.5, 2]) >>> sim = onp.array([1.6, 1.3, 1, 0.8, 1.2, 2.5]) >>> de.calc_temp_cor(obs, sim) 0.8940281850583509 """ if len(obs) != len(sim): raise AssertionError("Arrays are not of equal length!") if r == "spearman": r = sp.stats.spearmanr(obs, sim) temp_cor = r[0] if onp.isnan(temp_cor): temp_cor = 0 elif r == "pearson": r = sp.stats.pearsonr(obs, sim) temp_cor = r[0] if onp.isnan(temp_cor): temp_cor = 0 return temp_cor
[docs]def calc_kge_beta(obs, sim): r""" Calculate the beta term of Kling-Gupta-Efficiency (KGE). Parameters ---------- obs : (N,)array_like Observed time series as 1-D array sim : (N,)array_like Simulated time series as 1-D array Returns ---------- kge_beta : float alpha value Notes ---------- .. math:: \beta = \frac{\mu_{sim}}{\mu_{obs}} Examples -------- Provide arrays with equal length >>> from de import de >>> import numpy as np >>> obs = onp.array([1.5, 1, 0.8, 0.85, 1.5, 2]) >>> sim = onp.array([1.6, 1.3, 1, 0.8, 1.2, 2.5]) >>> de.calc_kge_beta(obs, sim) 1.0980392156862746 References ---------- Gupta, H. V., Kling, H., Yilmaz, K. K., and Martinez, G. F.: Decomposition of the mean squared error and NSE performance criteria: Implications for improving hydrological modelling, Journal of Hydrology, 377, 80-91, 10.1016/j.jhydrol.2009.08.003, 2009. Kling, H., Fuchs, M., and Paulin, M.: Runoff conditions in the upper Danube basin under an ensemble of climate change scenarios, Journal of Hydrology, 424-425, 264-277, 10.1016/j.jhydrol.2012.01.011, 2012. Pool, S., Vis, M., and Seibert, J.: Evaluating model performance: towards a non-parametric variant of the Kling-Gupta efficiency, Hydrological Sciences Journal, 63, 1941-1953, 10.1080/02626667.2018.1552002, 2018. """ if len(obs) != len(sim): raise AssertionError("Arrays are not of equal length!") # calculate alpha term obs_mean = onp.mean(obs) sim_mean = onp.mean(sim) kge_beta = sim_mean / obs_mean return kge_beta
[docs]def calc_kge_alpha(obs, sim): r""" Calculate the alpha term of the Kling-Gupta-Efficiency (KGE). Parameters ---------- obs : (N,)array_like Observed time series as 1-D array sim : (N,)array_like Simulated time series Returns ---------- kge_alpha : float alpha value Notes ---------- .. math:: \alpha = \frac{\sigma_{sim}}{\sigma_{obs}} Examples -------- Provide arrays with equal length >>> from de import de >>> import numpy as np >>> obs = onp.array([1.5, 1, 0.8, 0.85, 1.5, 2]) >>> sim = onp.array([1.6, 1.3, 1, 0.8, 1.2, 2.5]) >>> kge.calc_kge_alpha(obs, sim) 1.2812057455166919 References ---------- Gupta, H. V., Kling, H., Yilmaz, K. K., and Martinez, G. F.: Decomposition of the mean squared error and NSE performance criteria: Implications for improving hydrological modelling, Journal of Hydrology, 377, 80-91, 10.1016/j.jhydrol.2009.08.003, 2009. Kling, H., Fuchs, M., and Paulin, M.: Runoff conditions in the upper Danube basin under an ensemble of climate change scenarios, Journal of Hydrology, 424-425, 264-277, 10.1016/j.jhydrol.2012.01.011, 2012. Pool, S., Vis, M., and Seibert, J.: Evaluating model performance: towards a non-parametric variant of the Kling-Gupta efficiency, Hydrological Sciences Journal, 63, 1941-1953, 10.1080/02626667.2018.1552002, 2018. """ if len(obs) != len(sim): raise AssertionError("Arrays are not of equal length!") obs_std = onp.std(obs) sim_std = onp.std(sim) kge_alpha = sim_std / obs_std return kge_alpha
[docs]def calc_kge_gamma(obs, sim): r""" Calculate the gamma term of Kling-Gupta-Efficiency (KGE). Parameters ---------- obs : (N,)array_like Observed time series as 1-D array sim : (N,)array_like Simulated time series as 1-D array Returns ---------- kge_gamma : float gamma value Notes ---------- .. math:: \gamma = \frac{CV_{sim}}{CV_{obs}} Examples -------- Provide arrays with equal length >>> from de import de >>> import numpy as np >>> obs = onp.array([1.5, 1, 0.8, 0.85, 1.5, 2]) >>> sim = onp.array([1.6, 1.3, 1, 0.8, 1.2, 2.5]) >>> kge.calc_kge_gamma(obs, sim) 1.166812375381273 References ---------- Gupta, H. V., Kling, H., Yilmaz, K. K., and Martinez, G. F.: Decomposition of the mean squared error and NSE performance criteria: Implications for improving hydrological modelling, Journal of Hydrology, 377, 80-91, 10.1016/j.jhydrol.2009.08.003, 2009. Kling, H., Fuchs, M., and Paulin, M.: Runoff conditions in the upper Danube basin under an ensemble of climate change scenarios, Journal of Hydrology, 424-425, 264-277, 10.1016/j.jhydrol.2012.01.011, 2012. Pool, S., Vis, M., and Seibert, J.: Evaluating model performance: towards a non-parametric variant of the Kling-Gupta efficiency, Hydrological Sciences Journal, 63, 1941-1953, 10.1080/02626667.2018.1552002, 2018. """ if len(obs) != len(sim): raise AssertionError("Arrays are not of equal length!") obs_mean = onp.mean(obs) sim_mean = onp.mean(sim) obs_std = onp.std(obs) sim_std = onp.std(sim) obs_cv = obs_std / obs_mean sim_cv = sim_std / sim_mean kge_gamma = sim_cv / obs_cv return kge_gamma
[docs]def calc_kge(obs, sim, r="pearson", var="std"): r""" Calculate Kling-Gupta-Efficiency (KGE). Parameters ---------- obs : (N,)array_like Observed time series as 1-D array sim : (N,)array_like Simulated time series as 1-D array r : str, optional Either Spearman correlation coefficient ('spearman'; Pool et al. 2018) or Pearson correlation coefficient ('pearson'; Gupta et al. 2009) can be used to describe the temporal correlation. The default is to calculate the Pearson correlation. var : str, optional Either coefficient of variation ('cv'; Kling et al. 2012) or standard deviation ('std'; Gupta et al. 2009, Pool et al. 2018) to describe the gamma term. The default is to calculate the standard deviation. Returns ---------- eff : float Kling-Gupta-Efficiency Examples -------- Provide arrays with equal length >>> from de import de >>> import numpy as np >>> obs = onp.array([1.5, 1, 0.8, 0.85, 1.5, 2]) >>> sim = onp.array([1.6, 1.3, 1, 0.8, 1.2, 2.5]) >>> kge.calc_kge(obs, sim) 0.683901305466148 Notes ---------- .. math:: KGE = 1 - \sqrt{(\beta - 1)^2 + (\alpha - 1)^2 + (r - 1)^2} KGE = 1 - \sqrt{(\frac{\mu_{sim}}{\mu_{obs}} - 1)^2 + (\frac{\sigma_{sim}}{\sigma_{obs}} - 1)^2 + (r - 1)^2} KGE = 1 - \sqrt{(\beta - 1)^2 + (\gamma - 1)^2 + (r - 1)^2} KGE = 1 - \sqrt{(\frac{\mu_{sim}}{\mu_{obs}} - 1)^2 + (\frac{CV_{sim}}{CV_{obs}} - 1)^2 + (r - 1)^2} References ---------- Gupta, H. V., Kling, H., Yilmaz, K. K., and Martinez, G. F.: Decomposition of the mean squared error and NSE performance criteria: Implications for improving hydrological modelling, Journal of Hydrology, 377, 80-91, 10.1016/j.jhydrol.2009.08.003, 2009. Kling, H., Fuchs, M., and Paulin, M.: Runoff conditions in the upper Danube basin under an ensemble of climate change scenarios, Journal of Hydrology, 424-425, 264-277, 10.1016/j.jhydrol.2012.01.011, 2012. Pool, S., Vis, M., and Seibert, J.: Evaluating model performance: towards a non-parametric variant of the Kling-Gupta efficiency, Hydrological Sciences Journal, 63, 1941-1953, 10.1080/02626667.2018.1552002, 2018. """ if len(obs) != len(sim): raise AssertionError("Arrays are not of equal length!") # calculate alpha term obs_mean = onp.mean(obs) sim_mean = onp.mean(sim) kge_beta = sim_mean / obs_mean # calculate KGE with gamma term if var == "cv": kge_gamma = calc_kge_gamma(obs, sim) temp_cor = calc_temp_cor(obs, sim, r=r) eff = 1 - onp.sqrt((kge_beta - 1) ** 2 + (kge_gamma - 1) ** 2 + (temp_cor - 1) ** 2) # calculate KGE with beta term elif var == "std": kge_alpha = calc_kge_alpha(obs, sim) temp_cor = calc_temp_cor(obs, sim, r=r) eff = 1 - onp.sqrt((kge_beta - 1) ** 2 + (kge_alpha - 1) ** 2 + (temp_cor - 1) ** 2) return eff
[docs]def calc_nse(obs, sim): r""" Calculate Nash-Sutcliffe-Efficiency (NSE). Parameters ---------- obs : (N,)array_like Observed time series as 1-D array sim : (N,)array_like Simulated time series as 1-D array Returns ---------- eff : float Nash-Sutcliffe-Efficiency Examples -------- Provide arrays with equal length >>> from de import de >>> import numpy as np >>> obs = onp.array([1.5, 1, 0.8, 0.85, 1.5, 2]) >>> sim = onp.array([1.6, 1.3, 1, 0.8, 1.2, 2.5]) >>> nse.calc_nse(obs, sim) 0.5648252536640361 Notes ---------- .. math:: NSE = 1 - \frac{\sum_{t=1}^{t=T} (Q_{sim}(t) - Q_{obs}(t))^2}{\sum_{t=1}^{t=T} (Q_{obs}(t) - \overline{Q_{obs}})^2} References ---------- Nash, J. E., and Sutcliffe, J. V.: River flow forecasting through conceptual models part I - A discussion of principles, Journal of Hydrology, 10, 282-290, 10.1016/0022-1694(70)90255-6, 1970. """ if len(obs) != len(sim): raise AssertionError("Arrays are not of equal length!") sim_obs_diff = onp.sum((sim - obs) ** 2) obs_mean = onp.mean(obs) obs_diff_mean = onp.sum((obs - obs_mean) ** 2) eff = 1 - (sim_obs_diff / obs_diff_mean) return eff