Source code for roger.tools.evaluation

import re
import pandas as pd
import numpy as onp
import scipy as sp
import matplotlib.dates as mdates
import matplotlib as mpl
import seaborn as sns

mpl.use("agg")
import matplotlib.pyplot as plt  # noqa: E402

sns.set_style("ticks")


def filter_headers(string, substr):
    return [str for str in string if re.match(r"[^\d]+|^", str).group(0) in substr]


[docs]def join_obs_on_sim(idx, sim_vals, df_obs, rm_na=False):
    """Join observed values on simulated values.

    Args
    ----------
    idx : pd.DatetimeIndex
        time index

    sim_vals : onp.ndarray
        simulated values

    df_obs : pd.DataFrame
        observed values

    rm_na : boolean, optional
        whether NaNs are removed. default is False.

    Returns
    ----------
    df : pd.DataFrame
        DataFrame containing simulated and observed values.
    """
    # dataframe with simulated values
    if sim_vals.ndim <= 1:
        df_sim = pd.DataFrame(data=sim_vals, index=idx, columns=["sim"])
    else:
        sim_header = [f"sim{i}" for i in range(sim_vals.shape[-1])]
        df_sim = pd.DataFrame(data=sim_vals, index=idx, columns=sim_header)

    # dataframe with observed values
    df = pd.DataFrame(index=idx)
    df_obs = df.join(df_obs)
    df_obs.columns = ["obs"]

    df = df_sim.join(df_obs)

    if rm_na:
        df = df.dropna()

    return df


[docs]def plot_sim(df, y_lab="", ls_obs="line", x_lab="Time", ylim=None):
    """Plot simulated values.

    Args
    ----------
    df : pd.DataFrame
        Dataframe with simulated and observed values

    y_lab : str
        label of y-axis

    ls_obs : str, optional
        linestyle of observations

    x_lab : str, optional
        label of x-axis

    ylim : tuple, optional
        y-axis limits

    Returns
    ----------
    fig : Figure
        Plot for observed and simulated values
    """
    # plot observed and simulated values
    fig, axs = plt.subplots(figsize=(6, 2))
    axs.plot(df.index, df.iloc[:, 0], lw=1, ls="-", color="black")
    axs.set_xlim((df.index[0], df.index[-1]))
    if ylim:
        axs.set_ylim(ylim)
    axs.set_ylabel(y_lab)
    axs.set_xlabel(x_lab)
    fig.tight_layout()

    return fig


[docs]def plot_sim_cum(df, y_lab="", ls_obs="line", x_lab="Time", ylim=None):
    """Plot simulated values.

    Args
    ----------
    df : pd.DataFrame
        Dataframe with simulated and observed values

    y_lab : str
        label of y-axis

    ls_obs : str, optional
        linestyle of observations

    x_lab : str, optional
        label of x-axis

    ylim : tuple, optional
        y-axis limits

    Returns
    ----------
    fig : Figure
        Plot for observed and simulated values
    """
    # plot observed and simulated values
    fig, axs = plt.subplots(figsize=(6, 2))
    axs.plot(df.index, df.iloc[:, 0].cumsum(), lw=1, ls="-", color="black")
    axs.set_xlim((df.index[0], df.index[-1]))
    if ylim:
        axs.set_ylim(ylim)
    axs.set_ylabel(y_lab)
    axs.set_xlabel(x_lab)
    fig.tight_layout()

    return fig


[docs]def plot_obs_sim(df, y_lab="", ls_obs="line", x_lab="Time", ylim=None):
    """Plot observed and simulated values.

    Args
    ----------
    df : pd.DataFrame
        Dataframe with simulated and observed values

    y_lab : str
        label of y-axis

    fmt_x : str, optional
        Format of x-axis. Default is numerical ('num'). Alternatively, date
        format can be used ('date').

    ls_obs : str, optional
        linestyle of observations

    x_lab : str, optional
        label of x-axis

    ylim : tuple, optional
        y-axis limits

    Returns
    ----------
    fig : Figure
        Plot for observed and simulated values
    """
    # plot observed and simulated values
    sim_headers = filter_headers(df.columns, ["sim"])
    bench_headers = filter_headers(df.columns, ["bench"])
    fig, axs = plt.subplots(figsize=(6, 2))
    for sim_header in sim_headers:
        axs.plot(df.index, df.loc[:, sim_header], lw=1, ls="-.", color="red")
    if bench_headers:
        for bench_header in bench_headers:
            axs.plot(df.index, df.loc[:, bench_header], lw=1, ls="-.", color="grey")
    if ls_obs == "line":
        axs.plot(df.index, df.loc[:, "obs"], lw=1.2, color="blue", alpha=0.5)
    axs.scatter(df.index, df.loc[:, "obs"], color="blue", s=1, alpha=0.5)
    axs.set_xlim((df.index[0], df.index[-1]))
    if ylim:
        axs.set_ylim(ylim)
    axs.set_ylabel(y_lab)
    axs.set_xlabel(x_lab)
    fig.tight_layout()

    return fig


[docs]def plot_obs_sim_year(df, y_lab, start_month_hyd_year=10, ls_obs="line", x_lab="Time", ylim=None):
    """Plot observed and simulated values.

    Args
    ----------
    df : pd.DataFrame
        Dataframe with simulated and observed values

    y_lab : str
        label of y-axis

    start_month_hyd_year : int, optional
        starting month of hydrologic year

    ls_obs : str, optional
        linestyle of observations

    x_lab : str, optional
        label of x-axis

    ylim : tuple, optional
        y-axis limits

    Returns
    ----------
    figs : list
        list with figures
    """
    figs = []
    sim_headers = filter_headers(df.columns, ["sim"])
    bench_headers = filter_headers(df.columns, ["bench"])
    df = assign_hyd_year(df.copy(), start_month_hyd_year=start_month_hyd_year)
    years = pd.unique(df.hyd_year)
    for year in years:
        df_year = df.loc[df.hyd_year == year].copy()
        df_year.loc[df_year.isnull().any(axis=1)] = 0
        # plot observed and simulated values
        fig, axs = plt.subplots(figsize=(6, 2))
        for sim_header in sim_headers:
            axs.plot(df.index, df.loc[:, sim_header], lw=1, ls="-.", color="red")
        if bench_headers:
            for bench_header in bench_headers:
                axs.plot(df.index, df.loc[:, bench_header], lw=1, ls="-.", color="grey")
        if ls_obs == "line":
            axs.plot(df_year.index, df_year.loc[:, "obs"], lw=1.2, color="blue", alpha=0.5)
        axs.scatter(df_year.index, df_year.iloc[:, "obs"], color="blue", s=1, alpha=0.5)
        axs.set_xlim((df_year.index[0], df_year.index[-1]))
        if ylim:
            axs.set_ylim(ylim)
        axs.set_ylabel(y_lab)
        axs.set_xlabel(str(year))
        if len(df_year.index) > 120:
            axs.xaxis.set_major_formatter(mdates.DateFormatter("%m"))
        elif len(df_year.index) < 120:
            axs.xaxis.set_major_formatter(mdates.DateFormatter("%d-%m"))
        fig.tight_layout()
        figs.append(fig)

    return figs


[docs]def plot_obs_sim_cum(df, y_lab, x_lab="Time"):
    """Plot cumulated observed and simulated values.

    Args
    ----------
    df : pd.DataFrame
        Dataframe with simulated and observed values

    y_lab : str
        label of y-axis

    x_lab : str, optional
        label of x-axis

    Returns
    ----------
    fig : Figure
        Plot for observed and simulated values
    """
    df.loc[df.isna().any(axis=1)] = 0

    # plot observed and simulated values
    sim_headers = filter_headers(df.columns, ["sim"])
    bench_headers = filter_headers(df.columns, ["bench"])
    fig, axs = plt.subplots(figsize=(6, 2))
    for sim_header in sim_headers:
        axs.plot(df.index, df.loc[:, sim_header].cumsum(), lw=1, ls="-.", color="red")
    if bench_headers:
        for bench_header in bench_headers:
            axs.plot(df.index, df.loc[:, bench_header].cumsum(), lw=1, ls="-.", color="grey")
    axs.plot(df.index, df.loc[:, "obs"].cumsum(), lw=1.5, color="blue", alpha=0.5)
    axs.set_xlim((df.index[0], df.index[-1]))
    axs.set_ylabel(y_lab)
    axs.set_xlabel(x_lab)
    fig.tight_layout()

    return fig


[docs]def plot_obs_sim_cum_year(df, y_lab, start_month_hyd_year=10, x_lab="Time"):
    """Plot cumulated observed and simulated values for each hydrologic year.

    Args
    ----------
    df : pd.DataFrame
        Dataframe with simulated and observed values

    y_lab : str
        label of y-axis

    start_month_hyd_year : int, optional
        starting month of hydrologic year

    x_lab : str, optional
        label of x-axis

    Returns
    ----------
    figs : list
        list with figures
    """
    figs = []
    sim_headers = filter_headers(df.columns, ["sim"])
    bench_headers = filter_headers(df.columns, ["bench"])
    df = assign_hyd_year(df.copy(), start_month_hyd_year=start_month_hyd_year)
    years = pd.unique(df.hyd_year)
    for year in years:
        df_year = df.loc[df.hyd_year == year].copy()
        df_year.loc[df_year.isnull().any(axis=1)] = 0
        # plot observed and simulated values
        fig, axs = plt.subplots(figsize=(6, 3))
        for sim_header in sim_headers:
            axs.plot(df_year.index, df_year.loc[:, sim_header].cumsum(), lw=1, ls="-.", color="red")
        if bench_headers:
            for bench_header in bench_headers:
                axs.plot(df.index, df.loc[:, bench_header].cumsum(), lw=1, ls="-.", color="grey")
        axs.plot(df_year.index, df_year.loc[:, "obs"].cumsum(), lw=1, color="blue")
        axs.set_xlim((df_year.index[0], df_year.index[-1]))
        axs.set_ylabel(y_lab)
        axs.set_xlabel(str(year))
        if len(df_year.index) > 120:
            axs.xaxis.set_major_formatter(mdates.DateFormatter("%m"))
        elif len(df_year.index) < 120:
            axs.xaxis.set_major_formatter(mdates.DateFormatter("%d-%m"))
        fig.tight_layout()
        figs.append(fig)

    return figs


[docs]def plot_obs_sim_cum_year_facet(df, y_lab, start_month_hyd_year=10, x_lab="Time"):
    """Plot cumulated observed and simulated values for each hydrologic year.

    Args
    ----------
    df : pd.DataFrame
        Dataframe with simulated and observed values

    y_lab : str
        label of y-axis

    start_month_hyd_year : int, optional
        starting month of hydrologic year

    x_lab : str, optional
        label of x-axis

    Returns
    ----------
    fig : Figure
    """
    headers = df.columns
    sim_headers = filter_headers(df.columns, ["sim"])
    bench_headers = filter_headers(df.columns, ["bench"])
    df_cs = pd.DataFrame(index=df.index, columns=headers)
    df = assign_hyd_year(df.copy(), start_month_hyd_year=start_month_hyd_year)
    years = pd.unique(df.hyd_year)
    for year in years:
        df_cs.loc[(df.hyd_year == year), :] = onp.cumsum(df.loc[(df.hyd_year == year), headers].values, axis=0)
    df_cs.loc[df_cs.isnull().any(axis=1)] = 0

    # DataFrame from wide to long format
    ll_dfs = []
    palette = []
    for i, sim_header in enumerate(sim_headers):
        df_sim = df_cs.loc[:, sim_header].to_frame()
        df_sim.columns = ["sim_obs"]
        df_sim["type"] = f"sim{i}"
        ll_dfs.append(df_sim)
        palette.append("r")
    if bench_headers:
        for i, bench_header in enumerate(bench_headers):
            df_bench = df_cs.loc[:, bench_header].to_frame()
            df_bench.columns = ["sim_obs"]
            df_bench["type"] = f"bench{i}"
            ll_dfs.append(df_bench)
            palette.append("grey")
    df_obs = df_cs.loc[:, "obs"].to_frame()
    df_obs.columns = ["sim_obs"]
    df_obs["type"] = "obs"
    ll_dfs.append(df_obs)
    palette.append("b")
    df_sim_obs = pd.concat(ll_dfs)
    df_sim_obs_long = pd.melt(df_sim_obs, id_vars=["type"], value_vars=["sim_obs"], ignore_index=False)
    df_sim_obs_long = assign_hyd_year(df_sim_obs_long.copy(), start_month_hyd_year=start_month_hyd_year)
    df_sim_obs_long["time"] = df_sim_obs_long.index
    df_sim_obs_long.loc[df_sim_obs_long.isnull().any(axis=1)] = 0
    df_sim_obs_long = df_sim_obs_long.drop(columns=["variable"])
    df_sim_obs_long = df_sim_obs_long.astype(
        dtype={"type": str, "value": onp.float64, "hyd_year": onp.int64, "time": "datetime64[ns]"}
    )
    df_sim_obs_long.index = range(len(df_sim_obs_long.index))
    # Plot the lines on facets
    g = sns.relplot(
        data=df_sim_obs_long,
        x="time",
        y="value",
        hue="type",
        col="hyd_year",
        kind="line",
        palette=palette,
        facet_kws=dict(sharex=False),
        height=1.5,
        aspect=1,
        col_wrap=4,
    )
    g.set_ylabels(y_lab)
    g.set_xlabels(x_lab)
    g.set_xticklabels(rotation=90)
    for axs in g.axes.flatten():
        axs.xaxis.set_major_formatter(mdates.DateFormatter("%d-%m"))
    g.set_titles(template="{col_name}")
    g._legend.remove()
    g.tight_layout()
    fig = g.fig

    return fig


[docs]def plot_sim_cum_year_facet(df, y_lab, start_month_hyd_year=10, x_lab="Time"):
    """Plot cumulated observed and simulated values for each hydrologic year.

    Args
    ----------
    df : pd.DataFrame
        Dataframe with simulated values

    y_lab : str
        label of y-axis

    start_month_hyd_year : int, optional
        starting month of hydrologic year

    x_lab : str, optional
        label of x-axis

    Returns
    ----------
    fig : Figure
    """
    headers = df.columns
    sim_headers = filter_headers(df.columns, ["sim"])
    bench_headers = filter_headers(df.columns, ["bench"])
    df_cs = pd.DataFrame(index=df.index, columns=headers)
    df = assign_hyd_year(df.copy(), start_month_hyd_year=start_month_hyd_year)
    years = pd.unique(df.hyd_year)
    for year in years:
        df_cs.loc[(df.hyd_year == year), :] = onp.cumsum(df.loc[(df.hyd_year == year), headers].values, axis=0)
    df_cs.loc[df_cs.isnull().any(axis=1)] = 0

    # DataFrame from wide to long format
    ll_dfs = []
    palette = []
    for i, sim_header in enumerate(sim_headers):
        df_sim = df_cs.loc[:, sim_header].to_frame()
        df_sim.columns = ["sim_obs"]
        df_sim["type"] = f"sim{i}"
        ll_dfs.append(df_sim)
        palette.append("r")
    if bench_headers:
        for i, bench_header in enumerate(bench_headers):
            df_bench = df_cs.loc[:, bench_header].to_frame()
            df_bench.columns = ["sim_obs"]
            df_bench["type"] = f"bench{i}"
            ll_dfs.append(df_bench)
            palette.append("grey")
    df_sim_obs = pd.concat(ll_dfs)
    df_sim_long = pd.melt(df_sim_obs, id_vars=["type"], value_vars=["sim_obs"], ignore_index=False)
    df_sim_long = assign_hyd_year(df_sim_long.copy(), start_month_hyd_year=start_month_hyd_year)
    df_sim_long["time"] = df_sim_long.index
    df_sim_long.loc[df_sim_long.isnull().any(axis=1)] = 0
    df_sim_long = df_sim_long.drop(columns=["variable"])
    df_sim_long = df_sim_long.astype(
        dtype={"type": str, "value": onp.float64, "hyd_year": onp.int64, "time": "datetime64[ns]"}
    )
    df_sim_long.index = range(len(df_sim_long.index))
    # Plot the lines on facets
    g = sns.relplot(
        data=df_sim_long,
        x="time",
        y="value",
        hue="type",
        col="hyd_year",
        kind="line",
        palette=palette,
        facet_kws=dict(sharex=False),
        height=1.5,
        aspect=1,
        col_wrap=4,
    )
    g.set_ylabels(y_lab)
    g.set_xlabels(x_lab)
    g.set_xticklabels(rotation=90)
    for axs in g.axes.flatten():
        axs.xaxis.set_major_formatter(mdates.DateFormatter("%d-%m"))
    g.set_titles(template="{col_name}")
    g._legend.remove()
    g.tight_layout()
    fig = g.fig

    return fig


[docs]def time_to_num(idx, time="days"):
    """Convert DatetimeIndex to numeric range. Conversion is based either on
    days or hours.

    Args
    ----------
    idx : pd.DatetimeIndex
        variable time index

    time : str, optional
        time unit

    Returns
    ----------
    idx_num : onp.array
        numerical date range
    """
    if time == "days":
        idx_num = idx.to_series().diff().astype("timedelta64[m]").cumsum() / 1440
        idx_num.iloc[0] = 0
        idx_num = idx_num.values

    elif time == "hours":
        idx_num = idx.to_series().diff().astype("timedelta64[m]").cumsum() / 60
        idx_num.iloc[0] = 0
        idx_num = idx_num.values

    return idx_num


[docs]def assign_hyd_year(df, start_month_hyd_year=10):
    r"""
    Assign hydrologic year.

    Parameters
    ----------
    df : DataFrame
        contains hydrologic values

    start_month_hyd_year : int, optional
        starting month of hydrologic year

    Returns
    ----------
    DataFrame
        contains hydrologic values and a column with the assigned hydrologic
        year
    """
    df.loc[:, "hyd_year"] = df.index.year
    df.loc[(df.index.month >= start_month_hyd_year), "hyd_year"] += 1

    return df


[docs]def assign_seasons(df):
    r"""
    Assign seasons.

    Parameters
    ----------
    df : DataFrame
        contains hydrologic values

    Returns
    ----------
    DataFrame
        contains hydrologic values and a column with the assigned seasons
    """
    idx_winter = (df.index.month == 12) | (df.index.month == 1) | (df.index.month == 2)
    idx_spring = (df.index.month == 3) | (df.index.month == 4) | (df.index.month == 5)
    idx_summer = (df.index.month == 6) | (df.index.month == 7) | (df.index.month == 8)
    idx_autumn = (df.index.month == 9) | (df.index.month == 10) | (df.index.month == 11)
    df.loc[idx_winter, "seas"] = "DJF"
    df.loc[idx_spring, "seas"] = "MAM"
    df.loc[idx_summer, "seas"] = "JJA"
    df.loc[idx_autumn, "seas"] = "SON"

    return df


[docs]def calc_api(prec, w, k):
    r"""
    Calculate antecedent precipitation index (API).

    Parameters
    ----------
    prec : (N,)array_like
        precipitation values

    w : int
        window width

    k : float
        decay constant ranges between 0.8 and 0.98

    Returns
    ----------
    api : (N,)array_like
        antecedent precipitation index
    """
    api = onp.empty(prec.shape)
    api.fill(onp.nan)
    weights = k ** onp.arange(1, w + 1)[::-1]
    for i in range(w + 1, api.shape[0]):
        api[i] = onp.sum(prec[(i - w) : i] * weights)
    return api


[docs]def calc_napi(prec, w, k):
    r"""
    Calculate normalized antecedent precipitation index (NAPI).

    Parameters
    ----------
    prec : (N,)array_like
        precipitation values

    w : int
        window width

    k : float
        decay constant ranges between 0.8 and 0.98

    Returns
    ----------
    api : (N,)array_like
        antecedent precipitation index
    """
    napi = onp.empty(prec.shape)
    napi.fill(onp.nan)
    weights = k ** onp.arange(0, w + 1)[::-1]
    weights_sum = onp.sum(k ** onp.arange(1, w + 1)[::-1])
    for i in range(w + 1, napi.shape[0]):
        api = onp.sum(prec[(i - w) : i + 1] * weights)
        api_mean = onp.mean(prec[(i - w) : i]) * weights_sum
        napi[i] = api / api_mean
    return napi


[docs]def calc_rmse(obs, sim):
    """
    Root mean square error (RMSE)

    Parameters
    ----------
    obs : (N,)array_like
        observed time series

    sim : (N,)array_like
        simulated time series

    Returns
    ----------
    eff : float
        Root mean square error (RMSE)
    """
    eff = onp.sqrt(onp.mean((sim - obs) ** 2))

    return eff


[docs]def calc_mae(obs, sim):
    """
    Mean absolute error (MAE)

    Parameters
    ----------
    obs : (N,)array_like
        observed time series

    sim : (N,)array_like
        simulated time series

    Returns
    ----------
    eff : float
        Mean absolute error (MAE)
    """
    abs_err = onp.abs(sim - obs)
    eff = onp.mean(abs_err)

    return eff


[docs]def calc_mre(obs, sim):
    """
    Mean relative error (MRE)

    Parameters
    ----------
    obs : (N,)array_like
        observed time series

    sim : (N,)array_like
        simulated time series

    Returns
    ----------
    eff : float
        Mean relative error (MRE)
    """
    rel_err = (sim - obs) / obs
    eff = onp.mean(rel_err)

    return eff


[docs]def calc_mare(obs, sim):
    """
    Mean absolute relative error (MARE)

    Parameters
    ----------
    obs : (N,)array_like
        observed time series

    sim : (N,)array_like
        simulated time series

    Returns
    ----------
    eff : float
        Mean absolute relative error (MARE)
    """
    abs_err = onp.abs(sim - obs)
    rel_err = abs_err / obs
    eff = onp.mean(rel_err)

    return eff


[docs]def calc_ve(obs, sim):
    """
    Volumetric efficiency (VE)

    Parameters
    ----------
    obs : (N,)array_like
        observed time series

    sim : (N,)array_like
        simulated time series

    Returns
    ----------
    eff : float
        Volumetric efficiency (VE)
    """
    abs_err = onp.abs(sim - obs)
    sum_abs_err = onp.sum(abs_err)
    sum_obs = onp.sum(obs)
    eff = sum_abs_err / sum_obs

    return eff


[docs]def calc_rbs(obs, sim):
    """
    Relative bias of sums (RBS)

    Parameters
    ----------
    obs : (N,)array_like
        observed time series

    sim : (N,)array_like
        simulated time series

    Returns
    ----------
    eff : float
        relative bias of sums (RBS)
    """
    eff = (onp.sum(sim) - onp.sum(obs)) / onp.sum(obs)

    return eff


[docs]def calc_temp_cor(obs, sim, r="pearson"):
    """
    Calculate temporal correlation between observed and simulated
    time series.

    Parameters
    ----------
    obs : (N,)array_like
        Observed time series as 1-D array

    sim : (N,)array_like
        Simulated time series

    r : str, optional
        Either Spearman correlation coefficient ('spearman') or Pearson
        correlation coefficient ('pearson') can be used to describe the
        temporalcorrelation. The default is to calculate the Pearson
        correlation.

    Returns
    ----------
    temp_cor : float
        correlation between observed and simulated time series

    Examples
    --------
    Provide arrays with equal length

    >>> from de import de
    >>> import numpy as np
    >>> obs = onp.array([1.5, 1, 0.8, 0.85, 1.5, 2])
    >>> sim = onp.array([1.6, 1.3, 1, 0.8, 1.2, 2.5])
    >>> de.calc_temp_cor(obs, sim)
    0.8940281850583509
    """
    if len(obs) != len(sim):
        raise AssertionError("Arrays are not of equal length!")

    if r == "spearman":
        r = sp.stats.spearmanr(obs, sim)
        temp_cor = r[0]

        if onp.isnan(temp_cor):
            temp_cor = 0

    elif r == "pearson":
        r = sp.stats.pearsonr(obs, sim)
        temp_cor = r[0]

        if onp.isnan(temp_cor):
            temp_cor = 0

    return temp_cor


[docs]def calc_kge_beta(obs, sim):
    r"""
    Calculate the beta term of Kling-Gupta-Efficiency (KGE).

    Parameters
    ----------
    obs : (N,)array_like
        Observed time series as 1-D array

    sim : (N,)array_like
        Simulated time series as 1-D array

    Returns
    ----------
    kge_beta : float
        alpha value

    Notes
    ----------
    .. math::

        \beta = \frac{\mu_{sim}}{\mu_{obs}}

    Examples
    --------
    Provide arrays with equal length

    >>> from de import de
    >>> import numpy as np
    >>> obs = onp.array([1.5, 1, 0.8, 0.85, 1.5, 2])
    >>> sim = onp.array([1.6, 1.3, 1, 0.8, 1.2, 2.5])
    >>> de.calc_kge_beta(obs, sim)
    1.0980392156862746

    References
    ----------
    Gupta, H. V., Kling, H., Yilmaz, K. K., and Martinez, G. F.: Decomposition
    of the mean squared error and NSE performance criteria: Implications for
    improving hydrological modelling, Journal of Hydrology, 377, 80-91,
    10.1016/j.jhydrol.2009.08.003, 2009.

    Kling, H., Fuchs, M., and Paulin, M.: Runoff conditions in the upper
    Danube basin under an ensemble of climate change scenarios, Journal of
    Hydrology, 424-425, 264-277, 10.1016/j.jhydrol.2012.01.011, 2012.

    Pool, S., Vis, M., and Seibert, J.: Evaluating model performance: towards a
    non-parametric variant of the Kling-Gupta efficiency, Hydrological Sciences
    Journal, 63, 1941-1953, 10.1080/02626667.2018.1552002, 2018.
    """
    if len(obs) != len(sim):
        raise AssertionError("Arrays are not of equal length!")

    # calculate alpha term
    obs_mean = onp.mean(obs)
    sim_mean = onp.mean(sim)
    kge_beta = sim_mean / obs_mean

    return kge_beta


[docs]def calc_kge_alpha(obs, sim):
    r"""
    Calculate the alpha term of the Kling-Gupta-Efficiency (KGE).

    Parameters
    ----------
    obs : (N,)array_like
        Observed time series as 1-D array

    sim : (N,)array_like
        Simulated time series

    Returns
    ----------
    kge_alpha : float
        alpha value

    Notes
    ----------
    .. math::

        \alpha = \frac{\sigma_{sim}}{\sigma_{obs}}

    Examples
    --------
    Provide arrays with equal length

    >>> from de import de
    >>> import numpy as np
    >>> obs = onp.array([1.5, 1, 0.8, 0.85, 1.5, 2])
    >>> sim = onp.array([1.6, 1.3, 1, 0.8, 1.2, 2.5])
    >>> kge.calc_kge_alpha(obs, sim)
    1.2812057455166919

    References
    ----------
    Gupta, H. V., Kling, H., Yilmaz, K. K., and Martinez, G. F.: Decomposition
    of the mean squared error and NSE performance criteria: Implications for
    improving hydrological modelling, Journal of Hydrology, 377, 80-91,
    10.1016/j.jhydrol.2009.08.003, 2009.

    Kling, H., Fuchs, M., and Paulin, M.: Runoff conditions in the upper
    Danube basin under an ensemble of climate change scenarios, Journal of
    Hydrology, 424-425, 264-277, 10.1016/j.jhydrol.2012.01.011, 2012.

    Pool, S., Vis, M., and Seibert, J.: Evaluating model performance: towards a
    non-parametric variant of the Kling-Gupta efficiency, Hydrological Sciences
    Journal, 63, 1941-1953, 10.1080/02626667.2018.1552002, 2018.
    """
    if len(obs) != len(sim):
        raise AssertionError("Arrays are not of equal length!")

    obs_std = onp.std(obs)
    sim_std = onp.std(sim)
    kge_alpha = sim_std / obs_std

    return kge_alpha


[docs]def calc_kge_gamma(obs, sim):
    r"""
    Calculate the gamma term of Kling-Gupta-Efficiency (KGE).

    Parameters
    ----------
    obs : (N,)array_like
        Observed time series as 1-D array

    sim : (N,)array_like
        Simulated time series as 1-D array

    Returns
    ----------
    kge_gamma : float
        gamma value

    Notes
    ----------
    .. math::

        \gamma = \frac{CV_{sim}}{CV_{obs}}

    Examples
    --------
    Provide arrays with equal length

    >>> from de import de
    >>> import numpy as np
    >>> obs = onp.array([1.5, 1, 0.8, 0.85, 1.5, 2])
    >>> sim = onp.array([1.6, 1.3, 1, 0.8, 1.2, 2.5])
    >>> kge.calc_kge_gamma(obs, sim)
    1.166812375381273

    References
    ----------
    Gupta, H. V., Kling, H., Yilmaz, K. K., and Martinez, G. F.: Decomposition
    of the mean squared error and NSE performance criteria: Implications for
    improving hydrological modelling, Journal of Hydrology, 377, 80-91,
    10.1016/j.jhydrol.2009.08.003, 2009.

    Kling, H., Fuchs, M., and Paulin, M.: Runoff conditions in the upper
    Danube basin under an ensemble of climate change scenarios, Journal of
    Hydrology, 424-425, 264-277, 10.1016/j.jhydrol.2012.01.011, 2012.

    Pool, S., Vis, M., and Seibert, J.: Evaluating model performance: towards a
    non-parametric variant of the Kling-Gupta efficiency, Hydrological Sciences
    Journal, 63, 1941-1953, 10.1080/02626667.2018.1552002, 2018.
    """
    if len(obs) != len(sim):
        raise AssertionError("Arrays are not of equal length!")

    obs_mean = onp.mean(obs)
    sim_mean = onp.mean(sim)
    obs_std = onp.std(obs)
    sim_std = onp.std(sim)
    obs_cv = obs_std / obs_mean
    sim_cv = sim_std / sim_mean
    kge_gamma = sim_cv / obs_cv

    return kge_gamma


[docs]def calc_kge(obs, sim, r="pearson", var="std"):
    r"""
    Calculate Kling-Gupta-Efficiency (KGE).

    Parameters
    ----------
    obs : (N,)array_like
        Observed time series as 1-D array

    sim : (N,)array_like
        Simulated time series as 1-D array

    r : str, optional
        Either Spearman correlation coefficient ('spearman'; Pool et al. 2018)
        or Pearson correlation coefficient ('pearson'; Gupta et al. 2009) can
        be used to describe the temporal correlation. The default is to
        calculate the Pearson correlation.

    var : str, optional
        Either coefficient of variation ('cv'; Kling et al. 2012) or standard
        deviation ('std'; Gupta et al. 2009, Pool et al. 2018) to describe the
        gamma term. The default is to calculate the standard deviation.

    Returns
    ----------
    eff : float
        Kling-Gupta-Efficiency

    Examples
    --------
    Provide arrays with equal length

    >>> from de import de
    >>> import numpy as np
    >>> obs = onp.array([1.5, 1, 0.8, 0.85, 1.5, 2])
    >>> sim = onp.array([1.6, 1.3, 1, 0.8, 1.2, 2.5])
    >>> kge.calc_kge(obs, sim)
    0.683901305466148

    Notes
    ----------
    .. math::

        KGE = 1 - \sqrt{(\beta - 1)^2 + (\alpha - 1)^2 + (r - 1)^2}

        KGE = 1 - \sqrt{(\frac{\mu_{sim}}{\mu_{obs}} - 1)^2 + (\frac{\sigma_{sim}}{\sigma_{obs}} - 1)^2 + (r - 1)^2}

        KGE = 1 - \sqrt{(\beta - 1)^2 + (\gamma - 1)^2 + (r - 1)^2}

        KGE = 1 - \sqrt{(\frac{\mu_{sim}}{\mu_{obs}} - 1)^2 + (\frac{CV_{sim}}{CV_{obs}} - 1)^2 + (r - 1)^2}

    References
    ----------
    Gupta, H. V., Kling, H., Yilmaz, K. K., and Martinez, G. F.: Decomposition
    of the mean squared error and NSE performance criteria: Implications for
    improving hydrological modelling, Journal of Hydrology, 377, 80-91,
    10.1016/j.jhydrol.2009.08.003, 2009.

    Kling, H., Fuchs, M., and Paulin, M.: Runoff conditions in the upper
    Danube basin under an ensemble of climate change scenarios, Journal of
    Hydrology, 424-425, 264-277, 10.1016/j.jhydrol.2012.01.011, 2012.

    Pool, S., Vis, M., and Seibert, J.: Evaluating model performance: towards a
    non-parametric variant of the Kling-Gupta efficiency, Hydrological Sciences
    Journal, 63, 1941-1953, 10.1080/02626667.2018.1552002, 2018.
    """
    if len(obs) != len(sim):
        raise AssertionError("Arrays are not of equal length!")
    # calculate alpha term
    obs_mean = onp.mean(obs)
    sim_mean = onp.mean(sim)
    kge_beta = sim_mean / obs_mean

    # calculate KGE with gamma term
    if var == "cv":
        kge_gamma = calc_kge_gamma(obs, sim)
        temp_cor = calc_temp_cor(obs, sim, r=r)

        eff = 1 - onp.sqrt((kge_beta - 1) ** 2 + (kge_gamma - 1) ** 2 + (temp_cor - 1) ** 2)

    # calculate KGE with beta term
    elif var == "std":
        kge_alpha = calc_kge_alpha(obs, sim)
        temp_cor = calc_temp_cor(obs, sim, r=r)

        eff = 1 - onp.sqrt((kge_beta - 1) ** 2 + (kge_alpha - 1) ** 2 + (temp_cor - 1) ** 2)

    return eff


[docs]def calc_nse(obs, sim):
    r"""
    Calculate Nash-Sutcliffe-Efficiency (NSE).

    Parameters
    ----------
    obs : (N,)array_like
        Observed time series as 1-D array

    sim : (N,)array_like
        Simulated time series as 1-D array

    Returns
    ----------
    eff : float
        Nash-Sutcliffe-Efficiency

    Examples
    --------
    Provide arrays with equal length

    >>> from de import de
    >>> import numpy as np
    >>> obs = onp.array([1.5, 1, 0.8, 0.85, 1.5, 2])
    >>> sim = onp.array([1.6, 1.3, 1, 0.8, 1.2, 2.5])
    >>> nse.calc_nse(obs, sim)
    0.5648252536640361

    Notes
    ----------
    .. math::

        NSE = 1 - \frac{\sum_{t=1}^{t=T} (Q_{sim}(t) - Q_{obs}(t))^2}{\sum_{t=1}^{t=T} (Q_{obs}(t) - \overline{Q_{obs}})^2}


    References
    ----------
    Nash, J. E., and Sutcliffe, J. V.: River flow forecasting through
    conceptual models part I - A discussion of principles, Journal of
    Hydrology, 10, 282-290, 10.1016/0022-1694(70)90255-6, 1970.
    """
    if len(obs) != len(sim):
        raise AssertionError("Arrays are not of equal length!")
    sim_obs_diff = onp.sum((sim - obs) ** 2)
    obs_mean = onp.mean(obs)
    obs_diff_mean = onp.sum((obs - obs_mean) ** 2)
    eff = 1 - (sim_obs_diff / obs_diff_mean)

    return eff