import inspect
import warnings
import xarray as xr
from .checks import is_in_list, is_xarray
from .comparisons import __e2c
from .constants import (
CLIMPRED_DIMS,
DETERMINISTIC_HINDCAST_METRICS,
HINDCAST_COMPARISONS,
HINDCAST_METRICS,
METRIC_ALIASES,
PM_COMPARISONS,
PM_METRICS,
)
from .utils import (
assign_attrs,
copy_coords_from_to,
get_comparison_class,
get_metric_class,
intersect,
reduce_time_series,
)
# --------------------------------------------#
# COMPUTE PREDICTABILITY/FORECASTS
# Highest-level features for computing
# predictability.
# --------------------------------------------#
[docs]@is_xarray([0, 1])
def compute_perfect_model(
ds,
control,
metric='pearson_r',
comparison='m2e',
dim=None,
add_attrs=True,
**metric_kwargs,
):
"""
Compute a predictability skill score for a perfect-model framework
simulation dataset.
Args:
ds (xarray object): ensemble with dims ``lead``, ``init``, ``member``.
control (xarray object): control with dimension ``time``.
metric (str): `metric` name, see
:py:func:`climpred.utils.get_metric_class` and (see :ref:`Metrics`).
comparison (str): `comparison` name defines what to take as forecast
and verification (see
:py:func:`climpred.utils.get_comparison_class` and :ref:`Comparisons`).
dim (str or list): dimension to apply metric over. default: ['member', 'init']
add_attrs (bool): write climpred compute args to attrs. default: True
** metric_kwargs (dict): additional keywords to be passed to metric.
(see the arguments required for a given metric in metrics.py)
Returns:
skill (xarray object): skill score with dimensions as input `ds`
without `dim`.
"""
if dim is None:
dim = ['init', 'member']
is_in_list(dim, ['member', 'init', ['init', 'member']], '')
# get metric function name, not the alias
metric = METRIC_ALIASES.get(metric, metric)
# get class metric(Metric)
metric = get_metric_class(metric, PM_METRICS)
# get class comparison(Comparison)
comparison = get_comparison_class(comparison, PM_COMPARISONS)
# if stack_dims, comparisons return forecast with member dim and reference
# without member dim which is needed for probabilistic
# if not stack_dims, comparisons return forecast and reference with member dim
# which is neeeded for deterministic
if metric.probabilistic:
if not comparison.probabilistic:
raise ValueError(
f'Probabilistic metric {metric.name} cannot work with '
f'comparison {comparison.name}.'
)
stack_dims = False
if dim != 'member':
warnings.warn(
f'Probabilistic metric {metric.name} requires to be '
f'computed over dimension `dim="member"`. '
f'Set automatically.'
)
dim = 'member'
else:
# prevent comparison e2c and member in dim
if (comparison.name == 'e2c') and (
set(dim) == set(['init', 'member']) or dim == 'member'
):
warnings.warn(
f'comparison `e2c` does not work on `member` in dims, found '
f'{dim}, automatically changed to dim=`init`.'
)
dim = 'init'
stack_dims = False
dim_to_apply_metric_to = dim
# stack_dims = True when metric probabilistic
forecast, reference = comparison.function(ds, stack_dims=stack_dims)
# in case you want to compute skill over member dim
if (forecast.dims != reference.dims) and (not metric.probabilistic):
# broadcast when deterministic dim=member
forecast, reference = xr.broadcast(forecast, reference)
skill = metric.function(
forecast,
reference,
dim=dim_to_apply_metric_to,
comparison=comparison,
**metric_kwargs,
)
# correction for distance based metrics in m2m comparison
# fix for m2m TODO
if comparison.name == 'm2m':
if 'forecast_member' in skill.dims:
skill = skill.mean('forecast_member')
# m2m stack_dims=False has one identical comparison
skill = skill * (forecast.member.size / (forecast.member.size - 1))
# Attach climpred compute information to skill
if add_attrs:
skill = assign_attrs(
skill,
ds,
function_name=inspect.stack()[0][3],
metric=metric,
comparison=comparison,
metadata_dict=metric_kwargs,
)
return skill
[docs]@is_xarray([0, 1])
def compute_hindcast(
hind,
reference,
metric='pearson_r',
comparison='e2r',
dim='init',
max_dof=False,
add_attrs=True,
**metric_kwargs,
):
"""Compute a predictability skill score against a reference
Args:
hind (xarray object):
Expected to follow package conventions:
* ``init`` : dim of initialization dates
* ``lead`` : dim of lead time from those initializations
Additional dims can be member, lat, lon, depth, ...
reference (xarray object):
reference output/data over same time period.
metric (str):
Metric used in comparing the decadal prediction ensemble with the
reference
(see :py:func:`climpred.utils.get_metric_class` and :ref:`Metrics`).
comparison (str):
How to compare the decadal prediction ensemble to the reference:
* e2r : ensemble mean to reference (Default)
* m2r : each member to the reference
(see :ref:`Comparisons`)
dim (str or list): dimension to apply metric over. default: 'init'
max_dof (bool):
If True, maximize the degrees of freedom by slicing `hind` and `reference`
to a common time frame at each lead.
If False (default), then slice to a common time frame prior to computing
metric. This philosophy follows the thought that each lead should be based
on the same set of initializations.
add_attrs (bool): write climpred compute args to attrs. default: True
** metric_kwargs (dict): additional keywords to be passed to metric
(see the arguments required for a given metric in :ref:`Metrics`).
Returns:
skill (xarray object):
Predictability with main dimension ``lag`` without dimension ``dim``
"""
is_in_list(dim, ['member', 'init'], str)
# get metric function name, not the alias
metric = METRIC_ALIASES.get(metric, metric)
# get class metric(Metric)
metric = get_metric_class(metric, HINDCAST_METRICS)
# get class comparison(Comparison)
comparison = get_comparison_class(comparison, HINDCAST_COMPARISONS)
# if stack_dims, comparisons return forecast with member dim and reference
# without member dim which is needed for probabilistic
# if not stack_dims, comparisons return forecast and reference with member dim
# which is neeeded for deterministic
if metric.probabilistic:
if comparison.name != 'm2r':
raise ValueError(
f'Probabilistic metric `{metric.name}` requires comparison'
f' `m2r`, found {comparison.name}.'
)
stack_dims = False
if dim != 'member':
warnings.warn(
f'Probabilistic metric {metric.name} requires to be '
f'computed over dimension `dim="member"`. '
f'Set automatically.'
)
dim = 'member'
elif dim == 'init':
stack_dims = True
elif dim == 'member':
stack_dims = False
else:
raise ValueError(
f'Please use a probabilistic metric [now {metric.name}] ',
f'and the comparison `m2r` [now {comparison.name}] or ',
f'specify dim from ["init", "member"], now: {dim}.',
)
nlags = max(hind.lead.values)
forecast, reference = comparison.function(hind, reference, stack_dims=stack_dims)
# in case you want to compute skill over member dim
if (
(forecast.dims != reference.dims)
and not stack_dims
and not metric.probabilistic
):
dim_to_apply_metric_to = 'member'
else:
dim_to_apply_metric_to = 'time'
# think in real time dimension: real time = init + lag
forecast = forecast.rename({'init': 'time'})
# take only inits for which we have references at all leahind
if not max_dof:
forecast, reference = reduce_time_series(forecast, reference, nlags)
plag = []
# iterate over all leads (accounts for lead.min() in [0,1])
for i in forecast.lead.values:
if max_dof:
forecast, reference = reduce_time_series(forecast, reference, i)
# take lead year i timeseries and convert to real time
a = forecast.sel(lead=i).drop_vars('lead')
a['time'] = [int(t + i) for t in a.time.values]
# take real time reference of real time forecast years
b = reference.sel(time=a.time.values)
# adapt weights to shorter time
if 'weights' in metric_kwargs:
metric_kwargs.update(
{
'weights': metric_kwargs['weights'].isel(
time=slice(None, a.time.size)
)
}
)
# broadcast dims when apply over member
if (a.dims != b.dims) and dim_to_apply_metric_to == 'member':
a, b = xr.broadcast(a, b)
plag.append(
metric.function(
a, b, dim=dim_to_apply_metric_to, comparison=comparison, **metric_kwargs
)
)
skill = xr.concat(plag, 'lead')
skill['lead'] = forecast.lead.values
# rename back to init
if 'time' in skill.dims: # when dim was 'member'
skill = skill.rename({'time': 'init'})
# keep coords from hind
drop_dims = [d for d in hind.coords if d in CLIMPRED_DIMS]
skill = copy_coords_from_to(hind.drop_vars(drop_dims), skill)
# attach climpred compute information to skill
if add_attrs:
skill = assign_attrs(
skill,
hind,
function_name=inspect.stack()[0][3],
metric=metric,
comparison=comparison,
metadata_dict=metric_kwargs,
)
return skill
[docs]@is_xarray([0, 1])
def compute_persistence(
hind, reference, metric='pearson_r', max_dof=False, **metric_kwargs
):
"""Computes the skill of a persistence forecast from a simulation.
Args:
hind (xarray object): The initialized ensemble.
reference (xarray object): The reference time series.
metric (str): Metric name to apply at each lag for the persistence
computation. Default: 'pearson_r'
max_dof (bool):
If True, maximize the degrees of freedom by slicing `hind` and `reference`
to a common time frame at each lead.
If False (default), then slice to a common time frame prior to computing
metric. This philosophy follows the thought that each lead should be based
on the same set of initializations.
** metric_kwargs (dict): additional keywords to be passed to metric
(see the arguments required for a given metric in :ref:`Metrics`).
Returns:
pers (xarray object): Results of persistence forecast with the input metric
applied.
Reference:
* Chapter 8 (Short-Term Climate Prediction) in Van den Dool, Huug.
Empirical methods in short-term climate prediction.
Oxford University Press, 2007.
"""
# get metric function name, not the alias
metric = METRIC_ALIASES.get(metric, metric)
# get class metric(Metric)
metric = get_metric_class(metric, DETERMINISTIC_HINDCAST_METRICS)
if metric.probabilistic:
raise ValueError(
'probabilistic metric ', metric.name, 'cannot compute persistence forecast.'
)
# If lead 0, need to make modifications to get proper persistence, since persistence
# at lead 0 is == 1.
if [0] in hind.lead.values:
hind = hind.copy()
hind['lead'] += 1
hind['init'] -= 1
nlags = max(hind.lead.values)
# temporarily change `init` to `time` for comparison to reference time.
hind = hind.rename({'init': 'time'})
if not max_dof:
# slices down to inits in common with hindcast, plus gives enough room
# for maximum lead time forecast.
a, _ = reduce_time_series(hind, reference, nlags)
inits = a['time']
plag = []
for lag in hind.lead.values:
if max_dof:
# slices down to inits in common with hindcast, but only gives enough
# room for lead from current forecast
a, _ = reduce_time_series(hind, reference, lag)
inits = a['time']
ref = reference.sel(time=inits + lag)
fct = reference.sel(time=inits)
ref['time'] = fct['time']
plag.append(
metric.function(ref, fct, dim='time', comparison=__e2c, **metric_kwargs)
)
pers = xr.concat(plag, 'lead')
pers['lead'] = hind.lead.values
# keep coords from hind
drop_dims = [d for d in hind.coords if d in CLIMPRED_DIMS]
pers = copy_coords_from_to(hind.drop_vars(drop_dims), pers)
# TODO: add climpred metadata
return pers
[docs]@is_xarray([0, 1])
def compute_uninitialized(
uninit,
reference,
metric='pearson_r',
comparison='e2r',
dim='time',
add_attrs=True,
**metric_kwargs,
):
"""Compute a predictability score between an uninitialized ensemble and a reference.
.. note::
Based on Decadal Prediction protocol, this should only be computed for the
first lag and then projected out to any further lags being analyzed.
Args:
uninit (xarray object):
uninitialized ensemble.
reference (xarray object):
reference output/data over same time period.
metric (str):
Metric used in comparing the uninitialized ensemble with the reference.
comparison (str):
How to compare the uninitialized ensemble to the reference:
* e2r : ensemble mean to reference (Default)
* m2r : each member to the reference
add_attrs (bool): write climpred compute args to attrs. default: True
** metric_kwargs (dict): additional keywords to be passed to metric
Returns:
u (xarray object): Results from comparison at the first lag.
"""
comparison = get_comparison_class(comparison, HINDCAST_COMPARISONS)
metric = get_metric_class(metric, DETERMINISTIC_HINDCAST_METRICS)
forecast, reference = comparison.function(uninit, reference)
# Find common times between two for proper comparison.
common_time = intersect(forecast['time'].values, reference['time'].values)
forecast = forecast.sel(time=common_time)
reference = reference.sel(time=common_time)
uninit_skill = metric.function(
forecast, reference, dim=dim, comparison=comparison, **metric_kwargs
)
# Attach climpred compute information to skill
if add_attrs:
uninit_skill = assign_attrs(
uninit_skill,
uninit,
function_name=inspect.stack()[0][3],
metric=metric,
comparison=comparison,
metadata_dict=metric_kwargs,
)
return uninit_skill