Source code for climpred.prediction

import inspect
import warnings

import xarray as xr

from .checks import is_xarray
from .comparisons import _e2c
from .constants import (
    DETERMINISTIC_HINDCAST_METRICS,
    HINDCAST_COMPARISONS,
    HINDCAST_METRICS,
    PM_METRICS,
    PM_COMPARISONS,
    PROBABILISTIC_METRICS,
    PROBABILISTIC_PM_COMPARISONS,
    METRIC_ALIASES,
)
from .utils import (
    assign_attrs,
    get_comparison_function,
    get_metric_function,
    intersect,
    reduce_time_series,
)


# --------------------------------------------#
# COMPUTE PREDICTABILITY/FORECASTS
# Highest-level features for computing
# predictability.
# --------------------------------------------#
@is_xarray([0, 1])
def compute_perfect_model(
    ds,
    control,
    metric='pearson_r',
    comparison='m2e',
    dim=None,
    add_attrs=True,
    **metric_kwargs,
):
    """
    Compute a predictability skill score for a perfect-model framework
    simulation dataset.

    Args:
        ds (xarray object): ensemble with dims ``lead``, ``init``, ``member``.
        control (xarray object): control with dimension ``time``.
        metric (str): `metric` name (see
            :py:func:`climpred.utils.get_metric_function` and :ref:`Metrics`).
        comparison (str): `comparison` name defines what to take as forecast
            and verification (see
            :py:func:`climpred.utils.get_comparison_function` and
            :ref:`Comparisons`).
        dim (str or list): dimension to apply metric over.
            default: ['init', 'member']
        add_attrs (bool): write climpred compute args to attrs. default: True
        ** metric_kwargs (dict): additional keywords to be passed to metric
            (see the arguments required for a given metric in metrics.py).

    Returns:
        skill (xarray object): skill score with dimensions as input `ds`
            without `dim`.

    """
    if dim is None:
        dim = ['init', 'member']
    # get metric function name, not the alias
    metric = METRIC_ALIASES.get(metric, metric)

    # if stack_dims, comparisons return forecast with member dim and reference
    # without member dim, which is needed for probabilistic metrics;
    # if not stack_dims, comparisons return forecast and reference with member
    # dim, which is needed for deterministic metrics
    if metric in PROBABILISTIC_METRICS:
        if comparison not in PROBABILISTIC_PM_COMPARISONS:
            raise ValueError(
                f'Probabilistic metric {metric} cannot work with '
                f'comparison {comparison}.'
            )
        stack_dims = False
        if dim != 'member':
            warnings.warn(
                f'Probabilistic metric {metric} requires to be '
                f'computed over dimension `dim="member"`. '
                f'Set automatically.'
            )
            dim = 'member'
    elif set(dim) == set(['init', 'member']):
        dim_to_apply_metric_to = 'svd'
        stack_dims = True
    else:
        if metric == 'pearson_r':
            # it does not fail, though; we could also raise a ValueError here
            warnings.warn(
                'ACC does not work on dims other than '
                '["init", "member"] in the perfect-model framework.'
            )
        stack_dims = False

    if not stack_dims:
        dim_to_apply_metric_to = dim

    comparison = get_comparison_function(comparison, PM_COMPARISONS)
    forecast, reference = comparison(ds, dim_to_apply_metric_to, stack_dims=stack_dims)

    # in case you want to compute skill over member dim
    if (forecast.dims != reference.dims) and (metric not in PROBABILISTIC_METRICS):
        # broadcast when deterministic dim=member
        forecast, reference = xr.broadcast(forecast, reference)
        # m2m creates an additional forecast_member dim when applied over member
        if comparison.__name__ == '_m2m':
            dim_to_apply_metric_to = 'forecast_member'
        else:
            dim_to_apply_metric_to = dim

    metric = get_metric_function(metric, PM_METRICS)

    skill = metric(
        forecast,
        reference,
        dim=dim_to_apply_metric_to,
        comparison=comparison,
        **metric_kwargs,
    )

    # correction for distance-based metrics in m2m comparison
    comparison_name = comparison.__name__[1:]
    # fix for m2m TODO
    if comparison_name == 'm2m':
        if 'forecast_member' in skill.dims:
            skill = skill.mean('forecast_member')
        if dim == 'member' and 'member' in skill.dims:
            skill = skill.mean('member')
        if dim == 'init' and 'init' in skill.dims:
            skill = skill.mean('init')
        # m2m with stack_dims=False contains one identical comparison
        skill = skill * (forecast.member.size / (forecast.member.size - 1))
    # Attach climpred compute information to skill
    if add_attrs:
        skill = assign_attrs(
            skill,
            ds,
            function_name=inspect.stack()[0][3],
            metric=metric,
            comparison=comparison,
            metadata_dict=metric_kwargs,
        )
    return skill
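
# ---------------------------------------------------------------------------
# Illustrative usage sketch for ``compute_perfect_model`` (not part of the
# original module). The variable name ``tos``, the integer coordinates and
# the dimension sizes below are assumptions chosen for demonstration only.
# ---------------------------------------------------------------------------
import numpy as np

# synthetic perfect-model ensemble with dims ``init``, ``lead``, ``member``
ds_demo = xr.Dataset(
    {'tos': (('init', 'lead', 'member'), np.random.rand(6, 5, 4))},
    coords={'init': np.arange(6), 'lead': np.arange(1, 6), 'member': np.arange(4)},
)
# synthetic control run with a single ``time`` dimension
control_demo = xr.Dataset(
    {'tos': (('time',), np.random.rand(60))}, coords={'time': np.arange(60)}
)
# RMSE of each member against the ensemble mean, aggregated over
# ``init`` and ``member`` (the default ``dim``)
pm_skill = compute_perfect_model(
    ds_demo, control_demo, metric='rmse', comparison='m2e'
)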
@is_xarray([0, 1])
def compute_hindcast(
    hind,
    reference,
    metric='pearson_r',
    comparison='e2r',
    dim='init',
    max_dof=False,
    add_attrs=True,
    **metric_kwargs,
):
    """Compute a predictability skill score against a reference.

    Args:
        hind (xarray object): Expected to follow package conventions:
            * ``init`` : dim of initialization dates
            * ``lead`` : dim of lead time from those initializations
            Additional dims can be member, lat, lon, depth, ...
        reference (xarray object): reference output/data over same time period.
        metric (str): Metric used in comparing the decadal prediction ensemble
            with the reference (see
            :py:func:`climpred.utils.get_metric_function` and :ref:`Metrics`).
        comparison (str): How to compare the decadal prediction ensemble to the
            reference:
                * e2r : ensemble mean to reference (Default)
                * m2r : each member to the reference
            (see :ref:`Comparisons`)
        dim (str or list): dimension to apply metric over. default: 'init'
        max_dof (bool): If True, maximize the degrees of freedom by slicing
            `hind` and `reference` to a common time frame at each lead.
            If False (default), then slice to a common time frame prior to
            computing metric. This philosophy follows the thought that each
            lead should be based on the same set of initializations.
        add_attrs (bool): write climpred compute args to attrs. default: True
        ** metric_kwargs (dict): additional keywords to be passed to metric
            (see the arguments required for a given metric in :ref:`Metrics`).

    Returns:
        skill (xarray object): Predictability with main dimension ``lead``
            without dimension ``dim``.

    """
    # get metric function name, not the alias
    metric = METRIC_ALIASES.get(metric, metric)

    # if stack_dims, comparisons return forecast with member dim and reference
    # without member dim, which is needed for probabilistic metrics;
    # if not stack_dims, comparisons return forecast and reference with member
    # dim, which is needed for deterministic metrics
    if metric in PROBABILISTIC_METRICS:
        if comparison != 'm2r':
            raise ValueError(
                f'Probabilistic metric `{metric}` requires comparison `m2r`.'
            )
        stack_dims = False
        if dim != 'member':
            warnings.warn(
                f'Probabilistic metric {metric} requires to be '
                f'computed over dimension `dim="member"`. '
                f'Set automatically.'
            )
            dim = 'member'
    elif dim == 'init':
        stack_dims = True
    elif dim == 'member':
        stack_dims = False
    else:
        raise ValueError(
            f'Please use a probabilistic metric [now {metric}] ',
            f'and the comparison `m2r` [now {comparison}] or ',
            f'specify dim from ["init", "member"], now: {dim}.',
        )

    nlags = max(hind.lead.values)

    comparison = get_comparison_function(comparison, HINDCAST_COMPARISONS)
    forecast, reference = comparison(hind, reference, stack_dims=stack_dims)

    # in case you want to compute skill over member dim
    if (
        (forecast.dims != reference.dims)
        and not stack_dims
        and metric in DETERMINISTIC_HINDCAST_METRICS
    ):
        dim_to_apply_metric_to = 'member'
    else:
        dim_to_apply_metric_to = 'time'

    metric = get_metric_function(metric, HINDCAST_METRICS)

    # think in real time dimension: real time = init + lag
    forecast = forecast.rename({'init': 'time'})
    # take only inits for which we have references at all leads
    if not max_dof:
        forecast, reference = reduce_time_series(forecast, reference, nlags)

    plag = []
    # iterate over all leads (accounts for lead.min() in [0, 1])
    for i in forecast.lead.values:
        if max_dof:
            forecast, reference = reduce_time_series(forecast, reference, i)
        # take lead year i timeseries and convert to real time
        a = forecast.sel(lead=i).drop('lead')
        a['time'] = [int(t + i) for t in a.time.values]
        # take real time reference of real time forecast years
        b = reference.sel(time=a.time.values)
        # broadcast dims when applying over member
        if (a.dims != b.dims) and dim_to_apply_metric_to == 'member':
            a, b = xr.broadcast(a, b)
        plag.append(
            metric(
                a, b, dim=dim_to_apply_metric_to, comparison=comparison, **metric_kwargs
            )
        )
    skill = xr.concat(plag, 'lead')
    skill['lead'] = forecast.lead.values
    # rename back to init
    if 'time' in skill.dims:  # when dim was 'member'
        skill = skill.rename({'time': 'init'})
    # attach climpred compute information to skill
    if add_attrs:
        skill = assign_attrs(
            skill,
            hind,
            function_name=inspect.stack()[0][3],
            metric=metric,
            comparison=comparison,
            metadata_dict=metric_kwargs,
        )
    return skill
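
# ---------------------------------------------------------------------------
# Illustrative usage sketch for ``compute_hindcast`` (not part of the original
# module). Integer ``init`` years and annual integer ``lead`` steps are
# assumed so that verification times can be formed as init + lead; the
# variable name ``sst`` and all sizes are arbitrary.
# ---------------------------------------------------------------------------
import numpy as np

# synthetic hindcast: 20 initializations, leads 1-3 years, 4 members
hind_demo = xr.Dataset(
    {'sst': (('init', 'lead', 'member'), np.random.rand(20, 3, 4))},
    coords={
        'init': np.arange(1990, 2010),
        'lead': [1, 2, 3],
        'member': np.arange(4),
    },
)
# synthetic verification data covering all forecasted years
ref_demo = xr.Dataset(
    {'sst': (('time',), np.random.rand(25))}, coords={'time': np.arange(1990, 2015)}
)
# ensemble-mean RMSE against the reference, computed for each lead
hind_skill = compute_hindcast(hind_demo, ref_demo, metric='rmse', comparison='e2r')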
@is_xarray([0, 1])
def compute_persistence(
    hind, reference, metric='pearson_r', max_dof=False, **metric_kwargs
):
    """Computes the skill of a persistence forecast from a simulation.

    Args:
        hind (xarray object): The initialized ensemble.
        reference (xarray object): The reference time series.
        metric (str): Metric name to apply at each lag for the persistence
            computation. Default: 'pearson_r'
        max_dof (bool): If True, maximize the degrees of freedom by slicing
            `hind` and `reference` to a common time frame at each lead.
            If False (default), then slice to a common time frame prior to
            computing metric. This philosophy follows the thought that each
            lead should be based on the same set of initializations.
        ** metric_kwargs (dict): additional keywords to be passed to metric
            (see the arguments required for a given metric in :ref:`Metrics`).

    Returns:
        pers (xarray object): Results of persistence forecast with the input
            metric applied.

    Reference:
        * Chapter 8 (Short-Term Climate Prediction) in Van den Dool, Huug.
          Empirical methods in short-term climate prediction.
          Oxford University Press, 2007.

    """
    if metric in PROBABILISTIC_METRICS:
        raise ValueError(
            f'Probabilistic metric {metric} cannot compute persistence forecast.'
        )
    metric = get_metric_function(metric, DETERMINISTIC_HINDCAST_METRICS)
    # If lead 0, need to make modifications to get proper persistence, since
    # persistence at lead 0 is 1.
    if 0 in hind.lead.values:
        hind = hind.copy()
        hind['lead'] += 1
        hind['init'] -= 1
    nlags = max(hind.lead.values)
    # temporarily change `init` to `time` for comparison to reference time.
    hind = hind.rename({'init': 'time'})
    if not max_dof:
        # slices down to inits in common with hindcast, plus gives enough room
        # for maximum lead time forecast.
        a, _ = reduce_time_series(hind, reference, nlags)
        inits = a['time']

    plag = []
    for lag in hind.lead.values:
        if max_dof:
            # slices down to inits in common with hindcast, but only gives
            # enough room for lead from current forecast
            a, _ = reduce_time_series(hind, reference, lag)
            inits = a['time']
        ref = reference.sel(time=inits + lag)
        fct = reference.sel(time=inits)
        ref['time'] = fct['time']
        plag.append(metric(ref, fct, dim='time', comparison=_e2c, **metric_kwargs))
    pers = xr.concat(plag, 'lead')
    pers['lead'] = hind.lead.values
    return pers
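
# ---------------------------------------------------------------------------
# Illustrative usage sketch for ``compute_persistence`` (not part of the
# original module), reusing the synthetic ``hind_demo``/``ref_demo`` objects
# from the ``compute_hindcast`` sketch above. Only the ``init``/``lead``
# coordinates of ``hind_demo`` are used; the forecast itself is the reference
# value at each initialization, persisted out to init + lead.
# ---------------------------------------------------------------------------
pers_skill = compute_persistence(hind_demo, ref_demo, metric='rmse')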
@is_xarray([0, 1])
def compute_uninitialized(
    uninit,
    reference,
    metric='pearson_r',
    comparison='e2r',
    dim='time',
    add_attrs=True,
    **metric_kwargs,
):
    """Compute a predictability score between an uninitialized ensemble and a
    reference.

    .. note::
        Based on Decadal Prediction protocol, this should only be computed for
        the first lag and then projected out to any further lags being
        analyzed.

    Args:
        uninit (xarray object): uninitialized ensemble.
        reference (xarray object): reference output/data over same time period.
        metric (str): Metric used in comparing the uninitialized ensemble with
            the reference.
        comparison (str): How to compare the uninitialized ensemble to the
            reference:
                * e2r : ensemble mean to reference (Default)
                * m2r : each member to the reference
        dim (str): dimension to apply metric over. default: 'time'
        add_attrs (bool): write climpred compute args to attrs. default: True
        ** metric_kwargs (dict): additional keywords to be passed to metric

    Returns:
        u (xarray object): Results from comparison at the first lag.

    """
    comparison = get_comparison_function(comparison, HINDCAST_COMPARISONS)
    metric = get_metric_function(metric, DETERMINISTIC_HINDCAST_METRICS)
    forecast, reference = comparison(uninit, reference)
    # Find common times between the two for proper comparison.
    common_time = intersect(forecast['time'].values, reference['time'].values)
    forecast = forecast.sel(time=common_time)
    reference = reference.sel(time=common_time)
    uninit_skill = metric(
        forecast, reference, dim=dim, comparison=comparison, **metric_kwargs
    )
    # Attach climpred compute information to skill
    if add_attrs:
        uninit_skill = assign_attrs(
            uninit_skill,
            uninit,
            function_name=inspect.stack()[0][3],
            metric=metric,
            comparison=comparison,
            metadata_dict=metric_kwargs,
        )
    return uninit_skill
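
# ---------------------------------------------------------------------------
# Illustrative usage sketch for ``compute_uninitialized`` (not part of the
# original module). The uninitialized ensemble only needs a ``time`` dimension
# (and optionally ``member``) overlapping the reference; the variable name
# ``sst`` and all sizes are assumptions for demonstration only.
# ---------------------------------------------------------------------------
import numpy as np

uninit_demo = xr.Dataset(
    {'sst': (('time', 'member'), np.random.rand(25, 4))},
    coords={'time': np.arange(1990, 2015), 'member': np.arange(4)},
)
ref_demo = xr.Dataset(
    {'sst': (('time',), np.random.rand(25))}, coords={'time': np.arange(1990, 2015)}
)
# correlation of the uninitialized ensemble mean with the reference over the
# common time period
uninit_skill_demo = compute_uninitialized(
    uninit_demo, ref_demo, metric='pearson_r', comparison='e2r'
)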