Source code for climpred.classes

import xarray as xr
from IPython.display import display_html
from xarray.core.formatting_html import dataset_repr
from xarray.core.options import OPTIONS as XR_OPTIONS

from .alignment import return_inits_and_verif_dates
from .bootstrap import (
    bootstrap_perfect_model,
    bootstrap_uninit_pm_ensemble_from_control_cftime,
)
from .checks import (
    has_dataset,
    has_dims,
    has_valid_lead_units,
    is_xarray,
    match_initialized_dims,
    match_initialized_vars,
)
from .constants import CONCAT_KWARGS
from .exceptions import DimensionError
from .prediction import (
    _apply_metric_at_given_lead,
    _get_metric_comparison_dim,
    compute_perfect_model,
)
from .reference import compute_persistence
from .smoothing import (
    smooth_goddard_2013,
    spatial_smoothing_xesmf,
    spatial_smoothing_xrcoarsen,
    temporal_smoothing,
)
from .utils import convert_time_index


def _display_metadata(self):
    """
    This is called in the following case:

    ```
    dp = cp.HindcastEnsemble(dple)
    print(dp)
    ```
    """
    SPACE = '    '
    header = f'<climpred.{type(self).__name__}>'
    summary = header + '\nInitialized Ensemble:\n'
    summary += SPACE + str(self._datasets['initialized'].data_vars)[18:].strip() + '\n'
    if isinstance(self, HindcastEnsemble):
        # Prints out verification data names and associated variables if they exist.
        # If not, just write "None".
        if any(self._datasets['observations']):
            for key in self._datasets['observations']:
                summary += f'{key}:\n'
                num_obs = len(self._datasets['observations'][key].data_vars)
                for i in range(1, num_obs + 1):
                    summary += (
                        SPACE
                        + str(self._datasets['observations'][key].data_vars)
                        .split('\n')[i]
                        .strip()
                        + '\n'
                    )
        else:
            summary += 'Verification Data:\n'
            summary += SPACE + 'None\n'
    elif isinstance(self, PerfectModelEnsemble):
        summary += 'Control:\n'
        # Prints out control variables if a control is appended. If not,
        # just write "None".
        if any(self._datasets['control']):
            num_ctrl = len(self._datasets['control'].data_vars)
            for i in range(1, num_ctrl + 1):
                summary += (
                    SPACE
                    + str(self._datasets['control'].data_vars).split('\n')[i].strip()
                    + '\n'
                )
        else:
            summary += SPACE + 'None\n'
    if any(self._datasets['uninitialized']):
        summary += 'Uninitialized:\n'
        summary += SPACE + str(self._datasets['uninitialized'].data_vars)[18:].strip()
    else:
        summary += 'Uninitialized:\n'
        summary += SPACE + 'None'
    return summary


def _display_metadata_html(self):
    header = f'<h4>climpred.{type(self).__name__}</h4>'
    display_html(header, raw=True)
    init_repr_str = dataset_repr(self._datasets['initialized'])
    init_repr_str = init_repr_str.replace('xarray.Dataset', 'Initialized Ensemble')
    display_html(init_repr_str, raw=True)

    if isinstance(self, HindcastEnsemble):
        if any(self._datasets['observations']):
            for key in self._datasets['observations']:
                obs_repr_str = dataset_repr(self._datasets['observations'][key])
                obs_repr_str = obs_repr_str.replace(
                    'xarray.Dataset', f'Verification Data {key}'
                )
                display_html(obs_repr_str, raw=True)
    elif isinstance(self, PerfectModelEnsemble):
        if any(self._datasets['control']):
            control_repr_str = dataset_repr(self._datasets['control'])
            control_repr_str = control_repr_str.replace(
                'xarray.Dataset', 'Control Simulation'
            )
            display_html(control_repr_str, raw=True)

    if any(self._datasets['uninitialized']):
        uninit_repr_str = dataset_repr(self._datasets['uninitialized'])
        uninit_repr_str = uninit_repr_str.replace('xarray.Dataset', 'Uninitialized')
        display_html(uninit_repr_str, raw=True)
    # It would be nicer to aggregate the repr strings and return them all at
    # once, but that raises
    # ``TypeError: __repr__ returned non-string (type NoneType)``.
    # Workaround: return an empty string.
    return ''


class PredictionEnsemble:
    """
    The main object. This is the super of both `PerfectModelEnsemble` and
    `HindcastEnsemble`. This cannot be called directly by a user, but
    should house functions that both ensemble types can use.
    """

    @is_xarray(1)
    def __init__(self, xobj):
        if isinstance(xobj, xr.DataArray):
            # makes applying prediction functions easier, etc.
            xobj = xobj.to_dataset()
        has_dims(xobj, ['init', 'lead'], 'PredictionEnsemble')
        # Check that init is int, cftime, or datetime; convert ints or cftime to
        # datetime.
        xobj = convert_time_index(xobj, 'init', 'xobj[init]')
        # Put this after `convert_time_index` since it assigns 'years' attribute if the
        # `init` dimension is a `float` or `int`.
        has_valid_lead_units(xobj)
        # Add initialized dictionary and reserve sub-dictionary for an uninitialized
        # run.
        self._datasets = {'initialized': xobj, 'uninitialized': {}}
        self.kind = 'prediction'

    # when you just print it interactively
    # https://stackoverflow.com/questions/1535327/how-to-print-objects-of-class-using-print
    def __repr__(self):
        if XR_OPTIONS['display_style'] == 'html':
            return _display_metadata_html(self)
        else:
            return _display_metadata(self)

    def __getattr__(self, name):
        """Allows for xarray methods to be applied to our prediction objects.

        Args:
            name (str): Name of the xarray method to apply, e.g. 'isel'
                or 'sum'.
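
        Example (an illustrative sketch; assumes a ``HindcastEnsemble`` named
        ``hindcast``):

            >>> hindcast.isel(lead=0)    # applied only where ``lead`` exists
            >>> hindcast.mean('member')  # applied wherever ``member`` exists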
        """
        kind = self.kind

        def wrapper(*args, **kwargs):
            """Applies arbitrary function to all datasets in the PredictionEnsemble
            object.

            Got this from:
            https://stackoverflow.com/questions/41919499/how-to-call-undefined-methods-sequentially-in-python-class
            """

            def _apply_func(v, name, *args, **kwargs):
                """Handles exceptions in our dictionary comprehension.

                In other words, this will skip applying the arbitrary function
                to a sub-dataset if a ValueError is thrown. This specifically
                targets cases where certain datasets don't have the given
                dim that's being called. E.g., ``.isel(lead=0)`` should only
                be applied to the initialized dataset.

                Reference:
                  * https://stackoverflow.com/questions/1528237/how-to-handle-exceptions-in-a-list-comprehensions
                """
                try:
                    return getattr(v, name)(*args, **kwargs)
                # ValueError : Cases such as .sum(dim='time'). This doesn't apply
                # it to the given dataset if the dimension doesn't exist.
                #
                # DimensionError: This accounts for our custom error when applying
                # some stats functions.
                except (ValueError, DimensionError):
                    return v

            # Create temporary copy to modify to avoid inplace operation.
            datasets = self._datasets.copy()

            # More explicit than nested dictionary comprehension.
            for outer_k, outer_v in datasets.items():
                # If initialized, control, uninitialized and just a singular
                # dataset, apply the function directly to it.
                if isinstance(outer_v, xr.Dataset):
                    datasets.update(
                        {outer_k: _apply_func(outer_v, name, *args, **kwargs)}
                    )
                else:
                    # If a nested dictionary is encountered (i.e., a set of
                    # observations) apply to each individually.
                    #
                    # Similar to the ``add_observations`` method, this only seems to
                    # avoid inplace operations by copying the nested dictionary
                    # separately and then updating the main dictionary.
                    temporary_dataset = self._datasets[outer_k].copy()
                    for inner_k, inner_v in temporary_dataset.items():
                        temporary_dataset.update(
                            {inner_k: _apply_func(inner_v, name, *args, **kwargs)}
                        )
                    datasets.update({outer_k: temporary_dataset})
            # Instantiates new object with the modified datasets.
            return self._construct_direct(datasets, kind=kind)

        return wrapper

    @classmethod
    def _construct_direct(cls, datasets, kind):
        """Shortcut around __init__ for internal use to avoid inplace
        operations.

        Pulled from xarrray Dataset class.
        https://github.com/pydata/xarray/blob/master/xarray/core/dataset.py
        """
        obj = object.__new__(cls)
        obj._datasets = datasets
        obj.kind = kind
        return obj

    def get_initialized(self):
        """Returns the xarray dataset for the initialized ensemble."""
        return self._datasets['initialized']

    def get_uninitialized(self):
        """Returns the xarray dataset for the uninitialized ensemble."""
        return self._datasets['uninitialized']

    def smooth(self, smooth_kws='goddard2013'):
        """Smooth all entries of PredictionEnsemble in the same manner to be
        able to still calculate prediction skill afterwards.

        Args:
            smooth_kws (dict or str): Dictionary specifying the dims to
                smooth, compatible with `spatial_smoothing_xesmf`,
                `temporal_smoothing` or `spatial_smoothing_xrcoarsen`, or the
                shortcut 'goddard2013' for the Goddard et al. 2013
                recommendations.

        Example:
            >>> PredictionEnsemble.smooth(smooth_kws={'time': 2,
            ...     'lat': 5, 'lon': 4})
            >>> PredictionEnsemble.smooth(smooth_kws='goddard2013')
        """
        # get proper smoothing function based on smooth args
        if isinstance(smooth_kws, str):
            if 'goddard' in smooth_kws:
                smooth_fct = smooth_goddard_2013
                smooth_kws = {'lead': 4}  # default
            else:
                raise ValueError(
                    'Please provide a smoothing shortcut from the available '
                    'list: ["goddard2013"]'
                )
        elif isinstance(smooth_kws, dict):
            non_time_dims = [
                dim for dim in smooth_kws.keys() if dim not in ['time', 'lead']
            ]
            if len(non_time_dims) > 0:
                non_time_dims = non_time_dims[0]
            # goddard when time_dim and lon/lat given
            if ('lon' in smooth_kws or 'lat' in smooth_kws) and (
                'lead' in smooth_kws or 'time' in smooth_kws
            ):
                smooth_fct = smooth_goddard_2013
            # also use the Goddard routine (which can fall back to xrcoarsen
            # internally) when a coarsen dim and a time dim are provided
            elif (
                (non_time_dims != [])
                and (non_time_dims in list(self._datasets['initialized'].dims))
                and ('lead' in smooth_kws or 'time' in smooth_kws)
            ):
                smooth_fct = smooth_goddard_2013
            # else only one smoothing operation
            elif 'lon' in smooth_kws or 'lat' in smooth_kws:
                smooth_fct = spatial_smoothing_xesmf
            elif 'lead' in smooth_kws:
                smooth_fct = temporal_smoothing
            elif non_time_dims in list(self._datasets['initialized'].dims):
                smooth_fct = spatial_smoothing_xrcoarsen
            else:
                raise ValueError(
                    'Please provide kwargs matching one of: '
                    '["spatial_smoothing_xesmf", "temporal_smoothing", '
                    '"spatial_smoothing_xrcoarsen"].'
                )
        else:
            raise ValueError(
                f'Please provide smooth_kws as str or dict, not '
                f'{type(smooth_kws)}.'
            )
        # Apply throughout the dataset
        # TODO: Parallelize
        datasets = self._datasets.copy()
        datasets['initialized'] = smooth_fct(self._datasets['initialized'], smooth_kws)
        # Apply if uninitialized, control, observations exist.
        if self._datasets['uninitialized']:
            datasets['uninitialized'] = smooth_fct(
                self._datasets['uninitialized'], smooth_kws
            )
        if isinstance(self, PerfectModelEnsemble):
            if self._datasets['control']:
                datasets['control'] = smooth_fct(self._datasets['control'], smooth_kws)
        return self._construct_direct(datasets, kind=self.kind)


class PerfectModelEnsemble(PredictionEnsemble):
    """An object for "perfect model" climate prediction ensembles.

    `PerfectModelEnsemble` is a sub-class of `PredictionEnsemble`. It tracks
    the control run used to initialize the ensemble for easy computations,
    bootstrapping, etc.

    This object is built on `xarray` and thus requires the input object to
    be an `xarray` Dataset or DataArray.
    """

    def __init__(self, xobj):
        """Create a `PerfectModelEnsemble` object by inputting output from the
        control run in `xarray` format.

        Args:
            xobj (xarray object):
                decadal prediction ensemble output.

        Attributes:
            control: Dictionary of control run associated with the
                initialized ensemble.
            uninitialized: Dictionary of uninitialized run that is
                bootstrapped from the initialized run.
        """
        super().__init__(xobj)
        # Reserve sub-dictionary for the control simulation.
        self._datasets.update({'control': {}})
        self.kind = 'perfect'

    def _apply_climpred_function(self, func, input_dict=None, **kwargs):
        """Helper function to apply an arbitrary climpred function to the
        ensemble and its control run.

        Args:
            func (function): climpred function to apply to object.
            input_dict (dict): dictionary with the following things:
                * ensemble: initialized or uninitialized ensemble.
                * control: control run from the ``PerfectModelEnsemble``.
                * init (bool): True if the initialized ensemble, False if
                    uninitialized.
        """
        ensemble = input_dict['ensemble']
        control = input_dict['control']
        init = input_dict['init']
        init_vars, ctrl_vars = self._vars_to_drop(init=init)
        ensemble = ensemble.drop_vars(init_vars)
        control = control.drop_vars(ctrl_vars)
        return func(ensemble, control, **kwargs)

    def _vars_to_drop(self, init=True):
        """Returns list of variables to drop when comparing
        initialized/uninitialized to a control.

        This is useful if the two products being compared do not share the
        same variables. I.e., if the control has ['SST'] and the initialized
        has ['SST', 'SALT'], this will return a list with ['SALT'] to be
        dropped from the initialized.

        Args:
            init (bool, default True):
                If `True`, check variables on the initialized.
                If `False`, check variables on the uninitialized.

        Returns:
            Lists of variables to drop from the initialized/uninitialized
            and control Datasets.
        """
        init_str = 'initialized' if init else 'uninitialized'
        init_vars = list(self._datasets[init_str])
        ctrl_vars = list(self._datasets['control'])
        # Make lists of variables to drop that aren't in common
        # with one another.
        init_vars_to_drop = list(set(init_vars) - set(ctrl_vars))
        ctrl_vars_to_drop = list(set(ctrl_vars) - set(init_vars))
        return init_vars_to_drop, ctrl_vars_to_drop

    @is_xarray(1)
    def add_control(self, xobj):
        """Add the control run that initialized the climate prediction
        ensemble.

        Args:
            xobj (xarray object): Dataset/DataArray of the control run.
        """
        # NOTE: These should all be decorators.
        if isinstance(xobj, xr.DataArray):
            xobj = xobj.to_dataset()
        match_initialized_dims(self._datasets['initialized'], xobj)
        match_initialized_vars(self._datasets['initialized'], xobj)
        # Check that time is int, cftime, or datetime; convert ints or cftime
        # to datetime.
        xobj = convert_time_index(xobj, 'time', 'xobj[time]')
        datasets = self._datasets.copy()
        datasets.update({'control': xobj})
        return self._construct_direct(datasets, kind='perfect')
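
    # Usage sketch (hypothetical names; ``ds`` is an init/lead/member Dataset
    # and ``ctrl`` a control run sharing its variables and spatial dims):
    #   >>> pm = PerfectModelEnsemble(ds).add_control(ctrl)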

    def generate_uninitialized(self):
        """Generate an uninitialized ensemble by bootstrapping the
        initialized prediction ensemble.

        Returns:
            Bootstrapped (uninitialized) ensemble as a Dataset.
        """
        has_dataset(
            self._datasets['control'], 'control', 'generate an uninitialized ensemble.'
        )
        uninit = bootstrap_uninit_pm_ensemble_from_control_cftime(
            self._datasets['initialized'], self._datasets['control']
        )
        datasets = self._datasets.copy()
        datasets.update({'uninitialized': uninit})
        return self._construct_direct(datasets, kind='perfect')
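
    # Usage sketch (continuing the hypothetical ``pm`` from above; requires a
    # control and is non-inplace, so reassign the result):
    #   >>> pm = pm.generate_uninitialized()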

    def get_control(self):
        """Returns the control as an xarray dataset."""
        return self._datasets['control']

    def compute_metric(self, metric='pearson_r', comparison='m2m'):
        """Compares the initialized ensemble to the control run.

        Args:
            metric (str, default 'pearson_r'):
                Metric to apply in the comparison.
            comparison (str, default 'm2m'):
                How to compare the climate prediction ensemble to the control.

        Returns:
            Result of the comparison as a Dataset.
        """
        has_dataset(self._datasets['control'], 'control', 'compute a metric')
        input_dict = {
            'ensemble': self._datasets['initialized'],
            'control': self._datasets['control'],
            'init': True,
        }
        return self._apply_climpred_function(
            compute_perfect_model,
            input_dict=input_dict,
            metric=metric,
            comparison=comparison,
        )
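
    # Usage sketch (hypothetical; requires a control to have been added):
    #   >>> skill = pm.compute_metric(metric='rmse', comparison='m2e')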

    def compute_uninitialized(self, metric='pearson_r', comparison='m2e'):
        """Compares the bootstrapped uninitialized run to the control run.

        Args:
            metric (str, default 'pearson_r'):
                Metric to apply in the comparison.
            comparison (str, default 'm2e'):
                How to compare to the control run.

        Returns:
            Result of the comparison as a Dataset.
        """
        has_dataset(
            self._datasets['uninitialized'],
            'uninitialized',
            'compute an uninitialized metric',
        )
        input_dict = {
            'ensemble': self._datasets['uninitialized'],
            'control': self._datasets['control'],
            'init': False,
        }
        return self._apply_climpred_function(
            compute_perfect_model,
            input_dict=input_dict,
            metric=metric,
            comparison=comparison,
        )

    def compute_persistence(self, metric='pearson_r'):
        """Compute a simple persistence forecast for the control run.

        Args:
            metric (str, default 'pearson_r'):
                Metric to apply to the persistence forecast.

        Returns:
            Dataset of persistence forecast results.

        Reference:
            * Chapter 8 (Short-Term Climate Prediction) in Van den Dool,
              Huug. Empirical methods in short-term climate prediction.
              Oxford University Press, 2007.
        """
        has_dataset(
            self._datasets['control'], 'control', 'compute a persistence forecast'
        )
        input_dict = {
            'ensemble': self._datasets['initialized'],
            'control': self._datasets['control'],
            'init': True,
        }
        return self._apply_climpred_function(
            compute_persistence,
            input_dict=input_dict,
            metric=metric,
            alignment='same_inits',
        )

    def bootstrap(
        self,
        metric='pearson_r',
        comparison='m2e',
        sig=95,
        iterations=500,
        pers_sig=None,
    ):
        """Bootstrap ensemble simulations with replacement.

        Args:
            metric (str, default 'pearson_r'):
                Metric to apply for bootstrapping.
            comparison (str, default 'm2e'):
                Comparison style for bootstrapping.
            sig (int, default 95):
                Significance level for uninitialized and initialized
                comparison.
            iterations (int, default 500):
                Number of resampling iterations for bootstrapping with
                replacement.
            pers_sig (int, default None):
                If not None, the separate significance level for persistence.

        Returns:
            Dataset of bootstrapped results with the following variables:

                * init_ci: confidence levels of init_skill.
                * uninit_ci: confidence levels of uninit_skill.
                * pers_ci: confidence levels of pers_skill.
                * p_uninit_over_init: p value of the hypothesis that the
                  difference of skill between the initialized and
                  uninitialized simulations is smaller or equal to zero based
                  on bootstrapping with replacement.
                * p_pers_over_init: p value of the hypothesis that the
                  difference of skill between the initialized and persistence
                  simulations is smaller or equal to zero based on
                  bootstrapping with replacement.

        Reference:
            * Goddard, L., A. Kumar, A. Solomon, D. Smith, G. Boer,
              P. Gonzalez, V. Kharin, et al. “A Verification Framework for
              Interannual-to-Decadal Predictions Experiments.” Climate
              Dynamics 40, no. 1–2 (January 1, 2013): 245–72.
              https://doi.org/10/f4jjvf.
        """
        has_dataset(self._datasets['control'], 'control', 'iteration')
        input_dict = {
            'ensemble': self._datasets['initialized'],
            'control': self._datasets['control'],
            'init': True,
        }
        return self._apply_climpred_function(
            bootstrap_perfect_model,
            input_dict=input_dict,
            metric=metric,
            comparison=comparison,
            sig=sig,
            iterations=iterations,
            pers_sig=pers_sig,
        )
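
    # Usage sketch (hypothetical; ``iterations`` kept small for illustration):
    #   >>> bs = pm.bootstrap(metric='rmse', comparison='m2e', iterations=50)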


class HindcastEnsemble(PredictionEnsemble):
    """An object for climate prediction ensembles initialized by a data-like
    product.

    `HindcastEnsemble` is a sub-class of `PredictionEnsemble`. It tracks all
    verification data associated with the prediction ensemble for easy
    computation across multiple variables and products.

    This object is built on `xarray` and thus requires the input object to
    be an `xarray` Dataset or DataArray.
    """

    def __init__(self, xobj):
        """Create a `HindcastEnsemble` object by inputting output from a
        prediction ensemble in `xarray` format.

        Args:
            xobj (xarray object):
                decadal prediction ensemble output.

        Attributes:
            observations: Dictionary of verification data to associate with
                the decadal prediction ensemble.
            uninitialized: Dictionary of companion (or bootstrapped)
                uninitialized ensemble run.
        """
        super().__init__(xobj)
        self._datasets.update({'observations': {}})
        self.kind = 'hindcast'

    def _apply_climpred_function(self, func, input_dict=None, **kwargs):
        """Helper function to loop through verification data and apply an
        arbitrary climpred function.

        Args:
            func (function): climpred function to apply to object.
            input_dict (dict): dictionary with the following things:
                * name: name of verification data to target. If ``None``,
                    apply to all verification data.
                * init (bool): True if the initialized ensemble, False if
                    uninitialized.
        """
        hind = self._datasets['initialized']
        verif = self._datasets['observations']
        name = input_dict['name']
        init = input_dict['init']

        # Apply only to specific observations.
        if name is not None:
            drop_init, drop_obs = self._vars_to_drop(name, init=init)
            return func(
                hind.drop_vars(drop_init), verif[name].drop_vars(drop_obs), **kwargs
            )
        else:
            # If only one observational product, just apply to that one,
            # dropping variables not shared between the two products.
            if len(verif) == 1:
                name = list(verif.keys())[0]
                drop_init, drop_obs = self._vars_to_drop(name, init=init)
                return func(
                    hind.drop_vars(drop_init),
                    verif[name].drop_vars(drop_obs),
                    **kwargs,
                )
            # Loop through verif, apply function, and store in dictionary.
            # TODO: Parallelize this process.
            else:
                result = {}
                for name in verif.keys():
                    drop_init, drop_obs = self._vars_to_drop(name, init=init)
                    result[name] = func(
                        hind.drop_vars(drop_init),
                        verif[name].drop_vars(drop_obs),
                        **kwargs,
                    )
                return result

    def _vars_to_drop(self, name, init=True):
        """Returns list of variables to drop when comparing
        initialized/uninitialized to observations.

        This is useful if the two products being compared do not share the
        same variables. I.e., if the observations have ['SST'] and the
        initialized has ['SST', 'SALT'], this will return a list with
        ['SALT'] to be dropped from the initialized.

        Args:
            name (str): Short name of observations being compared to.
            init (bool, default True):
                If ``True``, check variables on the initialized.
                If ``False``, check variables on the uninitialized.

        Returns:
            Lists of variables to drop from the initialized/uninitialized
            and observational Datasets.
        """
        if init:
            init_vars = list(self._datasets['initialized'].data_vars)
        else:
            init_vars = list(self._datasets['uninitialized'].data_vars)
        obs_vars = list(self._datasets['observations'][name].data_vars)
        # Make lists of variables to drop that aren't in common
        # with one another.
        init_vars_to_drop = list(set(init_vars) - set(obs_vars))
        obs_vars_to_drop = list(set(obs_vars) - set(init_vars))
        return init_vars_to_drop, obs_vars_to_drop

    @is_xarray(1)
    def add_observations(self, xobj, name):
        """Add verification data with which to verify the initialized
        ensemble.

        Args:
            xobj (xarray object): Dataset/DataArray to append to the
                ``HindcastEnsemble`` object.
            name (str): Short name for referencing the verification data.
        """
        if isinstance(xobj, xr.DataArray):
            xobj = xobj.to_dataset()
        match_initialized_dims(self._datasets['initialized'], xobj)
        match_initialized_vars(self._datasets['initialized'], xobj)
        # Check that time is int, cftime, or datetime; convert ints or cftime
        # to datetime.
        xobj = convert_time_index(xobj, 'time', 'xobj[time]')
        # For some reason, I could only get the non-inplace method to work
        # by updating the nested dictionaries separately.
        datasets_obs = self._datasets['observations'].copy()
        datasets = self._datasets.copy()
        datasets_obs.update({name: xobj})
        datasets.update({'observations': datasets_obs})
        return self._construct_direct(datasets, kind='hindcast')
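
    # Usage sketch (hypothetical names; ``hind`` is an init/lead Dataset and
    # ``obs`` shares its variables and spatial dims with a ``time``
    # dimension; 'ERSST' is an arbitrary label):
    #   >>> hindcast = HindcastEnsemble(hind).add_observations(obs, 'ERSST')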

    @is_xarray(1)
    def add_uninitialized(self, xobj):
        """Add a companion uninitialized ensemble for comparison to
        verification data.

        Args:
            xobj (xarray object): Dataset/DataArray of the uninitialized
                ensemble.
        """
        if isinstance(xobj, xr.DataArray):
            xobj = xobj.to_dataset()
        match_initialized_dims(self._datasets['initialized'], xobj, uninitialized=True)
        match_initialized_vars(self._datasets['initialized'], xobj)
        # Check that time is int, cftime, or datetime; convert ints or cftime
        # to datetime.
        xobj = convert_time_index(xobj, 'time', 'xobj[time]')
        datasets = self._datasets.copy()
        datasets.update({'uninitialized': xobj})
        return self._construct_direct(datasets, kind='hindcast')

    def get_observations(self, name=None):
        """Returns xarray Datasets of the observations/verification data.

        Args:
            name (str, optional): Name of the observations/verification data
                to return. If ``None``, return all observations/verification
                data.

        Returns:
            A single ``xarray`` Dataset (if ``name`` is given, or if only one
            verification product exists) or a dictionary of ``xarray``
            Datasets.
        """
        if name is None:
            if len(self._datasets['observations']) == 1:
                key = list(self._datasets['observations'].keys())[0]
                return self._datasets['observations'][key]
            else:
                return self._datasets['observations']
        else:
            return self._datasets['observations'][name]

    def verify(
        self,
        name=None,
        reference=None,
        metric='pearson_r',
        comparison='e2o',
        alignment='same_verifs',
        dim='init',
    ):
        """Verifies the initialized ensemble against
        observations/verification data.

        This will automatically verify against all shared variables between
        the initialized ensemble and observations/verification data.

        Args:
            name (str): Short name of observations/verification data to
                compare to. If ``None``, compare to all
                observations/verification data.
            reference (str or list of str): Reference forecast(s) to compute
                alongside the initialized skill, e.g. ``'historical'``, which
                verifies the uninitialized ensemble. If ``None``, compute
                initialized skill only.
            metric (str, default 'pearson_r'): Metric to apply for
                verification.
            comparison (str, default 'e2o'): How to compare to the
                observations/verification data. ('e2o' for ensemble mean to
                observations/verification data. 'm2o' for each individual
                member to observations/verification data).
            alignment (str): which inits or verification times should be
                aligned?

                - maximize/None: maximize the degrees of freedom by slicing
                  ``hind`` and ``verif`` to a common time frame at each lead.
                - same_inits: slice to a common init frame prior to computing
                  metric. This philosophy follows the thought that each lead
                  should be based on the same set of initializations.
                - same_verifs: slice to a common/consistent verification time
                  frame prior to computing metric. This philosophy follows
                  the thought that each lead should be based on the same set
                  of verification dates.
            dim (str or list of str, default 'init'): Dimension(s) to apply
                the metric over.

        Returns:
            Dataset of comparison results (if comparing to one observational
            product), or dictionary of Datasets with keys corresponding to
            observations/verification data short name.
        """
        if isinstance(reference, str):
            reference = [reference]
        elif reference is None:
            reference = []

        def _verify(
            hind,
            verif,
            hist,
            reference,
            metric,
            comparison,
            alignment,
            dim,
            **metric_kwargs,
        ):
            """Interior verify func to be passed to apply func."""
            metric, comparison, dim = _get_metric_comparison_dim(
                metric, comparison, dim, kind=self.kind
            )
            forecast, verif = comparison.function(hind, verif, metric=metric)
            forecast = forecast.rename({'init': 'time'})
            inits, verif_dates = return_inits_and_verif_dates(
                forecast, verif, alignment, reference=reference, hist=hist
            )
            metric_over_leads = [
                _apply_metric_at_given_lead(
                    verif,
                    verif_dates,
                    lead,
                    hind=forecast,
                    hist=hist,
                    inits=inits,
                    # Ensure apply metric function returns skill and not
                    # reference results.
                    reference=None,
                    metric=metric,
                    comparison=comparison,
                    dim=dim,
                    **metric_kwargs,
                )
                for lead in forecast['lead'].data
            ]
            result = xr.concat(metric_over_leads, dim='lead', **CONCAT_KWARGS)
            result['lead'] = forecast['lead']

            # ``reference`` was coerced to a list above, so check for
            # non-empty rather than ``is not None``.
            if reference:
                if 'historical' in reference:
                    hist, _ = comparison.function(hist, verif, metric=metric)
                for r in reference:
                    metric_over_leads = [
                        _apply_metric_at_given_lead(
                            verif,
                            verif_dates,
                            lead,
                            hind=forecast,
                            hist=hist,
                            inits=inits,
                            reference=r,
                            metric=metric,
                            comparison=comparison,
                            dim=dim,
                            **metric_kwargs,
                        )
                        for lead in forecast['lead'].data
                    ]
                    ref = xr.concat(metric_over_leads, dim='lead', **CONCAT_KWARGS)
                    ref['lead'] = forecast['lead']
                    result = xr.concat([result, ref], dim='skill', **CONCAT_KWARGS)
                # Add dimension/coordinate for different references.
                result = result.assign_coords(skill=['init'] + reference)
            return result

        has_dataset(
            self._datasets['observations'], 'observational', 'verify a forecast'
        )
        if 'historical' in reference:
            has_dataset(
                self._datasets['uninitialized'],
                'uninitialized',
                'compute an uninitialized reference forecast',
            )
            hist = self._datasets['uninitialized']
        else:
            hist = None

        # TODO: Get rid of this somehow. Might use attributes.
        input_dict = {
            'name': name,
            'init': True,
        }
        return self._apply_climpred_function(
            _verify,
            input_dict=input_dict,
            metric=metric,
            comparison=comparison,
            alignment=alignment,
            dim=dim,
            hist=hist,
            reference=reference,
        )
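

# Usage sketch (continuing the hypothetical ``hindcast`` from above):
#   >>> skill = hindcast.verify(metric='rmse', comparison='e2o',
#   ...                         alignment='same_verifs')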