Source code for climpred.classes

"""Main module instantiating ``PerfectModelEnsemble`` and ``HindcastEnsemble."""

import importlib.util as _util
import logging
import warnings
from copy import deepcopy
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Hashable,
    Iterator,
    List,
    Mapping,
    Optional,
    Tuple,
    Union,
)
from xml.etree import ElementTree

import cf_xarray  # noqa
import numpy as np
import xarray as xr
from packaging.version import Version
from xarray.core.coordinates import DatasetCoordinates
from xarray.core.dataset import DataVariables
from xarray.core.formatting_html import dataset_repr
from xarray.core.options import OPTIONS as XR_OPTIONS
from xarray.core.utils import Frozen

from .alignment import return_inits_and_verif_dates
from .bias_removal import bias_correction, gaussian_bias_removal, xclim_sdba
from .bootstrap import (
    _distribution_to_ci,
    _p_ci_from_sig,
    _pvalue_from_distributions,
    bootstrap_uninit_pm_ensemble_from_control_cftime,
    resample_skill_exclude_resample_dim_from_dim,
    resample_skill_loop,
    resample_skill_resample_before,
    resample_uninitialized_from_initialized,
    warn_if_chunking_would_increase_performance,
)
from .checks import (
    _check_valid_alignment,
    _check_valid_reference,
    attach_long_names,
    attach_standard_names,
    has_dataset,
    has_dims,
    has_valid_lead_units,
    match_calendars,
    match_initialized_dims,
    match_initialized_vars,
    rename_to_climpred_dims,
)
from .comparisons import Comparison
from .constants import (
    BIAS_CORRECTION_BIAS_CORRECTION_METHODS,
    BIAS_CORRECTION_TRAIN_TEST_SPLIT_METHODS,
    CLIMPRED_DIMS,
    CONCAT_KWARGS,
    CROSS_VALIDATE_METHODS,
    INTERNAL_BIAS_CORRECTION_METHODS,
    XCLIM_BIAS_CORRECTION_METHODS,
)
from .exceptions import CoordinateError, DimensionError, KeywordError, VariableError
from .metrics import PEARSON_R_CONTAINING_METRICS, Metric
from .options import OPTIONS, set_options
from .prediction import (
    _apply_metric_at_given_lead,
    _get_metric_comparison_dim,
    _sanitize_to_list,
    compute_perfect_model,
)
from .reference import (
    compute_climatology,
    compute_persistence,
    compute_persistence_from_first_lead,
)
from .smoothing import (
    _reset_temporal_axis,
    smooth_goddard_2013,
    spatial_smoothing_xesmf,
    temporal_smoothing,
)
from .utils import (
    add_time_from_init_lead,
    assign_attrs,
    broadcast_metric_kwargs_for_rps,
    convert_time_index,
    convert_Timedelta_to_lead_units,
)

metricType = Union[str, Metric]
comparisonType = Union[str, Comparison]
dimType = Optional[Union[str, List[str]]]
alignmentType = str
referenceType = Union[List[str], str]
groupbyType = Optional[Union[str, xr.DataArray]]
metric_kwargsType = Optional[Any]

if TYPE_CHECKING:
    import matplotlib.pyplot as plt

    optionalaxisType = Optional[plt.Axes]
else:
    optionalaxisType = Optional[Any]


__all__ = ["PredictionEnsemble", "PerfectModelEnsemble", "HindcastEnsemble"]


def _display_metadata(self) -> str:
    """
    Print the contents of the :py:class:`.PredictionEnsemble` as text.

    Example:
        >>> init = climpred.tutorial.load_dataset("CESM-DP-SST")
        >>> hindcast = climpred.HindcastEnsemble(init)
        >>> print(hindcast)
        <climpred.HindcastEnsemble>
        Initialized:
            SST      (init, lead, member) float64 ...
        Uninitialized:
            None
        Observations:
            None

    """
    SPACE = "    "
    summary = f"<climpred.{type(self).__name__}>\n"

    for k in self._datasets.keys():
        if self._datasets[k]:
            summary += (
                str(self._datasets[k].data_vars).replace(
                    "Data variables", k.capitalize()
                )
                + "\n"
            )
        else:
            summary += f"{k.capitalize()}:\n{SPACE}None\n"
    return summary.strip("\n")


def _display_metadata_html(self):
    """Show contents of :py:class:`.PredictionEnsemble` as html."""
    html_str = f"<h4>climpred.{type(self).__name__}</h4>"
    for k in self._datasets.keys():
        if self._datasets[k]:
            html_str += dataset_repr(self._datasets[k]).replace(
                "xarray.Dataset", k.capitalize()
            )
    return html_str



[docs]
class PredictionEnsemble:
    """
    The main object :py:class:`.PredictionEnsemble`.

    This is the super of both :py:class:`.PerfectModelEnsemble` and
    :py:class:`.HindcastEnsemble`. This cannot be called directly by
    a user, but should house functions that both ensemble types can use.

    Associated :py:class:`xarray.Dataset` are stored in:

        * ``PredictionEnsemble._datasets["initialized"]``
        * ``PredictionEnsemble._datasets["uninitialized"]``
        * ``PredictionEnsemble._datasets["control"]`` in
          :py:class:`.PerfectModelEnsemble`
        * ``PredictionEnsemble._datasets[observations"]`` in
          :py:class:`.HindcastEnsemble`

    """


[docs]
    def __init__(self, initialized: Union[xr.DataArray, xr.Dataset]):
        """Create a :py:class:`.PredictionEnsemble` object."""
        if not isinstance(initialized, (xr.DataArray, xr.Dataset)):
            raise ValueError(
                "PredictionEnsemble.__init__ requires xr.DataArray or xr.Dataset"
            )
        if isinstance(initialized, xr.DataArray):
            # makes applying prediction functions easier, etc.
            initialized = initialized.to_dataset()
        initialized = rename_to_climpred_dims(initialized)
        has_dims(initialized, ["init", "lead"], "PredictionEnsemble")
        # Check that init is int, cftime, or datetime; convert ints or cftime to
        # datetime.
        initialized = convert_time_index(initialized, "init", "initialized[init]")
        # Put this after `convert_time_index` since it assigns 'years' attribute if the
        # `init` dimension is a `float` or `int`.
        initialized = convert_Timedelta_to_lead_units(initialized)
        has_valid_lead_units(initialized)
        initialized = add_time_from_init_lead(initialized)
        # add metadata
        initialized = attach_standard_names(initialized)
        initialized = attach_long_names(initialized)
        try:
            initialized = initialized.cf.add_canonical_attributes(
                verbose=False, override=True, skip="units"
            )
        except ElementTree.ParseError:
            pass
        else:
            initialized.attrs.pop("history", None)
        # Add initialized dictionary and reserve sub-dictionary for an uninitialized
        # run.
        self._datasets: Dict[str, Any] = {
            "initialized": initialized,
            "uninitialized": {},
        }
        self.kind = "prediction"
        self._temporally_smoothed: Optional[Dict[str, int]] = None
        self._is_annual_lead = None
        self._warn_if_chunked_along_init_member_time()


    def _groupby(self, call: str, groupby: Union[str, xr.DataArray], **kwargs: Any):
        """Help for verify/bootstrap(groupby="month")."""
        skill_group_list: List[Any] = []
        group_label: List[Any] = []
        groupby_str = f"init.{groupby}" if isinstance(groupby, str) else groupby
        with set_options(warn_for_failed_PredictionEnsemble_xr_call=False):
            for group, hind_group in self.get_initialized().init.groupby(groupby_str):
                skill_group_list.append(
                    getattr(self.sel(init=hind_group), call)(
                        **kwargs,
                    )
                )
                group_label.append(group)
        new_dim_name = groupby if isinstance(groupby, str) else groupby.name
        skill_group = xr.concat(
            skill_group_list, dim=new_dim_name, **CONCAT_KWARGS
        ).assign_coords({new_dim_name: group_label})
        skill_group[new_dim_name] = skill_group[
            new_dim_name
        ].assign_attrs(  # noqa: E501
            {
                "description": "new dimension showing skill grouped by init.{groupby}"
                " created by .verify(groupby) or .bootstrap(groupby)"
            }
        )
        return skill_group

    @property
    def coords(self) -> DatasetCoordinates:
        """Return coordinates of :py:class:`.PredictionEnsemble`.

        Dictionary of :py:class:`xarray.DataArray` objects corresponding to coordinate
        variables available in all PredictionEnsemble._datasets.

        See also:
            :py:meth:`~xarray.Dataset.coords`
        """
        pe_coords = self.get_initialized().coords.to_dataset()
        for ds in self._datasets.values():
            if isinstance(ds, xr.Dataset):
                pe_coords.update(ds.coords.to_dataset())
        return pe_coords.coords

    @property
    def nbytes(self) -> int:
        """Bytes sizes of all PredictionEnsemble._datasets.

        See also:
            :py:meth:`~xarray.Dataset.nbytes`
        """
        return sum(
            [
                sum(v.nbytes for v in ds.variables.values())
                for ds in self._datasets.values()
                if isinstance(ds, xr.Dataset)
            ]
        )

    @property
    def sizes(self) -> Mapping[Hashable, int]:
        """
        Return sizes of :py:class:`.PredictionEnsemble`.

        Mapping from dimension names to lengths for all PredictionEnsemble._datasets.

        See also:
            :py:meth:`~xarray.Dataset.equals`
        """
        pe_dims = dict(self.get_initialized().sizes)
        for ds in self._datasets.values():
            if isinstance(ds, xr.Dataset):
                pe_dims.update(ds.sizes)
        return pe_dims

    @property
    def dims(self) -> Mapping[Hashable, int]:
        """
        Return dimension of :py:class:`.PredictionEnsemble`.

        Mapping from dimension names to lengths all PredictionEnsemble._datasets.

        See also:
            :py:meth:`~xarray.Dataset.dims`
        """
        return Frozen(self.sizes)

    @property
    def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]:
        """
        Return chunks of :py:class:`.PredictionEnsemble`.

        Mapping from chunks all PredictionEnsemble._datasets.

        See also:
            :py:meth:`~xarray.Dataset.chunks`
        """
        pe_chunks = dict(self.get_initialized().chunks)
        for ds in self._datasets.values():
            if isinstance(ds, xr.Dataset):
                for d in ds.chunks:
                    if d not in pe_chunks:
                        pe_chunks.update({d: ds.chunks[d]})
        return Frozen(pe_chunks)

    @property
    def chunksizes(self) -> Mapping[Hashable, Tuple[int, ...]]:
        """Return chunksizes of :py:class:`.PredictionEnsemble`.

        Mapping from dimension names to block lengths for this dataset's data, or
        None if the underlying data is not a dask array.
        Cannot be modified directly, but can be modified by calling ``.chunk()``.
        Same as :py:meth:`~xarray.Dataset.chunks`.

        See also:
            :py:meth:`~xarray.Dataset.chunksizes`
        """
        return self.chunks

    @property
    def data_vars(self) -> DataVariables:
        """
        Return data variables of :py:class:`.PredictionEnsemble`.

        Dictionary of DataArray objects corresponding to data variables available in
        all PredictionEnsemble._datasets.

        See also:
            :py:meth:`~xarray.Dataset.data_vars`
        """
        varset = set(self.get_initialized().data_vars)
        for ds in self._datasets.values():
            if isinstance(ds, xr.Dataset):
                # take union
                varset = varset & set(ds.data_vars)
        varlist = list(varset)
        return self.get_initialized()[varlist].data_vars

    def _repr_html_(self):
        """Return for :py:class:`.PredictionEnsemble` in html."""
        from html import escape

        if XR_OPTIONS["display_style"] == "text":
            return f"<pre>{escape(repr(self))}</pre>"
        return _display_metadata_html(self)

    def __repr__(self) -> str:
        """Return for print(:py:class:`.PredictionEnsemble`)."""
        return _display_metadata(self)


[docs]
    def __len__(self) -> int:
        """Return number of all variables :py:class:`.PredictionEnsemble`."""
        return len(self.data_vars)



[docs]
    def __iter__(self) -> Iterator[Hashable]:
        """Iterate over underlying :py:class:`xarray.Dataset`."""
        return iter(self._datasets.values())



[docs]
    def __delitem__(self, key: Hashable) -> None:
        """Remove a variable from :py:class:`.PredictionEnsemble`."""
        del self._datasets["initialized"][key]
        for ds in self._datasets.values():
            if isinstance(ds, xr.Dataset):
                if key in ds.data_vars:
                    del ds[key]



[docs]
    def __contains__(self, key: Hashable) -> bool:
        """Check variable in :py:class:`.PredictionEnsemble`.

        The ``"in"`` operator will return true or false depending on whether
        ``"key"`` is in any PredictionEnsemble._datasets.
        """
        contained = True
        for ds in self._datasets.values():
            if isinstance(ds, xr.Dataset):
                if key not in ds.data_vars:
                    contained = False
        return contained



[docs]
    def equals(self, other: Union["PredictionEnsemble", Any]) -> bool:
        """Check if :py:class:`.PredictionEnsemble` is equal to other.

        Two :py:class:`.PredictionEnsemble` are equal if they have
        matching variables and coordinates, all of which are equal.
        ``PredictionEnsembles`` can still be equal (like pandas objects) if they have
        NaN values in the same locations.
        This method is necessary because `v1 == v2` for ``PredictionEnsembles``
        does element-wise comparisons (like numpy.ndarrays).

        See also:
            :py:meth:`~xarray.Dataset.equals`
        """
        if not isinstance(other, PredictionEnsemble):
            return False
        if other.kind != self.kind:
            return False
        equal = True
        try:
            for ds_name in self._datasets.keys():
                if isinstance(self._datasets[ds_name], xr.Dataset):
                    if not self._datasets[ds_name].equals(other._datasets[ds_name]):
                        equal = False
        except Exception:
            return False
        return equal



[docs]
    def identical(self, other: Union["PredictionEnsemble", Any]) -> bool:
        """
        Check if :py:class:`.PredictionEnsemble` is identical to other.

        Like ``equals``, but also checks all dataset attributes and the
        attributes on all variables and coordinates.

        See also:
            :py:meth:`~xarray.Dataset.identical`
        """
        if not isinstance(other, PredictionEnsemble):
            return False
        if other.kind != self.kind:
            return False
        id = True
        try:
            for ds_name in self._datasets.keys():
                if not self._datasets[ds_name].identical(other._datasets[ds_name]):
                    id = False
        except Exception:
            return False
        return id


    def plot(
        self,
        variable: Optional[str] = None,
        ax: optionalaxisType = None,
        show_members: bool = False,
        cmap: Optional[str] = None,
        x: str = "time",
    ) -> "plt.Axes":
        """Plot datasets from :py:class:`.PredictionEnsemble`.

        Wraps :py:func:`.climpred.graphics.plot_ensemble_perfect_model` or
        :py:func:`.climpred.graphics.plot_lead_timeseries_hindcast`.

        Args:
            variable: `variable` to show. Defaults to first in data_vars.
            ax: Axis to use in plotting. By default, creates a new axis.
            show_members: whether to display all members individually.
                Defaults to False.
            cmap: Name of matplotlib-recognized colorbar. Defaults to ``viridis``
                for :py:class:`.HindcastEnsemble`
                and ``tab10`` for :py:class:`.PerfectModelEnsemble`.
            x: Name of x-axis. Use ``time`` to show observations and
                hindcasts in real time. Use ``init`` to see hindcasts as
                initializations. For ``x=init`` only initialized is shown and only
                works for :py:class:`.HindcastEnsemble`.

        .. note::
            Alternatively inspect initialized datasets by
            ``PredictionEnsemble.get_initialized()[v].plot.line(x=time)``
            to see ``validtime`` on x-axis or
            ``PredictionEnsemble.get_initialized()[v].plot.line(x=init)``
            to see ``init`` on x-axis.

        Returns:
            ax: plt.axes

        """
        from .graphics import plot_ensemble_perfect_model, plot_lead_timeseries_hindcast

        if x == "time":
            x = "valid_time"
        if x not in ["valid_time", "init"]:
            raise ValueError("x must be 'valid_time' or 'init'.")
        if isinstance(self, HindcastEnsemble):
            if cmap is None:
                cmap = "viridis"
            return plot_lead_timeseries_hindcast(
                self,
                variable=variable,
                ax=ax,
                show_members=show_members,
                cmap=cmap,
                x=x,
            )
        elif isinstance(self, PerfectModelEnsemble):
            if cmap is None:
                cmap = "tab10"
            return plot_ensemble_perfect_model(
                self, variable=variable, ax=ax, show_members=show_members, cmap=cmap
            )

    mathType = Union[
        int, float, np.ndarray, xr.DataArray, xr.Dataset, "PredictionEnsemble"
    ]

    def _math(
        self,
        other: "mathType",
        operator: str,
    ):
        """Help function for __add__, __sub__, __mul__, __truediv__.

        Allows math operations with type:
            - int
            - float
            - np.ndarray
            - xr.DataArray without new dimensions
            - xr.Dataset without new dimensions or variables

        """
        if not isinstance(operator, str):
            raise ValueError("operator must be of type str.")

        def add(a, b):
            return a + b

        def sub(a, b):
            return a - b

        def mul(a, b):
            return a * b

        def div(a, b):
            return a / b

        ALLOWED_TYPES_FOR_MATH_OPERATORS = [
            int,
            float,
            np.ndarray,
            xr.DataArray,
            xr.Dataset,
            type(self),
        ]
        OPERATOR_STR = {
            "add": "+",
            "sub": "-",
            "mul": "*",
            "div": "/",
        }
        error_str = f"Cannot use {type(self)} {OPERATOR_STR[operator]} {type(other)}"

        # catch undefined types for other
        if not isinstance(other, tuple(ALLOWED_TYPES_FOR_MATH_OPERATORS)):
            raise TypeError(
                f"{error_str} because type {type(other)} not supported. "
                f"Please choose from {ALLOWED_TYPES_FOR_MATH_OPERATORS}."
            )
        # catch other dimensions in other
        if isinstance(other, tuple([xr.Dataset, xr.DataArray])):
            if not set(other.dims).issubset(
                self._datasets["initialized"].dims
            ):  # noqa: E501
                raise DimensionError(f"{error_str} containing new dimensions.")
        # catch xr.Dataset with different data_vars
        if isinstance(other, xr.Dataset):
            if list(other.data_vars) != list(self._datasets["initialized"].data_vars):
                raise VariableError(
                    f"{error_str} with new `data_vars`. Please use {type(self)} "
                    f"{operator} {type(other)} only with same `data_vars`. Found "
                    f"initialized.data_vars = "
                    f" {list(self._datasets['initialized'].data_vars)} vs. "
                    f"other.data_vars = {list(other.data_vars)}."
                )

        _operator = eval(operator)

        if isinstance(other, PredictionEnsemble):
            # Create temporary copy to modify to avoid inplace operation.
            datasets = self._datasets.copy()
            for dataset in datasets:
                # Some pre-allocated entries might be empty, such as 'uninitialized'
                if isinstance(other._datasets[dataset], xr.Dataset) and isinstance(
                    self._datasets[dataset], xr.Dataset
                ):
                    datasets[dataset] = _operator(
                        datasets[dataset], other._datasets[dataset]
                    )
            return self._construct_direct(datasets, kind=self.kind)
        else:
            return self._apply_func(_operator, other)


[docs]
    def __add__(self, other: mathType) -> "PredictionEnsemble":
        """Add."""
        return self._math(other, operator="add")



[docs]
    def __sub__(self, other: mathType) -> "PredictionEnsemble":
        """Sub."""
        return self._math(other, operator="sub")



[docs]
    def __mul__(self, other: mathType) -> "PredictionEnsemble":
        """Mul."""
        return self._math(other, operator="mul")



[docs]
    def __truediv__(self, other: mathType) -> "PredictionEnsemble":
        """Div."""
        return self._math(other, operator="div")



[docs]
    def __getitem__(self, varlist: Union[str, List[str]]) -> "PredictionEnsemble":
        """Allow subsetting variable(s) from

        Allow subsetting variable(s) from
        :py:class:`.PredictionEnsemble` as from
        :py:class:`xarray.Dataset`.

        Args:
            varlist: list of names or name of data variable(s) to subselect
        """
        if isinstance(varlist, str):
            varlist = [varlist]
        if not isinstance(varlist, list):
            raise ValueError(
                "Please subset PredictionEnsemble as you would subset an xr.Dataset "
                "with a list or single string of variable name(s), found "
                f"{type(varlist)}."
            )

        def sel_vars(ds, varlist):
            return ds[varlist]

        return self._apply_func(sel_vars, varlist)



[docs]
    def __getattr__(self, name):  # -> Callable[[VarArg(Any), KwArg(Any)], Any]
        """Allow for ``xarray`` methods to be applied to our prediction objects.

        Args:
            * name: str of xarray function, e.g., ``.isel()`` or ``.sum()``.
        """
        if name in [
            "_ipython_canary_method_should_not_exist_",
            "_ipython_display_",
            "_repr_markdown_",
            "_repr_svg_",
            "_repr_png_",
            "_repr_pdf_",
            "_repr_jpeg_",
            "_repr_latex_",
            "_repr_json_",
            "_repr_mimebundle_",
            "_repr_javascript_",
        ]:
            return None  # typing: ignore

        def wrapper(*args, **kwargs):
            """Apply arbitrary function to all datasets in ``PerfectModelEnsemble``.

            Got this from: https://stackoverflow.com/questions/41919499/
            how-to-call-undefined-methods-sequentially-in-python-class
            """

            def _apply_xr_func(v, name, *args, **kwargs):
                """Handle exceptions in our dictionary comprehension.

                In other words, this will skip applying the arbitrary function
                to a sub-dataset if a ValueError is thrown. This specifically
                targets cases where certain datasets don't have the given
                dim that's being called. E.g., ``.isel(lead=0)`` should only
                be applied to the initialized dataset.

                References:
                    * https://stackoverflow.com/questions/1528237/
                      how-to-handle-exceptions-in-a-list-comprehensions
                """
                try:
                    return getattr(v, name)(*args, **kwargs)
                # ValueError : Cases such as .sum(dim='time'). This doesn't apply
                #              it to the given dataset if the dimension doesn't exist.
                # KeyError : Cases where a function calls the index of a Dataset. Such
                #            as ds[dim] and the dim doesn't exist as a key.
                # DimensionError: This accounts for our custom error when applying
                # some stats functions.
                except (ValueError, KeyError, DimensionError) as e:
                    if args == tuple():
                        func_name = False
                    else:
                        if callable(args[0]):
                            func_name = args[0].__name__
                        else:  # for xarray calls like pe.mean()
                            func_name = False
                    dim = kwargs.get("dim", False)
                    error_type = type(e).__name__
                    if func_name:
                        if len(args) > 1:
                            msg = f"{func_name}({args[1:]}, {kwargs}) failed\n{error_type}: {e}"  # noqa: E501
                        else:
                            msg = f"{func_name}({kwargs}) failed\n{error_type}: {e}"
                    else:
                        msg = f"xr.{name}({args}, {kwargs}) failed\n{error_type}: {e}"
                    if set(["lead", "init"]).issubset(set(v.dims)):  # initialized
                        if dim not in v.dims:
                            if OPTIONS["warn_for_failed_PredictionEnsemble_xr_call"]:
                                warnings.warn(f"Error due to initialized:  {msg}")
                    elif set(["time"]).issubset(
                        set(v.dims)
                    ):  # uninitialized, control, verification
                        if dim not in v.dims:
                            if OPTIONS["warn_for_failed_PredictionEnsemble_xr_call"]:
                                warnings.warn(
                                    f"Error due to verification/control/uninitialized: {msg}"  # noqa: E501
                                )
                    else:
                        if OPTIONS["warn_for_failed_PredictionEnsemble_xr_call"]:
                            warnings.warn(msg)
                    return v

            return self._apply_func(_apply_xr_func, name, *args, **kwargs)

        return wrapper


    @classmethod
    def _construct_direct(cls, datasets, kind):
        """Shortcut around __init__ for internal use to avoid inplace operations.

        Pulled from xarrray Dataset class.
        https://github.com/pydata/xarray/blob/master/xarray/core/dataset.py
        """
        obj = object.__new__(cls)
        obj._datasets = datasets
        obj.kind = kind
        obj._warn_if_chunked_along_init_member_time()
        return obj

    def _apply_func(
        self, func: Callable[..., xr.Dataset], *args: Any, **kwargs: Any
    ) -> "PredictionEnsemble":
        """Apply a function to all datasets in a ``PerfectModelEnsemble``."""
        # Create temporary copy to modify to avoid inplace operation.
        # isnt that essentially the same as .map(func)?
        datasets = self._datasets.copy()

        # More explicit than nested dictionary comprehension.
        for key, ds in datasets.items():
            # If ds is xr.Dataset, apply the function directly to it
            # else, e.g. for {} ignore
            if isinstance(ds, xr.Dataset):
                dim = kwargs.get("dim", "")
                if "_or_" in dim:
                    dims = dim.split("_or_")
                    if set(dims).issubset(ds.dims):
                        raise ValueError(
                            f"{dims} cannot be both in {key} dataset, found {ds.dims}"
                        )
                    kwargs_dim0 = kwargs.copy()
                    kwargs_dim0["dim"] = dims[0]
                    kwargs_dim1 = kwargs.copy()
                    kwargs_dim1["dim"] = dims[1]
                    if dims[0] in ds.dims and dims[1] not in ds.dims:
                        datasets.update({key: func(ds, *args, **kwargs_dim0)})
                    if dims[1] in ds.dims and dims[0] not in ds.dims:
                        datasets.update({key: func(ds, *args, **kwargs_dim1)})
                else:
                    datasets.update({key: func(ds, *args, **kwargs)})
        # Instantiates new object with the modified datasets.
        return self._construct_direct(datasets, kind=self.kind)

    def get_initialized(self) -> xr.Dataset:
        """Return the :py:class:`xarray.Dataset` for the initialized ensemble."""
        return self._datasets["initialized"]

    def get_uninitialized(self) -> xr.Dataset:
        """Return the :py:class:`xarray.Dataset` for the uninitialized ensemble."""
        return self._datasets["uninitialized"]

    def smooth(
        self,
        smooth_kws: Optional[Union[str, Dict[str, int]]] = None,
        how: str = "mean",
        drop: bool = False,
        **xesmf_kwargs: str,
    ):
        """Smooth in space and/or aggregate in time in ``PredictionEnsemble``.

        Args:
            smooth_kws: Dictionary to specify the dims to
                smooth compatible with
                :py:func:`~climpred.smoothing.spatial_smoothing_xesmf` or
                :py:func:`~climpred.smoothing.temporal_smoothing`.
                Shortcut for :cite:t:`Goddard2013` ``goddard2013``.
                Defaults to ``None``.
            how: how to smooth temporally. From Choose from ``["mean", "sum"]``.
                Defaults to ``"mean"``.
            drop: If ``True`` and ``"lead"`` is in ``smooth_kws``, subsample
                the smoothed rolling windows to non-overlapping windows
                (every ``n``-th lead, where ``n`` is the smoothing length).
                For example, ``smooth({"lead": 3}, drop=True)`` on a forecast
                with leads ``1..10`` yields leads ``"1-3", "4-6", "7-9"``
                instead of the default overlapping ``"1-3", "2-4", ...,
                "8-10"``. Defaults to ``False``.
            **xesmf_kwargs: kwargs passed to
                :py:func:`~climpred.smoothing.spatial_smoothing_xesmf`

        Examples:
            >>> PerfectModelEnsemble.get_initialized().lead.size
            20
            >>> PerfectModelEnsemble.smooth(
            ...     {"lead": 4}, how="sum"
            ... ).get_initialized().lead.size
            17

            >>> HindcastEnsemble_3D.smooth({"lon": 1, "lat": 1})
            <climpred.HindcastEnsemble>
            Initialized:
                SST      (init, lead, lat, lon) float32 123kB -0.3236 -0.3161 ... 0.0 0.0
            Uninitialized:
                None
            Observations:
                SST      (time, lat, lon) float32 13kB 0.002937 0.001561 ... 0.0 0.0

            ``smooth`` simultaneously aggregates spatially listening to ``lon`` and
            ``lat`` and temporally listening to ``lead`` or ``time``.

            >>> HindcastEnsemble_3D.smooth(
            ...     {"lead": 2, "lat": 5, "lon": 4}
            ... ).get_initialized().coords
            Coordinates:
              * init        (init) object 512B 1954-01-01 00:00:00 ... 2017-01-01 00:00:00
              * lead        (lead) int32 36B 1 2 3 4 5 6 7 8 9
              * lat         (lat) float64 16B -9.75 -4.75
              * lon         (lon) float64 32B 250.8 254.8 258.8 262.8
                valid_time  (lead, init) object 5kB 1955-01-01 00:00:00 ... 2026-01-01 00...
            >>> HindcastEnsemble_3D.smooth("goddard2013").get_initialized().coords
            Coordinates:
              * init        (init) object 512B 1954-01-01 00:00:00 ... 2017-01-01 00:00:00
              * lead        (lead) int32 28B 1 2 3 4 5 6 7
              * lat         (lat) float64 16B -9.75 -4.75
              * lon         (lon) float64 32B 250.8 255.8 260.8 265.8
                valid_time  (lead, init) object 4kB 1955-01-01 00:00:00 ... 2024-01-01 00...

        """
        if not smooth_kws:
            return self
        tsmooth_kws: Optional[Union[str, Dict[str, int]]] = None
        d_lon_lat_kws: Optional[Union[str, Dict[str, int]]] = None
        smooth_fct: Callable[..., xr.Dataset]
        # get proper smoothing function based on smooth args
        if isinstance(smooth_kws, str):
            if "goddard" in smooth_kws:
                if self._is_annual_lead:
                    smooth_fct = smooth_goddard_2013
                    tsmooth_kws = {"lead": 4}  # default
                    d_lon_lat_kws = {"lon": 5, "lat": 5}  # default
                else:
                    raise ValueError(
                        "`goddard2013` smoothing only available for annual leads."
                    )
            else:
                raise ValueError('Please provide from list of available smoothings: \
                     ["goddard2013"]')
        # TODO: actively searches for lot and lat in dims. Maybe this part of the code
        # could be more robust in how it finds these two spatial dimensions regardless
        # of name. Optional work in progress comment.
        elif isinstance(smooth_kws, dict):
            # goddard when time_dim and lon/lat given
            if ("lon" in smooth_kws or "lat" in smooth_kws) and (
                "lead" in smooth_kws or "time" in smooth_kws
            ):
                smooth_fct = smooth_goddard_2013
                # separate lon, lat keywords into d_lon_lat_kws
                d_lon_lat_kws = dict()
                tsmooth_kws = dict()
                for c in ["lon", "lat"]:
                    if c in smooth_kws:
                        d_lon_lat_kws[c] = smooth_kws[c]
                for c in ["lead", "time"]:
                    if c in smooth_kws:
                        tsmooth_kws[c] = smooth_kws[c]
            # else only one smoothing operation
            elif "lon" in smooth_kws or "lat" in smooth_kws:
                smooth_fct = spatial_smoothing_xesmf
                d_lon_lat_kws = smooth_kws
                tsmooth_kws = None
            elif "lead" in smooth_kws or "time" in smooth_kws:
                smooth_fct = temporal_smoothing
                d_lon_lat_kws = None
                tsmooth_kws = smooth_kws
            else:
                raise ValueError('Please provide kwargs to fulfill functions: \
                     ["spatial_smoothing_xesmf", "temporal_smoothing"].')
        else:
            raise ValueError(
                "Please provide kwargs as dict or str and not", type(smooth_kws)
            )
        self = self.map(
            smooth_fct,
            tsmooth_kws=tsmooth_kws,
            d_lon_lat_kws=d_lon_lat_kws,
            how=how,
            **xesmf_kwargs,
        )
        if smooth_fct == smooth_goddard_2013 or smooth_fct == temporal_smoothing:
            self._temporally_smoothed = tsmooth_kws
            # recalc valid_time
            del self._datasets["initialized"].coords["valid_time"]
            self._datasets["initialized"] = add_time_from_init_lead(
                self._datasets["initialized"]
            )
        if drop and tsmooth_kws is not None and "lead" in tsmooth_kws:
            # drop overlapping lead if temporally smoothed
            n = tsmooth_kws["lead"]
            keep_leads = [
                i for i in range(self._datasets["initialized"].lead.size) if i % n == 0
            ]  # noqa: E501
            self._datasets["initialized"] = self._datasets["initialized"].isel(
                lead=keep_leads
            )
        return self

    def remove_seasonality(
        self, seasonality: Union[None, str] = None
    ) -> "PredictionEnsemble":
        """Remove seasonal cycle from :py:class:`.PredictionEnsemble`.

        Args:
            seasonality: Seasonality to be removed. Choose from:
                ``["season", "month", "weekofyear", "dayofyear"]``.
                Defaults to ``OPTIONS["seasonality"]``.

        Examples:
            >>> HindcastEnsemble
            <climpred.HindcastEnsemble>
            Initialized:
                SST      (init, lead, member) float64 51kB -0.2392 -0.2203 ... 0.618 0.6136
            Uninitialized:
                SST      (time, member) float64 17kB -0.1969 -0.01221 ... 0.4179 0.3974
            Observations:
                SST      (time) float64 488B -0.4015 -0.3524 -0.1851 ... 0.2481 0.346 0.4502
            >>> # example already effectively without seasonal cycle
            >>> HindcastEnsemble.remove_seasonality(seasonality="month")
            <climpred.HindcastEnsemble>
            Initialized:
                SST      (init, lead, member) float64 51kB -0.2392 -0.2203 ... 0.618 0.6136
            Uninitialized:
                SST      (time, member) float64 17kB -0.1969 -0.01221 ... 0.4179 0.3974
            Observations:
                SST      (time) float64 488B -0.4015 -0.3524 -0.1851 ... 0.2481 0.346 0.4502
        """

        def _remove_seasonality(ds, initialized_dim="init", seasonality=None):
            """Remove the seasonal cycle from the data."""
            if ds == {}:
                return {}
            if seasonality is None:
                seasonality = OPTIONS["seasonality"]
            dim = initialized_dim if initialized_dim in ds.dims else "time"
            groupby = f"{dim}.{seasonality}"
            if "member" in ds.dims:
                clim = ds.mean("member").groupby(groupby).mean()
            else:
                clim = ds.groupby(groupby).mean()
            anom = ds.groupby(groupby) - clim
            return anom

        return self.map(
            _remove_seasonality,
            seasonality=seasonality,
        )

    def _warn_if_chunked_along_init_member_time(self) -> None:
        """
        Warn when ``CLIMPRED_DIMS`` except ``lead`` are wrongly chunked.

        When more than one chunk to show how to circumvent ``xskillscore`` chunking
        ``ValueError``.
        """
        suggest_one_chunk = []
        for d in self.chunks:
            if d in ["time", "init", "member"]:
                if len(self.chunks[d]) > 1:
                    suggest_one_chunk.append(d)
        if len(suggest_one_chunk) > 0:
            name = (
                str(type(self))
                .replace("<class 'climpred.classes.", "")
                .replace("'>", "")
            )
            # init cannot be dim when time chunked
            suggest_one_chunk_time_to_init = suggest_one_chunk.copy()
            if "time" in suggest_one_chunk_time_to_init:
                suggest_one_chunk_time_to_init.remove("time")
                suggest_one_chunk_time_to_init.append("init")
            msg = (
                f"{name} is chunked along dimensions {suggest_one_chunk} with more "
                f"than one chunk. `{name}.chunks={self.chunks}`.\nYou cannot call "
                f"`{name}.verify` or `{name}.bootstrap` in combination with any of "
                f" {suggest_one_chunk_time_to_init} passed as `dim`. In order to do "
                f"so, please rechunk {suggest_one_chunk} with `{name}.chunk("
                "{{dim:-1}}).verify(dim=dim).`\nIf you do not want to use dimensions "
                f" {suggest_one_chunk_time_to_init} in `{name}.verify(dim=dim)`, you "
                "can disregard this warning."
            )
            # chunk lead:1 in HindcastEnsemble
            if self.kind == "hindcast":
                msg += '\nIn `HindcastEnsemble` you may also create one chunk per "\
                " lead, as the `climpred` internally loops over lead, e.g. "\
                " `.chunk({{"lead": 1}}).verify().`'
            # chunk auto on non-climpred dims
            ndims = list(self.sizes)
            for d in CLIMPRED_DIMS:
                if d in ndims:
                    ndims.remove(d)
            if len(ndims) > 0:
                msg += (
                    f"\nConsider chunking embarrassingly parallel dimensions such as "
                    f"{ndims} automatically, i.e. "
                    f'`{name}.chunk({ndims[0]}="auto").verify(...).'
                )
            warnings.warn(msg)

    def _bootstrap(
        self,
        *,
        metric: metricType,
        comparison: comparisonType,
        dim: Optional[dimType] = None,
        alignment: Optional[alignmentType] = None,
        reference: Optional[referenceType] = None,
        groupby: Optional[groupbyType] = None,
        iterations: Optional[int] = None,
        sig: int = 95,
        resample_dim: Optional[str] = None,
        **metric_kwargs: metric_kwargsType,
    ) -> xr.Dataset:
        """PredictionEnsemble.bootstrap() parent method.

        explain: OPTIONS['bootstrap_resample_skill_func']

        See also:
        * :py:meth:`~climpred.PerfectModelEnsemble.bootstrap`
        * :py:meth:`~climpred.HindcastEnsemble.bootstrap`
        """
        warn_if_chunking_would_increase_performance(
            self.get_initialized(), crit_size_in_MB=50
        )
        _metric, _, _ = _get_metric_comparison_dim(
            self.get_initialized(), metric, comparison, dim, kind=self.kind
        )

        if iterations is None:
            raise ValueError("Designate number of bootstrapping `iterations`.")
        if resample_dim is None:
            resample_dim = "member"

        reference = _check_valid_reference(reference)

        reference = _sanitize_to_list(reference)
        if reference is None:
            reference = []
        dim = _sanitize_to_list(dim)
        if dim is None:
            dim = list(self.get_initialized().isel(lead=0).dims)

        verify_kwargs = dict(
            dim=dim,
            metric=metric,
            comparison=comparison,
            reference=reference,
            **metric_kwargs,
        )
        if groupby is not None:
            verify_kwargs["groupby"] = groupby
        if self.kind == "hindcast":
            verify_kwargs["alignment"] = alignment

        if "uninitialized" in reference and not self.get_uninitialized():
            warnings.warn(
                "'uninitialized' in `reference` but not `uninitialized` "
                "dataset. Therefore climpred does "
                f"`{str(type(self))}.generate_uninitialized()`."
            )
            self2 = self.generate_uninitialized()
        else:
            self2 = self
        skill = self2.verify(**verify_kwargs)

        # different ways to compute resample_skill
        if (
            _metric.name in PEARSON_R_CONTAINING_METRICS
            and self.kind == "hindcast"
            and alignment
        ):
            if ("same_verif" in alignment) & (resample_dim == "init"):
                raise KeywordError(
                    "Cannot have alignment='same_verifs' and resample_dim='init' and "
                    "metric='pearson_r'. Change `resample_dim` to 'member' to keep "
                    "common verification alignment or `alignment` to 'same_inits' to "
                    "resample over initializations."
                )

        if OPTIONS["bootstrap_resample_skill_func"] == "default":
            if (
                _metric.name in PEARSON_R_CONTAINING_METRICS
                and self.kind == "hindcast"
                and resample_dim == "init"
            ):
                # slow: loop and verify each time
                # used for HindcastEnsemble.bootstrap(metric='acc')
                resampled_skills = resample_skill_loop(
                    self, iterations, resample_dim, verify_kwargs
                )
            elif (
                alignment in ["same_verifs", "same_verif"]
                and self.kind == "hindcast"
                and resample_dim == "init"
            ):
                # allow https://github.com/pangeo-data/climpred/issues/582
                resampled_skills = resample_skill_exclude_resample_dim_from_dim(
                    self, iterations, resample_dim, verify_kwargs
                )

            elif (
                resample_dim == "init"
                and self.kind == "hindcast"
                and not _metric.probabilistic
                and _metric.name != "rmse"
            ):
                # fast way by verify(dim=[]) and then resampling init
                # used for HindcastEnsemble.bootstrap(resample_dim='init')
                resampled_skills = resample_skill_exclude_resample_dim_from_dim(
                    self, iterations, resample_dim, verify_kwargs
                )

            elif (
                resample_dim == "init"
                and self.kind == "hindcast"
                and _metric.probabilistic
                and "member" in dim
                and _metric.name
                not in ["rank_histogram", "discrimination", "reliability"]
            ):
                # fast way by verify(dim=[]) and then resampling init
                # used for HindcastEnsemble.bootstrap(resample_dim='init')
                resampled_skills = resample_skill_exclude_resample_dim_from_dim(
                    self, iterations, resample_dim, verify_kwargs
                )
            elif (
                resample_dim == "member"
                and self.kind == "hindcast"
                and _metric.name
                in ["threshold_brier_score", "reliability", "rank_histogram"]
            ):
                resampled_skills = resample_skill_loop(
                    self, iterations, resample_dim, verify_kwargs
                )

            elif resample_dim == "member" or self.kind == "perfect":
                resampled_skills = resample_skill_resample_before(
                    self, iterations, resample_dim, verify_kwargs
                )

            else:
                # slow: loop and verify each time, but always works
                resampled_skills = resample_skill_loop(
                    self, iterations, resample_dim, verify_kwargs
                )
        else:
            resample_skill_func = eval(
                f"resample_skill_{OPTIONS['bootstrap_resample_skill_func']}"
            )
            resampled_skills = resample_skill_func(
                self, iterations, resample_dim, verify_kwargs
            )

        # continue with skill and resampled_skills

        if (
            "uninitialized" in reference
            and OPTIONS["bootstrap_uninitialized_from_iterations_mean"]
        ):
            skill = xr.concat(
                [
                    skill.sel(skill="initialized"),
                    resampled_skills.sel(skill="uninitialized").mean("iteration"),
                    skill.drop_sel(skill=["initialized", "uninitialized"]),
                ],
                "skill",
                coords="minimal",
            )
            logging.info("exchange uninit with iteration mean skill passed")

        p, ci_low, ci_high = _p_ci_from_sig(sig)

        ci = _distribution_to_ci(resampled_skills, ci_low, ci_high)

        results_list = [
            skill,
            ci.sel(quantile=ci_low, drop=True),
            ci.sel(quantile=ci_high, drop=True),
        ]
        results_labels = ["verify skill", "low_ci", "high_ci"]

        if reference:
            pvalue = _pvalue_from_distributions(
                resampled_skills.drop_sel(skill="initialized"),
                resampled_skills.sel(skill="initialized"),
                metric=_metric,
            )
            pvalue = xr.concat(
                [
                    pvalue.isel(skill=0, drop=True)
                    .assign_coords(skill="initialized")
                    .where(1 == 2),  # adds all NaN
                    pvalue,
                ],
                "skill",
                **CONCAT_KWARGS,
            )
            results_list.insert(1, pvalue)
            results_labels.insert(1, "p")

        results = xr.concat(
            results_list,
            dim="results",
            coords="minimal",
            compat="override",  # take vars/coords from first
        ).assign_coords(
            skill=["initialized"] + reference,
            results=("results", results_labels),
        )
        if results.skill.size == 1:
            results = results.isel(skill=0)

        results_dims = (
            ["skill", "results", "lead"]
            if "skill" in list(results_list[0].dims)
            else ["results", "lead"]
        )
        results = results.transpose(*results_dims, ...)

        results = assign_attrs(
            results,
            self.get_initialized(),
            function_name=f"{type(self).__name__}.bootstrap()",
            **verify_kwargs,
            resample_dim=resample_dim,
            sig=sig,
            iterations=iterations,
        )
        return results




[docs]
class PerfectModelEnsemble(PredictionEnsemble):
    """An object for "perfect model" prediction ensembles.

    :py:class:`.PerfectModelEnsemble` is a sub-class of
    :py:class:`.PredictionEnsemble`. It tracks
    the control run used to initialize the ensemble for easy computations,
    bootstrapping, etc.

    This object is built on ``xarray`` and thus requires the input object to
    be an :py:class:`xarray.Dataset` or :py:class:`xarray.DataArray`.
    """


[docs]
    def __init__(self, initialized: Union[xr.DataArray, xr.Dataset]) -> None:
        """Create a :py:class:`.PerfectModelEnsemble` object.

        Args:
          initialized: prediction ensemble output.

        Attributes:
            control: datasets dictionary item of control simulation associated with the
                initialized ensemble.
            uninitialized: datasets dictionary item of uninitialized forecast.
        """
        super().__init__(initialized)
        # Reserve sub-dictionary for the control simulation.
        self._datasets.update({"control": {}})
        self.kind = "perfect"


    def _apply_climpred_function(
        self,
        func: Callable[..., xr.Dataset],
        input_dict: Dict[str, Any],
        **kwargs: Any,
    ) -> Union["PerfectModelEnsemble", xr.Dataset]:
        """Loop through observations and apply an arbitrary climpred function.

        Args:
            func: climpred function to apply to object.
            input_dict: dictionary with the following things:
                * ensemble: initialized or uninitialized ensemble.
                * control: control dictionary from HindcastEnsemble.
                * init (bool): True if the initialized ensemble, False if uninitialized.
        """
        ensemble = input_dict["ensemble"]
        control = input_dict["control"]
        init = input_dict["init"]
        init_vars, ctrl_vars = self._vars_to_drop(init=init)
        ensemble = ensemble.drop_vars(init_vars)
        if control:
            control = control.drop_vars(ctrl_vars)
        return func(ensemble, control, **kwargs)

    def _vars_to_drop(self, init: bool = True) -> Tuple[List[str], List[str]]:
        """Return list of variables to drop when comparing datasets.

        This is useful if the two products being compared do not share the same
        variables. I.e., if the control has ["SST"] and the initialized has
        ["SST", "SALT"], this will return a list with ["SALT"] to be dropped
        from the initialized.

        Args:
            init:
                If ``True``, check variables on the initialized.
                If ``False``, check variables on the uninitialized.

        Returns:
            Lists of variables to drop from the initialized/uninitialized
            and control Datasets.
        """
        init_str = "initialized" if init else "uninitialized"
        init_vars = list(self._datasets[init_str])
        # only drop if control present
        if self._datasets["control"]:
            ctrl_vars = list(self._datasets["control"])
            # Make lists of variables to drop that aren't in common
            # with one another.
            init_vars_to_drop = list(set(init_vars) - set(ctrl_vars))
            ctrl_vars_to_drop = list(set(ctrl_vars) - set(init_vars))
        else:
            init_vars_to_drop, ctrl_vars_to_drop = [], []
        return init_vars_to_drop, ctrl_vars_to_drop


[docs]
    def add_control(
        self, control: Union[xr.DataArray, xr.Dataset]
    ) -> "PerfectModelEnsemble":
        """Add the control run that initialized the prediction ensemble.

        Args:
            control: control run.
        """
        # NOTE: These should all be decorators.
        if isinstance(control, xr.DataArray):
            control = control.to_dataset()
        match_initialized_dims(self._datasets["initialized"], control)
        match_initialized_vars(self._datasets["initialized"], control)
        # Check that init is int, cftime, or datetime; convert ints or cftime to
        # datetime.
        control = convert_time_index(control, "time", "control[time]")
        # Check that converted/original cftime calendar is the same as the
        # initialized calendar to avoid any alignment errors.
        match_calendars(self._datasets["initialized"], control, kind2="control")
        datasets = self._datasets.copy()
        datasets.update({"control": control})
        return self._construct_direct(datasets, kind="perfect")



[docs]
    def generate_uninitialized(self) -> "PerfectModelEnsemble":
        """Generate an uninitialized ensemble by resampling from the control simulation.

        Returns:
            ``uninitialzed`` resampled from ``control`` added
            to :py:class:`.PerfectModelEnsemble`
        """
        has_dataset(
            self._datasets["control"], "control", "generate an uninitialized ensemble."
        )

        uninit = bootstrap_uninit_pm_ensemble_from_control_cftime(
            self._datasets["initialized"], self._datasets["control"]
        )
        uninit.coords["valid_time"] = self.get_initialized().coords["valid_time"]
        datasets = self._datasets.copy()
        datasets.update({"uninitialized": uninit})
        return self._construct_direct(datasets, kind="perfect")



[docs]
    def get_control(self) -> xr.Dataset:
        """Return the control as an :py:class:`xarray.Dataset`."""
        return self._datasets["control"]



[docs]
    def verify(
        self,
        *,
        metric: metricType,
        comparison: comparisonType,
        dim: Optional[dimType] = None,
        reference: Optional[referenceType] = None,
        groupby: Optional[groupbyType] = None,
        **metric_kwargs: metric_kwargsType,
    ) -> xr.Dataset:
        """Verify initialized predictions against a configuration of its members.

        .. note::
            The configuration of the other ensemble members is based off of the
            ``comparison`` keyword argument.

        Args:
            metric: Metric to apply for verification, see `metrics <../metrics.html>`_
            comparison: How to compare the initialized prediction ensemble with itself,
                see `comparisons <../comparisons.html>`_.
            dim: Dimension(s) over which to apply ``metric``.
                ``dim`` is passed on to xskillscore.{metric} and includes xskillscore's
                ``member_dim``. ``dim`` should contain ``member`` when ``comparison``
                is probabilistic but should not contain ``member`` when
                ``comparison=e2c``. Defaults to ``None`` meaning that all dimensions
                other than ``lead`` are reduced.
            reference: Type of reference forecasts with which to verify against.
                One or more of ``["uninitialized", "persistence", "climatology"]``.
                Defaults to ``None`` meaning no reference.
                For ``persistence``, choose between
                ``set_options(PerfectModel_persistence_from_initialized_lead_0)=False``
                (default) using :py:func:`~climpred.reference.compute_persistence` or
                ``set_options(PerfectModel_persistence_from_initialized_lead_0)=True``
                using
                :py:func:`~climpred.reference.compute_persistence_from_first_lead`.
            groupby: group ``init`` before passing ``initialized`` to ``verify``.
            **metric_kwargs: Arguments passed to ``metric``.

        Returns:
            ``initialized`` and ``reference`` forecast skill reduced by dimensions
            ``dim``

        Example:
            Root mean square error (``rmse``) comparing every member with the
            ensemble mean forecast (``m2e``) for all leads reducing dimensions
            ``init`` and ``member``:

            >>> PerfectModelEnsemble.verify(
            ...     metric="rmse", comparison="m2e", dim=["init", "member"]
            ... )
            <xarray.Dataset> Size: 240B
            Dimensions:  (lead: 20)
            Coordinates:
              * lead     (lead) int64 160B 1 2 3 4 5 6 7 8 9 ... 12 13 14 15 16 17 18 19 20
            Data variables:
                tos      (lead) float32 80B 0.1028 0.1249 0.1443 ... 0.2113 0.2452 0.2297
            Attributes:
                prediction_skill_software:     climpred https://climpred.readthedocs.io/
                skill_calculated_by_function:  PerfectModelEnsemble.verify()
                number_of_initializations:     12
                number_of_members:             10
                metric:                        rmse
                comparison:                    m2e
                dim:                           ['init', 'member']
                reference:                     []


            Continuous Ranked Probability Score (``"crps"``) comparing every member to every
            other member (``"m2m"``) reducing dimensions ``member`` and ``init`` while
            also calculating reference skill for the ``persistence``, ``climatology``
            and ``uninitialized`` forecast.

            >>> PerfectModelEnsemble.verify(
            ...     metric="crps",
            ...     comparison="m2m",
            ...     dim=["init", "member"],
            ...     reference=["persistence", "climatology", "uninitialized"],
            ... )
            <xarray.Dataset> Size: 1kB
            Dimensions:  (skill: 4, lead: 20)
            Coordinates:
              * lead     (lead) int64 160B 1 2 3 4 5 6 7 8 9 ... 12 13 14 15 16 17 18 19 20
              * skill    (skill) <U13 208B 'initialized' 'persistence' ... 'uninitialized'
            Data variables:
                tos      (skill, lead) float64 640B 0.0621 0.07352 0.08678 ... 0.122 0.1246
            Attributes:
                prediction_skill_software:                         climpred https://climp...
                skill_calculated_by_function:                      PerfectModelEnsemble.v...
                number_of_initializations:                         12
                number_of_members:                                 10
                metric:                                            crps
                comparison:                                        m2m
                dim:                                               ['init', 'member']
                reference:                                         ['persistence', 'clima...
                PerfectModel_persistence_from_initialized_lead_0:  False
        """
        if groupby is not None:
            return self._groupby(
                "verify",
                groupby,
                reference=reference,
                metric=metric,
                comparison=comparison,
                dim=dim,
                **metric_kwargs,
            )

        reference = _check_valid_reference(reference)
        input_dict = {
            "ensemble": self._datasets["initialized"],
            "control": (
                self._datasets["control"]
                if isinstance(self._datasets["control"], xr.Dataset)
                else None
            ),
            "init": True,
        }
        result = self._apply_climpred_function(
            compute_perfect_model,
            input_dict=input_dict,
            metric=metric,
            comparison=comparison,
            dim=dim,
            **metric_kwargs,
        )
        if self._temporally_smoothed:
            result = _reset_temporal_axis(result, self._temporally_smoothed, dim="lead")
            result["lead"].attrs = self.get_initialized().lead.attrs
        # compute reference skills
        if reference:
            for r in reference:
                dim_orig = deepcopy(dim)  # preserve dim, because
                ref_compute_kwargs = deepcopy(metric_kwargs)  # persistence changes dim
                ref_compute_kwargs.update(
                    {"dim": dim_orig, "metric": metric, "comparison": comparison}
                )
                if r == "persistence":
                    if not OPTIONS["PerfectModel_persistence_from_initialized_lead_0"]:
                        del ref_compute_kwargs["comparison"]
                ref = getattr(self, f"_compute_{r}")(**ref_compute_kwargs)
                result = xr.concat([result, ref], dim="skill", **CONCAT_KWARGS)
            result = result.assign_coords(skill=["initialized"] + reference)
        result = assign_attrs(
            result,
            self.get_initialized(),
            function_name="PerfectModelEnsemble.verify()",
            metric=metric,
            comparison=comparison,
            dim=dim,
            reference=reference,
            **metric_kwargs,
        )
        if "lead_center" in result.coords:
            result.coords["lead_center"].attrs["units"] = result.coords[
                "lead"
            ].attrs.get("units", "")
        return result.squeeze()


    def _compute_uninitialized(
        self,
        *,
        metric: metricType,
        comparison: comparisonType,
        dim: Optional[dimType] = None,
        **metric_kwargs: metric_kwargsType,
    ) -> xr.Dataset:
        """Verify the bootstrapped uninitialized run against itself.

        .. note::
            The configuration of the other ensemble members is based off of the
            ``comparison`` keyword argument.

        Args:
            metric: Metric to apply for verification, see `metrics <../metrics.html>`_
            comparison: How to compare the uninitialized against itself, see
                `comparisons <../comparisons.html>`_.
            dim: Dimension(s) over which to apply metric.
                ``dim`` is passed on to xskillscore.{metric} and includes xskillscore's
                ``member_dim``. ``dim`` should contain ``member`` when ``comparison``
                is probabilistic but should not contain ``member`` when
                ``comparison=e2c``. Defaults to ``None``, meaning that all dimensions
                other than ``lead`` are reduced.
            **metric_kwargs: Arguments passed to ``metric``.

        Returns:
            ``initialized`` and ``reference`` forecast skill reduced by dimensions
            ``dim``
        """
        has_dataset(
            self._datasets["uninitialized"],
            "uninitialized",
            "compute an uninitialized metric",
        )
        input_dict = {
            "ensemble": self._datasets["uninitialized"],
            "control": (
                self._datasets["control"]
                if isinstance(self._datasets["control"], xr.Dataset)
                else None
            ),
            "init": False,
        }
        if dim is None:
            dim = list(self._datasets["initialized"].isel(lead=0).dims)
        res = self._apply_climpred_function(
            compute_perfect_model,
            input_dict=input_dict,
            metric=metric,
            comparison=comparison,
            dim=dim,
            **metric_kwargs,
        )
        if self._temporally_smoothed:
            res = _reset_temporal_axis(res, self._temporally_smoothed, dim="lead")
            res["lead"].attrs = self.get_initialized().lead.attrs
        return res

    def _compute_persistence(
        self,
        *,
        metric: metricType,
        dim: Optional[dimType] = None,
        **metric_kwargs: metric_kwargsType,
    ):
        """Verify a simple persistence forecast of the control run against itself.

        Note: uses :py:func:`~climpred.reference.compute_persistence_from_first_lead`
        if ``set_options(PerfectModel_persistence_from_initialized_lead_0=True)`` else
        :py:func:`~climpred.reference.compute_persistence`.

        Args:
            metric: Metric to apply for verification, see `metrics <../metrics.html>`_
            comparison: How to compare the persistence against itself, see
                `comparisons <../comparisons.html>`_. Only valid if
                ``PerfectModel_persistence_from_initialized_lead_0=True``.
            dim: Dimension(s) over which to apply metric.
                ``dim`` is passed on to xskillscore.{metric} and includes xskillscore's
                ``member_dim``. ``dim`` should contain ``member`` when ``comparison``
                is probabilistic but should not contain ``member`` when
                ``comparison=e2c``. Defaults to ``None``, meaning that all dimensions
                other than ``lead`` are reduced.
            **metric_kwargs: Arguments passed to ``metric``.

        Returns:
            persistence forecast skill.

        References:
            * Chapter 8 (Short-Term Climate Prediction) in
              Van den Dool, Huug. Empirical methods in short-term climate
              prediction. Oxford University Press, 2007.
        """
        if dim is None:
            dim = list(self._datasets["initialized"].isel(lead=0).dims)
        compute_persistence_func: Callable[..., xr.Dataset]
        compute_persistence_func = compute_persistence_from_first_lead
        if OPTIONS["PerfectModel_persistence_from_initialized_lead_0"]:
            compute_persistence_func = compute_persistence_from_first_lead
            if self.get_initialized().lead[0] != 0:
                if OPTIONS["warn_for_failed_PredictionEnsemble_xr_call"]:
                    warnings.warn(
                        "Calculate persistence from "
                        f"lead={int(self.get_initialized().lead[0].values)} instead "
                        "of lead=0 (recommended)."
                    )
        else:
            compute_persistence_func = compute_persistence
            if self._datasets["control"] == {}:
                warnings.warn(
                    "You may also calculate persistence based on "
                    "``initialized.isel(lead=0)`` by changing "
                    " ``set_options(PerfectModel_persistence_from_initialized_lead_0=True)``."  # noqa: E501
                )
            has_dataset(
                self._datasets["control"], "control", "compute a persistence forecast"
            )
        input_dict = {
            "ensemble": self._datasets["initialized"],
            "control": self._datasets["control"],
            "init": True,
        }

        res = self._apply_climpred_function(
            compute_persistence_func,
            input_dict=input_dict,
            metric=metric,
            alignment="same_inits",
            dim=dim,
            **metric_kwargs,
        )
        if self._temporally_smoothed:
            res = _reset_temporal_axis(res, self._temporally_smoothed, dim="lead")
            res["lead"].attrs = self.get_initialized().lead.attrs
        return res

    def _compute_climatology(
        self,
        *,
        metric: metricType,
        comparison: comparisonType,
        dim: Optional[dimType] = None,
        **metric_kwargs: metric_kwargsType,
    ) -> xr.Dataset:
        """Verify a climatology forecast.

        Args:
            metric: Metric to apply for verification, see `metrics <../metrics.html>`_
            comparison: How to compare the climatology against itself, see
                `comparisons <../comparisons.html>`_
            dim: Dimension(s) over which to apply metric.
                ``dim`` is passed on to xskillscore.{metric} and includes xskillscore's
                ``member_dim``. ``dim`` should contain ``member`` when ``comparison``
                is probabilistic but should not contain ``member`` when
                ``comparison=e2c``. Defaults to ``None``, meaning that all dimensions
                other than ``lead`` are reduced.
            **metric_kwargs: Arguments passed to ``metric``.

        Returns:
            climatology forecast skill

        References:
            * Chapter 8 (Short-Term Climate Prediction) in
              Van den Dool, Huug. Empirical methods in short-term climate
              prediction. Oxford University Press, 2007.
        """
        input_dict = {
            "ensemble": self._datasets["initialized"],
            "control": (
                self._datasets["control"]
                if isinstance(self._datasets["control"], xr.Dataset)
                else None
            ),
            "init": True,
        }
        if dim is None:
            dim = list(self.get_initialized().isel(lead=0).dims)
        res = self._apply_climpred_function(
            compute_climatology,
            input_dict=input_dict,
            metric=metric,
            comparison=comparison,
            dim=dim,
            **metric_kwargs,
        )
        if self._temporally_smoothed:
            res = _reset_temporal_axis(res, self._temporally_smoothed, dim="lead")
            res["lead"].attrs = self.get_initialized().lead.attrs
        return res


[docs]
    def bootstrap(
        self,
        *,
        metric: metricType,
        comparison: comparisonType,
        dim: Optional[dimType] = None,
        reference: Optional[referenceType] = None,
        groupby: Optional[groupbyType] = None,
        iterations: Optional[int] = None,
        sig: int = 95,
        resample_dim: str = "member",
        **metric_kwargs: metric_kwargsType,
    ) -> xr.Dataset:
        """Bootstrap with replacement according to :cite:t:`Goddard2013`.

        Args:
            metric: Metric to apply for verification, see `metrics <../metrics.html>`_
            comparison: How to compare the forecast against itself, see
                `comparisons <../comparisons.html>`_
            dim: Dimension(s) over which to apply metric.
                ``dim`` is passed on to xskillscore.{metric} and includes xskillscore's
                ``member_dim``. ``dim`` should contain ``member`` when ``comparison``
                is probabilistic but should not contain ``member`` when
                ``comparison=e2c``. Defaults to ``None`` meaning that all dimensions
                other than ``lead`` are reduced.
            reference: Type of reference forecasts with which to verify against.
                One or more of ``["uninitialized", "persistence", "climatology"]``.
                Defaults to ``None`` meaning no reference.
                If ``None`` or ``[]``, returns no p value.
                For ``persistence``, choose between
                ``set_options(PerfectModel_persistence_from_initialized_lead_0)=False``
                (default) using :py:func:`~climpred.reference.compute_persistence` or
                ``set_options(PerfectModel_persistence_from_initialized_lead_0)=True``
                using
                :py:func:`~climpred.reference.compute_persistence_from_first_lead`.
            iterations: Number of resampling iterations for bootstrapping with
                replacement. Recommended >= 500.
            resample_dim: dimension to resample from. Defaults to ``"member"``.

                - ``"member"``: select a different set of members from forecast
                - ``"init"``: select a different set of initializations from forecast

            sig: Significance level in percent for deciding whether
                uninitialized and persistence beat initialized skill.
            groupby: group ``init`` before passing ``initialized`` to ``bootstrap``.
            **metric_kwargs: arguments passed to ``metric``.

        Returns:
            Dataset with dimensions ``results`` and ``skill``.

            * results="verify skill", skill="initialized":
                mean initialized skill
            * results="high_ci", skill="initialized":
                high confidence interval boundary for initialized skill
            * results="p", skill="uninitialized":
                p value of the hypothesis that the
                difference of skill between the initialized and
                uninitialized simulations is smaller or equal to zero
                based on bootstrapping with replacement.
            * results="p", skill="persistence":
                p value of the hypothesis that the
                difference of skill between the initialized and persistence
                simulations is smaller or equal to zero based on
                bootstrapping with replacement.

        Reference:
            :cite:t:`Goddard2013`

        Example:
            Continuous Ranked Probability Score (``"crps"``) comparing every
            member to every other member (``"m2m"``) reducing dimensions ``member`` and
            ``init`` 50 times after resampling ``member`` dimension with replacement.
            Also calculate reference skill for the ``"persistence"``, ``"climatology"``
            and ``"uninitialized"`` forecast and compare whether initialized skill is
            better than reference skill: Returns verify skill, probability that
            reference forecast performs better than initialized and the lower and
            upper bound of the resample.

            >>> import numpy as np
            >>> np.random.seed(42)
            >>> PerfectModelEnsemble.bootstrap(
            ...     metric="crps",
            ...     comparison="m2m",
            ...     dim=["init", "member"],
            ...     iterations=50,
            ...     resample_dim="member",
            ...     reference=["persistence", "climatology", "uninitialized"],
            ... )
            <xarray.Dataset> Size: 3kB
            Dimensions:  (skill: 4, results: 4, lead: 20)
            Coordinates:
              * lead     (lead) int64 160B 1 2 3 4 5 6 7 8 9 ... 12 13 14 15 16 17 18 19 20
              * skill    (skill) <U13 208B 'initialized' 'persistence' ... 'uninitialized'
              * results  (results) <U12 192B 'verify skill' 'p' 'low_ci' 'high_ci'
            Data variables:
                tos      (skill, results, lead) float64 3kB 0.0621 0.07352 ... 0.117 0.09826
            Attributes: (12/13)
                prediction_skill_software:                         climpred https://climp...
                skill_calculated_by_function:                      PerfectModelEnsemble.b...
                number_of_initializations:                         12
                number_of_members:                                 10
                metric:                                            crps
                comparison:                                        m2m
                ...                                                ...
                reference:                                         ['persistence', 'clima...
                PerfectModel_persistence_from_initialized_lead_0:  False
                resample_dim:                                      member
                sig:                                               95
                iterations:                                        50
                confidence_interval_levels:                        0.975-0.025

        """
        return self._bootstrap(
            metric=metric,
            comparison=comparison,
            dim=dim,
            reference=reference,
            groupby=groupby,
            iterations=iterations,
            sig=sig,
            resample_dim=resample_dim,
            **metric_kwargs,
        )





[docs]
class HindcastEnsemble(PredictionEnsemble):
    """An object for initialized prediction ensembles.

    :py:class:`.HindcastEnsemble` is a sub-class of
    :py:class:`.PredictionEnsemble`. It tracks a
    verification dataset (i.e., observations) associated with the hindcast ensemble
    for easy computation across multiple variables.

    This object is built on :py:class:`xarray.Dataset` and thus requires the input
    object to be an :py:class:`xarray.Dataset` or :py:class:`xarray.DataArray`.
    """


[docs]
    def __init__(self, initialized: Union[xr.DataArray, xr.Dataset]) -> None:
        """Create ``HindcastEnsemble`` from initialized prediction ensemble output.

        Args:
          initialized: initialized prediction ensemble output.

        Attributes:
          observations: datasets dictionary item of verification data to associate with
            the prediction ensemble.
          uninitialized: datasets dictionary item of uninitialized forecast.
        """
        super().__init__(initialized)
        self._datasets.update({"observations": {}})
        self.kind = "hindcast"


    def _apply_climpred_function(
        self, func: Callable[..., xr.Dataset], init: bool, **kwargs: Any
    ) -> Union["HindcastEnsemble", xr.Dataset]:
        """Loop through verification data and apply an arbitrary climpred function.

        Args:
            func: climpred function to apply to object.
            init: Whether or not it's the initialized ensemble.
        """
        hind = self._datasets["initialized"]
        verif = self._datasets["observations"]
        drop_init, drop_obs = self._vars_to_drop(init=init)
        return func(hind.drop_vars(drop_init), verif.drop_vars(drop_obs), **kwargs)

    def _vars_to_drop(self, init: bool = True) -> Tuple[List[str], List[str]]:
        """Return list of variables to drop.

        When comparing initialized/uninitialized to observations.

        This is useful if the two products being compared do not share the same
        variables. I.e., if the observations have ["SST"] and the initialized has
        ["SST", "SALT"], this will return a list with ["SALT"] to be dropped
        from the initialized.

        Args:
          init:
            If ``True``, check variables on the initialized.
            If ``False``, check variables on the uninitialized.

        Returns:
          Lists of variables to drop from the initialized/uninitialized
          and observational Datasets.
        """
        if init:
            init_vars = [var for var in self._datasets["initialized"].data_vars]
        else:
            init_vars = [var for var in self._datasets["uninitialized"].data_vars]
        obs_vars = [var for var in self._datasets["observations"].data_vars]
        # Make lists of variables to drop that aren't in common
        # with one another.
        init_vars_to_drop = list(set(init_vars) - set(obs_vars))
        obs_vars_to_drop = list(set(obs_vars) - set(init_vars))
        return init_vars_to_drop, obs_vars_to_drop


[docs]
    def add_observations(
        self, obs: Union[xr.DataArray, xr.Dataset]
    ) -> "HindcastEnsemble":
        """Add verification data against which to verify the initialized ensemble.

        Same as :py:meth:`.HindcastEnsemble.add_verification`.

        Args:
            obs: observations added to :py:class:`.HindcastEnsemble`.
        """
        if isinstance(obs, xr.DataArray):
            obs = obs.to_dataset()
        match_initialized_dims(self._datasets["initialized"], obs)
        match_initialized_vars(self._datasets["initialized"], obs)
        # Check that time is int, cftime, or datetime; convert ints or cftime to
        # datetime.
        obs = convert_time_index(obs, "time", "obs[time]")
        # Check that converted/original cftime calendar is the same as the
        # initialized calendar to avoid any alignment errors.
        match_calendars(self._datasets["initialized"], obs)
        datasets = self._datasets.copy()
        datasets.update({"observations": obs})
        return self._construct_direct(datasets, kind="hindcast")


    def add_verification(
        self, verif: Union[xr.DataArray, xr.Dataset]
    ) -> "HindcastEnsemble":
        """Add verification data against which to verify the initialized ensemble.

        Same as :py:meth:`.HindcastEnsemble.add_observations`.

        Args:
            verif: verification added to :py:class:`.HindcastEnsemble`.
        """
        return self.add_observations(verif)


[docs]
    def add_uninitialized(
        self, uninit: Union[xr.DataArray, xr.Dataset]
    ) -> "HindcastEnsemble":
        """Add a companion uninitialized ensemble for comparison to verification data.

        Args:
            uninit: uninitialzed ensemble.
        """
        if isinstance(uninit, xr.DataArray):
            uninit = uninit.to_dataset()
        match_initialized_dims(
            self._datasets["initialized"], uninit, uninitialized=True
        )
        match_initialized_vars(self._datasets["initialized"], uninit)
        # Check that init is int, cftime, or datetime; convert ints or cftime to
        # datetime.
        uninit = convert_time_index(uninit, "time", "uninit[time]")
        # Check that converted/original cftime calendar is the same as the
        # initialized calendar to avoid any alignment errors.
        match_calendars(self._datasets["initialized"], uninit, kind2="uninitialized")
        datasets = self._datasets.copy()
        datasets.update({"uninitialized": uninit})
        return self._construct_direct(datasets, kind="hindcast")



[docs]
    def get_observations(self) -> xr.Dataset:
        """Return the :py:class:`xarray.Dataset` of the observations/verification data.

        Returns:
            observations
        """
        return self._datasets["observations"]



[docs]
    def generate_uninitialized(
        self, resample_dim: List[str] = ["init", "member"]
    ) -> "HindcastEnsemble":
        """Generate ``uninitialized`` by resampling from ``initialized``.

        Args:
            resample_dim: dimension to resample from. Must contain ``"init"``.

        Returns:
            resampled ``uninitialized`` ensemble added to
            :py:class:`.HindcastEnsemble`

        Example:
            >>> HindcastEnsemble  # uninitialized from historical simulations
            <climpred.HindcastEnsemble>
            Initialized:
                SST      (init, lead, member) float64 51kB -0.2392 -0.2203 ... 0.618 0.6136
            Uninitialized:
                SST      (time, member) float64 17kB -0.1969 -0.01221 ... 0.4179 0.3974
            Observations:
                SST      (time) float64 488B -0.4015 -0.3524 -0.1851 ... 0.2481 0.346 0.4502

            >>> HindcastEnsemble.generate_uninitialized()  # newly generated from initialized
            <climpred.HindcastEnsemble>
            Initialized:
                SST      (init, lead, member) float64 51kB -0.2392 -0.2203 ... 0.618 0.6136
            Uninitialized:
                SST      (time, member) float64 5kB 0.04868 0.07173 0.09435 ... 0.4158 0.418
            Observations:
                SST      (time) float64 488B -0.4015 -0.3524 -0.1851 ... 0.2481 0.346 0.4502
        """
        uninit = resample_uninitialized_from_initialized(
            self._datasets["initialized"], resample_dim=resample_dim
        )
        datasets = self._datasets.copy()
        datasets.update({"uninitialized": uninit})
        return self._construct_direct(datasets, kind="hindcast")



[docs]
    def plot_alignment(
        self: "HindcastEnsemble",
        alignment: Optional[Union[str, List[str]]] = None,
        reference: Optional[referenceType] = None,
        date2num_units: str = "days since 1960-01-01",
        return_xr: bool = False,
        cmap: str = "viridis",
        edgecolors: str = "gray",
        **plot_kwargs: Any,
    ) -> Any:
        """
        Plot ``initialized`` ``valid_time`` where matching ``verification`` ``time``.

        Depends on ``alignment``. Plots ``days since reference date`` controlled by
        ``date2num_units``. ``NaN`` / white space shows where no verification is done.

        Args:
            alignment: which inits or verification times should be aligned?

                - ``"maximize"``: maximize the degrees of freedom by slicing
                  ``initialized`` and ``verif`` to a common time frame at each lead.
                - ``"same_inits"``: slice to a common ``init`` frame prior to computing
                  metric. This philosophy follows the thought that each lead should be
                  based on the same set of initializations.
                - ``"same_verif"``: slice to a common/consistent verification time frame
                  prior to computing metric. This philosophy follows the thought that
                  each lead should be based on the same set of verification dates.
                - ``None`` defaults to the three above.

            reference: Type of reference forecasts with which to verify against.
                One or more of ``["uninitialized", "persistence", "climatology"]``.
                Defaults to ``None`` meaning no reference.
            date2num_units: passed to ``cftime.date2num`` as units
            return_xr: if ``True`` return :py:class:`xarray.DataArray` else plot
            cmap: color palette
            edgecolors: color of the edges in the plot
            **plot_kwargs: arguments passed to ``plot``.

        Returns:
            :py:class:`xarray.DataArray` if ``return_xr`` else plot

        Example:
            >>> HindcastEnsemble.plot_alignment(alignment=None, return_xr=True)
            <xarray.DataArray 'valid_time' (alignment: 3, lead: 10, init: 61)> Size: 15kB
            array([[[-1826., -1461., -1095., ...,    nan,    nan,    nan],
                    [-1461., -1095.,  -730., ...,    nan,    nan,    nan],
                    [-1095.,  -730.,  -365., ...,    nan,    nan,    nan],
                    ...,
                    [  731.,  1096.,  1461., ...,    nan,    nan,    nan],
                    [ 1096.,  1461.,  1827., ...,    nan,    nan,    nan],
                    [ 1461.,  1827.,  2192., ...,    nan,    nan,    nan]],
            <BLANKLINE>
                   [[   nan,    nan,    nan, ..., 19359., 19724., 20089.],
                    [   nan,    nan,    nan, ..., 19724., 20089.,    nan],
                    [   nan,    nan,    nan, ..., 20089.,    nan,    nan],
                    ...,
                    [   nan,    nan,  1461., ...,    nan,    nan,    nan],
                    [   nan,  1461.,  1827., ...,    nan,    nan,    nan],
                    [ 1461.,  1827.,  2192., ...,    nan,    nan,    nan]],
            <BLANKLINE>
                   [[-1826., -1461., -1095., ..., 19359., 19724., 20089.],
                    [-1461., -1095.,  -730., ..., 19724., 20089.,    nan],
                    [-1095.,  -730.,  -365., ..., 20089.,    nan,    nan],
                    ...,
                    [  731.,  1096.,  1461., ...,    nan,    nan,    nan],
                    [ 1096.,  1461.,  1827., ...,    nan,    nan,    nan],
                    [ 1461.,  1827.,  2192., ...,    nan,    nan,    nan]]],
                  shape=(3, 10, 61))
            Coordinates:
              * init        (init) object 488B 1954-01-01 00:00:00 ... 2014-01-01 00:00:00
              * lead        (lead) int32 40B 1 2 3 4 5 6 7 8 9 10
              * alignment   (alignment) <U10 120B 'same_init' 'same_verif' 'maximize'
                valid_time  (lead, init) object 5kB 1955-01-01 00:00:00 ... 2024-01-01 00...
            Attributes:
                units:    days since 1960-01-01

            >>> HindcastEnsemble.plot_alignment(
            ...     alignment="same_verifs"
            ... )  # doctest: +SKIP
            <matplotlib.collections.QuadMesh object at 0x1405c1520>

        See also:
            https://climpred.readthedocs.io/en/stable/alignment.html.
        """
        from .graphics import _verif_dates_xr

        if not return_xr:
            nctimeaxis_installed = _util.find_spec("nc_time_axis")
            if not nctimeaxis_installed:
                raise ValueError("nc_time_axis >= 1.4.0 required for plotting.")
            else:
                import nc_time_axis  # noqa:

                if Version(nc_time_axis.__version__) < Version("1.4.0"):
                    raise ImportError("nc_time_axis >= 1.4.0 required for plotting.")

        if alignment is None or alignment == []:
            alignment = ["same_init", "same_verif", "maximize"]
        if isinstance(alignment, str):
            alignment = [alignment]

        alignment_dates = []
        alignments_success = []
        for a in alignment:
            try:
                alignment_dates.append(
                    _verif_dates_xr(self, a, reference, date2num_units)
                )
                alignments_success.append(a)
            except CoordinateError as e:
                warnings.warn(f"alignment='{a}' failed. CoordinateError: {e}")
        if not alignment_dates:
            raise CoordinateError(
                "no overlap between hindcast valid_time and verification time for all "
                "leads. Please try plot_alignment with less leads or longer "
                "verification time."
            )
        verif_dates_xr = (
            xr.concat(
                alignment_dates,
                "alignment",
                join="outer",
            )
            .assign_coords(alignment=alignments_success)
            .squeeze()
        )
        if "alignment" in verif_dates_xr.dims:
            plot_kwargs["col"] = "alignment"

        if return_xr:
            return add_time_from_init_lead(verif_dates_xr)
        return verif_dates_xr.plot(cmap=cmap, edgecolors=edgecolors, **plot_kwargs)



[docs]
    def verify(
        self,
        *,
        metric: metricType,
        comparison: comparisonType,
        dim: Optional[dimType] = None,
        alignment: Optional[alignmentType] = None,
        reference: Optional[referenceType] = None,
        groupby: Optional[groupbyType] = None,
        **metric_kwargs: metric_kwargsType,
    ) -> xr.Dataset:
        """Verify the initialized ensemble against observations.

        .. note::
            This will automatically verify against all shared variables
            between the initialized ensemble and observations/verification data.

        Args:
            metric: Metric to apply for verification, see `metrics <../metrics.html>`_
            comparison: How to compare to the observations/verification data.
                See `comparisons <../comparisons.html>`_.
            dim: Dimension(s) to apply metric over. ``dim`` is passed
                on to xskillscore.{metric} and includes xskillscore's ``member_dim``.
                ``dim`` should contain ``member`` when ``comparison`` is probabilistic
                but should not contain ``member`` when ``comparison=e2o``. Defaults to
                ``None`` meaning that all dimensions other than ``lead`` are reduced.
            alignment: which inits or verification times should be aligned?

                - ``"maximize"``: maximize the degrees of freedom by slicing
                  ``initialized`` and ``verif`` to a common time frame at each lead.
                - ``"same_inits"``: slice to a common ``init`` frame prior to computing
                  metric. This philosophy follows the thought that each lead should be
                  based on the same set of initializations.
                - ``"same_verif"``: slice to a common/consistent verification time frame
                  prior to computing metric. This philosophy follows the thought that
                  each lead should be based on the same set of verification dates.

            reference: Type of reference forecasts with which to verify against.
                One or more of ``["uninitialized", "persistence", "climatology"]``.
                Defaults to ``None`` meaning no reference.
            groupby: group ``init`` before passing ``initialized`` to ``verify``.
            **metric_kwargs: arguments passed to ``metric``.

        Returns:
            ``initialized`` and ``reference`` forecast skill reduced by dimensions
            ``dim``

        Example:
            Continuous Ranked Probability Score (``crps``) comparing every member with the
            verification (``m2o``) over the same verification time (``same_verifs``)
            for all leads reducing dimensions ``init`` and ``member``:

            >>> HindcastEnsemble.verify(
            ...     metric="crps",
            ...     comparison="m2o",
            ...     alignment="same_verifs",
            ...     dim=["init", "member"],
            ... )
            <xarray.Dataset> Size: 164B
            Dimensions:  (lead: 10)
            Coordinates:
              * lead     (lead) int32 40B 1 2 3 4 5 6 7 8 9 10
                skill    <U11 44B 'initialized'
            Data variables:
                SST      (lead) float64 80B 0.05208 0.05009 0.05489 ... 0.1083 0.1176
            Attributes:
                prediction_skill_software:     climpred https://climpred.readthedocs.io/
                skill_calculated_by_function:  HindcastEnsemble.verify()
                number_of_initializations:     64
                number_of_members:             10
                alignment:                     same_verifs
                metric:                        crps
                comparison:                    m2o
                dim:                           ['init', 'member']
                reference:                     []

            Root mean square error (``"rmse"``) comparing the ensemble mean with
            the verification (``"e2o"``) over the same initializations
            (``"same_inits"``) for all leads reducing dimension ``init`` while also
            calculating reference skill for the ``"persistence"``, ``"climatology"``
            and ``"uninitialized"`` forecast.

            >>> HindcastEnsemble.verify(
            ...     metric="rmse",
            ...     comparison="e2o",
            ...     alignment="same_inits",
            ...     dim="init",
            ...     reference=["persistence", "climatology", "uninitialized"],
            ... )
            <xarray.Dataset> Size: 568B
            Dimensions:  (skill: 4, lead: 10)
            Coordinates:
              * lead     (lead) int32 40B 1 2 3 4 5 6 7 8 9 10
              * skill    (skill) <U13 208B 'initialized' 'persistence' ... 'uninitialized'
            Data variables:
                SST      (skill, lead) float64 320B 0.08135 0.08254 0.086 ... 0.1012 0.1017
            Attributes:
                prediction_skill_software:     climpred https://climpred.readthedocs.io/
                skill_calculated_by_function:  HindcastEnsemble.verify()
                number_of_initializations:     64
                number_of_members:             10
                alignment:                     same_inits
                metric:                        rmse
                comparison:                    e2o
                dim:                           init
                reference:                     ['persistence', 'climatology', 'uninitiali...

            The verification does not need to reduce a dimension. To obtain the skill for
            each initialization, set ``dim=[]``.

            >>> HindcastEnsemble.verify(
            ...     metric="rmse",
            ...     comparison="e2o",
            ...     alignment="same_verifs",
            ...     dim=[],
            ... )
            <xarray.Dataset> Size: 10kB
            Dimensions:     (lead: 10, init: 61)
            Coordinates:
              * init        (init) object 488B 1954-01-01 00:00:00 ... 2014-01-01 00:00:00
              * lead        (lead) int32 40B 1 2 3 4 5 6 7 8 9 10
                valid_time  (lead, init) object 5kB 1955-01-01 00:00:00 ... 2024-01-01 00...
                skill       <U11 44B 'initialized'
            Data variables:
                SST         (lead, init) float64 5kB nan nan nan nan nan ... nan nan nan nan
            Attributes:
                prediction_skill_software:     climpred https://climpred.readthedocs.io/
                skill_calculated_by_function:  HindcastEnsemble.verify()
                number_of_members:             10
                alignment:                     same_verifs
                metric:                        rmse
                comparison:                    e2o
                dim:                           []
                reference:                     []

        """
        if groupby is not None:
            return self._groupby(
                "verify",
                groupby,
                reference=reference,
                metric=metric,
                comparison=comparison,
                dim=dim,
                alignment=alignment,
                **metric_kwargs,
            )

        if dim is None:
            viable_dims = list(self.get_initialized().isel(lead=0).dims) + [[]]
            raise ValueError(
                "Designate a dimension to reduce over when applying the "
                f"metric. Got {dim}. Choose one or more of {viable_dims}"
            )
        if ("member" in dim) and comparison not in ["m2o", "m2r"]:
            raise ValueError(
                "Comparison must equal 'm2o' with dim='member'. "
                f"Got comparison {comparison}."
            )
        reference = _check_valid_reference(reference)

        def _verify(
            hind,
            verif,
            hist,
            reference,
            metric,
            comparison,
            alignment,
            dim,
            **metric_kwargs,
        ):
            """Interior verify func to be passed to apply func."""
            metric, comparison, dim = _get_metric_comparison_dim(
                hind, metric, comparison, dim, kind=self.kind
            )
            forecast, verif = comparison.function(hind, verif, metric=metric)
            if metric.name == "rps":  # modify metric_kwargs for rps
                metric_kwargs = broadcast_metric_kwargs_for_rps(
                    forecast, verif, metric_kwargs
                )
            forecast = forecast.rename({"init": "time"})
            inits, verif_dates = return_inits_and_verif_dates(
                forecast,
                verif,
                alignment,
                reference=reference,
                hist=hist,
            )
            metric_over_leads = [
                _apply_metric_at_given_lead(
                    verif,
                    verif_dates,
                    lead,
                    initialized=forecast,
                    hist=hist,
                    inits=inits,
                    # Ensure apply metric function returns skill and not reference
                    # results.
                    reference=None,
                    metric=metric,
                    comparison=comparison,
                    dim=dim,
                    **metric_kwargs,
                )
                for lead in forecast["lead"].data
            ]
            result = xr.concat(
                metric_over_leads,
                dim="lead",
                join="outer",
                coords="different",
                compat="equals",
            )  # , **CONCAT_KWARGS)
            result["lead"] = forecast["lead"]

            if reference is not None:
                if "member" in verif.dims:  # if broadcasted before
                    verif = verif.isel(member=0)
                for r in reference:
                    metric_over_leads = [
                        _apply_metric_at_given_lead(
                            verif,
                            verif_dates,
                            lead,
                            initialized=forecast,
                            hist=hist,
                            inits=inits,
                            reference=r,
                            metric=metric,
                            comparison=comparison,
                            dim=dim,
                            **metric_kwargs,
                        )
                        for lead in forecast["lead"].data
                    ]
                    ref = xr.concat(
                        metric_over_leads, dim="lead", **CONCAT_KWARGS
                    ).assign_coords(lead=forecast.lead.values)
                    # fix to get no member dim for uninitialized e2o skill #477
                    if (
                        r == "uninitialized"
                        and comparison.name == "e2o"
                        and "member" in ref.dims
                    ):
                        ref = ref.mean("member")
                        if "time" in ref.dims and "time" not in result.dims:
                            ref = ref.rename({"time": "init"})
                    # fix #735
                    if (
                        r == "uninitialized"
                        and "member" in dim
                        and "member" in ref.dims
                    ):
                        ref = ref.mean("member")
                    result = xr.concat([result, ref], dim="skill", **CONCAT_KWARGS)
            # rename back to 'init'
            if "time" in result.dims:
                result = result.swap_dims({"time": "init"})
            if "time" in result.coords:
                if "init" in result.coords["time"].dims:
                    result = result.rename({"time": "init"})

            # Add dimension/coordinate for different references.
            result = result.assign_coords(skill=["initialized"] + reference)
            return result.squeeze()

        has_dataset(
            self._datasets["observations"], "observational", "verify a forecast"
        )
        if "uninitialized" in reference:
            has_dataset(
                self._datasets["uninitialized"],
                "uninitialized",
                "compute an uninitialized reference forecast",
            )
            hist = self._datasets["uninitialized"]
        else:
            hist = None

        res = self._apply_climpred_function(
            _verify,
            init=True,
            metric=metric,
            comparison=comparison,
            alignment=alignment,
            dim=dim,
            hist=hist,
            reference=reference,
            **metric_kwargs,
        )
        if self._temporally_smoothed:
            res = _reset_temporal_axis(res, self._temporally_smoothed, dim="lead")
            res["lead"].attrs = self.get_initialized().lead.attrs

        if (
            "valid_time" in res.coords
        ):  # NaNs in valid_time only happen for same_verifs and dim=[]
            if res.coords["valid_time"].isnull().any():
                res = add_time_from_init_lead(res)

        res = assign_attrs(
            res,
            self.get_initialized(),
            function_name="HindcastEnsemble.verify()",
            metric=metric,
            comparison=comparison,
            dim=dim,
            alignment=alignment,
            reference=reference,
            **metric_kwargs,
        )
        if "lead_center" in res.coords:
            res.coords["lead_center"].attrs["units"] = res.coords["lead"].attrs.get(
                "units", ""
            )
        return res



[docs]
    def bootstrap(
        self,
        *,
        alignment: alignmentType,
        metric: metricType,
        comparison: comparisonType,
        dim: Optional[dimType] = None,
        reference: Optional[referenceType] = None,
        groupby: Optional[groupbyType] = None,
        iterations: Optional[int] = None,
        sig: int = 95,
        resample_dim: str = "member",
        **metric_kwargs: metric_kwargsType,
    ) -> xr.Dataset:
        """Bootstrap with replacement according to :cite:t:`Goddard2013`.

        Args:
            metric: Metric to apply for verification, see `metrics <../metrics.html>`_
            comparison: How to compare to the observations/verification data.
                See `comparisons <../comparisons.html>`_.
            dim: Dimension(s) to apply metric over. ``dim`` is passed
                on to xskillscore.{metric} and includes xskillscore's ``member_dim``.
                ``dim`` should contain ``member`` when ``comparison`` is probabilistic
                but should not contain ``member`` when ``comparison="e2o"``. Defaults to
                ``None`` meaning that all dimensions other than ``lead`` are reduced.
            reference: Type of reference forecasts with which to verify against.
                One or more of ``["uninitialized", "persistence", "climatology"]``.
                Defaults to ``None`` meaning no reference.
                If ``None`` or ``[]``, returns no p value.
            alignment: which inits or verification times should be aligned?

                - ""maximize: maximize the degrees of freedom by slicing ``init`` and
                  ``verif`` to a common time frame at each lead.
                - ``"same_inits"``: slice to a common ``init`` frame prior to computing
                  metric. This philosophy follows the thought that each lead should be
                  based on the same set of initializations.
                - ``"same_verif"``: slice to a common/consistent verification time frame
                  prior to computing metric. This philosophy follows the thought that
                  each lead should be based on the same set of verification dates.

            iterations: Number of resampling iterations for bootstrapping with
                replacement. Recommended >= 500.
            sig: Significance level in percent for deciding whether
                uninitialized and persistence beat initialized skill.
            resample_dim: dimension to resample from. Default: ``"member"``.

                - ``"member"``: select a different set of members from hind
                - ``"init"``: select a different set of initializations from hind

            groupby: group ``init`` before passing ``initialized`` to ``bootstrap``.
            **metric_kwargs: arguments passed to ``metric``.

        Returns:
            Dataset with dimensions ``results`` and ``skill``.

            * results="verify skill", skill="initialized":
                mean initialized skill
            * results="high_ci", skill="initialized":
                high confidence interval boundary for initialized skill
            * results="p", skill="uninitialized":
                p value of the hypothesis that the
                difference of skill between the initialized and
                uninitialized simulations is smaller or equal to zero
                based on bootstrapping with replacement.
            * results="p", skill="persistence":
                p value of the hypothesis that the
                difference of skill between the initialized and persistence
                simulations is smaller or equal to zero based on
                bootstrapping with replacement.

        References:
            :cite:t:`Goddard2013`

        Example:
            Continuous Ranked Probability Score (``"crps"``) comparing every member
            forecast to the verification (``"m2o"``) over the same initializations
            (``"same_inits"``) for all leads reducing dimension ``member`` 50 times
            after resampling ``member`` dimension with replacement. Note that dimension
            ``init`` remains.
            Also calculate reference skill for the ``"persistence"``, ``"climatology"``
            and ``"uninitialized"`` forecast and compare whether initialized skill is
            better than reference skill: Returns verify skill, probability that
            reference forecast performs better than initialized and the lower and
            upper bound of the resample.

            >>> HindcastEnsemble.bootstrap(
            ...     metric="crps",
            ...     comparison="m2o",
            ...     dim="member",
            ...     iterations=50,
            ...     resample_dim="member",
            ...     alignment="same_inits",
            ...     reference=["persistence", "climatology", "uninitialized"],
            ... )
            <xarray.Dataset> Size: 70kB
            Dimensions:     (skill: 4, results: 4, lead: 10, init: 51)
            Coordinates:
              * lead        (lead) int32 40B 1 2 3 4 5 6 7 8 9 10
                valid_time  (lead, init) object 4kB 1956-01-01 00:00:00 ... 2015-01-01 00...
                init        (init) object 408B 1955-01-01 00:00:00 ... 2005-01-01 00:00:00
              * skill       (skill) <U13 208B 'initialized' ... 'uninitialized'
              * results     (results) <U12 192B 'verify skill' 'p' 'low_ci' 'high_ci'
            Data variables:
                SST         (skill, results, lead, init) float64 65kB 0.1202 ... 0.07578
            Attributes:
                prediction_skill_software:     climpred https://climpred.readthedocs.io/
                skill_calculated_by_function:  HindcastEnsemble.bootstrap()
                number_of_members:             10
                alignment:                     same_inits
                metric:                        crps
                comparison:                    m2o
                dim:                           member
                reference:                     ['persistence', 'climatology', 'uninitiali...
                resample_dim:                  member
                sig:                           95
                iterations:                    50
                confidence_interval_levels:    0.975-0.025

        """
        return self._bootstrap(
            metric=metric,
            comparison=comparison,
            dim=dim,
            alignment=alignment,
            reference=reference,
            groupby=groupby,
            iterations=iterations,
            sig=sig,
            resample_dim=resample_dim,
            **metric_kwargs,
        )



[docs]
    def remove_bias(
        self,
        alignment: alignmentType,
        how: str = "additive_mean",
        train_test_split: str = "unfair",
        train_init: Optional[Union[xr.DataArray, slice]] = None,
        train_time: Optional[Union[xr.DataArray, slice]] = None,
        cv: Union[bool, str] = False,
        **metric_kwargs: metric_kwargsType,
    ) -> "HindcastEnsemble":
        """Remove bias from :py:class:`.HindcastEnsemble`.

        Bias is grouped by ``seasonality`` set via
        :py:class:`~climpred.options.set_options`. When wrapping
        :py:class:`xclim.sdba.adjustment.TrainAdjust` use ``group`` instead.

        Args:
            alignment: which inits or verification times should be aligned?

                - ``""maximize``: maximize the degrees of freedom by slicing
                  ``initialized`` and ``verif`` to a common time frame at each lead.
                - ``"same_inits"``: slice to a common ``init`` frame prior to computing
                  metric. This philosophy follows the thought that each lead should be
                  based on the same set of initializations.
                - ``"same_verif"``: slice to a common/consistent verification time frame
                  prior to computing metric. This philosophy follows the thought that
                  each lead should be based on the same set of verification dates.

            how: what kind of bias removal to perform.
                Defaults to ``"additive_mean"``. Select from:

                - ``"additive_mean"``: correcting the mean forecast additively
                - ``"multiplicative_mean"``: correcting the mean forecast
                  multiplicatively
                - ``"multiplicative_std"``: correcting the standard deviation
                  multiplicatively
                - ``"modified_quantile"``: `Ref <https://www.sciencedirect.com/science/article/abs/pii/S0034425716302000?via%3Dihub>`__
                - ``"basic_quantile"``: `Ref <https://rmets.onlinelibrary.wiley.com/doi/pdf/10.1002/joc.2168>`__
                - ``"gamma_mapping"``: `Ref <https://www.hydrol-earth-syst-sci.net/21/2649/2017/>`__
                - ``"normal_mapping"``: `Ref <https://www.hydrol-earth-syst-sci.net/21/2649/2017/>`__
                - :py:class:`xclim.sdba.adjustment.EmpiricalQuantileMapping`
                - :py:class:`xclim.sdba.adjustment.DetrendedQuantileMapping`
                - :py:class:`xclim.sdba.adjustment.PrincipalComponents`
                - :py:class:`xclim.sdba.adjustment.QuantileDeltaMapping`
                - :py:class:`xclim.sdba.adjustment.Scaling`
                - :py:class:`xclim.sdba.adjustment.LOCI`

            train_test_split: How to separate train period to calculate the bias
                and test period to apply bias correction to? For a detailed
                description, see `Risbey et al. 2021 <http://www.nature.com/articles/s41467-021-23771-z>`_:

                - ``"fair"``: no overlap between ``train`` and ``test`` (recommended).
                  Set either ``train_init`` or ``train_time``.
                - ``"unfair"``: completely overlapping ``train`` and ``test`` (default).
                - ``"unfair-cv"``: overlapping ``train`` and ``test`` except for
                  current `init`, which is
                  `left out <https://en.wikipedia.org/wiki/Cross-validation_(statistics)#Leave-one-out_cross-validation>`_
                  (set ``cv="LOO"``).

            train_init: Define initializations for training
              when ``alignment="same_inits/maximize"``.
            train_time: Define time for training when ``alignment="same_verif"``.
            cv: Only relevant when ``train_test_split="unfair-cv"``.
              Defaults to ``False``.

                - ``True/"LOO"``: Calculate bias by `leaving given initialization out <https://en.wikipedia.org/wiki/Cross-validation_(statistics)#Leave-one-out_cross-validation>`_

                    .. note::

                       Don't use ``cv="LOO"``, see `this comment <https://github.com/pangeo-data/climpred/pull/832#issuecomment-1752473832>`_.

                - ``False``: include all initializations in the calculation of bias,
                  which is much faster and but yields similar skill with a large N of
                  initializations.

            **metric_kwargs: passed to ``xclim.sdba`` (including ``group``)
                or ``XBias_Correction``

        Returns:
            bias removed :py:class:`.HindcastEnsemble`.

        Example:

            Skill from raw model output without bias reduction:

            >>> HindcastEnsemble.verify(
            ...     metric="rmse", comparison="e2o", alignment="maximize", dim="init"
            ... )
            <xarray.Dataset> Size: 164B
            Dimensions:  (lead: 10)
            Coordinates:
              * lead     (lead) int32 40B 1 2 3 4 5 6 7 8 9 10
                skill    <U11 44B 'initialized'
            Data variables:
                SST      (lead) float64 80B 0.08359 0.08141 0.08362 ... 0.1361 0.1552 0.1664
            Attributes:
                prediction_skill_software:     climpred https://climpred.readthedocs.io/
                skill_calculated_by_function:  HindcastEnsemble.verify()
                number_of_initializations:     64
                number_of_members:             10
                alignment:                     maximize
                metric:                        rmse
                comparison:                    e2o
                dim:                           init
                reference:                     []

            Note that this HindcastEnsemble is already bias reduced, therefore
            ``train_test_split="unfair"`` has hardly any effect. Use all
            initializations to calculate bias and verify skill:

            >>> HindcastEnsemble.remove_bias(
            ...     alignment="maximize", how="additive_mean", test_train_split="unfair"
            ... ).verify(
            ...     metric="rmse", comparison="e2o", alignment="maximize", dim="init"
            ... )
            <xarray.Dataset> Size: 164B
            Dimensions:  (lead: 10)
            Coordinates:
              * lead     (lead) int32 40B 1 2 3 4 5 6 7 8 9 10
                skill    <U11 44B 'initialized'
            Data variables:
                SST      (lead) float64 80B 0.08349 0.08039 0.07522 ... 0.08107 0.08255
            Attributes:
                prediction_skill_software:     climpred https://climpred.readthedocs.io/
                skill_calculated_by_function:  HindcastEnsemble.verify()
                number_of_initializations:     64
                number_of_members:             10
                alignment:                     maximize
                metric:                        rmse
                comparison:                    e2o
                dim:                           init
                reference:                     []

            Separate initializations 1954 - 1980 to calculate bias. Note that
            this HindcastEnsemble is already bias reduced, therefore
            ``train_test_split="fair"`` worsens skill here. Generally,
            ``train_test_split="fair"`` is recommended to use for a fair
            comparison against real-time forecasts.

            >>> HindcastEnsemble.remove_bias(
            ...     alignment="maximize",
            ...     how="additive_mean",
            ...     train_test_split="fair",
            ...     train_init=slice("1954", "1980"),
            ... ).verify(
            ...     metric="rmse", comparison="e2o", alignment="maximize", dim="init"
            ... )
            <xarray.Dataset> Size: 164B
            Dimensions:  (lead: 10)
            Coordinates:
              * lead     (lead) int32 40B 1 2 3 4 5 6 7 8 9 10
                skill    <U11 44B 'initialized'
            Data variables:
                SST      (lead) float64 80B 0.132 0.1085 0.08722 ... 0.08209 0.08969 0.08732
            Attributes:
                prediction_skill_software:     climpred https://climpred.readthedocs.io/
                skill_calculated_by_function:  HindcastEnsemble.verify()
                number_of_initializations:     37
                number_of_members:             10
                alignment:                     maximize
                metric:                        rmse
                comparison:                    e2o
                dim:                           init
                reference:                     []

            Wrapping methods ``how`` from
            `xclim <https://xclim.readthedocs.io/en/stable/sdba_api.html>`_ and
            providing ``group`` for ``groupby``:

            >>> HindcastEnsemble.remove_bias(
            ...     alignment="same_init",
            ...     group="init",
            ...     how="EmpiricalQuantileMapping",
            ...     train_test_split="unfair",
            ... ).verify(
            ...     metric="rmse", comparison="e2o", alignment="maximize", dim="init"
            ... )
            <xarray.Dataset> Size: 164B
            Dimensions:  (lead: 10)
            Coordinates:
              * lead     (lead) int32 40B 1 2 3 4 5 6 7 8 9 10
                skill    <U11 44B 'initialized'
            Data variables:
                SST      (lead) float64 80B 0.07097 0.07402 0.06653 ... 0.06697 0.0707
            Attributes:
                prediction_skill_software:     climpred https://climpred.readthedocs.io/
                skill_calculated_by_function:  HindcastEnsemble.verify()
                number_of_initializations:     52
                number_of_members:             10
                alignment:                     maximize
                metric:                        rmse
                comparison:                    e2o
                dim:                           init
                reference:                     []

            Wrapping methods ``how`` from `bias_correction <https://github.com/pankajkarman/bias_correction/blob/master/bias_correction.py>`_:

            >>> HindcastEnsemble.remove_bias(
            ...     alignment="same_init",
            ...     how="modified_quantile",
            ...     train_test_split="unfair",
            ... ).verify(
            ...     metric="rmse", comparison="e2o", alignment="maximize", dim="init"
            ... )
            <xarray.Dataset> Size: 164B
            Dimensions:  (lead: 10)
            Coordinates:
              * lead     (lead) int32 40B 1 2 3 4 5 6 7 8 9 10
                skill    <U11 44B 'initialized'
            Data variables:
                SST      (lead) float64 80B 0.07628 0.08293 0.08169 ... 0.1577 0.1821 0.2087
            Attributes:
                prediction_skill_software:     climpred https://climpred.readthedocs.io/
                skill_calculated_by_function:  HindcastEnsemble.verify()
                number_of_initializations:     52
                number_of_members:             10
                alignment:                     maximize
                metric:                        rmse
                comparison:                    e2o
                dim:                           init
                reference:                     []
        """  # noqa: E501
        if train_test_split not in BIAS_CORRECTION_TRAIN_TEST_SPLIT_METHODS:
            raise NotImplementedError(
                f"train_test_split='{train_test_split}' not implemented. Please choose "
                f"`train_test_split` from {BIAS_CORRECTION_TRAIN_TEST_SPLIT_METHODS}, "
                "see Risbey et al. 2021 "
                "http://www.nature.com/articles/s41467-021-23771-z for description and "
                "https://github.com/pangeo-data/climpred/issues/648 for implementation "
                "status."
            )

        if cv == "LOO":
            warnings.warn(
                'Do not use ``cv="LOO"``, see `comment <https://github.com/pangeo-data/climpred/pull/832#issuecomment-1752473832>`_.'  # noqa: E501
            )

        alignment = _check_valid_alignment(alignment)

        if train_test_split in ["fair"]:
            if (
                (train_init is None)
                or not isinstance(train_init, (slice, xr.DataArray))
            ) and (alignment in ["same_inits", "maximize"]):
                raise ValueError(
                    f'When alignment="{alignment}", please provide `train_init` as '
                    f"`xr.DataArray`, e.g. "
                    '`HindcastEnsemble.coords["init"].slice(start, end)` '
                    "or slice, e.g. `slice(start, end)`, got `train_init={train_init}`."
                )
            if (
                (train_time is None)
                or not isinstance(train_time, (slice, xr.DataArray))
            ) and (alignment in ["same_verif"]):
                raise ValueError(
                    f'When alignment="{alignment}", please provide `train_time` as '
                    "`xr.DataArray`, e.g. "
                    '`HindcastEnsemble.coords["time"].slice(start, end)` '
                    "or slice, e.g. `slice(start, end)`, got `train_time={train_time}`."
                )

            if isinstance(train_init, slice):
                train_init = self.coords["init"].sel(init=train_init)
            if isinstance(train_time, slice):
                train_time = self.coords["time"].sel(time=train_time)

        if how == "mean":
            how = "additive_mean"  # backwards compatibility
        if how in ["additive_mean", "multiplicative_mean", "multiplicative_std"]:
            func = gaussian_bias_removal
        elif how in BIAS_CORRECTION_BIAS_CORRECTION_METHODS:
            func = bias_correction
        elif how in XCLIM_BIAS_CORRECTION_METHODS:
            func = xclim_sdba
        else:
            raise NotImplementedError(
                f"bias removal '{how}' is not implemented, please choose from "
                f" {INTERNAL_BIAS_CORRECTION_METHODS + BIAS_CORRECTION_BIAS_CORRECTION_METHODS}."  # noqa: E501
            )

        if train_test_split in ["unfair-cv"]:
            if cv not in [True, "LOO"]:
                raise ValueError(
                    f"Please provide cross-validation keyword `cv='LOO'` when using "
                    f"`train_test_split='unfair-cv'`, found `cv='{cv}'`."
                )
            else:
                cv = "LOO"  # backward compatibility
            if cv not in CROSS_VALIDATE_METHODS:
                raise NotImplementedError(
                    f"Cross validation method {cv} not implemented. "
                    f"Please choose cv from {CROSS_VALIDATE_METHODS}."
                )
            metric_kwargs["cv"] = cv

        old_coords = self.get_initialized().coords
        self = func(
            self,
            alignment=alignment,
            how=how,
            train_test_split=train_test_split,
            train_init=train_init,
            train_time=train_time,
            **metric_kwargs,
        )
        # TODO: find better location to prevent valid_time with member dims
        if "valid_time" in self._datasets["initialized"].coords:
            if "member" in self._datasets["initialized"].coords["valid_time"].dims:
                self._datasets["initialized"].coords["valid_time"] = (
                    self._datasets["initialized"]
                    .coords["valid_time"]
                    .isel(member=0, drop=True)
                )
        # to avoid ValueError: conflicting sizes for dimension 'time': length 1 on the
        # data but length n on coordinate 'time'
        if "valid_time" in self.get_initialized().coords:
            if self.get_initialized().coords["valid_time"].isnull().any():
                self._datasets["initialized"] = self._datasets["initialized"].dropna(
                    "init"
                )
        # avoid single-item lead dimension being dropped, see #771
        for c in ["lead", "init"]:
            if c not in self.get_initialized().dims and c in old_coords:
                if old_coords[c].size == 1:
                    self._datasets["initialized"] = self._datasets[
                        "initialized"
                    ].assign_coords({c: old_coords[c]})
        if "valid_time" in self.get_initialized().coords:
            if len(self.get_initialized().coords["valid_time"].dims) == 1 and set(
                ["init", "lead"]
            ).issubset(set(self.get_initialized().dims)):
                self._datasets["initialized"].coords["valid_time"] = (
                    add_time_from_init_lead(
                        self.get_initialized().drop_vars("valid_time")
                    ).coords["valid_time"]
                )
        return self