Source code for climpred.preprocessing.shared

from typing import Any, Callable, Optional, Union

import numpy as np
import xarray as xr

from ..constants import CLIMPRED_ENSEMBLE_DIMS
from .mpi import get_path as get_path_mpi



[docs]
def load_hindcast(
    inits=range(1961, 1965, 1),
    members=range(1, 3, 1),
    preprocess: Optional[Callable] = None,
    lead_offset: int = 1,
    parallel: bool = True,
    engine: Optional[str] = None,
    get_path: Callable = get_path_mpi,
    **get_path_kwargs: Any,
) -> Union[xr.DataArray, xr.Dataset]:
    """
    Concat multi-member, multi-initialization hindcast experiment.

    Into one :py:class:`xarray.Dataset` compatible with `climpred`.

    Args:
        inits (list, array): List of initializations to be loaded.
            Defaults to ``range(1961, 1965)``.
        members (list, array): List of initializations to be loaded.
            Defaults to ``range(1, 3)``.
        preprocess (Callable): ``preprocess`` function accepting and returning
            :py:class:`xarray.Dataset` only. To be passed to
            :py:func:`xarray.open_dataset`. Defaults to None.
        parallel (bool): passed to `xr.open_mfdataset`. Defaults to ``True``.
        engine (str): passed to `xr.open_mfdataset`. Defaults to ``None``.
        get_path (callable): ``get_path`` function specific to modelling center output
            format. Defaults to :py:func:`~climpred.preprocessing.mpi.get_path`.
        **get_path_kwargs (dict): parameters passed to ``**get_path``.

    Returns:
        ``climpred`` compatible dataset with dims: ``member``, ``init``, ``lead``.

    """
    init_list = []
    for init in inits:
        print(f"Processing init {init} ...")
        member_list = []
        for member in members:
            # get path p
            p = get_path(member=member, init=init, **get_path_kwargs)
            # open all leads for specified member and init
            member_ds = xr.open_mfdataset(
                p,
                combine="nested",
                concat_dim="time",
                preprocess=preprocess,
                parallel=parallel,
                engine=engine,
                coords="minimal",  # expecting identical coords
                data_vars="minimal",  # expecting identical vars
                compat="override",  # speed up
            ).squeeze()
            # set new integer time
            member_ds = set_integer_time_axis(member_ds)
            member_list.append(member_ds)
        member_ds = xr.concat(member_list, "member")
        init_list.append(member_ds)
    ds = xr.concat(init_list, "init").rename({"time": "lead"})
    ds["member"] = members
    ds["init"] = inits
    return ds




[docs]
def rename_SLM_to_climpred_dims(
    xro: Union[xr.DataArray, xr.Dataset],
) -> Union[xr.DataArray, xr.Dataset]:
    """
    Rename ensemble dimensions common to SubX or CESM output.

        * ``S`` : Refers to start date and is changed to ``init``
        * ``L`` : Refers to lead time and is changed to ``lead``
        * ``M``: Refers to ensemble member and is changed to ``member``

    Args:
        xro (xr.Dataset): input from CESM/SubX containing dimensions: `S`, `L`, `M`.
    Returns:
        ``climpred`` compatible with dimensions: ``member``, ``init``, ``lead``.
    """
    dim_dict = {"S": "init", "L": "lead", "M": "member"}
    for dim in dim_dict.keys():
        if dim in xro.dims:
            xro = xro.rename({dim: dim_dict[dim]})
    return xro




[docs]
def rename_to_climpred_dims(
    xro: Union[xr.DataArray, xr.Dataset],
) -> Union[xr.DataArray, xr.Dataset]:
    """
    Rename existing dimension to `CLIMPRED_ENSEMBLE_DIMS`.

    This function attempts to autocorrect dimension names to
    climpred standards. e.g., `ensemble_member` becomes `member` and `lead_time`
    becomes `lead`, and `time` gets renamed to `lead`.

    Args:
        xro (xr.Dataset): input from DCPP via `intake-esm <intake-esm.readthedocs.io/>`_
        containing dimension names like `dcpp_init_year`, `time`, `member_id`.
    Returns:
        ``climpred`` compatible with dimensions: ``member``, ``init``, ``lead``.
    """
    for cdim in CLIMPRED_ENSEMBLE_DIMS:
        renamed = False  # set renamed flag to false initiallly
        if cdim not in xro.dims:  # if a CLIMPRED_ENSEMBLE_DIMS is not found
            for c in xro.dims:  # check in xro.dims for dims
                if cdim in str(
                    c
                ):  # containing the string of this CLIMPRED_ENSEMBLE_DIMS
                    xro = xro.rename({c: cdim})
                    renamed = True
        # special case for hindcast when containing time
        if "time" in xro.dims and "lead" not in xro.dims:
            xro = xro.rename({"time": "lead"})
            renamed = True
        elif "lead" in xro.dims:
            renamed = True
        if not renamed:
            raise ValueError(
                f"Couldn't find a dimension to rename to `{cdim}`, found {xro.dims}."
            )
    return xro




[docs]
def set_integer_time_axis(
    xro: Union[xr.DataArray, xr.Dataset], offset: int = 1, time_dim: str = "time"
) -> Union[xr.DataArray, xr.Dataset]:
    """
    Set time axis to integers starting from `offset`.

    Used in hindcast preprocessing before the concatination of `intake-esm` happens.
    """
    xro[time_dim] = np.arange(offset, offset + xro[time_dim].size)
    return xro