#!conda install intake fsspec intake-xarray intake-thredds -c conda-forge -y
#!conda install eccodes cfgrib -c conda-forge -y
#!pip install climetlab --quiet
# linting
%load_ext nb_black
%load_ext lab_black
Calculate skill for NWP model GEFS for 6-hourly global forecasts#
import intake
import fsspec # caching downloads
# specify caching location, where to store files to with their original names
fsspec.config.conf["simplecache"] = {
"cache_storage": "../my_caching_folder",
"same_names": True,
}
import intake_xarray # access files hosted via THREDDS
import climetlab # download ERA5
import xarray as xr
import numpy as np
import climpred # forecast verification
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[5], line 1
----> 1 import intake
2 import fsspec # caching downloads
4 # specify caching location, where to store files to with their original names
ModuleNotFoundError: No module named 'intake'
Find the data#
GEFS output can be found on a THREDDS
server: https://www.ncei.noaa.gov/thredds/catalog/model-gefs-003/202008/20200831/catalog.html
Here, we use intake-thredds
to access the THREDDS catalog, intake-xarray
to access the files and cache them with fsspec
. However, you can also download the files manually, e.g. with wget
:
https://intake.readthedocs.io/en/latest/
https://intake-xarray.readthedocs.io/en/latest/
https://intake-thredds.readthedocs.io/en/latest/
https://filesystem-spec.readthedocs.io/en/latest/
# all the metadata about GEFS
cat = intake.open_thredds_cat(
"https://www.ncei.noaa.gov/thredds/catalog/model-gefs-003/202008/20200831/catalog.html"
)
cat
/Users/aaron.spring/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/siphon/catalog.py:292: UserWarning: URL https://www.ncei.noaa.gov/thredds/catalog/model-gefs-003/202008/20200831/catalog.html returned HTML. Changing to: https://www.ncei.noaa.gov/thredds/catalog/model-gefs-003/202008/20200831/catalog.xml
warnings.warn('URL {} returned HTML. Changing to: {}'.format(self.catalog_url,
No name found:
args:
url: https://www.ncei.noaa.gov/thredds/catalog/model-gefs-003/202008/20200831/catalog.html
description: ''
driver: intake_thredds.cat.ThreddsCatalog
metadata:
authority:
- gov.noaa.ncdc
contributor:
Scientific Contact:
- Zoltar Toth
Technical Contact:
- Yuejian Zhu
creator:
- {}
dataType: GRID
date:
- type: created
value: '2007-03-27'
- type: issued
value: '2007-10-01'
documentation:
abstract:
- The Global Ensemble Forecast System (GEFS) is a weather forecast model made
up of 21 separate forecasts, or ensemble members. The National Centers for
Environmental Prediction (NCEP) started the GEFS to address the nature of
uncertainty in weather observations, which are used to initialize weather
forecast models. The proverbial butterfly flapping her wings can have a cascading
effect leading to wind gusts thousands of miles away. This extreme example
illustrates that tiny, unnoticeable differences between reality and what is
actually measured can, over time, lead to noticeable differences between what
a weather model forecast predicts and reality itself. The GEFS attempts to
quantify the amount of uncertainty in a forecast by generating an ensemble
of multiple forecasts, each minutely different, or perturbed, from the original
observations. With global coverage, GEFS is produced four times a day with
weather forecasts going out to 16 days and a 6 hour temporal resolution.
comment:
- Quantify the amount of uncertainty in mesoscale guidance by generating an
ensemble of multiple forecasts, each minutely different, or perturbed, from
the original observations.
format:
- GRIB 2
funding:
- DOC/NOAA/NWS/NCEP/EMC > Environmental Modeling Center, National Centers for
Environmental Prediction, National Weather Service, NOAA, U.S. Department
of Commerce
rights:
- Electronic downloads of the data are free, however fees apply for data certification
and distribution of the data on physical media. Fees vary based on order specifications.
summary:
- The Global Ensemble Forecast System (GEFS) is a weather forecast model made
up of 21 separate forecasts, or ensemble members. The National Centers for
Environmental Prediction (NCEP) started the GEFS to address the nature of
uncertainty in weather observations, which are used to initialize weather
forecast models. The proverbial butterfly flapping her wings can have a cascading
effect leading to wind gusts thousands of miles away. This extreme example
illustrates that tiny, unnoticeable differences between reality and what is
actually measured can, over time, lead to noticeable differences between what
a weather model forecast predicts and reality itself. The GEFS attempts to
quantify the amount of uncertainty in a forecast by generating an ensemble
of multiple forecasts, each minutely different, or perturbed, from the original
observations. With global coverage, GEFS is produced four times a day with
weather forecasts going out to 16 days and a 6 hour temporal resolution. This
dataset has 1.0 degree horizontal resolution.
xlink:
- href: http://www.nco.ncep.noaa.gov/pmb/docs/grib2/
title: Format Specification
- href: http://www.emc.ncep.noaa.gov/gmb/ens/ens_info.html
title: Reference Papers
external_metadata:
ISO 19115-2:2009(E) - Collection Level Metadata: http://gis.ncdc.noaa.gov/geoportal/rest/document?id=%7B643C822F-67BC-4357-B280-C9A8821B91D3%7D
geospatialCoverage:
- {}
inherited: true
keyword:
- name: "GOVERNMENT AGENCIES-U.S. FEDERAL AGENCIES, DOC, NOAA, DOC/NOAA/NESDIS/NCEI,\
\ \n\t National Centers for Environmental Information, NESDIS, U.S. Department\
\ of Commerce, \n\t EARTH SCIENCE SERVICES, MODELS, EARTH SCIENCE REANALYSES/ASSIMILATION\
\ MODELS"
vocabulary: GCMD
project:
- name: Global Ensemble Forecast System (GEFS)
publisher:
- {}
serviceName: ALL
timeCoverage:
- {}
Opening without backend_kwargs
raised DatasetBuildError
. Need to specify variable by filter_by_keys
for grib
files.
DatasetBuildError: multiple values for unique key, try re-open the file with one of:
filter_by_keys={‘typeOfLevel’: ‘isobaricInhPa’}
filter_by_keys={‘typeOfLevel’: ‘surface’}
filter_by_keys={‘typeOfLevel’: ‘depthBelowLandLayer’}
filter_by_keys={‘typeOfLevel’: ‘heightAboveGround’}
filter_by_keys={‘typeOfLevel’: ‘atmosphereSingleLayer’}
filter_by_keys={‘typeOfLevel’: ‘atmosphere’}
filter_by_keys={‘typeOfLevel’: ‘nominalTop’}
filter_by_keys={‘typeOfLevel’: ‘pressureFromGroundLayer’}
filter_by_keys={‘typeOfLevel’: ‘meanSea’}
# how to open grib files: https://github.com/ecmwf/cfgrib/issues/170
intake_xarray.NetCDFSource(
"simplecache::https://www.ncei.noaa.gov/thredds/fileServer/model-gefs-003/202008/20200831/gens-a_3_20200831_1800_000_20.grb2",
xarray_kwargs=dict(
engine="cfgrib",
backend_kwargs=dict(
filter_by_keys={"typeOfLevel": "heightAboveGround", "shortName": "2t"}
),
),
).to_dask().coords
Coordinates:
number int64 ...
time datetime64[ns] ...
step timedelta64[ns] ...
heightAboveGround float64 ...
* latitude (latitude) float64 90.0 89.0 88.0 ... -88.0 -89.0 -90.0
* longitude (longitude) float64 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0
valid_time datetime64[ns] ...
Get forecasts#
inits_time = "0000" # get forecasts started at 00:00
inits = ["20200829", "20200830", "20200831"] # four initial dates
members = range(5) # 5 members out of 20
leads = np.arange(0, 6 * 4 * 2 + 1, 6) # 6h lead forecasts, 9 leads upto 48h
for init in inits:
for lead in leads:
for member in members:
try:
url = f"https://www.ncei.noaa.gov/thredds/fileServer/model-gefs-003/202008/{init}/gens-a_3_{init}_{inits_time}_{str(lead).zfill(3)}_{str(member).zfill(2)}.grb2"
# print(f'download init = {init}, lead = {lead}, member = {member}')
intake_xarray.NetCDFSource(
f"simplecache::{url}",
xarray_kwargs=dict(
engine="cfgrib",
backend_kwargs=dict(
filter_by_keys={
"typeOfLevel": "heightAboveGround",
"cfVarName": "t2m",
}
),
),
).to_dask()
except Exception as e:
print("failed", type(e).__name__, e)
init = xr.concat(
[xr.concat(
[xr.open_mfdataset(f'../my_caching_folder/gens-a_3_{init}_{inits_time}_{str(lead).zfill(3)}_*.grb2',
concat_dim='member', combine='nested',
engine='cfgrib', backend_kwargs=dict(filter_by_keys={'typeOfLevel': 'heightAboveGround', "cfVarName": "t2m"}))
for lead in leads],
dim='step') for init in inits],
dim='time')
# save time when reproducing
init = init.compute()
init.to_netcdf('tmp_GEFS_a.nc')
init = xr.open_dataset("tmp_GEFS_a.nc")
Get observations#
climetlab
wraps cdsapi
to download from the Copernicus Climate Data Store (CDS):
https://cds.climate.copernicus.eu/cdsapp#!/home
https://climetlab.readthedocs.io/en/latest/
https://github.com/ecmwf/cdsapi/
obs = (
climetlab.load_source(
"cds",
"reanalysis-era5-single-levels",
product_type="reanalysis",
time=["00:00", "06:00", "12:00", "18:00"],
grid=[1.0, 1.0],
param="2t",
date=[
"2020-08-29",
"2020-08-30",
"2020-08-31",
"2020-09-01",
"2020-09-02",
"2020-09-03",
],
)
.to_xarray()
.squeeze(drop=True)
).drop_vars("valid_time")
# climetlab or cds enable logging.INFO
import logging
logger = logging.getLogger()
logger.setLevel(logging.ERROR)
Forecast skill verification#
with HindcastEnsemble
alignment = "same_inits"
hindcast = climpred.HindcastEnsemble(init.drop_vars("valid_time")).add_observations(obs)
/Users/aaron.spring/Coding/climpred/climpred/checks.py:233: UserWarning: Did not find dimension "init", but renamed dimension time with CF-complying standard_name "forecast_reference_time" to init.
warnings.warn(
/Users/aaron.spring/Coding/climpred/climpred/checks.py:233: UserWarning: Did not find dimension "lead", but renamed dimension step with CF-complying standard_name "forecast_period" to lead.
warnings.warn(
# still experimental, help appreciated https://github.com/pangeo-data/climpred/issues/605
# with climpred.set_options(seasonality='month'):
# hindcast = hindcast.remove_bias(alignment=alignment, cross_validate=False, how='mean')
skill = hindcast.isel(lead=range(1, 9)).verify(
metric="crps", comparison="m2o", alignment=alignment, dim=["init", "member"]
)
# zooming into north america
skill.sel(longitude=slice(200, 320), latitude=slice(70, 15)).t2m.plot(
col="lead", col_wrap=4, robust=True, aspect=2.5
)
<xarray.plot.facetgrid.FacetGrid at 0x147d72d00>
