Skip to content

Instantly share code, notes, and snippets.

@rsignell
Created October 16, 2025 15:30
Show Gist options
  • Save rsignell/65e2872c92bca11aa767251d280f2862 to your computer and use it in GitHub Desktop.
Save rsignell/65e2872c92bca11aa767251d280f2862 to your computer and use it in GitHub Desktop.
era5_evap.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "096be375-3c98-429f-843e-b53f94049da6",
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"warnings.filterwarnings(\"ignore\", category=UserWarning)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5478c68-745e-423c-ac1e-eedf997bc992",
"metadata": {},
"outputs": [],
"source": [
"import fsspec\n",
"fs = fsspec.filesystem('s3', anon=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1de3bfc8-2eb4-440b-be3e-3f34cbc6389f",
"metadata": {},
"outputs": [],
"source": [
"data_bucket = \"s3://nsf-ncar-era5\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c652eea8-93ca-4791-9a1c-2a331f3091c1",
"metadata": {},
"outputs": [],
"source": [
"flist = fs.glob(f'{data_bucket}/e5.oper.fc.sfc.accumu/1960*/*128_182_e*.nc')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "08e66934-0405-4fa7-9a04-a6528c6d8577",
"metadata": {},
"outputs": [],
"source": [
"print(len(flist))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "47383323-52d7-4ae5-bf31-cc74f547f79d",
"metadata": {},
"outputs": [],
"source": [
"flist = [f's3://{f}' for f in flist]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa06c4b3-7c50-4b65-b8bc-caca5522cb7f",
"metadata": {},
"outputs": [],
"source": [
"import icechunk\n",
"import xarray as xr\n",
"from obstore.store import from_url\n",
"\n",
"from virtualizarr import open_virtual_dataset\n",
"from virtualizarr.parsers import HDFParser\n",
"from virtualizarr.registry import ObjectStoreRegistry"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f4659e2-bafd-4679-9a00-222b92047f52",
"metadata": {},
"outputs": [],
"source": [
"from dotenv import load_dotenv\n",
"import os\n",
"_ = load_dotenv(f'{os.environ['HOME']}/dotenv/rsignell4.env')\n",
"\n",
"# Define storage\n",
"storage_endpoint = 'https://pangeo-eosc-minioapi.vm.fedcloud.eu'\n",
"storage_bucket = 'rsignell4-protocoast'\n",
"storage_name = 'era5-evap-icechunk'\n",
"\n",
"data_bucket = \"s3://nsf-ncar-era5\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "865edd39-32f4-4dcc-b556-44c0789d0846",
"metadata": {},
"outputs": [],
"source": [
"bucket = data_bucket\n",
"store = from_url(bucket, region=\"us-west-2\", skip_signature=True)\n",
"registry = ObjectStoreRegistry({bucket: store})\n",
"parser = HDFParser()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7d02c8f-0e04-4d51-a80b-83e229e14dd9",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"ds_list = [\n",
" open_virtual_dataset(\n",
" url=url,\n",
" parser=parser,\n",
" registry=registry,\n",
" loadable_variables=[\"forecast_initial_time\"],\n",
" )\n",
" for url in flist\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e304421a-b519-4875-b2d3-272df25023b7",
"metadata": {},
"outputs": [],
"source": [
"_ = [print(ds.E.shape) for ds in ds_list]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36726b02-0d8d-4e42-b297-8810067c9c35",
"metadata": {},
"outputs": [],
"source": [
"print(ds_list[0].E.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6919a6d6-2c4b-4c0e-a974-8557bad7beb7",
"metadata": {},
"outputs": [],
"source": [
"ds = xr.concat(\n",
" [ds_list[0], ds_list[2]],\n",
" dim=\"forecast_initial_time\",\n",
" coords=\"minimal\",\n",
" compat=\"override\",\n",
" combine_attrs=\"override\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b0d17275-28e0-46ec-ae2b-47ce739d98c7",
"metadata": {},
"outputs": [],
"source": [
"ds = xr.concat(\n",
" ds_list,\n",
" dim=\"forecast_initial_time\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b6046afe-0e83-438f-8f3b-4152ce06120c",
"metadata": {},
"outputs": [],
"source": [
"# this all refers to the Icechunk Metadata storage\n",
"storage = icechunk.s3_storage(\n",
" bucket=storage_bucket,\n",
" prefix=f\"icechunk/{storage_name}\",\n",
" from_env=True,\n",
" endpoint_url=storage_endpoint,\n",
" region='not_used', # N/A for Pangeo-EOSC bucket, but required param\n",
" force_path_style=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "870c3977-0805-46c1-9a07-318c1e1689aa",
"metadata": {},
"outputs": [],
"source": [
"# this refers to the actual data files \n",
"\n",
"config = icechunk.RepositoryConfig.default()\n",
"\n",
"config.set_virtual_chunk_container(\n",
" icechunk.VirtualChunkContainer(\n",
" url_prefix=f\"s3://{storage_bucket}/\",\n",
" store=icechunk.s3_store(region=\"us-west-2\", anonymous=True, s3_compatible=True, \n",
" force_path_style=True, endpoint_url=storage_endpoint),\n",
" ),\n",
")\n",
"\n",
"repo = icechunk.Repository.create(storage, config)\n",
"session = repo.writable_session(\"main\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e3f141f-5802-4383-9351-5f753a284213",
"metadata": {},
"outputs": [],
"source": [
"ds.virtualize.to_icechunk(session.store)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6305cc3f-9be9-4bf6-bc37-0ebdffd7aff2",
"metadata": {},
"outputs": [],
"source": [
"session.commit(\"Write one year of ERA5 Evap data\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment