Created
October 16, 2025 15:30
-
-
Save rsignell/65e2872c92bca11aa767251d280f2862 to your computer and use it in GitHub Desktop.
era5_evap.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "096be375-3c98-429f-843e-b53f94049da6", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import warnings\n", | |
| "warnings.filterwarnings(\"ignore\", category=UserWarning)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "e5478c68-745e-423c-ac1e-eedf997bc992", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import fsspec\n", | |
| "fs = fsspec.filesystem('s3', anon=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "1de3bfc8-2eb4-440b-be3e-3f34cbc6389f", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "data_bucket = \"s3://nsf-ncar-era5\"" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "c652eea8-93ca-4791-9a1c-2a331f3091c1", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "flist = fs.glob(f'{data_bucket}/e5.oper.fc.sfc.accumu/1960*/*128_182_e*.nc')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "08e66934-0405-4fa7-9a04-a6528c6d8577", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "print(len(flist))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "47383323-52d7-4ae5-bf31-cc74f547f79d", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "flist = [f's3://{f}' for f in flist]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "fa06c4b3-7c50-4b65-b8bc-caca5522cb7f", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import icechunk\n", | |
| "import xarray as xr\n", | |
| "from obstore.store import from_url\n", | |
| "\n", | |
| "from virtualizarr import open_virtual_dataset\n", | |
| "from virtualizarr.parsers import HDFParser\n", | |
| "from virtualizarr.registry import ObjectStoreRegistry" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "6f4659e2-bafd-4679-9a00-222b92047f52", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from dotenv import load_dotenv\n", | |
| "import os\n", | |
| "_ = load_dotenv(f'{os.environ[\"HOME\"]}/dotenv/rsignell4.env')\n", | |
| "\n", | |
| "# Define storage\n", | |
| "storage_endpoint = 'https://pangeo-eosc-minioapi.vm.fedcloud.eu'\n", | |
| "storage_bucket = 'rsignell4-protocoast'\n", | |
| "storage_name = 'era5-evap-icechunk'\n", | |
| "\n", | |
| "data_bucket = \"s3://nsf-ncar-era5\"" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "865edd39-32f4-4dcc-b556-44c0789d0846", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "bucket = data_bucket\n", | |
| "store = from_url(bucket, region=\"us-west-2\", skip_signature=True)\n", | |
| "registry = ObjectStoreRegistry({bucket: store})\n", | |
| "parser = HDFParser()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "c7d02c8f-0e04-4d51-a80b-83e229e14dd9", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "%%time\n", | |
| "ds_list = [\n", | |
| " open_virtual_dataset(\n", | |
| " url=url,\n", | |
| " parser=parser,\n", | |
| " registry=registry,\n", | |
| " loadable_variables=[\"forecast_initial_time\"],\n", | |
| " )\n", | |
| " for url in flist\n", | |
| "]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "e304421a-b519-4875-b2d3-272df25023b7", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "_ = [print(ds.E.shape) for ds in ds_list]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "36726b02-0d8d-4e42-b297-8810067c9c35", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "print(ds_list[0].E.shape)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "6919a6d6-2c4b-4c0e-a974-8557bad7beb7", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "ds = xr.concat(\n", | |
| " [ds_list[0], ds_list[2]],\n", | |
| " dim=\"forecast_initial_time\",\n", | |
| " coords=\"minimal\",\n", | |
| " compat=\"override\",\n", | |
| " combine_attrs=\"override\",\n", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "b0d17275-28e0-46ec-ae2b-47ce739d98c7", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "ds = xr.concat(\n", | |
| " ds_list,\n", | |
| " dim=\"forecast_initial_time\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "b6046afe-0e83-438f-8f3b-4152ce06120c", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# this all refers to the Icechunk Metadata storage\n", | |
| "storage = icechunk.s3_storage(\n", | |
| " bucket=storage_bucket,\n", | |
| " prefix=f\"icechunk/{storage_name}\",\n", | |
| " from_env=True,\n", | |
| " endpoint_url=storage_endpoint,\n", | |
| " region='not_used', # N/A for Pangeo-EOSC bucket, but required param\n", | |
| " force_path_style=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "870c3977-0805-46c1-9a07-318c1e1689aa", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# this refers to the actual data files \n", | |
| "\n", | |
| "config = icechunk.RepositoryConfig.default()\n", | |
| "\n", | |
| "config.set_virtual_chunk_container(\n", | |
| " icechunk.VirtualChunkContainer(\n", | |
| " url_prefix=f\"s3://{storage_bucket}/\",\n", | |
| " store=icechunk.s3_store(region=\"us-west-2\", anonymous=True, s3_compatible=True, \n", | |
| " force_path_style=True, endpoint_url=storage_endpoint),\n", | |
| " ),\n", | |
| ")\n", | |
| "\n", | |
| "repo = icechunk.Repository.create(storage, config)\n", | |
| "session = repo.writable_session(\"main\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "6e3f141f-5802-4383-9351-5f753a284213", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "ds.virtualize.to_icechunk(session.store)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "6305cc3f-9be9-4bf6-bc37-0ebdffd7aff2", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "session.commit(\"Write one year of ERA5 Evap data\")" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.13.7" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment