Skip to content

Commit

Permalink
Add dataset generation code (#5)
Browse files Browse the repository at this point in the history
  • Loading branch information
maxrjones authored Jul 27, 2023
1 parent e19a661 commit 3bd61f6
Show file tree
Hide file tree
Showing 5 changed files with 563 additions and 0 deletions.
185 changes: 185 additions & 0 deletions stores/01_cmip6_netcdf_to_zarr.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "a626e70d-432a-40c3-b2cd-507acee1646a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import xarray as xr"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "054815d5-ece9-4506-9104-a1b2ff146693",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Chunk sizes handed to StoreToZarr as target_chunks; the time chunk size also\n",
"# selects which store name is written (see the store_name cell).\n",
"target_chunks = dict(lat=600, lon=1440, time=1)\n",
"# target_chunks = dict(lat=600, lon=1440, time=29)\n",
"target_chunks"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da68c219-f597-4117-ba49-6708a0fd3285",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def format_function(time):\n",
"    \"\"\"Build the S3 URL of the source NetCDF file for one `time` key.\"\"\"\n",
"    directory = \"s3://nex-gddp-cmip6/NEX-GDDP-CMIP6/ACCESS-CM2/historical/r1i1p1f1/tasmax\"\n",
"    filename = f\"tasmax_day_ACCESS-CM2_historical_r1i1p1f1_gn_{time}.nc\"\n",
"    return f\"{directory}/{filename}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e6734a16-8017-4a23-a1de-29b903d33d80",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from pangeo_forge_recipes.patterns import FilePattern, ConcatDim, MergeDim\n",
"from pangeo_forge_recipes import patterns\n",
"\n",
"# range() excludes its end, so the keys run from 1950 through 2014.\n",
"years = [*range(1950, 2015)]\n",
"# Each key is passed to format_function as its `time` argument.\n",
"time_dim = ConcatDim(name=\"time\", keys=years)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "217f6c5e-b5fd-496e-aa2d-b5dfb1c90b2e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from pangeo_forge_recipes.patterns import FilePattern\n",
"\n",
"# Build the full pattern and immediately prune it with nkeep=2, so only a\n",
"# small subset of the yearly files is processed (see FilePattern.prune docs).\n",
"pattern = FilePattern(format_function, time_dim, file_type=\"netcdf4\").prune(nkeep=2)\n",
"pattern"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26b6a25a-7515-4b29-906c-17524b5f9af0",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import apache_beam as beam\n",
"from pangeo_forge_recipes.transforms import OpenURLWithFSSpec, OpenWithXarray, StoreToZarr"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "415e8751-3c57-46e6-bee7-31017f35f3f4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Each supported time chunk size is written to its own store.\n",
"if target_chunks['time'] == 29:\n",
"    store_name = 'data-0'\n",
"elif target_chunks['time'] == 1:\n",
"    store_name = 'data-1'\n",
"else:\n",
"    # Fail loudly: without this branch a fresh kernel hits a NameError below,\n",
"    # and a re-run silently reuses a stale store_name from an earlier execution.\n",
"    raise ValueError(f\"no store name defined for time chunk size {target_chunks['time']!r}\")\n",
"target_root = \"s3://carbonplan-benchmarks/data/NEX-GDDP-CMIP6/ACCESS-CM2/historical/r1i1p1f1/tasmax/tasmax_day_ACCESS-CM2_historical_r1i1p1f1_gn\"\n",
"target_store = os.path.join(target_root, store_name)\n",
"target_store"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d98319d-c6d7-4e44-aa8c-5ba04028235d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Stage order: emit the pattern items, open each URL, open it with xarray,\n",
"# then write everything into the target Zarr store.\n",
"transforms = (\n",
"    beam.Create(pattern.items())\n",
"    # anon=True is forwarded as an fsspec open kwarg (presumably anonymous S3 access).\n",
"    | OpenURLWithFSSpec(open_kwargs=dict(anon=True))\n",
"    | OpenWithXarray(file_type=pattern.file_type)\n",
"    | StoreToZarr(\n",
"        target_root=target_root,\n",
"        store_name=store_name,\n",
"        target_chunks=target_chunks,\n",
"        combine_dims=pattern.combine_dim_keys,\n",
"    )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "527e942b-e476-40e3-ae36-fdec37360375",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Attach the transform chain to a pipeline; the pipeline is executed when the\n",
"# `with` block exits (see the Apache Beam Pipeline documentation).\n",
"with beam.Pipeline() as pipeline:\n",
"    pipeline | transforms"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b6f0fe82-c5f6-4307-b616-c2b8ada46d79",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Open the target store with the zarr engine and display it for inspection.\n",
"ds = xr.open_dataset(filename_or_obj=target_store, engine=\"zarr\")\n",
"ds"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bb03c5c1-e66d-4a08-950e-30f0f86a1ef4",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:benchmark-data]",
"language": "python",
"name": "conda-env-benchmark-data-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
117 changes: 117 additions & 0 deletions stores/02_zarr_to_pyramids.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "320fb744-851e-4602-8871-a250a78714ae",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import xarray as xr\n",
"import zarr\n",
"import os\n",
"from utils import pyramid"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9583a9c8-05c3-4f53-bf02-8ff825ad6c75",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Bucket prefix that every source store and pyramid target is joined onto.\n",
"root_path = \"s3://carbonplan-benchmarks/data\"\n",
"# Forwarded to pyramid() as pixels_per_tile.\n",
"pixels_per_tile = 128\n",
"# NOTE(review): not referenced by any other cell in this notebook -- confirm it is still needed.\n",
"number_of_time_slices = None"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c2661bcf-9576-420d-bf75-e274c04cc3ab",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Dataset roots, relative to root_path, for which pyramids are generated.\n",
"# The pyramid loop derives the actual source and target paths from these.\n",
"orig_stores = [\n",
"    # \"nex-gddp-cmip6/monthly/CMIP6_ensemble_median/tasmax/tasmax_month_ensemble-median_historical.zarr\",\n",
"    \"NEX-GDDP-CMIP6/ACCESS-CM2/historical/r1i1p1f1/tasmax/tasmax_day_ACCESS-CM2_historical_r1i1p1f1_gn\"\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "555f376c-4079-469f-9580-d0c84b06d009",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Maps each target_mb value passed to pyramid() to the name of the pyramid\n",
"# store written under the dataset root; the suffix is the position in the list.\n",
"targets = {\n",
"    target_mb: f\"pyramids-v2-3857-{index}\"\n",
"    for index, target_mb in enumerate((1, 5, 10, 20))\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c84659dc-a53c-433b-8cbf-3a32246268d4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Build one pyramid store per (dataset, target_mb) combination.\n",
"for store in orig_stores:\n",
"    # The source store does not depend on the pyramid target, so resolve it\n",
"    # once per dataset instead of once per target.\n",
"    fp = os.path.join(root_path, store, \"data-1\")\n",
"    for target_mb, path in targets.items():\n",
"        target = os.path.join(root_path, store, path)\n",
"        pyramid(\n",
"            ds_path=fp,\n",
"            target=target,\n",
"            levels=4,\n",
"            pixels_per_tile=pixels_per_tile,\n",
"            target_mb=target_mb,\n",
"        )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5ecafb05-773e-4fda-bc49-0a51723bf86b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:benchmark-data]",
"language": "python",
"name": "conda-env-benchmark-data-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit 3bd61f6

Please sign in to comment.