From c05aa43f7e16bc8e821de23907c0b592d9ba8955 Mon Sep 17 00:00:00 2001
From: amelia <asnyder@usgs.gov>
Date: Fri, 29 Mar 2024 15:33:54 -0500
Subject: [PATCH] assert dims dict has all dims

---
 ..._collection_from_zarr_conus404-daily.ipynb | 130 ++++++++++--------
 ...create_item_from_zarr_conus404-daily.ipynb | 108 +++++++--------
 2 files changed, 122 insertions(+), 116 deletions(-)

diff --git a/workflows/examples/create_collection_from_zarr_conus404-daily.ipynb b/workflows/examples/create_collection_from_zarr_conus404-daily.ipynb
index 61d5f552..51923ae5 100644
--- a/workflows/examples/create_collection_from_zarr_conus404-daily.ipynb
+++ b/workflows/examples/create_collection_from_zarr_conus404-daily.ipynb
@@ -45,46 +45,21 @@
   },
   {
    "cell_type": "markdown",
-   "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6",
-   "metadata": {},
-   "source": [
-    "## Open up NHGF STAC Catalog"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# define folder location where your STAC catalog json file is\n",
-    "catalog_path = os.path.join('..', '..', 'catalog')\n",
-    "# open catalog\n",
-    "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "996e60ba-13e4-453a-8534-e62ce747f0fa",
+   "id": "a71f9d19-8fb3-4f47-b4c4-447bb80d8dd5",
    "metadata": {},
    "source": [
-    "## Collection Metadata Input"
+    "## Collection ID"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "482d204d-b5b6-40e5-ac42-55b459be1097",
+   "id": "15ee060d-3127-4024-a1ad-6aa0648667e1",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# name for STAC collection\n",
-    "collection_id = 'conus404-daily'\n",
-    "# description of STAC collection\n",
-    "collection_description = 'CONUS404 40 years of daily values for subset of model output variables derived from hourly values on cloud storage'\n",
-    "# license for dataset\n",
-    "collection_license = 'CC0-1.0'"
+    "# name for STAC collection - should match name of zarr dataset\n",
+    "collection_id = 'conus404-daily'"
    ]
   },
   {
@@ -163,6 +138,30 @@
     "ds"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "996e60ba-13e4-453a-8534-e62ce747f0fa",
+   "metadata": {},
+   "source": [
+    "## Collection Metadata Input"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "482d204d-b5b6-40e5-ac42-55b459be1097",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# description of STAC collection\n",
+    "collection_description = 'CONUS404 40 years of daily values for subset of model output variables derived from hourly values on cloud storage'\n",
+    "# you can consider pulling this from dataset attributes instead of manually typing it:\n",
+    "# collection_description = ds.attrs['title']\n",
+    "\n",
+    "# license for dataset\n",
+    "collection_license = stac_helpers.license_picker(ds.attrs['license'])"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "0bc7e9b3-ad62-4b10-a18e-66b7ed2d35dc",
@@ -407,10 +406,23 @@
   },
   {
    "cell_type": "markdown",
-   "id": "cfb71202-03df-45b5-ac2f-0dc2ee1ab780",
+   "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6",
+   "metadata": {},
+   "source": [
+    "## Open up STAC Catalog and create a collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56",
    "metadata": {},
+   "outputs": [],
    "source": [
-    "## Create pystac collection"
+    "# define folder location where your STAC catalog json file is\n",
+    "catalog_path = os.path.join('..', '..', 'catalog')\n",
+    "# open catalog\n",
+    "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))"
    ]
   },
   {
@@ -556,7 +568,9 @@
    "id": "9e2bbcc5-e45a-4b8c-9d60-601f345e8134",
    "metadata": {},
    "source": [
-    "**Time**"
+    "**Time**\n",
+    "\n",
+    "If you need to manually construct this field, here is a helpful reference: https://en.wikipedia.org/wiki/ISO_8601#Durations"
    ]
   },
   {
@@ -567,6 +581,8 @@
    "outputs": [],
    "source": [
     "time_step = pd.Timedelta(stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True)).isoformat()\n",
+    "# if time is yearly or monthly, you will need to manually construct it:\n",
+    "#time_step = \"P1Y0M0DT0H0M0S\"\n",
     "print(f'time step: {time_step}')"
    ]
   },
@@ -699,26 +715,6 @@
     "# ds.isel(y=slice(ix-1,ix+3)).y"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "dc9c149c-c2ba-4c5f-b48b-7b1b88a0309c",
-   "metadata": {},
-   "source": [
-    "#### extract x, y dimension lower and upper bounds"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "773d2e70-9bc5-4f37-96d0-5ff980ae0c2d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# get x, y bounds for extent of those dimensions (required)\n",
-    "xy_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n",
-    "print(xy_bounds)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "00a5e041-081d-428d-ac2e-75d16de205e6",
@@ -729,7 +725,15 @@
     "\n",
     "Please see [datacube spec](https://github.com/stac-extensions/datacube?tab=readme-ov-file#dimension-object) for details on required fields.\n",
     "\n",
-    "If you have a dimension like \"bnds\" that is used on variables like time_bnds, lon_bnds, lat_bnds to choose either the lower or upper bound, you can use and [additional dimension object](https://github.com/stac-extensions/datacube?tab=readme-ov-file#additional-dimension-object). We recommend making the type \"count\" as Microsoft Planetary Computer did [here](https://github.com/stac-extensions/datacube/blob/9e74fa706c9bdd971e01739cf18dcc53bdd3dd4f/examples/daymet-hi-annual.json#L76)."
+    "If you have a dimension like \"bnds\" or \"nv\" that is used on variables like time_bnds, lon_bnds, lat_bnds to choose either the lower or upper bound, you can use an [additional dimension object](https://github.com/stac-extensions/datacube?tab=readme-ov-file#additional-dimension-object). We recommend making the type \"count\" as Microsoft Planetary Computer did [here](https://github.com/stac-extensions/datacube/blob/9e74fa706c9bdd971e01739cf18dcc53bdd3dd4f/examples/daymet-hi-annual.json#L76).\n",
+    "\n",
+    "Here is an example:\n",
+    "\n",
+    "```\n",
+    "dims_dict = {\n",
+    "            'bnds': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'bnds'), 'extent': [ds.bnds.min().item(), ds.bnds.max().item()]})\n",
+    "            }\n",
+    "```"
    ]
   },
   {
@@ -759,8 +763,8 @@
     "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n",
     "# note that the extent of each dimension should be pulled from the dataset\n",
     "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n",
-    "             dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n",
-    "             dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n",
+    "             dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [spatial_bounds[0], spatial_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n",
+    "             dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [spatial_bounds[1], spatial_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n",
     "             'bottom_top_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'bottom_top_stag')}),\n",
     "             'bottom_top': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'bottom_top')}),\n",
     "             'soil_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'soil_layers_stag')}),\n",
@@ -768,8 +772,18 @@
     "             'y_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, 'y_stag')}),\n",
     "             'snow_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'snow_layers_stag')}),\n",
     "             'snso_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'snso_layers_stag')}),\n",
-    "            }\n",
-    "display(dims_dict)"
+    "            }"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8ab85b09-eb38-404c-910c-13349d5e2234",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make sure every dataset dim is described (and no extras); compare as sets so a list-typed dims and key order do not matter\n",
+    "assert set(dims_dict) == set(dims), f'dims_dict keys and dataset dims differ: {set(dims_dict) ^ set(dims)}'"
    ]
   },
   {
diff --git a/workflows/examples/create_item_from_zarr_conus404-daily.ipynb b/workflows/examples/create_item_from_zarr_conus404-daily.ipynb
index 5c91bcca..80ac6143 100644
--- a/workflows/examples/create_item_from_zarr_conus404-daily.ipynb
+++ b/workflows/examples/create_item_from_zarr_conus404-daily.ipynb
@@ -49,45 +49,20 @@
   },
   {
    "cell_type": "markdown",
-   "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6",
+   "id": "f8c93dbc-174c-4387-be7a-00eccf004509",
    "metadata": {},
    "source": [
-    "## Open up NHGF STAC Catalog"
+    "## Item ID"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# define folder location where your STAC catalog json file is\n",
-    "catalog_path = os.path.join('..', '..', 'catalog_items')\n",
-    "# open catalog\n",
-    "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "996e60ba-13e4-453a-8534-e62ce747f0fa",
+   "id": "65b8979c-a462-4f68-8912-e82158d8811e",
    "metadata": {},
-   "source": [
-    "## Item Metadata Input"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "482d204d-b5b6-40e5-ac42-55b459be1097",
-   "metadata": {
-    "tags": []
-   },
    "outputs": [],
    "source": [
-    "# name for STAC collection\n",
+    "# name for STAC item - should match name of zarr dataset\n",
     "item_id = 'conus404-daily'"
    ]
   },
@@ -109,7 +84,7 @@
    "outputs": [],
    "source": [
     "# url to zarr store that you want to create a collection for\n",
-    "zarr_url = 's3://hytest/conus404/conus404_daily.zarr/'\n",
+    "zarr_url = f's3://mdmf/gdp/{item_id}.zarr/'\n",
     "\n",
     "# define keyword arguments needed for opening the dataset with xarray\n",
     "# ref: https://github.com/stac-extensions/xarray-assets\n",
@@ -416,10 +391,25 @@
   },
   {
    "cell_type": "markdown",
-   "id": "cfb71202-03df-45b5-ac2f-0dc2ee1ab780",
+   "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6",
    "metadata": {},
    "source": [
-    "## Create pystac item"
+    "## Open up STAC Catalog and create an item"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# define folder location where your STAC catalog json file is\n",
+    "catalog_path = os.path.join('..', '..', 'catalog_items')\n",
+    "# open catalog\n",
+    "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))"
    ]
   },
   {
@@ -565,7 +555,9 @@
    "id": "18b8950f-030f-4f78-b9ac-799dc9263cb6",
    "metadata": {},
    "source": [
-    "**Time**"
+    "**Time**\n",
+    "\n",
+    "If you need to manually construct this field, here is a helpful reference: https://en.wikipedia.org/wiki/ISO_8601#Durations"
    ]
   },
   {
@@ -576,6 +568,8 @@
    "outputs": [],
    "source": [
     "time_step = pd.Timedelta(stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True)).isoformat()\n",
+    "# if time is yearly or monthly, you will need to manually construct it:\n",
+    "#time_step = \"P1Y0M0DT0H0M0S\"\n",
     "print(f'time step: {time_step}')"
    ]
   },
@@ -708,26 +702,6 @@
     "# ds.isel(y=slice(ix-1,ix+3)).y"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "1cf871b8-7eee-4c47-81b4-a42583dd60b0",
-   "metadata": {},
-   "source": [
-    "#### extract x, y dimension lower and upper bounds"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "00a18a29-fb9a-4b56-8009-493122997b16",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# get x, y bounds for extent of those dimensions (required)\n",
-    "xy_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n",
-    "print(xy_bounds)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "00a5e041-081d-428d-ac2e-75d16de205e6",
@@ -738,7 +712,15 @@
     "\n",
     "Please see [datacube spec](https://github.com/stac-extensions/datacube?tab=readme-ov-file#dimension-object) for details on required fields.\n",
     "\n",
-    "If you have a dimension like \"bnds\" that is used on variables like time_bnds, lon_bnds, lat_bnds to choose either the lower or upper bound, you can use and [additional dimension object](https://github.com/stac-extensions/datacube?tab=readme-ov-file#additional-dimension-object). We recommend making the type \"count\" as Microsoft Planetary Computer did [here](https://github.com/stac-extensions/datacube/blob/9e74fa706c9bdd971e01739cf18dcc53bdd3dd4f/examples/daymet-hi-annual.json#L76)."
+    "If you have a dimension like \"bnds\" or \"nv\" that is used on variables like time_bnds, lon_bnds, lat_bnds to choose either the lower or upper bound, you can use an [additional dimension object](https://github.com/stac-extensions/datacube?tab=readme-ov-file#additional-dimension-object). We recommend making the type \"count\" as Microsoft Planetary Computer did [here](https://github.com/stac-extensions/datacube/blob/9e74fa706c9bdd971e01739cf18dcc53bdd3dd4f/examples/daymet-hi-annual.json#L76).\n",
+    "\n",
+    "Here is an example:\n",
+    "\n",
+    "```\n",
+    "dims_dict = {\n",
+    "            'bnds': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'bnds'), 'extent': [ds.bnds.min().item(), ds.bnds.max().item()]})\n",
+    "            }\n",
+    "```"
    ]
   },
   {
@@ -768,8 +750,8 @@
     "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n",
     "# note that the extent of each dimension should be pulled from the dataset\n",
     "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step': time_step}),\n",
-    "             dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n",
-    "             dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n",
+    "             dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [spatial_bounds[0], spatial_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n",
+    "             dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [spatial_bounds[1], spatial_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n",
     "             'bottom_top_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'bottom_top_stag')}),\n",
     "             'bottom_top': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'bottom_top')}),\n",
     "             'soil_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'soil_layers_stag')}),\n",
@@ -777,8 +759,18 @@
     "             'y_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, 'y_stag')}),\n",
     "             'snow_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'snow_layers_stag')}),\n",
     "             'snso_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'snso_layers_stag')}),\n",
-    "            }\n",
-    "display(dims_dict)"
+    "            }"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "816ed76c-c287-4116-a507-6a5c2734e24b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make sure every dataset dim is described (and no extras); compare as sets so a list-typed dims and key order do not matter\n",
+    "assert set(dims_dict) == set(dims), f'dims_dict keys and dataset dims differ: {set(dims_dict) ^ set(dims)}'"
    ]
   },
   {
-- 
GitLab