diff --git a/catalog/GMO/collection.json b/catalog/GMO/collection.json
new file mode 100644
index 0000000000000000000000000000000000000000..e97ffa639dff4d265261259292795723afd50867
--- /dev/null
+++ b/catalog/GMO/collection.json
@@ -0,0 +1,200 @@
+{
+  "type": "Collection",
+  "id": "GMO",
+  "stac_version": "1.0.0",
+  "description": "Gridded Observed Meteorological Data, 1950-1999",
+  "links": [
+    {
+      "rel": "root",
+      "href": "../catalog.json",
+      "type": "application/json"
+    },
+    {
+      "rel": "parent",
+      "href": "../catalog.json",
+      "type": "application/json"
+    }
+  ],
+  "stac_extensions": [
+    "https://stac-extensions.github.io/datacube/v2.2.0/schema.json"
+  ],
+  "cube:dimensions": {
+    "time": {
+      "type": "temporal",
+      "description": "Time axis",
+      "extent": [
+        "1950-01-01T00:00:00Z",
+        "1999-12-31T00:00:00Z"
+      ],
+      "step": "P1DT0H0M0S"
+    },
+    "longitude": {
+      "type": "spatial",
+      "axis": "x",
+      "description": "Longitude",
+      "extent": [
+        -124.6875,
+        -67.0625
+      ],
+      "step": 0.125,
+      "reference_system": "{\"$schema\":\"https://proj.org/schemas/v0.5/projjson.schema.json\",\"type\":\"GeographicCRS\",\"name\":\"WGS 84\",\"datum_ensemble\":{\"name\":\"World Geodetic System 1984 ensemble\",\"members\":[{\"name\":\"World Geodetic System 1984 (Transit)\"},{\"name\":\"World Geodetic System 1984 (G730)\"},{\"name\":\"World Geodetic System 1984 (G873)\"},{\"name\":\"World Geodetic System 1984 (G1150)\"},{\"name\":\"World Geodetic System 1984 (G1674)\"},{\"name\":\"World Geodetic System 1984 (G1762)\"},{\"name\":\"World Geodetic System 1984 (G2139)\"}],\"ellipsoid\":{\"name\":\"WGS 84\",\"semi_major_axis\":6378137,\"inverse_flattening\":298.257223563},\"accuracy\":\"2.0\"},\"coordinate_system\":{\"subtype\":\"ellipsoidal\",\"axis\":[{\"name\":\"Geodetic latitude\",\"abbreviation\":\"Lat\",\"direction\":\"north\",\"unit\":\"degree\"},{\"name\":\"Geodetic longitude\",\"abbreviation\":\"Lon\",\"direction\":\"east\",\"unit\":\"degree\"}]},\"scope\":\"Horizontal component of 3D system.\",\"area\":\"World.\",\"bbox\":{\"south_latitude\":-90,\"west_longitude\":-180,\"north_latitude\":90,\"east_longitude\":180},\"id\":{\"authority\":\"EPSG\",\"code\":4326}}"
+    },
+    "latitude": {
+      "type": "spatial",
+      "axis": "y",
+      "description": "Latitude",
+      "extent": [
+        25.1875,
+        52.8125
+      ],
+      "step": 0.125,
+      "reference_system": "{\"$schema\":\"https://proj.org/schemas/v0.5/projjson.schema.json\",\"type\":\"GeographicCRS\",\"name\":\"WGS 84\",\"datum_ensemble\":{\"name\":\"World Geodetic System 1984 ensemble\",\"members\":[{\"name\":\"World Geodetic System 1984 (Transit)\"},{\"name\":\"World Geodetic System 1984 (G730)\"},{\"name\":\"World Geodetic System 1984 (G873)\"},{\"name\":\"World Geodetic System 1984 (G1150)\"},{\"name\":\"World Geodetic System 1984 (G1674)\"},{\"name\":\"World Geodetic System 1984 (G1762)\"},{\"name\":\"World Geodetic System 1984 (G2139)\"}],\"ellipsoid\":{\"name\":\"WGS 84\",\"semi_major_axis\":6378137,\"inverse_flattening\":298.257223563},\"accuracy\":\"2.0\"},\"coordinate_system\":{\"subtype\":\"ellipsoidal\",\"axis\":[{\"name\":\"Geodetic latitude\",\"abbreviation\":\"Lat\",\"direction\":\"north\",\"unit\":\"degree\"},{\"name\":\"Geodetic longitude\",\"abbreviation\":\"Lon\",\"direction\":\"east\",\"unit\":\"degree\"}]},\"scope\":\"Horizontal component of 3D system.\",\"area\":\"World.\",\"bbox\":{\"south_latitude\":-90,\"west_longitude\":-180,\"north_latitude\":90,\"east_longitude\":180},\"id\":{\"authority\":\"EPSG\",\"code\":4326}}"
+    },
+    "bound": {
+      "type": "count",
+      "description": null,
+      "extent": [
+        0,
+        1
+      ]
+    }
+  },
+  "cube:variables": {
+    "Prcp": {
+      "dimensions": [
+        "time",
+        "latitude",
+        "longitude"
+      ],
+      "type": "data",
+      "description": "daily_avg_precipitation",
+      "unit": "mm/d"
+    },
+    "Tavg": {
+      "dimensions": [
+        "time",
+        "latitude",
+        "longitude"
+      ],
+      "type": "data",
+      "description": "daily_avg_air_temp",
+      "unit": "C"
+    },
+    "Tmax": {
+      "dimensions": [
+        "time",
+        "latitude",
+        "longitude"
+      ],
+      "type": "data",
+      "description": "daily_max_air_temp",
+      "unit": "C"
+    },
+    "Tmin": {
+      "dimensions": [
+        "time",
+        "latitude",
+        "longitude"
+      ],
+      "type": "data",
+      "description": "daily_min_air_temp",
+      "unit": "C"
+    },
+    "Wind": {
+      "dimensions": [
+        "time",
+        "latitude",
+        "longitude"
+      ],
+      "type": "data",
+      "description": "daily_avg_wind_speed",
+      "unit": "m/s"
+    },
+    "bounds_latitude": {
+      "dimensions": [
+        "latitude",
+        "bound"
+      ],
+      "type": "data",
+      "description": null,
+      "unit": null
+    },
+    "bounds_longitude": {
+      "dimensions": [
+        "longitude",
+        "bound"
+      ],
+      "type": "data",
+      "description": null,
+      "unit": null
+    },
+    "crs": {
+      "dimensions": [],
+      "type": "auxiliary",
+      "description": null,
+      "unit": null
+    }
+  },
+  "extent": {
+    "spatial": {
+      "bbox": [
+        [
+          -124.6875,
+          25.1875,
+          -67.0625,
+          52.8125
+        ]
+      ]
+    },
+    "temporal": {
+      "interval": [
+        [
+          "1950-01-01T00:00:00Z",
+          "1999-12-31T00:00:00Z"
+        ]
+      ]
+    }
+  },
+  "license": "Unlicense",
+  "assets": {
+    "zarr-s3-osn": {
+      "href": "s3://mdmf/gdp/GMO.zarr/",
+      "type": "application/vnd+zarr",
+      "description": "Open Storage Network Pod S3 API access to collection zarr group",
+      "xarray:open_kwargs": {
+        "chunks": {},
+        "engine": "zarr",
+        "consolidated": true
+      },
+      "xarray:storage_options": {
+        "anon": true,
+        "client_kwargs": {
+          "endpoint_url": "https://usgs.osn.mghpcc.org/"
+        }
+      },
+      "roles": [
+        "data",
+        "zarr",
+        "s3"
+      ]
+    },
+    "zarr-s3": {
+      "href": "s3://nhgf-development/workspace/DataConversion/GMO.zarr/",
+      "type": "application/vnd+zarr",
+      "description": "S3 access to collection zarr group",
+      "xarray:open_kwargs": {
+        "chunks": {},
+        "engine": "zarr",
+        "consolidated": true
+      },
+      "xarray:storage_options": {
+        "requester_pays": true
+      },
+      "roles": [
+        "data",
+        "zarr",
+        "s3"
+      ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/catalog/GMO_New/collection.json b/catalog/GMO_New/collection.json
new file mode 100644
index 0000000000000000000000000000000000000000..a34f17fc59a6c1377ae387e77fec0f8203f9bddd
--- /dev/null
+++ b/catalog/GMO_New/collection.json
@@ -0,0 +1,226 @@
+{
+  "type": "Collection",
+  "id": "GMO_New",
+  "stac_version": "1.0.0",
+  "description": "Gridded Observed Meteorological Data: 1950-2010",
+  "links": [
+    {
+      "rel": "root",
+      "href": "../catalog.json",
+      "type": "application/json"
+    },
+    {
+      "rel": "parent",
+      "href": "../catalog.json",
+      "type": "application/json"
+    }
+  ],
+  "stac_extensions": [
+    "https://stac-extensions.github.io/datacube/v2.2.0/schema.json"
+  ],
+  "cube:dimensions": {
+    "time": {
+      "type": "temporal",
+      "description": null,
+      "extent": [
+        "1949-01-01T00:00:00Z",
+        "2010-12-31T00:00:00Z"
+      ],
+      "step": "P1DT0H0M0S"
+    },
+    "longitude": {
+      "type": "spatial",
+      "axis": "x",
+      "description": "Longitude",
+      "extent": [
+        -124.6875,
+        -67.0625
+      ],
+      "step": 0.125,
+      "reference_system": "{\"$schema\":\"https://proj.org/schemas/v0.5/projjson.schema.json\",\"type\":\"GeographicCRS\",\"name\":\"WGS 84\",\"datum_ensemble\":{\"name\":\"World Geodetic System 1984 ensemble\",\"members\":[{\"name\":\"World Geodetic System 1984 (Transit)\"},{\"name\":\"World Geodetic System 1984 (G730)\"},{\"name\":\"World Geodetic System 1984 (G873)\"},{\"name\":\"World Geodetic System 1984 (G1150)\"},{\"name\":\"World Geodetic System 1984 (G1674)\"},{\"name\":\"World Geodetic System 1984 (G1762)\"},{\"name\":\"World Geodetic System 1984 (G2139)\"}],\"ellipsoid\":{\"name\":\"WGS 84\",\"semi_major_axis\":6378137,\"inverse_flattening\":298.257223563},\"accuracy\":\"2.0\"},\"coordinate_system\":{\"subtype\":\"ellipsoidal\",\"axis\":[{\"name\":\"Geodetic latitude\",\"abbreviation\":\"Lat\",\"direction\":\"north\",\"unit\":\"degree\"},{\"name\":\"Geodetic longitude\",\"abbreviation\":\"Lon\",\"direction\":\"east\",\"unit\":\"degree\"}]},\"scope\":\"Horizontal component of 3D system.\",\"area\":\"World.\",\"bbox\":{\"south_latitude\":-90,\"west_longitude\":-180,\"north_latitude\":90,\"east_longitude\":180},\"id\":{\"authority\":\"EPSG\",\"code\":4326}}"
+    },
+    "latitude": {
+      "type": "spatial",
+      "axis": "y",
+      "description": "Latitude",
+      "extent": [
+        25.1875,
+        52.8125
+      ],
+      "step": 0.125,
+      "reference_system": "{\"$schema\":\"https://proj.org/schemas/v0.5/projjson.schema.json\",\"type\":\"GeographicCRS\",\"name\":\"WGS 84\",\"datum_ensemble\":{\"name\":\"World Geodetic System 1984 ensemble\",\"members\":[{\"name\":\"World Geodetic System 1984 (Transit)\"},{\"name\":\"World Geodetic System 1984 (G730)\"},{\"name\":\"World Geodetic System 1984 (G873)\"},{\"name\":\"World Geodetic System 1984 (G1150)\"},{\"name\":\"World Geodetic System 1984 (G1674)\"},{\"name\":\"World Geodetic System 1984 (G1762)\"},{\"name\":\"World Geodetic System 1984 (G2139)\"}],\"ellipsoid\":{\"name\":\"WGS 84\",\"semi_major_axis\":6378137,\"inverse_flattening\":298.257223563},\"accuracy\":\"2.0\"},\"coordinate_system\":{\"subtype\":\"ellipsoidal\",\"axis\":[{\"name\":\"Geodetic latitude\",\"abbreviation\":\"Lat\",\"direction\":\"north\",\"unit\":\"degree\"},{\"name\":\"Geodetic longitude\",\"abbreviation\":\"Lon\",\"direction\":\"east\",\"unit\":\"degree\"}]},\"scope\":\"Horizontal component of 3D system.\",\"area\":\"World.\",\"bbox\":{\"south_latitude\":-90,\"west_longitude\":-180,\"north_latitude\":90,\"east_longitude\":180},\"id\":{\"authority\":\"EPSG\",\"code\":4326}}"
+    },
+    "bound": {
+      "type": "count",
+      "description": null,
+      "extent": [
+        0,
+        1
+      ]
+    },
+    "nb2": {
+      "type": "count",
+      "description": null,
+      "extent": [
+        0,
+        1
+      ]
+    }
+  },
+  "cube:variables": {
+    "bounds_latitude": {
+      "dimensions": [
+        "latitude",
+        "bound"
+      ],
+      "type": "data",
+      "description": null,
+      "unit": null
+    },
+    "bounds_longitude": {
+      "dimensions": [
+        "longitude",
+        "bound"
+      ],
+      "type": "data",
+      "description": null,
+      "unit": null
+    },
+    "crs": {
+      "dimensions": [],
+      "type": "auxiliary",
+      "description": null,
+      "unit": null
+    },
+    "latitude_bnds": {
+      "dimensions": [
+        "latitude",
+        "nb2"
+      ],
+      "type": "data",
+      "description": null,
+      "unit": null
+    },
+    "longitude_bnds": {
+      "dimensions": [
+        "longitude",
+        "nb2"
+      ],
+      "type": "data",
+      "description": null,
+      "unit": null
+    },
+    "pr": {
+      "dimensions": [
+        "time",
+        "latitude",
+        "longitude"
+      ],
+      "type": "data",
+      "description": "Precipitation",
+      "unit": "mm/d"
+    },
+    "tas": {
+      "dimensions": [
+        "time",
+        "latitude",
+        "longitude"
+      ],
+      "type": "data",
+      "description": "Average Temperature",
+      "unit": "C"
+    },
+    "tasmax": {
+      "dimensions": [
+        "time",
+        "latitude",
+        "longitude"
+      ],
+      "type": "data",
+      "description": "Maximum Temperature",
+      "unit": "C"
+    },
+    "tasmin": {
+      "dimensions": [
+        "time",
+        "latitude",
+        "longitude"
+      ],
+      "type": "data",
+      "description": "Minimum Temperature",
+      "unit": "C"
+    },
+    "wind": {
+      "dimensions": [
+        "time",
+        "latitude",
+        "longitude"
+      ],
+      "type": "data",
+      "description": "Wind Speed",
+      "unit": "m/s"
+    }
+  },
+  "extent": {
+    "spatial": {
+      "bbox": [
+        [
+          -124.6875,
+          25.1875,
+          -67.0625,
+          52.8125
+        ]
+      ]
+    },
+    "temporal": {
+      "interval": [
+        [
+          "1949-01-01T00:00:00Z",
+          "2010-12-31T00:00:00Z"
+        ]
+      ]
+    }
+  },
+  "license": "Unlicense",
+  "assets": {
+    "zarr-s3-osn": {
+      "href": "s3://mdmf/gdp/GMO_New.zarr/",
+      "type": "application/vnd+zarr",
+      "description": "Open Storage Network Pod S3 API access to collection zarr group",
+      "xarray:open_kwargs": {
+        "chunks": {},
+        "engine": "zarr",
+        "consolidated": true
+      },
+      "xarray:storage_options": {
+        "anon": true,
+        "client_kwargs": {
+          "endpoint_url": "https://usgs.osn.mghpcc.org/"
+        }
+      },
+      "roles": [
+        "data",
+        "zarr",
+        "s3"
+      ]
+    },
+    "zarr-s3": {
+      "href": "s3://nhgf-development/workspace/DataConversion/GMO_New.zarr/",
+      "type": "application/vnd+zarr",
+      "description": "S3 access to collection zarr group",
+      "xarray:open_kwargs": {
+        "chunks": {},
+        "engine": "zarr",
+        "consolidated": true
+      },
+      "xarray:storage_options": {
+        "requester_pays": true
+      },
+      "roles": [
+        "data",
+        "zarr",
+        "s3"
+      ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/catalog/catalog.json b/catalog/catalog.json
index 33204d31715f30b2380acfa245090faf8d5bfc0f..aded9b62b1de4f8ef10f7f13d7e57630334d4fa6 100644
--- a/catalog/catalog.json
+++ b/catalog/catalog.json
@@ -153,6 +153,16 @@
       "rel": "child",
       "href": "./bcsd_mon_vic/collection.json",
       "type": "application/json"
+    },
+    {
+      "rel": "child",
+      "href": "./GMO/collection.json",
+      "type": "application/json"
+    },
+    {
+      "rel": "child",
+      "href": "./GMO_New/collection.json",
+      "type": "application/json"
     }
   ]
 }
\ No newline at end of file
diff --git a/workflows/archive/GMO_New_create_collection_from_zarr.ipynb b/workflows/archive/GMO_New_create_collection_from_zarr.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..16272364169890266acbdb8b5bd5421bd86a2300
--- /dev/null
+++ b/workflows/archive/GMO_New_create_collection_from_zarr.ipynb
@@ -0,0 +1,883 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "6c10e07b-1e60-4926-af1d-fa75dc78e5d4",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# GMO_New Zarr -> Collection Workflow\n",
+    "This is a workflow to build a [STAC collection](https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md) from the zarr asset for the dataset named above. We use the [datacube extension](https://github.com/stac-extensions/datacube) to define the spatial and temporal dimensions of the zarr store, as well as the variables it contains.\n",
+    "\n",
+    "To simplify this workflow so that it can scale to many datasets, a few simplifying suggestions and assumptions are made:\n",
+    "1. For USGS data, we can use the CC0-1.0 license. For all other data we can use Unlicense. Ref: https://spdx.org/licenses/\n",
+    "2. I am assuming all coordinates are from the WGS84 datum if not specified."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "201e0945-de55-45ff-b095-c2af009a4e62",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pystac\n",
+    "from pystac.extensions.datacube import CollectionDatacubeExtension, AssetDatacubeExtension, AdditionalDimension, DatacubeExtension\n",
+    "import xarray as xr\n",
+    "import cf_xarray\n",
+    "import os\n",
+    "import fsspec\n",
+    "import cf_xarray\n",
+    "import hvplot.xarray\n",
+    "import pandas as pd\n",
+    "import json\n",
+    "import numpy as np\n",
+    "import pyproj\n",
+    "from pyproj import Transformer\n",
+    "import cartopy.crs as ccrs\n",
+    "import cfunits\n",
+    "import json\n",
+    "import sys\n",
+    "sys.path.insert(1, '..')\n",
+    "import stac_helpers"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a71f9d19-8fb3-4f47-b4c4-447bb80d8dd5",
+   "metadata": {},
+   "source": [
+    "## Collection ID"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "15ee060d-3127-4024-a1ad-6aa0648667e1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# name for STAC collection - should match name of zarr dataset\n",
+    "collection_id = 'GMO_New'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "116b5837-8e85-4ae7-964a-803533ded714",
+   "metadata": {},
+   "source": [
+    "## Asset Metadata Input"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dd6fa323-132a-4794-8c80-576933f547a0",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# url to zarr store that you want to create a collection for\n",
+    "zarr_url = f's3://mdmf/gdp/{collection_id}.zarr/'\n",
+    "\n",
+    "# define keyword arguments needed for opening the dataset with xarray\n",
+    "# ref: https://github.com/stac-extensions/xarray-assets\n",
+    "xarray_opendataset_kwargs = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
+    "                          \"xarray:storage_options\": {\"anon\": True, \"client_kwargs\": {\"endpoint_url\":\"https://usgs.osn.mghpcc.org/\"}}}\n",
+    "# description for zarr url asset attached to collection (zarr_url)\n",
+    "asset_description = \"Open Storage Network Pod S3 API access to collection zarr group\"\n",
+    "# roles to tag zarr url asset with\n",
+    "asset_roles = [\"data\",\"zarr\",\"s3\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e1441cd4-e94c-4902-af46-8f1af470eb6b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# url to zarr store that you want to create a collection for\n",
+    "zarr_url2 = f's3://nhgf-development/workspace/DataConversion/{collection_id}.zarr/'\n",
+    "\n",
+    "# define keyword arguments needed for opening the dataset with xarray\n",
+    "# ref: https://github.com/stac-extensions/xarray-assets\n",
+    "xarray_opendataset_kwargs2 = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
+    "                          \"xarray:storage_options\":{\"requester_pays\":True}}\n",
+    "# description for zarr url asset attached to collection (zarr_url)\n",
+    "asset_description2 = \"S3 access to collection zarr group\"\n",
+    "# roles to tag zarr url asset with\n",
+    "asset_roles2 = [\"data\",\"zarr\",\"s3\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b213b74f-ad17-4774-93b6-3b62be616b45",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Data Exploration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "708f2cf5-79ab-49af-8067-de31d0d13ee6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# open and view zarr dataset\n",
+    "fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n",
+    "ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n",
+    "                             backend_kwargs={'consolidated':True}, chunks={})\n",
+    "ds"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "996e60ba-13e4-453a-8534-e62ce747f0fa",
+   "metadata": {},
+   "source": [
+    "## Collection Metadata Input"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "482d204d-b5b6-40e5-ac42-55b459be1097",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# description of STAC collection\n",
+    "collection_description = ds.attrs['title']\n",
+    "print(f'collection description: {collection_description}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "91129d65-a614-4fe4-86b6-105b1f121f55",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# license for dataset\n",
+    "collection_license = stac_helpers.license_picker(ds.attrs['license'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0bc7e9b3-ad62-4b10-a18e-66b7ed2d35dc",
+   "metadata": {},
+   "source": [
+    "## Identify x, y, t dimensions of dataset\n",
+    "May require user input if dimensions cannot be auto-detected."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ab91268f-7200-4cb1-979a-c7d75531d2c0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# dims_auto_extract = ['X', 'Y', 'T']\n",
+    "# dim_names_dict = {}\n",
+    "# for d in dims_auto_extract:\n",
+    "#     dim_names_dict[d] = stac_helpers.extract_dim(ds, d)\n",
+    "dim_names_dict={'X': 'longitude', 'Y': 'latitude', 'T': 'time'}\n",
+    "print(f\"Dimension dictionary: {dim_names_dict}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "810d7480-165d-41c0-bd09-163656a14003",
+   "metadata": {},
+   "source": [
+    "## Get crs info\n",
+    "If there is no crs info that can be automatically extracted from the dataset with pyproj, you will need to manually identify the crs and create a crs object. This reference list of cartopy projections may be a helpful resource: https://scitools.org.uk/cartopy/docs/latest/reference/projections.html"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "239d3b00-77f9-4178-954b-ba81a2b34512",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "crs_var = 'crs'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b03d52f3-1367-4255-a561-52ee4fc9e92d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# use pyproj to automatically extract crs info\n",
+    "crs = pyproj.CRS.from_cf(ds[crs_var].attrs)\n",
+    "\n",
+    "# alternatively, create the appropriate cartopy projection\n",
+    "# crs = ccrs.LambertConformal(central_longitude=crs_info.longitude_of_central_meridian, \n",
+    "#                             central_latitude=crs_info.latitude_of_projection_origin,\n",
+    "#                             standard_parallels=crs_info.standard_parallel)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "282c689e-07f0-48ee-8e3d-35876e8c5094",
+   "metadata": {},
+   "source": [
+    "### Compare dataset crs var to generated proj4 string to make sure it looks ok"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4cee13ba-487d-483e-a013-b65685137502",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds[crs_var]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f7bc73db-7717-450e-9679-525f7be0c910",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "crs.to_proj4()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a8c3ed37-8564-400b-a7fb-25bd5e43d21c",
+   "metadata": {},
+   "source": [
+    "## Create Collection Extent"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "69f0d837-68a5-4fed-9a14-5d75cfbb0da4",
+   "metadata": {},
+   "source": [
+    "### Spatial Extent\n",
+    "##### WARNING - make sure data type is **float** NOT **numpy.float64**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d46805e0-8e94-4ebe-aa01-d9a2d7051459",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# pull out lat/lon bbox for data\n",
+    "# coordinates must be from WGS 84 datum\n",
+    "# left, bottom, right, top\n",
+    "\n",
+    "# Note: try changing around the commented out lines below to get type float rather than a numpy float\n",
+    "#spatial_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float), ds[dim_names_dict['Y']].data.min().compute().astype(float), ds[dim_names_dict['X']].data.max().compute().astype(float), ds[dim_names_dict['Y']].data.max().compute().astype(float)]\n",
+    "#spatial_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['X']].data.max().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.max().compute().astype(float).tolist()]\n",
+    "spatial_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n",
+    "print(spatial_bounds)\n",
+    "print(f'\\nspatial_bounds data type: {type(spatial_bounds[0])}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f16fdb9e-7ed8-40fb-a4f1-9ecabdebc0a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "XX, YY = np.meshgrid(ds[dim_names_dict['X']].data, ds[dim_names_dict['Y']].data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "074fc23c-f4d9-4427-80d3-fbf691e6d411",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transformer = Transformer.from_crs(crs, \"EPSG:4326\", always_xy=True)\n",
+    "lon, lat = transformer.transform(XX.ravel(), YY.ravel())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5345c975-9fe3-48e1-a663-0275cdf275dc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f'lower left coordinates (WGS84): {min(lon)}, {min(lat)}')\n",
+    "print(f'upper right coordinates (WGS84): {max(lon)}, {max(lat)}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0a5a222-743d-403a-9411-2406374803cf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a spatial extent object \n",
+    "spatial_extent = pystac.SpatialExtent(bboxes=[[min(lon).item(), min(lat).item(), max(lon).item(), max(lat).item()]])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a04c8fca-1d33-43ac-9e2b-62d7be2887f7",
+   "metadata": {},
+   "source": [
+    "### Temporal Extent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "41a84995-867c-4152-8c57-85e3758bbb77",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# pull out first and last timestamps\n",
+    "temporal_extent_lower = pd.Timestamp(ds[dim_names_dict['T']].data.min())\n",
+    "temporal_extent_upper = pd.Timestamp(ds[dim_names_dict['T']].data.max())\n",
+    "# if you get an error:\n",
+    "# Cannot convert input [] of type <class 'cftime._cftime.DatetimeNoLeap'> to Timestamp\n",
+    "# use the following instead:\n",
+    "#temporal_extent_lower = pd.Timestamp(ds.indexes[dim_names_dict['T']].to_datetimeindex().min())\n",
+    "#temporal_extent_upper = pd.Timestamp(ds.indexes[dim_names_dict['T']].to_datetimeindex().max())\n",
+    "\n",
+    "print(f'min: {temporal_extent_lower} \\nmax: {temporal_extent_upper}')\n",
+    "# create a temporal extent object\n",
+    "temporal_extent = pystac.TemporalExtent(intervals=[[temporal_extent_lower, temporal_extent_upper]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1b1e37c4-5348-46ad-abc9-e005b5d6c02b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "collection_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6",
+   "metadata": {},
+   "source": [
+    "## Open up STAC Catalog and create a collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define folder location where your STAC catalog json file is\n",
+    "catalog_path = os.path.join('..', '..', 'catalog')\n",
+    "# open catalog\n",
+    "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7e96811b-95ae-406a-9728-55fc429d4e1f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if catalog.get_child(collection_id):\n",
+    "    collection = catalog.get_child(collection_id)\n",
+    "    print(\"existing collection opened\")\n",
+    "    collection.extent=collection_extent\n",
+    "    collection.description=collection_description\n",
+    "    collection.license=collection_license\n",
+    "else:\n",
+    "    collection = pystac.Collection(id=collection_id,\n",
+    "                                   description=collection_description,\n",
+    "                                   extent=collection_extent,\n",
+    "                                   license=collection_license)\n",
+    "    print(\"new collection created\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a21c76e8-cd57-4eb5-a33f-7c668a3b3205",
+   "metadata": {},
+   "source": [
+    "## Add zarr url asset to collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "094832af-d22b-4359-b0f6-cf687acce5cc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "asset_id = \"zarr-s3-osn\"\n",
+    "asset = pystac.Asset(href=zarr_url,\n",
+    "                     description=asset_description,\n",
+    "                     media_type=\"application/vnd+zarr\",\n",
+    "                     roles=asset_roles,\n",
+    "                     extra_fields = xarray_opendataset_kwargs)\n",
+    "collection.add_asset(asset_id, asset)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0c298d07-f234-4a08-986d-87f4a39e9ae6",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "asset_id2 = \"zarr-s3\"\n",
+    "asset2 = pystac.Asset(href=zarr_url2,\n",
+    "                     description=asset_description2,\n",
+    "                     media_type=\"application/vnd+zarr\",\n",
+    "                     roles=asset_roles2,\n",
+    "                     extra_fields = xarray_opendataset_kwargs2)\n",
+    "collection.add_asset(asset_id2, asset2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f67cd5c9-db33-45c2-bc21-480cd67354f4",
+   "metadata": {},
+   "source": [
+    "## Add datacube extension to collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fc00946d-2880-491d-9b3b-3aeeb4414d6c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# instantiate extention on collection\n",
+    "dc = DatacubeExtension.ext(collection, add_if_missing=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8bdd77a2-7587-485e-afb7-42af3a822241",
+   "metadata": {},
+   "source": [
+    "### Add cube dimensions (required field for extension)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e7dc357c-91ec-49ae-83e5-400f791f9792",
+   "metadata": {},
+   "source": [
+    "#### user review needed\n",
+    "#### compare crs information to the projjson to make sure it looks correct"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ea452f62-5644-49b6-8a4e-7dc4f649fd1a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# print out crs information in dataset\n",
+    "crs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1b1d05ff-8e43-44a7-8343-178b112c4ad6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # the datacube extension can accept reference_system information as a numerical EPSG code, \n",
+    "# # WKT2 (ISO 19162) string or PROJJSON object.\n",
+    "# # we will use a projjson, as was done by Microsoft Planetary Computer here:\n",
+    "# # https://planetarycomputer.microsoft.com/dataset/daymet-annual-na\n",
+    "# # https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-annual-na\n",
+    "# projjson = json.loads(lcc.to_json())\n",
+    "\n",
+    "# alternatively, I think we could do this:\n",
+    "projjson = crs.to_json()\n",
+    "print(crs.to_json(pretty=True))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b6b88ee9-60c2-4d91-af74-c1c56b094826",
+   "metadata": {},
+   "source": [
+    "#### user review needed\n",
+    "#### look at the spatial and temporal steps, make sure they are all successfully pulled and they look correct"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9e2bbcc5-e45a-4b8c-9d60-601f345e8134",
+   "metadata": {},
+   "source": [
+    "**Time**\n",
+    "\n",
+    "If you need to manually construct this field, here is a helpful reference: https://en.wikipedia.org/wiki/ISO_8601#Durations\n",
+    "\n",
+    "##### monthly data, manually constructing time step"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "82f1e9bd-52ee-46f5-9e95-c2359d95fcf3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "time_step = pd.Timedelta(stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True)).isoformat()\n",
+    "# if time is yearly or monthly, you will need to manually construct it:\n",
+    "#time_step = \"P0Y1M0DT0H0M0S\"\n",
+    "print(f'time step: {time_step}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "64be65b2-de20-447a-a9c2-bd8eca3e440e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # optional debugging for time steps:\n",
+    "# # check all step sizes (step_list), get number of occurences of each (step_count), and get index locations where each step size occurs in the dataset so you can manually inspect the values, if needed\n",
+    "# # please specify the index of the step in step_list with the step_ix field - this will return the indices in the dataset where this step size occurred\n",
+    "# time_step = stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bc8dff39-2a2e-44a0-9b30-987107c2d1e2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # debugging for time steps, cont:\n",
+    "# # please choose one of the index locations printed above\n",
+    "# # this will print the time steps adjacent to it\n",
+    "# ix = 11\n",
+    "# ds.isel(time=slice(ix-1,ix+3)).time"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9aa6c8ff-8d9b-40a7-a281-39b502bd5a3d",
+   "metadata": {},
+   "source": [
+    "**X/lon**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a8ba7695-ca45-4db2-bd46-c465f4e37eff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x_step = stac_helpers.get_step(ds, dim_names_dict['X'])\n",
+    "# a common issue that causes the spatial step not to be identified comes from rounding errors in the step calculation\n",
+    "# use the debugging cells below to identify if this is the issue, if so, use the round_dec argument to round to a higher decimal place:\n",
+    "#x_step = stac_helpers.get_step(ds, dim_names_dict['X'], round_dec=13)\n",
+    "print(f'x step: {x_step}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fac4c9f2-a952-4c7f-aa32-862957372d6f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # optional debugging for spatial steps:\n",
+    "# # check all step sizes (step_list), get number of occurences of each (step_count), and get index locations where each step size occurs in the dataset so you can manually inspect the values, if needed\n",
+    "# # please specify the index of the step in step_list with the step_ix field - this will return the indices in the dataset where this step size occurred\n",
+    "# x_dim=dim_names_dict['X']\n",
+    "# x_step = stac_helpers.get_step(ds, x_dim, debug=True, step_ix=0)\n",
+    "# print(f'\\nx dim name (for next cell): {x_dim}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8d0b5a2d-dc58-4ad6-b890-859ce6bb08de",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # debugging for spatial steps, cont:\n",
+    "# # please choose one of the index locations printed above\n",
+    "# # this will print the time steps adjacent to it\n",
+    "# ix = 5\n",
+    "# ds.isel(x=slice(ix-1,ix+3)).x"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "21b5cca4-8bb4-498d-ae6b-6b8545fffe56",
+   "metadata": {},
+   "source": [
+    "**Y/lat**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7405583b-ecb9-44b0-8815-048e42e55a42",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_step = stac_helpers.get_step(ds, dim_names_dict['Y'])\n",
+    "# a common issue that causes the spatial step not to be identified comes from rounding errors in the step calculation\n",
+    "# use the debugging cells below to identify if this is the issue, if so, use the round_dec argument to round to a higher decimal place:\n",
+    "#y_step = stac_helpers.get_step(ds, dim_names_dict['Y'], round_dec=13)\n",
+    "print(f'y step: {y_step}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ece0fe37-b54c-4721-aa9b-33d2998d191b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # optional debugging for spatial steps:\n",
+    "# # check all step sizes (step_list), get number of occurences of each (step_count), and get index locations where each step size occurs in the dataset so you can manually inspect the values, if needed\n",
+    "# # please specify the index of the step in step_list with the step_ix field - this will return the indices in the dataset where this step size occurred\n",
+    "# y_dim=dim_names_dict['Y']\n",
+    "# y_step = stac_helpers.get_step(ds, y_dim, debug=True, step_ix=0)\n",
+    "# print(f'\\nx dim name (for next cell): {x_dim}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "abdafb8f-5217-4b82-91b6-eec8183c9128",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # debugging for spatial steps, cont:\n",
+    "# # please choose one of the index locations printed above\n",
+    "# # this will print the time steps adjacent to it\n",
+    "# ix = 5\n",
+    "# ds.isel(y=slice(ix-1,ix+3)).y"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "00a5e041-081d-428d-ac2e-75d16de205e6",
+   "metadata": {},
+   "source": [
+    "#### user input needed\n",
+    "#### you will need to copy all of the dimensions printed below into the dict and fill in the appropriate attributes (type, axis, extent, etc.):\n",
+    "\n",
+    "Please see [datacube spec](https://github.com/stac-extensions/datacube?tab=readme-ov-file#dimension-object) for details on required fields.\n",
+    "\n",
+    "If you have a dimension like \"bnds\" or \"nv\" that is used on variables like time_bnds, lon_bnds, lat_bnds to choose either the lower or upper bound, you can use and [additional dimension object](https://github.com/stac-extensions/datacube?tab=readme-ov-file#additional-dimension-object). We recommend making the type \"count\" as Microsoft Planetary Computer did [here](https://github.com/stac-extensions/datacube/blob/9e74fa706c9bdd971e01739cf18dcc53bdd3dd4f/examples/daymet-hi-annual.json#L76).\n",
+    "\n",
+    "Here is an example:\n",
+    "\n",
+    "```\n",
+    "dims_dict = {\n",
+    "            'bnds': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'bnds'), 'extent': [ds.bnds.min().item(), ds.bnds.max().item()]})\n",
+    "            }\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "acd45d3c-7845-47e6-9b7d-e35627a7ca9a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dims = list(ds.dims)\n",
+    "print(dims)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5a443497-67a9-4dce-a8e9-b08d31a88223",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a dictionary of datacube dimensions you would like to assign to this dataset\n",
+    "# dimension name should come from the dims printed in above cell\n",
+    "\n",
+    "# x, y, t dimension info is pulled out automatically using the dim dict we created above\n",
+    "# all other dims listed in above cell need to be manually written in\n",
+    "\n",
+    "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n",
+    "# note that the extent of each dimension should be pulled from the dataset\n",
+    "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n",
+    "             dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [spatial_bounds[0], spatial_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n",
+    "             dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [spatial_bounds[1], spatial_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n",
+    "             'bound': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'bound'), 'extent': [ds.bound.min().item(), ds.bound.max().item()]}),\n",
+    "             'nb2': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'nb2'), 'extent': [ds.nb2.min().item(), ds.nb2.max().item()]})\n",
+    "            }"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8ab85b09-eb38-404c-910c-13349d5e2234",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make sure you added all the right dims\n",
+    "assert sorted(list(dims_dict.keys())) == sorted(dims)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0f277883-a3fd-425f-966a-ca2140d0ef2f",
+   "metadata": {},
+   "source": [
+    "### Add cube variables (optional field for extension)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e9272931-fc0b-4f2a-9546-283033e9cde8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# drop metpy_crs coordinate we have added\n",
+    "if 'metpy_crs' in ds.coords:\n",
+    "    ds = ds.drop_vars('metpy_crs')\n",
+    "\n",
+    "# pull list of vars from dataset\n",
+    "vars = list(ds.variables)\n",
+    "\n",
+    "# spec says that the keys of cube:dimensions and cube:variables should be unique together; a key like lat should not be both a dimension and a variable.\n",
+    "# we will drop all values in dims from vars\n",
+    "vars = [v for v in vars if v not in dims]\n",
+    "\n",
+    "# Microsoft Planetary Computer includes coordinates and crs as variables here:\n",
+    "# https://planetarycomputer.microsoft.com/dataset/daymet-annual-na\n",
+    "# https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-annual-na\n",
+    "# we will keep those in the var list\n",
+    "\n",
+    "# create dictionary of dataset variables and associated dimensions\n",
+    "vars_dict={}\n",
+    "for v in vars:\n",
+    "    unit = stac_helpers.get_unit(ds, v)\n",
+    "    var_type = stac_helpers.get_var_type(ds, v, crs_var)\n",
+    "    long_name = stac_helpers.get_long_name(ds, v)\n",
+    "    vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "11ad5352-884c-4472-8864-4570a96f66e5",
+   "metadata": {},
+   "source": [
+    "### Finalize extension"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "10141fd4-91d6-491d-878b-02653720891d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# add dimesions and variables to collection extension\n",
+    "dc.apply(dimensions=dims_dict, variables=vars_dict)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "615ca168-75fb-4135-9941-0ef5fe4fd1cb",
+   "metadata": {},
+   "source": [
+    "## Add STAC Collection to Catalog and Save"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e2120a55-3d04-4122-a93f-29afcdb8cb1b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# # helper to find items of wrong type\n",
+    "# d = collection.to_dict()\n",
+    "# print(*stac_helpers.find_paths(d))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4b75791b-6b2d-40be-b7c6-330a60888fb5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if catalog.get_child(collection_id):\n",
+    "    collection.normalize_and_save(root_href=os.path.join(catalog_path, collection_id), catalog_type=pystac.CatalogType.SELF_CONTAINED)\n",
+    "else:\n",
+    "    catalog.add_child(collection)\n",
+    "    catalog.normalize_and_save(root_href=catalog_path, catalog_type=pystac.CatalogType.SELF_CONTAINED)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d6f676b5-e892-4bfb-8d73-2828addd838c",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "global-global-pangeo",
+   "language": "python",
+   "name": "conda-env-global-global-pangeo-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/workflows/archive/GMO_create_collection_from_zarr.ipynb b/workflows/archive/GMO_create_collection_from_zarr.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..415e5ecfd6f97eef7b81713e91c6fa8294fd4082
--- /dev/null
+++ b/workflows/archive/GMO_create_collection_from_zarr.ipynb
@@ -0,0 +1,881 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "6c10e07b-1e60-4926-af1d-fa75dc78e5d4",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# GMO Zarr -> Collection Workflow\n",
+    "This is a workflow to build a [STAC collection](https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md) from the zarr asset for the dataset named above. We use the [datacube extension](https://github.com/stac-extensions/datacube) to define the spatial and temporal dimensions of the zarr store, as well as the variables it contains.\n",
+    "\n",
+    "To simplify this workflow so that it can scale to many datasets, a few simplifying suggestions and assumptions are made:\n",
+    "1. For USGS data, we can use the CC0-1.0 license. For all other data we can use Unlicense. Ref: https://spdx.org/licenses/\n",
+    "2. I am assuming all coordinates are from the WGS84 datum if not specified."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "201e0945-de55-45ff-b095-c2af009a4e62",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pystac\n",
+    "from pystac.extensions.datacube import CollectionDatacubeExtension, AssetDatacubeExtension, AdditionalDimension, DatacubeExtension\n",
+    "import xarray as xr\n",
+    "import cf_xarray\n",
+    "import os\n",
+    "import fsspec\n",
+    "import cf_xarray\n",
+    "import hvplot.xarray\n",
+    "import pandas as pd\n",
+    "import json\n",
+    "import numpy as np\n",
+    "import pyproj\n",
+    "from pyproj import Transformer\n",
+    "import cartopy.crs as ccrs\n",
+    "import cfunits\n",
+    "import json\n",
+    "import sys\n",
+    "sys.path.insert(1, '..')\n",
+    "import stac_helpers"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a71f9d19-8fb3-4f47-b4c4-447bb80d8dd5",
+   "metadata": {},
+   "source": [
+    "## Collection ID"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "15ee060d-3127-4024-a1ad-6aa0648667e1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# name for STAC collection - should match name of zarr dataset\n",
+    "collection_id = 'GMO'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "116b5837-8e85-4ae7-964a-803533ded714",
+   "metadata": {},
+   "source": [
+    "## Asset Metadata Input"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dd6fa323-132a-4794-8c80-576933f547a0",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# url to zarr store that you want to create a collection for\n",
+    "zarr_url = f's3://mdmf/gdp/{collection_id}.zarr/'\n",
+    "\n",
+    "# define keyword arguments needed for opening the dataset with xarray\n",
+    "# ref: https://github.com/stac-extensions/xarray-assets\n",
+    "xarray_opendataset_kwargs = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
+    "                          \"xarray:storage_options\": {\"anon\": True, \"client_kwargs\": {\"endpoint_url\":\"https://usgs.osn.mghpcc.org/\"}}}\n",
+    "# description for zarr url asset attached to collection (zarr_url)\n",
+    "asset_description = \"Open Storage Network Pod S3 API access to collection zarr group\"\n",
+    "# roles to tag zarr url asset with\n",
+    "asset_roles = [\"data\",\"zarr\",\"s3\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e1441cd4-e94c-4902-af46-8f1af470eb6b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# url to zarr store that you want to create a collection for\n",
+    "zarr_url2 = f's3://nhgf-development/workspace/DataConversion/{collection_id}.zarr/'\n",
+    "\n",
+    "# define keyword arguments needed for opening the dataset with xarray\n",
+    "# ref: https://github.com/stac-extensions/xarray-assets\n",
+    "xarray_opendataset_kwargs2 = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
+    "                          \"xarray:storage_options\":{\"requester_pays\":True}}\n",
+    "# description for zarr url asset attached to collection (zarr_url)\n",
+    "asset_description2 = \"S3 access to collection zarr group\"\n",
+    "# roles to tag zarr url asset with\n",
+    "asset_roles2 = [\"data\",\"zarr\",\"s3\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b213b74f-ad17-4774-93b6-3b62be616b45",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Data Exploration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "708f2cf5-79ab-49af-8067-de31d0d13ee6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# open and view zarr dataset\n",
+    "fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n",
+    "ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n",
+    "                             backend_kwargs={'consolidated':True}, chunks={})\n",
+    "ds"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "996e60ba-13e4-453a-8534-e62ce747f0fa",
+   "metadata": {},
+   "source": [
+    "## Collection Metadata Input"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "482d204d-b5b6-40e5-ac42-55b459be1097",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# description of STAC collection\n",
+    "collection_description = ds.attrs['title']\n",
+    "print(f'collection description: {collection_description}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "91129d65-a614-4fe4-86b6-105b1f121f55",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# license for dataset\n",
+    "collection_license = stac_helpers.license_picker(ds.attrs['license'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0bc7e9b3-ad62-4b10-a18e-66b7ed2d35dc",
+   "metadata": {},
+   "source": [
+    "## Identify x, y, t dimensions of dataset\n",
+    "May require user input if dimensions cannot be auto-detected."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ab91268f-7200-4cb1-979a-c7d75531d2c0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dims_auto_extract = ['X', 'Y', 'T']\n",
+    "dim_names_dict = {}\n",
+    "for d in dims_auto_extract:\n",
+    "    dim_names_dict[d] = stac_helpers.extract_dim(ds, d)\n",
+    "print(f\"Dimension dictionary: {dim_names_dict}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "810d7480-165d-41c0-bd09-163656a14003",
+   "metadata": {},
+   "source": [
+    "## Get crs info\n",
+    "If there is no crs info that can be automatically extracted from the dataset with pyproj, you will need to manually identify the crs and create a crs object. This reference list of cartopy projections may be a helpful resource: https://scitools.org.uk/cartopy/docs/latest/reference/projections.html"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "239d3b00-77f9-4178-954b-ba81a2b34512",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "crs_var = 'crs'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b03d52f3-1367-4255-a561-52ee4fc9e92d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# use pyproj to automatically extract crs info\n",
+    "crs = pyproj.CRS.from_cf(ds[crs_var].attrs)\n",
+    "\n",
+    "# alternatively, create the appropriate cartopy projection\n",
+    "# crs = ccrs.LambertConformal(central_longitude=crs_info.longitude_of_central_meridian, \n",
+    "#                             central_latitude=crs_info.latitude_of_projection_origin,\n",
+    "#                             standard_parallels=crs_info.standard_parallel)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "282c689e-07f0-48ee-8e3d-35876e8c5094",
+   "metadata": {},
+   "source": [
+    "### Compare dataset crs var to generated proj4 string to make sure it looks ok"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4cee13ba-487d-483e-a013-b65685137502",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds[crs_var]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f7bc73db-7717-450e-9679-525f7be0c910",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "crs.to_proj4()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a8c3ed37-8564-400b-a7fb-25bd5e43d21c",
+   "metadata": {},
+   "source": [
+    "## Create Collection Extent"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "69f0d837-68a5-4fed-9a14-5d75cfbb0da4",
+   "metadata": {},
+   "source": [
+    "### Spatial Extent\n",
+    "##### WARNING - make sure data type is **float** NOT **numpy.float64**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d46805e0-8e94-4ebe-aa01-d9a2d7051459",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# pull out lat/lon bbox for data\n",
+    "# coordinates must be from WGS 84 datum\n",
+    "# left, bottom, right, top\n",
+    "\n",
+    "# Note: try changing around the commented out lines below to get type float rather than a numpy float\n",
+    "#spatial_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float), ds[dim_names_dict['Y']].data.min().compute().astype(float), ds[dim_names_dict['X']].data.max().compute().astype(float), ds[dim_names_dict['Y']].data.max().compute().astype(float)]\n",
+    "#spatial_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['X']].data.max().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.max().compute().astype(float).tolist()]\n",
+    "spatial_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n",
+    "print(spatial_bounds)\n",
+    "print(f'\\nspatial_bounds data type: {type(spatial_bounds[0])}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f16fdb9e-7ed8-40fb-a4f1-9ecabdebc0a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "XX, YY = np.meshgrid(ds[dim_names_dict['X']].data, ds[dim_names_dict['Y']].data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "074fc23c-f4d9-4427-80d3-fbf691e6d411",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transformer = Transformer.from_crs(crs, \"EPSG:4326\", always_xy=True)\n",
+    "lon, lat = transformer.transform(XX.ravel(), YY.ravel())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5345c975-9fe3-48e1-a663-0275cdf275dc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f'lower left coordinates (WGS84): {min(lon)}, {min(lat)}')\n",
+    "print(f'upper right coordinates (WGS84): {max(lon)}, {max(lat)}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0a5a222-743d-403a-9411-2406374803cf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a spatial extent object \n",
+    "spatial_extent = pystac.SpatialExtent(bboxes=[[min(lon).item(), min(lat).item(), max(lon).item(), max(lat).item()]])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a04c8fca-1d33-43ac-9e2b-62d7be2887f7",
+   "metadata": {},
+   "source": [
+    "### Temporal Extent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "41a84995-867c-4152-8c57-85e3758bbb77",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# pull out first and last timestamps\n",
+    "temporal_extent_lower = pd.Timestamp(ds[dim_names_dict['T']].data.min())\n",
+    "temporal_extent_upper = pd.Timestamp(ds[dim_names_dict['T']].data.max())\n",
+    "# if you get an error:\n",
+    "# Cannot convert input [] of type <class 'cftime._cftime.DatetimeNoLeap'> to Timestamp\n",
+    "# use the following instead:\n",
+    "#temporal_extent_lower = pd.Timestamp(ds.indexes[dim_names_dict['T']].to_datetimeindex().min())\n",
+    "#temporal_extent_upper = pd.Timestamp(ds.indexes[dim_names_dict['T']].to_datetimeindex().max())\n",
+    "\n",
+    "print(f'min: {temporal_extent_lower} \\nmax: {temporal_extent_upper}')\n",
+    "# create a temporal extent object\n",
+    "temporal_extent = pystac.TemporalExtent(intervals=[[temporal_extent_lower, temporal_extent_upper]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1b1e37c4-5348-46ad-abc9-e005b5d6c02b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "collection_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6",
+   "metadata": {},
+   "source": [
+    "## Open up STAC Catalog and create a collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define folder location where your STAC catalog json file is\n",
+    "catalog_path = os.path.join('..', '..', 'catalog')\n",
+    "# open catalog\n",
+    "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7e96811b-95ae-406a-9728-55fc429d4e1f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if catalog.get_child(collection_id):\n",
+    "    collection = catalog.get_child(collection_id)\n",
+    "    print(\"existing collection opened\")\n",
+    "    collection.extent=collection_extent\n",
+    "    collection.description=collection_description\n",
+    "    collection.license=collection_license\n",
+    "else:\n",
+    "    collection = pystac.Collection(id=collection_id,\n",
+    "                                   description=collection_description,\n",
+    "                                   extent=collection_extent,\n",
+    "                                   license=collection_license)\n",
+    "    print(\"new collection created\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a21c76e8-cd57-4eb5-a33f-7c668a3b3205",
+   "metadata": {},
+   "source": [
+    "## Add zarr url asset to collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "094832af-d22b-4359-b0f6-cf687acce5cc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "asset_id = \"zarr-s3-osn\"\n",
+    "asset = pystac.Asset(href=zarr_url,\n",
+    "                     description=asset_description,\n",
+    "                     media_type=\"application/vnd+zarr\",\n",
+    "                     roles=asset_roles,\n",
+    "                     extra_fields = xarray_opendataset_kwargs)\n",
+    "collection.add_asset(asset_id, asset)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0c298d07-f234-4a08-986d-87f4a39e9ae6",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "asset_id2 = \"zarr-s3\"\n",
+    "asset2 = pystac.Asset(href=zarr_url2,\n",
+    "                     description=asset_description2,\n",
+    "                     media_type=\"application/vnd+zarr\",\n",
+    "                     roles=asset_roles2,\n",
+    "                     extra_fields = xarray_opendataset_kwargs2)\n",
+    "collection.add_asset(asset_id2, asset2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f67cd5c9-db33-45c2-bc21-480cd67354f4",
+   "metadata": {},
+   "source": [
+    "## Add datacube extension to collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fc00946d-2880-491d-9b3b-3aeeb4414d6c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# instantiate extention on collection\n",
+    "dc = DatacubeExtension.ext(collection, add_if_missing=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8bdd77a2-7587-485e-afb7-42af3a822241",
+   "metadata": {},
+   "source": [
+    "### Add cube dimensions (required field for extension)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e7dc357c-91ec-49ae-83e5-400f791f9792",
+   "metadata": {},
+   "source": [
+    "#### user review needed\n",
+    "#### compare crs information to the projjson to make sure it looks correct"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ea452f62-5644-49b6-8a4e-7dc4f649fd1a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# print out crs information in dataset\n",
+    "crs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1b1d05ff-8e43-44a7-8343-178b112c4ad6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # the datacube extension can accept reference_system information as a numerical EPSG code, \n",
+    "# # WKT2 (ISO 19162) string or PROJJSON object.\n",
+    "# # we will use a projjson, as was done by Microsoft Planetary Computer here:\n",
+    "# # https://planetarycomputer.microsoft.com/dataset/daymet-annual-na\n",
+    "# # https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-annual-na\n",
+    "# projjson = json.loads(lcc.to_json())\n",
+    "\n",
+    "# alternatively, I think we could do this:\n",
+    "projjson = crs.to_json()\n",
+    "print(crs.to_json(pretty=True))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b6b88ee9-60c2-4d91-af74-c1c56b094826",
+   "metadata": {},
+   "source": [
+    "#### user review needed\n",
+    "#### look at the spatial and temporal steps, make sure they are all successfully pulled and they look correct"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9e2bbcc5-e45a-4b8c-9d60-601f345e8134",
+   "metadata": {},
+   "source": [
+    "**Time**\n",
+    "\n",
+    "If you need to manually construct this field, here is a helpful reference: https://en.wikipedia.org/wiki/ISO_8601#Durations\n",
+    "\n",
+    "##### monthly data, manually constructing time step"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "82f1e9bd-52ee-46f5-9e95-c2359d95fcf3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "time_step = pd.Timedelta(stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True)).isoformat()\n",
+    "# if time is yearly or monthly, you will need to manually construct it:\n",
+    "#time_step = \"P0Y1M0DT0H0M0S\"\n",
+    "print(f'time step: {time_step}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "64be65b2-de20-447a-a9c2-bd8eca3e440e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # optional debugging for time steps:\n",
+    "# # check all step sizes (step_list), get number of occurences of each (step_count), and get index locations where each step size occurs in the dataset so you can manually inspect the values, if needed\n",
+    "# # please specify the index of the step in step_list with the step_ix field - this will return the indices in the dataset where this step size occurred\n",
+    "# time_step = stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bc8dff39-2a2e-44a0-9b30-987107c2d1e2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # debugging for time steps, cont:\n",
+    "# # please choose one of the index locations printed above\n",
+    "# # this will print the time steps adjacent to it\n",
+    "# ix = 11\n",
+    "# ds.isel(time=slice(ix-1,ix+3)).time"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9aa6c8ff-8d9b-40a7-a281-39b502bd5a3d",
+   "metadata": {},
+   "source": [
+    "**X/lon**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a8ba7695-ca45-4db2-bd46-c465f4e37eff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x_step = stac_helpers.get_step(ds, dim_names_dict['X'])\n",
+    "# a common issue that causes the spatial step not to be identified comes from rounding errors in the step calculation\n",
+    "# use the debugging cells below to identify if this is the issue, if so, use the round_dec argument to round to a higher decimal place:\n",
+    "#x_step = stac_helpers.get_step(ds, dim_names_dict['X'], round_dec=13)\n",
+    "print(f'x step: {x_step}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fac4c9f2-a952-4c7f-aa32-862957372d6f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # optional debugging for spatial steps:\n",
+    "# # check all step sizes (step_list), get number of occurences of each (step_count), and get index locations where each step size occurs in the dataset so you can manually inspect the values, if needed\n",
+    "# # please specify the index of the step in step_list with the step_ix field - this will return the indices in the dataset where this step size occurred\n",
+    "# x_dim=dim_names_dict['X']\n",
+    "# x_step = stac_helpers.get_step(ds, x_dim, debug=True, step_ix=0)\n",
+    "# print(f'\\nx dim name (for next cell): {x_dim}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8d0b5a2d-dc58-4ad6-b890-859ce6bb08de",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # debugging for spatial steps, cont:\n",
+    "# # please choose one of the index locations printed above\n",
+    "# # this will print the time steps adjacent to it\n",
+    "# ix = 5\n",
+    "# ds.isel(x=slice(ix-1,ix+3)).x"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "21b5cca4-8bb4-498d-ae6b-6b8545fffe56",
+   "metadata": {},
+   "source": [
+    "**Y/lat**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7405583b-ecb9-44b0-8815-048e42e55a42",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_step = stac_helpers.get_step(ds, dim_names_dict['Y'])\n",
+    "# a common issue that causes the spatial step not to be identified comes from rounding errors in the step calculation\n",
+    "# use the debugging cells below to identify if this is the issue, if so, use the round_dec argument to round to a higher decimal place:\n",
+    "#y_step = stac_helpers.get_step(ds, dim_names_dict['Y'], round_dec=13)\n",
+    "print(f'y step: {y_step}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ece0fe37-b54c-4721-aa9b-33d2998d191b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # optional debugging for spatial steps:\n",
+    "# # check all step sizes (step_list), get number of occurences of each (step_count), and get index locations where each step size occurs in the dataset so you can manually inspect the values, if needed\n",
+    "# # please specify the index of the step in step_list with the step_ix field - this will return the indices in the dataset where this step size occurred\n",
+    "# y_dim=dim_names_dict['Y']\n",
+    "# y_step = stac_helpers.get_step(ds, y_dim, debug=True, step_ix=0)\n",
+    "# print(f'\\nx dim name (for next cell): {x_dim}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "abdafb8f-5217-4b82-91b6-eec8183c9128",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # debugging for spatial steps, cont:\n",
+    "# # please choose one of the index locations printed above\n",
+    "# # this will print the time steps adjacent to it\n",
+    "# ix = 5\n",
+    "# ds.isel(y=slice(ix-1,ix+3)).y"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "00a5e041-081d-428d-ac2e-75d16de205e6",
+   "metadata": {},
+   "source": [
+    "#### user input needed\n",
+    "#### you will need to copy all of the dimensions printed below into the dict and fill in the appropriate attributes (type, axis, extent, etc.):\n",
+    "\n",
+    "Please see [datacube spec](https://github.com/stac-extensions/datacube?tab=readme-ov-file#dimension-object) for details on required fields.\n",
+    "\n",
+    "If you have a dimension like \"bnds\" or \"nv\" that is used on variables like time_bnds, lon_bnds, lat_bnds to choose either the lower or upper bound, you can use and [additional dimension object](https://github.com/stac-extensions/datacube?tab=readme-ov-file#additional-dimension-object). We recommend making the type \"count\" as Microsoft Planetary Computer did [here](https://github.com/stac-extensions/datacube/blob/9e74fa706c9bdd971e01739cf18dcc53bdd3dd4f/examples/daymet-hi-annual.json#L76).\n",
+    "\n",
+    "Here is an example:\n",
+    "\n",
+    "```\n",
+    "dims_dict = {\n",
+    "            'bnds': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'bnds'), 'extent': [ds.bnds.min().item(), ds.bnds.max().item()]})\n",
+    "            }\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "acd45d3c-7845-47e6-9b7d-e35627a7ca9a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dims = list(ds.dims)\n",
+    "print(dims)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5a443497-67a9-4dce-a8e9-b08d31a88223",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a dictionary of datacube dimensions you would like to assign to this dataset\n",
+    "# dimension name should come from the dims printed in above cell\n",
+    "\n",
+    "# x, y, t dimension info is pulled out automatically using the dim dict we created above\n",
+    "# all other dims listed in above cell need to be manually written in\n",
+    "\n",
+    "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n",
+    "# note that the extent of each dimension should be pulled from the dataset\n",
+    "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n",
+    "             dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [spatial_bounds[0], spatial_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n",
+    "             dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [spatial_bounds[1], spatial_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n",
+    "             'bound': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'bound'), 'extent': [ds.bound.min().item(), ds.bound.max().item()]})\n",
+    "            }"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8ab85b09-eb38-404c-910c-13349d5e2234",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make sure you added all the right dims\n",
+    "assert sorted(list(dims_dict.keys())) == sorted(dims)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0f277883-a3fd-425f-966a-ca2140d0ef2f",
+   "metadata": {},
+   "source": [
+    "### Add cube variables (optional field for extension)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e9272931-fc0b-4f2a-9546-283033e9cde8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# drop metpy_crs coordinate we have added\n",
+    "if 'metpy_crs' in ds.coords:\n",
+    "    ds = ds.drop_vars('metpy_crs')\n",
+    "\n",
+    "# pull list of vars from dataset\n",
+    "vars = list(ds.variables)\n",
+    "\n",
+    "# spec says that the keys of cube:dimensions and cube:variables should be unique together; a key like lat should not be both a dimension and a variable.\n",
+    "# we will drop all values in dims from vars\n",
+    "vars = [v for v in vars if v not in dims]\n",
+    "\n",
+    "# Microsoft Planetary Computer includes coordinates and crs as variables here:\n",
+    "# https://planetarycomputer.microsoft.com/dataset/daymet-annual-na\n",
+    "# https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-annual-na\n",
+    "# we will keep those in the var list\n",
+    "\n",
+    "# create dictionary of dataset variables and associated dimensions\n",
+    "vars_dict={}\n",
+    "for v in vars:\n",
+    "    unit = stac_helpers.get_unit(ds, v)\n",
+    "    var_type = stac_helpers.get_var_type(ds, v, crs_var)\n",
+    "    long_name = stac_helpers.get_long_name(ds, v)\n",
+    "    vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "11ad5352-884c-4472-8864-4570a96f66e5",
+   "metadata": {},
+   "source": [
+    "### Finalize extension"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "10141fd4-91d6-491d-878b-02653720891d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# add dimesions and variables to collection extension\n",
+    "dc.apply(dimensions=dims_dict, variables=vars_dict)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "615ca168-75fb-4135-9941-0ef5fe4fd1cb",
+   "metadata": {},
+   "source": [
+    "## Add STAC Collection to Catalog and Save"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e2120a55-3d04-4122-a93f-29afcdb8cb1b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# # helper to find items of wrong type\n",
+    "# d = collection.to_dict()\n",
+    "# print(*stac_helpers.find_paths(d))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4b75791b-6b2d-40be-b7c6-330a60888fb5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if catalog.get_child(collection_id):\n",
+    "    collection.normalize_and_save(root_href=os.path.join(catalog_path, collection_id), catalog_type=pystac.CatalogType.SELF_CONTAINED)\n",
+    "else:\n",
+    "    catalog.add_child(collection)\n",
+    "    catalog.normalize_and_save(root_href=catalog_path, catalog_type=pystac.CatalogType.SELF_CONTAINED)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d6f676b5-e892-4bfb-8d73-2828addd838c",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "global-global-pangeo",
+   "language": "python",
+   "name": "conda-env-global-global-pangeo-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}