diff --git a/workflows/archive/CooperMcKenzie_create_collection_from_zarr.ipynb b/workflows/archive/CooperMcKenzie_create_collection_from_zarr.ipynb index ce1bb6cce17d1cf5f14acf14aa9075567534a25c..e2de17a71fbba156904c02bb529f9a8a2b0bbeb1 100644 --- a/workflows/archive/CooperMcKenzie_create_collection_from_zarr.ipynb +++ b/workflows/archive/CooperMcKenzie_create_collection_from_zarr.ipynb @@ -192,22 +192,9 @@ "outputs": [], "source": [ "dims_auto_extract = ['X', 'Y', 'T']\n", - "def extract_dim(ds, d):\n", - " try:\n", - " dim_list = ds.cf.axes[d]\n", - " assert len(dim_list)==1, f'There are too many {d} dimensions in this dataset.'\n", - " dim = dim_list[0]\n", - " except KeyError:\n", - " print(f\"Could not auto-extract {d} dimension name.\")\n", - " print(\"Look at the xarray output above showing the dataset dimensions.\")\n", - " dim = str(input(f\"What is the name of the {d} dimension of this dataset?\"))\n", - " assert dim in ds.dims, \"That is not a valid dimension name for this dataset\"\n", - " print(f\"name of {d} dimension: {dim}\\n\")\n", - " return dim\n", - "\n", "dim_names_dict = {}\n", "for d in dims_auto_extract:\n", - " dim_names_dict[d] = extract_dim(ds, d)\n", + " dim_names_dict[d] = stac_helpers.extract_dim(ds, d)\n", "print(f\"Dimension dictionary: {dim_names_dict}\")" ] }, @@ -451,59 +438,6 @@ "print(xy_bounds)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f49d6d7-9e30-4144-909b-fa1238e6c77a", - "metadata": {}, - "outputs": [], - "source": [ - "def get_step(ds, dim_name, time_dim=False, debug=False, step_ix=0, round_dec=None):\n", - " dim_vals = ds[dim_name].values\n", - " diffs = [d2 - d1 for d1, d2 in zip(dim_vals, dim_vals[1:])]\n", - " # option to round number of decimals\n", - " # sometimes there are different steps calculated due to small rounding errors coming out of the diff\n", - " # calculation, rounding these can correct for that\n", - " if round_dec:\n", - " unique_steps = np.unique(np.array(diffs).round(decimals=round_dec), return_counts=True)\n", - " else:\n", - " unique_steps = np.unique(diffs, return_counts=True)\n", - " step_list = unique_steps[0]\n", - " # optional - for invesitgating uneven steps\n", - " if debug:\n", - " print(f'step_list: {step_list}')\n", - " print(f'step_count: {unique_steps[1]}')\n", - " indices = [i for i, x in enumerate(diffs) if x == step_list[step_ix]]\n", - " print(f'index locations of step index {step_ix} in step_list: {indices}')\n", - " # set step - if all steps are the same length\n", - " # datacube spec specifies to use null for irregularly spaced steps\n", - " if len(step_list)==1:\n", - " if time_dim:\n", - " # make sure time deltas are in np timedelta format\n", - " step_list = [np.array([step], dtype=\"timedelta64[ns]\")[0] for step in step_list]\n", - " step = step_list[0].astype(float).item()\n", - " else:\n", - " step = None\n", - " return(step)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a20d12bf-a511-4c5e-84d0-77e2ec551518", - "metadata": {}, - "outputs": [], - "source": [ - "def get_long_name(ds, v):\n", - " # try to get long_name attribute from variable\n", - " try:\n", - " long_name = ds[v].attrs['long_name']\n", - " # otherwise, leave empty\n", - " except:\n", - " long_name = None\n", - " return long_name" - ] - }, { "cell_type": "markdown", "id": "e7dc357c-91ec-49ae-83e5-400f791f9792", @@ -570,7 +504,7 @@ "metadata": {}, "outputs": [], "source": [ - "time_step = pd.Timedelta(get_step(ds, dim_names_dict['T'], time_dim=True)).isoformat()\n", + "time_step 
= pd.Timedelta(stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True)).isoformat()\n", "print(f'time step: {time_step}')" ] }, @@ -582,7 +516,7 @@ "outputs": [], "source": [ "# # debugging for time steps: get all step values and locations\n", - "# time_step = get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=1)" + "# time_step = stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=1)" ] }, { @@ -614,7 +548,7 @@ "metadata": {}, "outputs": [], "source": [ - "x_step = get_step(ds, dim_names_dict['X'])\n", + "x_step = stac_helpers.get_step(ds, dim_names_dict['X'])\n", "print(f'x step: {x_step}')" ] }, @@ -627,7 +561,7 @@ "source": [ "# # debugging for spatial steps: get all step values and locations\n", "# x_dim=dim_names_dict['X']\n", - "# x_step = get_step(ds, x_dim, debug=True, step_ix=1)\n", + "# x_step = stac_helpers.get_step(ds, x_dim, debug=True, step_ix=1)\n", "# print(f'\\nx dim name (for next cell): {x_dim}')" ] }, @@ -660,7 +594,7 @@ "metadata": {}, "outputs": [], "source": [ - "y_step = get_step(ds, dim_names_dict['Y'])\n", + "y_step = stac_helpers.get_step(ds, dim_names_dict['Y'])\n", "print(f'y step: {y_step}')" ] }, @@ -673,7 +607,7 @@ "source": [ "# # debugging for spatial steps: get all step values and locations\n", "# y_dim=dim_names_dict['Y']\n", - "# y_step = get_step(ds, y_dim, debug=True, step_ix=1)\n", + "# y_step = stac_helpers.get_step(ds, y_dim, debug=True, step_ix=1)\n", "# print(f'\\nx dim name (for next cell): {x_dim}')" ] }, @@ -724,16 +658,9 @@ "# dimension name should come from the coordinates printed above\n", "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n", "# note that the extent of each dimension should be pulled from the dataset\n", - "dims_dict = {'time': pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': get_long_name(ds, 'time'), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step': time_step}),\n", - " 'x': pystac.extensions.datacube.Dimension({'type': 'spatial', 'description': get_long_name(ds, 'x'), 'axis': 'x', 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", - " 'y': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': get_long_name(ds, 'y'), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", - " 'bottom_top_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': get_long_name(ds, 'bottom_top_stag')}),\n", - " 'bottom_top': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': get_long_name(ds, 'bottom_top')}),\n", - " 'soil_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': get_long_name(ds, 'soil_layers_stag')}),\n", - " 'x_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': get_long_name(ds, 'x_stag')}),\n", - " 'y_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': get_long_name(ds, 'y_stag')}),\n", - " 'snow_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': get_long_name(ds, 'snow_layers_stag')}),\n", - " 'snso_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': get_long_name(ds, 'snso_layers_stag')}),\n", + "dims_dict = {'time': 
pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, 'time'), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step': time_step}),\n", + " 'x': pystac.extensions.datacube.Dimension({'type': 'spatial', 'description': stac_helpers.get_long_name(ds, 'x'), 'axis': 'x', 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", + " 'y': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, 'y'), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", " }" ] }, @@ -745,43 +672,6 @@ "### Add cube variables (optional field for extension)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "92510876-7853-4d24-8563-c69f9012aeb6", - "metadata": {}, - "outputs": [], - "source": [ - "# define functions to pull out datacube attributes and validate format\n", - "def get_unit(ds, v):\n", - " # check if unit is defined for variable\n", - " try:\n", - " unit = ds[v].attrs['units']\n", - " except:\n", - " unit = None\n", - " # check if unit comes from https://docs.unidata.ucar.edu/udunits/current/#Database\n", - " # datacube extension specifies: The unit of measurement for the data, preferably compliant to UDUNITS-2 units (singular).\n", - " # gdptools expects this format as well\n", - " try:\n", - " cfunits.Units(unit).isvalid\n", - " except:\n", - " print(\"Unit is not valid as a UD unit.\")\n", - " unit = str(input(\"Please enter a valid unit for {v} from here: https://docs.unidata.ucar.edu/udunits/current/#Database\"))\n", - " assert cfunits.Units(unit).isvalid\n", - " return unit\n", - "\n", - "def get_var_type(ds, v):\n", - " if v in ds.coords:\n", - " # type = auxiliary for a variable that contains coordinate data, but isn't a dimension in cube:dimensions.\n", - " # For example, the values of the datacube might be provided in the projected coordinate reference system, \n", - " # but the datacube could have a variable lon with dimensions (y, x), giving the longitude at each point.\n", - " var_type = 'auxiliary'\n", - " # type = data for a variable indicating some measured value, for example \"precipitation\", \"temperature\", etc.\n", - " else:\n", - " var_type = 'data'\n", - " return var_type" - ] - }, { "cell_type": "code", "execution_count": null, @@ -808,9 +698,9 @@ "# create dictionary of dataset variables and associated dimensions\n", "vars_dict={}\n", "for v in vars:\n", - " unit = get_unit(ds, v)\n", - " var_type = get_var_type(ds, v)\n", - " long_name = get_long_name(ds, v)\n", + " unit = stac_helpers.get_unit(ds, v)\n", + " var_type = stac_helpers.get_var_type(ds, v)\n", + " long_name = stac_helpers.get_long_name(ds, v)\n", " vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})" ] }, @@ -852,18 +742,7 @@ "source": [ "# # helper to find items of wrong type\n", "# d = collection.to_dict()\n", - "# def find_paths(nested_dict, prepath=()):\n", - "# for k, v in nested_dict.items():\n", - "# try:\n", - "# path = prepath + (k,)\n", - "# if type(v) is np.float64: # found value\n", - "# yield path\n", - "# elif hasattr(v, 'items'): # v is a dict\n", - "# yield from find_paths(v, path) \n", - "# except:\n", - "# print(prepath)\n", - "\n", - "# print(*find_paths(d))" + "# print(*stac_helpers.find_paths(d))" ] }, { diff --git 
a/workflows/archive/alaska_et_2020_ccsm4_historical_simulation_create_collection_from_zarr.ipynb b/workflows/archive/alaska_et_2020_ccsm4_historical_simulation_create_collection_from_zarr.ipynb index 57997f52ce849f4a843c52292fd296962cb4a0ae..4248535fdb130df173b2fd31fcedb96fb4dc9b7a 100644 --- a/workflows/archive/alaska_et_2020_ccsm4_historical_simulation_create_collection_from_zarr.ipynb +++ b/workflows/archive/alaska_et_2020_ccsm4_historical_simulation_create_collection_from_zarr.ipynb @@ -199,22 +199,9 @@ "outputs": [], "source": [ "dims_auto_extract = ['X', 'Y', 'T']\n", - "def extract_dim(ds, d):\n", - " try:\n", - " dim_list = ds.cf.axes[d]\n", - " assert len(dim_list)==1, f'There are too many {d} dimensions in this dataset.'\n", - " dim = dim_list[0]\n", - " except KeyError:\n", - " print(f\"Could not auto-extract {d} dimension name.\")\n", - " print(\"Look at the xarray output above showing the dataset dimensions.\")\n", - " dim = str(input(f\"What is the name of the {d} dimension of this dataset?\"))\n", - " assert dim in ds.dims, \"That is not a valid dimension name for this dataset\"\n", - " print(f\"name of {d} dimension: {dim}\\n\")\n", - " return dim\n", - "\n", "dim_names_dict = {}\n", "for d in dims_auto_extract:\n", - " dim_names_dict[d] = extract_dim(ds, d)\n", + " dim_names_dict[d] = stac_helpers.extract_dim(ds, d)\n", "print(f\"Dimension dictionary: {dim_names_dict}\")" ] }, @@ -466,56 +453,6 @@ "print(xy_bounds)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f49d6d7-9e30-4144-909b-fa1238e6c77a", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def get_step(ds, dim_name, debug=False, step_ix=0):\n", - " dim_vals = ds[dim_name].values\n", - " diffs = [d2 - d1 for d1, d2 in zip(dim_vals, dim_vals[1:])]\n", - " unique_steps = np.unique(diffs, return_counts=True)\n", - " step_list = unique_steps[0]\n", - " # optional - for invesitgating uneven steps\n", - " if debug:\n", - " print(f'step_list: {step_list}')\n", - " print(f'step_count: {unique_steps[1]}')\n", - " indices = [i for i, x in enumerate(diffs) if x == step_list[step_ix]]\n", - " print(f'index locations of step index {step_ix} in step_list: {indices}')\n", - " # set step - if all steps are the same length\n", - " # datacube spec specifies to use null for irregularly spaced steps\n", - " if len(step_list)==1:\n", - " # make sure time deltas are in np timedelta format\n", - " step_list = [np.array([step], dtype=\"timedelta64[ns]\")[0] for step in step_list]\n", - " step = step_list[0].astype(float).item()\n", - " else:\n", - " step = None\n", - " return(step)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a20d12bf-a511-4c5e-84d0-77e2ec551518", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def get_long_name(ds, v):\n", - " # try to get long_name attribute from variable\n", - " try:\n", - " long_name = ds[v].attrs['long_name']\n", - " # otherwise, leave empty\n", - " except:\n", - " long_name = None\n", - " return long_name" - ] - }, { "cell_type": "markdown", "id": "e7dc357c-91ec-49ae-83e5-400f791f9792", @@ -573,7 +510,7 @@ }, "outputs": [], "source": [ - "time_step = get_step(ds, dim_names_dict['T'])\n", + "time_step = stac_helpers.get_step(ds, dim_names_dict['T'])\n", "print(f'time step: {time_step}')" ] }, @@ -587,7 +524,7 @@ "outputs": [], "source": [ "# debugging for time steps: get all step values and locations\n", - "time_step = get_step(ds, dim_names_dict['T'], debug=True, step_ix=2)" + "time_step = 
stac_helpers.get_step(ds, dim_names_dict['T'], debug=True, step_ix=2)" ] }, { @@ -623,7 +560,7 @@ }, "outputs": [], "source": [ - "x_step = get_step(ds, dim_names_dict['X'])\n", + "x_step = stac_helpers.get_step(ds, dim_names_dict['X'])\n", "print(f'x step: {x_step}')" ] }, @@ -636,7 +573,7 @@ }, "outputs": [], "source": [ - "y_step = get_step(ds, dim_names_dict['Y'])\n", + "y_step = stac_helpers.get_step(ds, dim_names_dict['Y'])\n", "print(f'y step: {y_step}')" ] }, @@ -673,9 +610,9 @@ "# dimension name should come from the coordinates printed above\n", "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n", "# note that the extent of each dimension should be pulled from the dataset\n", - "dims_dict = {'time': pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': get_long_name(ds, 'time'), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step': pd.Timedelta(time_step).isoformat()}),\n", - " 'x': pystac.extensions.datacube.Dimension({'type': 'spatial', 'description': get_long_name(ds, 'x'), 'axis': 'x', 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", - " 'y': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': get_long_name(ds, 'y'), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", + "dims_dict = {'time': pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, 'time'), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step': pd.Timedelta(time_step).isoformat()}),\n", + " 'x': pystac.extensions.datacube.Dimension({'type': 'spatial', 'description': stac_helpers.get_long_name(ds, 'x'), 'axis': 'x', 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", + " 'y': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, 'y'), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", " }" ] }, @@ -687,45 +624,6 @@ "### Add cube variables (optional field for extension)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "92510876-7853-4d24-8563-c69f9012aeb6", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# define functions to pull out datacube attributes and validate format\n", - "def get_unit(ds, v):\n", - " # check if unit is defined for variable\n", - " try:\n", - " unit = ds[v].attrs['units']\n", - " except:\n", - " unit = None\n", - " # check if unit comes from https://docs.unidata.ucar.edu/udunits/current/#Database\n", - " # datacube extension specifies: The unit of measurement for the data, preferably compliant to UDUNITS-2 units (singular).\n", - " # gdptools expects this format as well\n", - " try:\n", - " cfunits.Units(unit).isvalid\n", - " except:\n", - " print(\"Unit is not valid as a UD unit.\")\n", - " unit = str(input(\"Please enter a valid unit for {v} from here: https://docs.unidata.ucar.edu/udunits/current/#Database\"))\n", - " assert cfunits.Units(unit).isvalid\n", - " return unit\n", - "\n", - "def get_var_type(ds, v):\n", - " if v in ds.coords:\n", - " # type = auxiliary for a variable that contains coordinate data, but isn't a dimension in cube:dimensions.\n", - " # For example, the values of the datacube might be provided in the projected coordinate reference system, 
\n", - " # but the datacube could have a variable lon with dimensions (y, x), giving the longitude at each point.\n", - " var_type = 'auxiliary'\n", - " # type = data for a variable indicating some measured value, for example \"precipitation\", \"temperature\", etc.\n", - " else:\n", - " var_type = 'data'\n", - " return var_type" - ] - }, { "cell_type": "code", "execution_count": null, @@ -754,9 +652,9 @@ "# create dictionary of dataset variables and associated dimensions\n", "vars_dict={}\n", "for v in vars:\n", - " unit = get_unit(ds, v)\n", - " var_type = get_var_type(ds, v)\n", - " long_name = get_long_name(ds, v)\n", + " unit = stac_helpers.get_unit(ds, v)\n", + " var_type = stac_helpers.get_var_type(ds, v)\n", + " long_name = stac_helpers.get_long_name(ds, v)\n", " vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})" ] }, @@ -800,18 +698,7 @@ "source": [ "# # helper to find items of wrong type\n", "# d = collection.to_dict()\n", - "# def find_paths(nested_dict, prepath=()):\n", - "# for k, v in nested_dict.items():\n", - "# try:\n", - "# path = prepath + (k,)\n", - "# if type(v) is np.float64: # found value\n", - "# yield path\n", - "# elif hasattr(v, 'items'): # v is a dict\n", - "# yield from find_paths(v, path) \n", - "# except:\n", - "# print(prepath)\n", - "\n", - "# print(*find_paths(d))" + "# print(*stac_helpers.find_paths(d))" ] }, { diff --git a/workflows/archive/alaska_et_2020_era-interim_reanalysis_create_collection_from_zarr.ipynb b/workflows/archive/alaska_et_2020_era-interim_reanalysis_create_collection_from_zarr.ipynb index b89065402a87400ab61cd900983b9c5fec20b7e4..eb4c969242c0326ede2994fe64a8329481423ac8 100644 --- a/workflows/archive/alaska_et_2020_era-interim_reanalysis_create_collection_from_zarr.ipynb +++ b/workflows/archive/alaska_et_2020_era-interim_reanalysis_create_collection_from_zarr.ipynb @@ -199,22 +199,9 @@ "outputs": [], "source": [ "dims_auto_extract = ['X', 'Y', 'T']\n", - "def extract_dim(ds, d):\n", - " try:\n", - " dim_list = ds.cf.axes[d]\n", - " assert len(dim_list)==1, f'There are too many {d} dimensions in this dataset.'\n", - " dim = dim_list[0]\n", - " except KeyError:\n", - " print(f\"Could not auto-extract {d} dimension name.\")\n", - " print(\"Look at the xarray output above showing the dataset dimensions.\")\n", - " dim = str(input(f\"What is the name of the {d} dimension of this dataset?\"))\n", - " assert dim in ds.dims, \"That is not a valid dimension name for this dataset\"\n", - " print(f\"name of {d} dimension: {dim}\\n\")\n", - " return dim\n", - "\n", "dim_names_dict = {}\n", "for d in dims_auto_extract:\n", - " dim_names_dict[d] = extract_dim(ds, d)\n", + " dim_names_dict[d] = stac_helpers.extract_dim(ds, d)\n", "print(f\"Dimension dictionary: {dim_names_dict}\")" ] }, @@ -466,56 +453,6 @@ "print(xy_bounds)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f49d6d7-9e30-4144-909b-fa1238e6c77a", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def get_step(ds, dim_name, debug=False, step_ix=0):\n", - " dim_vals = ds[dim_name].values\n", - " diffs = [d2 - d1 for d1, d2 in zip(dim_vals, dim_vals[1:])]\n", - " unique_steps = np.unique(diffs, return_counts=True)\n", - " step_list = unique_steps[0]\n", - " # optional - for invesitgating uneven steps\n", - " if debug:\n", - " print(f'step_list: {step_list}')\n", - " print(f'step_count: {unique_steps[1]}')\n", - " indices = [i for i, x in enumerate(diffs) if x == 
step_list[step_ix]]\n", - " print(f'index locations of step index {step_ix} in step_list: {indices}')\n", - " # set step - if all steps are the same length\n", - " # datacube spec specifies to use null for irregularly spaced steps\n", - " if len(step_list)==1:\n", - " # make sure time deltas are in np timedelta format\n", - " step_list = [np.array([step], dtype=\"timedelta64[ns]\")[0] for step in step_list]\n", - " step = step_list[0].astype(float).item()\n", - " else:\n", - " step = None\n", - " return(step)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a20d12bf-a511-4c5e-84d0-77e2ec551518", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def get_long_name(ds, v):\n", - " # try to get long_name attribute from variable\n", - " try:\n", - " long_name = ds[v].attrs['long_name']\n", - " # otherwise, leave empty\n", - " except:\n", - " long_name = None\n", - " return long_name" - ] - }, { "cell_type": "markdown", "id": "e7dc357c-91ec-49ae-83e5-400f791f9792", @@ -573,7 +510,7 @@ }, "outputs": [], "source": [ - "time_step = get_step(ds, dim_names_dict['T'])\n", + "time_step = stac_helpers.get_step(ds, dim_names_dict['T'])\n", "print(f'time step: {time_step}')" ] }, @@ -587,7 +524,7 @@ "outputs": [], "source": [ "# debugging for time steps: get all step values and locations\n", - "time_step = get_step(ds, dim_names_dict['T'], debug=True, step_ix=2)" + "time_step = stac_helpers.get_step(ds, dim_names_dict['T'], debug=True, step_ix=2)" ] }, { @@ -623,7 +560,7 @@ }, "outputs": [], "source": [ - "x_step = get_step(ds, dim_names_dict['X'])\n", + "x_step = stac_helpers.get_step(ds, dim_names_dict['X'])\n", "print(f'x step: {x_step}')" ] }, @@ -636,7 +573,7 @@ }, "outputs": [], "source": [ - "y_step = get_step(ds, dim_names_dict['Y'])\n", + "y_step = stac_helpers.get_step(ds, dim_names_dict['Y'])\n", "print(f'y step: {y_step}')" ] }, @@ -673,9 +610,9 @@ "# dimension name should come from the coordinates printed above\n", "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n", "# note that the extent of each dimension should be pulled from the dataset\n", - "dims_dict = {'time': pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': get_long_name(ds, 'time'), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step': pd.Timedelta(time_step).isoformat()}),\n", - " 'x': pystac.extensions.datacube.Dimension({'type': 'spatial', 'description': get_long_name(ds, 'x'), 'axis': 'x', 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", - " 'y': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': get_long_name(ds, 'y'), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", + "dims_dict = {'time': pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, 'time'), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step': pd.Timedelta(time_step).isoformat()}),\n", + " 'x': pystac.extensions.datacube.Dimension({'type': 'spatial', 'description': stac_helpers.get_long_name(ds, 'x'), 'axis': 'x', 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", + " 'y': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, 'y'), 'extent': [xy_bounds[1], 
xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", " }" ] }, @@ -687,45 +624,6 @@ "### Add cube variables (optional field for extension)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "92510876-7853-4d24-8563-c69f9012aeb6", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# define functions to pull out datacube attributes and validate format\n", - "def get_unit(ds, v):\n", - " # check if unit is defined for variable\n", - " try:\n", - " unit = ds[v].attrs['units']\n", - " except:\n", - " unit = None\n", - " # check if unit comes from https://docs.unidata.ucar.edu/udunits/current/#Database\n", - " # datacube extension specifies: The unit of measurement for the data, preferably compliant to UDUNITS-2 units (singular).\n", - " # gdptools expects this format as well\n", - " try:\n", - " cfunits.Units(unit).isvalid\n", - " except:\n", - " print(\"Unit is not valid as a UD unit.\")\n", - " unit = str(input(\"Please enter a valid unit for {v} from here: https://docs.unidata.ucar.edu/udunits/current/#Database\"))\n", - " assert cfunits.Units(unit).isvalid\n", - " return unit\n", - "\n", - "def get_var_type(ds, v):\n", - " if v in ds.coords:\n", - " # type = auxiliary for a variable that contains coordinate data, but isn't a dimension in cube:dimensions.\n", - " # For example, the values of the datacube might be provided in the projected coordinate reference system, \n", - " # but the datacube could have a variable lon with dimensions (y, x), giving the longitude at each point.\n", - " var_type = 'auxiliary'\n", - " # type = data for a variable indicating some measured value, for example \"precipitation\", \"temperature\", etc.\n", - " else:\n", - " var_type = 'data'\n", - " return var_type" - ] - }, { "cell_type": "code", "execution_count": null, @@ -754,9 +652,9 @@ "# create dictionary of dataset variables and associated dimensions\n", "vars_dict={}\n", "for v in vars:\n", - " unit = get_unit(ds, v)\n", - " var_type = get_var_type(ds, v)\n", - " long_name = get_long_name(ds, v)\n", + " unit = stac_helpers.get_unit(ds, v)\n", + " var_type = stac_helpers.get_var_type(ds, v)\n", + " long_name = stac_helpers.get_long_name(ds, v)\n", " vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})" ] }, @@ -800,18 +698,7 @@ "source": [ "# # helper to find items of wrong type\n", "# d = collection.to_dict()\n", - "# def find_paths(nested_dict, prepath=()):\n", - "# for k, v in nested_dict.items():\n", - "# try:\n", - "# path = prepath + (k,)\n", - "# if type(v) is np.float64: # found value\n", - "# yield path\n", - "# elif hasattr(v, 'items'): # v is a dict\n", - "# yield from find_paths(v, path) \n", - "# except:\n", - "# print(prepath)\n", - "\n", - "# print(*find_paths(d))" + "# print(*stac_helpers.find_paths(d))" ] }, { diff --git a/workflows/archive/alaska_et_2020_gfdl_historical_simulation_create_collection_from_zarr.ipynb b/workflows/archive/alaska_et_2020_gfdl_historical_simulation_create_collection_from_zarr.ipynb index b9af80918a2e7c49ce55761055fd04aba7139323..2de45463e65981e90a9ce233663039937eba0bf2 100644 --- a/workflows/archive/alaska_et_2020_gfdl_historical_simulation_create_collection_from_zarr.ipynb +++ b/workflows/archive/alaska_et_2020_gfdl_historical_simulation_create_collection_from_zarr.ipynb @@ -199,22 +199,9 @@ "outputs": [], "source": [ "dims_auto_extract = ['X', 'Y', 'T']\n", - "def extract_dim(ds, d):\n", - " try:\n", - " dim_list = 
ds.cf.axes[d]\n", - " assert len(dim_list)==1, f'There are too many {d} dimensions in this dataset.'\n", - " dim = dim_list[0]\n", - " except KeyError:\n", - " print(f\"Could not auto-extract {d} dimension name.\")\n", - " print(\"Look at the xarray output above showing the dataset dimensions.\")\n", - " dim = str(input(f\"What is the name of the {d} dimension of this dataset?\"))\n", - " assert dim in ds.dims, \"That is not a valid dimension name for this dataset\"\n", - " print(f\"name of {d} dimension: {dim}\\n\")\n", - " return dim\n", - "\n", "dim_names_dict = {}\n", "for d in dims_auto_extract:\n", - " dim_names_dict[d] = extract_dim(ds, d)\n", + " dim_names_dict[d] = stac_helpers.extract_dim(ds, d)\n", "print(f\"Dimension dictionary: {dim_names_dict}\")" ] }, @@ -466,56 +453,6 @@ "print(xy_bounds)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f49d6d7-9e30-4144-909b-fa1238e6c77a", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def get_step(ds, dim_name, debug=False, step_ix=0):\n", - " dim_vals = ds[dim_name].values\n", - " diffs = [d2 - d1 for d1, d2 in zip(dim_vals, dim_vals[1:])]\n", - " unique_steps = np.unique(diffs, return_counts=True)\n", - " step_list = unique_steps[0]\n", - " # optional - for invesitgating uneven steps\n", - " if debug:\n", - " print(f'step_list: {step_list}')\n", - " print(f'step_count: {unique_steps[1]}')\n", - " indices = [i for i, x in enumerate(diffs) if x == step_list[step_ix]]\n", - " print(f'index locations of step index {step_ix} in step_list: {indices}')\n", - " # set step - if all steps are the same length\n", - " # datacube spec specifies to use null for irregularly spaced steps\n", - " if len(step_list)==1:\n", - " # make sure time deltas are in np timedelta format\n", - " step_list = [np.array([step], dtype=\"timedelta64[ns]\")[0] for step in step_list]\n", - " step = step_list[0].astype(float).item()\n", - " else:\n", - " step = None\n", - " return(step)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a20d12bf-a511-4c5e-84d0-77e2ec551518", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def get_long_name(ds, v):\n", - " # try to get long_name attribute from variable\n", - " try:\n", - " long_name = ds[v].attrs['long_name']\n", - " # otherwise, leave empty\n", - " except:\n", - " long_name = None\n", - " return long_name" - ] - }, { "cell_type": "markdown", "id": "e7dc357c-91ec-49ae-83e5-400f791f9792", @@ -573,7 +510,7 @@ }, "outputs": [], "source": [ - "time_step = get_step(ds, dim_names_dict['T'])\n", + "time_step = stac_helpers.get_step(ds, dim_names_dict['T'])\n", "print(f'time step: {time_step}')" ] }, @@ -587,7 +524,7 @@ "outputs": [], "source": [ "# debugging for time steps: get all step values and locations\n", - "time_step = get_step(ds, dim_names_dict['T'], debug=True, step_ix=2)" + "time_step = stac_helpers.get_step(ds, dim_names_dict['T'], debug=True, step_ix=2)" ] }, { @@ -623,7 +560,7 @@ }, "outputs": [], "source": [ - "x_step = get_step(ds, dim_names_dict['X'])\n", + "x_step = stac_helpers.get_step(ds, dim_names_dict['X'])\n", "print(f'x step: {x_step}')" ] }, @@ -636,7 +573,7 @@ }, "outputs": [], "source": [ - "y_step = get_step(ds, dim_names_dict['Y'])\n", + "y_step = stac_helpers.get_step(ds, dim_names_dict['Y'])\n", "print(f'y step: {y_step}')" ] }, @@ -673,9 +610,9 @@ "# dimension name should come from the coordinates printed above\n", "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n", "# 
note that the extent of each dimension should be pulled from the dataset\n", - "dims_dict = {'time': pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': get_long_name(ds, 'time'), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step': pd.Timedelta(time_step).isoformat()}),\n", - " 'x': pystac.extensions.datacube.Dimension({'type': 'spatial', 'description': get_long_name(ds, 'x'), 'axis': 'x', 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", - " 'y': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': get_long_name(ds, 'y'), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", + "dims_dict = {'time': pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, 'time'), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step': pd.Timedelta(time_step).isoformat()}),\n", + " 'x': pystac.extensions.datacube.Dimension({'type': 'spatial', 'description': stac_helpers.get_long_name(ds, 'x'), 'axis': 'x', 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", + " 'y': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, 'y'), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", " }" ] }, @@ -687,45 +624,6 @@ "### Add cube variables (optional field for extension)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "92510876-7853-4d24-8563-c69f9012aeb6", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# define functions to pull out datacube attributes and validate format\n", - "def get_unit(ds, v):\n", - " # check if unit is defined for variable\n", - " try:\n", - " unit = ds[v].attrs['units']\n", - " except:\n", - " unit = None\n", - " # check if unit comes from https://docs.unidata.ucar.edu/udunits/current/#Database\n", - " # datacube extension specifies: The unit of measurement for the data, preferably compliant to UDUNITS-2 units (singular).\n", - " # gdptools expects this format as well\n", - " try:\n", - " cfunits.Units(unit).isvalid\n", - " except:\n", - " print(\"Unit is not valid as a UD unit.\")\n", - " unit = str(input(\"Please enter a valid unit for {v} from here: https://docs.unidata.ucar.edu/udunits/current/#Database\"))\n", - " assert cfunits.Units(unit).isvalid\n", - " return unit\n", - "\n", - "def get_var_type(ds, v):\n", - " if v in ds.coords:\n", - " # type = auxiliary for a variable that contains coordinate data, but isn't a dimension in cube:dimensions.\n", - " # For example, the values of the datacube might be provided in the projected coordinate reference system, \n", - " # but the datacube could have a variable lon with dimensions (y, x), giving the longitude at each point.\n", - " var_type = 'auxiliary'\n", - " # type = data for a variable indicating some measured value, for example \"precipitation\", \"temperature\", etc.\n", - " else:\n", - " var_type = 'data'\n", - " return var_type" - ] - }, { "cell_type": "code", "execution_count": null, @@ -754,9 +652,9 @@ "# create dictionary of dataset variables and associated dimensions\n", "vars_dict={}\n", "for v in vars:\n", - " unit = get_unit(ds, v)\n", - " var_type = get_var_type(ds, v)\n", - " long_name = get_long_name(ds, v)\n", + " unit = 
stac_helpers.get_unit(ds, v)\n", + " var_type = stac_helpers.get_var_type(ds, v)\n", + " long_name = stac_helpers.get_long_name(ds, v)\n", " vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})" ] }, @@ -800,18 +698,7 @@ "source": [ "# # helper to find items of wrong type\n", "# d = collection.to_dict()\n", - "# def find_paths(nested_dict, prepath=()):\n", - "# for k, v in nested_dict.items():\n", - "# try:\n", - "# path = prepath + (k,)\n", - "# if type(v) is np.float64: # found value\n", - "# yield path\n", - "# elif hasattr(v, 'items'): # v is a dict\n", - "# yield from find_paths(v, path) \n", - "# except:\n", - "# print(prepath)\n", - "\n", - "# print(*find_paths(d))" + "# print(*stac_helpers.find_paths(d))" ] }, { diff --git a/workflows/archive/conus404-daily_create_collection_from_zarr.ipynb b/workflows/archive/conus404-daily_create_collection_from_zarr.ipynb deleted file mode 100644 index 6fd10175c650e92b2dd863975619377c59ed4005..0000000000000000000000000000000000000000 --- a/workflows/archive/conus404-daily_create_collection_from_zarr.ipynb +++ /dev/null @@ -1,784 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "6c10e07b-1e60-4926-af1d-fa75dc78e5d4", - "metadata": { - "tags": [] - }, - "source": [ - "# CONUS404 Daily Zarr -> Collection Workflow\n", - "This is a workflow to build a [STAC collection](https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md) from the zarr asset for the dataset named above. We use the [datacube extension](https://github.com/stac-extensions/datacube) to define the spatial and temporal dimensions of the zarr store, as well as the variables it contains.\n", - "\n", - "To simplify this workflow so that it can scale to many datasets, a few simplifying suggestions and assumptions are made:\n", - "1. For USGS data, we can use the CC0-1.0 license. For all other data we can use Unlicense. Ref: https://spdx.org/licenses/\n", - "2. I am assuming all coordinates are from the WGS84 datum if not specified." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "201e0945-de55-45ff-b095-c2af009a4e62", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import pystac\n", - "from pystac.extensions.datacube import CollectionDatacubeExtension, AssetDatacubeExtension, AdditionalDimension, DatacubeExtension\n", - "import xarray as xr\n", - "import cf_xarray\n", - "import os\n", - "import fsspec\n", - "import cf_xarray\n", - "import hvplot.xarray\n", - "import pandas as pd\n", - "import json\n", - "import numpy as np\n", - "import metpy\n", - "import cartopy.crs as ccrs\n", - "import cfunits\n", - "import json" - ] - }, - { - "cell_type": "markdown", - "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", - "metadata": {}, - "source": [ - "## Open up NHGF STAC Catalog" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# define folder location where your STAC catalog json file is\n", - "catalog_path = os.path.join('..', '..', 'catalog')\n", - "# open catalog\n", - "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" - ] - }, - { - "cell_type": "markdown", - "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", - "metadata": {}, - "source": [ - "## Collection Metadata Input" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "482d204d-b5b6-40e5-ac42-55b459be1097", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# name for STAC collection\n", - "collection_id = 'conus404-daily'\n", - "# description of STAC collection\n", - "collection_description = 'CONUS404 40 years of daily values for subset of model output variables derived from hourly values on cloud storage'\n", - "# license for dataset\n", - "collection_license = 'CC0-1.0'" - ] - }, - { - "cell_type": "markdown", - "id": "116b5837-8e85-4ae7-964a-803533ded714", - "metadata": {}, - "source": [ - "## Asset Metadata Input" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dd6fa323-132a-4794-8c80-576933f547a0", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# url to zarr store that you want to create a collection for\n", - "zarr_url = 's3://hytest/conus404/conus404_daily.zarr/'\n", - "\n", - "# define keyword arguments needed for opening the dataset with xarray\n", - "# ref: https://github.com/stac-extensions/xarray-assets\n", - "xarray_opendataset_kwargs = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n", - " \"xarray:storage_options\": {\"anon\": True, \"client_kwargs\": {\"endpoint_url\":\"https://usgs.osn.mghpcc.org/\"}}}\n", - "# description for zarr url asset attached to collection (zarr_url)\n", - "asset_description = \"Open Storage Network Pod S3 API access to collection zarr group\"\n", - "# roles to tag zarr url asset with\n", - "asset_roles = [\"data\",\"zarr\",\"s3\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e1441cd4-e94c-4902-af46-8f1af470eb6b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# url to zarr store that you want to create a collection for\n", - "zarr_url2 = 's3://nhgf-development/conus404/conus404_daily_202210.zarr/'\n", - "\n", - "# define keyword arguments needed for opening the dataset with xarray\n", - "# ref: https://github.com/stac-extensions/xarray-assets\n", - "xarray_opendataset_kwargs2 = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n", - " 
\"xarray:storage_options\":{\"requester_pays\":True}}\n", - "# description for zarr url asset attached to collection (zarr_url)\n", - "asset_description2 = \"S3 access to collection zarr group\"\n", - "# roles to tag zarr url asset with\n", - "asset_roles2 = [\"data\",\"zarr\",\"s3\"]" - ] - }, - { - "cell_type": "markdown", - "id": "b213b74f-ad17-4774-93b6-3b62be616b45", - "metadata": { - "tags": [] - }, - "source": [ - "## Data Exploration" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "708f2cf5-79ab-49af-8067-de31d0d13ee6", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# open and view zarr dataset\n", - "fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", - "ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", - " backend_kwargs={'consolidated':True}, chunks={})\n", - "ds" - ] - }, - { - "cell_type": "markdown", - "id": "0bc7e9b3-ad62-4b10-a18e-66b7ed2d35dc", - "metadata": {}, - "source": [ - "## Identify x, y, t dimensions of dataset\n", - "May require user input if dimensions cannot be auto-detected." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ab91268f-7200-4cb1-979a-c7d75531d2c0", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# dims_auto_extract = ['X', 'Y', 'T']\n", - "# def extract_dim(ds, d):\n", - "# try:\n", - "# dim_list = ds.cf.axes[d]\n", - "# assert len(dim_list)==1, f'There are too many {d} dimensions in this dataset.'\n", - "# dim = dim_list[0]\n", - "# except KeyError:\n", - "# print(f\"Could not auto-extract {d} dimension name.\")\n", - "# print(\"Look at the xarray output above showing the dataset dimensions.\")\n", - "# dim = str(input(f\"What is the name of the {d} dimension of this dataset?\"))\n", - "# assert dim in ds.dims, \"That is not a valid dimension name for this dataset\"\n", - "# print(f\"name of {d} dimension: {dim}\\n\")\n", - "# return dim\n", - "\n", - "# dim_names_dict = {}\n", - "# for d in dims_auto_extract:\n", - "# dim_names_dict[d] = extract_dim(ds, d)\n", - "dim_names_dict = {'X': 'x', 'Y': 'y', 'T': 'time'}\n", - "print(f\"Dimension dictionary: {dim_names_dict}\")" - ] - }, - { - "cell_type": "markdown", - "id": "667cb9e8-c4a5-4d26-8e30-be8c934adc37", - "metadata": {}, - "source": [ - "## Get crs info" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "172d9e6b-de88-4e3a-9184-706110a99659", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "ds = ds.metpy.parse_cf()\n", - "crs = ds[list(ds.keys())[0]].metpy.cartopy_crs" - ] - }, - { - "cell_type": "markdown", - "id": "8fbfecfb-9886-4d06-a34c-6471cb0a6053", - "metadata": {}, - "source": [ - "## Plot a map" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4eb4d027-4266-4a0b-8f16-bacfbef06242", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# plot a map of a single variable\n", - "var_to_plot = 'SNOW'\n", - "da = ds[var_to_plot].sel(time='2014-03-01 00:00').load()\n", - "da.hvplot.quadmesh(x='lon', y='lat', rasterize=True,\n", - " geo=True, tiles='OSM', alpha=0.7, cmap='turbo')" - ] - }, - { - "cell_type": "markdown", - "id": "5e057a6c-06fb-4406-823b-e81c58e520e4", - "metadata": {}, - "source": [ - "## Plot a time series at a specific point\n", - "This can help you verify a variable's values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7de2681-88c2-4597-857c-8f169c596f8b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# 
enter lat, lon of point you want to plot time series for\n", - "lat,lon = 39.978322,-105.2772194\n", - "time_start = '2013-01-01 00:00'\n", - "time_end = '2013-12-31 00:00'\n", - "x, y = crs.transform_point(lon, lat, src_crs=ccrs.PlateCarree()) # PlateCaree = Lat,Lon\n", - "da = ds[var_to_plot].sel(x=x, y=y, method='nearest').sel(time=slice(time_start,time_end)).load()\n", - "da.hvplot(x=dim_names_dict['T'], grid=True)" - ] - }, - { - "cell_type": "markdown", - "id": "a8c3ed37-8564-400b-a7fb-25bd5e43d21c", - "metadata": {}, - "source": [ - "## Create Collection Extent" - ] - }, - { - "cell_type": "markdown", - "id": "69f0d837-68a5-4fed-9a14-5d75cfbb0da4", - "metadata": {}, - "source": [ - "### Spatial Extent" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d46805e0-8e94-4ebe-aa01-d9a2d7051459", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# pull out lat/lon bbox for data\n", - "# coordinates must be from WGS 84 datum\n", - "# left, bottom, right, top\n", - "# Note: I'm not sure why but I have some trouble getting the data type right here - \n", - "# I've included all the options I've had to run to get it to not be a regular float rather \n", - "# than a numpy float below - switch the commented line if you have this issue\n", - "#coord_bounds = [ds.lon.data.min().compute().astype(float), ds.lat.data.min().compute().astype(float), ds.lon.data.max().compute().astype(float), ds.lat.data.max().compute().astype(float)]\n", - "#coord_bounds = [ds.lon.data.min().compute().astype(float).tolist(), ds.lat.data.min().compute().astype(float).tolist(), ds.lon.data.max().compute().astype(float).tolist(), ds.lat.data.max().compute().astype(float).tolist()]\n", - "coord_bounds = [ds.lon.data.min().astype(float).item(), ds.lat.data.min().astype(float).item(), ds.lon.data.max().astype(float).item(), ds.lat.data.max().astype(float).item()]\n", - "print(coord_bounds)\n", - "# create a spatial extent object \n", - "spatial_extent = pystac.SpatialExtent(bboxes=[coord_bounds])" - ] - }, - { - "cell_type": "markdown", - "id": "a04c8fca-1d33-43ac-9e2b-62d7be2887f7", - "metadata": {}, - "source": [ - "### Temporal Extent" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41a84995-867c-4152-8c57-85e3758bbb77", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# pull out first and last timestamps\n", - "temporal_extent_lower = pd.Timestamp(ds[dim_names_dict['T']].data.min())\n", - "temporal_extent_upper = pd.Timestamp(ds[dim_names_dict['T']].data.max())\n", - "print(f'min: {temporal_extent_lower} \\nmax: {temporal_extent_upper}')\n", - "# create a temporal extent object\n", - "temporal_extent = pystac.TemporalExtent(intervals=[[temporal_extent_lower, temporal_extent_upper]])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1b1e37c4-5348-46ad-abc9-e005b5d6c02b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "collection_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)" - ] - }, - { - "cell_type": "markdown", - "id": "cfb71202-03df-45b5-ac2f-0dc2ee1ab780", - "metadata": {}, - "source": [ - "## Create pystac collection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e96811b-95ae-406a-9728-55fc429d4e1f", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "if catalog.get_child(collection_id):\n", - " collection = catalog.get_child(collection_id)\n", - " print(\"existing collection opened\")\n", - " 
collection.extent=collection_extent\n", - " collection.description=collection_description\n", - " collection.license=collection_license\n", - "else:\n", - " collection = pystac.Collection(id=collection_id,\n", - " description=collection_description,\n", - " extent=collection_extent,\n", - " license=collection_license)\n", - " print(\"new collection created\")" - ] - }, - { - "cell_type": "markdown", - "id": "a21c76e8-cd57-4eb5-a33f-7c668a3b3205", - "metadata": {}, - "source": [ - "## Add zarr url asset to collection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "094832af-d22b-4359-b0f6-cf687acce5cc", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "asset_id = \"zarr-s3-osn\"\n", - "asset = pystac.Asset(href=zarr_url,\n", - " description=asset_description,\n", - " media_type=\"application/vnd+zarr\",\n", - " roles=asset_roles,\n", - " extra_fields = xarray_opendataset_kwargs)\n", - "collection.add_asset(asset_id, asset)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0c298d07-f234-4a08-986d-87f4a39e9ae6", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "asset_id2 = \"zarr-s3\"\n", - "asset2 = pystac.Asset(href=zarr_url2,\n", - " description=asset_description2,\n", - " media_type=\"application/vnd+zarr\",\n", - " roles=asset_roles2,\n", - " extra_fields = xarray_opendataset_kwargs2)\n", - "collection.add_asset(asset_id2, asset2)" - ] - }, - { - "cell_type": "markdown", - "id": "f67cd5c9-db33-45c2-bc21-480cd67354f4", - "metadata": {}, - "source": [ - "## Add datacube extension to collection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc00946d-2880-491d-9b3b-3aeeb4414d6c", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# instantiate extention on collection\n", - "dc = DatacubeExtension.ext(collection, add_if_missing=True)" - ] - }, - { - "cell_type": "markdown", - "id": "8bdd77a2-7587-485e-afb7-42af3a822241", - "metadata": {}, - "source": [ - "### Add cube dimensions (required field for extension)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "120a4914-3302-44a5-a282-0308ac84f040", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# list out dataset dimensions\n", - "# When writing data to Zarr, Xarray sets this attribute on all variables based on the variable dimensions. 
When reading a Zarr group, Xarray looks for this attribute on all arrays,\n", - "# raising an error if it can’t be found.\n", - "dims = list(ds.dims)\n", - "print(dims)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "00a18a29-fb9a-4b56-8009-493122997b16", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# get x, y bounds for extent of those dimensions (required)\n", - "xy_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n", - "print(xy_bounds)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f49d6d7-9e30-4144-909b-fa1238e6c77a", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def get_step(dim_name):\n", - " dim_vals = ds[dim_name].values\n", - " diffs = [d2 - d1 for d1, d2 in zip(dim_vals, dim_vals[1:])]\n", - " unique_steps = np.unique(diffs)\n", - " # set step - if all steps are the same length\n", - " # datacube spec specifies to use null for irregularly spaced steps\n", - " if len(unique_steps)==1:\n", - " step = unique_steps[0].astype(float).item()\n", - " else:\n", - " step = None\n", - " return(step)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a20d12bf-a511-4c5e-84d0-77e2ec551518", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def get_long_name(ds, v):\n", - " # try to get long_name attribute from variable\n", - " try:\n", - " long_name = ds[v].attrs['long_name']\n", - " # otherwise, leave empty\n", - " except:\n", - " long_name = None\n", - " return long_name" - ] - }, - { - "cell_type": "markdown", - "id": "e7dc357c-91ec-49ae-83e5-400f791f9792", - "metadata": {}, - "source": [ - "#### user input needed - you will need to look at the crs information and create a cartopy crs object after identifying the projection type:\n", - "reference list of cartopy projections: https://scitools.org.uk/cartopy/docs/latest/reference/projections.html" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ea452f62-5644-49b6-8a4e-7dc4f649fd1a", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# print ot crs information in dataset\n", - "crs_info = ds.crs\n", - "print(crs_info)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1b1d05ff-8e43-44a7-8343-178b112c4ad6", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# create the appropriate cartopy projection\n", - "lcc = ccrs.LambertConformal(central_longitude=crs_info.longitude_of_central_meridian, \n", - " central_latitude=crs_info.latitude_of_projection_origin,\n", - " standard_parallels=crs_info.standard_parallel)\n", - "# the datacube extension can accept reference_system information as a numerical EPSG code, \n", - "# WKT2 (ISO 19162) string or PROJJSON object.\n", - "# we will use a projjson, as was done by Microsoft Planetary Computer here:\n", - "# https://planetarycomputer.microsoft.com/dataset/daymet-annual-na\n", - "# https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-annual-na\n", - "projjson = json.loads(lcc.to_json())\n", - "\n", - "# alternatively, I think we could do this:\n", - "#projjson = crs.to_json()" - ] - }, - { - "cell_type": "markdown", - "id": "00a5e041-081d-428d-ac2e-75d16de205e6", - "metadata": {}, - "source": [ - "#### user input needed - you will need to copy all of the dimensions from above into the 
dict and fill in the appropriate attributes(type, axis, extent):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5a443497-67a9-4dce-a8e9-b08d31a88223", - "metadata": {}, - "outputs": [], - "source": [ - "# create a dictionary of datacube dimensions you would like to assign to this dataset\n", - "# dimension name should come from the coordinates printed above\n", - "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n", - "# note that the extent of each dimension should be pulled from the dataset\n", - "dims_dict = {'time': pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': get_long_name(ds, 'time'), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step': pd.Timedelta(get_step('time')).isoformat()}),\n", - " 'x': pystac.extensions.datacube.Dimension({'type': 'spatial', 'description': get_long_name(ds, 'x'), 'axis': 'x', 'extent': [xy_bounds[0], xy_bounds[2]], 'step': get_step('x'), 'reference_system': projjson}),\n", - " 'y': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': get_long_name(ds, 'y'), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': get_step('y'), 'reference_system': projjson}),\n", - " 'bottom_top_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': get_long_name(ds, 'bottom_top_stag')}),\n", - " 'bottom_top': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': get_long_name(ds, 'bottom_top')}),\n", - " 'soil_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': get_long_name(ds, 'soil_layers_stag')}),\n", - " 'x_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': get_long_name(ds, 'x_stag')}),\n", - " 'y_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': get_long_name(ds, 'y_stag')}),\n", - " 'snow_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': get_long_name(ds, 'snow_layers_stag')}),\n", - " 'snso_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': get_long_name(ds, 'snso_layers_stag')}),\n", - " }" - ] - }, - { - "cell_type": "markdown", - "id": "0f277883-a3fd-425f-966a-ca2140d0ef2f", - "metadata": {}, - "source": [ - "### Add cube variables (optional field for extension)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92510876-7853-4d24-8563-c69f9012aeb6", - "metadata": {}, - "outputs": [], - "source": [ - "# define functions to pull out datacube attributes and validate format\n", - "def get_unit(ds, v):\n", - " # check if unit is defined for variable\n", - " try:\n", - " unit = ds[v].attrs['units']\n", - " except:\n", - " unit = None\n", - " # check if unit comes from https://docs.unidata.ucar.edu/udunits/current/#Database\n", - " # datacube extension specifies: The unit of measurement for the data, preferably compliant to UDUNITS-2 units (singular).\n", - " # gdptools expects this format as well\n", - " try:\n", - " cfunits.Units(unit).isvalid\n", - " except:\n", - " print(\"Unit is not valid as a UD unit.\")\n", - " unit = str(input(\"Please enter a valid unit for {v} from here: https://docs.unidata.ucar.edu/udunits/current/#Database\"))\n", - " assert cfunits.Units(unit).isvalid\n", - " return unit\n", - "\n", - "def get_var_type(ds, v):\n", - " if v in ds.coords:\n", - " 
# type = auxiliary for a variable that contains coordinate data, but isn't a dimension in cube:dimensions.\n", - " # For example, the values of the datacube might be provided in the projected coordinate reference system, \n", - " # but the datacube could have a variable lon with dimensions (y, x), giving the longitude at each point.\n", - " var_type = 'auxiliary'\n", - " # type = data for a variable indicating some measured value, for example \"precipitation\", \"temperature\", etc.\n", - " else:\n", - " var_type = 'data'\n", - " return var_type" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e9272931-fc0b-4f2a-9546-283033e9cde8", - "metadata": {}, - "outputs": [], - "source": [ - "# pull list of vars from dataset\n", - "vars = list(ds.variables)\n", - "\n", - "# drop metpy_crs coordinate we have added\n", - "if 'metpy_crs' in ds.coords:\n", - " ds = ds.drop_vars('metpy_crs')\n", - "\n", - "# spec says that the keys of cube:dimensions and cube:variables should be unique together; a key like lat should not be both a dimension and a variable.\n", - "# we will drop all values in dims from vars\n", - "vars = [v for v in vars if v not in dims]\n", - "\n", - "# Microsoft Planetary Computer includes coordinates and crs as variables here:\n", - "# https://planetarycomputer.microsoft.com/dataset/daymet-annual-na\n", - "# https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-annual-na\n", - "# we will keep those in the var list\n", - "\n", - "# create dictionary of dataset variables and associated dimensions\n", - "vars_dict={}\n", - "for v in vars:\n", - " unit = get_unit(ds, v)\n", - " var_type = get_var_type(ds, v)\n", - " long_name = get_long_name(ds, v)\n", - " vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})" - ] - }, - { - "cell_type": "markdown", - "id": "11ad5352-884c-4472-8864-4570a96f66e5", - "metadata": {}, - "source": [ - "### Finalize extension" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10141fd4-91d6-491d-878b-02653720891d", - "metadata": {}, - "outputs": [], - "source": [ - "# add dimesions and variables to collection extension\n", - "dc.apply(dimensions=dims_dict, variables=vars_dict)" - ] - }, - { - "cell_type": "markdown", - "id": "615ca168-75fb-4135-9941-0ef5fe4fd1cb", - "metadata": {}, - "source": [ - "## Add STAC Collection to Catalog and Save" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e2120a55-3d04-4122-a93f-29afcdb8cb1b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# # helper to find items of wrong type\n", - "# d = collection.to_dict()\n", - "# def find_paths(nested_dict, prepath=()):\n", - "# for k, v in nested_dict.items():\n", - "# try:\n", - "# path = prepath + (k,)\n", - "# if type(v) is np.float64: # found value\n", - "# yield path\n", - "# elif hasattr(v, 'items'): # v is a dict\n", - "# yield from find_paths(v, path) \n", - "# except:\n", - "# print(prepath)\n", - "\n", - "# print(*find_paths(d))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4b75791b-6b2d-40be-b7c6-330a60888fb5", - "metadata": {}, - "outputs": [], - "source": [ - "if catalog.get_child(collection_id):\n", - " collection.normalize_and_save(root_href=os.path.join(catalog_path, collection_id), catalog_type=pystac.CatalogType.SELF_CONTAINED)\n", - "else:\n", - " catalog.add_child(collection)\n", - " catalog.normalize_and_save(root_href=catalog_path, 
catalog_type=pystac.CatalogType.SELF_CONTAINED)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d6f676b5-e892-4bfb-8d73-2828addd838c", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "global-global-pangeo", - "language": "python", - "name": "conda-env-global-global-pangeo-py" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}
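

Editor's note on the refactor above: each notebook now calls a shared `stac_helpers` module instead of re-defining the same helpers inline, but the module itself is not part of this diff. The sketch below reassembles it from the deleted cells so the new call sites can be read in context. Assumptions to flag: the file name `stac_helpers.py` is inferred from the call sites; `get_step` follows the most general deleted variant (the CooperMcKenzie one, with `time_dim`, `step_ix`, and `round_dec`); and the unit check is tightened slightly, treating `cfunits.Units(...).isvalid` as a boolean and actually formatting `{v}` into the prompt, where the deleted code only re-prompted when the check raised.

```python
# stac_helpers.py -- a sketch reconstructed from the inline definitions deleted above
import numpy as np
import cfunits
import cf_xarray  # noqa: F401 -- registers the .cf accessor used by extract_dim


def extract_dim(ds, d):
    """Auto-detect the X/Y/T dimension name via cf_xarray, falling back to user input."""
    try:
        dim_list = ds.cf.axes[d]
        assert len(dim_list) == 1, f'There are too many {d} dimensions in this dataset.'
        dim = dim_list[0]
    except KeyError:
        print(f"Could not auto-extract {d} dimension name.")
        print("Look at the xarray output above showing the dataset dimensions.")
        dim = str(input(f"What is the name of the {d} dimension of this dataset?"))
        assert dim in ds.dims, "That is not a valid dimension name for this dataset"
    print(f"name of {d} dimension: {dim}\n")
    return dim


def get_step(ds, dim_name, time_dim=False, debug=False, step_ix=0, round_dec=None):
    """Return the regular step along a dimension, or None if spacing is irregular
    (the datacube spec says to use null for irregularly spaced steps)."""
    dim_vals = ds[dim_name].values
    diffs = [d2 - d1 for d1, d2 in zip(dim_vals, dim_vals[1:])]
    # rounding can collapse spurious distinct steps caused by float noise in the diffs
    if round_dec:
        unique_steps = np.unique(np.array(diffs).round(decimals=round_dec), return_counts=True)
    else:
        unique_steps = np.unique(diffs, return_counts=True)
    step_list = unique_steps[0]
    if debug:  # optional - for investigating uneven steps
        print(f'step_list: {step_list}')
        print(f'step_count: {unique_steps[1]}')
        indices = [i for i, x in enumerate(diffs) if x == step_list[step_ix]]
        print(f'index locations of step index {step_ix} in step_list: {indices}')
    if len(step_list) == 1:
        if time_dim:
            # make sure time deltas are in np timedelta format
            step_list = [np.array([s], dtype="timedelta64[ns]")[0] for s in step_list]
        return step_list[0].astype(float).item()
    return None


def get_long_name(ds, v):
    """Return the variable's long_name attribute, or None if it is missing."""
    return ds[v].attrs.get('long_name')


def get_unit(ds, v):
    """Return the variable's unit, prompting for a UDUNITS-2 compliant one if invalid.
    The datacube extension prefers UDUNITS-2 units (singular); gdptools expects this too."""
    unit = ds[v].attrs.get('units')
    try:
        valid = cfunits.Units(unit).isvalid
    except Exception:
        valid = False
    if not valid:
        print("Unit is not valid as a UD unit.")
        unit = str(input(f"Please enter a valid unit for {v} from here: "
                         "https://docs.unidata.ucar.edu/udunits/current/#Database"))
        assert cfunits.Units(unit).isvalid
    return unit


def get_var_type(ds, v):
    """'auxiliary' for variables carrying coordinate data (e.g. a lon(y, x) variable
    on a projected grid); 'data' for measured values like precipitation or temperature."""
    return 'auxiliary' if v in ds.coords else 'data'


def find_paths(nested_dict, prepath=()):
    """Yield key paths whose value is a np.float64 (these break STAC JSON serialization)."""
    for k, v in nested_dict.items():
        path = prepath + (k,)
        if type(v) is np.float64:
            yield path
        elif hasattr(v, 'items'):
            yield from find_paths(v, path)
```

With this module on the import path, the per-notebook cells reduce to the one-line `stac_helpers.*` calls shown in the hunks above.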