From b02079abfdb36bd049e27f69ac6b2eea6a143adf Mon Sep 17 00:00:00 2001 From: amsnyder <asnyder@usgs.gov> Date: Fri, 19 Jan 2024 15:39:24 -0600 Subject: [PATCH] use helpers --- ...e_slices_create_collection_from_zarr.ipynb | 129 ++-------------- ...d_annual_create_collection_from_zarr.ipynb | 142 ++---------------- ...e_slices_create_collection_from_zarr.ipynb | 142 ++---------------- 3 files changed, 41 insertions(+), 372 deletions(-) diff --git a/workflows/archive/TTU_2019_rcp45_time_slices_create_collection_from_zarr.ipynb b/workflows/archive/TTU_2019_rcp45_time_slices_create_collection_from_zarr.ipynb index 7cb328b5..cc8a0a00 100644 --- a/workflows/archive/TTU_2019_rcp45_time_slices_create_collection_from_zarr.ipynb +++ b/workflows/archive/TTU_2019_rcp45_time_slices_create_collection_from_zarr.ipynb @@ -189,22 +189,9 @@ "outputs": [], "source": [ "# dims_auto_extract = ['X', 'Y', 'T']\n", - "# def extract_dim(ds, d):\n", - "# try:\n", - "# dim_list = ds.cf.axes[d]\n", - "# assert len(dim_list)==1, f'There are too many {d} dimensions in this dataset.'\n", - "# dim = dim_list[0]\n", - "# except KeyError:\n", - "# print(f\"Could not auto-extract {d} dimension name.\")\n", - "# print(\"Look at the xarray output above showing the dataset dimensions.\")\n", - "# dim = str(input(f\"What is the name of the {d} dimension of this dataset?\"))\n", - "# assert dim in ds.dims, \"That is not a valid dimension name for this dataset\"\n", - "# print(f\"name of {d} dimension: {dim}\\n\")\n", - "# return dim\n", - "\n", "# dim_names_dict = {}\n", "# for d in dims_auto_extract:\n", - "# dim_names_dict[d] = extract_dim(ds, d)\n", + "# dim_names_dict[d] = stac_helpers.extract_dim(ds, d)\n", "dim_names_dict = {'X': 'lon', 'Y': 'lat', 'T': 'time'}\n", "print(f\"Dimension dictionary: {dim_names_dict}\")" ] @@ -443,59 +430,6 @@ "print(xy_bounds)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f49d6d7-9e30-4144-909b-fa1238e6c77a", - "metadata": {}, - "outputs": [], - "source": [ - "def get_step(ds, dim_name, time_dim=False, debug=False, step_ix=0, round_dec=None):\n", - " dim_vals = ds[dim_name].values\n", - " diffs = [d2 - d1 for d1, d2 in zip(dim_vals, dim_vals[1:])]\n", - " # option to round number of decimals\n", - " # sometimes there are different steps calculated due to small rounding errors coming out of the diff\n", - " # calculation, rounding these can correct for that\n", - " if round_dec:\n", - " unique_steps = np.unique(np.array(diffs).round(decimals=round_dec), return_counts=True)\n", - " else:\n", - " unique_steps = np.unique(diffs, return_counts=True)\n", - " step_list = unique_steps[0]\n", - " # optional - for invesitgating uneven steps\n", - " if debug:\n", - " print(f'step_list: {step_list}')\n", - " print(f'step_count: {unique_steps[1]}')\n", - " indices = [i for i, x in enumerate(diffs) if x == step_list[step_ix]]\n", - " print(f'index locations of step index {step_ix} in step_list: {indices}')\n", - " # set step - if all steps are the same length\n", - " # datacube spec specifies to use null for irregularly spaced steps\n", - " if len(step_list)==1:\n", - " if time_dim:\n", - " # make sure time deltas are in np timedelta format\n", - " step_list = [np.array([step], dtype=\"timedelta64[ns]\")[0] for step in step_list]\n", - " step = step_list[0].astype(float).item()\n", - " else:\n", - " step = None\n", - " return(step)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a20d12bf-a511-4c5e-84d0-77e2ec551518", - "metadata": {}, - "outputs": [], - 
"source": [ - "def get_long_name(ds, v):\n", - " # try to get long_name attribute from variable\n", - " try:\n", - " long_name = ds[v].attrs['long_name']\n", - " # otherwise, leave empty\n", - " except:\n", - " long_name = None\n", - " return long_name" - ] - }, { "cell_type": "markdown", "id": "e7dc357c-91ec-49ae-83e5-400f791f9792", @@ -577,7 +511,7 @@ "outputs": [], "source": [ "# # debugging for time steps: get all step values and locations\n", - "# time_step = get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=1)" + "# time_step = stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=1)" ] }, { @@ -609,7 +543,7 @@ "metadata": {}, "outputs": [], "source": [ - "x_step = get_step(ds, dim_names_dict['X'])\n", + "x_step = stac_helpers.get_step(ds, dim_names_dict['X'])\n", "print(f'x step: {x_step}')" ] }, @@ -622,7 +556,7 @@ "source": [ "# # debugging for spatial steps: get all step values and locations\n", "# x_dim=dim_names_dict['X']\n", - "# x_step = get_step(ds, x_dim, debug=True, step_ix=1)\n", + "# x_step = stac_helpers.get_step(ds, x_dim, debug=True, step_ix=1)\n", "# print(f'\\nx dim name (for next cell): {x_dim}')" ] }, @@ -655,7 +589,7 @@ "metadata": {}, "outputs": [], "source": [ - "y_step = get_step(ds, dim_names_dict['Y'])\n", + "y_step = stac_helpers.get_step(ds, dim_names_dict['Y'])\n", "print(f'y step: {y_step}')" ] }, @@ -668,7 +602,7 @@ "source": [ "# # debugging for spatial steps: get all step values and locations\n", "# y_dim=dim_names_dict['Y']\n", - "# y_step = get_step(ds, y_dim, debug=True, step_ix=1)\n", + "# y_step = stac_helpers.get_step(ds, y_dim, debug=True, step_ix=1)\n", "# print(f'\\nx dim name (for next cell): {x_dim}')" ] }, @@ -723,10 +657,10 @@ "\n", "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n", "# note that the extent of each dimension should be pulled from the dataset\n", - "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n", - " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", - " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", - " 'nv': pystac.extensions.datacube.Dimension({'type': 'count', 'description': get_long_name(ds, 'nv'), 'extent': [ds.nv.min().item(), ds.nv.max().item()]}),\n", + "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n", + " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", + " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': 
stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", + " 'nv': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'nv'), 'extent': [ds.nv.min().item(), ds.nv.max().item()]}),\n", " }" ] }, @@ -738,43 +672,6 @@ "### Add cube variables (optional field for extension)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "92510876-7853-4d24-8563-c69f9012aeb6", - "metadata": {}, - "outputs": [], - "source": [ - "# define functions to pull out datacube attributes and validate format\n", - "def get_unit(ds, v):\n", - " # check if unit is defined for variable\n", - " try:\n", - " unit = ds[v].attrs['units']\n", - " except:\n", - " unit = None\n", - " # check if unit comes from https://docs.unidata.ucar.edu/udunits/current/#Database\n", - " # datacube extension specifies: The unit of measurement for the data, preferably compliant to UDUNITS-2 units (singular).\n", - " # gdptools expects this format as well\n", - " try:\n", - " cfunits.Units(unit).isvalid\n", - " except:\n", - " print(\"Unit is not valid as a UD unit.\")\n", - " unit = str(input(\"Please enter a valid unit for {v} from here: https://docs.unidata.ucar.edu/udunits/current/#Database\"))\n", - " assert cfunits.Units(unit).isvalid\n", - " return unit\n", - "\n", - "def get_var_type(ds, v):\n", - " if v in ds.coords:\n", - " # type = auxiliary for a variable that contains coordinate data, but isn't a dimension in cube:dimensions.\n", - " # For example, the values of the datacube might be provided in the projected coordinate reference system, \n", - " # but the datacube could have a variable lon with dimensions (y, x), giving the longitude at each point.\n", - " var_type = 'auxiliary'\n", - " # type = data for a variable indicating some measured value, for example \"precipitation\", \"temperature\", etc.\n", - " else:\n", - " var_type = 'data'\n", - " return var_type" - ] - }, { "cell_type": "code", "execution_count": null, @@ -801,9 +698,9 @@ "# create dictionary of dataset variables and associated dimensions\n", "vars_dict={}\n", "for v in vars:\n", - " unit = get_unit(ds, v)\n", - " var_type = get_var_type(ds, v)\n", - " long_name = get_long_name(ds, v)\n", + " unit = stac_helpers.get_unit(ds, v)\n", + " var_type = stac_helpers.get_var_type(ds, v)\n", + " long_name = stac_helpers.get_long_name(ds, v)\n", " vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})" ] }, diff --git a/workflows/archive/TTU_2019_rcp85_gridded_annual_create_collection_from_zarr.ipynb b/workflows/archive/TTU_2019_rcp85_gridded_annual_create_collection_from_zarr.ipynb index 355ca14f..c8586ac6 100644 --- a/workflows/archive/TTU_2019_rcp85_gridded_annual_create_collection_from_zarr.ipynb +++ b/workflows/archive/TTU_2019_rcp85_gridded_annual_create_collection_from_zarr.ipynb @@ -187,22 +187,9 @@ "outputs": [], "source": [ "# dims_auto_extract = ['X', 'Y', 'T']\n", - "# def extract_dim(ds, d):\n", - "# try:\n", - "# dim_list = ds.cf.axes[d]\n", - "# assert len(dim_list)==1, f'There are too many {d} dimensions in this dataset.'\n", - "# dim = dim_list[0]\n", - "# except KeyError:\n", - "# print(f\"Could not auto-extract {d} dimension name.\")\n", - "# print(\"Look at the xarray output above showing the dataset dimensions.\")\n", - "# dim = str(input(f\"What is the name of the {d} dimension of this dataset?\"))\n", - "# assert dim 
in ds.dims, \"That is not a valid dimension name for this dataset\"\n", - "# print(f\"name of {d} dimension: {dim}\\n\")\n", - "# return dim\n", - "\n", "# dim_names_dict = {}\n", "# for d in dims_auto_extract:\n", - "# dim_names_dict[d] = extract_dim(ds, d)\n", + "# dim_names_dict[d] = stac_helpers.extract_dim(ds, d)\n", "dim_names_dict = {'X': 'lon', 'Y': 'lat', 'T': 'time'}\n", "print(f\"Dimension dictionary: {dim_names_dict}\")" ] @@ -441,59 +428,6 @@ "print(xy_bounds)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f49d6d7-9e30-4144-909b-fa1238e6c77a", - "metadata": {}, - "outputs": [], - "source": [ - "def get_step(ds, dim_name, time_dim=False, debug=False, step_ix=0, round_dec=None):\n", - " dim_vals = ds[dim_name].values\n", - " diffs = [d2 - d1 for d1, d2 in zip(dim_vals, dim_vals[1:])]\n", - " # option to round number of decimals\n", - " # sometimes there are different steps calculated due to small rounding errors coming out of the diff\n", - " # calculation, rounding these can correct for that\n", - " if round_dec:\n", - " unique_steps = np.unique(np.array(diffs).round(decimals=round_dec), return_counts=True)\n", - " else:\n", - " unique_steps = np.unique(diffs, return_counts=True)\n", - " step_list = unique_steps[0]\n", - " # optional - for invesitgating uneven steps\n", - " if debug:\n", - " print(f'step_list: {step_list}')\n", - " print(f'step_count: {unique_steps[1]}')\n", - " indices = [i for i, x in enumerate(diffs) if x == step_list[step_ix]]\n", - " print(f'index locations of step index {step_ix} in step_list: {indices}')\n", - " # set step - if all steps are the same length\n", - " # datacube spec specifies to use null for irregularly spaced steps\n", - " if len(step_list)==1:\n", - " if time_dim:\n", - " # make sure time deltas are in np timedelta format\n", - " step_list = [np.array([step], dtype=\"timedelta64[ns]\")[0] for step in step_list]\n", - " step = step_list[0].astype(float).item()\n", - " else:\n", - " step = None\n", - " return(step)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a20d12bf-a511-4c5e-84d0-77e2ec551518", - "metadata": {}, - "outputs": [], - "source": [ - "def get_long_name(ds, v):\n", - " # try to get long_name attribute from variable\n", - " try:\n", - " long_name = ds[v].attrs['long_name']\n", - " # otherwise, leave empty\n", - " except:\n", - " long_name = None\n", - " return long_name" - ] - }, { "cell_type": "markdown", "id": "e7dc357c-91ec-49ae-83e5-400f791f9792", @@ -575,7 +509,7 @@ "outputs": [], "source": [ "# # debugging for time steps: get all step values and locations\n", - "# time_step = get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=1)" + "# time_step = stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=1)" ] }, { @@ -607,7 +541,7 @@ "metadata": {}, "outputs": [], "source": [ - "x_step = get_step(ds, dim_names_dict['X'])\n", + "x_step = stac_helpers.get_step(ds, dim_names_dict['X'])\n", "print(f'x step: {x_step}')" ] }, @@ -620,7 +554,7 @@ "source": [ "# # debugging for spatial steps: get all step values and locations\n", "# x_dim=dim_names_dict['X']\n", - "# x_step = get_step(ds, x_dim, debug=True, step_ix=1)\n", + "# x_step = stac_helpers.get_step(ds, x_dim, debug=True, step_ix=1)\n", "# print(f'\\nx dim name (for next cell): {x_dim}')" ] }, @@ -653,7 +587,7 @@ "metadata": {}, "outputs": [], "source": [ - "y_step = get_step(ds, dim_names_dict['Y'])\n", + "y_step = stac_helpers.get_step(ds, dim_names_dict['Y'])\n", "print(f'y 
step: {y_step}')" ] }, @@ -666,7 +600,7 @@ "source": [ "# # debugging for spatial steps: get all step values and locations\n", "# y_dim=dim_names_dict['Y']\n", - "# y_step = get_step(ds, y_dim, debug=True, step_ix=1)\n", + "# y_step = stac_helpers.get_step(ds, y_dim, debug=True, step_ix=1)\n", "# print(f'\\nx dim name (for next cell): {x_dim}')" ] }, @@ -721,10 +655,10 @@ "\n", "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n", "# note that the extent of each dimension should be pulled from the dataset\n", - "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n", - " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", - " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", - " 'nv': pystac.extensions.datacube.Dimension({'type': 'count', 'description': get_long_name(ds, 'nv'), 'extent': [ds.nv.min().item(), ds.nv.max().item()]}),\n", + "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n", + " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", + " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", + " 'nv': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'nv'), 'extent': [ds.nv.min().item(), ds.nv.max().item()]}),\n", " }" ] }, @@ -736,43 +670,6 @@ "### Add cube variables (optional field for extension)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "92510876-7853-4d24-8563-c69f9012aeb6", - "metadata": {}, - "outputs": [], - "source": [ - "# define functions to pull out datacube attributes and validate format\n", - "def get_unit(ds, v):\n", - " # check if unit is defined for variable\n", - " try:\n", - " unit = ds[v].attrs['units']\n", - " except:\n", - " unit = None\n", - " # check if unit comes from https://docs.unidata.ucar.edu/udunits/current/#Database\n", - " # datacube extension specifies: The unit of measurement for the data, preferably compliant to UDUNITS-2 units (singular).\n", - " # gdptools expects this format as well\n", - " try:\n", - " cfunits.Units(unit).isvalid\n", - " except:\n", - " print(\"Unit is not valid as a UD unit.\")\n", - " unit = str(input(\"Please enter a valid unit for {v} from here: https://docs.unidata.ucar.edu/udunits/current/#Database\"))\n", - " assert cfunits.Units(unit).isvalid\n", - " return unit\n", - "\n", - "def 
get_var_type(ds, v):\n", - " if v in ds.coords:\n", - " # type = auxiliary for a variable that contains coordinate data, but isn't a dimension in cube:dimensions.\n", - " # For example, the values of the datacube might be provided in the projected coordinate reference system, \n", - " # but the datacube could have a variable lon with dimensions (y, x), giving the longitude at each point.\n", - " var_type = 'auxiliary'\n", - " # type = data for a variable indicating some measured value, for example \"precipitation\", \"temperature\", etc.\n", - " else:\n", - " var_type = 'data'\n", - " return var_type" - ] - }, { "cell_type": "code", "execution_count": null, @@ -799,9 +696,9 @@ "# create dictionary of dataset variables and associated dimensions\n", "vars_dict={}\n", "for v in vars:\n", - " unit = get_unit(ds, v)\n", - " var_type = get_var_type(ds, v)\n", - " long_name = get_long_name(ds, v)\n", + " unit = stac_helpers.get_unit(ds, v)\n", + " var_type = stac_helpers.get_var_type(ds, v)\n", + " long_name = stac_helpers.get_long_name(ds, v)\n", " vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})" ] }, @@ -843,18 +740,7 @@ "source": [ "# # helper to find items of wrong type\n", "# d = collection.to_dict()\n", - "# def find_paths(nested_dict, prepath=()):\n", - "# for k, v in nested_dict.items():\n", - "# try:\n", - "# path = prepath + (k,)\n", - "# if type(v) is np.float64: # found value\n", - "# yield path\n", - "# elif hasattr(v, 'items'): # v is a dict\n", - "# yield from find_paths(v, path) \n", - "# except:\n", - "# print(prepath)\n", - "\n", - "# print(*find_paths(d))" + "# print(*stac_helpers.find_paths(d))" ] }, { diff --git a/workflows/archive/TTU_2019_rcp85_time_slices_create_collection_from_zarr.ipynb b/workflows/archive/TTU_2019_rcp85_time_slices_create_collection_from_zarr.ipynb index 831b1d24..3e90059b 100644 --- a/workflows/archive/TTU_2019_rcp85_time_slices_create_collection_from_zarr.ipynb +++ b/workflows/archive/TTU_2019_rcp85_time_slices_create_collection_from_zarr.ipynb @@ -189,22 +189,9 @@ "outputs": [], "source": [ "# dims_auto_extract = ['X', 'Y', 'T']\n", - "# def extract_dim(ds, d):\n", - "# try:\n", - "# dim_list = ds.cf.axes[d]\n", - "# assert len(dim_list)==1, f'There are too many {d} dimensions in this dataset.'\n", - "# dim = dim_list[0]\n", - "# except KeyError:\n", - "# print(f\"Could not auto-extract {d} dimension name.\")\n", - "# print(\"Look at the xarray output above showing the dataset dimensions.\")\n", - "# dim = str(input(f\"What is the name of the {d} dimension of this dataset?\"))\n", - "# assert dim in ds.dims, \"That is not a valid dimension name for this dataset\"\n", - "# print(f\"name of {d} dimension: {dim}\\n\")\n", - "# return dim\n", - "\n", "# dim_names_dict = {}\n", "# for d in dims_auto_extract:\n", - "# dim_names_dict[d] = extract_dim(ds, d)\n", + "# dim_names_dict[d] = stac_helpers.extract_dim(ds, d)\n", "dim_names_dict = {'X': 'lon', 'Y': 'lat', 'T': 'time'}\n", "print(f\"Dimension dictionary: {dim_names_dict}\")" ] @@ -443,59 +430,6 @@ "print(xy_bounds)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f49d6d7-9e30-4144-909b-fa1238e6c77a", - "metadata": {}, - "outputs": [], - "source": [ - "def get_step(ds, dim_name, time_dim=False, debug=False, step_ix=0, round_dec=None):\n", - " dim_vals = ds[dim_name].values\n", - " diffs = [d2 - d1 for d1, d2 in zip(dim_vals, dim_vals[1:])]\n", - " # option to round number of decimals\n", 
- " # sometimes there are different steps calculated due to small rounding errors coming out of the diff\n", - " # calculation, rounding these can correct for that\n", - " if round_dec:\n", - " unique_steps = np.unique(np.array(diffs).round(decimals=round_dec), return_counts=True)\n", - " else:\n", - " unique_steps = np.unique(diffs, return_counts=True)\n", - " step_list = unique_steps[0]\n", - " # optional - for invesitgating uneven steps\n", - " if debug:\n", - " print(f'step_list: {step_list}')\n", - " print(f'step_count: {unique_steps[1]}')\n", - " indices = [i for i, x in enumerate(diffs) if x == step_list[step_ix]]\n", - " print(f'index locations of step index {step_ix} in step_list: {indices}')\n", - " # set step - if all steps are the same length\n", - " # datacube spec specifies to use null for irregularly spaced steps\n", - " if len(step_list)==1:\n", - " if time_dim:\n", - " # make sure time deltas are in np timedelta format\n", - " step_list = [np.array([step], dtype=\"timedelta64[ns]\")[0] for step in step_list]\n", - " step = step_list[0].astype(float).item()\n", - " else:\n", - " step = None\n", - " return(step)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a20d12bf-a511-4c5e-84d0-77e2ec551518", - "metadata": {}, - "outputs": [], - "source": [ - "def get_long_name(ds, v):\n", - " # try to get long_name attribute from variable\n", - " try:\n", - " long_name = ds[v].attrs['long_name']\n", - " # otherwise, leave empty\n", - " except:\n", - " long_name = None\n", - " return long_name" - ] - }, { "cell_type": "markdown", "id": "e7dc357c-91ec-49ae-83e5-400f791f9792", @@ -577,7 +511,7 @@ "outputs": [], "source": [ "# # debugging for time steps: get all step values and locations\n", - "# time_step = get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=1)" + "# time_step = stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=1)" ] }, { @@ -609,7 +543,7 @@ "metadata": {}, "outputs": [], "source": [ - "x_step = get_step(ds, dim_names_dict['X'])\n", + "x_step = stac_helpers.get_step(ds, dim_names_dict['X'])\n", "print(f'x step: {x_step}')" ] }, @@ -622,7 +556,7 @@ "source": [ "# # debugging for spatial steps: get all step values and locations\n", "# x_dim=dim_names_dict['X']\n", - "# x_step = get_step(ds, x_dim, debug=True, step_ix=1)\n", + "# x_step = stac_helpers.get_step(ds, x_dim, debug=True, step_ix=1)\n", "# print(f'\\nx dim name (for next cell): {x_dim}')" ] }, @@ -655,7 +589,7 @@ "metadata": {}, "outputs": [], "source": [ - "y_step = get_step(ds, dim_names_dict['Y'])\n", + "y_step = stac_helpers.get_step(ds, dim_names_dict['Y'])\n", "print(f'y step: {y_step}')" ] }, @@ -668,7 +602,7 @@ "source": [ "# # debugging for spatial steps: get all step values and locations\n", "# y_dim=dim_names_dict['Y']\n", - "# y_step = get_step(ds, y_dim, debug=True, step_ix=1)\n", + "# y_step = stac_helpers.get_step(ds, y_dim, debug=True, step_ix=1)\n", "# print(f'\\nx dim name (for next cell): {x_dim}')" ] }, @@ -723,10 +657,10 @@ "\n", "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n", "# note that the extent of each dimension should be pulled from the dataset\n", - "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n", - " dim_names_dict['X']: 
pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", - " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", - " 'nv': pystac.extensions.datacube.Dimension({'type': 'count', 'description': get_long_name(ds, 'nv'), 'extent': [ds.nv.min().item(), ds.nv.max().item()]}),\n", + "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n", + " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", + " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", + " 'nv': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'nv'), 'extent': [ds.nv.min().item(), ds.nv.max().item()]}),\n", " }" ] }, @@ -738,43 +672,6 @@ "### Add cube variables (optional field for extension)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "92510876-7853-4d24-8563-c69f9012aeb6", - "metadata": {}, - "outputs": [], - "source": [ - "# define functions to pull out datacube attributes and validate format\n", - "def get_unit(ds, v):\n", - " # check if unit is defined for variable\n", - " try:\n", - " unit = ds[v].attrs['units']\n", - " except:\n", - " unit = None\n", - " # check if unit comes from https://docs.unidata.ucar.edu/udunits/current/#Database\n", - " # datacube extension specifies: The unit of measurement for the data, preferably compliant to UDUNITS-2 units (singular).\n", - " # gdptools expects this format as well\n", - " try:\n", - " cfunits.Units(unit).isvalid\n", - " except:\n", - " print(\"Unit is not valid as a UD unit.\")\n", - " unit = str(input(\"Please enter a valid unit for {v} from here: https://docs.unidata.ucar.edu/udunits/current/#Database\"))\n", - " assert cfunits.Units(unit).isvalid\n", - " return unit\n", - "\n", - "def get_var_type(ds, v):\n", - " if v in ds.coords:\n", - " # type = auxiliary for a variable that contains coordinate data, but isn't a dimension in cube:dimensions.\n", - " # For example, the values of the datacube might be provided in the projected coordinate reference system, \n", - " # but the datacube could have a variable lon with dimensions (y, x), giving the longitude at each point.\n", - " var_type = 'auxiliary'\n", - " # type = data for a variable indicating some measured value, for example \"precipitation\", \"temperature\", etc.\n", - " else:\n", - " var_type = 'data'\n", - " return var_type" - ] - }, { "cell_type": "code", "execution_count": null, @@ -801,9 +698,9 @@ "# create dictionary of dataset variables and associated dimensions\n", "vars_dict={}\n", "for v in vars:\n", - " unit = get_unit(ds, v)\n", - " var_type = get_var_type(ds, v)\n", - " 
long_name = get_long_name(ds, v)\n", + " unit = stac_helpers.get_unit(ds, v)\n", + " var_type = stac_helpers.get_var_type(ds, v)\n", + " long_name = stac_helpers.get_long_name(ds, v)\n", " vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})" ] }, @@ -845,18 +742,7 @@ "source": [ "# # helper to find items of wrong type\n", "# d = collection.to_dict()\n", - "# def find_paths(nested_dict, prepath=()):\n", - "# for k, v in nested_dict.items():\n", - "# try:\n", - "# path = prepath + (k,)\n", - "# if type(v) is np.float64: # found value\n", - "# yield path\n", - "# elif hasattr(v, 'items'): # v is a dict\n", - "# yield from find_paths(v, path) \n", - "# except:\n", - "# print(prepath)\n", - "\n", - "# print(*find_paths(d))" + "# print(*stac_helpers.find_paths(d))" ] }, { -- GitLab
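
For reference, below is a minimal sketch of the consolidated stac_helpers module these notebooks now call into (stac_helpers.extract_dim, get_step, get_long_name, get_unit, get_var_type, and find_paths), reassembled from the function bodies removed in this patch. The actual module in the repository may differ. Two deviations from the removed cells are editorial assumptions, flagged in comments: get_unit tests cfunits.Units(unit).isvalid directly, since isvalid returns a bool rather than raising, so the old try/except could never detect an invalid unit; and its input prompt is an f-string, so {v} is actually interpolated.

import numpy as np
import cfunits
import cf_xarray  # noqa: F401 -- registers the .cf accessor used by extract_dim


def extract_dim(ds, d):
    """Return the dataset's dimension name for CF axis d ('X', 'Y', or 'T')."""
    try:
        dim_list = ds.cf.axes[d]
        assert len(dim_list) == 1, f'There are too many {d} dimensions in this dataset.'
        dim = dim_list[0]
    except KeyError:
        # fall back to asking the user when cf_xarray cannot identify the axis
        print(f"Could not auto-extract {d} dimension name.")
        print("Look at the xarray output above showing the dataset dimensions.")
        dim = str(input(f"What is the name of the {d} dimension of this dataset?"))
        assert dim in ds.dims, "That is not a valid dimension name for this dataset"
    print(f"name of {d} dimension: {dim}\n")
    return dim


def get_step(ds, dim_name, time_dim=False, debug=False, step_ix=0, round_dec=None):
    """Return the regular step along dim_name, or None if spacing is irregular."""
    dim_vals = ds[dim_name].values
    diffs = [d2 - d1 for d1, d2 in zip(dim_vals, dim_vals[1:])]
    # optionally round the diffs: floating-point noise can otherwise split one
    # true step size into several near-identical values
    if round_dec:
        unique_steps = np.unique(np.array(diffs).round(decimals=round_dec), return_counts=True)
    else:
        unique_steps = np.unique(diffs, return_counts=True)
    step_list = unique_steps[0]
    # optional - for investigating uneven steps
    if debug:
        print(f'step_list: {step_list}')
        print(f'step_count: {unique_steps[1]}')
        indices = [i for i, x in enumerate(diffs) if x == step_list[step_ix]]
        print(f'index locations of step index {step_ix} in step_list: {indices}')
    # the datacube spec says to use null for irregularly spaced dimensions, so
    # only report a step when every diff is identical
    if len(step_list) == 1:
        if time_dim:
            # make sure time deltas are in np timedelta format
            step_list = [np.array([step], dtype="timedelta64[ns]")[0] for step in step_list]
        step = step_list[0].astype(float).item()
    else:
        step = None
    return step


def get_long_name(ds, v):
    # return the variable's long_name attribute, or None if it has none
    return ds[v].attrs.get('long_name')


def get_unit(ds, v):
    """Return a UDUNITS-2 compliant unit for variable v, prompting if needed."""
    unit = ds[v].attrs.get('units')
    # the datacube extension prefers UDUNITS-2 units (singular), and gdptools
    # expects that format as well
    # assumption: isvalid is checked directly (it returns a bool, never raises)
    if unit is None or not cfunits.Units(unit).isvalid:
        print("Unit is not valid as a UD unit.")
        # assumption: f-string added so the variable name appears in the prompt
        unit = str(input(f"Please enter a valid unit for {v} from here: https://docs.unidata.ucar.edu/udunits/current/#Database"))
        assert cfunits.Units(unit).isvalid
    return unit


def get_var_type(ds, v):
    # 'auxiliary' for a variable that contains coordinate data but isn't a
    # dimension in cube:dimensions (e.g. a lon variable with dimensions (y, x));
    # 'data' for a measured value such as precipitation or temperature
    return 'auxiliary' if v in ds.coords else 'data'


def find_paths(nested_dict, prepath=()):
    # walk a nested dict and yield the key path of every np.float64 value --
    # these are not JSON-serializable and break serialization of collection.to_dict()
    for k, v in nested_dict.items():
        path = prepath + (k,)
        if type(v) is np.float64:  # found value
            yield path
        elif hasattr(v, 'items'):  # v is a dict
            yield from find_paths(v, path)

Notebook usage is unchanged apart from the module prefix, e.g. x_step = stac_helpers.get_step(ds, dim_names_dict['X']).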