Skip to content
Snippets Groups Projects
Commit 56dc5aa1 authored by Snyder, Amelia Marie's avatar Snyder, Amelia Marie
Browse files

add x, y, t dimension steps

parent 08b62f9e
No related branches found
No related tags found
1 merge request!2add x, y, t dimension steps
......@@ -24,7 +24,8 @@
"extent": [
"1979-10-01T00:00:00Z",
"2021-09-25T00:00:00Z"
]
],
"step": "P1DT0H0M0S"
},
"x": {
"type": "spatial",
......@@ -32,7 +33,8 @@
"extent": [
-2732097.901153542,
2731902.098846458
]
],
"step": 4000.0
},
"y": {
"type": "spatial",
......@@ -40,7 +42,8 @@
"extent": [
-2027960.8996368449,
2028039.1003631551
]
],
"step": 4000.0
},
"bottom_top_stag": {
"type": "spatial",
......
%% Cell type:markdown id:6c10e07b-1e60-4926-af1d-fa75dc78e5d4 tags:
# CONUS404 Daily Zarr -> Collection Exploratory Workflow
This is a workflow for transforming the CONUS404 daily zarr dataset into a [STAC collection](https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md). We use the [datacube extension](https://github.com/stac-extensions/datacube) to define the spatial and temporal dimensions of the zarr store, as well as the variables it contains.
To simplify this workflow so that it can scale to many datasets, a few simplifying suggestions and assumptions are made:
1. For USGS data, we can use the CC0-1.0 license. For all other data we can use Unlicense. Ref: https://spdx.org/licenses/
2. I am assuming all coordinates are from the WGS84 datum if not specified.
%% Cell type:code id:201e0945-de55-45ff-b095-c2af009a4e62 tags:
``` python
import pystac
from pystac.extensions.datacube import CollectionDatacubeExtension, AssetDatacubeExtension, AdditionalDimension, DatacubeExtension
import xarray as xr
import cf_xarray
import os
import fsspec
import hvplot.xarray
import pandas as pd
import json
import numpy as np
```
%% Cell type:markdown id:20b00e88-5a13-46b3-9787-d9ac2d4e7bd6 tags:
## Open up NHGF STAC Catalog
%% Cell type:code id:adf6c59d-58cd-48b1-a5fd-3bb205a3ef56 tags:
``` python
# Folder holding the STAC catalog json file.
catalog_path = os.path.join('..', 'catalog')
# Load the root catalog from catalog.json inside that folder.
catalog_file = os.path.join(catalog_path, 'catalog.json')
catalog = pystac.Catalog.from_file(catalog_file)
```
%% Cell type:markdown id:996e60ba-13e4-453a-8534-e62ce747f0fa tags:
## Collection Metadata Input
%% Cell type:code id:482d204d-b5b6-40e5-ac42-55b459be1097 tags:
``` python
# URL of the zarr store this collection will describe.
zarr_url = 's3://nhgf-development/conus404/conus404_daily_202210.zarr/'

# Keyword arguments xarray needs to open the store.
# ref: https://github.com/stac-extensions/xarray-assets
xarray_opendataset_kwargs = {
    "xarray:open_kwargs": {"chunks": {}, "engine": "zarr", "consolidated": True},
    "xarray:storage_options": {"requester_pays": True},
}

# Metadata for the zarr asset attached to the collection (zarr_url).
asset_description = "S3 access to collection zarr group"
# roles to tag the zarr url asset with
asset_roles = ["data", "zarr", "s3"]

# STAC collection identification.
collection_id = 'conus404-daily'
collection_description = 'CONUS404 40 years of daily values for subset of model output variables derived from hourly values on cloud storage'
# license for dataset (see https://spdx.org/licenses/)
collection_license = 'CC0-1.0'
```
%% Cell type:markdown id:b213b74f-ad17-4774-93b6-3b62be616b45 tags:
## Data Exploration
%% Cell type:code id:708f2cf5-79ab-49af-8067-de31d0d13ee6 tags:
``` python
# Open the zarr dataset from requester-pays S3 and display it.
fs2 = fsspec.filesystem('s3', requester_pays=True)
store_mapper = fs2.get_mapper(zarr_url)
ds = xr.open_dataset(store_mapper, engine='zarr',
                     backend_kwargs={'consolidated': True}, chunks={})
ds
```
%% Cell type:markdown id:0bc7e9b3-ad62-4b10-a18e-66b7ed2d35dc tags:
## Identify x, y, t dimensions of dataset
May require user input if dimensions cannot be auto-detected.
%% Cell type:code id:ab91268f-7200-4cb1-979a-c7d75531d2c0 tags:
``` python
dims_auto_extract = ['X', 'Y', 'T']

def extract_dim(ds, d):
    """Return the dataset's dimension name for CF axis *d*, asking the user when
    cf_xarray cannot auto-detect it."""
    try:
        candidates = ds.cf.axes[d]
    except KeyError:
        # cf_xarray could not identify this axis - fall back to interactive input
        print(f"Could not auto-extract {d} dimension name.")
        print("Look at the xarray output above showing the dataset dimensions.")
        dim = str(input(f"What is the name of the {d} dimension of this dataset?"))
        assert dim in ds.dims, "That is not a valid dimension name for this dataset"
    else:
        assert len(candidates)==1, f'There are too many {d} dimensions in this dataset.'
        dim = candidates[0]
    print(f"name of {d} dimension: {dim}\n")
    return dim

# map each CF axis label to the dataset's actual dimension name
dims_dict = {d: extract_dim(ds, d) for d in dims_auto_extract}
print(f"Dimension dictionary: {dims_dict}")
```
%% Cell type:markdown id:8fbfecfb-9886-4d06-a34c-6471cb0a6053 tags:
## Plot a map
%% Cell type:code id:4eb4d027-4266-4a0b-8f16-bacfbef06242 tags:
``` python
# Map a single variable for one day as a visual sanity check.
var_to_plot = 'SNOW'
snapshot = ds[var_to_plot].sel(time='2014-03-01 00:00').load()
snapshot.hvplot.quadmesh(
    x='lon', y='lat', rasterize=True,
    geo=True, tiles='OSM', alpha=0.7, cmap='turbo')
```
%% Cell type:markdown id:a8c3ed37-8564-400b-a7fb-25bd5e43d21c tags:
## Create Collection Extent
%% Cell type:markdown id:69f0d837-68a5-4fed-9a14-5d75cfbb0da4 tags:
### Spatial Extent
%% Cell type:code id:d46805e0-8e94-4ebe-aa01-d9a2d7051459 tags:
``` python
# Pull the lat/lon bounding box for the data.
# Coordinates must be from the WGS 84 datum.
# Order: left, bottom, right, top.
# Note: depending on how the dataset was opened you may need .compute() to
# materialize the values - switch to the commented line below if you hit that.
#coord_bounds = [ds.lon.data.min().compute().astype(float), ds.lat.data.min().compute().astype(float), ds.lon.data.max().compute().astype(float), ds.lat.data.max().compute().astype(float)]
coord_bounds = [ds.lon.data.min().astype(float), ds.lat.data.min().astype(float), ds.lon.data.max().astype(float), ds.lat.data.max().astype(float)]
print(coord_bounds)
# create a spatial extent object
spatial_extent = pystac.SpatialExtent(bboxes=[coord_bounds])
```
%% Cell type:markdown id:a04c8fca-1d33-43ac-9e2b-62d7be2887f7 tags:
### Temporal Extent
%% Cell type:code id:41a84995-867c-4152-8c57-85e3758bbb77 tags:
``` python
# Pull out first and last timestamps along the auto-detected time dimension
# (dims_dict['T']) rather than hard-coding the name 'time'.
temporal_extent_lower = pd.Timestamp(ds[dims_dict['T']].data.min())
temporal_extent_upper = pd.Timestamp(ds[dims_dict['T']].data.max())
print(f'min: {temporal_extent_lower} \nmax: {temporal_extent_upper}')
# create a temporal extent object
temporal_extent = pystac.TemporalExtent(intervals=[[temporal_extent_lower, temporal_extent_upper]])
```
%% Cell type:code id:1b1e37c4-5348-46ad-abc9-e005b5d6c02b tags:
``` python
# Combine the spatial and temporal extents into a single pystac Extent.
collection_extent = pystac.Extent(temporal=temporal_extent, spatial=spatial_extent)
```
%% Cell type:markdown id:cfb71202-03df-45b5-ac2f-0dc2ee1ab780 tags:
## Create pystac collection
%% Cell type:code id:7e96811b-95ae-406a-9728-55fc429d4e1f tags:
``` python
# Reuse the collection if it is already a child of the catalog (single
# get_child lookup instead of two); otherwise create a fresh one.
collection = catalog.get_child(collection_id)
if collection:
    print("existing collection opened")
    collection.extent = collection_extent
    collection.description = collection_description
    collection.license = collection_license
else:
    collection = pystac.Collection(id=collection_id,
                                   description=collection_description,
                                   extent=collection_extent,
                                   license=collection_license)
    print("new collection created")
```
%% Cell type:markdown id:a21c76e8-cd57-4eb5-a33f-7c668a3b3205 tags:
## Add zarr url asset to collection
%% Cell type:code id:094832af-d22b-4359-b0f6-cf687acce5cc tags:
``` python
# Attach the zarr store to the collection as an asset, carrying the
# xarray open kwargs as extra fields.
asset_id = "zarr-s3"
zarr_asset = pystac.Asset(
    href=zarr_url,
    description=asset_description,
    media_type="application/vnd+zarr",
    roles=asset_roles,
    extra_fields=xarray_opendataset_kwargs,
)
collection.add_asset(asset_id, zarr_asset)
```
%% Cell type:markdown id:f67cd5c9-db33-45c2-bc21-480cd67354f4 tags:
## Add datacube extension to collection
%% Cell type:code id:fc00946d-2880-491d-9b3b-3aeeb4414d6c tags:
``` python
# instantiate the datacube extension on the collection
# (add_if_missing=True registers the extension schema on the collection)
dc = DatacubeExtension.ext(collection, add_if_missing=True)
```
%% Cell type:markdown id:8bdd77a2-7587-485e-afb7-42af3a822241 tags:
### Add cube dimensions (required field for extension)
%% Cell type:code id:120a4914-3302-44a5-a282-0308ac84f040 tags:
``` python
# List the dataset's dimension names; these feed the datacube
# cube:dimensions field and are used below to de-duplicate variables.
dims = list(ds.dims)
print(dims)
```
%% Cell type:code id:00a18a29-fb9a-4b56-8009-493122997b16 tags:
``` python
# Get x/y bounds for the extent of those dimensions (required by the
# datacube extension), using the auto-detected dimension names from
# dims_dict rather than hard-coded 'x'/'y'.
xy_bounds = [ds[dims_dict['X']].data.min().astype(float), ds[dims_dict['Y']].data.min().astype(float), ds[dims_dict['X']].data.max().astype(float), ds[dims_dict['Y']].data.max().astype(float)]
print(xy_bounds)
```
%% Cell type:code id:0f49d6d7-9e30-4144-909b-fa1238e6c77a tags:
``` python
def get_step(dim_name, data=None):
    """Return the regular step size along dimension ``dim_name``.

    Parameters
    ----------
    dim_name : str
        Name of the dimension/coordinate to inspect.
    data : optional
        Any object supporting ``data[dim_name].values`` (e.g. an xarray
        Dataset or a pandas DataFrame). Defaults to the notebook-level ``ds``.

    Returns
    -------
    The single spacing between consecutive values when the dimension is
    regularly spaced, otherwise None - the datacube spec specifies null
    for irregularly spaced steps. A dimension with fewer than two values
    also yields None.
    """
    source = ds if data is None else data
    dim_vals = source[dim_name].values
    # np.diff + np.unique replaces the hand-rolled pairwise loop;
    # note this is exact equality, so float coordinates with rounding
    # noise will be reported as irregular (same as the original behavior)
    unique_steps = np.unique(np.diff(dim_vals))
    if len(unique_steps) == 1:
        return unique_steps[0]
    return None
```
%% Cell type:markdown id:00a5e041-081d-428d-ac2e-75d16de205e6 tags:
#### User input needed - copy all of the dimensions printed above into the dict below and fill in the appropriate attributes (type, axis, extent):
%% Cell type:code id:5a443497-67a9-4dce-a8e9-b08d31a88223 tags:
``` python
# Datacube dimensions to assign to this dataset.
# - dimension names should come from the coordinates printed above
# - we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)
# - the extent of each dimension should be pulled from the dataset
# NOTE: the previous revision left a second, unterminated copy of this dict
# above it (a merge artifact), which was a SyntaxError; only one dict remains.
dims_dict = {'time': pystac.extensions.datacube.Dimension({'type': 'temporal', 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step': pd.Timedelta(get_step('time')).isoformat()}),
             'x': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'extent': [xy_bounds[0], xy_bounds[2]], 'step': get_step('x')}),
             'y': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'extent': [xy_bounds[1], xy_bounds[3]], 'step': get_step('y')}),
             'bottom_top_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z'}),
             'bottom_top': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z'}),
             'soil_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z'}),
             'x_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x'}),
             'y_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y'}),
             'snow_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z'}),
             'snso_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z'}),
             }
```
%% Cell type:markdown id:0f277883-a3fd-425f-966a-ca2140d0ef2f tags:
### Add cube variables (optional field for extension)
%% Cell type:code id:e9272931-fc0b-4f2a-9546-283033e9cde8 tags:
``` python
# Build the cube:variables mapping from the dataset's variables.
# The datacube spec says the keys of cube:dimensions and cube:variables
# should be unique together (a key like lat should not be both a dimension
# and a variable), so everything in dims is dropped from the variable list.
# Microsoft Planetary Computer includes coordinates and crs as variables here:
# https://planetarycomputer.microsoft.com/dataset/daymet-annual-na
# - we keep those in the variable list too.
# Renamed the list from `vars` to avoid shadowing the `vars` builtin.
var_names = [v for v in ds.variables if v not in dims]
# map each variable to its dimensions for the datacube extension
vars_dict = {v: pystac.extensions.datacube.Variable({'dimensions': list(ds[v].dims), 'type': 'data'})
             for v in var_names}
```
%% Cell type:markdown id:11ad5352-884c-4472-8864-4570a96f66e5 tags:
### Finalize extension
%% Cell type:code id:10141fd4-91d6-491d-878b-02653720891d tags:
``` python
# add dimensions and variables to the collection's datacube extension
dc.apply(dimensions=dims_dict, variables=vars_dict)
```
%% Cell type:markdown id:615ca168-75fb-4135-9941-0ef5fe4fd1cb tags:
## Add STAC Collection to Catalog and Save
%% Cell type:code id:4b75791b-6b2d-40be-b7c6-330a60888fb5 tags:
``` python
# If the collection was already a child of the catalog, save just that
# collection subtree; otherwise attach it first. The full catalog is
# normalized and saved in either case.
existing_child = catalog.get_child(collection_id)
if not existing_child:
    catalog.add_child(collection)
else:
    collection.normalize_and_save(root_href=os.path.join(catalog_path, collection_id), catalog_type=pystac.CatalogType.SELF_CONTAINED)
catalog.normalize_and_save(root_href=catalog_path, catalog_type=pystac.CatalogType.SELF_CONTAINED)
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment