From 8dfce89001a764f872b3c951fb1bfc4688dc6c52 Mon Sep 17 00:00:00 2001
From: amsnyder <asnyder@usgs.gov>
Date: Mon, 20 Nov 2023 14:19:13 -0600
Subject: [PATCH] additional modifications to item workflow

---
 ..._exploratory_workflow_conus404-daily.ipynb | 149 +++++++++++-------
 1 file changed, 91 insertions(+), 58 deletions(-)

diff --git a/workflows/create_item_zarr_exploratory_workflow_conus404-daily.ipynb b/workflows/create_item_zarr_exploratory_workflow_conus404-daily.ipynb
index 01ad5803..c488783f 100644
--- a/workflows/create_item_zarr_exploratory_workflow_conus404-daily.ipynb
+++ b/workflows/create_item_zarr_exploratory_workflow_conus404-daily.ipynb
@@ -19,7 +19,9 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "201e0945-de55-45ff-b095-c2af009a4e62",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "import pystac\n",
@@ -36,7 +38,8 @@
     "import metpy\n",
     "import cartopy.crs as ccrs\n",
     "import cfunits\n",
-    "import json"
+    "import json\n",
+    "from shapely.geometry import Polygon, mapping"
    ]
   },
   {
@@ -51,7 +54,9 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "# define folder location where your STAC catalog json file is\n",
@@ -72,7 +77,9 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "482d204d-b5b6-40e5-ac42-55b459be1097",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "# url to zarr store that you want to create a collection for\n",
@@ -88,11 +95,7 @@
     "asset_roles = [\"data\",\"zarr\",\"s3\"]\n",
     "\n",
     "# name for STAC collection\n",
-    "item_id = 'conus404-daily'\n",
-    "# description of STAC collection\n",
-    "item_description = 'CONUS404 40 years of daily values for subset of model output variables derived from hourly values on cloud storage'\n",
-    "# license for dataset\n",
-    "item_license = 'CC0-1.0'"
+    "item_id = 'conus404-daily'"
    ]
   },
   {
@@ -109,7 +112,9 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "708f2cf5-79ab-49af-8067-de31d0d13ee6",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "# open and view zarr dataset\n",
@@ -132,7 +137,9 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "ab91268f-7200-4cb1-979a-c7d75531d2c0",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "dims_auto_extract = ['X', 'Y', 'T']\n",
@@ -167,7 +174,9 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "4eb4d027-4266-4a0b-8f16-bacfbef06242",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "# plot a map of a single variable\n",
@@ -190,7 +199,9 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "c7de2681-88c2-4597-857c-8f169c596f8b",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "# enter lat, lon of point you want to plot time series for\n",
@@ -209,7 +220,7 @@
    "id": "a8c3ed37-8564-400b-a7fb-25bd5e43d21c",
    "metadata": {},
    "source": [
-    "## Create Collection Extent"
+    "## Create Item Geometry, BBox, Datetime"
    ]
   },
   {
@@ -217,14 +228,16 @@
    "id": "69f0d837-68a5-4fed-9a14-5d75cfbb0da4",
    "metadata": {},
    "source": [
-    "### Spatial Extent"
+    "### Spatial - Geometry and bbox"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "d46805e0-8e94-4ebe-aa01-d9a2d7051459",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "# pull out lat/lon bbox for data\n",
@@ -235,8 +248,14 @@
     "#coord_bounds = [ds.lon.data.min().compute().astype(float), ds.lat.data.min().compute().astype(float), ds.lon.data.max().compute().astype(float), ds.lat.data.max().compute().astype(float)]\n",
     "coord_bounds = [ds.lon.data.min().astype(float), ds.lat.data.min().astype(float), ds.lon.data.max().astype(float), ds.lat.data.max().astype(float)]\n",
     "print(coord_bounds)\n",
-    "# create a spatial extent object \n",
-    "spatial_extent = pystac.SpatialExtent(bboxes=[coord_bounds])"
+    "# create a geometry object \n",
+    "footprint = mapping(Polygon([\n",
+    "            [coord_bounds[0], coord_bounds[1]],\n",
+    "            [coord_bounds[0], coord_bounds[3]],\n",
+    "            [coord_bounds[2], coord_bounds[3]],\n",
+    "            [coord_bounds[2], coord_bounds[1]]\n",
+    "        ]))\n",
+    "print(footprint)"
    ]
   },
   {
@@ -244,32 +263,22 @@
    "id": "a04c8fca-1d33-43ac-9e2b-62d7be2887f7",
    "metadata": {},
    "source": [
-    "### Temporal Extent"
+    "### Temporal - datetime"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "41a84995-867c-4152-8c57-85e3758bbb77",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "# pull out first and last timestamps\n",
     "temporal_extent_lower = pd.Timestamp(ds[dim_names_dict['T']].data.min())\n",
     "temporal_extent_upper = pd.Timestamp(ds[dim_names_dict['T']].data.max())\n",
-    "print(f'min: {temporal_extent_lower} \\nmax: {temporal_extent_upper}')\n",
-    "# create a temporal extent object\n",
-    "temporal_extent = pystac.TemporalExtent(intervals=[[temporal_extent_lower, temporal_extent_upper]])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1b1e37c4-5348-46ad-abc9-e005b5d6c02b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "collection_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)"
+    "print(f'min: {temporal_extent_lower} \\nmax: {temporal_extent_upper}')"
    ]
   },
   {
@@ -277,28 +286,28 @@
    "id": "cfb71202-03df-45b5-ac2f-0dc2ee1ab780",
    "metadata": {},
    "source": [
-    "## Create pystac collection"
+    "## Create pystac item"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "7e96811b-95ae-406a-9728-55fc429d4e1f",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
-    "if catalog.get_child(collection_id):\n",
-    "    collection = catalog.get_child(collection_id)\n",
-    "    print(\"existing collection opened\")\n",
-    "    collection.extent=collection_extent\n",
-    "    collection.description=collection_description\n",
-    "    collection.license=collection_license\n",
+    "item = next(catalog.get_items(item_id), None)  # look the item up once; None if it is not in the catalog yet\n",
+    "if item is not None:\n",
+    "    print(\"existing item opened\")\n",
     "else:\n",
-    "    collection = pystac.Collection(id=collection_id,\n",
-    "                                   description=collection_description,\n",
-    "                                   extent=collection_extent,\n",
-    "                                   license=collection_license)\n",
-    "    print(\"new collection created\")"
+    "    item = pystac.Item(id=item_id,\n",
+    "                       geometry=footprint,\n",
+    "                       bbox=coord_bounds,\n",
+    "                       datetime=temporal_extent_upper,\n",
+    "                       properties={})\n",
+    "    print(\"new item created\")"
    ]
   },
   {
@@ -306,7 +315,7 @@
    "id": "a21c76e8-cd57-4eb5-a33f-7c668a3b3205",
    "metadata": {},
    "source": [
-    "## Add zarr url asset to collection"
+    "## Add zarr url asset to item"
    ]
   },
   {
@@ -322,7 +331,7 @@
     "                     media_type=\"application/vnd+zarr\",\n",
     "                     roles=asset_roles,\n",
     "                     extra_fields = xarray_opendataset_kwargs)\n",
-    "collection.add_asset(asset_id, asset)"
+    "item.add_asset(asset_id, asset)"
    ]
   },
   {
@@ -330,7 +339,7 @@
    "id": "f67cd5c9-db33-45c2-bc21-480cd67354f4",
    "metadata": {},
    "source": [
-    "## Add datacube extension to collection"
+    "## Add datacube extension to item"
    ]
   },
   {
@@ -341,7 +350,7 @@
    "outputs": [],
    "source": [
     "# instantiate extention on collection\n",
-    "dc = DatacubeExtension.ext(collection, add_if_missing=True)"
+    "dc = DatacubeExtension.ext(item, add_if_missing=True)"
    ]
   },
   {
@@ -588,7 +597,19 @@
    "id": "615ca168-75fb-4135-9941-0ef5fe4fd1cb",
    "metadata": {},
    "source": [
-    "## Add STAC Collection to Catalog and Save"
+    "## Add STAC Item to Catalog and Save"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "161949f9-e839-4497-8314-cab8a3871669",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "json.dumps(item.to_dict())"
    ]
   },
   {
@@ -598,19 +619,31 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "if catalog.get_child(collection_id):\n",
-    "    collection.normalize_and_save(root_href=os.path.join(catalog_path, collection_id), catalog_type=pystac.CatalogType.SELF_CONTAINED)\n",
+    "if next(catalog.get_items(item_id), None):\n",
+    "    #item.normalize_and_save(root_href=os.path.join(catalog_path, item_id), catalog_type=pystac.CatalogType.SELF_CONTAINED)\n",
+    "    catalog.normalize_hrefs(catalog_path)\n",
+    "    catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)\n",
     "else:\n",
-    "    catalog.add_child(collection)\n",
-    "    catalog.normalize_and_save(root_href=catalog_path, catalog_type=pystac.CatalogType.SELF_CONTAINED)"
+    "    catalog.add_item(item)\n",
+    "    catalog.normalize_hrefs(catalog_path)\n",
+    "    catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)\n",
+    "    #catalog.normalize_and_save(root_href=catalog_path, catalog_type=pystac.CatalogType.SELF_CONTAINED)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ca08bfcb-53c3-4573-bece-408da8964302",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "geo",
+   "display_name": "global-global-pangeo",
    "language": "python",
-   "name": "python3"
+   "name": "conda-env-global-global-pangeo-py"
   },
   "language_info": {
    "codemirror_mode": {
@@ -622,7 +655,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.0"
+   "version": "3.10.13"
   }
  },
  "nbformat": 4,
-- 
GitLab