From 2054f4199d79c2f4ac8c80ba6fc1325862d67c41 Mon Sep 17 00:00:00 2001
From: amsnyder <asnyder@usgs.gov>
Date: Thu, 25 Apr 2024 15:36:58 -0500
Subject: [PATCH] add archival thredds asset

---
 catalog/bcsd_mon_vic/collection.json          | 27 +++++++--
 catalog/bcsd_obs/collection.json              | 27 +++++++--
 ..._mon_vic_create_collection_from_zarr.ipynb | 57 +++++++++++++++++--
 ...bcsd_obs_create_collection_from_zarr.ipynb | 55 ++++++++++++++++--
 4 files changed, 146 insertions(+), 20 deletions(-)

diff --git a/catalog/bcsd_mon_vic/collection.json b/catalog/bcsd_mon_vic/collection.json
index f01ae007..e76a312a 100644
--- a/catalog/bcsd_mon_vic/collection.json
+++ b/catalog/bcsd_mon_vic/collection.json
@@ -14448,7 +14448,8 @@
     "zarr-s3-osn": {
       "href": "s3://mdmf/gdp/bcsd_mon_vic.zarr/",
       "type": "application/vnd+zarr",
-      "description": "Open Storage Network Pod S3 API access to collection zarr group",
+      "title": "Free access to zarr via S3 API",
+      "description": "Free, public access to zarr data store via the S3 API. This data is stored on an Open Storage Network Pod.",
       "xarray:open_kwargs": {
         "chunks": {},
         "engine": "zarr",
@@ -14463,13 +14464,15 @@
       "roles": [
         "data",
         "zarr",
-        "s3"
+        "s3",
+        "osn"
       ]
     },
-    "zarr-s3": {
+    "zarr-s3-aws": {
       "href": "s3://nhgf-development/workspace/DataConversion/bcsd_mon_vic.zarr/",
       "type": "application/vnd+zarr",
-      "description": "S3 access to collection zarr group",
+      "title": "Requester pays access to zarr via S3 API",
+      "description": "Requester pays, public access to zarr data store via the S3 API. This data is stored in an AWS S3 bucket.",
       "xarray:open_kwargs": {
         "chunks": {},
         "engine": "zarr",
@@ -14481,7 +14484,21 @@
       "roles": [
         "data",
         "zarr",
-        "s3"
+        "s3",
+        "aws",
+        "requester-pays"
+      ]
+    },
+    "legacy-s3-aws": {
+      "href": "s3://nhgf-development/thredds/BCSD_mon_VIC/",
+      "title": "Requester pays access to archival legacy files via S3 API",
+      "description": "Requester pays, public access (via the S3 API) to archival legacy files from WMA THREDDS server that were used to create this zarr store. This data is stored in an AWS S3 bucket.",
+      "roles": [
+        "data",
+        "directory",
+        "s3",
+        "aws",
+        "requester-pays"
       ]
     }
   }
diff --git a/catalog/bcsd_obs/collection.json b/catalog/bcsd_obs/collection.json
index c21ae138..d4549e93 100644
--- a/catalog/bcsd_obs/collection.json
+++ b/catalog/bcsd_obs/collection.json
@@ -170,7 +170,8 @@
     "zarr-s3-osn": {
       "href": "s3://mdmf/gdp/bcsd_obs.zarr/",
       "type": "application/vnd+zarr",
-      "description": "Open Storage Network Pod S3 API access to collection zarr group",
+      "title": "Free access to zarr via S3 API",
+      "description": "Free, public access to zarr data store via the S3 API. This data is stored on an Open Storage Network Pod.",
       "xarray:open_kwargs": {
         "chunks": {},
         "engine": "zarr",
@@ -185,13 +186,15 @@
       "roles": [
         "data",
         "zarr",
-        "s3"
+        "s3",
+        "osn"
       ]
     },
-    "zarr-s3": {
+    "zarr-s3-aws": {
       "href": "s3://nhgf-development/workspace/DataConversion/bcsd_obs.zarr/",
       "type": "application/vnd+zarr",
-      "description": "S3 access to collection zarr group",
+      "title": "Requester pays access to zarr via S3 API",
+      "description": "Requester pays, public access to zarr data store via the S3 API. This data is stored in an AWS S3 bucket.",
       "xarray:open_kwargs": {
         "chunks": {},
         "engine": "zarr",
@@ -203,7 +206,21 @@
       "roles": [
         "data",
         "zarr",
-        "s3"
+        "s3",
+        "aws",
+        "requester-pays"
+      ]
+    },
+    "legacy-s3-aws": {
+      "href": "s3://nhgf-development/thredds/maurer/monthly_obs/",
+      "title": "Requester pays access to archival legacy files via S3 API",
+      "description": "Requester pays, public access (via the S3 API) to archival legacy files from WMA THREDDS server that were used to create this zarr store. This data is stored in an AWS S3 bucket.",
+      "roles": [
+        "data",
+        "directory",
+        "s3",
+        "aws",
+        "requester-pays"
       ]
     }
   }
diff --git a/workflows/archive/bcsd_mon_vic_create_collection_from_zarr.ipynb b/workflows/archive/bcsd_mon_vic_create_collection_from_zarr.ipynb
index 5473da1e..878b8fd5 100644
--- a/workflows/archive/bcsd_mon_vic_create_collection_from_zarr.ipynb
+++ b/workflows/archive/bcsd_mon_vic_create_collection_from_zarr.ipynb
@@ -86,10 +86,12 @@
     "# ref: https://github.com/stac-extensions/xarray-assets\n",
     "xarray_opendataset_kwargs = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
     "                          \"xarray:storage_options\": {\"anon\": True, \"client_kwargs\": {\"endpoint_url\":\"https://usgs.osn.mghpcc.org/\"}}}\n",
+    "# title - The displayed title for clients and users.\n",
+    "asset_title = 'Free access to zarr via S3 API'\n",
     "# description for zarr url asset attached to collection (zarr_url)\n",
-    "asset_description = \"Open Storage Network Pod S3 API access to collection zarr group\"\n",
+    "asset_description = \"Free, public access to zarr data store via the S3 API. This data is stored on an Open Storage Network Pod.\"\n",
     "# roles to tag zarr url asset with\n",
-    "asset_roles = [\"data\",\"zarr\",\"s3\"]"
+    "asset_roles = [\"data\",\"zarr\",\"s3\",\"osn\"]"
    ]
   },
   {
@@ -108,10 +110,12 @@
     "# ref: https://github.com/stac-extensions/xarray-assets\n",
     "xarray_opendataset_kwargs2 = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
     "                          \"xarray:storage_options\":{\"requester_pays\":True}}\n",
+    "# title - The displayed title for clients and users.\n",
+    "asset_title2 = 'Requester pays access to zarr via S3 API'\n",
     "# description for zarr url asset attached to collection (zarr_url)\n",
-    "asset_description2 = \"S3 access to collection zarr group\"\n",
+    "asset_description2 = \"Requester pays, public access to zarr data store via the S3 API. This data is stored in an AWS S3 bucket.\"\n",
     "# roles to tag zarr url asset with\n",
-    "asset_roles2 = [\"data\",\"zarr\",\"s3\"]"
+    "asset_roles2 = [\"data\",\"zarr\",\"s3\",\"aws\",\"requester-pays\"]"
    ]
   },
   {
@@ -427,6 +431,20 @@
     "## Add zarr url asset to collection"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b86522c-d25d-4ef4-9b19-27d73223aac2",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# first clear out any existing assets - that way if we change the name, we won't have the old asset\n",
+    "# preserved\n",
+    "collection.assets.clear()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -436,6 +454,7 @@
    "source": [
     "asset_id = \"zarr-s3-osn\"\n",
     "asset = pystac.Asset(href=zarr_url,\n",
+    "                     title=asset_title,\n",
     "                     description=asset_description,\n",
     "                     media_type=\"application/vnd+zarr\",\n",
     "                     roles=asset_roles,\n",
@@ -452,8 +471,9 @@
    },
    "outputs": [],
    "source": [
-    "asset_id2 = \"zarr-s3\"\n",
+    "asset_id2 = \"zarr-s3-aws\"\n",
     "asset2 = pystac.Asset(href=zarr_url2,\n",
+    "                     title=asset_title2,\n",
     "                     description=asset_description2,\n",
     "                     media_type=\"application/vnd+zarr\",\n",
     "                     roles=asset_roles2,\n",
@@ -461,6 +481,33 @@
     "collection.add_asset(asset_id2, asset2)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "0df80374-edbe-4f5b-b685-8fad1ed85e1a",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Add Archival Assets on S3 to Collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6789dbbb-135c-44c8-8e07-d6e21be21754",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "archival_id = \"legacy-s3-aws\"\n",
+    "archival_asset = pystac.Asset(href='s3://nhgf-development/thredds/BCSD_mon_VIC/',\n",
+    "                             title='Requester pays access to archival legacy files via S3 API', \n",
+    "                             description='Requester pays, public access (via the S3 API) to archival legacy files from WMA THREDDS server that were used to create this zarr store. This data is stored in an AWS S3 bucket.',\n",
+    "                             roles=[\"data\",\"directory\",\"s3\",\"aws\",\"requester-pays\"])\n",
+    "collection.add_asset(archival_id, archival_asset)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "f67cd5c9-db33-45c2-bc21-480cd67354f4",
diff --git a/workflows/archive/bcsd_obs_create_collection_from_zarr.ipynb b/workflows/archive/bcsd_obs_create_collection_from_zarr.ipynb
index b743f5b2..2f4593c2 100644
--- a/workflows/archive/bcsd_obs_create_collection_from_zarr.ipynb
+++ b/workflows/archive/bcsd_obs_create_collection_from_zarr.ipynb
@@ -86,10 +86,12 @@
     "# ref: https://github.com/stac-extensions/xarray-assets\n",
     "xarray_opendataset_kwargs = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
     "                          \"xarray:storage_options\": {\"anon\": True, \"client_kwargs\": {\"endpoint_url\":\"https://usgs.osn.mghpcc.org/\"}}}\n",
+    "# title - The displayed title for clients and users.\n",
+    "asset_title = 'Free access to zarr via S3 API'\n",
     "# description for zarr url asset attached to collection (zarr_url)\n",
-    "asset_description = \"Open Storage Network Pod S3 API access to collection zarr group\"\n",
+    "asset_description = \"Free, public access to zarr data store via the S3 API. This data is stored on an Open Storage Network Pod.\"\n",
     "# roles to tag zarr url asset with\n",
-    "asset_roles = [\"data\",\"zarr\",\"s3\"]"
+    "asset_roles = [\"data\",\"zarr\",\"s3\",\"osn\"]"
    ]
   },
   {
@@ -108,10 +110,12 @@
     "# ref: https://github.com/stac-extensions/xarray-assets\n",
     "xarray_opendataset_kwargs2 = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
     "                          \"xarray:storage_options\":{\"requester_pays\":True}}\n",
+    "# title - The displayed title for clients and users.\n",
+    "asset_title2 = 'Requester pays access to zarr via S3 API'\n",
     "# description for zarr url asset attached to collection (zarr_url)\n",
-    "asset_description2 = \"S3 access to collection zarr group\"\n",
+    "asset_description2 = \"Requester pays, public access to zarr data store via the S3 API. This data is stored in an AWS S3 bucket.\"\n",
     "# roles to tag zarr url asset with\n",
-    "asset_roles2 = [\"data\",\"zarr\",\"s3\"]"
+    "asset_roles2 = [\"data\",\"zarr\",\"s3\",\"aws\",\"requester-pays\"]"
    ]
   },
   {
@@ -425,6 +429,20 @@
     "## Add zarr url asset to collection"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "69db5207-8cfa-4503-9e61-c5a25c28c5fe",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# first clear out any existing assets - that way if we change the name, we won't have the old asset\n",
+    "# preserved\n",
+    "collection.assets.clear()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -434,6 +452,7 @@
    "source": [
     "asset_id = \"zarr-s3-osn\"\n",
     "asset = pystac.Asset(href=zarr_url,\n",
+    "                     title=asset_title,\n",
     "                     description=asset_description,\n",
     "                     media_type=\"application/vnd+zarr\",\n",
     "                     roles=asset_roles,\n",
@@ -450,8 +469,9 @@
    },
    "outputs": [],
    "source": [
-    "asset_id2 = \"zarr-s3\"\n",
+    "asset_id2 = \"zarr-s3-aws\"\n",
     "asset2 = pystac.Asset(href=zarr_url2,\n",
+    "                     title=asset_title2,\n",
     "                     description=asset_description2,\n",
     "                     media_type=\"application/vnd+zarr\",\n",
     "                     roles=asset_roles2,\n",
@@ -459,6 +479,31 @@
     "collection.add_asset(asset_id2, asset2)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "2d7629f8-3b6e-4173-8dba-4fe00120a58a",
+   "metadata": {},
+   "source": [
+    "## Add Archival Assets on S3 to Collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "24cb34d6-7661-4662-b0f0-27d990ea2b26",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "archival_id = \"legacy-s3-aws\"\n",
+    "archival_asset = pystac.Asset(href='s3://nhgf-development/thredds/maurer/monthly_obs/',\n",
+    "                             title='Requester pays access to archival legacy files via S3 API', \n",
+    "                             description='Requester pays, public access (via the S3 API) to archival legacy files from WMA THREDDS server that were used to create this zarr store. This data is stored in an AWS S3 bucket.',\n",
+    "                             roles=[\"data\",\"directory\",\"s3\",\"aws\",\"requester-pays\"])\n",
+    "collection.add_asset(archival_id, archival_asset)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "f67cd5c9-db33-45c2-bc21-480cd67354f4",
-- 
GitLab