From 5c4b37af04c781bc37743c7e3c85f29e0c2ffdab Mon Sep 17 00:00:00 2001
From: amsnyder <asnyder@usgs.gov>
Date: Thu, 25 Apr 2024 14:52:56 -0500
Subject: [PATCH] add archival thredds asset

---
 .../collection.json                           | 39 +++++++++++++++++-
 ...mulation_create_collection_from_zarr.ipynb | 41 ++++++++++++++++---
 2 files changed, 73 insertions(+), 7 deletions(-)

diff --git a/catalog/alaska_et_2020_ccsm4_historical_simulation/collection.json b/catalog/alaska_et_2020_ccsm4_historical_simulation/collection.json
index 3e7c0288..829d5758 100644
--- a/catalog/alaska_et_2020_ccsm4_historical_simulation/collection.json
+++ b/catalog/alaska_et_2020_ccsm4_historical_simulation/collection.json
@@ -112,7 +112,8 @@
     "zarr-s3-osn": {
       "href": "s3://mdmf/gdp/alaska_et_2020_ccsm4_historical_simulation.zarr/",
       "type": "application/vnd+zarr",
-      "description": "Open Storage Network Pod S3 API access to collection zarr group",
+      "title": "Free access to zarr via S3 API",
+      "description": "Free, public access to zarr data store via the S3 API. This data is stored on an Open Storage Network Pod.",
       "xarray:open_kwargs": {
         "chunks": {},
         "engine": "zarr",
@@ -127,7 +128,8 @@
       "roles": [
         "data",
         "zarr",
-        "s3"
+        "s3",
+        "osn"
       ]
     },
     "zarr-s3": {
@@ -147,6 +149,39 @@
         "zarr",
         "s3"
       ]
+    },
+    "zarr-s3-aws": {
+      "href": "s3://nhgf-development/workspace/DataConversion/alaska_et_2020_ccsm4_historical_simulation.zarr/",
+      "type": "application/vnd+zarr",
+      "title": "Requester pays access to zarr via S3 API",
+      "description": "Requester pays, public access to zarr data store via the S3 API. This data is stored in an AWS S3 bucket.",
+      "xarray:open_kwargs": {
+        "chunks": {},
+        "engine": "zarr",
+        "consolidated": true
+      },
+      "xarray:storage_options": {
+        "requester_pays": true
+      },
+      "roles": [
+        "data",
+        "zarr",
+        "s3",
+        "aws",
+        "requester-pays"
+      ]
+    },
+    "legacy-s3-aws": {
+      "href": "s3://nhgf-development/thredds/alaska_et_2020/",
+      "title": "Requester pays access to archival legacy files via S3 API",
+      "description": "Requester pays, public access (via the S3 API) to archival legacy files from WMA THREDDS server that were used to create this zarr store. This data is stored in an AWS S3 bucket.",
+      "roles": [
+        "data",
+        "directory",
+        "s3",
+        "aws",
+        "requester-pays"
+      ]
     }
   }
 }
\ No newline at end of file
diff --git a/workflows/archive/alaska_et_2020_ccsm4_historical_simulation_create_collection_from_zarr.ipynb b/workflows/archive/alaska_et_2020_ccsm4_historical_simulation_create_collection_from_zarr.ipynb
index 423be8d4..fb770d91 100644
--- a/workflows/archive/alaska_et_2020_ccsm4_historical_simulation_create_collection_from_zarr.ipynb
+++ b/workflows/archive/alaska_et_2020_ccsm4_historical_simulation_create_collection_from_zarr.ipynb
@@ -92,10 +92,12 @@
     "# ref: https://github.com/stac-extensions/xarray-assets\n",
     "xarray_opendataset_kwargs = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
     "                          \"xarray:storage_options\": {\"anon\": True, \"client_kwargs\": {\"endpoint_url\":\"https://usgs.osn.mghpcc.org/\"}}}\n",
+    "# title - The displayed title for clients and users.\n",
+    "asset_title = 'Free access to zarr via S3 API'\n",
     "# description for zarr url asset attached to collection (zarr_url)\n",
-    "asset_description = \"Open Storage Network Pod S3 API access to collection zarr group\"\n",
+    "asset_description = \"Free, public access to zarr data store via the S3 API. This data is stored on an Open Storage Network Pod.\"\n",
     "# roles to tag zarr url asset with\n",
-    "asset_roles = [\"data\",\"zarr\",\"s3\"]"
+    "asset_roles = [\"data\",\"zarr\",\"s3\",\"osn\"]"
    ]
   },
   {
@@ -114,10 +116,12 @@
     "# ref: https://github.com/stac-extensions/xarray-assets\n",
     "xarray_opendataset_kwargs2 = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
     "                          \"xarray:storage_options\":{\"requester_pays\":True}}\n",
+    "# title - The displayed title for clients and users.\n",
+    "asset_title2 = 'Requester pays access to zarr via S3 API'\n",
     "# description for zarr url asset attached to collection (zarr_url)\n",
-    "asset_description2 = \"S3 access to collection zarr group\"\n",
+    "asset_description2 = \"Requester pays, public access to zarr data store via the S3 API. This data is stored in an AWS S3 bucket.\"\n",
     "# roles to tag zarr url asset with\n",
-    "asset_roles2 = [\"data\",\"zarr\",\"s3\"]"
+    "asset_roles2 = [\"data\",\"zarr\",\"s3\",\"aws\",\"requester-pays\"]"
    ]
   },
   {
@@ -468,6 +472,7 @@
    "source": [
     "asset_id = \"zarr-s3-osn\"\n",
     "asset = pystac.Asset(href=zarr_url,\n",
+    "                     title=asset_title,\n",
     "                     description=asset_description,\n",
     "                     media_type=\"application/vnd+zarr\",\n",
     "                     roles=asset_roles,\n",
@@ -484,8 +489,9 @@
    },
    "outputs": [],
    "source": [
-    "asset_id2 = \"zarr-s3\"\n",
+    "asset_id2 = \"zarr-s3-aws\"\n",
     "asset2 = pystac.Asset(href=zarr_url2,\n",
+    "                     title=asset_title2,\n",
     "                     description=asset_description2,\n",
     "                     media_type=\"application/vnd+zarr\",\n",
     "                     roles=asset_roles2,\n",
@@ -493,6 +499,31 @@
     "collection.add_asset(asset_id2, asset2)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "0428bc79-153a-4185-9b69-87d9b4e994fc",
+   "metadata": {},
+   "source": [
+    "### Add Collection Link to Archival Assets on S3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "92ae6437-0404-4078-8104-33b38cfad7a4",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "archival_id = \"legacy-s3-aws\"\n",
+    "archival_asset = pystac.Asset(href='s3://nhgf-development/thredds/alaska_et_2020/',\n",
+    "                             title='Requester pays access to archival legacy files via S3 API', \n",
+    "                             description='Requester pays, public access (via the S3 API) to archival legacy files from WMA THREDDS server that were used to create this zarr store. This data is stored in an AWS S3 bucket.',\n",
+    "                             roles=[\"data\",\"directory\",\"s3\",\"aws\",\"requester-pays\"])\n",
+    "collection.add_asset(archival_id, archival_asset)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "f67cd5c9-db33-45c2-bc21-480cd67354f4",
-- 
GitLab