From 801e9687a73866689875188ac5085eb70ddb3409 Mon Sep 17 00:00:00 2001
From: amsnyder <asnyder@usgs.gov>
Date: Thu, 25 Apr 2024 15:28:31 -0500
Subject: [PATCH] add archival thredds asset

---
 catalog/bcca_cmip3_future/collection.json     | 27 +++++++--
 catalog/catalog.json                          |  2 +-
 ...3_future_create_collection_from_zarr.ipynb | 55 +++++++++++++++++--
 ...historic_create_collection_from_zarr.ipynb | 55 +++++++++++++++++--
 ...5_future_create_collection_from_zarr.ipynb | 55 +++++++++++++++++--
 ...storical_create_collection_from_zarr.ipynb | 55 +++++++++++++++++--
 6 files changed, 223 insertions(+), 26 deletions(-)

diff --git a/catalog/bcca_cmip3_future/collection.json b/catalog/bcca_cmip3_future/collection.json
index 9bacdc06..62180139 100644
--- a/catalog/bcca_cmip3_future/collection.json
+++ b/catalog/bcca_cmip3_future/collection.json
@@ -1674,7 +1674,8 @@
     "zarr-s3-osn": {
       "href": "s3://mdmf/gdp/bcca_cmip3_future.zarr/",
       "type": "application/vnd+zarr",
-      "description": "Open Storage Network Pod S3 API access to collection zarr group",
+      "title": "Free access to zarr via S3 API",
+      "description": "Free, public access to zarr data store via the S3 API. This data is stored on an Open Storage Network Pod.",
       "xarray:open_kwargs": {
         "chunks": {},
         "engine": "zarr",
@@ -1689,13 +1690,15 @@
       "roles": [
         "data",
         "zarr",
-        "s3"
+        "s3",
+        "osn"
       ]
     },
-    "zarr-s3": {
+    "zarr-s3-aws": {
       "href": "s3://nhgf-development/workspace/DataConversion/bcca_cmip3_future.zarr/",
       "type": "application/vnd+zarr",
-      "description": "S3 access to collection zarr group",
+      "title": "Requester pays access to zarr via S3 API",
+      "description": "Requester pays, public access to zarr data store via the S3 API. This data is stored in an AWS S3 bucket.",
       "xarray:open_kwargs": {
         "chunks": {},
         "engine": "zarr",
@@ -1707,7 +1710,21 @@
       "roles": [
         "data",
         "zarr",
-        "s3"
+        "s3",
+        "aws",
+        "requester-pays"
+      ]
+    },
+    "legacy-s3-aws": {
+      "href": "s3://nhgf-development/thredds/bcca/",
+      "title": "Requester pays access to archival legacy files via S3 API",
+      "description": "Requester pays, public access (via the S3 API) to archival legacy files from WMA THREDDS server that were used to create this zarr store. This data is stored in an AWS S3 bucket.",
+      "roles": [
+        "data",
+        "directory",
+        "s3",
+        "aws",
+        "requester-pays"
       ]
     }
   }
diff --git a/catalog/catalog.json b/catalog/catalog.json
index c8abc1d2..a754bc4b 100644
--- a/catalog/catalog.json
+++ b/catalog/catalog.json
@@ -350,4 +350,4 @@
       "type": "application/json"
     }
   ]
-}
\ No newline at end of file
+}
diff --git a/workflows/archive/bcca_cmip3_future_create_collection_from_zarr.ipynb b/workflows/archive/bcca_cmip3_future_create_collection_from_zarr.ipynb
index 41e61984..44a70b30 100644
--- a/workflows/archive/bcca_cmip3_future_create_collection_from_zarr.ipynb
+++ b/workflows/archive/bcca_cmip3_future_create_collection_from_zarr.ipynb
@@ -86,10 +86,12 @@
     "# ref: https://github.com/stac-extensions/xarray-assets\n",
     "xarray_opendataset_kwargs = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
     "                          \"xarray:storage_options\": {\"anon\": True, \"client_kwargs\": {\"endpoint_url\":\"https://usgs.osn.mghpcc.org/\"}}}\n",
+    "# title - The displayed title for clients and users.\n",
+    "asset_title = 'Free access to zarr via S3 API'\n",
     "# description for zarr url asset attached to collection (zarr_url)\n",
-    "asset_description = \"Open Storage Network Pod S3 API access to collection zarr group\"\n",
+    "asset_description = \"Free, public access to zarr data store via the S3 API. This data is stored on an Open Storage Network Pod.\"\n",
     "# roles to tag zarr url asset with\n",
-    "asset_roles = [\"data\",\"zarr\",\"s3\"]"
+    "asset_roles = [\"data\",\"zarr\",\"s3\",\"osn\"]"
    ]
   },
   {
@@ -108,10 +110,12 @@
     "# ref: https://github.com/stac-extensions/xarray-assets\n",
     "xarray_opendataset_kwargs2 = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
     "                          \"xarray:storage_options\":{\"requester_pays\":True}}\n",
+    "# title - The displayed title for clients and users.\n",
+    "asset_title2 = 'Requester pays access to zarr via S3 API'\n",
     "# description for zarr url asset attached to collection (zarr_url)\n",
-    "asset_description2 = \"S3 access to collection zarr group\"\n",
+    "asset_description2 = \"Requester pays, public access to zarr data store via the S3 API. This data is stored in an AWS S3 bucket.\"\n",
     "# roles to tag zarr url asset with\n",
-    "asset_roles2 = [\"data\",\"zarr\",\"s3\"]"
+    "asset_roles2 = [\"data\",\"zarr\",\"s3\",\"aws\",\"requester-pays\"]"
    ]
   },
   {
@@ -427,6 +431,20 @@
     "## Add zarr url asset to collection"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5123b0ef-facf-4c13-a173-bb590dabeb8a",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# first clear out any existing assets - that way if we change the name, we won't have the old asset\n",
+    "# preserved\n",
+    "collection.assets.clear()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -436,6 +454,7 @@
    "source": [
     "asset_id = \"zarr-s3-osn\"\n",
     "asset = pystac.Asset(href=zarr_url,\n",
+    "                     title=asset_title,\n",
     "                     description=asset_description,\n",
     "                     media_type=\"application/vnd+zarr\",\n",
     "                     roles=asset_roles,\n",
@@ -452,8 +471,9 @@
    },
    "outputs": [],
    "source": [
-    "asset_id2 = \"zarr-s3\"\n",
+    "asset_id2 = \"zarr-s3-aws\"\n",
     "asset2 = pystac.Asset(href=zarr_url2,\n",
+    "                     title=asset_title2,\n",
     "                     description=asset_description2,\n",
     "                     media_type=\"application/vnd+zarr\",\n",
     "                     roles=asset_roles2,\n",
@@ -461,6 +481,31 @@
     "collection.add_asset(asset_id2, asset2)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "dd93b4b1-9d38-4533-bd9b-c3ec3eed07de",
+   "metadata": {},
+   "source": [
+    "## Add Archival Legacy Files Asset (on S3) to Collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eb8f0ce0-0f0d-4f11-8218-95df6fafaac8",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "archival_id = \"legacy-s3-aws\"\n",
+    "archival_asset = pystac.Asset(href='s3://nhgf-development/thredds/bcca/',\n",
+    "                             title='Requester pays access to archival legacy files via S3 API', \n",
+    "                             description='Requester pays, public access (via the S3 API) to archival legacy files from WMA THREDDS server that were used to create this zarr store. This data is stored in an AWS S3 bucket.',\n",
+    "                             roles=[\"data\",\"directory\",\"s3\",\"aws\",\"requester-pays\"])\n",
+    "collection.add_asset(archival_id, archival_asset)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "f67cd5c9-db33-45c2-bc21-480cd67354f4",
diff --git a/workflows/archive/bcca_cmip3_historic_create_collection_from_zarr.ipynb b/workflows/archive/bcca_cmip3_historic_create_collection_from_zarr.ipynb
index bf9122fa..c08edb64 100644
--- a/workflows/archive/bcca_cmip3_historic_create_collection_from_zarr.ipynb
+++ b/workflows/archive/bcca_cmip3_historic_create_collection_from_zarr.ipynb
@@ -86,10 +86,12 @@
     "# ref: https://github.com/stac-extensions/xarray-assets\n",
     "xarray_opendataset_kwargs = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
     "                          \"xarray:storage_options\": {\"anon\": True, \"client_kwargs\": {\"endpoint_url\":\"https://usgs.osn.mghpcc.org/\"}}}\n",
+    "# title - The displayed title for clients and users.\n",
+    "asset_title = 'Free access to zarr via S3 API'\n",
     "# description for zarr url asset attached to collection (zarr_url)\n",
-    "asset_description = \"Open Storage Network Pod S3 API access to collection zarr group\"\n",
+    "asset_description = \"Free, public access to zarr data store via the S3 API. This data is stored on an Open Storage Network Pod.\"\n",
     "# roles to tag zarr url asset with\n",
-    "asset_roles = [\"data\",\"zarr\",\"s3\"]"
+    "asset_roles = [\"data\",\"zarr\",\"s3\",\"osn\"]"
    ]
   },
   {
@@ -108,10 +110,12 @@
     "# ref: https://github.com/stac-extensions/xarray-assets\n",
     "xarray_opendataset_kwargs2 = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
     "                          \"xarray:storage_options\":{\"requester_pays\":True}}\n",
+    "# title - The displayed title for clients and users.\n",
+    "asset_title2 = 'Requester pays access to zarr via S3 API'\n",
     "# description for zarr url asset attached to collection (zarr_url)\n",
-    "asset_description2 = \"S3 access to collection zarr group\"\n",
+    "asset_description2 = \"Requester pays, public access to zarr data store via the S3 API. This data is stored in an AWS S3 bucket.\"\n",
     "# roles to tag zarr url asset with\n",
-    "asset_roles2 = [\"data\",\"zarr\",\"s3\"]"
+    "asset_roles2 = [\"data\",\"zarr\",\"s3\",\"aws\",\"requester-pays\"]"
    ]
   },
   {
@@ -427,6 +431,20 @@
     "## Add zarr url asset to collection"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "da735ef0-49d7-4c61-9ea2-b8d10cb3b009",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# first clear out any existing assets - that way if we change the name, we won't have the old asset\n",
+    "# preserved\n",
+    "collection.assets.clear()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -436,6 +454,7 @@
    "source": [
     "asset_id = \"zarr-s3-osn\"\n",
     "asset = pystac.Asset(href=zarr_url,\n",
+    "                     title=asset_title,\n",
     "                     description=asset_description,\n",
     "                     media_type=\"application/vnd+zarr\",\n",
     "                     roles=asset_roles,\n",
@@ -452,8 +471,9 @@
    },
    "outputs": [],
    "source": [
-    "asset_id2 = \"zarr-s3\"\n",
+    "asset_id2 = \"zarr-s3-aws\"\n",
     "asset2 = pystac.Asset(href=zarr_url2,\n",
+    "                     title=asset_title2,\n",
     "                     description=asset_description2,\n",
     "                     media_type=\"application/vnd+zarr\",\n",
     "                     roles=asset_roles2,\n",
@@ -461,6 +481,31 @@
     "collection.add_asset(asset_id2, asset2)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "65dbae32-d8b6-4a00-a0ed-d1508962c7cb",
+   "metadata": {},
+   "source": [
+    "## Add Archival Legacy Files Asset (on S3) to Collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4840bdc2-5ed9-463f-bd7d-5da69fe34f26",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "archival_id = \"legacy-s3-aws\"\n",
+    "archival_asset = pystac.Asset(href='s3://nhgf-development/thredds/bcca/',\n",
+    "                             title='Requester pays access to archival legacy files via S3 API', \n",
+    "                             description='Requester pays, public access (via the S3 API) to archival legacy files from WMA THREDDS server that were used to create this zarr store. This data is stored in an AWS S3 bucket.',\n",
+    "                             roles=[\"data\",\"directory\",\"s3\",\"aws\",\"requester-pays\"])\n",
+    "collection.add_asset(archival_id, archival_asset)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "f67cd5c9-db33-45c2-bc21-480cd67354f4",
diff --git a/workflows/archive/bcca_cmip5_future_create_collection_from_zarr.ipynb b/workflows/archive/bcca_cmip5_future_create_collection_from_zarr.ipynb
index 5a8d339a..1eeedc37 100644
--- a/workflows/archive/bcca_cmip5_future_create_collection_from_zarr.ipynb
+++ b/workflows/archive/bcca_cmip5_future_create_collection_from_zarr.ipynb
@@ -86,10 +86,12 @@
     "# ref: https://github.com/stac-extensions/xarray-assets\n",
     "xarray_opendataset_kwargs = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
     "                          \"xarray:storage_options\": {\"anon\": True, \"client_kwargs\": {\"endpoint_url\":\"https://usgs.osn.mghpcc.org/\"}}}\n",
+    "# title - The displayed title for clients and users.\n",
+    "asset_title = 'Free access to zarr via S3 API'\n",
     "# description for zarr url asset attached to collection (zarr_url)\n",
-    "asset_description = \"Open Storage Network Pod S3 API access to collection zarr group\"\n",
+    "asset_description = \"Free, public access to zarr data store via the S3 API. This data is stored on an Open Storage Network Pod.\"\n",
     "# roles to tag zarr url asset with\n",
-    "asset_roles = [\"data\",\"zarr\",\"s3\"]"
+    "asset_roles = [\"data\",\"zarr\",\"s3\",\"osn\"]"
    ]
   },
   {
@@ -108,10 +110,12 @@
     "# ref: https://github.com/stac-extensions/xarray-assets\n",
     "xarray_opendataset_kwargs2 = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
     "                          \"xarray:storage_options\":{\"requester_pays\":True}}\n",
+    "# title - The displayed title for clients and users.\n",
+    "asset_title2 = 'Requester pays access to zarr via S3 API'\n",
     "# description for zarr url asset attached to collection (zarr_url)\n",
-    "asset_description2 = \"S3 access to collection zarr group\"\n",
+    "asset_description2 = \"Requester pays, public access to zarr data store via the S3 API. This data is stored in an AWS S3 bucket.\"\n",
     "# roles to tag zarr url asset with\n",
-    "asset_roles2 = [\"data\",\"zarr\",\"s3\"]"
+    "asset_roles2 = [\"data\",\"zarr\",\"s3\",\"aws\",\"requester-pays\"]"
    ]
   },
   {
@@ -429,6 +433,20 @@
     "## Add zarr url asset to collection"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "74738a9e-0062-4274-b4e5-7bf8d0e5250d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# first clear out any existing assets - that way if we change the name, we won't have the old asset\n",
+    "# preserved\n",
+    "collection.assets.clear()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -438,6 +456,7 @@
    "source": [
     "asset_id = \"zarr-s3-osn\"\n",
     "asset = pystac.Asset(href=zarr_url,\n",
+    "                     title=asset_title,\n",
     "                     description=asset_description,\n",
     "                     media_type=\"application/vnd+zarr\",\n",
     "                     roles=asset_roles,\n",
@@ -454,8 +473,9 @@
    },
    "outputs": [],
    "source": [
-    "asset_id2 = \"zarr-s3\"\n",
+    "asset_id2 = \"zarr-s3-aws\"\n",
     "asset2 = pystac.Asset(href=zarr_url2,\n",
+    "                     title=asset_title2,\n",
     "                     description=asset_description2,\n",
     "                     media_type=\"application/vnd+zarr\",\n",
     "                     roles=asset_roles2,\n",
@@ -463,6 +483,31 @@
     "collection.add_asset(asset_id2, asset2)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "c19ddf59-1a44-4939-9c0f-b0b01c084e55",
+   "metadata": {},
+   "source": [
+    "## Add Archival Legacy Files Asset (on S3) to Collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aec85aa6-e26a-4bd5-ae14-923aa7b92468",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "archival_id = \"legacy-s3-aws\"\n",
+    "archival_asset = pystac.Asset(href='s3://nhgf-development/thredds/bcca/',\n",
+    "                             title='Requester pays access to archival legacy files via S3 API', \n",
+    "                             description='Requester pays, public access (via the S3 API) to archival legacy files from WMA THREDDS server that were used to create this zarr store. This data is stored in an AWS S3 bucket.',\n",
+    "                             roles=[\"data\",\"directory\",\"s3\",\"aws\",\"requester-pays\"])\n",
+    "collection.add_asset(archival_id, archival_asset)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "f67cd5c9-db33-45c2-bc21-480cd67354f4",
diff --git a/workflows/archive/bcca_cmip5_historical_create_collection_from_zarr.ipynb b/workflows/archive/bcca_cmip5_historical_create_collection_from_zarr.ipynb
index bebbdf36..de5c72a2 100644
--- a/workflows/archive/bcca_cmip5_historical_create_collection_from_zarr.ipynb
+++ b/workflows/archive/bcca_cmip5_historical_create_collection_from_zarr.ipynb
@@ -86,10 +86,12 @@
     "# ref: https://github.com/stac-extensions/xarray-assets\n",
     "xarray_opendataset_kwargs = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
     "                          \"xarray:storage_options\": {\"anon\": True, \"client_kwargs\": {\"endpoint_url\":\"https://usgs.osn.mghpcc.org/\"}}}\n",
+    "# title - The displayed title for clients and users.\n",
+    "asset_title = 'Free access to zarr via S3 API'\n",
     "# description for zarr url asset attached to collection (zarr_url)\n",
-    "asset_description = \"Open Storage Network Pod S3 API access to collection zarr group\"\n",
+    "asset_description = \"Free, public access to zarr data store via the S3 API. This data is stored on an Open Storage Network Pod.\"\n",
     "# roles to tag zarr url asset with\n",
-    "asset_roles = [\"data\",\"zarr\",\"s3\"]"
+    "asset_roles = [\"data\",\"zarr\",\"s3\",\"osn\"]"
    ]
   },
   {
@@ -108,10 +110,12 @@
     "# ref: https://github.com/stac-extensions/xarray-assets\n",
     "xarray_opendataset_kwargs2 = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n",
     "                          \"xarray:storage_options\":{\"requester_pays\":True}}\n",
+    "# title - The displayed title for clients and users.\n",
+    "asset_title2 = 'Requester pays access to zarr via S3 API'\n",
     "# description for zarr url asset attached to collection (zarr_url)\n",
-    "asset_description2 = \"S3 access to collection zarr group\"\n",
+    "asset_description2 = \"Requester pays, public access to zarr data store via the S3 API. This data is stored in an AWS S3 bucket.\"\n",
     "# roles to tag zarr url asset with\n",
-    "asset_roles2 = [\"data\",\"zarr\",\"s3\"]"
+    "asset_roles2 = [\"data\",\"zarr\",\"s3\",\"aws\",\"requester-pays\"]"
    ]
   },
   {
@@ -429,6 +433,20 @@
     "## Add zarr url asset to collection"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c444e54a-b748-489e-8c18-3f5efff0cc17",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# first clear out any existing assets - that way if we change the name, we won't have the old asset\n",
+    "# preserved\n",
+    "collection.assets.clear()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -438,6 +456,7 @@
    "source": [
     "asset_id = \"zarr-s3-osn\"\n",
     "asset = pystac.Asset(href=zarr_url,\n",
+    "                     title=asset_title,\n",
     "                     description=asset_description,\n",
     "                     media_type=\"application/vnd+zarr\",\n",
     "                     roles=asset_roles,\n",
@@ -454,8 +473,9 @@
    },
    "outputs": [],
    "source": [
-    "asset_id2 = \"zarr-s3\"\n",
+    "asset_id2 = \"zarr-s3-aws\"\n",
     "asset2 = pystac.Asset(href=zarr_url2,\n",
+    "                     title=asset_title2,\n",
     "                     description=asset_description2,\n",
     "                     media_type=\"application/vnd+zarr\",\n",
     "                     roles=asset_roles2,\n",
@@ -463,6 +483,31 @@
     "collection.add_asset(asset_id2, asset2)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "918c8ab9-668a-4475-b9f6-a06fef61a259",
+   "metadata": {},
+   "source": [
+    "## Add Archival Legacy Files Asset (on S3) to Collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b70ded9-6f7a-405f-8be4-b0afc646a27e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "archival_id = \"legacy-s3-aws\"\n",
+    "archival_asset = pystac.Asset(href='s3://nhgf-development/thredds/bcca/',\n",
+    "                             title='Requester pays access to archival legacy files via S3 API', \n",
+    "                             description='Requester pays, public access (via the S3 API) to archival legacy files from WMA THREDDS server that were used to create this zarr store. This data is stored in an AWS S3 bucket.',\n",
+    "                             roles=[\"data\",\"directory\",\"s3\",\"aws\",\"requester-pays\"])\n",
+    "collection.add_asset(archival_id, archival_asset)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "f67cd5c9-db33-45c2-bc21-480cd67354f4",
-- 
GitLab