From d975688a5379e393baa8ad1095a4c7f0140c53a1 Mon Sep 17 00:00:00 2001 From: amsnyder <asnyder@usgs.gov> Date: Fri, 19 Jan 2024 13:34:37 -0600 Subject: [PATCH] use osn pod endpoint, use collection inputs from dataset attrs --- .../collection.json | 2 +- .../collection.json | 2 +- catalog/Red_River_future/collection.json | 2 +- catalog/Red_River_historical/collection.json | 2 +- ...R_future_create_collection_from_zarr.ipynb | 156 +++++++----------- ...storical_create_collection_from_zarr.ipynb | 156 +++++++----------- ...r_future_create_collection_from_zarr.ipynb | 156 +++++++----------- ...storical_create_collection_from_zarr.ipynb | 156 +++++++----------- 8 files changed, 252 insertions(+), 380 deletions(-) diff --git a/catalog/Red_River_MPI-ESM-LR_future/collection.json b/catalog/Red_River_MPI-ESM-LR_future/collection.json index f77bfeb2..9efdaea1 100644 --- a/catalog/Red_River_MPI-ESM-LR_future/collection.json +++ b/catalog/Red_River_MPI-ESM-LR_future/collection.json @@ -2,7 +2,7 @@ "type": "Collection", "id": "Red_River_MPI-ESM-LR_future", "stac_version": "1.0.0", - "description": "Red_River_MPI-ESM-LR_future", + "description": "downscaled estimate of tasmin for fudge experiment: RRtnp1-EDQM-A38aaL01K00_rcp85_r1i1p1", "links": [ { "rel": "root", diff --git a/catalog/Red_River_MPI-ESM-LR_historical/collection.json b/catalog/Red_River_MPI-ESM-LR_historical/collection.json index 5190362f..c3bafb1b 100644 --- a/catalog/Red_River_MPI-ESM-LR_historical/collection.json +++ b/catalog/Red_River_MPI-ESM-LR_historical/collection.json @@ -2,7 +2,7 @@ "type": "Collection", "id": "Red_River_MPI-ESM-LR_historical", "stac_version": "1.0.0", - "description": "Red_River_MPI-ESM-LR_historical", + "description": "downscaled estimate of tasmin for fudge experiment: RRtnp1-CDFt-B30aaL01K00_historical_r1i1p1", "links": [ { "rel": "root", diff --git a/catalog/Red_River_future/collection.json b/catalog/Red_River_future/collection.json index 89fc1a3f..eb35533c 100644 --- a/catalog/Red_River_future/collection.json +++ b/catalog/Red_River_future/collection.json @@ -2,7 +2,7 @@ "type": "Collection", "id": "Red_River_future", "stac_version": "1.0.0", - "description": "Red_River_future", + "description": "Auxiliary QC mask tasmin_qcmask for fudge experiment: RRtnp1-BCQM-A28aaL01K00_rcp85_r1i1p1", "links": [ { "rel": "root", diff --git a/catalog/Red_River_historical/collection.json b/catalog/Red_River_historical/collection.json index 7ee74a9f..f5f815f4 100644 --- a/catalog/Red_River_historical/collection.json +++ b/catalog/Red_River_historical/collection.json @@ -2,7 +2,7 @@ "type": "Collection", "id": "Red_River_historical", "stac_version": "1.0.0", - "description": "Red_River_historical", + "description": "downscaled estimate of tasmax for fudge experiment: RRtxp1-BCQM-A10aaL01K00_historical_r6i1p1", "links": [ { "rel": "root", diff --git a/workflows/archive/Red_River_MPI-ESM-LR_future_create_collection_from_zarr.ipynb b/workflows/archive/Red_River_MPI-ESM-LR_future_create_collection_from_zarr.ipynb index 79739bf7..0353d4a3 100644 --- a/workflows/archive/Red_River_MPI-ESM-LR_future_create_collection_from_zarr.ipynb +++ b/workflows/archive/Red_River_MPI-ESM-LR_future_create_collection_from_zarr.ipynb @@ -36,57 +36,39 @@ "import metpy\n", "import cartopy.crs as ccrs\n", "import cfunits\n", - "import json" - ] - }, - { - "cell_type": "markdown", - "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", - "metadata": {}, - "source": [ - "## Open up NHGF STAC Catalog" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", - "metadata": {}, - "outputs": [], - "source": [ - "# define folder location where your STAC catalog json file is\n", - "catalog_path = os.path.join('..', '..', 'catalog')\n", - "# open catalog\n", - "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" + "import json\n", + "import sys\n", + "sys.path.insert(1, '..')\n", + "import stac_helpers" ] }, { "cell_type": "markdown", - "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "id": "5c57d1f0-db77-4c9a-9837-9673b11f9067", "metadata": {}, "source": [ - "## Collection Metadata Input" + "## Collection ID" ] }, { "cell_type": "code", "execution_count": null, - "id": "482d204d-b5b6-40e5-ac42-55b459be1097", - "metadata": {}, + "id": "113378fe-c9d9-4734-a037-d5d470d15d31", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# name for STAC collection\n", - "collection_id = 'Red_River_MPI-ESM-LR_future'\n", - "# description of STAC collection\n", - "collection_description = 'Red_River_MPI-ESM-LR_future'\n", - "# license for dataset\n", - "collection_license = 'CC0-1.0'" + "collection_id = 'Red_River_MPI-ESM-LR_future'" ] }, { "cell_type": "markdown", "id": "116b5837-8e85-4ae7-964a-803533ded714", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "## Asset Metadata Input" ] @@ -101,7 +83,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url = 's3://mdmf/gdp/Red_River_MPI-ESM-LR_future.zarr/'\n", + "zarr_url = f's3://mdmf/gdp/{collection_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -123,7 +105,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url2 = 's3://nhgf-development/workspace/DataConversion/Red_River_MPI-ESM-LR_future.zarr/'\n", + "zarr_url2 = f's3://nhgf-development/workspace/DataConversion/{collection_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -152,27 +134,45 @@ "metadata": {}, "outputs": [], "source": [ - "# # open and view zarr dataset\n", - "# fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", - "# ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", - "# backend_kwargs={'consolidated':True}, chunks={})\n", - "# ds" + "# open and view zarr dataset\n", + "fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", + "ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", + " backend_kwargs={'consolidated':True}, chunks={})\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "metadata": {}, + "source": [ + "## Collection Metadata Input" ] }, { "cell_type": "code", "execution_count": null, - "id": "83f0ec9d-02a4-42bd-aee2-4dfdf8b06235", + "id": "482d204d-b5b6-40e5-ac42-55b459be1097", + "metadata": {}, + "outputs": [], + "source": [ + "# description of STAC collection\n", + "collection_description = ds.attrs['title']\n", + "print(f'collection description: {collection_description}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "977050a7-9c78-4eec-bbaf-bb9da06b3258", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# open and view zarr dataset\n", - "fs2 = fsspec.filesystem('s3', requester_pays=True)\n", - "ds = xr.open_dataset(fs2.get_mapper(zarr_url2), engine='zarr', \n", - " backend_kwargs={'consolidated':True}, chunks={})\n", - "ds" + "# license for dataset\n", + "# no license in dataset attrs, but this is a NOAA dataset, so we will treat it as public domain\n", + "collection_license = 'CC0-1.0'" ] }, { @@ -230,53 +230,6 @@ "crs = ds[list(ds.keys())[0]].metpy.cartopy_crs" ] }, - { - "cell_type": "markdown", - "id": "8fbfecfb-9886-4d06-a34c-6471cb0a6053", - "metadata": {}, - "source": [ - "## Plot a map" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4eb4d027-4266-4a0b-8f16-bacfbef06242", - "metadata": {}, - "outputs": [], - "source": [ - "# # plot a map of a single variable\n", - "# var_to_plot = 'SNOW'\n", - "# da = ds[var_to_plot].sel(time='2014-03-01 00:00').load()\n", - "# da.hvplot.quadmesh(x='lon', y='lat', rasterize=True,\n", - "# geo=True, tiles='OSM', alpha=0.7, cmap='turbo')" - ] - }, - { - "cell_type": "markdown", - "id": "5e057a6c-06fb-4406-823b-e81c58e520e4", - "metadata": {}, - "source": [ - "## Plot a time series at a specific point\n", - "This can help you verify a variable's values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7de2681-88c2-4597-857c-8f169c596f8b", - "metadata": {}, - "outputs": [], - "source": [ - "# # enter lat, lon of point you want to plot time series for\n", - "# lat,lon = 39.978322,-105.2772194\n", - "# time_start = '2013-01-01 00:00'\n", - "# time_end = '2013-12-31 00:00'\n", - "# x, y = crs.transform_point(lon, lat, src_crs=ccrs.PlateCarree()) # PlateCaree = Lat,Lon\n", - "# da = ds[var_to_plot].sel(x=x, y=y, method='nearest').sel(time=slice(time_start,time_end)).load()\n", - "# da.hvplot(x=dim_names_dict['T'], grid=True)" - ] - }, { "cell_type": "markdown", "id": "a8c3ed37-8564-400b-a7fb-25bd5e43d21c", @@ -355,10 +308,25 @@ }, { "cell_type": "markdown", - "id": "cfb71202-03df-45b5-ac2f-0dc2ee1ab780", + "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", + "metadata": { + "tags": [] + }, + "source": [ + "## Open up NHGF STAC Catalog and create a collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", "metadata": {}, + "outputs": [], "source": [ - "## Create pystac collection" + "# define folder location where your STAC catalog json file is\n", + "catalog_path = os.path.join('..', '..', 'catalog')\n", + "# open catalog\n", + "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" ] }, { diff --git a/workflows/archive/Red_River_MPI-ESM-LR_historical_create_collection_from_zarr.ipynb b/workflows/archive/Red_River_MPI-ESM-LR_historical_create_collection_from_zarr.ipynb index 546542a9..00825081 100644 --- a/workflows/archive/Red_River_MPI-ESM-LR_historical_create_collection_from_zarr.ipynb +++ b/workflows/archive/Red_River_MPI-ESM-LR_historical_create_collection_from_zarr.ipynb @@ -36,57 +36,39 @@ "import metpy\n", "import cartopy.crs as ccrs\n", "import cfunits\n", - "import json" - ] - }, - { - "cell_type": "markdown", - "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", - "metadata": {}, - "source": [ - "## Open up NHGF STAC Catalog" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", - "metadata": {}, - "outputs": [], - "source": [ - "# define folder location where your STAC catalog json file is\n", - "catalog_path = os.path.join('..', '..', 'catalog')\n", - "# open catalog\n", - "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" + "import json\n", + "import sys\n", + "sys.path.insert(1, '..')\n", + "import stac_helpers" ] }, { "cell_type": "markdown", - "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "id": "66df9726-937b-4fef-b750-f0eab0cd50da", "metadata": {}, "source": [ - "## Collection Metadata Input" + "## Collection ID" ] }, { "cell_type": "code", "execution_count": null, - "id": "482d204d-b5b6-40e5-ac42-55b459be1097", - "metadata": {}, + "id": "765226f7-32c2-4bc2-82b4-e00528cb8daf", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# name for STAC collection\n", - "collection_id = 'Red_River_MPI-ESM-LR_historical'\n", - "# description of STAC collection\n", - "collection_description = 'Red_River_MPI-ESM-LR_historical'\n", - "# license for dataset\n", - "collection_license = 'CC0-1.0'" + "collection_id = 'Red_River_MPI-ESM-LR_historical'" ] }, { "cell_type": "markdown", "id": "116b5837-8e85-4ae7-964a-803533ded714", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "## Asset Metadata Input" ] @@ -101,7 +83,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url = 's3://mdmf/gdp/Red_River_MPI-ESM-LR_historical.zarr/'\n", + "zarr_url = f's3://mdmf/gdp/{collection_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -123,7 +105,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url2 = 's3://nhgf-development/workspace/DataConversion/Red_River_MPI-ESM-LR_historical.zarr/'\n", + "zarr_url2 = f's3://nhgf-development/workspace/DataConversion/{collection_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -152,27 +134,45 @@ "metadata": {}, "outputs": [], "source": [ - "# # open and view zarr dataset\n", - "# fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", - "# ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", - "# backend_kwargs={'consolidated':True}, chunks={})\n", - "# ds" + "# open and view zarr dataset\n", + "fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", + "ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", + " backend_kwargs={'consolidated':True}, chunks={})\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "metadata": {}, + "source": [ + "## Collection Metadata Input" ] }, { "cell_type": "code", "execution_count": null, - "id": "83f0ec9d-02a4-42bd-aee2-4dfdf8b06235", + "id": "482d204d-b5b6-40e5-ac42-55b459be1097", + "metadata": {}, + "outputs": [], + "source": [ + "# description of STAC collection\n", + "collection_description = ds.attrs['title']\n", + "print(f'collection description: {collection_description}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "749dbcdb-6ad6-48fe-9938-f1b706aaee8e", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# open and view zarr dataset\n", - "fs2 = fsspec.filesystem('s3', requester_pays=True)\n", - "ds = xr.open_dataset(fs2.get_mapper(zarr_url2), engine='zarr', \n", - " backend_kwargs={'consolidated':True}, chunks={})\n", - "ds" + "# license for dataset\n", + "# no license in dataset attrs, but this is a NOAA dataset, so we will treat it as public domain\n", + "collection_license = 'CC0-1.0'" ] }, { @@ -230,53 +230,6 @@ "crs = ds[list(ds.keys())[0]].metpy.cartopy_crs" ] }, - { - "cell_type": "markdown", - "id": "8fbfecfb-9886-4d06-a34c-6471cb0a6053", - "metadata": {}, - "source": [ - "## Plot a map" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4eb4d027-4266-4a0b-8f16-bacfbef06242", - "metadata": {}, - "outputs": [], - "source": [ - "# # plot a map of a single variable\n", - "# var_to_plot = 'SNOW'\n", - "# da = ds[var_to_plot].sel(time='2014-03-01 00:00').load()\n", - "# da.hvplot.quadmesh(x='lon', y='lat', rasterize=True,\n", - "# geo=True, tiles='OSM', alpha=0.7, cmap='turbo')" - ] - }, - { - "cell_type": "markdown", - "id": "5e057a6c-06fb-4406-823b-e81c58e520e4", - "metadata": {}, - "source": [ - "## Plot a time series at a specific point\n", - "This can help you verify a variable's values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7de2681-88c2-4597-857c-8f169c596f8b", - "metadata": {}, - "outputs": [], - "source": [ - "# # enter lat, lon of point you want to plot time series for\n", - "# lat,lon = 39.978322,-105.2772194\n", - "# time_start = '2013-01-01 00:00'\n", - "# time_end = '2013-12-31 00:00'\n", - "# x, y = crs.transform_point(lon, lat, src_crs=ccrs.PlateCarree()) # PlateCaree = Lat,Lon\n", - "# da = ds[var_to_plot].sel(x=x, y=y, method='nearest').sel(time=slice(time_start,time_end)).load()\n", - "# da.hvplot(x=dim_names_dict['T'], grid=True)" - ] - }, { "cell_type": "markdown", "id": "a8c3ed37-8564-400b-a7fb-25bd5e43d21c", @@ -355,10 +308,25 @@ }, { "cell_type": "markdown", - "id": "cfb71202-03df-45b5-ac2f-0dc2ee1ab780", + "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", + "metadata": { + "tags": [] + }, + "source": [ + "## Open up NHGF STAC Catalog and create a collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", "metadata": {}, + "outputs": [], "source": [ - "## Create pystac collection" + "# define folder location where your STAC catalog json file is\n", + "catalog_path = os.path.join('..', '..', 'catalog')\n", + "# open catalog\n", + "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" ] }, { diff --git a/workflows/archive/Red_River_future_create_collection_from_zarr.ipynb b/workflows/archive/Red_River_future_create_collection_from_zarr.ipynb index cbc663bf..e75a2459 100644 --- a/workflows/archive/Red_River_future_create_collection_from_zarr.ipynb +++ b/workflows/archive/Red_River_future_create_collection_from_zarr.ipynb @@ -36,57 +36,39 @@ "import metpy\n", "import cartopy.crs as ccrs\n", "import cfunits\n", - "import json" - ] - }, - { - "cell_type": "markdown", - "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", - "metadata": {}, - "source": [ - "## Open up NHGF STAC Catalog" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", - "metadata": {}, - "outputs": [], - "source": [ - "# define folder location where your STAC catalog json file is\n", - "catalog_path = os.path.join('..', '..', 'catalog')\n", - "# open catalog\n", - "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" + "import json\n", + "import sys\n", + "sys.path.insert(1, '..')\n", + "import stac_helpers" ] }, { "cell_type": "markdown", - "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "id": "e157f566-fd9f-4f90-af46-8e53fd570c12", "metadata": {}, "source": [ - "## Collection Metadata Input" + "## Collection ID" ] }, { "cell_type": "code", "execution_count": null, - "id": "482d204d-b5b6-40e5-ac42-55b459be1097", - "metadata": {}, + "id": "9512821f-d4e9-4fc8-a87d-152b4b70e862", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# name for STAC collection\n", - "collection_id = 'Red_River_future'\n", - "# description of STAC collection\n", - "collection_description = 'Red_River_future'\n", - "# license for dataset\n", - "collection_license = 'CC0-1.0'" + "collection_id = 'Red_River_future'" ] }, { "cell_type": "markdown", "id": "116b5837-8e85-4ae7-964a-803533ded714", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "## Asset Metadata Input" ] @@ -101,7 +83,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url = 's3://mdmf/gdp/Red_River_future.zarr/'\n", + "zarr_url = f's3://mdmf/gdp/{collection_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -123,7 +105,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url2 = 's3://nhgf-development/workspace/DataConversion/Red_River_future.zarr/'\n", + "zarr_url2 = f's3://nhgf-development/workspace/DataConversion/{collection_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -152,27 +134,45 @@ "metadata": {}, "outputs": [], "source": [ - "# # open and view zarr dataset\n", - "# fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", - "# ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", - "# backend_kwargs={'consolidated':True}, chunks={})\n", - "# ds" + "# open and view zarr dataset\n", + "fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", + "ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", + " backend_kwargs={'consolidated':True}, chunks={})\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "metadata": {}, + "source": [ + "## Collection Metadata Input" ] }, { "cell_type": "code", "execution_count": null, - "id": "83f0ec9d-02a4-42bd-aee2-4dfdf8b06235", + "id": "482d204d-b5b6-40e5-ac42-55b459be1097", + "metadata": {}, + "outputs": [], + "source": [ + "# description of STAC collection\n", + "collection_description = ds.attrs['title']\n", + "print(f'collection description: {collection_description}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5ef08688-2e57-4ba1-9ab4-3bed1214b4b4", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# open and view zarr dataset\n", - "fs2 = fsspec.filesystem('s3', requester_pays=True)\n", - "ds = xr.open_dataset(fs2.get_mapper(zarr_url2), engine='zarr', \n", - " backend_kwargs={'consolidated':True}, chunks={})\n", - "ds" + "# license for dataset\n", + "# no license in dataset attrs, but this is a NOAA dataset, so we will treat it as public domain\n", + "collection_license = 'CC0-1.0'" ] }, { @@ -230,53 +230,6 @@ "crs = ds[list(ds.keys())[0]].metpy.cartopy_crs" ] }, - { - "cell_type": "markdown", - "id": "8fbfecfb-9886-4d06-a34c-6471cb0a6053", - "metadata": {}, - "source": [ - "## Plot a map" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4eb4d027-4266-4a0b-8f16-bacfbef06242", - "metadata": {}, - "outputs": [], - "source": [ - "# # plot a map of a single variable\n", - "# var_to_plot = 'SNOW'\n", - "# da = ds[var_to_plot].sel(time='2014-03-01 00:00').load()\n", - "# da.hvplot.quadmesh(x='lon', y='lat', rasterize=True,\n", - "# geo=True, tiles='OSM', alpha=0.7, cmap='turbo')" - ] - }, - { - "cell_type": "markdown", - "id": "5e057a6c-06fb-4406-823b-e81c58e520e4", - "metadata": {}, - "source": [ - "## Plot a time series at a specific point\n", - "This can help you verify a variable's values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7de2681-88c2-4597-857c-8f169c596f8b", - "metadata": {}, - "outputs": [], - "source": [ - "# # enter lat, lon of point you want to plot time series for\n", - "# lat,lon = 39.978322,-105.2772194\n", - "# time_start = '2013-01-01 00:00'\n", - "# time_end = '2013-12-31 00:00'\n", - "# x, y = crs.transform_point(lon, lat, src_crs=ccrs.PlateCarree()) # PlateCaree = Lat,Lon\n", - "# da = ds[var_to_plot].sel(x=x, y=y, method='nearest').sel(time=slice(time_start,time_end)).load()\n", - "# da.hvplot(x=dim_names_dict['T'], grid=True)" - ] - }, { "cell_type": "markdown", "id": "a8c3ed37-8564-400b-a7fb-25bd5e43d21c", @@ -355,10 +308,25 @@ }, { "cell_type": "markdown", - "id": "cfb71202-03df-45b5-ac2f-0dc2ee1ab780", + "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", + "metadata": { + "tags": [] + }, + "source": [ + "## Open up NHGF STAC Catalog and create a collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", "metadata": {}, + "outputs": [], "source": [ - "## Create pystac collection" + "# define folder location where your STAC catalog json file is\n", + "catalog_path = os.path.join('..', '..', 'catalog')\n", + "# open catalog\n", + "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" ] }, { diff --git a/workflows/archive/Red_River_historical_create_collection_from_zarr.ipynb b/workflows/archive/Red_River_historical_create_collection_from_zarr.ipynb index 9e862256..19ca0315 100644 --- a/workflows/archive/Red_River_historical_create_collection_from_zarr.ipynb +++ b/workflows/archive/Red_River_historical_create_collection_from_zarr.ipynb @@ -36,57 +36,39 @@ "import metpy\n", "import cartopy.crs as ccrs\n", "import cfunits\n", - "import json" - ] - }, - { - "cell_type": "markdown", - "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", - "metadata": {}, - "source": [ - "## Open up NHGF STAC Catalog" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", - "metadata": {}, - "outputs": [], - "source": [ - "# define folder location where your STAC catalog json file is\n", - "catalog_path = os.path.join('..', '..', 'catalog')\n", - "# open catalog\n", - "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" + "import json\n", + "import sys\n", + "sys.path.insert(1, '..')\n", + "import stac_helpers" ] }, { "cell_type": "markdown", - "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "id": "277e6724-a7c3-4ad1-ae11-eba53eb76af1", "metadata": {}, "source": [ - "## Collection Metadata Input" + "## Collection ID" ] }, { "cell_type": "code", "execution_count": null, - "id": "482d204d-b5b6-40e5-ac42-55b459be1097", - "metadata": {}, + "id": "2e75e1c7-0f3a-4d34-88c2-da200a1342a5", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# name for STAC collection\n", - "collection_id = 'Red_River_historical'\n", - "# description of STAC collection\n", - "collection_description = 'Red_River_historical'\n", - "# license for dataset\n", - "collection_license = 'CC0-1.0'" + "collection_id = 'Red_River_historical'" ] }, { "cell_type": "markdown", "id": "116b5837-8e85-4ae7-964a-803533ded714", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "## Asset Metadata Input" ] @@ -101,7 +83,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url = 's3://mdmf/gdp/Red_River_historical.zarr/'\n", + "zarr_url = f's3://mdmf/gdp/{collection_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -123,7 +105,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url2 = 's3://nhgf-development/workspace/DataConversion/Red_River_historical.zarr/'\n", + "zarr_url2 = f's3://nhgf-development/workspace/DataConversion/{collection_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -152,27 +134,45 @@ "metadata": {}, "outputs": [], "source": [ - "# # open and view zarr dataset\n", - "# fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", - "# ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", - "# backend_kwargs={'consolidated':True}, chunks={})\n", - "# ds" + "# open and view zarr dataset\n", + "fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", + "ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", + " backend_kwargs={'consolidated':True}, chunks={})\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "metadata": {}, + "source": [ + "## Collection Metadata Input" ] }, { "cell_type": "code", "execution_count": null, - "id": "83f0ec9d-02a4-42bd-aee2-4dfdf8b06235", + "id": "482d204d-b5b6-40e5-ac42-55b459be1097", + "metadata": {}, + "outputs": [], + "source": [ + "# description of STAC collection\n", + "collection_description = ds.attrs['title']\n", + "print(f'collection description: {collection_description}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbd16e3c-7a58-4f60-9cb1-7ee31994b6ab", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# open and view zarr dataset\n", - "fs2 = fsspec.filesystem('s3', requester_pays=True)\n", - "ds = xr.open_dataset(fs2.get_mapper(zarr_url2), engine='zarr', \n", - " backend_kwargs={'consolidated':True}, chunks={})\n", - "ds" + "# license for dataset\n", + "# no license in dataset attrs, but this is a NOAA dataset, so we will treat it as public domain\n", + "collection_license = 'CC0-1.0'" ] }, { @@ -230,53 +230,6 @@ "crs = ds[list(ds.keys())[0]].metpy.cartopy_crs" ] }, - { - "cell_type": "markdown", - "id": "8fbfecfb-9886-4d06-a34c-6471cb0a6053", - "metadata": {}, - "source": [ - "## Plot a map" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4eb4d027-4266-4a0b-8f16-bacfbef06242", - "metadata": {}, - "outputs": [], - "source": [ - "# # plot a map of a single variable\n", - "# var_to_plot = 'SNOW'\n", - "# da = ds[var_to_plot].sel(time='2014-03-01 00:00').load()\n", - "# da.hvplot.quadmesh(x='lon', y='lat', rasterize=True,\n", - "# geo=True, tiles='OSM', alpha=0.7, cmap='turbo')" - ] - }, - { - "cell_type": "markdown", - "id": "5e057a6c-06fb-4406-823b-e81c58e520e4", - "metadata": {}, - "source": [ - "## Plot a time series at a specific point\n", - "This can help you verify a variable's values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7de2681-88c2-4597-857c-8f169c596f8b", - "metadata": {}, - "outputs": [], - "source": [ - "# # enter lat, lon of point you want to plot time series for\n", - "# lat,lon = 39.978322,-105.2772194\n", - "# time_start = '2013-01-01 00:00'\n", - "# time_end = '2013-12-31 00:00'\n", - "# x, y = crs.transform_point(lon, lat, src_crs=ccrs.PlateCarree()) # PlateCaree = Lat,Lon\n", - "# da = ds[var_to_plot].sel(x=x, y=y, method='nearest').sel(time=slice(time_start,time_end)).load()\n", - "# da.hvplot(x=dim_names_dict['T'], grid=True)" - ] - }, { "cell_type": "markdown", "id": "a8c3ed37-8564-400b-a7fb-25bd5e43d21c", @@ -355,10 +308,25 @@ }, { "cell_type": "markdown", - "id": "cfb71202-03df-45b5-ac2f-0dc2ee1ab780", + "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", + "metadata": { + "tags": [] + }, + "source": [ + "## Open up NHGF STAC Catalog and create a collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", "metadata": {}, + "outputs": [], "source": [ - "## Create pystac collection" + "# define folder location where your STAC catalog json file is\n", + "catalog_path = os.path.join('..', '..', 'catalog')\n", + "# open catalog\n", + "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" ] }, { -- GitLab