diff --git a/catalog/Red_River_MPI-ESM-LR_future/collection.json b/catalog/Red_River_MPI-ESM-LR_future/collection.json index f77bfeb2ec4c826c67e270bce40c32afa8bae405..9efdaea1335943be5167b10186ec4954615467b9 100644 --- a/catalog/Red_River_MPI-ESM-LR_future/collection.json +++ b/catalog/Red_River_MPI-ESM-LR_future/collection.json @@ -2,7 +2,7 @@ "type": "Collection", "id": "Red_River_MPI-ESM-LR_future", "stac_version": "1.0.0", - "description": "Red_River_MPI-ESM-LR_future", + "description": "downscaled estimate of tasmin for fudge experiment: RRtnp1-EDQM-A38aaL01K00_rcp85_r1i1p1", "links": [ { "rel": "root", diff --git a/catalog/Red_River_MPI-ESM-LR_historical/collection.json b/catalog/Red_River_MPI-ESM-LR_historical/collection.json index 5190362f179e85a865c418a245c38487e2b312de..c3bafb1b79aaea161e76d4e905284a3a4417b53e 100644 --- a/catalog/Red_River_MPI-ESM-LR_historical/collection.json +++ b/catalog/Red_River_MPI-ESM-LR_historical/collection.json @@ -2,7 +2,7 @@ "type": "Collection", "id": "Red_River_MPI-ESM-LR_historical", "stac_version": "1.0.0", - "description": "Red_River_MPI-ESM-LR_historical", + "description": "downscaled estimate of tasmin for fudge experiment: RRtnp1-CDFt-B30aaL01K00_historical_r1i1p1", "links": [ { "rel": "root", diff --git a/catalog/Red_River_future/collection.json b/catalog/Red_River_future/collection.json index 89fc1a3fe4eeba6bd07cdbafc2feb309dfb29c0f..eb35533c0dab2f04c89d69ca458c50bb31623873 100644 --- a/catalog/Red_River_future/collection.json +++ b/catalog/Red_River_future/collection.json @@ -2,7 +2,7 @@ "type": "Collection", "id": "Red_River_future", "stac_version": "1.0.0", - "description": "Red_River_future", + "description": "Auxiliary QC mask tasmin_qcmask for fudge experiment: RRtnp1-BCQM-A28aaL01K00_rcp85_r1i1p1", "links": [ { "rel": "root", diff --git a/catalog/Red_River_historical/collection.json b/catalog/Red_River_historical/collection.json index 7ee74a9faafad3b1fe741cc4e9fa7e791e3c7dda..f5f815f42c7939105fdfa5711bce256f240108f7 100644 --- a/catalog/Red_River_historical/collection.json +++ b/catalog/Red_River_historical/collection.json @@ -2,7 +2,7 @@ "type": "Collection", "id": "Red_River_historical", "stac_version": "1.0.0", - "description": "Red_River_historical", + "description": "downscaled estimate of tasmax for fudge experiment: RRtxp1-BCQM-A10aaL01K00_historical_r6i1p1", "links": [ { "rel": "root", diff --git a/workflows/archive/Red_River_MPI-ESM-LR_future_create_collection_from_zarr.ipynb b/workflows/archive/Red_River_MPI-ESM-LR_future_create_collection_from_zarr.ipynb index 79739bf7aee3ae21ac12650e976a528b22578a86..0353d4a35d0a6c40629d0473bd568c7f09b7bab6 100644 --- a/workflows/archive/Red_River_MPI-ESM-LR_future_create_collection_from_zarr.ipynb +++ b/workflows/archive/Red_River_MPI-ESM-LR_future_create_collection_from_zarr.ipynb @@ -36,57 +36,39 @@ "import metpy\n", "import cartopy.crs as ccrs\n", "import cfunits\n", - "import json" - ] - }, - { - "cell_type": "markdown", - "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", - "metadata": {}, - "source": [ - "## Open up NHGF STAC Catalog" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", - "metadata": {}, - "outputs": [], - "source": [ - "# define folder location where your STAC catalog json file is\n", - "catalog_path = os.path.join('..', '..', 'catalog')\n", - "# open catalog\n", - "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" + "import json\n", + "import sys\n", + "sys.path.insert(1, '..')\n", + "import stac_helpers" ] }, { "cell_type": "markdown", - "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "id": "5c57d1f0-db77-4c9a-9837-9673b11f9067", "metadata": {}, "source": [ - "## Collection Metadata Input" + "## Collection ID" ] }, { "cell_type": "code", "execution_count": null, - "id": "482d204d-b5b6-40e5-ac42-55b459be1097", - "metadata": {}, + "id": "113378fe-c9d9-4734-a037-d5d470d15d31", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# name for STAC collection\n", - "collection_id = 'Red_River_MPI-ESM-LR_future'\n", - "# description of STAC collection\n", - "collection_description = 'Red_River_MPI-ESM-LR_future'\n", - "# license for dataset\n", - "collection_license = 'CC0-1.0'" + "collection_id = 'Red_River_MPI-ESM-LR_future'" ] }, { "cell_type": "markdown", "id": "116b5837-8e85-4ae7-964a-803533ded714", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "## Asset Metadata Input" ] @@ -101,7 +83,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url = 's3://mdmf/gdp/Red_River_MPI-ESM-LR_future.zarr/'\n", + "zarr_url = f's3://mdmf/gdp/{collection_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -123,7 +105,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url2 = 's3://nhgf-development/workspace/DataConversion/Red_River_MPI-ESM-LR_future.zarr/'\n", + "zarr_url2 = f's3://nhgf-development/workspace/DataConversion/{collection_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -152,27 +134,45 @@ "metadata": {}, "outputs": [], "source": [ - "# # open and view zarr dataset\n", - "# fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", - "# ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", - "# backend_kwargs={'consolidated':True}, chunks={})\n", - "# ds" + "# open and view zarr dataset\n", + "fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", + "ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", + " backend_kwargs={'consolidated':True}, chunks={})\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "metadata": {}, + "source": [ + "## Collection Metadata Input" ] }, { "cell_type": "code", "execution_count": null, - "id": "83f0ec9d-02a4-42bd-aee2-4dfdf8b06235", + "id": "482d204d-b5b6-40e5-ac42-55b459be1097", + "metadata": {}, + "outputs": [], + "source": [ + "# description of STAC collection\n", + "collection_description = ds.attrs['title']\n", + "print(f'collection description: {collection_description}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "977050a7-9c78-4eec-bbaf-bb9da06b3258", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# open and view zarr dataset\n", - "fs2 = fsspec.filesystem('s3', requester_pays=True)\n", - "ds = xr.open_dataset(fs2.get_mapper(zarr_url2), engine='zarr', \n", - " backend_kwargs={'consolidated':True}, chunks={})\n", - "ds" + "# license for dataset\n", + "# no license in dataset attrs, but this is a NOAA dataset, so we will treat it as public domain\n", + "collection_license = 'CC0-1.0'" ] }, { @@ -230,53 +230,6 @@ "crs = ds[list(ds.keys())[0]].metpy.cartopy_crs" ] }, - { - "cell_type": "markdown", - "id": "8fbfecfb-9886-4d06-a34c-6471cb0a6053", - "metadata": {}, - "source": [ - "## Plot a map" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4eb4d027-4266-4a0b-8f16-bacfbef06242", - "metadata": {}, - "outputs": [], - "source": [ - "# # plot a map of a single variable\n", - "# var_to_plot = 'SNOW'\n", - "# da = ds[var_to_plot].sel(time='2014-03-01 00:00').load()\n", - "# da.hvplot.quadmesh(x='lon', y='lat', rasterize=True,\n", - "# geo=True, tiles='OSM', alpha=0.7, cmap='turbo')" - ] - }, - { - "cell_type": "markdown", - "id": "5e057a6c-06fb-4406-823b-e81c58e520e4", - "metadata": {}, - "source": [ - "## Plot a time series at a specific point\n", - "This can help you verify a variable's values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7de2681-88c2-4597-857c-8f169c596f8b", - "metadata": {}, - "outputs": [], - "source": [ - "# # enter lat, lon of point you want to plot time series for\n", - "# lat,lon = 39.978322,-105.2772194\n", - "# time_start = '2013-01-01 00:00'\n", - "# time_end = '2013-12-31 00:00'\n", - "# x, y = crs.transform_point(lon, lat, src_crs=ccrs.PlateCarree()) # PlateCaree = Lat,Lon\n", - "# da = ds[var_to_plot].sel(x=x, y=y, method='nearest').sel(time=slice(time_start,time_end)).load()\n", - "# da.hvplot(x=dim_names_dict['T'], grid=True)" - ] - }, { "cell_type": "markdown", "id": "a8c3ed37-8564-400b-a7fb-25bd5e43d21c", @@ -355,10 +308,25 @@ }, { "cell_type": "markdown", - "id": "cfb71202-03df-45b5-ac2f-0dc2ee1ab780", + "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", + "metadata": { + "tags": [] + }, + "source": [ + "## Open up NHGF STAC Catalog and create a collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", "metadata": {}, + "outputs": [], "source": [ - "## Create pystac collection" + "# define folder location where your STAC catalog json file is\n", + "catalog_path = os.path.join('..', '..', 'catalog')\n", + "# open catalog\n", + "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" ] }, { diff --git a/workflows/archive/Red_River_MPI-ESM-LR_historical_create_collection_from_zarr.ipynb b/workflows/archive/Red_River_MPI-ESM-LR_historical_create_collection_from_zarr.ipynb index 546542a9061f88965076c5732d1acb972e579b4b..00825081e48c6bc1c0bbc1e4a3958efa60a63a2f 100644 --- a/workflows/archive/Red_River_MPI-ESM-LR_historical_create_collection_from_zarr.ipynb +++ b/workflows/archive/Red_River_MPI-ESM-LR_historical_create_collection_from_zarr.ipynb @@ -36,57 +36,39 @@ "import metpy\n", "import cartopy.crs as ccrs\n", "import cfunits\n", - "import json" - ] - }, - { - "cell_type": "markdown", - "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", - "metadata": {}, - "source": [ - "## Open up NHGF STAC Catalog" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", - "metadata": {}, - "outputs": [], - "source": [ - "# define folder location where your STAC catalog json file is\n", - "catalog_path = os.path.join('..', '..', 'catalog')\n", - "# open catalog\n", - "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" + "import json\n", + "import sys\n", + "sys.path.insert(1, '..')\n", + "import stac_helpers" ] }, { "cell_type": "markdown", - "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "id": "66df9726-937b-4fef-b750-f0eab0cd50da", "metadata": {}, "source": [ - "## Collection Metadata Input" + "## Collection ID" ] }, { "cell_type": "code", "execution_count": null, - "id": "482d204d-b5b6-40e5-ac42-55b459be1097", - "metadata": {}, + "id": "765226f7-32c2-4bc2-82b4-e00528cb8daf", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# name for STAC collection\n", - "collection_id = 'Red_River_MPI-ESM-LR_historical'\n", - "# description of STAC collection\n", - "collection_description = 'Red_River_MPI-ESM-LR_historical'\n", - "# license for dataset\n", - "collection_license = 'CC0-1.0'" + "collection_id = 'Red_River_MPI-ESM-LR_historical'" ] }, { "cell_type": "markdown", "id": "116b5837-8e85-4ae7-964a-803533ded714", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "## Asset Metadata Input" ] @@ -101,7 +83,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url = 's3://mdmf/gdp/Red_River_MPI-ESM-LR_historical.zarr/'\n", + "zarr_url = f's3://mdmf/gdp/{collection_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -123,7 +105,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url2 = 's3://nhgf-development/workspace/DataConversion/Red_River_MPI-ESM-LR_historical.zarr/'\n", + "zarr_url2 = f's3://nhgf-development/workspace/DataConversion/{collection_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -152,27 +134,45 @@ "metadata": {}, "outputs": [], "source": [ - "# # open and view zarr dataset\n", - "# fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", - "# ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", - "# backend_kwargs={'consolidated':True}, chunks={})\n", - "# ds" + "# open and view zarr dataset\n", + "fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", + "ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", + " backend_kwargs={'consolidated':True}, chunks={})\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "metadata": {}, + "source": [ + "## Collection Metadata Input" ] }, { "cell_type": "code", "execution_count": null, - "id": "83f0ec9d-02a4-42bd-aee2-4dfdf8b06235", + "id": "482d204d-b5b6-40e5-ac42-55b459be1097", + "metadata": {}, + "outputs": [], + "source": [ + "# description of STAC collection\n", + "collection_description = ds.attrs['title']\n", + "print(f'collection description: {collection_description}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "749dbcdb-6ad6-48fe-9938-f1b706aaee8e", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# open and view zarr dataset\n", - "fs2 = fsspec.filesystem('s3', requester_pays=True)\n", - "ds = xr.open_dataset(fs2.get_mapper(zarr_url2), engine='zarr', \n", - " backend_kwargs={'consolidated':True}, chunks={})\n", - "ds" + "# license for dataset\n", + "# no license in dataset attrs, but this is a NOAA dataset, so we will treat it as public domain\n", + "collection_license = 'CC0-1.0'" ] }, { @@ -230,53 +230,6 @@ "crs = ds[list(ds.keys())[0]].metpy.cartopy_crs" ] }, - { - "cell_type": "markdown", - "id": "8fbfecfb-9886-4d06-a34c-6471cb0a6053", - "metadata": {}, - "source": [ - "## Plot a map" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4eb4d027-4266-4a0b-8f16-bacfbef06242", - "metadata": {}, - "outputs": [], - "source": [ - "# # plot a map of a single variable\n", - "# var_to_plot = 'SNOW'\n", - "# da = ds[var_to_plot].sel(time='2014-03-01 00:00').load()\n", - "# da.hvplot.quadmesh(x='lon', y='lat', rasterize=True,\n", - "# geo=True, tiles='OSM', alpha=0.7, cmap='turbo')" - ] - }, - { - "cell_type": "markdown", - "id": "5e057a6c-06fb-4406-823b-e81c58e520e4", - "metadata": {}, - "source": [ - "## Plot a time series at a specific point\n", - "This can help you verify a variable's values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7de2681-88c2-4597-857c-8f169c596f8b", - "metadata": {}, - "outputs": [], - "source": [ - "# # enter lat, lon of point you want to plot time series for\n", - "# lat,lon = 39.978322,-105.2772194\n", - "# time_start = '2013-01-01 00:00'\n", - "# time_end = '2013-12-31 00:00'\n", - "# x, y = crs.transform_point(lon, lat, src_crs=ccrs.PlateCarree()) # PlateCaree = Lat,Lon\n", - "# da = ds[var_to_plot].sel(x=x, y=y, method='nearest').sel(time=slice(time_start,time_end)).load()\n", - "# da.hvplot(x=dim_names_dict['T'], grid=True)" - ] - }, { "cell_type": "markdown", "id": "a8c3ed37-8564-400b-a7fb-25bd5e43d21c", @@ -355,10 +308,25 @@ }, { "cell_type": "markdown", - "id": "cfb71202-03df-45b5-ac2f-0dc2ee1ab780", + "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", + "metadata": { + "tags": [] + }, + "source": [ + "## Open up NHGF STAC Catalog and create a collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", "metadata": {}, + "outputs": [], "source": [ - "## Create pystac collection" + "# define folder location where your STAC catalog json file is\n", + "catalog_path = os.path.join('..', '..', 'catalog')\n", + "# open catalog\n", + "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" ] }, { diff --git a/workflows/archive/Red_River_future_create_collection_from_zarr.ipynb b/workflows/archive/Red_River_future_create_collection_from_zarr.ipynb index cbc663bf59c7ba3913ad31ea70a049a967c630cb..e75a2459a96cc9ce5d791fc7f15903dc42bee1b7 100644 --- a/workflows/archive/Red_River_future_create_collection_from_zarr.ipynb +++ b/workflows/archive/Red_River_future_create_collection_from_zarr.ipynb @@ -36,57 +36,39 @@ "import metpy\n", "import cartopy.crs as ccrs\n", "import cfunits\n", - "import json" - ] - }, - { - "cell_type": "markdown", - "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", - "metadata": {}, - "source": [ - "## Open up NHGF STAC Catalog" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", - "metadata": {}, - "outputs": [], - "source": [ - "# define folder location where your STAC catalog json file is\n", - "catalog_path = os.path.join('..', '..', 'catalog')\n", - "# open catalog\n", - "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" + "import json\n", + "import sys\n", + "sys.path.insert(1, '..')\n", + "import stac_helpers" ] }, { "cell_type": "markdown", - "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "id": "e157f566-fd9f-4f90-af46-8e53fd570c12", "metadata": {}, "source": [ - "## Collection Metadata Input" + "## Collection ID" ] }, { "cell_type": "code", "execution_count": null, - "id": "482d204d-b5b6-40e5-ac42-55b459be1097", - "metadata": {}, + "id": "9512821f-d4e9-4fc8-a87d-152b4b70e862", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# name for STAC collection\n", - "collection_id = 'Red_River_future'\n", - "# description of STAC collection\n", - "collection_description = 'Red_River_future'\n", - "# license for dataset\n", - "collection_license = 'CC0-1.0'" + "collection_id = 'Red_River_future'" ] }, { "cell_type": "markdown", "id": "116b5837-8e85-4ae7-964a-803533ded714", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "## Asset Metadata Input" ] @@ -101,7 +83,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url = 's3://mdmf/gdp/Red_River_future.zarr/'\n", + "zarr_url = f's3://mdmf/gdp/{collection_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -123,7 +105,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url2 = 's3://nhgf-development/workspace/DataConversion/Red_River_future.zarr/'\n", + "zarr_url2 = f's3://nhgf-development/workspace/DataConversion/{collection_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -152,27 +134,45 @@ "metadata": {}, "outputs": [], "source": [ - "# # open and view zarr dataset\n", - "# fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", - "# ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", - "# backend_kwargs={'consolidated':True}, chunks={})\n", - "# ds" + "# open and view zarr dataset\n", + "fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", + "ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", + " backend_kwargs={'consolidated':True}, chunks={})\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "metadata": {}, + "source": [ + "## Collection Metadata Input" ] }, { "cell_type": "code", "execution_count": null, - "id": "83f0ec9d-02a4-42bd-aee2-4dfdf8b06235", + "id": "482d204d-b5b6-40e5-ac42-55b459be1097", + "metadata": {}, + "outputs": [], + "source": [ + "# description of STAC collection\n", + "collection_description = ds.attrs['title']\n", + "print(f'collection description: {collection_description}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5ef08688-2e57-4ba1-9ab4-3bed1214b4b4", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# open and view zarr dataset\n", - "fs2 = fsspec.filesystem('s3', requester_pays=True)\n", - "ds = xr.open_dataset(fs2.get_mapper(zarr_url2), engine='zarr', \n", - " backend_kwargs={'consolidated':True}, chunks={})\n", - "ds" + "# license for dataset\n", + "# no license in dataset attrs, but this is a NOAA dataset, so we will treat it as public domain\n", + "collection_license = 'CC0-1.0'" ] }, { @@ -230,53 +230,6 @@ "crs = ds[list(ds.keys())[0]].metpy.cartopy_crs" ] }, - { - "cell_type": "markdown", - "id": "8fbfecfb-9886-4d06-a34c-6471cb0a6053", - "metadata": {}, - "source": [ - "## Plot a map" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4eb4d027-4266-4a0b-8f16-bacfbef06242", - "metadata": {}, - "outputs": [], - "source": [ - "# # plot a map of a single variable\n", - "# var_to_plot = 'SNOW'\n", - "# da = ds[var_to_plot].sel(time='2014-03-01 00:00').load()\n", - "# da.hvplot.quadmesh(x='lon', y='lat', rasterize=True,\n", - "# geo=True, tiles='OSM', alpha=0.7, cmap='turbo')" - ] - }, - { - "cell_type": "markdown", - "id": "5e057a6c-06fb-4406-823b-e81c58e520e4", - "metadata": {}, - "source": [ - "## Plot a time series at a specific point\n", - "This can help you verify a variable's values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7de2681-88c2-4597-857c-8f169c596f8b", - "metadata": {}, - "outputs": [], - "source": [ - "# # enter lat, lon of point you want to plot time series for\n", - "# lat,lon = 39.978322,-105.2772194\n", - "# time_start = '2013-01-01 00:00'\n", - "# time_end = '2013-12-31 00:00'\n", - "# x, y = crs.transform_point(lon, lat, src_crs=ccrs.PlateCarree()) # PlateCaree = Lat,Lon\n", - "# da = ds[var_to_plot].sel(x=x, y=y, method='nearest').sel(time=slice(time_start,time_end)).load()\n", - "# da.hvplot(x=dim_names_dict['T'], grid=True)" - ] - }, { "cell_type": "markdown", "id": "a8c3ed37-8564-400b-a7fb-25bd5e43d21c", @@ -355,10 +308,25 @@ }, { "cell_type": "markdown", - "id": "cfb71202-03df-45b5-ac2f-0dc2ee1ab780", + "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", + "metadata": { + "tags": [] + }, + "source": [ + "## Open up NHGF STAC Catalog and create a collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", "metadata": {}, + "outputs": [], "source": [ - "## Create pystac collection" + "# define folder location where your STAC catalog json file is\n", + "catalog_path = os.path.join('..', '..', 'catalog')\n", + "# open catalog\n", + "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" ] }, { diff --git a/workflows/archive/Red_River_historical_create_collection_from_zarr.ipynb b/workflows/archive/Red_River_historical_create_collection_from_zarr.ipynb index 9e862256f0a6a7212b66923b45269c19a3e66fe3..19ca03157ebe6aac5a580c89615e377398d5a42a 100644 --- a/workflows/archive/Red_River_historical_create_collection_from_zarr.ipynb +++ b/workflows/archive/Red_River_historical_create_collection_from_zarr.ipynb @@ -36,57 +36,39 @@ "import metpy\n", "import cartopy.crs as ccrs\n", "import cfunits\n", - "import json" - ] - }, - { - "cell_type": "markdown", - "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", - "metadata": {}, - "source": [ - "## Open up NHGF STAC Catalog" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", - "metadata": {}, - "outputs": [], - "source": [ - "# define folder location where your STAC catalog json file is\n", - "catalog_path = os.path.join('..', '..', 'catalog')\n", - "# open catalog\n", - "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" + "import json\n", + "import sys\n", + "sys.path.insert(1, '..')\n", + "import stac_helpers" ] }, { "cell_type": "markdown", - "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "id": "277e6724-a7c3-4ad1-ae11-eba53eb76af1", "metadata": {}, "source": [ - "## Collection Metadata Input" + "## Collection ID" ] }, { "cell_type": "code", "execution_count": null, - "id": "482d204d-b5b6-40e5-ac42-55b459be1097", - "metadata": {}, + "id": "2e75e1c7-0f3a-4d34-88c2-da200a1342a5", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# name for STAC collection\n", - "collection_id = 'Red_River_historical'\n", - "# description of STAC collection\n", - "collection_description = 'Red_River_historical'\n", - "# license for dataset\n", - "collection_license = 'CC0-1.0'" + "collection_id = 'Red_River_historical'" ] }, { "cell_type": "markdown", "id": "116b5837-8e85-4ae7-964a-803533ded714", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "## Asset Metadata Input" ] @@ -101,7 +83,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url = 's3://mdmf/gdp/Red_River_historical.zarr/'\n", + "zarr_url = f's3://mdmf/gdp/{collection_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -123,7 +105,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url2 = 's3://nhgf-development/workspace/DataConversion/Red_River_historical.zarr/'\n", + "zarr_url2 = f's3://nhgf-development/workspace/DataConversion/{collection_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -152,27 +134,45 @@ "metadata": {}, "outputs": [], "source": [ - "# # open and view zarr dataset\n", - "# fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", - "# ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", - "# backend_kwargs={'consolidated':True}, chunks={})\n", - "# ds" + "# open and view zarr dataset\n", + "fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", + "ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", + " backend_kwargs={'consolidated':True}, chunks={})\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "metadata": {}, + "source": [ + "## Collection Metadata Input" ] }, { "cell_type": "code", "execution_count": null, - "id": "83f0ec9d-02a4-42bd-aee2-4dfdf8b06235", + "id": "482d204d-b5b6-40e5-ac42-55b459be1097", + "metadata": {}, + "outputs": [], + "source": [ + "# description of STAC collection\n", + "collection_description = ds.attrs['title']\n", + "print(f'collection description: {collection_description}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbd16e3c-7a58-4f60-9cb1-7ee31994b6ab", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# open and view zarr dataset\n", - "fs2 = fsspec.filesystem('s3', requester_pays=True)\n", - "ds = xr.open_dataset(fs2.get_mapper(zarr_url2), engine='zarr', \n", - " backend_kwargs={'consolidated':True}, chunks={})\n", - "ds" + "# license for dataset\n", + "# no license in dataset attrs, but this is a NOAA dataset, so we will treat it as public domain\n", + "collection_license = 'CC0-1.0'" ] }, { @@ -230,53 +230,6 @@ "crs = ds[list(ds.keys())[0]].metpy.cartopy_crs" ] }, - { - "cell_type": "markdown", - "id": "8fbfecfb-9886-4d06-a34c-6471cb0a6053", - "metadata": {}, - "source": [ - "## Plot a map" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4eb4d027-4266-4a0b-8f16-bacfbef06242", - "metadata": {}, - "outputs": [], - "source": [ - "# # plot a map of a single variable\n", - "# var_to_plot = 'SNOW'\n", - "# da = ds[var_to_plot].sel(time='2014-03-01 00:00').load()\n", - "# da.hvplot.quadmesh(x='lon', y='lat', rasterize=True,\n", - "# geo=True, tiles='OSM', alpha=0.7, cmap='turbo')" - ] - }, - { - "cell_type": "markdown", - "id": "5e057a6c-06fb-4406-823b-e81c58e520e4", - "metadata": {}, - "source": [ - "## Plot a time series at a specific point\n", - "This can help you verify a variable's values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7de2681-88c2-4597-857c-8f169c596f8b", - "metadata": {}, - "outputs": [], - "source": [ - "# # enter lat, lon of point you want to plot time series for\n", - "# lat,lon = 39.978322,-105.2772194\n", - "# time_start = '2013-01-01 00:00'\n", - "# time_end = '2013-12-31 00:00'\n", - "# x, y = crs.transform_point(lon, lat, src_crs=ccrs.PlateCarree()) # PlateCaree = Lat,Lon\n", - "# da = ds[var_to_plot].sel(x=x, y=y, method='nearest').sel(time=slice(time_start,time_end)).load()\n", - "# da.hvplot(x=dim_names_dict['T'], grid=True)" - ] - }, { "cell_type": "markdown", "id": "a8c3ed37-8564-400b-a7fb-25bd5e43d21c", @@ -355,10 +308,25 @@ }, { "cell_type": "markdown", - "id": "cfb71202-03df-45b5-ac2f-0dc2ee1ab780", + "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", + "metadata": { + "tags": [] + }, + "source": [ + "## Open up NHGF STAC Catalog and create a collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", "metadata": {}, + "outputs": [], "source": [ - "## Create pystac collection" + "# define folder location where your STAC catalog json file is\n", + "catalog_path = os.path.join('..', '..', 'catalog')\n", + "# open catalog\n", + "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" ] }, {