From 8dfce89001a764f872b3c951fb1bfc4688dc6c52 Mon Sep 17 00:00:00 2001 From: amsnyder <asnyder@usgs.gov> Date: Mon, 20 Nov 2023 14:19:13 -0600 Subject: [PATCH] additional modifications to item workflow --- ..._exploratory_workflow_conus404-daily.ipynb | 149 +++++++++++------- 1 file changed, 91 insertions(+), 58 deletions(-) diff --git a/workflows/create_item_zarr_exploratory_workflow_conus404-daily.ipynb b/workflows/create_item_zarr_exploratory_workflow_conus404-daily.ipynb index 01ad5803..c488783f 100644 --- a/workflows/create_item_zarr_exploratory_workflow_conus404-daily.ipynb +++ b/workflows/create_item_zarr_exploratory_workflow_conus404-daily.ipynb @@ -19,7 +19,9 @@ "cell_type": "code", "execution_count": null, "id": "201e0945-de55-45ff-b095-c2af009a4e62", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import pystac\n", @@ -36,7 +38,8 @@ "import metpy\n", "import cartopy.crs as ccrs\n", "import cfunits\n", - "import json" + "import json\n", + "from shapely.geometry import Polygon, mapping" ] }, { @@ -51,7 +54,9 @@ "cell_type": "code", "execution_count": null, "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# define folder location where your STAC catalog json file is\n", @@ -72,7 +77,9 @@ "cell_type": "code", "execution_count": null, "id": "482d204d-b5b6-40e5-ac42-55b459be1097", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", @@ -88,11 +95,7 @@ "asset_roles = [\"data\",\"zarr\",\"s3\"]\n", "\n", "# name for STAC collection\n", - "item_id = 'conus404-daily'\n", - "# description of STAC collection\n", - "item_description = 'CONUS404 40 years of daily values for subset of model output variables derived from hourly values on cloud storage'\n", - "# license for dataset\n", - "item_license = 'CC0-1.0'" + "item_id = 'conus404-daily'" ] }, { @@ -109,7 +112,9 @@ 
"cell_type": "code", "execution_count": null, "id": "708f2cf5-79ab-49af-8067-de31d0d13ee6", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# open and view zarr dataset\n", @@ -132,7 +137,9 @@ "cell_type": "code", "execution_count": null, "id": "ab91268f-7200-4cb1-979a-c7d75531d2c0", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "dims_auto_extract = ['X', 'Y', 'T']\n", @@ -167,7 +174,9 @@ "cell_type": "code", "execution_count": null, "id": "4eb4d027-4266-4a0b-8f16-bacfbef06242", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# plot a map of a single variable\n", @@ -190,7 +199,9 @@ "cell_type": "code", "execution_count": null, "id": "c7de2681-88c2-4597-857c-8f169c596f8b", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# enter lat, lon of point you want to plot time series for\n", @@ -209,7 +220,7 @@ "id": "a8c3ed37-8564-400b-a7fb-25bd5e43d21c", "metadata": {}, "source": [ - "## Create Collection Extent" + "## Create Item Geometry, BBox, Datetime" ] }, { @@ -217,14 +228,16 @@ "id": "69f0d837-68a5-4fed-9a14-5d75cfbb0da4", "metadata": {}, "source": [ - "### Spatial Extent" + "### Spatial - Geometry and bbox" ] }, { "cell_type": "code", "execution_count": null, "id": "d46805e0-8e94-4ebe-aa01-d9a2d7051459", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# pull out lat/lon bbox for data\n", @@ -235,8 +248,14 @@ "#coord_bounds = [ds.lon.data.min().compute().astype(float), ds.lat.data.min().compute().astype(float), ds.lon.data.max().compute().astype(float), ds.lat.data.max().compute().astype(float)]\n", "coord_bounds = [ds.lon.data.min().astype(float), ds.lat.data.min().astype(float), ds.lon.data.max().astype(float), ds.lat.data.max().astype(float)]\n", "print(coord_bounds)\n", - "# create a spatial extent object \n", - "spatial_extent = pystac.SpatialExtent(bboxes=[coord_bounds])" + "# create a geometry object 
\n", + "footprint = mapping(Polygon([\n", + " [coord_bounds[0], coord_bounds[1]],\n", + " [coord_bounds[0], coord_bounds[3]],\n", + " [coord_bounds[2], coord_bounds[3]],\n", + " [coord_bounds[2], coord_bounds[1]]\n", + " ]))\n", + "print(footprint)" ] }, { @@ -244,32 +263,22 @@ "id": "a04c8fca-1d33-43ac-9e2b-62d7be2887f7", "metadata": {}, "source": [ - "### Temporal Extent" + "### Temporal - datetime" ] }, { "cell_type": "code", "execution_count": null, "id": "41a84995-867c-4152-8c57-85e3758bbb77", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# pull out first and last timestamps\n", "temporal_extent_lower = pd.Timestamp(ds[dim_names_dict['T']].data.min())\n", "temporal_extent_upper = pd.Timestamp(ds[dim_names_dict['T']].data.max())\n", - "print(f'min: {temporal_extent_lower} \\nmax: {temporal_extent_upper}')\n", - "# create a temporal extent object\n", - "temporal_extent = pystac.TemporalExtent(intervals=[[temporal_extent_lower, temporal_extent_upper]])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1b1e37c4-5348-46ad-abc9-e005b5d6c02b", - "metadata": {}, - "outputs": [], - "source": [ - "collection_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)" + "print(f'min: {temporal_extent_lower} \\nmax: {temporal_extent_upper}')" ] }, { @@ -277,28 +286,28 @@ "id": "cfb71202-03df-45b5-ac2f-0dc2ee1ab780", "metadata": {}, "source": [ - "## Create pystac collection" + "## Create pystac item" ] }, { "cell_type": "code", "execution_count": null, "id": "7e96811b-95ae-406a-9728-55fc429d4e1f", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "if catalog.get_child(collection_id):\n", - " collection = catalog.get_child(collection_id)\n", - " print(\"existing collection opened\")\n", - " collection.extent=collection_extent\n", - " collection.description=collection_description\n", - " collection.license=collection_license\n", + "if next(catalog.get_items(item_id), 
None):\n", + " item = next(catalog.get_items(item_id), None)\n", + " print(\"existing item opened\")\n", "else:\n", - " collection = pystac.Collection(id=collection_id,\n", - " description=collection_description,\n", - " extent=collection_extent,\n", - " license=collection_license)\n", - " print(\"new collection created\")" + " item = pystac.Item(id=item_id,\n", + " geometry=footprint,\n", + " bbox=coord_bounds,\n", + " datetime=temporal_extent_upper,\n", + " properties={})\n", + " print(\"new item created\")" ] }, { @@ -306,7 +315,7 @@ "id": "a21c76e8-cd57-4eb5-a33f-7c668a3b3205", "metadata": {}, "source": [ - "## Add zarr url asset to collection" + "## Add zarr url asset to item" ] }, { @@ -322,7 +331,7 @@ " media_type=\"application/vnd+zarr\",\n", " roles=asset_roles,\n", " extra_fields = xarray_opendataset_kwargs)\n", - "collection.add_asset(asset_id, asset)" + "item.add_asset(asset_id, asset)" ] }, { @@ -330,7 +339,7 @@ "id": "f67cd5c9-db33-45c2-bc21-480cd67354f4", "metadata": {}, "source": [ - "## Add datacube extension to collection" + "## Add datacube extension to item" ] }, { @@ -341,7 +350,7 @@ "outputs": [], "source": [ "# instantiate extention on collection\n", - "dc = DatacubeExtension.ext(collection, add_if_missing=True)" + "dc = DatacubeExtension.ext(item, add_if_missing=True)" ] }, { @@ -588,7 +597,19 @@ "id": "615ca168-75fb-4135-9941-0ef5fe4fd1cb", "metadata": {}, "source": [ - "## Add STAC Collection to Catalog and Save" + "## Add STAC Item to Catalog and Save" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "161949f9-e839-4497-8314-cab8a3871669", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "json.dumps(item.to_dict())" ] }, { @@ -598,19 +619,31 @@ "metadata": {}, "outputs": [], "source": [ - "if catalog.get_child(collection_id):\n", - " collection.normalize_and_save(root_href=os.path.join(catalog_path, collection_id), catalog_type=pystac.CatalogType.SELF_CONTAINED)\n", + "if 
next(catalog.get_items(item_id), None):\n", + " #item.normalize_and_save(root_href=os.path.join(catalog_path, item_id), catalog_type=pystac.CatalogType.SELF_CONTAINED)\n", + " catalog.normalize_hrefs(catalog_path)\n", + " catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)\n", "else:\n", - " catalog.add_child(collection)\n", - " catalog.normalize_and_save(root_href=catalog_path, catalog_type=pystac.CatalogType.SELF_CONTAINED)" + " catalog.add_item(item)\n", + " catalog.normalize_hrefs(catalog_path)\n", + " catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)\n", + " #catalog.normalize_and_save(root_href=catalog_path, catalog_type=pystac.CatalogType.SELF_CONTAINED)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca08bfcb-53c3-4573-bece-408da8964302", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "geo", + "display_name": "global-global-pangeo", "language": "python", - "name": "python3" + "name": "conda-env-global-global-pangeo-py" }, "language_info": { "codemirror_mode": { @@ -622,7 +655,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.0" + "version": "3.10.13" } }, "nbformat": 4, -- GitLab