diff --git a/workflows/examples/create_collection_from_zarr_conus404-daily.ipynb b/workflows/examples/create_collection_from_zarr_conus404-daily.ipynb index 61d5f552e791cb638e3f34a5652c545e1343f04d..51923ae5d0f09e8c02828833c980ce15c247ab0b 100644 --- a/workflows/examples/create_collection_from_zarr_conus404-daily.ipynb +++ b/workflows/examples/create_collection_from_zarr_conus404-daily.ipynb @@ -45,46 +45,21 @@ }, { "cell_type": "markdown", - "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", - "metadata": {}, - "source": [ - "## Open up NHGF STAC Catalog" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", - "metadata": {}, - "outputs": [], - "source": [ - "# define folder location where your STAC catalog json file is\n", - "catalog_path = os.path.join('..', '..', 'catalog')\n", - "# open catalog\n", - "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" - ] - }, - { - "cell_type": "markdown", - "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "id": "a71f9d19-8fb3-4f47-b4c4-447bb80d8dd5", "metadata": {}, "source": [ - "## Collection Metadata Input" + "## Collection ID" ] }, { "cell_type": "code", "execution_count": null, - "id": "482d204d-b5b6-40e5-ac42-55b459be1097", + "id": "15ee060d-3127-4024-a1ad-6aa0648667e1", "metadata": {}, "outputs": [], "source": [ - "# name for STAC collection\n", - "collection_id = 'conus404-daily'\n", - "# description of STAC collection\n", - "collection_description = 'CONUS404 40 years of daily values for subset of model output variables derived from hourly values on cloud storage'\n", - "# license for dataset\n", - "collection_license = 'CC0-1.0'" + "# name for STAC collection - should match name of zarr dataset\n", + "collection_id = 'conus404-daily'" ] }, { @@ -163,6 +138,30 @@ "ds" ] }, + { + "cell_type": "markdown", + "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "metadata": {}, + "source": [ + "## Collection Metadata Input" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "482d204d-b5b6-40e5-ac42-55b459be1097", + "metadata": {}, + "outputs": [], + "source": [ + "# description of STAC collection\n", + "collection_description = 'CONUS404 40 years of daily values for subset of model output variables derived from hourly values on cloud storage'\n", + "# you can consider pulling this from dataset attributes instead of manually typing it:\n", + "# collection_description = ds.attrs['title']\n", + "\n", + "# license for dataset\n", + "collection_license = stac_helpers.license_picker(ds.attrs['license'])" + ] + }, { "cell_type": "markdown", "id": "0bc7e9b3-ad62-4b10-a18e-66b7ed2d35dc", @@ -407,10 +406,23 @@ }, { "cell_type": "markdown", - "id": "cfb71202-03df-45b5-ac2f-0dc2ee1ab780", + "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", + "metadata": {}, + "source": [ + "## Open up STAC Catalog and create a collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", "metadata": {}, + "outputs": [], "source": [ - "## Create pystac collection" + "# define folder location where your STAC catalog json file is\n", + "catalog_path = os.path.join('..', '..', 'catalog')\n", + "# open catalog\n", + "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" ] }, { @@ -556,7 +568,9 @@ "id": "9e2bbcc5-e45a-4b8c-9d60-601f345e8134", "metadata": {}, "source": [ - "**Time**" + "**Time**\n", + "\n", + "If you need to manually construct this field, here is a helpful reference: 
https://en.wikipedia.org/wiki/ISO_8601#Durations" ] }, { @@ -567,6 +581,8 @@ "outputs": [], "source": [ "time_step = pd.Timedelta(stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True)).isoformat()\n", + "# if time is yearly or monthly, you will need to manually construct it:\n", + "#time_step = \"P1Y0M0DT0H0M0S\"\n", "print(f'time step: {time_step}')" ] }, @@ -699,26 +715,6 @@ "# ds.isel(y=slice(ix-1,ix+3)).y" ] }, - { - "cell_type": "markdown", - "id": "dc9c149c-c2ba-4c5f-b48b-7b1b88a0309c", - "metadata": {}, - "source": [ - "#### extract x, y dimension lower and upper bounds" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "773d2e70-9bc5-4f37-96d0-5ff980ae0c2d", - "metadata": {}, - "outputs": [], - "source": [ - "# get x, y bounds for extent of those dimensions (required)\n", - "xy_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n", - "print(xy_bounds)" - ] - }, { "cell_type": "markdown", "id": "00a5e041-081d-428d-ac2e-75d16de205e6", @@ -729,7 +725,15 @@ "\n", "Please see [datacube spec](https://github.com/stac-extensions/datacube?tab=readme-ov-file#dimension-object) for details on required fields.\n", "\n", - "If you have a dimension like \"bnds\" that is used on variables like time_bnds, lon_bnds, lat_bnds to choose either the lower or upper bound, you can use and [additional dimension object](https://github.com/stac-extensions/datacube?tab=readme-ov-file#additional-dimension-object). We recommend making the type \"count\" as Microsoft Planetary Computer did [here](https://github.com/stac-extensions/datacube/blob/9e74fa706c9bdd971e01739cf18dcc53bdd3dd4f/examples/daymet-hi-annual.json#L76)." + "If you have a dimension like \"bnds\" or \"nv\" that is used on variables like time_bnds, lon_bnds, lat_bnds to choose either the lower or upper bound, you can use an [additional dimension object](https://github.com/stac-extensions/datacube?tab=readme-ov-file#additional-dimension-object). 
We recommend making the type \"count\" as Microsoft Planetary Computer did [here](https://github.com/stac-extensions/datacube/blob/9e74fa706c9bdd971e01739cf18dcc53bdd3dd4f/examples/daymet-hi-annual.json#L76).\n", + "\n", + "Here is an example:\n", + "\n", + "```\n", + "dims_dict = {\n", + " 'bnds': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'bnds'), 'extent': [ds.bnds.min().item(), ds.bnds.max().item()]})\n", + " }\n", + "```" ] }, { @@ -759,8 +763,8 @@ "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n", "# note that the extent of each dimension should be pulled from the dataset\n", "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n", - " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", - " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", + " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [spatial_bounds[0], spatial_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", + " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [spatial_bounds[1], spatial_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", " 'bottom_top_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'bottom_top_stag')}),\n", " 'bottom_top': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'bottom_top')}),\n", " 'soil_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'soil_layers_stag')}),\n", @@ -768,8 +772,18 @@ " 'y_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, 'y_stag')}),\n", " 'snow_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'snow_layers_stag')}),\n", " 'snso_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'snso_layers_stag')}),\n", - " }\n", - "display(dims_dict)" + " }" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ab85b09-eb38-404c-910c-13349d5e2234", + "metadata": {}, + "outputs": [], + "source": [ + "# make sure you added all the right dims\n", + "assert dims_dict.keys() == dims" ] }, { diff --git a/workflows/examples/create_item_from_zarr_conus404-daily.ipynb b/workflows/examples/create_item_from_zarr_conus404-daily.ipynb index 5c91bcca4d8eb22975b2a17520b6147152844055..80ac6143334bedae04c439b97060becc7a73d98d 100644 --- 
a/workflows/examples/create_item_from_zarr_conus404-daily.ipynb +++ b/workflows/examples/create_item_from_zarr_conus404-daily.ipynb @@ -49,45 +49,20 @@ }, { "cell_type": "markdown", - "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", + "id": "f8c93dbc-174c-4387-be7a-00eccf004509", "metadata": {}, "source": [ - "## Open up NHGF STAC Catalog" + "## Item ID" ] }, { "cell_type": "code", "execution_count": null, - "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# define folder location where your STAC catalog json file is\n", - "catalog_path = os.path.join('..', '..', 'catalog_items')\n", - "# open catalog\n", - "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" - ] - }, - { - "cell_type": "markdown", - "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "id": "65b8979c-a462-4f68-8912-e82158d8811e", "metadata": {}, - "source": [ - "## Item Metadata Input" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "482d204d-b5b6-40e5-ac42-55b459be1097", - "metadata": { - "tags": [] - }, "outputs": [], "source": [ - "# name for STAC collection\n", + "# name for STAC item - should match name of zarr dataset\n", "item_id = 'conus404-daily'" ] }, @@ -109,7 +84,7 @@ "outputs": [], "source": [ "# url to zarr store that you want to create a collection for\n", - "zarr_url = 's3://hytest/conus404/conus404_daily.zarr/'\n", + "zarr_url = f's3://mdmf/gdp/{item_id}.zarr/'\n", "\n", "# define keyword arguments needed for opening the dataset with xarray\n", "# ref: https://github.com/stac-extensions/xarray-assets\n", @@ -416,10 +391,25 @@ }, { "cell_type": "markdown", - "id": "cfb71202-03df-45b5-ac2f-0dc2ee1ab780", + "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", "metadata": {}, "source": [ - "## Create pystac item" + "## Open up STAC Catalog and create an item" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# define folder location where your STAC catalog json file is\n", + "catalog_path = os.path.join('..', '..', 'catalog_items')\n", + "# open catalog\n", + "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" ] }, { @@ -565,7 +555,9 @@ "id": "18b8950f-030f-4f78-b9ac-799dc9263cb6", "metadata": {}, "source": [ - "**Time**" + "**Time**\n", + "\n", + "If you need to manually construct this field, here is a helpful reference: https://en.wikipedia.org/wiki/ISO_8601#Durations" ] }, { @@ -576,6 +568,8 @@ "outputs": [], "source": [ "time_step = pd.Timedelta(stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True)).isoformat()\n", + "# if time is yearly or monthly, you will need to manually construct it:\n", + "#time_step = \"P1Y0M0DT0H0M0S\"\n", "print(f'time step: {time_step}')" ] }, @@ -708,26 +702,6 @@ "# ds.isel(y=slice(ix-1,ix+3)).y" ] }, - { - "cell_type": "markdown", - "id": "1cf871b8-7eee-4c47-81b4-a42583dd60b0", - "metadata": {}, - "source": [ - "#### extract x, y dimension lower and upper bounds" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "00a18a29-fb9a-4b56-8009-493122997b16", - "metadata": {}, - "outputs": [], - "source": [ - "# get x, y bounds for extent of those dimensions (required)\n", - "xy_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), 
ds[dim_names_dict['Y']].data.max().astype(float).item()]\n", - "print(xy_bounds)" - ] - }, { "cell_type": "markdown", "id": "00a5e041-081d-428d-ac2e-75d16de205e6", @@ -738,7 +712,15 @@ "\n", "Please see [datacube spec](https://github.com/stac-extensions/datacube?tab=readme-ov-file#dimension-object) for details on required fields.\n", "\n", - "If you have a dimension like \"bnds\" that is used on variables like time_bnds, lon_bnds, lat_bnds to choose either the lower or upper bound, you can use and [additional dimension object](https://github.com/stac-extensions/datacube?tab=readme-ov-file#additional-dimension-object). We recommend making the type \"count\" as Microsoft Planetary Computer did [here](https://github.com/stac-extensions/datacube/blob/9e74fa706c9bdd971e01739cf18dcc53bdd3dd4f/examples/daymet-hi-annual.json#L76)." + "If you have a dimension like \"bnds\" or \"nv\" that is used on variables like time_bnds, lon_bnds, lat_bnds to choose either the lower or upper bound, you can use an [additional dimension object](https://github.com/stac-extensions/datacube?tab=readme-ov-file#additional-dimension-object). We recommend making the type \"count\" as Microsoft Planetary Computer did [here](https://github.com/stac-extensions/datacube/blob/9e74fa706c9bdd971e01739cf18dcc53bdd3dd4f/examples/daymet-hi-annual.json#L76).\n", + "\n", + "Here is an example:\n", + "\n", + "```\n", + "dims_dict = {\n", + " 'bnds': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'bnds'), 'extent': [ds.bnds.min().item(), ds.bnds.max().item()]})\n", + " }\n", + "```" ] }, { @@ -768,8 +750,8 @@ "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n", "# note that the extent of each dimension should be pulled from the dataset\n", "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step': time_step}),\n", - " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", - " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", + " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [spatial_bounds[0], spatial_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", + " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [spatial_bounds[1], spatial_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", " 'bottom_top_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'bottom_top_stag')}),\n", " 'bottom_top': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'bottom_top')}),\n", " 'soil_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 
'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'soil_layers_stag')}),\n", @@ -777,8 +759,18 @@ " 'y_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, 'y_stag')}),\n", " 'snow_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'snow_layers_stag')}),\n", " 'snso_layers_stag': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'z', 'description': stac_helpers.get_long_name(ds, 'snso_layers_stag')}),\n", - " }\n", - "display(dims_dict)" + " }" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "816ed76c-c287-4116-a507-6a5c2734e24b", + "metadata": {}, + "outputs": [], + "source": [ + "# make sure you added all the right dims\n", + "assert dims_dict.keys() == dims" ] }, {
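A small supplement to the `time_step` note that both notebooks gain above: the sketch below is not part of the diff and only illustrates why the manual string is needed. `pd.Timedelta` can serialize fixed-length steps such as the daily step used here, but calendar-based steps (monthly, yearly) have no fixed length, so their ISO 8601 duration has to be written out by hand; the monthly and yearly strings below are assumptions for illustration.

```python
import pandas as pd

# Fixed-length steps round-trip through pandas directly:
time_step = pd.Timedelta("1 day").isoformat()
print(time_step)  # P1DT0H0M0S

# Calendar-based steps cannot be represented as a pd.Timedelta, so the
# ISO 8601 duration (see the reference linked in the notebooks) is written
# by hand. Example strings, assumed for illustration:
monthly_step = "P0Y1M0DT0H0M0S"
yearly_step = "P1Y0M0DT0H0M0S"
```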
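Both notebooks also delete the cell that computed `xy_bounds`, while the updated `dims_dict` entries reference `spatial_bounds`, which these hunks do not define; presumably it is set elsewhere in the notebooks. If it is not, a minimal sketch that mirrors the removed cell under the new name would be:

```python
# Sketch only: same min/max logic as the removed xy_bounds cell, renamed to
# match the updated dims_dict. `ds` and `dim_names_dict` are assumed to come
# from earlier cells in the notebook.
spatial_bounds = [
    ds[dim_names_dict['X']].data.min().astype(float).item(),  # x minimum
    ds[dim_names_dict['Y']].data.min().astype(float).item(),  # y minimum
    ds[dim_names_dict['X']].data.max().astype(float).item(),  # x maximum
    ds[dim_names_dict['Y']].data.max().astype(float).item(),  # y maximum
]
print(spatial_bounds)
```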