diff --git a/workflows/archive/iclus_hd_create_collection_from_zarr.ipynb b/workflows/archive/iclus_hd_create_collection_from_zarr.ipynb index 3e67a08cdd9b294fc452fd2e8407c4a1a7d884c6..961f375c737f138a0da52cf5cbeb6cde0801014d 100644 --- a/workflows/archive/iclus_hd_create_collection_from_zarr.ipynb +++ b/workflows/archive/iclus_hd_create_collection_from_zarr.ipynb @@ -333,7 +333,7 @@ }, "outputs": [], "source": [ - "# uncomment if you wish to use dask\n", + "# # uncomment if you wish to use dask\n", "# XX, YY = dask.array.meshgrid(ds[dim_names_dict['X']].data, ds[dim_names_dict['Y']].data)" ] }, @@ -346,9 +346,9 @@ }, "outputs": [], "source": [ - "# choose a size for the chunks - these are square chunks that are chunk_len x chunk_len\n", - "# this size worked on a dask cluster on my local computer\n", - "# I haven't been able to get it optimized to work on Nebari, so this will crash if you run the delayed function below\n", + "# # choose a size for the chunks - these are square chunks that are chunk_len x chunk_len\n", + "# # this size worked on a dask cluster on my local computer\n", + "# # I haven't been able to get it optimized to work on Nebari, so this will crash if you run the delayed function below\n", "# chunk_len = 10000\n", "# XX_chunked = XX.rechunk((chunk_len, chunk_len)).ravel()\n", "# YY_chunked = YY.rechunk((chunk_len, chunk_len)).ravel()\n", @@ -376,7 +376,7 @@ }, "outputs": [], "source": [ - "# uncomment if you wish to use dask\n", + "# # uncomment if you wish to use dask\n", "# cluster = LocalCluster(threads_per_worker=os.cpu_count())\n", "# client = Client(cluster)\n", "# print(f\"The link to view the client dashboard is:\\n> {client.dashboard_link}\")" @@ -455,7 +455,7 @@ "metadata": {}, "source": [ "### Temporal Extent\n", - "No time step in this dataset, so we will use null." 
+ "Had to manually construct because xarray can't decode times" ] }, { @@ -470,8 +470,8 @@ "# pull out first and last timestamps\n", "# temporal_extent_lower = pd.Timestamp(ds[dim_names_dict['T']].data.min())\n", "# temporal_extent_upper = pd.Timestamp(ds[dim_names_dict['T']].data.max())\n", - "temporal_extent_lower = None\n", - "temporal_extent_upper = None\n", + "temporal_extent_lower = pd.Timestamp('2005-01-01 00:00')\n", + "temporal_extent_upper = pd.Timestamp('2100-01-01 00:00')\n", "print(f'min: {temporal_extent_lower} \\nmax: {temporal_extent_upper}')\n", "# create a temporal extent object\n", "temporal_extent = pystac.TemporalExtent(intervals=[[temporal_extent_lower, temporal_extent_upper]])" @@ -589,6 +589,18 @@ "## Add datacube extension to collection" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a50df9b", + "metadata": {}, + "outputs": [], + "source": [ + "# reset dims to true dims\n", + "dim_names_dict = {'X': 'x', 'Y': 'y', 'T': 'time'}\n", + "print(f\"Dimension dictionary: {dim_names_dict}\")" + ] + }, { "cell_type": "code", "execution_count": null, @@ -698,8 +710,10 @@ }, "outputs": [], "source": [ - "# time_step = pd.Timedelta(stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True)).isoformat()\n", - "# print(f'time step: {time_step}')" + "#time_step = pd.Timedelta(stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True)).isoformat()\n", + "# if time is yearly or monthly, you will need to manually construct it:\n", + "time_step = \"P5Y0M0DT0H0M0S\"\n", + "print(f'time step: {time_step}')" ] }, { @@ -846,7 +860,9 @@ "\n", "Please see [datacube spec](https://github.com/stac-extensions/datacube?tab=readme-ov-file#dimension-object) for details on required fields.\n", "\n", - "If you have a dimension like \"bnds\" that is used on variables like time_bnds, lon_bnds, lat_bnds to choose either the lower or upper bound, you can use and [additional dimension 
object](https://github.com/stac-extensions/datacube?tab=readme-ov-file#additional-dimension-object). We recommend making the type \"count\" as Microsoft Planetary Computer did [here](https://github.com/stac-extensions/datacube/blob/9e74fa706c9bdd971e01739cf18dcc53bdd3dd4f/examples/daymet-hi-annual.json#L76)." + "If you have a dimension like \"bnds\" that is used on variables like time_bnds, lon_bnds, lat_bnds to choose either the lower or upper bound, you can use an [additional dimension object](https://github.com/stac-extensions/datacube?tab=readme-ov-file#additional-dimension-object). We recommend making the type \"count\" as Microsoft Planetary Computer did [here](https://github.com/stac-extensions/datacube/blob/9e74fa706c9bdd971e01739cf18dcc53bdd3dd4f/examples/daymet-hi-annual.json#L76).\n", + "\n", + "Removed reference system because x and y are indices - not geospatial." ] }, { @@ -878,9 +894,10 @@ "\n", "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n", "# note that the extent of each dimension should be pulled from the dataset\n", - "dims_dict = {dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [spatial_bounds[0], spatial_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", - "    dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [spatial_bounds[1], spatial_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", - "}" + "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n", + "    dim_names_dict['X']: 
pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [ds.x.min().item(), ds.x.max().item()], 'step': x_step}),\n", + " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [ds.y.min().item(), ds.y.max().item()], 'step': y_step}),\n", + " }" ] }, { @@ -995,9 +1012,9 @@ ], "metadata": { "kernelspec": { - "display_name": "global-global-pangeo", + "display_name": "geo", "language": "python", - "name": "conda-env-global-global-pangeo-py" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1009,7 +1026,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.6" + "version": "3.10.0" } }, "nbformat": 4,