From a4cd671365a85616b98852541837f31419def8a2 Mon Sep 17 00:00:00 2001
From: amsnyder <asnyder@usgs.gov>
Date: Mon, 8 Apr 2024 13:12:22 -0500
Subject: [PATCH] fix crs var detection

---
 ...A_future_create_collection_from_zarr.ipynb | 106 ++++++++++++------
 ...storical_create_collection_from_zarr.ipynb | 106 ++++++++++++------
 2 files changed, 140 insertions(+), 72 deletions(-)

diff --git a/workflows/archive/LOCA_future_create_collection_from_zarr.ipynb b/workflows/archive/LOCA_future_create_collection_from_zarr.ipynb
index 3fe3fee9..6e4fdd83 100644
--- a/workflows/archive/LOCA_future_create_collection_from_zarr.ipynb
+++ b/workflows/archive/LOCA_future_create_collection_from_zarr.ipynb
@@ -33,7 +33,8 @@
     "import pandas as pd\n",
     "import json\n",
     "import numpy as np\n",
-    "import metpy\n",
+    "import pyproj\n",
+    "from pyproj import Transformer\n",
     "import cartopy.crs as ccrs\n",
     "import cfunits\n",
     "import json\n",
@@ -206,6 +207,16 @@
     "## Get crs info"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7348d314-9b85-4d51-ba01-c599d5e32ce2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "crs_var = 'crs'"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -213,8 +224,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ds = ds.metpy.parse_cf()\n",
-    "crs = ds[list(ds.keys())[0]].metpy.cartopy_crs"
+    "# use pyproj to automatically extract crs info\n",
+    "crs = pyproj.CRS.from_cf(ds[crs_var].attrs)\n",
+    "\n",
+    "# alternatively, create the appropriate cartopy projection\n",
+    "# crs = ccrs.LambertConformal(central_longitude=crs_info.longitude_of_central_meridian, \n",
+    "# central_latitude=crs_info.latitude_of_projection_origin,\n",
+    "# standard_parallels=crs_info.standard_parallel)"
    ]
   },
   {
@@ -245,14 +261,55 @@
     "# coordinates must be from WGS 84 datum\n",
     "# left, bottom, right, top\n",
     "\n",
-    "# Note: try changing around the commented out lines below to get type float ratherthan a numpy float\n",
-    "#coord_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float), ds[dim_names_dict['Y']].data.min().compute().astype(float), ds[dim_names_dict['X']].data.max().compute().astype(float), ds[dim_names_dict['Y']].data.max().compute().astype(float)]\n",
-    "#coord_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['X']].data.max().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.max().compute().astype(float).tolist()]\n",
-    "coord_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n",
-    "print(coord_bounds)\n",
-    "print(f'\\ncoord_bounds data type: {type(coord_bounds[0])}')\n",
+    "# Note: try changing around the commented out lines below to get type float rather than a numpy float\n",
+    "#spatial_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float), ds[dim_names_dict['Y']].data.min().compute().astype(float), ds[dim_names_dict['X']].data.max().compute().astype(float), ds[dim_names_dict['Y']].data.max().compute().astype(float)]\n",
+    "#spatial_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['X']].data.max().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.max().compute().astype(float).tolist()]\n",
+    "spatial_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n",
+    "print(spatial_bounds)\n",
+    "print(f'\\nspatial_bounds data type: {type(spatial_bounds[0])}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9c14027f-042d-4b90-8692-4f19c42c9434",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "XX, YY = np.meshgrid(ds[dim_names_dict['X']].data, ds[dim_names_dict['Y']].data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "023b7f3d-87dd-4251-b56c-009430852888",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transformer = Transformer.from_crs(crs, \"EPSG:4326\", always_xy=True)\n",
+    "lon, lat = transformer.transform(XX.ravel(), YY.ravel())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5dba1cf9-7e8e-483a-982e-042f725de7f2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f'lower left coordinates (WGS84): {min(lon)}, {min(lat)}')\n",
+    "print(f'upper right coordinates (WGS84): {max(lon)}, {max(lat)}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "507840e5-544e-4a9b-b99b-423ecd1f069e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "# create a spatial extent object \n",
-    "spatial_extent = pystac.SpatialExtent(bboxes=[coord_bounds])"
+    "spatial_extent = pystac.SpatialExtent(bboxes=[[min(lon).item(), min(lat).item(), max(lon).item(), max(lat).item()]])"
    ]
   },
   {
@@ -415,18 +472,6 @@
     "print(dims)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "00a18a29-fb9a-4b56-8009-493122997b16",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# get x, y bounds for extent of those dimensions (required)\n",
-    "xy_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n",
-    "print(xy_bounds)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "e7dc357c-91ec-49ae-83e5-400f791f9792",
@@ -436,17 +481,6 @@
     "reference list of cartopy projections: https://scitools.org.uk/cartopy/docs/latest/reference/projections.html"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ea452f62-5644-49b6-8a4e-7dc4f649fd1a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# print out crs information in dataset\n",
-    "print(crs)"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -652,8 +686,8 @@
     "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n",
     "# note that the extent of each dimension should be pulled from the dataset\n",
     "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n",
-    " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n",
-    " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n",
+    " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [spatial_bounds[0], spatial_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n",
+    " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [spatial_bounds[1], spatial_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n",
     " 'bnds': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'bnds'), 'extent': [ds.bnds.min().item(), ds.bnds.max().item()]}),\n",
     " }"
    ]
   },
@@ -693,7 +727,7 @@
     "vars_dict={}\n",
     "for v in vars:\n",
     "    unit = stac_helpers.get_unit(ds, v)\n",
-    "    var_type = stac_helpers.get_var_type(ds, v)\n",
+    "    var_type = stac_helpers.get_var_type(ds, v, crs_var)\n",
     "    long_name = stac_helpers.get_long_name(ds, v)\n",
     "    vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})"
    ]
diff --git a/workflows/archive/LOCA_historical_create_collection_from_zarr.ipynb b/workflows/archive/LOCA_historical_create_collection_from_zarr.ipynb
index b9c9e601..84556a09 100644
--- a/workflows/archive/LOCA_historical_create_collection_from_zarr.ipynb
+++ b/workflows/archive/LOCA_historical_create_collection_from_zarr.ipynb
@@ -33,7 +33,8 @@
     "import pandas as pd\n",
     "import json\n",
     "import numpy as np\n",
-    "import metpy\n",
+    "import pyproj\n",
+    "from pyproj import Transformer\n",
     "import cartopy.crs as ccrs\n",
     "import cfunits\n",
     "import json\n",
@@ -206,6 +207,16 @@
     "## Get crs info"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "12d38200-6c52-41d8-aa50-ed699dadfa21",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "crs_var = 'crs'"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -213,8 +224,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ds = ds.metpy.parse_cf()\n",
-    "crs = ds[list(ds.keys())[0]].metpy.cartopy_crs"
+    "# use pyproj to automatically extract crs info\n",
+    "crs = pyproj.CRS.from_cf(ds[crs_var].attrs)\n",
+    "\n",
+    "# alternatively, create the appropriate cartopy projection\n",
+    "# crs = ccrs.LambertConformal(central_longitude=crs_info.longitude_of_central_meridian, \n",
+    "# central_latitude=crs_info.latitude_of_projection_origin,\n",
+    "# standard_parallels=crs_info.standard_parallel)"
    ]
   },
   {
@@ -245,14 +261,55 @@
     "# coordinates must be from WGS 84 datum\n",
     "# left, bottom, right, top\n",
     "\n",
-    "# Note: try changing around the commented out lines below to get type float ratherthan a numpy float\n",
-    "#coord_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float), ds[dim_names_dict['Y']].data.min().compute().astype(float), ds[dim_names_dict['X']].data.max().compute().astype(float), ds[dim_names_dict['Y']].data.max().compute().astype(float)]\n",
-    "#coord_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['X']].data.max().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.max().compute().astype(float).tolist()]\n",
-    "coord_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n",
-    "print(coord_bounds)\n",
-    "print(f'\\ncoord_bounds data type: {type(coord_bounds[0])}')\n",
+    "# Note: try changing around the commented out lines below to get type float rather than a numpy float\n",
+    "#spatial_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float), ds[dim_names_dict['Y']].data.min().compute().astype(float), ds[dim_names_dict['X']].data.max().compute().astype(float), ds[dim_names_dict['Y']].data.max().compute().astype(float)]\n",
+    "#spatial_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['X']].data.max().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.max().compute().astype(float).tolist()]\n",
+    "spatial_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n",
+    "print(spatial_bounds)\n",
+    "print(f'\\nspatial_bounds data type: {type(spatial_bounds[0])}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ec2cda7c-ea21-4b9b-ac85-6b1885083167",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "XX, YY = np.meshgrid(ds[dim_names_dict['X']].data, ds[dim_names_dict['Y']].data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c63c9dae-96d0-4236-adba-9602dc056805",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transformer = Transformer.from_crs(crs, \"EPSG:4326\", always_xy=True)\n",
+    "lon, lat = transformer.transform(XX.ravel(), YY.ravel())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ef7395b1-081d-4799-a944-e5240bc7f11a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f'lower left coordinates (WGS84): {min(lon)}, {min(lat)}')\n",
+    "print(f'upper right coordinates (WGS84): {max(lon)}, {max(lat)}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b03e61b0-5a40-41af-9295-d845982ec763",
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "# create a spatial extent object \n",
-    "spatial_extent = pystac.SpatialExtent(bboxes=[coord_bounds])"
+    "spatial_extent = pystac.SpatialExtent(bboxes=[[min(lon).item(), min(lat).item(), max(lon).item(), max(lat).item()]])"
    ]
   },
   {
@@ -415,18 +472,6 @@
     "print(dims)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "00a18a29-fb9a-4b56-8009-493122997b16",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# get x, y bounds for extent of those dimensions (required)\n",
-    "xy_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n",
-    "print(xy_bounds)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "e7dc357c-91ec-49ae-83e5-400f791f9792",
@@ -436,17 +481,6 @@
     "reference list of cartopy projections: https://scitools.org.uk/cartopy/docs/latest/reference/projections.html"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ea452f62-5644-49b6-8a4e-7dc4f649fd1a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# print out crs information in dataset\n",
-    "print(crs)"
-   ]
-  },
  {
    "cell_type": "code",
    "execution_count": null,
@@ -652,8 +686,8 @@
     "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n",
     "# note that the extent of each dimension should be pulled from the dataset\n",
     "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n",
-    " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n",
-    " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n",
+    " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [spatial_bounds[0], spatial_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n",
+    " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [spatial_bounds[1], spatial_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n",
     " 'bnds': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'bnds'), 'extent': [ds.bnds.min().item(), ds.bnds.max().item()]}),\n",
     " }"
    ]
   },
@@ -693,7 +727,7 @@
     "vars_dict={}\n",
     "for v in vars:\n",
     "    unit = stac_helpers.get_unit(ds, v)\n",
-    "    var_type = stac_helpers.get_var_type(ds, v)\n",
+    "    var_type = stac_helpers.get_var_type(ds, v, crs_var)\n",
     "    long_name = stac_helpers.get_long_name(ds, v)\n",
     "    vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})"
    ]
--
GitLab