From a4cd671365a85616b98852541837f31419def8a2 Mon Sep 17 00:00:00 2001
From: amsnyder <asnyder@usgs.gov>
Date: Mon, 8 Apr 2024 13:12:22 -0500
Subject: [PATCH] fix crs var detection

---
 ...A_future_create_collection_from_zarr.ipynb | 106 ++++++++++++------
 ...storical_create_collection_from_zarr.ipynb | 106 ++++++++++++------
 2 files changed, 140 insertions(+), 72 deletions(-)

diff --git a/workflows/archive/LOCA_future_create_collection_from_zarr.ipynb b/workflows/archive/LOCA_future_create_collection_from_zarr.ipynb
index 3fe3fee9..6e4fdd83 100644
--- a/workflows/archive/LOCA_future_create_collection_from_zarr.ipynb
+++ b/workflows/archive/LOCA_future_create_collection_from_zarr.ipynb
@@ -33,7 +33,8 @@
     "import pandas as pd\n",
     "import json\n",
     "import numpy as np\n",
-    "import metpy\n",
+    "import pyproj\n",
+    "from pyproj import Transformer\n",
     "import cartopy.crs as ccrs\n",
     "import cfunits\n",
     "import json\n",
@@ -206,6 +207,16 @@
     "## Get crs info"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7348d314-9b85-4d51-ba01-c599d5e32ce2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "crs_var = 'crs'"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -213,8 +224,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ds  = ds.metpy.parse_cf()\n",
-    "crs = ds[list(ds.keys())[0]].metpy.cartopy_crs"
+    "# use pyproj to build the CRS from the CF grid-mapping attributes stored on the crs variable\n",
+    "crs = pyproj.CRS.from_cf(ds[crs_var].attrs)\n",
+    "\n",
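+    "# alternatively, create the appropriate cartopy projection (assumes crs_info holds the grid-mapping variable, e.g. crs_info = ds[crs_var] - confirm before use)\n",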
+    "# crs = ccrs.LambertConformal(central_longitude=crs_info.longitude_of_central_meridian, \n",
+    "#                             central_latitude=crs_info.latitude_of_projection_origin,\n",
+    "#                             standard_parallels=crs_info.standard_parallel)"
    ]
   },
   {
@@ -245,14 +261,55 @@
     "# coordinates must be from WGS 84 datum\n",
     "# left, bottom, right, top\n",
     "\n",
-    "# Note: try changing around the commented out lines below to get type float ratherthan a numpy float\n",
-    "#coord_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float), ds[dim_names_dict['Y']].data.min().compute().astype(float), ds[dim_names_dict['X']].data.max().compute().astype(float), ds[dim_names_dict['Y']].data.max().compute().astype(float)]\n",
-    "#coord_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['X']].data.max().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.max().compute().astype(float).tolist()]\n",
-    "coord_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n",
-    "print(coord_bounds)\n",
-    "print(f'\\ncoord_bounds data type: {type(coord_bounds[0])}')\n",
+    "# Note: if the bounds come back as numpy floats rather than built-in Python floats, try swapping in one of the commented-out lines below\n",
+    "#spatial_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float), ds[dim_names_dict['Y']].data.min().compute().astype(float), ds[dim_names_dict['X']].data.max().compute().astype(float), ds[dim_names_dict['Y']].data.max().compute().astype(float)]\n",
+    "#spatial_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['X']].data.max().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.max().compute().astype(float).tolist()]\n",
+    "spatial_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n",
+    "print(spatial_bounds)\n",
+    "print(f'\\nspatial_bounds data type: {type(spatial_bounds[0])}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9c14027f-042d-4b90-8692-4f19c42c9434",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "XX, YY = np.meshgrid(ds[dim_names_dict['X']].data, ds[dim_names_dict['Y']].data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "023b7f3d-87dd-4251-b56c-009430852888",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transformer = Transformer.from_crs(crs, \"EPSG:4326\", always_xy=True)\n",
+    "lon, lat = transformer.transform(XX.ravel(), YY.ravel())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5dba1cf9-7e8e-483a-982e-042f725de7f2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f'lower left coordinates (WGS84): {min(lon)}, {min(lat)}')\n",
+    "print(f'upper right coordinates (WGS84): {max(lon)}, {max(lat)}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "507840e5-544e-4a9b-b99b-423ecd1f069e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "# create a spatial extent object \n",
-    "spatial_extent = pystac.SpatialExtent(bboxes=[coord_bounds])"
+    "spatial_extent = pystac.SpatialExtent(bboxes=[[min(lon).item(), min(lat).item(), max(lon).item(), max(lat).item()]])"
    ]
   },
   {
@@ -415,18 +472,6 @@
     "print(dims)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "00a18a29-fb9a-4b56-8009-493122997b16",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# get x, y bounds for extent of those dimensions (required)\n",
-    "xy_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n",
-    "print(xy_bounds)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "e7dc357c-91ec-49ae-83e5-400f791f9792",
@@ -436,17 +481,6 @@
     "reference list of cartopy projections: https://scitools.org.uk/cartopy/docs/latest/reference/projections.html"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ea452f62-5644-49b6-8a4e-7dc4f649fd1a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# print out crs information in dataset\n",
-    "print(crs)"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -652,8 +686,8 @@
     "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n",
     "# note that the extent of each dimension should be pulled from the dataset\n",
     "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n",
-    "             dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n",
-    "             dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n",
+    "             dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [spatial_bounds[0], spatial_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n",
+    "             dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [spatial_bounds[1], spatial_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n",
     "             'bnds': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'bnds'), 'extent': [ds.bnds.min().item(), ds.bnds.max().item()]}),\n",
     "            }"
    ]
@@ -693,7 +727,7 @@
     "vars_dict={}\n",
     "for v in vars:\n",
     "    unit = stac_helpers.get_unit(ds, v)\n",
-    "    var_type = stac_helpers.get_var_type(ds, v)\n",
+    "    var_type = stac_helpers.get_var_type(ds, v, crs_var)\n",
     "    long_name = stac_helpers.get_long_name(ds, v)\n",
     "    vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})"
    ]
diff --git a/workflows/archive/LOCA_historical_create_collection_from_zarr.ipynb b/workflows/archive/LOCA_historical_create_collection_from_zarr.ipynb
index b9c9e601..84556a09 100644
--- a/workflows/archive/LOCA_historical_create_collection_from_zarr.ipynb
+++ b/workflows/archive/LOCA_historical_create_collection_from_zarr.ipynb
@@ -33,7 +33,8 @@
     "import pandas as pd\n",
     "import json\n",
     "import numpy as np\n",
-    "import metpy\n",
+    "import pyproj\n",
+    "from pyproj import Transformer\n",
     "import cartopy.crs as ccrs\n",
     "import cfunits\n",
     "import json\n",
@@ -206,6 +207,16 @@
     "## Get crs info"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "12d38200-6c52-41d8-aa50-ed699dadfa21",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "crs_var = 'crs'"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -213,8 +224,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ds  = ds.metpy.parse_cf()\n",
-    "crs = ds[list(ds.keys())[0]].metpy.cartopy_crs"
+    "# use pyproj to build the CRS from the CF grid-mapping attributes stored on the crs variable\n",
+    "crs = pyproj.CRS.from_cf(ds[crs_var].attrs)\n",
+    "\n",
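+    "# alternatively, create the appropriate cartopy projection (assumes crs_info holds the grid-mapping variable, e.g. crs_info = ds[crs_var] - confirm before use)\n",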
+    "# crs = ccrs.LambertConformal(central_longitude=crs_info.longitude_of_central_meridian, \n",
+    "#                             central_latitude=crs_info.latitude_of_projection_origin,\n",
+    "#                             standard_parallels=crs_info.standard_parallel)"
    ]
   },
   {
@@ -245,14 +261,55 @@
     "# coordinates must be from WGS 84 datum\n",
     "# left, bottom, right, top\n",
     "\n",
-    "# Note: try changing around the commented out lines below to get type float ratherthan a numpy float\n",
-    "#coord_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float), ds[dim_names_dict['Y']].data.min().compute().astype(float), ds[dim_names_dict['X']].data.max().compute().astype(float), ds[dim_names_dict['Y']].data.max().compute().astype(float)]\n",
-    "#coord_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['X']].data.max().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.max().compute().astype(float).tolist()]\n",
-    "coord_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n",
-    "print(coord_bounds)\n",
-    "print(f'\\ncoord_bounds data type: {type(coord_bounds[0])}')\n",
+    "# Note: if the bounds come back as numpy floats rather than built-in Python floats, try swapping in one of the commented-out lines below\n",
+    "#spatial_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float), ds[dim_names_dict['Y']].data.min().compute().astype(float), ds[dim_names_dict['X']].data.max().compute().astype(float), ds[dim_names_dict['Y']].data.max().compute().astype(float)]\n",
+    "#spatial_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['X']].data.max().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.max().compute().astype(float).tolist()]\n",
+    "spatial_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n",
+    "print(spatial_bounds)\n",
+    "print(f'\\nspatial_bounds data type: {type(spatial_bounds[0])}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ec2cda7c-ea21-4b9b-ac85-6b1885083167",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "XX, YY = np.meshgrid(ds[dim_names_dict['X']].data, ds[dim_names_dict['Y']].data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c63c9dae-96d0-4236-adba-9602dc056805",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transformer = Transformer.from_crs(crs, \"EPSG:4326\", always_xy=True)\n",
+    "lon, lat = transformer.transform(XX.ravel(), YY.ravel())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ef7395b1-081d-4799-a944-e5240bc7f11a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f'lower left coordinates (WGS84): {min(lon)}, {min(lat)}')\n",
+    "print(f'upper right coordinates (WGS84): {max(lon)}, {max(lat)}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b03e61b0-5a40-41af-9295-d845982ec763",
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "# create a spatial extent object \n",
-    "spatial_extent = pystac.SpatialExtent(bboxes=[coord_bounds])"
+    "spatial_extent = pystac.SpatialExtent(bboxes=[[min(lon).item(), min(lat).item(), max(lon).item(), max(lat).item()]])"
    ]
   },
   {
@@ -415,18 +472,6 @@
     "print(dims)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "00a18a29-fb9a-4b56-8009-493122997b16",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# get x, y bounds for extent of those dimensions (required)\n",
-    "xy_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n",
-    "print(xy_bounds)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "e7dc357c-91ec-49ae-83e5-400f791f9792",
@@ -436,17 +481,6 @@
     "reference list of cartopy projections: https://scitools.org.uk/cartopy/docs/latest/reference/projections.html"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ea452f62-5644-49b6-8a4e-7dc4f649fd1a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# print out crs information in dataset\n",
-    "print(crs)"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -652,8 +686,8 @@
     "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n",
     "# note that the extent of each dimension should be pulled from the dataset\n",
     "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n",
-    "             dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n",
-    "             dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n",
+    "             dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [spatial_bounds[0], spatial_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n",
+    "             dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [spatial_bounds[1], spatial_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n",
     "             'bnds': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'bnds'), 'extent': [ds.bnds.min().item(), ds.bnds.max().item()]}),\n",
     "            }"
    ]
@@ -693,7 +727,7 @@
     "vars_dict={}\n",
     "for v in vars:\n",
     "    unit = stac_helpers.get_unit(ds, v)\n",
-    "    var_type = stac_helpers.get_var_type(ds, v)\n",
+    "    var_type = stac_helpers.get_var_type(ds, v, crs_var)\n",
     "    long_name = stac_helpers.get_long_name(ds, v)\n",
     "    vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})"
    ]
-- 
GitLab