From 647eb91f438ce8cdc1cb0221873d749adb7cff5a Mon Sep 17 00:00:00 2001
From: amsnyder <asnyder@usgs.gov>
Date: Tue, 9 Apr 2024 17:27:17 -0400
Subject: [PATCH] add daskification/results for serap_slamm

---
 ...ap_slamm_create_collection_from_zarr.ipynb | 88 +++++++++++++++++--
 1 file changed, 79 insertions(+), 9 deletions(-)

diff --git a/workflows/archive/serap_slamm_create_collection_from_zarr.ipynb b/workflows/archive/serap_slamm_create_collection_from_zarr.ipynb
index 740f54ec..ebfa82d9 100644
--- a/workflows/archive/serap_slamm_create_collection_from_zarr.ipynb
+++ b/workflows/archive/serap_slamm_create_collection_from_zarr.ipynb
@@ -40,7 +40,11 @@
     "import json\n",
     "import sys\n",
     "sys.path.insert(1, '..')\n",
-    "import stac_helpers"
+    "import stac_helpers\n",
+    "import dask\n",
+    "import dask.array as da\n",
+    "from dask.distributed import Client, LocalCluster\n",
+    "import pyproj"
    ]
   },
   {
@@ -301,7 +305,34 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "XX, YY = np.meshgrid(ds[dim_names_dict['X']].data, ds[dim_names_dict['Y']].data)"
+    "# # uncomment if you wish to use dask\n",
+    "# XX, YY = dask.array.meshgrid(ds[dim_names_dict['X']].data, ds[dim_names_dict['Y']].data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3020e62b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # choose a size for the chunks - these are square chunks that are chunk_len x chunk_len\n",
+    "# # this size worked on a dask cluster on my local computer\n",
+    "# # I haven't been able to tune it to work on Nebari, so running the delayed function below with this chunk size will crash there\n",
+    "# chunk_len = 10000\n",
+    "# XX_chunked = XX.rechunk((chunk_len, chunk_len)).ravel()\n",
+    "# YY_chunked = YY.rechunk((chunk_len, chunk_len)).ravel()\n",
+    "# XX_chunked"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "917e1e92",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transformer = Transformer.from_crs(crs, \"EPSG:4326\", always_xy=True)"
    ]
   },
   {
@@ -311,8 +342,47 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "transformer = Transformer.from_crs(crs, \"EPSG:4326\", always_xy=True)\n",
-    "lon, lat = transformer.transform(XX.ravel(), YY.ravel())"
+    "# # uncomment if you wish to use dask\n",
+    "# cluster = LocalCluster(threads_per_worker=os.cpu_count())\n",
+    "# client = Client(cluster)\n",
+    "# print(f\"The link to view the client dashboard is:\\n>  {client.dashboard_link}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "76f60290",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# #lon, lat = transformer.transform(XX.ravel(), YY.ravel())\n",
+    "# @dask.delayed\n",
+    "# def proc_func(XX_chunked, YY_chunked):\n",
+    "#     lon, lat = transformer.transform(XX_chunked, YY_chunked)\n",
+    "#     min_lon = lon.min()\n",
+    "#     min_lat = lat.min()\n",
+    "#     max_lon = lon.max()\n",
+    "#     max_lat = lat.max()\n",
+    "#     return min_lon, min_lat, max_lon, max_lat"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "668d1cbb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# commented out because this will crash on Nebari; the results from a run on a local computer are hard-coded at the end of this cell\n",
+    "# result = proc_func(XX_chunked, YY_chunked).compute()\n",
+    "# min_lon = result[0]\n",
+    "# min_lat = result[1]\n",
+    "# max_lon = result[2]\n",
+    "# max_lat = result[3]\n",
+    "min_lon = -85.45951153138921\n",
+    "min_lat = 28.29288351506335\n",
+    "max_lon = -75.00778602615432\n",
+    "max_lat = 38.14259014718172"
    ]
   },
   {
@@ -322,8 +392,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(f'lower left coordinates (WGS84): {min(lon)}, {min(lat)}')\n",
-    "print(f'upper right coordinates (WGS84): {max(lon)}, {max(lat)}')"
+    "print(f'lower left coordinates (WGS84): {min_lon}, {min_lat}')\n",
+    "print(f'upper right coordinates (WGS84): {max_lon}, {max_lat}')"
    ]
   },
   {
@@ -859,9 +929,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "global-global-pangeo",
+   "display_name": "geo",
    "language": "python",
-   "name": "conda-env-global-global-pangeo-py"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -873,7 +943,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.6"
+   "version": "3.10.0"
   }
  },
  "nbformat": 4,
-- 
GitLab