From b02079abfdb36bd049e27f69ac6b2eea6a143adf Mon Sep 17 00:00:00 2001 From: amsnyder <asnyder@usgs.gov> Date: Fri, 19 Jan 2024 15:39:24 -0600 Subject: [PATCH] use helpers --- ...e_slices_create_collection_from_zarr.ipynb | 129 ++-------------- ...d_annual_create_collection_from_zarr.ipynb | 142 ++---------------- ...e_slices_create_collection_from_zarr.ipynb | 142 ++---------------- 3 files changed, 41 insertions(+), 372 deletions(-) diff --git a/workflows/archive/TTU_2019_rcp45_time_slices_create_collection_from_zarr.ipynb b/workflows/archive/TTU_2019_rcp45_time_slices_create_collection_from_zarr.ipynb index 7cb328b5..cc8a0a00 100644 --- a/workflows/archive/TTU_2019_rcp45_time_slices_create_collection_from_zarr.ipynb +++ b/workflows/archive/TTU_2019_rcp45_time_slices_create_collection_from_zarr.ipynb @@ -189,22 +189,9 @@ "outputs": [], "source": [ "# dims_auto_extract = ['X', 'Y', 'T']\n", - "# def extract_dim(ds, d):\n", - "# try:\n", - "# dim_list = ds.cf.axes[d]\n", - "# assert len(dim_list)==1, f'There are too many {d} dimensions in this dataset.'\n", - "# dim = dim_list[0]\n", - "# except KeyError:\n", - "# print(f\"Could not auto-extract {d} dimension name.\")\n", - "# print(\"Look at the xarray output above showing the dataset dimensions.\")\n", - "# dim = str(input(f\"What is the name of the {d} dimension of this dataset?\"))\n", - "# assert dim in ds.dims, \"That is not a valid dimension name for this dataset\"\n", - "# print(f\"name of {d} dimension: {dim}\\n\")\n", - "# return dim\n", - "\n", "# dim_names_dict = {}\n", "# for d in dims_auto_extract:\n", - "# dim_names_dict[d] = extract_dim(ds, d)\n", + "# dim_names_dict[d] = stac_helpers.extract_dim(ds, d)\n", "dim_names_dict = {'X': 'lon', 'Y': 'lat', 'T': 'time'}\n", "print(f\"Dimension dictionary: {dim_names_dict}\")" ] @@ -443,59 +430,6 @@ "print(xy_bounds)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f49d6d7-9e30-4144-909b-fa1238e6c77a", - "metadata": {}, - "outputs": [], - "source": [ - "def get_step(ds, dim_name, time_dim=False, debug=False, step_ix=0, round_dec=None):\n", - " dim_vals = ds[dim_name].values\n", - " diffs = [d2 - d1 for d1, d2 in zip(dim_vals, dim_vals[1:])]\n", - " # option to round number of decimals\n", - " # sometimes there are different steps calculated due to small rounding errors coming out of the diff\n", - " # calculation, rounding these can correct for that\n", - " if round_dec:\n", - " unique_steps = np.unique(np.array(diffs).round(decimals=round_dec), return_counts=True)\n", - " else:\n", - " unique_steps = np.unique(diffs, return_counts=True)\n", - " step_list = unique_steps[0]\n", - " # optional - for invesitgating uneven steps\n", - " if debug:\n", - " print(f'step_list: {step_list}')\n", - " print(f'step_count: {unique_steps[1]}')\n", - " indices = [i for i, x in enumerate(diffs) if x == step_list[step_ix]]\n", - " print(f'index locations of step index {step_ix} in step_list: {indices}')\n", - " # set step - if all steps are the same length\n", - " # datacube spec specifies to use null for irregularly spaced steps\n", - " if len(step_list)==1:\n", - " if time_dim:\n", - " # make sure time deltas are in np timedelta format\n", - " step_list = [np.array([step], dtype=\"timedelta64[ns]\")[0] for step in step_list]\n", - " step = step_list[0].astype(float).item()\n", - " else:\n", - " step = None\n", - " return(step)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a20d12bf-a511-4c5e-84d0-77e2ec551518", - "metadata": {}, - "outputs": [], - 
"source": [ - "def get_long_name(ds, v):\n", - " # try to get long_name attribute from variable\n", - " try:\n", - " long_name = ds[v].attrs['long_name']\n", - " # otherwise, leave empty\n", - " except:\n", - " long_name = None\n", - " return long_name" - ] - }, { "cell_type": "markdown", "id": "e7dc357c-91ec-49ae-83e5-400f791f9792", @@ -577,7 +511,7 @@ "outputs": [], "source": [ "# # debugging for time steps: get all step values and locations\n", - "# time_step = get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=1)" + "# time_step = stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=1)" ] }, { @@ -609,7 +543,7 @@ "metadata": {}, "outputs": [], "source": [ - "x_step = get_step(ds, dim_names_dict['X'])\n", + "x_step = stac_helpers.get_step(ds, dim_names_dict['X'])\n", "print(f'x step: {x_step}')" ] }, @@ -622,7 +556,7 @@ "source": [ "# # debugging for spatial steps: get all step values and locations\n", "# x_dim=dim_names_dict['X']\n", - "# x_step = get_step(ds, x_dim, debug=True, step_ix=1)\n", + "# x_step = stac_helpers.get_step(ds, x_dim, debug=True, step_ix=1)\n", "# print(f'\\nx dim name (for next cell): {x_dim}')" ] }, @@ -655,7 +589,7 @@ "metadata": {}, "outputs": [], "source": [ - "y_step = get_step(ds, dim_names_dict['Y'])\n", + "y_step = stac_helpers.get_step(ds, dim_names_dict['Y'])\n", "print(f'y step: {y_step}')" ] }, @@ -668,7 +602,7 @@ "source": [ "# # debugging for spatial steps: get all step values and locations\n", "# y_dim=dim_names_dict['Y']\n", - "# y_step = get_step(ds, y_dim, debug=True, step_ix=1)\n", + "# y_step = stac_helpers.get_step(ds, y_dim, debug=True, step_ix=1)\n", "# print(f'\\nx dim name (for next cell): {x_dim}')" ] }, @@ -723,10 +657,10 @@ "\n", "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n", "# note that the extent of each dimension should be pulled from the dataset\n", - "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n", - " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", - " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", - " 'nv': pystac.extensions.datacube.Dimension({'type': 'count', 'description': get_long_name(ds, 'nv'), 'extent': [ds.nv.min().item(), ds.nv.max().item()]}),\n", + "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n", + " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", + " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': 
stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", + " 'nv': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'nv'), 'extent': [ds.nv.min().item(), ds.nv.max().item()]}),\n", " }" ] }, @@ -738,43 +672,6 @@ "### Add cube variables (optional field for extension)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "92510876-7853-4d24-8563-c69f9012aeb6", - "metadata": {}, - "outputs": [], - "source": [ - "# define functions to pull out datacube attributes and validate format\n", - "def get_unit(ds, v):\n", - " # check if unit is defined for variable\n", - " try:\n", - " unit = ds[v].attrs['units']\n", - " except:\n", - " unit = None\n", - " # check if unit comes from https://docs.unidata.ucar.edu/udunits/current/#Database\n", - " # datacube extension specifies: The unit of measurement for the data, preferably compliant to UDUNITS-2 units (singular).\n", - " # gdptools expects this format as well\n", - " try:\n", - " cfunits.Units(unit).isvalid\n", - " except:\n", - " print(\"Unit is not valid as a UD unit.\")\n", - " unit = str(input(\"Please enter a valid unit for {v} from here: https://docs.unidata.ucar.edu/udunits/current/#Database\"))\n", - " assert cfunits.Units(unit).isvalid\n", - " return unit\n", - "\n", - "def get_var_type(ds, v):\n", - " if v in ds.coords:\n", - " # type = auxiliary for a variable that contains coordinate data, but isn't a dimension in cube:dimensions.\n", - " # For example, the values of the datacube might be provided in the projected coordinate reference system, \n", - " # but the datacube could have a variable lon with dimensions (y, x), giving the longitude at each point.\n", - " var_type = 'auxiliary'\n", - " # type = data for a variable indicating some measured value, for example \"precipitation\", \"temperature\", etc.\n", - " else:\n", - " var_type = 'data'\n", - " return var_type" - ] - }, { "cell_type": "code", "execution_count": null, @@ -801,9 +698,9 @@ "# create dictionary of dataset variables and associated dimensions\n", "vars_dict={}\n", "for v in vars:\n", - " unit = get_unit(ds, v)\n", - " var_type = get_var_type(ds, v)\n", - " long_name = get_long_name(ds, v)\n", + " unit = stac_helpers.get_unit(ds, v)\n", + " var_type = stac_helpers.get_var_type(ds, v)\n", + " long_name = stac_helpers.get_long_name(ds, v)\n", " vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})" ] }, diff --git a/workflows/archive/TTU_2019_rcp85_gridded_annual_create_collection_from_zarr.ipynb b/workflows/archive/TTU_2019_rcp85_gridded_annual_create_collection_from_zarr.ipynb index 355ca14f..c8586ac6 100644 --- a/workflows/archive/TTU_2019_rcp85_gridded_annual_create_collection_from_zarr.ipynb +++ b/workflows/archive/TTU_2019_rcp85_gridded_annual_create_collection_from_zarr.ipynb @@ -187,22 +187,9 @@ "outputs": [], "source": [ "# dims_auto_extract = ['X', 'Y', 'T']\n", - "# def extract_dim(ds, d):\n", - "# try:\n", - "# dim_list = ds.cf.axes[d]\n", - "# assert len(dim_list)==1, f'There are too many {d} dimensions in this dataset.'\n", - "# dim = dim_list[0]\n", - "# except KeyError:\n", - "# print(f\"Could not auto-extract {d} dimension name.\")\n", - "# print(\"Look at the xarray output above showing the dataset dimensions.\")\n", - "# dim = str(input(f\"What is the name of the {d} dimension of this dataset?\"))\n", - "# assert dim 
in ds.dims, \"That is not a valid dimension name for this dataset\"\n", - "# print(f\"name of {d} dimension: {dim}\\n\")\n", - "# return dim\n", - "\n", "# dim_names_dict = {}\n", "# for d in dims_auto_extract:\n", - "# dim_names_dict[d] = extract_dim(ds, d)\n", + "# dim_names_dict[d] = stac_helpers.extract_dim(ds, d)\n", "dim_names_dict = {'X': 'lon', 'Y': 'lat', 'T': 'time'}\n", "print(f\"Dimension dictionary: {dim_names_dict}\")" ] @@ -441,59 +428,6 @@ "print(xy_bounds)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f49d6d7-9e30-4144-909b-fa1238e6c77a", - "metadata": {}, - "outputs": [], - "source": [ - "def get_step(ds, dim_name, time_dim=False, debug=False, step_ix=0, round_dec=None):\n", - " dim_vals = ds[dim_name].values\n", - " diffs = [d2 - d1 for d1, d2 in zip(dim_vals, dim_vals[1:])]\n", - " # option to round number of decimals\n", - " # sometimes there are different steps calculated due to small rounding errors coming out of the diff\n", - " # calculation, rounding these can correct for that\n", - " if round_dec:\n", - " unique_steps = np.unique(np.array(diffs).round(decimals=round_dec), return_counts=True)\n", - " else:\n", - " unique_steps = np.unique(diffs, return_counts=True)\n", - " step_list = unique_steps[0]\n", - " # optional - for invesitgating uneven steps\n", - " if debug:\n", - " print(f'step_list: {step_list}')\n", - " print(f'step_count: {unique_steps[1]}')\n", - " indices = [i for i, x in enumerate(diffs) if x == step_list[step_ix]]\n", - " print(f'index locations of step index {step_ix} in step_list: {indices}')\n", - " # set step - if all steps are the same length\n", - " # datacube spec specifies to use null for irregularly spaced steps\n", - " if len(step_list)==1:\n", - " if time_dim:\n", - " # make sure time deltas are in np timedelta format\n", - " step_list = [np.array([step], dtype=\"timedelta64[ns]\")[0] for step in step_list]\n", - " step = step_list[0].astype(float).item()\n", - " else:\n", - " step = None\n", - " return(step)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a20d12bf-a511-4c5e-84d0-77e2ec551518", - "metadata": {}, - "outputs": [], - "source": [ - "def get_long_name(ds, v):\n", - " # try to get long_name attribute from variable\n", - " try:\n", - " long_name = ds[v].attrs['long_name']\n", - " # otherwise, leave empty\n", - " except:\n", - " long_name = None\n", - " return long_name" - ] - }, { "cell_type": "markdown", "id": "e7dc357c-91ec-49ae-83e5-400f791f9792", @@ -575,7 +509,7 @@ "outputs": [], "source": [ "# # debugging for time steps: get all step values and locations\n", - "# time_step = get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=1)" + "# time_step = stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=1)" ] }, { @@ -607,7 +541,7 @@ "metadata": {}, "outputs": [], "source": [ - "x_step = get_step(ds, dim_names_dict['X'])\n", + "x_step = stac_helpers.get_step(ds, dim_names_dict['X'])\n", "print(f'x step: {x_step}')" ] }, @@ -620,7 +554,7 @@ "source": [ "# # debugging for spatial steps: get all step values and locations\n", "# x_dim=dim_names_dict['X']\n", - "# x_step = get_step(ds, x_dim, debug=True, step_ix=1)\n", + "# x_step = stac_helpers.get_step(ds, x_dim, debug=True, step_ix=1)\n", "# print(f'\\nx dim name (for next cell): {x_dim}')" ] }, @@ -653,7 +587,7 @@ "metadata": {}, "outputs": [], "source": [ - "y_step = get_step(ds, dim_names_dict['Y'])\n", + "y_step = stac_helpers.get_step(ds, dim_names_dict['Y'])\n", "print(f'y 
step: {y_step}')" ] }, @@ -666,7 +600,7 @@ "source": [ "# # debugging for spatial steps: get all step values and locations\n", "# y_dim=dim_names_dict['Y']\n", - "# y_step = get_step(ds, y_dim, debug=True, step_ix=1)\n", + "# y_step = stac_helpers.get_step(ds, y_dim, debug=True, step_ix=1)\n", "# print(f'\\nx dim name (for next cell): {x_dim}')" ] }, @@ -721,10 +655,10 @@ "\n", "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n", "# note that the extent of each dimension should be pulled from the dataset\n", - "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n", - " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", - " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", - " 'nv': pystac.extensions.datacube.Dimension({'type': 'count', 'description': get_long_name(ds, 'nv'), 'extent': [ds.nv.min().item(), ds.nv.max().item()]}),\n", + "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n", + " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", + " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", + " 'nv': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'nv'), 'extent': [ds.nv.min().item(), ds.nv.max().item()]}),\n", " }" ] }, @@ -736,43 +670,6 @@ "### Add cube variables (optional field for extension)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "92510876-7853-4d24-8563-c69f9012aeb6", - "metadata": {}, - "outputs": [], - "source": [ - "# define functions to pull out datacube attributes and validate format\n", - "def get_unit(ds, v):\n", - " # check if unit is defined for variable\n", - " try:\n", - " unit = ds[v].attrs['units']\n", - " except:\n", - " unit = None\n", - " # check if unit comes from https://docs.unidata.ucar.edu/udunits/current/#Database\n", - " # datacube extension specifies: The unit of measurement for the data, preferably compliant to UDUNITS-2 units (singular).\n", - " # gdptools expects this format as well\n", - " try:\n", - " cfunits.Units(unit).isvalid\n", - " except:\n", - " print(\"Unit is not valid as a UD unit.\")\n", - " unit = str(input(\"Please enter a valid unit for {v} from here: https://docs.unidata.ucar.edu/udunits/current/#Database\"))\n", - " assert cfunits.Units(unit).isvalid\n", - " return unit\n", - "\n", - "def 
get_var_type(ds, v):\n", - " if v in ds.coords:\n", - " # type = auxiliary for a variable that contains coordinate data, but isn't a dimension in cube:dimensions.\n", - " # For example, the values of the datacube might be provided in the projected coordinate reference system, \n", - " # but the datacube could have a variable lon with dimensions (y, x), giving the longitude at each point.\n", - " var_type = 'auxiliary'\n", - " # type = data for a variable indicating some measured value, for example \"precipitation\", \"temperature\", etc.\n", - " else:\n", - " var_type = 'data'\n", - " return var_type" - ] - }, { "cell_type": "code", "execution_count": null, @@ -799,9 +696,9 @@ "# create dictionary of dataset variables and associated dimensions\n", "vars_dict={}\n", "for v in vars:\n", - " unit = get_unit(ds, v)\n", - " var_type = get_var_type(ds, v)\n", - " long_name = get_long_name(ds, v)\n", + " unit = stac_helpers.get_unit(ds, v)\n", + " var_type = stac_helpers.get_var_type(ds, v)\n", + " long_name = stac_helpers.get_long_name(ds, v)\n", " vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})" ] }, @@ -843,18 +740,7 @@ "source": [ "# # helper to find items of wrong type\n", "# d = collection.to_dict()\n", - "# def find_paths(nested_dict, prepath=()):\n", - "# for k, v in nested_dict.items():\n", - "# try:\n", - "# path = prepath + (k,)\n", - "# if type(v) is np.float64: # found value\n", - "# yield path\n", - "# elif hasattr(v, 'items'): # v is a dict\n", - "# yield from find_paths(v, path) \n", - "# except:\n", - "# print(prepath)\n", - "\n", - "# print(*find_paths(d))" + "# print(*stac_helpers.find_paths(d))" ] }, { diff --git a/workflows/archive/TTU_2019_rcp85_time_slices_create_collection_from_zarr.ipynb b/workflows/archive/TTU_2019_rcp85_time_slices_create_collection_from_zarr.ipynb index 831b1d24..3e90059b 100644 --- a/workflows/archive/TTU_2019_rcp85_time_slices_create_collection_from_zarr.ipynb +++ b/workflows/archive/TTU_2019_rcp85_time_slices_create_collection_from_zarr.ipynb @@ -189,22 +189,9 @@ "outputs": [], "source": [ "# dims_auto_extract = ['X', 'Y', 'T']\n", - "# def extract_dim(ds, d):\n", - "# try:\n", - "# dim_list = ds.cf.axes[d]\n", - "# assert len(dim_list)==1, f'There are too many {d} dimensions in this dataset.'\n", - "# dim = dim_list[0]\n", - "# except KeyError:\n", - "# print(f\"Could not auto-extract {d} dimension name.\")\n", - "# print(\"Look at the xarray output above showing the dataset dimensions.\")\n", - "# dim = str(input(f\"What is the name of the {d} dimension of this dataset?\"))\n", - "# assert dim in ds.dims, \"That is not a valid dimension name for this dataset\"\n", - "# print(f\"name of {d} dimension: {dim}\\n\")\n", - "# return dim\n", - "\n", "# dim_names_dict = {}\n", "# for d in dims_auto_extract:\n", - "# dim_names_dict[d] = extract_dim(ds, d)\n", + "# dim_names_dict[d] = stac_helpers.extract_dim(ds, d)\n", "dim_names_dict = {'X': 'lon', 'Y': 'lat', 'T': 'time'}\n", "print(f\"Dimension dictionary: {dim_names_dict}\")" ] @@ -443,59 +430,6 @@ "print(xy_bounds)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f49d6d7-9e30-4144-909b-fa1238e6c77a", - "metadata": {}, - "outputs": [], - "source": [ - "def get_step(ds, dim_name, time_dim=False, debug=False, step_ix=0, round_dec=None):\n", - " dim_vals = ds[dim_name].values\n", - " diffs = [d2 - d1 for d1, d2 in zip(dim_vals, dim_vals[1:])]\n", - " # option to round number of decimals\n", 
- " # sometimes there are different steps calculated due to small rounding errors coming out of the diff\n", - " # calculation, rounding these can correct for that\n", - " if round_dec:\n", - " unique_steps = np.unique(np.array(diffs).round(decimals=round_dec), return_counts=True)\n", - " else:\n", - " unique_steps = np.unique(diffs, return_counts=True)\n", - " step_list = unique_steps[0]\n", - " # optional - for invesitgating uneven steps\n", - " if debug:\n", - " print(f'step_list: {step_list}')\n", - " print(f'step_count: {unique_steps[1]}')\n", - " indices = [i for i, x in enumerate(diffs) if x == step_list[step_ix]]\n", - " print(f'index locations of step index {step_ix} in step_list: {indices}')\n", - " # set step - if all steps are the same length\n", - " # datacube spec specifies to use null for irregularly spaced steps\n", - " if len(step_list)==1:\n", - " if time_dim:\n", - " # make sure time deltas are in np timedelta format\n", - " step_list = [np.array([step], dtype=\"timedelta64[ns]\")[0] for step in step_list]\n", - " step = step_list[0].astype(float).item()\n", - " else:\n", - " step = None\n", - " return(step)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a20d12bf-a511-4c5e-84d0-77e2ec551518", - "metadata": {}, - "outputs": [], - "source": [ - "def get_long_name(ds, v):\n", - " # try to get long_name attribute from variable\n", - " try:\n", - " long_name = ds[v].attrs['long_name']\n", - " # otherwise, leave empty\n", - " except:\n", - " long_name = None\n", - " return long_name" - ] - }, { "cell_type": "markdown", "id": "e7dc357c-91ec-49ae-83e5-400f791f9792", @@ -577,7 +511,7 @@ "outputs": [], "source": [ "# # debugging for time steps: get all step values and locations\n", - "# time_step = get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=1)" + "# time_step = stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=1)" ] }, { @@ -609,7 +543,7 @@ "metadata": {}, "outputs": [], "source": [ - "x_step = get_step(ds, dim_names_dict['X'])\n", + "x_step = stac_helpers.get_step(ds, dim_names_dict['X'])\n", "print(f'x step: {x_step}')" ] }, @@ -622,7 +556,7 @@ "source": [ "# # debugging for spatial steps: get all step values and locations\n", "# x_dim=dim_names_dict['X']\n", - "# x_step = get_step(ds, x_dim, debug=True, step_ix=1)\n", + "# x_step = stac_helpers.get_step(ds, x_dim, debug=True, step_ix=1)\n", "# print(f'\\nx dim name (for next cell): {x_dim}')" ] }, @@ -655,7 +589,7 @@ "metadata": {}, "outputs": [], "source": [ - "y_step = get_step(ds, dim_names_dict['Y'])\n", + "y_step = stac_helpers.get_step(ds, dim_names_dict['Y'])\n", "print(f'y step: {y_step}')" ] }, @@ -668,7 +602,7 @@ "source": [ "# # debugging for spatial steps: get all step values and locations\n", "# y_dim=dim_names_dict['Y']\n", - "# y_step = get_step(ds, y_dim, debug=True, step_ix=1)\n", + "# y_step = stac_helpers.get_step(ds, y_dim, debug=True, step_ix=1)\n", "# print(f'\\nx dim name (for next cell): {x_dim}')" ] }, @@ -723,10 +657,10 @@ "\n", "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n", "# note that the extent of each dimension should be pulled from the dataset\n", - "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n", - " dim_names_dict['X']: 
pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", - " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", - " 'nv': pystac.extensions.datacube.Dimension({'type': 'count', 'description': get_long_name(ds, 'nv'), 'extent': [ds.nv.min().item(), ds.nv.max().item()]}),\n", + "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step':time_step}),\n", + " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", + " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", + " 'nv': pystac.extensions.datacube.Dimension({'type': 'count', 'description': stac_helpers.get_long_name(ds, 'nv'), 'extent': [ds.nv.min().item(), ds.nv.max().item()]}),\n", " }" ] }, @@ -738,43 +672,6 @@ "### Add cube variables (optional field for extension)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "92510876-7853-4d24-8563-c69f9012aeb6", - "metadata": {}, - "outputs": [], - "source": [ - "# define functions to pull out datacube attributes and validate format\n", - "def get_unit(ds, v):\n", - " # check if unit is defined for variable\n", - " try:\n", - " unit = ds[v].attrs['units']\n", - " except:\n", - " unit = None\n", - " # check if unit comes from https://docs.unidata.ucar.edu/udunits/current/#Database\n", - " # datacube extension specifies: The unit of measurement for the data, preferably compliant to UDUNITS-2 units (singular).\n", - " # gdptools expects this format as well\n", - " try:\n", - " cfunits.Units(unit).isvalid\n", - " except:\n", - " print(\"Unit is not valid as a UD unit.\")\n", - " unit = str(input(\"Please enter a valid unit for {v} from here: https://docs.unidata.ucar.edu/udunits/current/#Database\"))\n", - " assert cfunits.Units(unit).isvalid\n", - " return unit\n", - "\n", - "def get_var_type(ds, v):\n", - " if v in ds.coords:\n", - " # type = auxiliary for a variable that contains coordinate data, but isn't a dimension in cube:dimensions.\n", - " # For example, the values of the datacube might be provided in the projected coordinate reference system, \n", - " # but the datacube could have a variable lon with dimensions (y, x), giving the longitude at each point.\n", - " var_type = 'auxiliary'\n", - " # type = data for a variable indicating some measured value, for example \"precipitation\", \"temperature\", etc.\n", - " else:\n", - " var_type = 'data'\n", - " return var_type" - ] - }, { "cell_type": "code", "execution_count": null, @@ -801,9 +698,9 @@ "# create dictionary of dataset variables and associated dimensions\n", "vars_dict={}\n", "for v in vars:\n", - " unit = get_unit(ds, v)\n", - " var_type = get_var_type(ds, v)\n", - " 
long_name = get_long_name(ds, v)\n", + " unit = stac_helpers.get_unit(ds, v)\n", + " var_type = stac_helpers.get_var_type(ds, v)\n", + " long_name = stac_helpers.get_long_name(ds, v)\n", " vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})" ] }, @@ -845,18 +742,7 @@ "source": [ "# # helper to find items of wrong type\n", "# d = collection.to_dict()\n", - "# def find_paths(nested_dict, prepath=()):\n", - "# for k, v in nested_dict.items():\n", - "# try:\n", - "# path = prepath + (k,)\n", - "# if type(v) is np.float64: # found value\n", - "# yield path\n", - "# elif hasattr(v, 'items'): # v is a dict\n", - "# yield from find_paths(v, path) \n", - "# except:\n", - "# print(prepath)\n", - "\n", - "# print(*find_paths(d))" + "# print(*stac_helpers.find_paths(d))" ] }, { -- GitLab
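
For reference, below is a minimal sketch of the consolidated stac_helpers module these notebooks now call into (stac_helpers.extract_dim, get_step, get_long_name, get_unit, get_var_type, and find_paths), reassembled from the function bodies removed in this patch. The actual module in the repository may differ. Two deviations from the removed cells are editorial assumptions, flagged in comments: get_unit tests cfunits.Units(unit).isvalid directly, since isvalid returns a bool rather than raising, so the old try/except could never detect an invalid unit; and its input prompt is an f-string, so {v} is actually interpolated.

import numpy as np
import cfunits
import cf_xarray  # noqa: F401 -- registers the .cf accessor used by extract_dim


def extract_dim(ds, d):
    """Return the dataset's dimension name for CF axis d ('X', 'Y', or 'T')."""
    try:
        dim_list = ds.cf.axes[d]
        assert len(dim_list) == 1, f'There are too many {d} dimensions in this dataset.'
        dim = dim_list[0]
    except KeyError:
        # fall back to asking the user when cf_xarray cannot identify the axis
        print(f"Could not auto-extract {d} dimension name.")
        print("Look at the xarray output above showing the dataset dimensions.")
        dim = str(input(f"What is the name of the {d} dimension of this dataset?"))
        assert dim in ds.dims, "That is not a valid dimension name for this dataset"
    print(f"name of {d} dimension: {dim}\n")
    return dim


def get_step(ds, dim_name, time_dim=False, debug=False, step_ix=0, round_dec=None):
    """Return the regular step along dim_name, or None if spacing is irregular."""
    dim_vals = ds[dim_name].values
    diffs = [d2 - d1 for d1, d2 in zip(dim_vals, dim_vals[1:])]
    # optionally round the diffs: floating-point noise can otherwise split one
    # true step size into several near-identical values
    if round_dec:
        unique_steps = np.unique(np.array(diffs).round(decimals=round_dec), return_counts=True)
    else:
        unique_steps = np.unique(diffs, return_counts=True)
    step_list = unique_steps[0]
    # optional - for investigating uneven steps
    if debug:
        print(f'step_list: {step_list}')
        print(f'step_count: {unique_steps[1]}')
        indices = [i for i, x in enumerate(diffs) if x == step_list[step_ix]]
        print(f'index locations of step index {step_ix} in step_list: {indices}')
    # the datacube spec says to use null for irregularly spaced dimensions, so
    # only report a step when every diff is identical
    if len(step_list) == 1:
        if time_dim:
            # make sure time deltas are in np timedelta format
            step_list = [np.array([step], dtype="timedelta64[ns]")[0] for step in step_list]
        step = step_list[0].astype(float).item()
    else:
        step = None
    return step


def get_long_name(ds, v):
    # return the variable's long_name attribute, or None if it has none
    return ds[v].attrs.get('long_name')


def get_unit(ds, v):
    """Return a UDUNITS-2 compliant unit for variable v, prompting if needed."""
    unit = ds[v].attrs.get('units')
    # the datacube extension prefers UDUNITS-2 units (singular), and gdptools
    # expects that format as well
    # assumption: isvalid is checked directly (it returns a bool, never raises)
    if unit is None or not cfunits.Units(unit).isvalid:
        print("Unit is not valid as a UD unit.")
        # assumption: f-string added so the variable name appears in the prompt
        unit = str(input(f"Please enter a valid unit for {v} from here: https://docs.unidata.ucar.edu/udunits/current/#Database"))
        assert cfunits.Units(unit).isvalid
    return unit


def get_var_type(ds, v):
    # 'auxiliary' for a variable that contains coordinate data but isn't a
    # dimension in cube:dimensions (e.g. a lon variable with dimensions (y, x));
    # 'data' for a measured value such as precipitation or temperature
    return 'auxiliary' if v in ds.coords else 'data'


def find_paths(nested_dict, prepath=()):
    # walk a nested dict and yield the key path of every np.float64 value --
    # these are not JSON-serializable and break serialization of collection.to_dict()
    for k, v in nested_dict.items():
        path = prepath + (k,)
        if type(v) is np.float64:  # found value
            yield path
        elif hasattr(v, 'items'):  # v is a dict
            yield from find_paths(v, path)

Notebook usage is unchanged apart from the module prefix, e.g. x_step = stac_helpers.get_step(ds, dim_names_dict['X']).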