diff --git a/catalog/CooperMcKenzie/collection.json b/catalog/CooperMcKenzie/collection.json new file mode 100644 index 0000000000000000000000000000000000000000..576c38082c1a5ca758d7d16f64902caf0a0d1502 --- /dev/null +++ b/catalog/CooperMcKenzie/collection.json @@ -0,0 +1,292 @@ +{ + "type": "Collection", + "id": "CooperMcKenzie", + "stac_version": "1.0.0", + "description": "SnowModel Output McKenzie River Basin, Cascades, UTM Zone 10N NAD83 (2014)", + "links": [ + { + "rel": "root", + "href": "../catalog.json", + "type": "application/json" + }, + { + "rel": "parent", + "href": "../catalog.json", + "type": "application/json" + } + ], + "stac_extensions": [ + "https://stac-extensions.github.io/datacube/v2.2.0/schema.json" + ], + "cube:dimensions": { + "time": { + "type": "temporal", + "description": "time", + "extent": [ + "1988-09-30T00:00:00Z", + "2009-09-29T00:00:00Z" + ], + "step": "P1DT0H0M0S" + }, + "UTM_Meters_East": { + "type": "spatial", + "axis": "x", + "description": "UTM Zone 10N Meters East", + "extent": [ + 489701.14541243, + 601701.1454124299 + ], + "step": 100.0, + "reference_system": "{\"$schema\":\"https://proj.org/schemas/v0.5/projjson.schema.json\",\"type\":\"ProjectedCRS\",\"name\":\"NAD83 / UTM zone 10N\",\"base_crs\":{\"name\":\"NAD83\",\"datum\":{\"type\":\"GeodeticReferenceFrame\",\"name\":\"North American Datum 1983\",\"ellipsoid\":{\"name\":\"GRS 1980\",\"semi_major_axis\":6378137,\"inverse_flattening\":298.257222101}},\"coordinate_system\":{\"subtype\":\"ellipsoidal\",\"axis\":[{\"name\":\"Geodetic latitude\",\"abbreviation\":\"Lat\",\"direction\":\"north\",\"unit\":\"degree\"},{\"name\":\"Geodetic longitude\",\"abbreviation\":\"Lon\",\"direction\":\"east\",\"unit\":\"degree\"}]},\"id\":{\"authority\":\"EPSG\",\"code\":4269}},\"conversion\":{\"name\":\"UTM zone 10N\",\"method\":{\"name\":\"Transverse Mercator\",\"id\":{\"authority\":\"EPSG\",\"code\":9807}},\"parameters\":[{\"name\":\"Latitude of natural 
origin\",\"value\":0,\"unit\":\"degree\",\"id\":{\"authority\":\"EPSG\",\"code\":8801}},{\"name\":\"Longitude of natural origin\",\"value\":-123,\"unit\":\"degree\",\"id\":{\"authority\":\"EPSG\",\"code\":8802}},{\"name\":\"Scale factor at natural origin\",\"value\":0.9996,\"unit\":\"unity\",\"id\":{\"authority\":\"EPSG\",\"code\":8805}},{\"name\":\"False easting\",\"value\":500000,\"unit\":\"metre\",\"id\":{\"authority\":\"EPSG\",\"code\":8806}},{\"name\":\"False northing\",\"value\":0,\"unit\":\"metre\",\"id\":{\"authority\":\"EPSG\",\"code\":8807}}]},\"coordinate_system\":{\"subtype\":\"Cartesian\",\"axis\":[{\"name\":\"Easting\",\"abbreviation\":\"E\",\"direction\":\"east\",\"unit\":\"metre\"},{\"name\":\"Northing\",\"abbreviation\":\"N\",\"direction\":\"north\",\"unit\":\"metre\"}]},\"scope\":\"Engineering survey, topographic mapping.\",\"area\":\"North America - between 126°W and 120°W - onshore and offshore. Canada - British Columbia; Northwest Territories; Yukon. United States (USA) - California; Oregon; Washington.\",\"bbox\":{\"south_latitude\":30.54,\"west_longitude\":-126,\"north_latitude\":81.8,\"east_longitude\":-119.99},\"id\":{\"authority\":\"EPSG\",\"code\":26910}}" + }, + "UTM_Meters_North": { + "type": "spatial", + "axis": "y", + "description": "UTM Zone 10N Meters North", + "extent": [ + 4854173.6404094, + 4929973.6404094 + ], + "step": 100.0, + "reference_system": "{\"$schema\":\"https://proj.org/schemas/v0.5/projjson.schema.json\",\"type\":\"ProjectedCRS\",\"name\":\"NAD83 / UTM zone 10N\",\"base_crs\":{\"name\":\"NAD83\",\"datum\":{\"type\":\"GeodeticReferenceFrame\",\"name\":\"North American Datum 1983\",\"ellipsoid\":{\"name\":\"GRS 1980\",\"semi_major_axis\":6378137,\"inverse_flattening\":298.257222101}},\"coordinate_system\":{\"subtype\":\"ellipsoidal\",\"axis\":[{\"name\":\"Geodetic latitude\",\"abbreviation\":\"Lat\",\"direction\":\"north\",\"unit\":\"degree\"},{\"name\":\"Geodetic 
longitude\",\"abbreviation\":\"Lon\",\"direction\":\"east\",\"unit\":\"degree\"}]},\"id\":{\"authority\":\"EPSG\",\"code\":4269}},\"conversion\":{\"name\":\"UTM zone 10N\",\"method\":{\"name\":\"Transverse Mercator\",\"id\":{\"authority\":\"EPSG\",\"code\":9807}},\"parameters\":[{\"name\":\"Latitude of natural origin\",\"value\":0,\"unit\":\"degree\",\"id\":{\"authority\":\"EPSG\",\"code\":8801}},{\"name\":\"Longitude of natural origin\",\"value\":-123,\"unit\":\"degree\",\"id\":{\"authority\":\"EPSG\",\"code\":8802}},{\"name\":\"Scale factor at natural origin\",\"value\":0.9996,\"unit\":\"unity\",\"id\":{\"authority\":\"EPSG\",\"code\":8805}},{\"name\":\"False easting\",\"value\":500000,\"unit\":\"metre\",\"id\":{\"authority\":\"EPSG\",\"code\":8806}},{\"name\":\"False northing\",\"value\":0,\"unit\":\"metre\",\"id\":{\"authority\":\"EPSG\",\"code\":8807}}]},\"coordinate_system\":{\"subtype\":\"Cartesian\",\"axis\":[{\"name\":\"Easting\",\"abbreviation\":\"E\",\"direction\":\"east\",\"unit\":\"metre\"},{\"name\":\"Northing\",\"abbreviation\":\"N\",\"direction\":\"north\",\"unit\":\"metre\"}]},\"scope\":\"Engineering survey, topographic mapping.\",\"area\":\"North America - between 126°W and 120°W - onshore and offshore. Canada - British Columbia; Northwest Territories; Yukon. 
United States (USA) - California; Oregon; Washington.\",\"bbox\":{\"south_latitude\":30.54,\"west_longitude\":-126,\"north_latitude\":81.8,\"east_longitude\":-119.99},\"id\":{\"authority\":\"EPSG\",\"code\":26910}}" + } + }, + "cube:variables": { + "McKenzie_reference_climate_precipitation": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Precipitation (Rain + Snow) (m)", + "unit": "m" + }, + "McKenzie_reference_climate_runoff": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Runoff (Rain + Snowmelt) (m)", + "unit": "m" + }, + "McKenzie_reference_climate_snowfall": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Snowfall-Water-Equivalent (m)", + "unit": "m" + }, + "McKenzie_reference_climate_swe": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Snow-Water-Equivalent (m)", + "unit": "m" + }, + "McKenzie_reference_climate_swemelt": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Snowmelt (m)", + "unit": "kg m-2" + }, + "McKenzie_t2_precipitation": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Precipitation (Rain + Snow) (m)", + "unit": "m" + }, + "McKenzie_t2_runoff": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Runoff (Rain + Snowmelt) (m)", + "unit": "m" + }, + "McKenzie_t2_snowfall": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Snowfall-Water-Equivalent (m)", + "unit": "m" + }, + "McKenzie_t2_swe": { + "dimensions": [ + "time", + "UTM_Meters_North", + 
"UTM_Meters_East" + ], + "type": "data", + "description": "Snow-Water-Equivalent (m)", + "unit": "m" + }, + "McKenzie_t2_swemelt": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Snowmelt (m)", + "unit": "kg m-2" + }, + "McKenzie_t2p10_precipitation": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Precipitation (Rain + Snow) (m)", + "unit": "m" + }, + "McKenzie_t2p10_runoff": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Runoff (Rain + Snowmelt) (m)", + "unit": "m" + }, + "McKenzie_t2p10_snowfall": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Snowfall-Water-Equivalent (m)", + "unit": "m" + }, + "McKenzie_t2p10_swe": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Snow-Water-Equivalent (m)", + "unit": "m" + }, + "McKenzie_t2p10_swemelt": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Snowmelt (m)", + "unit": "kg m-2" + }, + "crs": { + "dimensions": [], + "type": "data", + "description": null, + "unit": null + }, + "lat": { + "dimensions": [ + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "auxiliary", + "description": "Latitude", + "unit": "degrees_north" + }, + "lon": { + "dimensions": [ + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "auxiliary", + "description": "Longitude", + "unit": "degrees_east" + } + }, + "extent": { + "spatial": { + "bbox": [ + [ + -123.12959312773609, + 43.833639916731286, + -121.7203572712103, + 44.52308115319376 + ] + ] + }, + "temporal": { + "interval": [ + [ + "1988-09-30T00:00:00Z", + "2009-09-29T00:00:00Z" + ] + ] + } + }, + "license": "Unlicense", + "assets": { + 
"zarr-s3-osn": { + "href": "s3://mdmf/gdp/CooperMcKenzie.zarr/", + "type": "application/vnd+zarr", + "description": "Open Storage Network Pod S3 API access to collection zarr group", + "xarray:open_kwargs": { + "chunks": {}, + "engine": "zarr", + "consolidated": true + }, + "xarray:storage_options": { + "anon": true, + "client_kwargs": { + "endpoint_url": "https://usgs.osn.mghpcc.org/" + } + }, + "roles": [ + "data", + "zarr", + "s3" + ] + }, + "zarr-s3": { + "href": "s3://nhgf-development/workspace/DataConversion/CooperMcKenzie.zarr/", + "type": "application/vnd+zarr", + "description": "S3 access to collection zarr group", + "xarray:open_kwargs": { + "chunks": {}, + "engine": "zarr", + "consolidated": true + }, + "xarray:storage_options": { + "requester_pays": true + }, + "roles": [ + "data", + "zarr", + "s3" + ] + } + } +} \ No newline at end of file diff --git a/catalog/CooperUpperDeschutes/collection.json b/catalog/CooperUpperDeschutes/collection.json new file mode 100644 index 0000000000000000000000000000000000000000..41d38046e5b634739bcff06703379d013a26b7b6 --- /dev/null +++ b/catalog/CooperUpperDeschutes/collection.json @@ -0,0 +1,392 @@ +{ + "type": "Collection", + "id": "CooperUpperDeschutes", + "stac_version": "1.0.0", + "description": "SnowModel Output Upper Deschutes River Basin, Cascades, UTM Zone 10N NAD83 (2014)", + "links": [ + { + "rel": "root", + "href": "../catalog.json", + "type": "application/json" + }, + { + "rel": "parent", + "href": "../catalog.json", + "type": "application/json" + } + ], + "stac_extensions": [ + "https://stac-extensions.github.io/datacube/v2.2.0/schema.json" + ], + "cube:dimensions": { + "time": { + "type": "temporal", + "description": "time", + "extent": [ + "1988-09-30T00:00:00Z", + "2011-09-29T00:00:00Z" + ], + "step": "P1DT0H0M0S" + }, + "UTM_Meters_East": { + "type": "spatial", + "axis": "x", + "description": "UTM Zone 10N Meters East", + "extent": [ + 587990.53711819, + 626390.53711819 + ], + "step": 100.0, + 
"reference_system": "{\"$schema\":\"https://proj.org/schemas/v0.5/projjson.schema.json\",\"type\":\"ProjectedCRS\",\"name\":\"NAD83 / UTM zone 10N\",\"base_crs\":{\"name\":\"NAD83\",\"datum\":{\"type\":\"GeodeticReferenceFrame\",\"name\":\"North American Datum 1983\",\"ellipsoid\":{\"name\":\"GRS 1980\",\"semi_major_axis\":6378137,\"inverse_flattening\":298.257222101}},\"coordinate_system\":{\"subtype\":\"ellipsoidal\",\"axis\":[{\"name\":\"Geodetic latitude\",\"abbreviation\":\"Lat\",\"direction\":\"north\",\"unit\":\"degree\"},{\"name\":\"Geodetic longitude\",\"abbreviation\":\"Lon\",\"direction\":\"east\",\"unit\":\"degree\"}]},\"id\":{\"authority\":\"EPSG\",\"code\":4269}},\"conversion\":{\"name\":\"UTM zone 10N\",\"method\":{\"name\":\"Transverse Mercator\",\"id\":{\"authority\":\"EPSG\",\"code\":9807}},\"parameters\":[{\"name\":\"Latitude of natural origin\",\"value\":0,\"unit\":\"degree\",\"id\":{\"authority\":\"EPSG\",\"code\":8801}},{\"name\":\"Longitude of natural origin\",\"value\":-123,\"unit\":\"degree\",\"id\":{\"authority\":\"EPSG\",\"code\":8802}},{\"name\":\"Scale factor at natural origin\",\"value\":0.9996,\"unit\":\"unity\",\"id\":{\"authority\":\"EPSG\",\"code\":8805}},{\"name\":\"False easting\",\"value\":500000,\"unit\":\"metre\",\"id\":{\"authority\":\"EPSG\",\"code\":8806}},{\"name\":\"False northing\",\"value\":0,\"unit\":\"metre\",\"id\":{\"authority\":\"EPSG\",\"code\":8807}}]},\"coordinate_system\":{\"subtype\":\"Cartesian\",\"axis\":[{\"name\":\"Easting\",\"abbreviation\":\"E\",\"direction\":\"east\",\"unit\":\"metre\"},{\"name\":\"Northing\",\"abbreviation\":\"N\",\"direction\":\"north\",\"unit\":\"metre\"}]},\"scope\":\"Engineering survey, topographic mapping.\",\"area\":\"North America - between 126°W and 120°W - onshore and offshore. Canada - British Columbia; Northwest Territories; Yukon. 
United States (USA) - California; Oregon; Washington.\",\"bbox\":{\"south_latitude\":30.54,\"west_longitude\":-126,\"north_latitude\":81.8,\"east_longitude\":-119.99},\"id\":{\"authority\":\"EPSG\",\"code\":26910}}" + }, + "UTM_Meters_North": { + "type": "spatial", + "axis": "y", + "description": "UTM Zone 10N Meters North", + "extent": [ + 4886005.6890793, + 4964005.6890793 + ], + "step": 100.0, + "reference_system": "{\"$schema\":\"https://proj.org/schemas/v0.5/projjson.schema.json\",\"type\":\"ProjectedCRS\",\"name\":\"NAD83 / UTM zone 10N\",\"base_crs\":{\"name\":\"NAD83\",\"datum\":{\"type\":\"GeodeticReferenceFrame\",\"name\":\"North American Datum 1983\",\"ellipsoid\":{\"name\":\"GRS 1980\",\"semi_major_axis\":6378137,\"inverse_flattening\":298.257222101}},\"coordinate_system\":{\"subtype\":\"ellipsoidal\",\"axis\":[{\"name\":\"Geodetic latitude\",\"abbreviation\":\"Lat\",\"direction\":\"north\",\"unit\":\"degree\"},{\"name\":\"Geodetic longitude\",\"abbreviation\":\"Lon\",\"direction\":\"east\",\"unit\":\"degree\"}]},\"id\":{\"authority\":\"EPSG\",\"code\":4269}},\"conversion\":{\"name\":\"UTM zone 10N\",\"method\":{\"name\":\"Transverse Mercator\",\"id\":{\"authority\":\"EPSG\",\"code\":9807}},\"parameters\":[{\"name\":\"Latitude of natural origin\",\"value\":0,\"unit\":\"degree\",\"id\":{\"authority\":\"EPSG\",\"code\":8801}},{\"name\":\"Longitude of natural origin\",\"value\":-123,\"unit\":\"degree\",\"id\":{\"authority\":\"EPSG\",\"code\":8802}},{\"name\":\"Scale factor at natural origin\",\"value\":0.9996,\"unit\":\"unity\",\"id\":{\"authority\":\"EPSG\",\"code\":8805}},{\"name\":\"False easting\",\"value\":500000,\"unit\":\"metre\",\"id\":{\"authority\":\"EPSG\",\"code\":8806}},{\"name\":\"False 
northing\",\"value\":0,\"unit\":\"metre\",\"id\":{\"authority\":\"EPSG\",\"code\":8807}}]},\"coordinate_system\":{\"subtype\":\"Cartesian\",\"axis\":[{\"name\":\"Easting\",\"abbreviation\":\"E\",\"direction\":\"east\",\"unit\":\"metre\"},{\"name\":\"Northing\",\"abbreviation\":\"N\",\"direction\":\"north\",\"unit\":\"metre\"}]},\"scope\":\"Engineering survey, topographic mapping.\",\"area\":\"North America - between 126°W and 120°W - onshore and offshore. Canada - British Columbia; Northwest Territories; Yukon. United States (USA) - California; Oregon; Washington.\",\"bbox\":{\"south_latitude\":30.54,\"west_longitude\":-126,\"north_latitude\":81.8,\"east_longitude\":-119.99},\"id\":{\"authority\":\"EPSG\",\"code\":26910}}" + } + }, + "cube:variables": { + "UpperDeschutes_reference_climate_precipitation": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Precipitation (Rain + Snow) (m)", + "unit": "m" + }, + "UpperDeschutes_reference_climate_runoff": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Runoff (Rain + Snowmelt) (m)", + "unit": "m" + }, + "UpperDeschutes_reference_climate_snowfall": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Snowfall-Water-Equivalent (m)", + "unit": "m" + }, + "UpperDeschutes_reference_climate_swe": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Snow-Water-Equivalent (m)", + "unit": "m" + }, + "UpperDeschutes_reference_climate_swemelt": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Snowmelt (m)", + "unit": "kg m-2" + }, + "UpperDeschutes_t2_precipitation": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": 
"Daily Total Precipitation (Rain + Snow) (m)", + "unit": "m" + }, + "UpperDeschutes_t2_runoff": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Runoff (Rain + Snowmelt) (m)", + "unit": "m" + }, + "UpperDeschutes_t2_snowfall": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Snowfall-Water-Equivalent (m)", + "unit": "m" + }, + "UpperDeschutes_t2_swe": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Snow-Water-Equivalent (m)", + "unit": "m" + }, + "UpperDeschutes_t2_swemelt": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Snowmelt (m)", + "unit": "kg m-2" + }, + "UpperDeschutes_t2p10_precipitation": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Precipitation (Rain + Snow) (m)", + "unit": "m" + }, + "UpperDeschutes_t2p10_runoff": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Runoff (Rain + Snowmelt) (m)", + "unit": "m" + }, + "UpperDeschutes_t2p10_snowfall": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Snowfall-Water-Equivalent (m)", + "unit": "m" + }, + "UpperDeschutes_t2p10_swe": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Snow-Water-Equivalent (m)", + "unit": "m" + }, + "UpperDeschutes_t2p10_swemelt": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Snowmelt (m)", + "unit": "kg m-2" + }, + "UpperDeschutes_t4_precipitation": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], 
+ "type": "data", + "description": "Daily Total Precipitation (Rain + Snow) (m)", + "unit": "m" + }, + "UpperDeschutes_t4_runoff": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Runoff (Rain + Snowmelt) (m)", + "unit": "m" + }, + "UpperDeschutes_t4_snowfall": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Snowfall-Water-Equivalent (m)", + "unit": "m" + }, + "UpperDeschutes_t4_swe": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Snow-Water-Equivalent (m)", + "unit": "m" + }, + "UpperDeschutes_t4_swemelt": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Snowmelt (m)", + "unit": "kg m-2" + }, + "UpperDeschutes_t4p10_precipitation": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Precipitation (Rain + Snow) (m)", + "unit": "m" + }, + "UpperDeschutes_t4p10_runoff": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Runoff (Rain + Snowmelt) (m)", + "unit": "m" + }, + "UpperDeschutes_t4p10_snowfall": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Snowfall-Water-Equivalent (m)", + "unit": "m" + }, + "UpperDeschutes_t4p10_swe": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Snow-Water-Equivalent (m)", + "unit": "m" + }, + "UpperDeschutes_t4p10_swemelt": { + "dimensions": [ + "time", + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "data", + "description": "Daily Total Snowmelt (m)", + "unit": "kg m-2" + }, + "crs": { + "dimensions": [], + "type": "data", + "description": null, + 
"unit": null + }, + "lat": { + "dimensions": [ + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "auxiliary", + "description": "Latitude", + "unit": "degrees_north" + }, + "lon": { + "dimensions": [ + "UTM_Meters_North", + "UTM_Meters_East" + ], + "type": "auxiliary", + "description": "Longitude", + "unit": "degrees_east" + } + }, + "extent": { + "spatial": { + "bbox": [ + [ + -121.90025189980798, + 44.11632158926688, + -121.40141686316812, + 44.82402988204395 + ] + ] + }, + "temporal": { + "interval": [ + [ + "1988-09-30T00:00:00Z", + "2011-09-29T00:00:00Z" + ] + ] + } + }, + "license": "Unlicense", + "assets": { + "zarr-s3-osn": { + "href": "s3://mdmf/gdp/CooperUpperDeschutes.zarr/", + "type": "application/vnd+zarr", + "description": "Open Storage Network Pod S3 API access to collection zarr group", + "xarray:open_kwargs": { + "chunks": {}, + "engine": "zarr", + "consolidated": true + }, + "xarray:storage_options": { + "anon": true, + "client_kwargs": { + "endpoint_url": "https://usgs.osn.mghpcc.org/" + } + }, + "roles": [ + "data", + "zarr", + "s3" + ] + }, + "zarr-s3": { + "href": "s3://nhgf-development/workspace/DataConversion/CooperUpperDeschutes.zarr/", + "type": "application/vnd+zarr", + "description": "S3 access to collection zarr group", + "xarray:open_kwargs": { + "chunks": {}, + "engine": "zarr", + "consolidated": true + }, + "xarray:storage_options": { + "requester_pays": true + }, + "roles": [ + "data", + "zarr", + "s3" + ] + } + } +} \ No newline at end of file diff --git a/catalog/catalog.json b/catalog/catalog.json index d3e912a17eb90813a0cb8c5d10bd72c75e8ed07f..5f5c61e1a34c4b6cf21e9266892089f3399b2fac 100644 --- a/catalog/catalog.json +++ b/catalog/catalog.json @@ -128,6 +128,16 @@ "rel": "child", "href": "./iclus_hc/collection.json", "type": "application/json" + }, + { + "rel": "child", + "href": "./CooperMcKenzie/collection.json", + "type": "application/json" + }, + { + "rel": "child", + "href": "./CooperUpperDeschutes/collection.json", + 
"type": "application/json" } ] } \ No newline at end of file diff --git a/workflows/archive/CooperMcKenzie_create_collection_from_zarr.ipynb b/workflows/archive/CooperMcKenzie_create_collection_from_zarr.ipynb index e2de17a71fbba156904c02bb529f9a8a2b0bbeb1..3faef1b113de4c6960e51e0a136082a80bc1e676 100644 --- a/workflows/archive/CooperMcKenzie_create_collection_from_zarr.ipynb +++ b/workflows/archive/CooperMcKenzie_create_collection_from_zarr.ipynb @@ -8,7 +8,6 @@ }, "source": [ "# CooperMcKenzie Zarr -> Collection Workflow\n", - "## In progress - figuring out how to build crs info (do we want to fix the zarr or manually construct it for STAC)\n", "This is a workflow to build [STAC collections](https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md) from the zarr assets for the dataset named above. We use the [datacube extension](https://github.com/stac-extensions/datacube) to define the spatial and temporal dimensions of the zarr store, as well as the variables it contains.\n", "\n", "To simplify this workflow so that it can scale to many datasets, a few simplifying suggestions and assumptions are made:\n", @@ -34,7 +33,8 @@ "import pandas as pd\n", "import json\n", "import numpy as np\n", - "import metpy\n", + "import pyproj\n", + "from pyproj import Transformer\n", "import cartopy.crs as ccrs\n", "import cfunits\n", "import json\n", @@ -136,8 +136,8 @@ "outputs": [], "source": [ "# open and view zarr dataset\n", - "fs2 = fsspec.filesystem('s3', requester_pays=True)\n", - "ds = xr.open_dataset(fs2.get_mapper(zarr_url2), engine='zarr', \n", + "fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", + "ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", " backend_kwargs={'consolidated':True}, chunks={})\n", "ds" ] @@ -213,22 +213,37 @@ "metadata": {}, "outputs": [], "source": [ - "ds = ds.metpy.parse_cf()\n", - "crs = ds[list(ds.keys())[0]].metpy.cartopy_crs" + "crs = 
pyproj.CRS.from_cf(ds.crs.attrs)" + ] + }, + { + "cell_type": "markdown", + "id": "d16521ad-78a7-4df1-8ebb-6995846a2ad5", + "metadata": {}, + "source": [ + "### Compare dataset crs var to generated proj4 string to make sure it looks ok" ] }, { "cell_type": "code", "execution_count": null, - "id": "8c8f9da9-f233-49b8-bbc8-bfe2bd11f55f", - "metadata": { - "tags": [] - }, + "id": "38492a33-861e-46a3-8f0f-ceb7b2b5f42a", + "metadata": {}, "outputs": [], "source": [ "ds.crs" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ba255b8-b76b-4f8d-8907-cc5589221e66", + "metadata": {}, + "outputs": [], + "source": [ + "crs.to_proj4()" + ] + }, { "cell_type": "markdown", "id": "a8c3ed37-8564-400b-a7fb-25bd5e43d21c", @@ -255,15 +270,56 @@ "# pull out lat/lon bbox for data\n", "# coordinates must be from WGS 84 datum\n", "# left, bottom, right, top\n", - "# Note: I'm not sure why but I have some trouble getting the data type right here - \n", - "# I've included all the options I've had to run to get it to not be a regular float rather \n", - "# than a numpy float below - switch the commented line if you have this issue\n", - "#coord_bounds = [ds.lon.data.min().compute().astype(float), ds.lat.data.min().compute().astype(float), ds.lon.data.max().compute().astype(float), ds.lat.data.max().compute().astype(float)]\n", - "#coord_bounds = [ds.lon.data.min().compute().astype(float).tolist(), ds.lat.data.min().compute().astype(float).tolist(), ds.lon.data.max().compute().astype(float).tolist(), ds.lat.data.max().compute().astype(float).tolist()]\n", - "coord_bounds = [ds.lon.data.min().astype(float).item(), ds.lat.data.min().astype(float).item(), ds.lon.data.max().astype(float).item(), ds.lat.data.max().astype(float).item()]\n", - "print(coord_bounds)\n", + "\n", + "# Note: try changing around the commented out lines below to get type float rather than a numpy float\n", + "#spatial_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float), 
ds[dim_names_dict['Y']].data.min().compute().astype(float), ds[dim_names_dict['X']].data.max().compute().astype(float), ds[dim_names_dict['Y']].data.max().compute().astype(float)]\n", + "#spatial_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['X']].data.max().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.max().compute().astype(float).tolist()]\n", + "spatial_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n", + "print(spatial_bounds)\n", + "print(f'\\nspatial_bounds data type: {type(spatial_bounds[0])}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c80c650f-6dc6-4e2d-869c-0e674f528520", + "metadata": {}, + "outputs": [], + "source": [ + "XX, YY = np.meshgrid(ds[dim_names_dict['X']].data, ds[dim_names_dict['Y']].data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97fb5d51-ee5c-46d9-b256-6a5a51a42495", + "metadata": {}, + "outputs": [], + "source": [ + "transformer = Transformer.from_crs(crs, \"EPSG:4326\", always_xy=True)\n", + "lon, lat = transformer.transform(XX.ravel(), YY.ravel())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ae8c4e5-9069-4972-bcf6-0bb824ea83a3", + "metadata": {}, + "outputs": [], + "source": [ + "print(f'lower left coordinates (WGS84): {min(lon)}, {min(lat)}')\n", + "print(f'upper right coordinates (WGS84): {max(lon)}, {max(lat)}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a054422-6b84-44bd-b102-ac293745cda8", + "metadata": {}, + "outputs": [], + "source": [ "# create a spatial extent object \n", - "spatial_extent = pystac.SpatialExtent(bboxes=[coord_bounds])" + "spatial_extent = 
pystac.SpatialExtent(bboxes=[[min(lon).item(), min(lat).item(), max(lon).item(), max(lat).item()]])" ] }, { @@ -282,8 +338,14 @@ "outputs": [], "source": [ "# pull out first and last timestamps\n", - "temporal_extent_lower = pd.Timestamp(ds[dim_names_dict['T']].data.min())\n", - "temporal_extent_upper = pd.Timestamp(ds[dim_names_dict['T']].data.max())\n", + "#temporal_extent_lower = pd.Timestamp(ds[dim_names_dict['T']].data.min())\n", + "#temporal_extent_upper = pd.Timestamp(ds[dim_names_dict['T']].data.max())\n", + "# if you get an error:\n", + "# Cannot convert input [] of type <class 'cftime._cftime.DatetimeNoLeap'> to Timestamp\n", + "# use the following instead:\n", + "temporal_extent_lower = pd.Timestamp(ds.indexes[dim_names_dict['T']].to_datetimeindex().min())\n", + "temporal_extent_upper = pd.Timestamp(ds.indexes[dim_names_dict['T']].to_datetimeindex().max())\n", + "\n", "print(f'min: {temporal_extent_lower} \\nmax: {temporal_extent_upper}')\n", "# create a temporal extent object\n", "temporal_extent = pystac.TemporalExtent(intervals=[[temporal_extent_lower, temporal_extent_upper]])" @@ -426,25 +488,13 @@ "print(dims)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "00a18a29-fb9a-4b56-8009-493122997b16", - "metadata": {}, - "outputs": [], - "source": [ - "# get x, y bounds for extent of those dimensions (required)\n", - "xy_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n", - "print(xy_bounds)" - ] - }, { "cell_type": "markdown", "id": "e7dc357c-91ec-49ae-83e5-400f791f9792", "metadata": {}, "source": [ - "#### user input needed - you will need to look at the crs information and create a cartopy crs object after identifying the projection type:\n", - "reference list of cartopy projections: 
https://scitools.org.uk/cartopy/docs/latest/reference/projections.html" + "#### user review needed\n", + "#### compare crs information to the projjson to make sure it looks correct" ] }, { @@ -454,9 +504,7 @@ "metadata": {}, "outputs": [], "source": [ - "# print ot crs information in dataset\n", - "crs_info = ds.crs\n", - "print(crs_info)" + "crs" ] }, { @@ -466,10 +514,6 @@ "metadata": {}, "outputs": [], "source": [ - "# # create the appropriate cartopy projection\n", - "# lcc = ccrs.LambertConformal(central_longitude=crs_info.longitude_of_central_meridian, \n", - "# central_latitude=crs_info.latitude_of_projection_origin,\n", - "# standard_parallels=crs_info.standard_parallel)\n", "# # the datacube extension can accept reference_system information as a numerical EPSG code, \n", "# # WKT2 (ISO 19162) string or PROJJSON object.\n", "# # we will use a projjson, as was done by Microsoft Planetary Computer here:\n", @@ -478,7 +522,8 @@ "# projjson = json.loads(lcc.to_json())\n", "\n", "# alternatively, I think we could do this:\n", - "projjson = crs.to_json()" + "projjson = crs.to_json()\n", + "print(crs.to_json(pretty=True))" ] }, { @@ -515,8 +560,10 @@ "metadata": {}, "outputs": [], "source": [ - "# # debugging for time steps: get all step values and locations\n", - "# time_step = stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=1)" + "# # optional debugging for time steps:\n", + "# # check all step sizes (step_list), get number of occurrences of each (step_count), and get index locations where each step size occurs in the dataset so you can manually inspect the values, if needed\n", + "# # please specify the index of the step in step_list with the step_ix field - this will return the indices in the dataset where this step size occurred\n", + "# time_step = stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True, debug=True, step_ix=0)" ] }, { @@ -538,7 +585,9 @@ "id": "57f9d11a-530f-4069-a21b-e7512c31b7c1", "metadata": {}, 
"source": [ - "**X/lon**" + "**X/lon**\n", + "\n", + "**rounding error in spatial steps**: need to round to 9th decimal to take care of rounding error that comes up in calculating spatial steps" ] }, { @@ -548,7 +597,10 @@ "metadata": {}, "outputs": [], "source": [ - "x_step = stac_helpers.get_step(ds, dim_names_dict['X'])\n", + "#x_step = stac_helpers.get_step(ds, dim_names_dict['X'])\n", + "# a common issue that causes the spatial step not to be identified comes from rounding errors in the step calculation\n", + "# use the debugging cells below to identify if this is the issue, if so, use the round_dec argument to round to a higher decimal place:\n", + "x_step = stac_helpers.get_step(ds, dim_names_dict['X'], round_dec=9)\n", "print(f'x step: {x_step}')" ] }, @@ -559,9 +611,11 @@ "metadata": {}, "outputs": [], "source": [ - "# # debugging for spatial steps: get all step values and locations\n", + "# # optional debugging for spatial steps:\n", + "# # check all step sizes (step_list), get number of occurences of each (step_count), and get index locations where each step size occurs in the dataset so you can manually inspect the values, if needed\n", + "# # please specify the index of the step in step_list with the step_ix field - this will return the indices in the dataset where this step size occurred\n", "# x_dim=dim_names_dict['X']\n", - "# x_step = stac_helpers.get_step(ds, x_dim, debug=True, step_ix=1)\n", + "# x_step = stac_helpers.get_step(ds, x_dim, debug=True, step_ix=0)\n", "# print(f'\\nx dim name (for next cell): {x_dim}')" ] }, @@ -595,6 +649,9 @@ "outputs": [], "source": [ "y_step = stac_helpers.get_step(ds, dim_names_dict['Y'])\n", + "# a common issue that causes the spatial step not to be identified comes from rounding errors in the step calculation\n", + "# use the debugging cells below to identify if this is the issue, if so, use the round_dec argument to round to a higher decimal place:\n", + "#y_step = stac_helpers.get_step(ds, 
dim_names_dict['Y'], round_dec=13)\n", "print(f'y step: {y_step}')" ] }, @@ -605,9 +662,11 @@ "metadata": {}, "outputs": [], "source": [ - "# # debugging for spatial steps: get all step values and locations\n", + "# # optional debugging for spatial steps:\n", + "# # check all step sizes (step_list), get number of occurences of each (step_count), and get index locations where each step size occurs in the dataset so you can manually inspect the values, if needed\n", + "# # please specify the index of the step in step_list with the step_ix field - this will return the indices in the dataset where this step size occurred\n", "# y_dim=dim_names_dict['Y']\n", - "# y_step = stac_helpers.get_step(ds, y_dim, debug=True, step_ix=1)\n", + "# y_step = stac_helpers.get_step(ds, y_dim, debug=True, step_ix=0)\n", "# print(f'\\nx dim name (for next cell): {x_dim}')" ] }, @@ -625,6 +684,26 @@ "# ds.isel(y=slice(ix-1,ix+3)).y" ] }, + { + "cell_type": "markdown", + "id": "ca0df0c7-27d0-468c-a615-b0f5a9a429a4", + "metadata": {}, + "source": [ + "#### extract x, y dimension lower and upper bounds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6cfd212f-9309-44a8-845a-b2b6f536a598", + "metadata": {}, + "outputs": [], + "source": [ + "# get x, y bounds for extent of those dimensions (required)\n", + "xy_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n", + "print(xy_bounds)" + ] + }, { "cell_type": "markdown", "id": "00a5e041-081d-428d-ac2e-75d16de205e6", @@ -658,9 +737,9 @@ "# dimension name should come from the coordinates printed above\n", "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n", "# note that the extent of each dimension should be pulled from the dataset\n", - "dims_dict = {'time': 
pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, 'time'), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step': time_step}),\n", - " 'x': pystac.extensions.datacube.Dimension({'type': 'spatial', 'description': stac_helpers.get_long_name(ds, 'x'), 'axis': 'x', 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", - " 'y': pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, 'y'), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", + "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step': time_step}),\n", + " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", + " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", " }" ] }, @@ -679,13 +758,13 @@ "metadata": {}, "outputs": [], "source": [ - "# pull list of vars from dataset\n", - "vars = list(ds.variables)\n", - "\n", "# drop metpy_crs coordinate we have added\n", "if 'metpy_crs' in ds.coords:\n", " ds = ds.drop_vars('metpy_crs')\n", "\n", + "# pull list of vars from dataset\n", + "vars = list(ds.variables)\n", + "\n", "# spec says that the keys of cube:dimensions and cube:variables should be unique together; a key like lat should not be both a dimension 
and a variable.\n", "# we will drop all values in dims from vars\n", "vars = [v for v in vars if v not in dims]\n", diff --git a/workflows/archive/CooperUpperDeschutes_create_collection_from_zarr.ipynb b/workflows/archive/CooperUpperDeschutes_create_collection_from_zarr.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..a06fe0c728d6e3b89972ff3e1f76ab01e8544588 --- /dev/null +++ b/workflows/archive/CooperUpperDeschutes_create_collection_from_zarr.ipynb @@ -0,0 +1,858 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6c10e07b-1e60-4926-af1d-fa75dc78e5d4", + "metadata": { + "tags": [] + }, + "source": [ + "# CooperUpperDeschutes Zarr -> Collection Workflow\n", + "This is a workflow to build [STAC collections](https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md) from the zarr assets for the dataset named above. We use the [datacube extension](https://github.com/stac-extensions/datacube) to define the spatial and temporal dimensions of the zarr store, as well as the variables it contains.\n", + "\n", + "To simplify this workflow so that it can scale to many datasets, a few simplifying suggestions and assumptions are made:\n", + "1. For USGS data, we can use the CC0-1.0 license. For all other data we can use Unlicense. Ref: https://spdx.org/licenses/\n", + "2. I am assuming all coordinates are from the WGS84 datum if not specified." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "201e0945-de55-45ff-b095-c2af009a4e62", + "metadata": {}, + "outputs": [], + "source": [ + "import pystac\n", + "from pystac.extensions.datacube import CollectionDatacubeExtension, AssetDatacubeExtension, AdditionalDimension, DatacubeExtension\n", + "import xarray as xr\n", + "import cf_xarray\n", + "import os\n", + "import fsspec\n", + "import cf_xarray\n", + "import hvplot.xarray\n", + "import pandas as pd\n", + "import json\n", + "import numpy as np\n", + "import pyproj\n", + "from pyproj import Transformer\n", + "import cartopy.crs as ccrs\n", + "import cfunits\n", + "import json\n", + "import sys\n", + "sys.path.insert(1, '..')\n", + "import stac_helpers" + ] + }, + { + "cell_type": "markdown", + "id": "8cfb2138-2a0f-4e8f-a58f-d3a79eb4bdf1", + "metadata": {}, + "source": [ + "## Collection ID" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91bd4f53-e74e-4f4b-99b4-130d5d7472fd", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# name for STAC collection\n", + "collection_id = 'CooperUpperDeschutes'" + ] + }, + { + "cell_type": "markdown", + "id": "116b5837-8e85-4ae7-964a-803533ded714", + "metadata": { + "tags": [] + }, + "source": [ + "## Asset Metadata Input" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd6fa323-132a-4794-8c80-576933f547a0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# url to zarr store that you want to create a collection for\n", + "zarr_url = f's3://mdmf/gdp/{collection_id}.zarr/'\n", + "\n", + "# define keyword arguments needed for opening the dataset with xarray\n", + "# ref: https://github.com/stac-extensions/xarray-assets\n", + "xarray_opendataset_kwargs = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n", + " \"xarray:storage_options\": {\"anon\": True, \"client_kwargs\": {\"endpoint_url\":\"https://usgs.osn.mghpcc.org/\"}}}\n", + "# 
description for zarr url asset attached to collection (zarr_url)\n", + "asset_description = \"Open Storage Network Pod S3 API access to collection zarr group\"\n", + "# roles to tag zarr url asset with\n", + "asset_roles = [\"data\",\"zarr\",\"s3\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1441cd4-e94c-4902-af46-8f1af470eb6b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# url to zarr store that you want to create a collection for\n", + "zarr_url2 = f's3://nhgf-development/workspace/DataConversion/{collection_id}.zarr/'\n", + "\n", + "# define keyword arguments needed for opening the dataset with xarray\n", + "# ref: https://github.com/stac-extensions/xarray-assets\n", + "xarray_opendataset_kwargs2 = {\"xarray:open_kwargs\":{\"chunks\":{},\"engine\":\"zarr\",\"consolidated\":True},\n", + " \"xarray:storage_options\":{\"requester_pays\":True}}\n", + "# description for zarr url asset attached to collection (zarr_url)\n", + "asset_description2 = \"S3 access to collection zarr group\"\n", + "# roles to tag zarr url asset with\n", + "asset_roles2 = [\"data\",\"zarr\",\"s3\"]" + ] + }, + { + "cell_type": "markdown", + "id": "b213b74f-ad17-4774-93b6-3b62be616b45", + "metadata": { + "tags": [] + }, + "source": [ + "## Data Exploration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "708f2cf5-79ab-49af-8067-de31d0d13ee6", + "metadata": {}, + "outputs": [], + "source": [ + "# open and view zarr dataset\n", + "fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')\n", + "ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr', \n", + " backend_kwargs={'consolidated':True}, chunks={})\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "996e60ba-13e4-453a-8534-e62ce747f0fa", + "metadata": {}, + "source": [ + "## Collection Metadata Input" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "482d204d-b5b6-40e5-ac42-55b459be1097", + 
"metadata": {}, + "outputs": [], + "source": [ + "# description of STAC collection\n", + "collection_description = ds.attrs['title']\n", + "print(f'collection description: {collection_description}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2bbf33b-0b20-4375-9cd9-dc4b66549707", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# license for dataset\n", + "collection_license = stac_helpers.license_picker(ds.attrs['license'])" + ] + }, + { + "cell_type": "markdown", + "id": "0bc7e9b3-ad62-4b10-a18e-66b7ed2d35dc", + "metadata": {}, + "source": [ + "## Identify x, y, t dimensions of dataset\n", + "May require user input if dimensions cannot be auto-detected." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab91268f-7200-4cb1-979a-c7d75531d2c0", + "metadata": {}, + "outputs": [], + "source": [ + "dims_auto_extract = ['X', 'Y', 'T']\n", + "dim_names_dict = {}\n", + "for d in dims_auto_extract:\n", + " dim_names_dict[d] = stac_helpers.extract_dim(ds, d)\n", + "print(f\"Dimension dictionary: {dim_names_dict}\")" + ] + }, + { + "cell_type": "markdown", + "id": "810d7480-165d-41c0-bd09-163656a14003", + "metadata": {}, + "source": [ + "## Get crs info" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03d52f3-1367-4255-a561-52ee4fc9e92d", + "metadata": {}, + "outputs": [], + "source": [ + "crs = pyproj.CRS.from_cf(ds.crs.attrs)" + ] + }, + { + "cell_type": "markdown", + "id": "d16521ad-78a7-4df1-8ebb-6995846a2ad5", + "metadata": {}, + "source": [ + "### Compare dataset crs var to generated proj4 string to make sure it looks ok" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38492a33-861e-46a3-8f0f-ceb7b2b5f42a", + "metadata": {}, + "outputs": [], + "source": [ + "ds.crs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ba255b8-b76b-4f8d-8907-cc5589221e66", + "metadata": {}, + "outputs": [], + "source": [ + "crs.to_proj4()" + ] + }, 
+ { + "cell_type": "markdown", + "id": "a8c3ed37-8564-400b-a7fb-25bd5e43d21c", + "metadata": {}, + "source": [ + "## Create Collection Extent" + ] + }, + { + "cell_type": "markdown", + "id": "69f0d837-68a5-4fed-9a14-5d75cfbb0da4", + "metadata": {}, + "source": [ + "### Spatial Extent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d46805e0-8e94-4ebe-aa01-d9a2d7051459", + "metadata": {}, + "outputs": [], + "source": [ + "# pull out lat/lon bbox for data\n", + "# coordinates must be from WGS 84 datum\n", + "# left, bottom, right, top\n", + "\n", + "# Note: try changing around the commented out lines below to get type float rather than a numpy float\n", + "#spatial_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float), ds[dim_names_dict['Y']].data.min().compute().astype(float), ds[dim_names_dict['X']].data.max().compute().astype(float), ds[dim_names_dict['Y']].data.max().compute().astype(float)]\n", + "#spatial_bounds = [ds[dim_names_dict['X']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.min().compute().astype(float).tolist(), ds[dim_names_dict['X']].data.max().compute().astype(float).tolist(), ds[dim_names_dict['Y']].data.max().compute().astype(float).tolist()]\n", + "spatial_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n", + "print(spatial_bounds)\n", + "print(f'\\nspatial_bounds data type: {type(spatial_bounds[0])}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c80c650f-6dc6-4e2d-869c-0e674f528520", + "metadata": {}, + "outputs": [], + "source": [ + "XX, YY = np.meshgrid(ds[dim_names_dict['X']].data, ds[dim_names_dict['Y']].data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97fb5d51-ee5c-46d9-b256-6a5a51a42495", + "metadata": {}, + "outputs": [], + 
"source": [ + "transformer = Transformer.from_crs(crs, \"EPSG:4326\", always_xy=True)\n", + "lon, lat = transformer.transform(XX.ravel(), YY.ravel())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ae8c4e5-9069-4972-bcf6-0bb824ea83a3", + "metadata": {}, + "outputs": [], + "source": [ + "print(f'lower left coordinates (WGS84): {min(lon)}, {min(lat)}')\n", + "print(f'upper right coordinates (WGS84): {max(lon)}, {max(lat)}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a054422-6b84-44bd-b102-ac293745cda8", + "metadata": {}, + "outputs": [], + "source": [ + "# create a spatial extent object \n", + "spatial_extent = pystac.SpatialExtent(bboxes=[[min(lon).item(), min(lat).item(), max(lon).item(), max(lat).item()]])" + ] + }, + { + "cell_type": "markdown", + "id": "a04c8fca-1d33-43ac-9e2b-62d7be2887f7", + "metadata": {}, + "source": [ + "### Temporal Extent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41a84995-867c-4152-8c57-85e3758bbb77", + "metadata": {}, + "outputs": [], + "source": [ + "# pull out first and last timestamps\n", + "#temporal_extent_lower = pd.Timestamp(ds[dim_names_dict['T']].data.min())\n", + "#temporal_extent_upper = pd.Timestamp(ds[dim_names_dict['T']].data.max())\n", + "# if you get an error:\n", + "# Cannot convert input [] of type <class 'cftime._cftime.DatetimeNoLeap'> to Timestamp\n", + "# use the following instead:\n", + "temporal_extent_lower = pd.Timestamp(ds.indexes[dim_names_dict['T']].to_datetimeindex().min())\n", + "temporal_extent_upper = pd.Timestamp(ds.indexes[dim_names_dict['T']].to_datetimeindex().max())\n", + "\n", + "print(f'min: {temporal_extent_lower} \\nmax: {temporal_extent_upper}')\n", + "# create a temporal extent object\n", + "temporal_extent = pystac.TemporalExtent(intervals=[[temporal_extent_lower, temporal_extent_upper]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b1e37c4-5348-46ad-abc9-e005b5d6c02b", + 
"metadata": {}, + "outputs": [], + "source": [ + "collection_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)" + ] + }, + { + "cell_type": "markdown", + "id": "20b00e88-5a13-46b3-9787-d9ac2d4e7bd6", + "metadata": { + "tags": [] + }, + "source": [ + "## Open up NHGF STAC Catalog and create a collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adf6c59d-58cd-48b1-a5fd-3bb205a3ef56", + "metadata": {}, + "outputs": [], + "source": [ + "# define folder location where your STAC catalog json file is\n", + "catalog_path = os.path.join('..', '..', 'catalog')\n", + "# open catalog\n", + "catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e96811b-95ae-406a-9728-55fc429d4e1f", + "metadata": {}, + "outputs": [], + "source": [ + "if catalog.get_child(collection_id):\n", + " collection = catalog.get_child(collection_id)\n", + " print(\"existing collection opened\")\n", + " collection.extent=collection_extent\n", + " collection.description=collection_description\n", + " collection.license=collection_license\n", + "else:\n", + " collection = pystac.Collection(id=collection_id,\n", + " description=collection_description,\n", + " extent=collection_extent,\n", + " license=collection_license)\n", + " print(\"new collection created\")" + ] + }, + { + "cell_type": "markdown", + "id": "a21c76e8-cd57-4eb5-a33f-7c668a3b3205", + "metadata": {}, + "source": [ + "## Add zarr url asset to collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "094832af-d22b-4359-b0f6-cf687acce5cc", + "metadata": {}, + "outputs": [], + "source": [ + "asset_id = \"zarr-s3-osn\"\n", + "asset = pystac.Asset(href=zarr_url,\n", + " description=asset_description,\n", + " media_type=\"application/vnd+zarr\",\n", + " roles=asset_roles,\n", + " extra_fields = xarray_opendataset_kwargs)\n", + "collection.add_asset(asset_id, asset)" + ] + }, 
+ { + "cell_type": "code", + "execution_count": null, + "id": "0c298d07-f234-4a08-986d-87f4a39e9ae6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "asset_id2 = \"zarr-s3\"\n", + "asset2 = pystac.Asset(href=zarr_url2,\n", + " description=asset_description2,\n", + " media_type=\"application/vnd+zarr\",\n", + " roles=asset_roles2,\n", + " extra_fields = xarray_opendataset_kwargs2)\n", + "collection.add_asset(asset_id2, asset2)" + ] + }, + { + "cell_type": "markdown", + "id": "f67cd5c9-db33-45c2-bc21-480cd67354f4", + "metadata": {}, + "source": [ + "## Add datacube extension to collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc00946d-2880-491d-9b3b-3aeeb4414d6c", + "metadata": {}, + "outputs": [], + "source": [ + "# instantiate extension on collection\n", + "dc = DatacubeExtension.ext(collection, add_if_missing=True)" + ] + }, + { + "cell_type": "markdown", + "id": "8bdd77a2-7587-485e-afb7-42af3a822241", + "metadata": {}, + "source": [ + "### Add cube dimensions (required field for extension)" + ] + }, + { + "cell_type": "markdown", + "id": "e7dc357c-91ec-49ae-83e5-400f791f9792", + "metadata": {}, + "source": [ + "#### user review needed\n", + "#### compare crs information to the projjson to make sure it looks correct" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea452f62-5644-49b6-8a4e-7dc4f649fd1a", + "metadata": {}, + "outputs": [], + "source": [ + "crs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b1d05ff-8e43-44a7-8343-178b112c4ad6", + "metadata": {}, + "outputs": [], + "source": [ + "# # the datacube extension can accept reference_system information as a numerical EPSG code, \n", + "# # WKT2 (ISO 19162) string or PROJJSON object.\n", + "# # we will use a projjson, as was done by Microsoft Planetary Computer here:\n", + "# # https://planetarycomputer.microsoft.com/dataset/daymet-annual-na\n", + "# # 
https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-annual-na\n", + "# projjson = json.loads(lcc.to_json())\n", + "\n", + "# alternatively, I think we could do this:\n", + "projjson = crs.to_json()\n", + "print(crs.to_json(pretty=True))" + ] + }, + { + "cell_type": "markdown", + "id": "39ffb2bc-afe8-4c67-b385-cd98251d5d4b", + "metadata": {}, + "source": [ + "#### user review needed - look at the steps pulled out and make sure they make sense" + ] + }, + { + "cell_type": "markdown", + "id": "3d8f421e-302c-4020-8fe3-4cea3ee53143", + "metadata": {}, + "source": [ + "**Time**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13967ca9-1920-40f8-81cf-639d63439d71", + "metadata": {}, + "outputs": [], + "source": [ + "time_step = pd.Timedelta(stac_helpers.get_step(ds, dim_names_dict['T'], time_dim=True)).isoformat()\n", + "print(f'time step: {time_step}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8640779d-5131-4973-aad9-455d4acffd7a", + "metadata": {}, + "outputs": [], + "source": [ + "# # optional debugging for time steps:\n", + "# # check all step sizes (step_list), get number of occurrences of each (step_count), and get index locations where each step size occurs in the dataset so you can manually inspect the values, if needed\n", + "# # please specify the index of the step in step_list with the step_ix field - this will return the indices in the dataset where this step size occurred\n", + "# t_dim=dim_names_dict['T']\n", + "# time_step = stac_helpers.get_step(ds, t_dim, time_dim=True, debug=True, step_ix=0)\n", + "# print(f'\\ntime dim name (for next cell): {t_dim}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de0efee3-af6e-4451-87a1-264503e24948", + "metadata": {}, + "outputs": [], + "source": [ + "# # debugging for time steps, cont:\n", + "# # please choose one of the index locations printed above\n", + "# # this will print the time steps adjacent to it\n", + "# ix = 3343\n", + "# 
ds.isel(time=slice(ix-1,ix+3)).time" + ] + }, + { + "cell_type": "markdown", + "id": "57f9d11a-530f-4069-a21b-e7512c31b7c1", + "metadata": {}, + "source": [ + "**X/lon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "73306fb1-9b51-42d0-86fa-8d5e72644ee1", + "metadata": {}, + "outputs": [], + "source": [ + "x_step = stac_helpers.get_step(ds, dim_names_dict['X'])\n", + "# a common issue that causes the spatial step not to be identified comes from rounding errors in the step calculation\n", + "# use the debugging cells below to identify if this is the issue, if so, use the round_dec argument to round to a higher decimal place:\n", + "#x_step = stac_helpers.get_step(ds, dim_names_dict['X'], round_dec=13)\n", + "print(f'x step: {x_step}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2afe1dd0-ceb0-4a25-856e-38aefdbecd0c", + "metadata": {}, + "outputs": [], + "source": [ + "# # optional debugging for spatial steps:\n", + "# # check all step sizes (step_list), get number of occurrences of each (step_count), and get index locations where each step size occurs in the dataset so you can manually inspect the values, if needed\n", + "# # please specify the index of the step in step_list with the step_ix field - this will return the indices in the dataset where this step size occurred\n", + "# x_dim=dim_names_dict['X']\n", + "# x_step = stac_helpers.get_step(ds, x_dim, debug=True, step_ix=0)\n", + "# print(f'\\nx dim name (for next cell): {x_dim}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba997858-c048-4047-b03b-fc011a0d93b1", + "metadata": {}, + "outputs": [], + "source": [ + "# # debugging for spatial steps, cont:\n", + "# # please choose one of the index locations printed above\n", + "# # this will print the x coordinate values adjacent to it\n", + "# ix = 5\n", + "# ds.isel(x=slice(ix-1,ix+3)).x" + ] + }, + { + "cell_type": "markdown", + "id": "27cd6e0f-5289-4672-8f92-092dcd133817", + "metadata": {}, 
+ "source": [ + "**Y/lat**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c046328f-4953-4df6-ac0d-2bab43f86888", + "metadata": {}, + "outputs": [], + "source": [ + "y_step = stac_helpers.get_step(ds, dim_names_dict['Y'])\n", + "# a common issue that causes the spatial step not to be identified comes from rounding errors in the step calculation\n", + "# use the debugging cells below to identify if this is the issue, if so, use the round_dec argument to round to a higher decimal place:\n", + "#y_step = stac_helpers.get_step(ds, dim_names_dict['Y'], round_dec=13)\n", + "print(f'y step: {y_step}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6bd290eb-fdb3-4e2e-b9e8-4ebcfa3f608b", + "metadata": {}, + "outputs": [], + "source": [ + "# # optional debugging for spatial steps:\n", + "# # check all step sizes (step_list), get number of occurrences of each (step_count), and get index locations where each step size occurs in the dataset so you can manually inspect the values, if needed\n", + "# # please specify the index of the step in step_list with the step_ix field - this will return the indices in the dataset where this step size occurred\n", + "# y_dim=dim_names_dict['Y']\n", + "# y_step = stac_helpers.get_step(ds, y_dim, debug=True, step_ix=0)\n", + "# print(f'\\ny dim name (for next cell): {y_dim}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e27871aa-d33f-4af2-9f4e-96a2e54f03ca", + "metadata": {}, + "outputs": [], + "source": [ + "# # debugging for spatial steps, cont:\n", + "# # please choose one of the index locations printed above\n", + "# # this will print the y coordinate values adjacent to it\n", + "# ix = 5\n", + "# ds.isel(y=slice(ix-1,ix+3)).y" + ] + }, + { + "cell_type": "markdown", + "id": "6d773006-21b4-41c6-bfae-e4868f391c41", + "metadata": {}, + "source": [ + "#### extract x, y dimension lower and upper bounds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"8b635e48-8d4c-4e29-ba8a-d20ee1bd4492", + "metadata": {}, + "outputs": [], + "source": [ + "# get x, y bounds for extent of those dimensions (required)\n", + "xy_bounds = [ds[dim_names_dict['X']].data.min().astype(float).item(), ds[dim_names_dict['Y']].data.min().astype(float).item(), ds[dim_names_dict['X']].data.max().astype(float).item(), ds[dim_names_dict['Y']].data.max().astype(float).item()]\n", + "print(xy_bounds)" + ] + }, + { + "cell_type": "markdown", + "id": "00a5e041-081d-428d-ac2e-75d16de205e6", + "metadata": {}, + "source": [ + "#### user input needed - you will need to copy all of the dimensions printed below into the dict and fill in the appropriate attributes (type, axis, extent, etc.):\n", + "\n", + "Please see [datacube spec](https://github.com/stac-extensions/datacube?tab=readme-ov-file#dimension-object) for details on required fields.\n", + "\n", + "If you have a dimension like \"bnds\" that is used on variables like time_bnds, lon_bnds, lat_bnds to choose either the lower or upper bound, you can use an [additional dimension object](https://github.com/stac-extensions/datacube?tab=readme-ov-file#additional-dimension-object). We recommend making the type \"count\" as Microsoft Planetary Computer did [here](https://github.com/stac-extensions/datacube/blob/9e74fa706c9bdd971e01739cf18dcc53bdd3dd4f/examples/daymet-hi-annual.json#L76)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acd45d3c-7845-47e6-9b7d-e35627a7ca9a", + "metadata": {}, + "outputs": [], + "source": [ + "dims = list(ds.dims)\n", + "print(dims)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a443497-67a9-4dce-a8e9-b08d31a88223", + "metadata": {}, + "outputs": [], + "source": [ + "# create a dictionary of datacube dimensions you would like to assign to this dataset\n", + "# dimension name should come from the coordinates printed above\n", + "# we do not recommend including redundant dimensions (do not include x,y if you have lon,lat)\n", + "# note that the extent of each dimension should be pulled from the dataset\n", + "dims_dict = {dim_names_dict['T']: pystac.extensions.datacube.Dimension({'type': 'temporal', 'description': stac_helpers.get_long_name(ds, dim_names_dict['T']), 'extent': [temporal_extent_lower.strftime('%Y-%m-%dT%XZ'), temporal_extent_upper.strftime('%Y-%m-%dT%XZ')], 'step': time_step}),\n", + " dim_names_dict['X']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'x', 'description': stac_helpers.get_long_name(ds, dim_names_dict['X']), 'extent': [xy_bounds[0], xy_bounds[2]], 'step': x_step, 'reference_system': projjson}),\n", + " dim_names_dict['Y']: pystac.extensions.datacube.Dimension({'type': 'spatial', 'axis': 'y', 'description': stac_helpers.get_long_name(ds, dim_names_dict['Y']), 'extent': [xy_bounds[1], xy_bounds[3]], 'step': y_step, 'reference_system': projjson}),\n", + " }" + ] + }, + { + "cell_type": "markdown", + "id": "0f277883-a3fd-425f-966a-ca2140d0ef2f", + "metadata": {}, + "source": [ + "### Add cube variables (optional field for extension)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9272931-fc0b-4f2a-9546-283033e9cde8", + "metadata": {}, + "outputs": [], + "source": [ + "# drop metpy_crs coordinate we have added\n", + "if 'metpy_crs' in ds.coords:\n", + " ds = ds.drop_vars('metpy_crs')\n", + "\n", + "# 
pull list of vars from dataset\n", + "vars = list(ds.variables)\n", + "\n", + "# spec says that the keys of cube:dimensions and cube:variables should be unique together; a key like lat should not be both a dimension and a variable.\n", + "# we will drop all values in dims from vars\n", + "vars = [v for v in vars if v not in dims]\n", + "\n", + "# Microsoft Planetary Computer includes coordinates and crs as variables here:\n", + "# https://planetarycomputer.microsoft.com/dataset/daymet-annual-na\n", + "# https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-annual-na\n", + "# we will keep those in the var list\n", + "\n", + "# create dictionary of dataset variables and associated dimensions\n", + "vars_dict={}\n", + "for v in vars:\n", + " unit = stac_helpers.get_unit(ds, v)\n", + " var_type = stac_helpers.get_var_type(ds, v)\n", + " long_name = stac_helpers.get_long_name(ds, v)\n", + " vars_dict[v] = pystac.extensions.datacube.Variable({'dimensions':list(ds[v].dims), 'type': var_type, 'description': long_name, 'unit': unit})" + ] + }, + { + "cell_type": "markdown", + "id": "11ad5352-884c-4472-8864-4570a96f66e5", + "metadata": {}, + "source": [ + "### Finalize extension" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10141fd4-91d6-491d-878b-02653720891d", + "metadata": {}, + "outputs": [], + "source": [ + "# add dimensions and variables to collection extension\n", + "dc.apply(dimensions=dims_dict, variables=vars_dict)" + ] + }, + { + "cell_type": "markdown", + "id": "615ca168-75fb-4135-9941-0ef5fe4fd1cb", + "metadata": {}, + "source": [ + "## Add STAC Collection to Catalog and Save" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2120a55-3d04-4122-a93f-29afcdb8cb1b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# # helper to find items of wrong type\n", + "# d = collection.to_dict()\n", + "# print(*stac_helpers.find_paths(d))" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "4b75791b-6b2d-40be-b7c6-330a60888fb5", + "metadata": {}, + "outputs": [], + "source": [ + "if catalog.get_child(collection_id):\n", + " collection.normalize_and_save(root_href=os.path.join(catalog_path, collection_id), catalog_type=pystac.CatalogType.SELF_CONTAINED)\n", + "else:\n", + " catalog.add_child(collection)\n", + " catalog.normalize_and_save(root_href=catalog_path, catalog_type=pystac.CatalogType.SELF_CONTAINED)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6f676b5-e892-4bfb-8d73-2828addd838c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "global-global-pangeo", + "language": "python", + "name": "conda-env-global-global-pangeo-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}