Skip to content
Snippets Groups Projects
Commit 07ef3e57 authored by Snyder, Amelia Marie's avatar Snyder, Amelia Marie
Browse files

fix collection save out

parent 647be96d
No related branches found
No related tags found
1 merge request!91Aiem
%% Cell type:markdown id:66f9bf84-b0ee-4aae-8877-661b81f59bcc tags: %% Cell type:markdown id:66f9bf84-b0ee-4aae-8877-661b81f59bcc tags:
# AIEM_permafrost Collection Creation
This is a workflow to build a [STAC collection](https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md) that will serve as the parent collection for several zarr datasets. The specific zarr datasets included in this collection are listed in `collection_id_list` below.
%% Cell type:code id:1c2249e2-5d3e-42ba-a584-aca58678232e tags: %% Cell type:code id:1c2249e2-5d3e-42ba-a584-aca58678232e tags:
``` python ``` python
import pystac import pystac
from pystac.extensions.datacube import CollectionDatacubeExtension, AssetDatacubeExtension, AdditionalDimension, DatacubeExtension from pystac.extensions.datacube import CollectionDatacubeExtension, AssetDatacubeExtension, AdditionalDimension, DatacubeExtension
import xarray as xr import xarray as xr
import cf_xarray import cf_xarray
import os import os
import fsspec import fsspec
import cf_xarray import cf_xarray
import hvplot.xarray import hvplot.xarray
import pandas as pd import pandas as pd
import json import json
import numpy as np import numpy as np
import pyproj import pyproj
from pyproj import Transformer from pyproj import Transformer
import cartopy.crs as ccrs import cartopy.crs as ccrs
import cfunits import cfunits
import json import json
import sys import sys
sys.path.insert(1, '../..') sys.path.insert(1, '../..')
import stac_helpers import stac_helpers
``` ```
%% Cell type:code id:0e4491d3-d7a9-4437-9fe8-df2e35dc6ae5 tags: %% Cell type:code id:0e4491d3-d7a9-4437-9fe8-df2e35dc6ae5 tags:
``` python ``` python
# ID of the overall (parent) STAC collection created by this workflow.
overall_collection_id = 'AIEM_permafrost'

# IDs of the child STAC collections contained in the parent collection.
# Each ID should match the name of the corresponding zarr dataset, because it
# is interpolated into the zarr store URL when the datasets are opened.
collection_id_list = [
    'northslope_rcp45_metadata',
    'northslope_rcp85_metadata',
    'selawik_rcp45_metadata',
    'selawik_rcp85_metadata',
    '5m_rcp45_G1.8SW_S0.15m_wMeta',
    '5m_rcp45_G1.8SW_wMeta',
    '5m_rcp85_G1.8SW_wMeta',
    '5m_rcp45_G1.2SW_wMeta',
    '5m_rcp45_G0.6SW_S0.15m_wMeta',
    '5m_rcp45_G1.2SW_S0.15m_wMeta',
    '5m_rcp85_G1.2SW_wMeta',
    '5m_rcp85_G1.8SW_S0.15m_wMeta',
    '5m_rcp85_G0.6SW_S0.15m_wMeta',
    '5m_rcp45_G0.6SW_wMeta',
    '5m_rcp85_G1.2SW_S0.15m_wMeta',
    '5m_rcp85_G0.6SW_wMeta',
]
``` ```
%% Cell type:code id:a98fb989-65bb-49c5-8762-9cccd1cba6bb tags: %% Cell type:code id:a98fb989-65bb-49c5-8762-9cccd1cba6bb tags:
``` python ``` python
# S3-compatible filesystem handle used to read the zarr stores. It uses the
# 'osn-mdmf-workspace' credentials profile and the endpoint URL given below.
fs2 = fsspec.filesystem('s3', profile='osn-mdmf-workspace', endpoint_url='https://usgs.osn.mghpcc.org/')
``` ```
%% Cell type:code id:91f8f839-c862-4e15-b67e-00d6bee32d44 tags: %% Cell type:code id:91f8f839-c862-4e15-b67e-00d6bee32d44 tags:
``` python ``` python
# Maps the axis keys used in this notebook (X, Y, T) to the coordinate names
# looked up in each zarr dataset.
dim_names_dict = {'X': 'x', 'Y': 'y', 'T': 'time'}
``` ```
%% Cell type:code id:5ab97501-1d74-422e-96a1-320898269b1a tags: %% Cell type:code id:5ab97501-1d74-422e-96a1-320898269b1a tags:
``` python ``` python
# Name of the dataset variable whose attributes hold the CF grid-mapping
# (coordinate reference system) definition.
crs_var = 'crs'
``` ```
%% Cell type:code id:e772d0e9-20cc-4e2f-923e-866aeb681d00 tags: %% Cell type:code id:e772d0e9-20cc-4e2f-923e-866aeb681d00 tags:
``` python ``` python
# Accumulate the lon/lat bounds and the time range of every child dataset so
# that the parent collection's extent spans all of them.
min_lons = []
min_lats = []
max_lons = []
max_lats = []
min_times = []
max_times = []
for collection_id in collection_id_list:
    zarr_url = f's3://mdmf-workspace/gdp/{collection_id}.zarr/'
    ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr',
                         backend_kwargs={'consolidated': True}, chunks={})

    # NOTE: description and license are overwritten on every iteration, so the
    # values used for the overall collection come from the LAST dataset in
    # collection_id_list. Uncomment the prints below to check that every
    # dataset agrees; in this case they were judged appropriate to use.
    collection_description = ds.attrs['title']
    # print(f'possible collection description: {collection_description}')
    collection_license = stac_helpers.license_picker(ds.attrs['license'])
    # print(f'possible collection license: {collection_license}')

    crs = pyproj.CRS.from_cf(ds[crs_var].attrs)

    # Coordinate arrays in the dataset's native CRS.
    x_coords = ds[dim_names_dict['X']].data
    y_coords = ds[dim_names_dict['Y']].data
    spatial_bounds = [x_coords.min().astype(float).item(), y_coords.min().astype(float).item(),
                      x_coords.max().astype(float).item(), y_coords.max().astype(float).item()]

    # Transform every grid node (not only the corners) to lon/lat, because the
    # extremes of a projected grid need not fall on its corners.
    XX, YY = np.meshgrid(x_coords, y_coords)
    transformer = Transformer.from_crs(crs, "EPSG:4326", always_xy=True)
    lon, lat = transformer.transform(XX.ravel(), YY.ravel())

    # Vectorized reductions; the builtin min()/max() would iterate the arrays
    # element by element in Python.
    min_lons.append(float(np.min(lon)))
    min_lats.append(float(np.min(lat)))
    max_lons.append(float(np.max(lon)))
    max_lats.append(float(np.max(lat)))

    # Convert the time index once and reuse it for both ends of the range.
    time_index = ds.indexes[dim_names_dict['T']].to_datetimeindex()
    min_times.append(pd.Timestamp(time_index.min()))
    max_times.append(pd.Timestamp(time_index.max()))

# Union of all child extents: [west, south, east, north] and [start, end].
spatial_extent = pystac.SpatialExtent(bboxes=[[min(min_lons), min(min_lats), max(max_lons), max(max_lats)]])
temporal_extent = pystac.TemporalExtent(intervals=[[min(min_times), max(max_times)]])
collection_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)
``` ```
%% Cell type:code id:5eafaa34-e079-4688-826b-383046ccbe2d tags: %% Cell type:code id:5eafaa34-e079-4688-826b-383046ccbe2d tags:
``` python ``` python
# Folder that contains the STAC catalog json file (relative to this notebook).
catalog_path = os.path.join('..', '..', '..', 'catalog')
# Open the existing root catalog.
catalog = pystac.Catalog.from_file(os.path.join(catalog_path, 'catalog.json'))
``` ```
%% Cell type:code id:c380222b-b358-4239-9e13-467fbee32b56 tags: %% Cell type:code id:c380222b-b358-4239-9e13-467fbee32b56 tags:
``` python ``` python
# Open the overall collection if the catalog already has it, otherwise create it.
# The lookup must use overall_collection_id: collection_id is only the loop
# variable left over from the extent cell and holds the ID of the last child
# dataset, not the ID of the parent collection built here.
collection = catalog.get_child(overall_collection_id)
if collection is not None:
    print("existing collection opened")
    # Refresh the metadata of the existing collection with the values just computed.
    collection.extent = collection_extent
    collection.description = collection_description
    collection.license = collection_license
else:
    collection = pystac.Collection(id=overall_collection_id,
                                   description=collection_description,
                                   extent=collection_extent,
                                   license=collection_license)
    print("new collection created")
``` ```
%% Output %% Output
new collection created new collection created
%% Cell type:code id:ca6ca927-8b29-4352-85f7-7742ace1610f tags: %% Cell type:code id:ca6ca927-8b29-4352-85f7-7742ace1610f tags:
``` python ``` python
# Save the result. If the overall collection is already a child of the
# catalog, only the collection is re-saved under its own folder. Otherwise it
# is attached to the catalog and the whole catalog is saved.
if catalog.get_child(overall_collection_id):
    collection.normalize_and_save(root_href=os.path.join(catalog_path, overall_collection_id),
                                  catalog_type=pystac.CatalogType.SELF_CONTAINED)
else:
    catalog.add_child(collection)
    catalog.normalize_and_save(root_href=catalog_path, catalog_type=pystac.CatalogType.SELF_CONTAINED)
``` ```
%% Cell type:code id:2c985c59-169b-49ea-9177-02276e802fe2 tags: %% Cell type:code id:2c985c59-169b-49ea-9177-02276e802fe2 tags:
``` python ``` python
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment