Skip to content
Snippets Groups Projects
Commit c7cdb2d0 authored by Snyder, Amelia Marie's avatar Snyder, Amelia Marie
Browse files

switch to public bucket

parent 69c0b23f
No related branches found
No related tags found
1 merge request!91Aiem
%% Cell type:markdown id:66f9bf84-b0ee-4aae-8877-661b81f59bcc tags:
# AIEM_permafrost Collection Creation
This workflow builds a [STAC collection](https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md) that serves as the parent collection for several zarr datasets. The zarr datasets included in this collection are listed in `collection_id_list` below.
%% Cell type:code id:1c2249e2-5d3e-42ba-a584-aca58678232e tags:
``` python
import pystac
import xarray as xr
import os
import fsspec
import numpy as np
import pandas as pd
import pyproj
from pyproj import Transformer
import json
import sys
sys.path.insert(1, '../..')
import stac_helpers
```
%% Cell type:code id:0e4491d3-d7a9-4437-9fe8-df2e35dc6ae5 tags:
``` python
# id of the parent STAC collection that this workflow creates
overall_collection_id = "AIEM_permafrost"

# ids of the child STAC collections that belong to the parent collection;
# each id should match the name of a zarr dataset
collection_id_list = [
    "northslope_rcp45_metadata",
    "northslope_rcp85_metadata",
    "selawik_rcp45_metadata",
    "selawik_rcp85_metadata",
    "5m_rcp45_G1.8SW_S0.15m_wMeta",
    "5m_rcp45_G1.8SW_wMeta",
    "5m_rcp85_G1.8SW_wMeta",
    "5m_rcp45_G1.2SW_wMeta",
    "5m_rcp45_G0.6SW_S0.15m_wMeta",
    "5m_rcp45_G1.2SW_S0.15m_wMeta",
    "5m_rcp85_G1.2SW_wMeta",
    "5m_rcp85_G1.8SW_S0.15m_wMeta",
    "5m_rcp85_G0.6SW_S0.15m_wMeta",
    "5m_rcp45_G0.6SW_wMeta",
    "5m_rcp85_G1.2SW_S0.15m_wMeta",
    "5m_rcp85_G0.6SW_wMeta",
]
```
%% Cell type:code id:a98fb989-65bb-49c5-8762-9cccd1cba6bb tags:
``` python
# anonymous (no credentials) S3 filesystem for the endpoint that hosts the
# public zarr stores this workflow reads
fs2 = fsspec.filesystem('s3', anon=True, endpoint_url='https://usgs.osn.mghpcc.org/')
```
%% Cell type:code id:91f8f839-c862-4e15-b67e-00d6bee32d44 tags:
``` python
# maps the generic axis labels (X, Y, T) to the coordinate names used in the
# zarr datasets
dim_names_dict = dict(X='x', Y='y', T='time')
```
%% Cell type:code id:5ab97501-1d74-422e-96a1-320898269b1a tags:
``` python
# name of the dataset variable whose attributes carry the CF description of
# the coordinate reference system
crs_var = "crs"
```
%% Cell type:code id:e772d0e9-20cc-4e2f-923e-866aeb681d00 tags:
``` python
# Collect the lon/lat bounds and the time range of every child dataset so the
# extent of the overall collection can be built as the union of all of them.
min_lons = []
min_lats = []
max_lons = []
max_lats = []
min_times = []
max_times = []
for collection_id in collection_id_list:
    zarr_url = f's3://mdmf/gdp/{collection_id}.zarr/'
    ds = xr.open_dataset(fs2.get_mapper(zarr_url), engine='zarr',
                         backend_kwargs={'consolidated': True}, chunks={})

    # NOTE: description and license are reassigned on every iteration, so the
    # values of the last dataset in collection_id_list are the ones that end
    # up on the overall collection.
    collection_description = ds.attrs['title']
    # print each zarr's description to see if they are the same and if they can be used for the overall collection
    # in this case, they are appropriate to use
    #print(f'possible collection description: {collection_description}')
    collection_license = stac_helpers.license_picker(ds.attrs['license'])
    # print each zarr's license to see if they are the same and if they can be used for the overall collection
    # in this case, they are appropriate to use
    #print(f'possible collection license: {collection_license}')

    # reproject every grid point of the dataset from its native CRS to
    # lon/lat (EPSG:4326) and record the extremes
    crs = pyproj.CRS.from_cf(ds[crs_var].attrs)
    XX, YY = np.meshgrid(ds[dim_names_dict['X']].data, ds[dim_names_dict['Y']].data)
    transformer = Transformer.from_crs(crs, "EPSG:4326", always_xy=True)
    lon, lat = transformer.transform(XX.ravel(), YY.ravel())
    # numpy reductions: the builtin min/max would loop over every grid point
    # in Python
    min_lons.append(np.min(lon))
    min_lats.append(np.min(lat))
    max_lons.append(np.max(lon))
    max_lats.append(np.max(lat))

    # convert the time index once and reuse it for both ends of the range
    times = ds.indexes[dim_names_dict['T']].to_datetimeindex()
    min_times.append(pd.Timestamp(times.min()))
    max_times.append(pd.Timestamp(times.max()))

# union of the per-dataset bounds; float() turns the numpy scalars into plain
# Python floats
spatial_extent = pystac.SpatialExtent(
    bboxes=[[float(min(min_lons)), float(min(min_lats)),
             float(max(max_lons)), float(max(max_lats))]]
)
temporal_extent = pystac.TemporalExtent(intervals=[[min(min_times), max(max_times)]])
collection_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)
```
%% Cell type:code id:5eafaa34-e079-4688-826b-383046ccbe2d tags:
``` python
# folder that holds the STAC catalog json file (three levels above this
# notebook's directory)
catalog_path = os.path.join(os.pardir, os.pardir, os.pardir, 'catalog')
# load the existing catalog from its json file
catalog_file = os.path.join(catalog_path, 'catalog.json')
catalog = pystac.Catalog.from_file(catalog_file)
```
%% Cell type:code id:c380222b-b358-4239-9e13-467fbee32b56 tags:
``` python
# Reuse the collection when the catalog already has a child with this id,
# otherwise create a new one. The catalog is queried once; get_child returns
# None when no child with the given id exists.
existing_collection = catalog.get_child(overall_collection_id)
if existing_collection is not None:
    collection = existing_collection
    print("existing collection opened")
    # refresh the stored metadata with the values computed in this run
    collection.extent = collection_extent
    collection.description = collection_description
    collection.license = collection_license
else:
    collection = pystac.Collection(
        id=overall_collection_id,
        description=collection_description,
        extent=collection_extent,
        license=collection_license,
    )
    print("new collection created")
```
%% Cell type:code id:ca6ca927-8b29-4352-85f7-7742ace1610f tags:
``` python
# Write the result to disk as a self-contained catalog.
already_in_catalog = catalog.get_child(overall_collection_id)
if already_in_catalog:
    # the collection is already a child of the catalog: save just the
    # collection under its own folder
    collection_root = os.path.join(catalog_path, overall_collection_id)
    collection.normalize_and_save(
        root_href=collection_root,
        catalog_type=pystac.CatalogType.SELF_CONTAINED,
    )
else:
    # new collection: attach it to the catalog and save the whole catalog
    catalog.add_child(collection)
    catalog.normalize_and_save(
        root_href=catalog_path,
        catalog_type=pystac.CatalogType.SELF_CONTAINED,
    )
```
%% Cell type:code id:2c985c59-169b-49ea-9177-02276e802fe2 tags:
``` python
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment