!pip install -q pystac stac-validator
[notice] A new release of pip is available: 26.0 -> 26.0.1
[notice] To update, run: pip install --upgrade pip

This notebook demonstrates how to convert GeoCroissant metadata (a geospatial extension of the Croissant metadata format) into a STAC (SpatioTemporal Asset Catalog) Item.
stac-validator

| STAC Field | GeoCroissant Field |
|---|---|
| id | name |
| type | @type |
| bbox | spatialCoverage.geo.box |
| geometry | spatialCoverage.geo.box |
| properties.start_datetime | temporalCoverage (start) |
| properties.end_datetime | temporalCoverage (end) |
| proj:epsg | geocr:coordinateReferenceSystem |
| properties.gsd | geocr:spatialResolution.value |
| assets | distribution (FileObject/FileSet) |
| assets[key].href | contentUrl |
| assets[key].type | encodingFormat |
| assets[key].raster:bands | geocr:spectralBandMetadata |
!pip install -q pystac stac-validator
[notice] A new release of pip is available: 26.0 -> 26.0.1
[notice] To update, run: pip install --upgrade pip
This converter properly handles:

- Spatial Coverage: Parses the GeoCroissant box format (south west north east) → STAC bbox
- Temporal Coverage: Parses ISO 8601 temporal ranges → start/end datetime
- CRS: Extracts coordinate reference systems (e.g., EPSG:4326)
- Resolution: Spatial and temporal resolution metadata
- Distribution: FileObject and FileSet assets → STAC assets
- Spectral Bands: Band metadata for raster data
- Conformance: GeoCroissant version compliance
import json
from datetime import datetime, timezone

from pystac import Item, Asset, MediaType
from pystac.extensions.projection import ProjectionExtension
def _parse_iso_utc(value):
    """Parse an ISO 8601 timestamp and return a timezone-aware UTC datetime.

    Naive timestamps are interpreted as UTC. Raises ValueError when the text
    cannot be parsed by ``datetime.fromisoformat``.
    """
    text = value.strip()
    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11 on.
    if text.endswith(("Z", "z")):
        text = text[:-1] + "+00:00"
    parsed = datetime.fromisoformat(text)
    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)


def _format_utc(moment):
    """Format an aware datetime as a UTC timestamp string ending in "Z"."""
    return moment.astimezone(timezone.utc).replace(tzinfo=None).isoformat() + "Z"


def _bbox_and_geometry(box):
    """Return ``(bbox, geometry)`` for a "south west north east" box value.

    A missing/empty box yields the global extent. Raises ValueError when the
    box does not contain exactly four numbers.
    """
    if not box:
        # Default to global extent if no spatial coverage is provided.
        west, south, east, north = -180, -90, 180, 90
    else:
        try:
            # Tolerate comma separators ("s,w n,e") as well as plain spaces.
            tokens = box.replace(",", " ").split() if isinstance(box, str) else list(box)
            south, west, north, east = (float(token) for token in tokens)
        except (TypeError, ValueError) as err:
            raise ValueError(
                "spatialCoverage.geo.box must contain four numbers "
                f"'south west north east', got {box!r}"
            ) from err

    bbox = [west, south, east, north]  # STAC order: [west, south, east, north]
    geometry = {
        "type": "Polygon",
        "coordinates": [[
            [west, south], [west, north], [east, north], [east, south], [west, south]
        ]],
    }
    return bbox, geometry


def _parse_temporal(coverage):
    """Return ``(start, end, nominal)`` datetimes for a temporalCoverage value.

    ``start``/``end`` are None unless the value is a "start/end" range; the
    nominal datetime is the range midpoint, the single instant given, or the
    current UTC time when nothing usable is present.
    """
    if isinstance(coverage, str) and "/" in coverage:
        start_text, end_text = coverage.split("/", 1)
        start = _parse_iso_utc(start_text)
        end = _parse_iso_utc(end_text)
        return start, end, start + (end - start) / 2

    if isinstance(coverage, str) and coverage.strip():
        try:
            return None, None, _parse_iso_utc(coverage)
        except ValueError:
            # Best effort: an unparseable single value falls back to "now",
            # as any non-range value did before.
            pass
    return None, None, datetime.now(timezone.utc)


def _extract_epsg(crs):
    """Return the integer EPSG code found in *crs*, or None if there is none."""
    if not isinstance(crs, str) or "EPSG:" not in crs.upper():
        return None
    # Taking the last ":"-separated token handles both "EPSG:4326" and
    # URN forms such as "urn:ogc:def:crs:EPSG::4326".
    code = crs.rsplit(":", 1)[-1].strip()
    if code.isascii() and code.isdigit():
        return int(code)
    return None


def _first_text(value, default=""):
    """Return *value* (or its first element if it is a list) when it is a string."""
    if isinstance(value, (list, tuple)):
        value = value[0] if value else default
    return value if isinstance(value, str) else default


def _media_type_for(encoding_format):
    """Map an encodingFormat string to a STAC media type (None when empty)."""
    lowered = encoding_format.lower()
    if "tif" in lowered:  # matches both "tif" and "tiff"
        if "cloud-optimized" in lowered:
            return MediaType.COG
        return MediaType.GEOTIFF
    if "geo+json" in lowered:
        return MediaType.GEOJSON
    if "json" in lowered:
        return MediaType.JSON
    if "parquet" in lowered:
        return MediaType.PARQUET
    if "zarr" in lowered:
        return "application/zarr"
    # Pass unknown formats through unchanged; omit the field when empty.
    return encoding_format or None


def _raster_bands(spectral_bands):
    """Translate geocr:spectralBandMetadata entries into band dictionaries."""
    bands = []
    for band_info in spectral_bands:
        if not isinstance(band_info, dict):
            continue
        band = {"name": band_info.get("name", "")}
        center_wl = band_info.get("geocr:centerWavelength", {})
        if isinstance(center_wl, dict) and center_wl.get("value"):
            band["center_wavelength"] = float(center_wl["value"])
        bandwidth = band_info.get("geocr:bandwidth", {})
        if isinstance(bandwidth, dict) and bandwidth.get("value"):
            band["bandwidth"] = float(bandwidth["value"])
        bands.append(band)
    return bands


def geocroissant_to_stac(geocroissant_data: dict) -> dict:
    """Convert GeoCroissant metadata to a STAC Item dictionary.

    Args:
        geocroissant_data: Parsed GeoCroissant JSON document.

    Returns:
        The STAC Item as a plain dictionary (``pystac.Item.to_dict()``).

    Raises:
        ValueError: If ``spatialCoverage.geo.box`` is not four numbers or a
            "start/end" ``temporalCoverage`` cannot be parsed.
    """
    # Extract basic metadata
    name = geocroissant_data.get("name")
    item_id = str(name or "unknown").replace(" ", "_")
    keywords = geocroissant_data.get("keywords", [])
    if isinstance(keywords, str):
        # The STAC "keywords" field is a list; split a comma-separated string.
        keywords = [kw.strip() for kw in keywords.split(",") if kw.strip()]

    # Parse spatial coverage (GeoCroissant format: "south west north east")
    spatial_coverage = geocroissant_data.get("spatialCoverage", {})
    geo_info = spatial_coverage.get("geo", {}) if isinstance(spatial_coverage, dict) else {}
    box = geo_info.get("box", "") if isinstance(geo_info, dict) else ""
    bbox, geometry = _bbox_and_geometry(box)

    # Parse temporal coverage (ISO 8601: "start/end")
    start_dt, end_dt, nominal_dt = _parse_temporal(
        geocroissant_data.get("temporalCoverage", "")
    )

    # Build STAC properties
    properties = {
        "title": name or "",
        "description": geocroissant_data.get("description", ""),
        "license": geocroissant_data.get("license", "proprietary"),
        "keywords": keywords,
    }
    if start_dt and end_dt:
        properties["start_datetime"] = _format_utc(start_dt)
        properties["end_datetime"] = _format_utc(end_dt)

    # Add GeoCroissant metadata
    spatial_res = geocroissant_data.get("geocr:spatialResolution", {})
    if isinstance(spatial_res, dict) and spatial_res.get("value"):
        properties["gsd"] = float(spatial_res["value"])

    temporal_res = geocroissant_data.get("geocr:temporalResolution", {})
    if isinstance(temporal_res, dict) and temporal_res.get("value"):
        properties["geocr:temporalResolution"] = (
            f"{temporal_res['value']} {temporal_res.get('unitText', '')}"
        )

    sampling_strategy = geocroissant_data.get("geocr:samplingStrategy")
    if sampling_strategy:
        properties["geocr:samplingStrategy"] = sampling_strategy

    conforms_to = geocroissant_data.get("conformsTo", [])
    if conforms_to:
        properties["conformsTo"] = conforms_to

    # Create STAC Item
    item = Item(
        id=item_id,
        geometry=geometry,
        bbox=bbox,
        datetime=nominal_dt,
        properties=properties,
    )

    # Add projection extension if an EPSG code is present
    epsg_code = _extract_epsg(geocroissant_data.get("geocr:coordinateReferenceSystem"))
    if epsg_code is not None:
        proj_ext = ProjectionExtension.ext(item, add_if_missing=True)
        proj_ext.epsg = epsg_code

    # Process distribution to add assets
    distribution = geocroissant_data.get("distribution", [])
    if isinstance(distribution, dict):
        distribution = [distribution]  # a single entry given without a list
    for dist_item in distribution or []:
        if not isinstance(dist_item, dict):
            continue
        content_url = dist_item.get("contentUrl", "")
        encoding_format = _first_text(dist_item.get("encodingFormat"))

        # Skip directory entries and file:// URLs
        if not content_url or content_url.startswith("file://"):
            continue
        if "directory" in encoding_format.lower():
            continue

        base_key = str(
            dist_item.get("@id") or dist_item.get("name") or "asset"
        ).replace(" ", "_").lower()
        # Item.add_asset() replaces an existing key, so make the key unique.
        asset_key = base_key
        suffix = 2
        while asset_key in item.assets:
            asset_key = f"{base_key}_{suffix}"
            suffix += 1

        # Determine roles
        roles = ["data"]
        if "FileSet" in _first_text(dist_item.get("@type")):
            roles.append("collection")

        asset = Asset(
            href=content_url,
            media_type=_media_type_for(encoding_format),
            title=dist_item.get("description", dist_item.get("name", "")),
            roles=roles,
        )

        # Add file pattern for FileSets
        includes = dist_item.get("includes")
        if includes:
            asset.extra_fields["file_pattern"] = includes

        item.add_asset(asset_key, asset)

    # Add spectral band metadata to GeoTIFF assets if present
    spectral_bands = geocroissant_data.get("geocr:spectralBandMetadata", [])
    if spectral_bands:
        raster_bands = _raster_bands(spectral_bands)
        # NOTE(review): "raster:bands" is written without declaring the raster
        # extension in stac_extensions; confirm whether that is intended.
        for asset in item.assets.values():
            if asset.media_type in (MediaType.GEOTIFF, MediaType.COG):
                asset.extra_fields["raster:bands"] = raster_bands

    return item.to_dict()


# Load GeoCroissant and convert to STAC
# Read the GeoCroissant document. JSON is UTF-8, so the encoding is set
# explicitly rather than relying on the platform default.
with open("croissant.json", "r", encoding="utf-8") as f:
    geocroissant_data = json.load(f)

stac_item = geocroissant_to_stac(geocroissant_data)

# Save to file
with open('stac_item.json', 'w', encoding="utf-8") as f:
    json.dump(stac_item, f, indent=2)
print(json.dumps(stac_item, indent=2))
{
"type": "Feature",
"stac_version": "1.1.0",
"stac_extensions": [
"https://stac-extensions.github.io/projection/v2.0.0/schema.json"
],
"id": "NASA_POWER_T2M_2020",
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
-180.0,
-90.0
],
[
-180.0,
90.0
],
[
179.375,
90.0
],
[
179.375,
-90.0
],
[
-180.0,
-90.0
]
]
]
},
"bbox": [
-180.0,
-90.0,
179.375,
90.0
],
"properties": {
"title": "NASA POWER T2M 2020",
"description": "Temperature at 2 Meters monthly data for 2020",
"license": "CC-BY-4.0",
"keywords": [
"temperature",
"climate",
"nasa power",
"t2m",
"2020"
],
"start_datetime": "2020-01-01T00:00:00Z",
"end_datetime": "2020-12-31T00:00:00Z",
"gsd": 0.5,
"geocr:temporalResolution": "1 month",
"conformsTo": [
"http://mlcommons.org/croissant/1.1",
"http://mlcommons.org/croissant/geo/1.0"
],
"proj:code": "EPSG:4326",
"datetime": "2020-07-01T12:00:00Z"
},
"links": [],
"assets": {
"zarr-data": {
"href": "https://nasa-power.s3.us-west-2.amazonaws.com/merra2/temporal/power_merra2_monthly_temporal_utc.zarr/",
"type": "application/zarr",
"title": "zarr-data",
"roles": [
"data"
]
}
}
}
Validate the generated STAC Item using the stac-validator tool to ensure it conforms to the STAC specification.
!stac-validator stac_item.json
Thanks for using STAC version 1.1.0!
[
{
"version": "1.1.0",
"path": "stac_item.json",
"schema": [
"https://stac-extensions.github.io/projection/v2.0.0/schema.json",
"https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/item.json"
],
"valid_stac": true,
"asset_type": "ITEM",
"validation_method": "default"
}
]
Validation completed in 2.68s