!pip install -q python-cmr earthaccess mlcroissant rasterio
[notice] A new release of pip is available: 26.0 -> 26.0.1
[notice] To update, run: pip install --upgrade pip
This notebook demonstrates a complete end-to-end conversion of NASA UMM-G (Unified Metadata Model - Granule) metadata into the GeoCroissant format.

First, let’s install all required packages:
!pip install -q python-cmr earthaccess mlcroissant rasterio
[notice] A new release of pip is available: 26.0 -> 26.0.1
[notice] To update, run: pip install --upgrade pip
We’ll use the earthaccess library to authenticate with NASA Earthdata and fetch satellite imagery metadata. Our target is the HLS (Harmonized Landsat Sentinel-2) dataset, specifically Sentinel-2 data with low cloud coverage over Huntsville, Alabama.
NASA Earthdata requires authentication to access cloud-hosted data. We’ll use interactive login to establish our session.
import earthaccess
# Step 1: Authenticate with NASA Earthdata Login
# The "interactive" strategy is the interactive login described in the text above.
# NOTE(review): presumably this prompts for credentials at run time — confirm against the earthaccess docs.
# The captured output at the end of the line shows the call returning an earthaccess.auth.Auth object.
earthaccess.login(strategy="interactive") <earthaccess.auth.Auth at 0x7f45d3f96d20>
# Step 2: Build the search query using the low-level API
query = (
    earthaccess.DataGranules()
    .concept_id("C2021957295-LPCLOUD")  # HLS Sentinel-2 collection
    .cloud_hosted(True)
)

# Step 3: Report how many granules match the query in total
total_hits = query.hits()
print(f"Total matching granules: {total_hits}")

# Step 4: Fetch only the first 100 granules (adjust the limit as needed)
granules = list(query.get(100))
print(f"Fetched {len(granules)} granules")Total matching granules: 20140158
Fetched 100 granules
We search for HLS Sentinel-2 granules using the collection concept ID C2021957295-LPCLOUD. This represents the HLS Sentinel-2 Multi-spectral Instrument Surface Reflectance Daily Global 30m v2.0 dataset.
import json
# Step 1: Just treat it as a dictionary
# The first fetched granule is passed straight to json.dumps below, so it is handled
# like a plain dict; indexing with [0] requires at least one fetched granule.
sample = granules[0]
print(json.dumps(sample, indent=2)) # Pretty print the UMM-G metadata{
"meta": {
"concept-type": "granule",
"concept-id": "G2711472078-LPCLOUD",
"revision-id": 1,
"native-id": "HLS.S30.T55JGM.2015332T001732.v2.0",
"collection-concept-id": "C2021957295-LPCLOUD",
"provider-id": "LPCLOUD",
"format": "application/echo10+xml",
"revision-date": "2023-06-15T06:04:43.667Z"
},
"umm": {
"TemporalExtent": {
"RangeDateTime": {
"BeginningDateTime": "2015-11-28T00:17:27.450Z",
"EndingDateTime": "2015-11-28T00:17:27.450Z"
}
},
"GranuleUR": "HLS.S30.T55JGM.2015332T001732.v2.0",
"AdditionalAttributes": [
{
"Name": "PRODUCT_URI",
"Values": [
"S2A_MSIL1C_20151128T001732_N0204_R116_T55JGM_20151128T001727.SAFE"
]
},
{
"Name": "CLOUD_COVERAGE",
"Values": [
"100"
]
},
{
"Name": "MGRS_TILE_ID",
"Values": [
"55JGM"
]
},
{
"Name": "SPATIAL_COVERAGE",
"Values": [
"0"
]
},
{
"Name": "SPATIAL_RESOLUTION",
"Values": [
"30.0"
]
},
{
"Name": "HLS_PROCESSING_TIME",
"Values": [
"2023-06-15T06:03:38Z"
]
},
{
"Name": "SENSING_TIME",
"Values": [
"2015-11-28T00:17:27.456Z"
]
},
{
"Name": "HORIZONTAL_CS_CODE",
"Values": [
"EPSG:32755"
]
},
{
"Name": "HORIZONTAL_CS_NAME",
"Values": [
"WGS84 / UTM zone 55S"
]
},
{
"Name": "ULX",
"Values": [
"699960.0"
]
},
{
"Name": "ULY",
"Values": [
"-2799960.0"
]
},
{
"Name": "SPATIAL_RESAMPLING_ALG",
"Values": [
"Area Weighted Average"
]
},
{
"Name": "ADD_OFFSET",
"Values": [
"0"
]
},
{
"Name": "REF_SCALE_FACTOR",
"Values": [
"0.0001"
]
},
{
"Name": "ANG_SCALE_FACTOR",
"Values": [
"0.01"
]
},
{
"Name": "FILLVALUE",
"Values": [
"-9999"
]
},
{
"Name": "QA_FILLVALUE",
"Values": [
"255"
]
},
{
"Name": "NCOLS",
"Values": [
"3660"
]
},
{
"Name": "NROWS",
"Values": [
"3660"
]
},
{
"Name": "MEAN_SUN_AZIMUTH_ANGLE",
"Values": [
"83.01522549"
]
},
{
"Name": "MEAN_SUN_ZENITH_ANGLE",
"Values": [
"20.56099496"
]
},
{
"Name": "MEAN_VIEW_AZIMUTH_ANGLE",
"Values": [
"291.51000977"
]
},
{
"Name": "MEAN_VIEW_ZENITH_ANGLE",
"Values": [
"11.97999954"
]
},
{
"Name": "NBAR_SOLAR_ZENITH",
"Values": [
"24.14364084"
]
},
{
"Name": "MSI_BAND_01_BANDPASS_ADJUSTMENT_SLOPE_AND_OFFSET",
"Values": [
"0.9959",
"-0.0002"
]
},
{
"Name": "MSI_BAND_02_BANDPASS_ADJUSTMENT_SLOPE_AND_OFFSET",
"Values": [
"0.9778",
"-0.004"
]
},
{
"Name": "MSI_BAND_03_BANDPASS_ADJUSTMENT_SLOPE_AND_OFFSET",
"Values": [
"1.0053",
"-0.0009"
]
},
{
"Name": "MSI_BAND_04_BANDPASS_ADJUSTMENT_SLOPE_AND_OFFSET",
"Values": [
"0.9765",
"0.0009"
]
},
{
"Name": "MSI_BAND_11_BANDPASS_ADJUSTMENT_SLOPE_AND_OFFSET",
"Values": [
"0.9987",
"-0.0011"
]
},
{
"Name": "MSI_BAND_12_BANDPASS_ADJUSTMENT_SLOPE_AND_OFFSET",
"Values": [
"1.003",
"-0.0012"
]
},
{
"Name": "MSI_BAND_8A_BANDPASS_ADJUSTMENT_SLOPE_AND_OFFSET",
"Values": [
"0.9983",
"-0.0001"
]
},
{
"Name": "AROP_AVE_XSHIFT(METERS)",
"Values": [
"0.0"
]
},
{
"Name": "AROP_AVE_YSHIFT(METERS)",
"Values": [
"0.0"
]
},
{
"Name": "AROP_NCP",
"Values": [
"0"
]
},
{
"Name": "AROP_RMSE(METERS)",
"Values": [
"0.0"
]
},
{
"Name": "AROP_S2_REFIMG",
"Values": [
"NONE"
]
},
{
"Name": "ACCODE",
"Values": [
"LaSRC v3.2.0"
]
},
{
"Name": "PROCESSING_BASELINE",
"Values": [
"02.04"
]
},
{
"Name": "IDENTIFIER_PRODUCT_DOI",
"Values": [
"10.5067/HLS/HLSS30.002"
]
},
{
"Name": "IDENTIFIER_PRODUCT_DOI_AUTHORITY",
"Values": [
"https://doi.org"
]
}
],
"SpatialExtent": {
"HorizontalSpatialDomain": {
"Geometry": {
"GPolygons": [
{
"Boundary": {
"Points": [
{
"Longitude": 148.98645834,
"Latitude": -25.31770273
},
{
"Longitude": 148.98873086,
"Latitude": -25.31090165
},
{
"Longitude": 148.99038533,
"Latitude": -25.3027548
},
{
"Longitude": 148.98621543,
"Latitude": -25.30281105
},
{
"Longitude": 148.98645834,
"Latitude": -25.31770273
}
]
}
}
]
}
}
},
"ProviderDates": [
{
"Date": "2023-06-15T06:03:56.741Z",
"Type": "Insert"
},
{
"Date": "2023-06-15T06:03:56.741Z",
"Type": "Update"
}
],
"CollectionReference": {
"EntryTitle": "HLS Sentinel-2 Multi-spectral Instrument Surface Reflectance Daily Global 30m v2.0"
},
"RelatedUrls": [
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B08.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.B08.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B08.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.Fmask.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.Fmask.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.Fmask.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B11.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.B11.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B11.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.VZA.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.VZA.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.VZA.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B02.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.B02.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B02.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.SAA.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.SAA.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.SAA.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B03.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.B03.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B03.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B06.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.B06.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B06.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.VAA.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.VAA.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.VAA.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.SZA.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.SZA.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.SZA.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B07.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.B07.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B07.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B8A.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.B8A.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B8A.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B01.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.B01.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B01.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B10.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.B10.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B10.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B04.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.B04.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B04.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B12.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.B12.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B12.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B09.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.B09.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B09.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B05.tif",
"Type": "GET DATA",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.B05.tif"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.B05.tif",
"Type": "GET DATA VIA DIRECT ACCESS",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0_stac.json",
"Type": "VIEW RELATED INFORMATION",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0_stac.json"
},
{
"URL": "s3://lp-prod-public/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0_stac.json",
"Type": "VIEW RELATED INFORMATION",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.cmr.xml",
"Type": "VIEW RELATED INFORMATION",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.cmr.xml"
},
{
"URL": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.cmr.xml",
"Type": "VIEW RELATED INFORMATION",
"Description": "This link provides direct download access via S3 to the granule"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials",
"Type": "VIEW RELATED INFORMATION",
"Description": "api endpoint to retrieve temporary credentials valid for same-region direct s3 access"
},
{
"URL": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.jpg",
"Type": "GET RELATED VISUALIZATION",
"Description": "Download HLS.S30.T55JGM.2015332T001732.v2.0.jpg"
},
{
"URL": "s3://lp-prod-public/HLSS30.020/HLS.S30.T55JGM.2015332T001732.v2.0/HLS.S30.T55JGM.2015332T001732.v2.0.jpg",
"Type": "GET RELATED VISUALIZATION",
"Description": "This link provides direct download access via S3 to the granule"
}
],
"DataGranule": {
"DayNightFlag": "Day",
"Identifiers": [
{
"Identifier": "HLS.S30.T55JGM.2015332T001732",
"IdentifierType": "ProducerGranuleId"
}
],
"ProductionDateTime": "2023-06-15T06:03:38.000Z",
"ArchiveAndDistributionInformation": [
{
"Name": "Not provided",
"SizeInBytes": 1744934,
"SizeUnit": "MB"
}
]
},
"Platforms": [
{
"ShortName": "Sentinel-2A",
"Instruments": [
{
"ShortName": "Sentinel-2 MSI"
}
]
}
],
"MetadataSpecification": {
"URL": "https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6",
"Name": "UMM-G",
"Version": "1.6.6"
}
},
"size": 1.6640987396240234
}
# Step 2: Persist the sample granule's metadata as pretty-printed JSON
with open("nasa_ummg_raw.json", "w") as raw_file:
    raw_file.write(json.dumps(sample, indent=2))
print("Saved first granule to nasa_ummg_raw.json")Saved first granule to nasa_ummg_raw.json
import earthaccess
import json
# Step 1: Authenticate with NASA Earthdata
earthaccess.login()

# Step 2: Centre of the search area — Huntsville, Alabama
# (approximately 34.7304° N, 86.5861° W)
huntsville_lat, huntsville_lon = 34.7304, -86.5861

# Bounding box reaching 0.5 degrees from the centre in every direction,
# ordered as [west, south, east, north]
bbox = [
    huntsville_lon - 0.5,  # west
    huntsville_lat - 0.5,  # south
    huntsville_lon + 0.5,  # east
    huntsville_lat + 0.5,  # north
]

# Step 3: Build the search query for Huntsville with a cloud coverage filter
query = earthaccess.DataGranules().concept_id("C2021957295-LPCLOUD")
query = query.bounding_box(*bbox)   # Huntsville area
query = query.cloud_hosted(True)
query = query.cloud_cover(0, 20)    # keep only 0-20% cloud coverage

# Step 4: Report the total number of matching low-cloud granules
total_hits = query.hits()
print(f"Total matching granules over Huntsville, AL with 0-20% cloud coverage: {total_hits}")

# Step 5: Fetch just the first 10 granules
granules = list(query.get(10))
print(f"Fetched {len(granules)} granules")
# Step 6: Print the first sample
# The checks below only test whether cloud-related text is present in the metadata;
# they do not read the cloud coverage value itself.
if granules:
sample = granules[0]
print("\nFirst granule metadata for Huntsville, AL:")
#print(json.dumps(sample, indent=2))
# Extract cloud coverage info if available
# (presence check: looks for the text 'CloudCover' in the granule's string form)
if hasattr(sample, 'get') and 'CloudCover' in str(sample):
print(f"\nCloud coverage information found in metadata")
else:
# Look for cloud coverage in the metadata structure
# (fallback: case-insensitive search for 'cloud' in the JSON-serialized metadata)
metadata_str = json.dumps(sample, indent=2)
if 'cloud' in metadata_str.lower():
print(f"\nCloud-related information found in metadata")
else:
print("No granules found over Huntsville, AL with 0-20% cloud coverage")Total matching granules over Huntsville, AL with 0-20% cloud coverage: 1666
Fetched 10 granules
First granule metadata for Huntsville, AL:
Cloud-related information found in metadata
# Extract cloud coverage from the metadata
def get_cloud_coverage(granule):
    """Return the granule's CLOUD_COVERAGE attribute as a float percentage.

    Scans ``granule['umm']['AdditionalAttributes']`` for the entry whose
    ``Name`` is ``'CLOUD_COVERAGE'`` and converts its first value to ``float``.

    Args:
        granule: Mapping with UMM-G metadata (any object exposing ``.get``).

    Returns:
        The cloud coverage as a float, or ``None`` when the ``umm`` section,
        the attribute list, the ``CLOUD_COVERAGE`` entry, or its value is
        missing or empty.

    Raises:
        ValueError: If the stored value cannot be parsed as a number.
    """
    # `or` guards also cover keys that are present but set to None.
    umm = granule.get('umm') or {}
    for attr in umm.get('AdditionalAttributes') or []:
        if attr.get('Name') != 'CLOUD_COVERAGE':
            continue
        values = attr.get('Values') or []
        # A missing/empty value means "unknown"; reporting it as 0% would make
        # a granule with no cloud information look like a perfectly clear one.
        return float(values[0]) if values else None
    return None
# Check the cloud coverage of your sample
# (get_cloud_coverage returns None when no CLOUD_COVERAGE attribute is found,
# in which case the line below prints "None%")
cloud_cover = get_cloud_coverage(sample)
print(f"Cloud coverage: {cloud_cover}%")
# Check cloud coverage for all fetched granules
# (granules are numbered from 1 in the printout; the ID falls back to 'Unknown'
# when the metadata has no GranuleUR)
for i, granule in enumerate(granules):
cloud_cover = get_cloud_coverage(granule)
granule_id = granule.get('umm', {}).get('GranuleUR', 'Unknown')
print(f"Granule {i+1}: {granule_id} - Cloud coverage: {cloud_cover}%")Cloud coverage: 3.0%
Granule 1: HLS.S30.T16SED.2016011T162642.v2.0 - Cloud coverage: 3.0%
Granule 2: HLS.S30.T16SEE.2016011T162642.v2.0 - Cloud coverage: 2.0%
Granule 3: HLS.S30.T16SEC.2016011T162642.v2.0 - Cloud coverage: 13.0%
Granule 4: HLS.S30.T16SEC.2016041T162412.v2.0 - Cloud coverage: 13.0%
Granule 5: HLS.S30.T16SDE.2016114T163322.v2.0 - Cloud coverage: 18.0%
Granule 6: HLS.S30.T16SEC.2016114T163322.v2.0 - Cloud coverage: 8.0%
Granule 7: HLS.S30.T16SED.2016114T163322.v2.0 - Cloud coverage: 13.0%
Granule 8: HLS.S30.T16SDC.2016114T163322.v2.0 - Cloud coverage: 3.0%
Granule 9: HLS.S30.T16SDD.2016114T163322.v2.0 - Cloud coverage: 3.0%
Granule 10: HLS.S30.T16SDD.2016134T163332.v2.0 - Cloud coverage: 0.0%
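NASA UMM-G (Unified Metadata Model - Granule) is NASA’s standardized format for granule-level metadata. It contains comprehensive information about each granule, including its temporal and spatial extent, the platform and instrument that acquired it, product-specific additional attributes (such as cloud coverage), and the URLs of its data files.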
We’ll analyze the cloud coverage of fetched granules and select the best quality data (0% cloud coverage) for our GeoCroissant conversion.
# Select the fetched granule with the lowest reported cloud coverage.
# Choosing by value (not by a hard-coded list position) keeps the selection, the
# comments and the printed message consistent even if the search results change.
cloud_covers = [get_cloud_coverage(g) for g in granules]
best_index = min(
    range(len(granules)),
    # Granules with unknown coverage (None) are ranked last.
    key=lambda i: float("inf") if cloud_covers[i] is None else cloud_covers[i],
)
granule_4 = granules[best_index]  # variable name kept in case later cells reference it

# Key info about the selected granule, read from its own metadata
granule_id = granule_4.get('umm', {}).get('GranuleUR', 'Unknown')
cloud_cover = cloud_covers[best_index]

# Step 2: Save it to a JSON file
with open("nasa_ummg_h.json", "w") as f:
    json.dump(granule_4, f, indent=2)
# Report the granule that was actually saved instead of a hard-coded ID.
print(f"Saved Granule {best_index + 1} ({granule_id} - {cloud_cover}% cloud coverage) to nasa_ummg_h.json")

# Optional: Also print some key info about the saved granule
print(f"Granule ID: {granule_id}")
print(f"Cloud Coverage: {cloud_cover}%")Saved Granule 4 (HLS.S30.T55JCN.2015332T001732.v2.0 - 0% cloud coverage) to nasa_ummg_h.json
Granule ID: HLS.S30.T16SDD.2016134T163332.v2.0
Cloud Coverage: 0.0%
Run the converter:
# Convert NASA UMM-G to GeoCroissant (reads nasa_ummg_h.json → outputs geocroissant_output.json)
# The script is referenced by a relative path, so geocroissant_converter.py must be
# present in the notebook's working directory when this cell runs.
!python geocroissant_converter.pyConversion completed using only TTL-defined properties!
Input: nasa_ummg_h.json
Output: geocroissant_output.json
GeoCroissant Properties Used (from TTL):
✓ geocr:coordinateReferenceSystem
✓ geocr:spatialResolution
✓ geocr:bandConfiguration
✓ geocr:spectralBandMetadata
✓ geocr:samplingStrategy
✓ geocr:solarInstrumentCharacteristics
Schema.org Properties Used:
✓ spatialCoverage
✓ temporalCoverage
✓ distribution
The converter automatically created geocroissant_output.json with all NASA UMM-G properties mapped to TTL-compliant GeoCroissant format.
import json
# Read the converter's output file back in as a Python object
with open("geocroissant_output.json", "r") as croissant_file:
    data = json.loads(croissant_file.read())
# Print with indentation
print(json.dumps(data, indent=2)){
"@context": {
"@language": "en",
"@vocab": "https://schema.org/",
"citeAs": "cr:citeAs",
"column": "cr:column",
"conformsTo": "dct:conformsTo",
"cr": "http://mlcommons.org/croissant/",
"geocr": "http://mlcommons.org/croissant/geo/",
"rai": "http://mlcommons.org/croissant/RAI/",
"dct": "http://purl.org/dc/terms/",
"sc": "https://schema.org/",
"data": {
"@id": "cr:data",
"@type": "@json"
},
"examples": {
"@id": "cr:examples",
"@type": "@json"
},
"dataBiases": "cr:dataBiases",
"dataCollection": "cr:dataCollection",
"dataType": {
"@id": "cr:dataType",
"@type": "@vocab"
},
"extract": "cr:extract",
"field": "cr:field",
"fileProperty": "cr:fileProperty",
"fileObject": "cr:fileObject",
"fileSet": "cr:fileSet",
"format": "cr:format",
"includes": "cr:includes",
"isLiveDataset": "cr:isLiveDataset",
"jsonPath": "cr:jsonPath",
"key": "cr:key",
"md5": "cr:md5",
"parentField": "cr:parentField",
"path": "cr:path",
"personalSensitiveInformation": "cr:personalSensitiveInformation",
"recordSet": "cr:recordSet",
"references": "cr:references",
"regex": "cr:regex",
"repeated": "cr:repeated",
"replace": "cr:replace",
"samplingRate": "cr:samplingRate",
"separator": "cr:separator",
"source": "cr:source",
"subField": "cr:subField",
"transform": "cr:transform"
},
"@type": "sc:Dataset",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0",
"description": "HLS Sentinel-2 Multi-spectral Instrument Surface Reflectance Daily Global 30m v2.0",
"url": "https://cmr.earthdata.nasa.gov/search/concepts/G2700719831-LPCLOUD.html",
"datePublished": "2023-05-31T04:12:47.704Z",
"version": "1",
"license": "https://creativecommons.org/publicdomain/zero/1.0/",
"citeAs": "HLS.S30.T16SDD.2016134T163332.v2.0. NASA EOSDIS Land Processes Distributed Active Archive Center. https://cmr.earthdata.nasa.gov/search/concepts/G2700719831-LPCLOUD.html",
"conformsTo": [
"http://mlcommons.org/croissant/1.1",
"http://mlcommons.org/croissant/geo/1.0"
],
"geocr:coordinateReferenceSystem": "EPSG:32616",
"geocr:spatialResolution": {
"@type": "sc:QuantitativeValue",
"value": 30.0,
"unitText": "m"
},
"spatialCoverage": {
"@type": "sc:Place",
"geo": {
"@type": "sc:GeoShape",
"box": "34.24811019 -88.09948036 35.24303017 -86.89272491"
}
},
"temporalCoverage": "2016-05-13T16:35:44.550Z/2016-05-13T16:35:44.550Z",
"geocr:bandConfiguration": {
"@type": "geocr:BandConfiguration",
"geocr:totalBands": 13,
"geocr:bandNamesList": [
"B01",
"B02",
"B03",
"B04",
"B05",
"B06",
"B07",
"B08",
"B8A",
"B09",
"B10",
"B11",
"B12"
]
},
"geocr:spectralBandMetadata": [
{
"@type": "geocr:SpectralBand",
"name": "B01",
"description": "Coastal aerosol",
"geocr:centerWavelength": {
"@type": "sc:QuantitativeValue",
"value": 443,
"unitText": "nm"
},
"geocr:bandwidth": {
"@type": "sc:QuantitativeValue",
"value": 65,
"unitText": "nm"
}
},
{
"@type": "geocr:SpectralBand",
"name": "B02",
"description": "Blue",
"geocr:centerWavelength": {
"@type": "sc:QuantitativeValue",
"value": 490,
"unitText": "nm"
},
"geocr:bandwidth": {
"@type": "sc:QuantitativeValue",
"value": 65,
"unitText": "nm"
}
},
{
"@type": "geocr:SpectralBand",
"name": "B03",
"description": "Green",
"geocr:centerWavelength": {
"@type": "sc:QuantitativeValue",
"value": 560,
"unitText": "nm"
},
"geocr:bandwidth": {
"@type": "sc:QuantitativeValue",
"value": 60,
"unitText": "nm"
}
},
{
"@type": "geocr:SpectralBand",
"name": "B04",
"description": "Red",
"geocr:centerWavelength": {
"@type": "sc:QuantitativeValue",
"value": 665,
"unitText": "nm"
},
"geocr:bandwidth": {
"@type": "sc:QuantitativeValue",
"value": 30,
"unitText": "nm"
}
},
{
"@type": "geocr:SpectralBand",
"name": "B05",
"description": "Red edge 1",
"geocr:centerWavelength": {
"@type": "sc:QuantitativeValue",
"value": 705,
"unitText": "nm"
},
"geocr:bandwidth": {
"@type": "sc:QuantitativeValue",
"value": 15,
"unitText": "nm"
}
},
{
"@type": "geocr:SpectralBand",
"name": "B06",
"description": "Red edge 2",
"geocr:centerWavelength": {
"@type": "sc:QuantitativeValue",
"value": 740,
"unitText": "nm"
},
"geocr:bandwidth": {
"@type": "sc:QuantitativeValue",
"value": 15,
"unitText": "nm"
}
},
{
"@type": "geocr:SpectralBand",
"name": "B07",
"description": "Red edge 3",
"geocr:centerWavelength": {
"@type": "sc:QuantitativeValue",
"value": 783,
"unitText": "nm"
},
"geocr:bandwidth": {
"@type": "sc:QuantitativeValue",
"value": 20,
"unitText": "nm"
}
},
{
"@type": "geocr:SpectralBand",
"name": "B08",
"description": "NIR",
"geocr:centerWavelength": {
"@type": "sc:QuantitativeValue",
"value": 842,
"unitText": "nm"
},
"geocr:bandwidth": {
"@type": "sc:QuantitativeValue",
"value": 115,
"unitText": "nm"
}
},
{
"@type": "geocr:SpectralBand",
"name": "B8A",
"description": "NIR narrow",
"geocr:centerWavelength": {
"@type": "sc:QuantitativeValue",
"value": 865,
"unitText": "nm"
},
"geocr:bandwidth": {
"@type": "sc:QuantitativeValue",
"value": 20,
"unitText": "nm"
}
},
{
"@type": "geocr:SpectralBand",
"name": "B09",
"description": "Water vapour",
"geocr:centerWavelength": {
"@type": "sc:QuantitativeValue",
"value": 945,
"unitText": "nm"
},
"geocr:bandwidth": {
"@type": "sc:QuantitativeValue",
"value": 20,
"unitText": "nm"
}
},
{
"@type": "geocr:SpectralBand",
"name": "B10",
"description": "SWIR cirrus",
"geocr:centerWavelength": {
"@type": "sc:QuantitativeValue",
"value": 1375,
"unitText": "nm"
},
"geocr:bandwidth": {
"@type": "sc:QuantitativeValue",
"value": 30,
"unitText": "nm"
}
},
{
"@type": "geocr:SpectralBand",
"name": "B11",
"description": "SWIR 1",
"geocr:centerWavelength": {
"@type": "sc:QuantitativeValue",
"value": 1610,
"unitText": "nm"
},
"geocr:bandwidth": {
"@type": "sc:QuantitativeValue",
"value": 90,
"unitText": "nm"
}
},
{
"@type": "geocr:SpectralBand",
"name": "B12",
"description": "SWIR 2",
"geocr:centerWavelength": {
"@type": "sc:QuantitativeValue",
"value": 2190,
"unitText": "nm"
},
"geocr:bandwidth": {
"@type": "sc:QuantitativeValue",
"value": 180,
"unitText": "nm"
}
}
],
"geocr:solarInstrumentCharacteristics": {
"@type": "geocr:SolarInstrumentCharacteristics",
"geocr:observatory": "Sentinel-2A",
"geocr:instrument": "Sentinel-2 MSI"
},
"geocr:samplingStrategy": "Spatial coverage: 99%",
"distribution": [
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B08.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B08.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.B08.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B08.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B08.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B07.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B07.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.B07.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B07.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B07.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B05.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B05.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.B05.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B05.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B05.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B02.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B02.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.B02.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B02.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B02.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B10.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B10.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.B10.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B10.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B10.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.SZA.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.SZA.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.SZA.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.SZA.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.SZA.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B01.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B01.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.B01.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B01.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B01.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B03.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B03.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.B03.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B03.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B03.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.VAA.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.VAA.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.VAA.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.VAA.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.VAA.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B04.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B04.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.B04.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B04.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B04.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B8A.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B8A.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.B8A.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B8A.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B8A.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B06.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B06.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.B06.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B06.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B06.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B11.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B11.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.B11.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B11.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B11.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.VZA.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.VZA.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.VZA.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.VZA.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.VZA.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B09.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B09.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.B09.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B09.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B09.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.Fmask.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.Fmask.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.Fmask.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.Fmask.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.Fmask.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B12.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B12.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.B12.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.B12.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.B12.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.SAA.tif",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.SAA.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.SAA.tif"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.SAA.tif",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.SAA.tif",
"encodingFormat": "image/tiff",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0_stac.json",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0_stac.json",
"encodingFormat": "application/json",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0_stac.json"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0_stac.json",
"contentUrl": "s3://lp-prod-public/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0_stac.json",
"encodingFormat": "application/json",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.cmr.xml",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.cmr.xml",
"encodingFormat": "application/xml",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.cmr.xml"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.cmr.xml",
"contentUrl": "s3://lp-prod-protected/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.cmr.xml",
"encodingFormat": "application/xml",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
},
{
"@type": "cr:FileObject",
"name": "s3credentials",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials",
"encodingFormat": "application/octet-stream",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "api endpoint to retrieve temporary credentials valid for same-region direct s3 access"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.jpg",
"contentUrl": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.jpg",
"encodingFormat": "image/jpeg",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "Download HLS.S30.T16SDD.2016134T163332.v2.0.jpg"
},
{
"@type": "cr:FileObject",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0.jpg",
"contentUrl": "s3://lp-prod-public/HLSS30.020/HLS.S30.T16SDD.2016134T163332.v2.0/HLS.S30.T16SDD.2016134T163332.v2.0.jpg",
"encodingFormat": "image/jpeg",
"sha256": "https://github.com/mlcommons/croissant/issues/80",
"description": "This link provides direct download access via S3 to the granule"
}
],
"recordSet": [
{
"@type": "cr:RecordSet",
"@id": "G2700719831-LPCLOUD",
"name": "HLS.S30.T16SDD.2016134T163332.v2.0",
"description": "HLS Sentinel-2 Multi-spectral Instrument Surface Reflectance Daily Global 30m v2.0"
}
]
}
The generated GeoCroissant JSON must pass the official MLCommons Croissant validator to ensure compliance with the standard. This validation checks that the JSON-LD document conforms to the Croissant specification.
Let’s validate our converted metadata:
!mlcroissant validate --jsonld=geocroissant_output.jsonI0216 17:16:47.204446 125298223576896 validate.py:53] Done.
Our GeoCroissant dataset contains 13 spectral bands from Sentinel-2 satellite imagery. Each band captures different wavelengths of electromagnetic radiation:
| Band | Description | Wavelength | Application |
|---|---|---|---|
| B01 | Coastal Aerosol | 443nm | Atmospheric correction |
| B02 | Blue | 490nm | Water body detection |
| B03 | Green | 560nm | Vegetation health |
| B04 | Red | 665nm | Vegetation stress |
| B05 | Red Edge 1 | 705nm | Chlorophyll content |
| B06 | Red Edge 2 | 740nm | Leaf area index |
| B07 | Red Edge 3 | 783nm | Vegetation moisture |
| B08 | NIR | 842nm | Biomass estimation |
| B8A | NIR Narrow | 865nm | Atmospheric water vapor |
| B09 | Water Vapour | 945nm | Atmospheric water vapor |
| B10 | SWIR Cirrus | 1375nm | Cirrus cloud detection |
| B11 | SWIR 1 | 1610nm | Moisture content |
| B12 | SWIR 2 | 2190nm | Geology applications |
We’ll download all bands directly from NASA’s cloud infrastructure, apply proper scaling factors, and create comprehensive visualizations including individual band displays and RGB composite imagery.
import json
import numpy as np
import matplotlib.pyplot as plt
import rasterio
import tempfile
import os
import earthaccess
# Constants
# Multiplier applied to raw band values before clipping/plotting.
SCALE_FACTOR = 1e-4

# Band code -> (display name, wavelength label); insertion order is the
# order in which the band codes are matched against distribution URLs.
BAND_LABELS = dict(
    B01=("Coastal Aerosol", "443nm"),
    B02=("Blue", "490nm"),
    B03=("Green", "560nm"),
    B04=("Red", "665nm"),
    B05=("Red Edge 1", "705nm"),
    B06=("Red Edge 2", "740nm"),
    B07=("Red Edge 3", "783nm"),
    B08=("NIR", "842nm"),
    B8A=("NIR Narrow", "865nm"),
    B09=("Water Vapour", "945nm"),
    B10=("SWIR Cirrus", "1375nm"),
    B11=("SWIR 1", "1610nm"),
    B12=("SWIR 2", "2190nm"),
)
# Load GeoCroissant metadata
with open('geocroissant_output.json', 'r') as f:
    data = json.load(f)

# The first recordSet entry supplies the name and description shown below
record = data['recordSet'][0]
print(f"Loaded: {record['name']}")
print(f"Description: {record['description']}")

# Map each known band code to the HTTPS GeoTIFF URL found in the distribution
band_urls = {}
for dist in data.get('distribution', []):
    url = dist.get('contentUrl', '')
    if not (url.startswith('https://') and '.tif' in url):
        continue  # ignore s3:// entries and anything that is not a .tif
    band_urls.update(
        {band: url for band in BAND_LABELS if f"{band}.tif" in url}
    )
print(f"Found bands: {', '.join(sorted(band_urls))}")

# Authenticate with NASA Earthdata and obtain the session used for downloads
earthaccess.login()
session = earthaccess.get_requests_https_session()
# Download function using rasterio
def download_band(url, band_name, timeout=60):
    """Download one GeoTIFF over HTTPS and return its first raster band.

    The file is streamed to a temporary ``.tif`` on disk through the
    module-level ``session``, opened with rasterio, and the temporary file
    is removed again whether or not the download succeeded.

    Args:
        url: HTTPS URL of the GeoTIFF to fetch.
        band_name: Label used only in the progress and error messages.
        timeout: Value forwarded to ``session.get(timeout=...)`` so a stalled
            connection fails instead of blocking the notebook forever.

    Returns:
        The array produced by ``src.read(1)``, or ``None`` if the download
        or the read raised an exception (the error is printed).
    """
    print(f"Downloading {band_name} ...")
    # Reserve a path on disk; the handle is closed immediately so the file
    # can be reopened for writing and later by rasterio.
    with tempfile.NamedTemporaryFile(suffix='.tif', delete=False) as tmp:
        temp_path = tmp.name
    try:
        # The context manager releases the streamed connection back to the
        # session even when raise_for_status() or the write loop fails.
        with session.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(temp_path, 'wb') as f:
                for chunk in response.iter_content(8192):
                    if chunk:
                        f.write(chunk)
        # Read with rasterio
        with rasterio.open(temp_path) as src:
            data_array = src.read(1)  # Read first band
        print(f"{band_name} shape: {data_array.shape}")
        return data_array
    except Exception as e:
        # Best effort: report the failure and let the caller skip this band.
        print(f"Failed to download {band_name}: {e}")
        return None
    finally:
        if os.path.exists(temp_path):
            os.unlink(temp_path)
# Download all bands, keeping only those that were fetched successfully
band_data = {}
for band, url in band_urls.items():
    arr = download_band(url, band)
    if arr is None:
        continue  # download_band already printed the failure
    band_data[band] = arr
# Preprocessing
def scale_and_normalize(data_array, scale_factor=SCALE_FACTOR):
    """Return the array as float32, multiplied by ``scale_factor`` and clipped to [0, 1].

    Args:
        data_array: Array exposing ``astype`` (the raw band values).
        scale_factor: Multiplier applied after the float32 conversion.

    Returns:
        The scaled array with values below 0 set to 0 and above 1 set to 1.
    """
    scaled = data_array.astype(np.float32) * scale_factor
    return np.clip(scaled, 0, 1)
# Plotting function (always grayscale)
def plot_band(ax, band_arr, band, title=None):
    """Draw one band on ``ax`` in grayscale with a 2nd-98th percentile stretch.

    Args:
        ax: Axes-like object providing ``imshow``, ``set_title`` and ``axis``.
        band_arr: 2-D array of band values to display.
        band: Band code, used as the title when ``title`` is not given.
        title: Optional title text overriding ``band``.
    """
    # Only strictly positive pixels contribute to the display range.
    positive = band_arr[band_arr > 0]
    if positive.size:
        lo = np.percentile(positive, 2)
        hi = np.percentile(positive, 98)
    else:
        # No positive pixel at all: fall back to the full [0, 1] range.
        lo, hi = 0, 1
    ax.imshow(band_arr, cmap="gray", vmin=lo, vmax=hi)
    ax.set_title(title or band, fontsize=10)
    ax.axis('off')
# RGB composite function
def plot_rgb(ax, band_data):
    """Draw a composite of bands B04, B03 and B02 on ``ax``.

    Each band is passed through ``scale_and_normalize`` and the three results
    are stacked along a third axis in red, green, blue order. Any exception
    (for example a missing band key) is printed and the axes title is set to
    a failure message instead of propagating the error.

    Args:
        ax: Axes-like object providing ``imshow``, ``set_title`` and ``axis``.
        band_data: Mapping from band code to its raw array.
    """
    try:
        channels = [
            scale_and_normalize(band_data[code])
            for code in ("B04", "B03", "B02")  # Red, Green, Blue
        ]
        ax.imshow(np.dstack(channels))
        ax.set_title("RGB Composite (B04, B03, B02)", fontsize=10)
        ax.axis('off')
    except Exception as e:
        ax.set_title("RGB Composite Failed", fontsize=10)
        print(f"RGB composite error: {e}")
# Sort bands to place RGB after B8A. Shorter codes sort first, then
# alphabetically; all codes in BAND_LABELS have the same length, so B8A
# ends up after B12.
bands = sorted(band_data.keys(), key=lambda b: (len(b), b))
if not bands:
    # Every download failed. Without this guard plt.subplots() would be
    # called with zero rows and fail with an unrelated-looking error.
    raise RuntimeError("No bands were downloaded; nothing to plot.")
insert_rgb_after = "B8A"
plot_sequence = []
for band in bands:
    plot_sequence.append(band)
    if band == insert_rgb_after:
        plot_sequence.append("RGB")  # Marker for RGB slot
if "RGB" not in plot_sequence:
    # B8A was not downloaded; still reserve a panel for the composite
    # instead of silently dropping it.
    plot_sequence.append("RGB")

# Grid layout
n = len(plot_sequence)
cols = 4
rows = int(np.ceil(n / cols))
fig, axes = plt.subplots(rows, cols, figsize=(4*cols, 3*rows))
axes = axes.flatten()

# Plot each band or RGB
for i, name in enumerate(plot_sequence):
    if name == "RGB":
        plot_rgb(axes[i], band_data)
    else:
        arr = scale_and_normalize(band_data[name])
        label, wl = BAND_LABELS.get(name, (name, ""))
        plot_band(axes[i], arr, name, f"{name} ({label}, {wl})")

# Hide unused axes. Index from n rather than from the loop variable so this
# does not depend on a name leaked out of the plotting loop.
for j in range(n, len(axes)):
    axes[j].axis('off')

fig.suptitle(f"All Bands + RGB Composite: {record['name']}", fontsize=16)
plt.tight_layout(rect=[0, 0, 1, 0.97])
plt.show()Loaded: HLS.S30.T16SDD.2016134T163332.v2.0
Description: HLS Sentinel-2 Multi-spectral Instrument Surface Reflectance Daily Global 30m v2.0
Found bands: B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B11, B12, B8A
Downloading B08 ...
B08 shape: (3660, 3660)
Downloading B07 ...
B07 shape: (3660, 3660)
Downloading B05 ...
B05 shape: (3660, 3660)
Downloading B02 ...
B02 shape: (3660, 3660)
Downloading B10 ...
B10 shape: (3660, 3660)
Downloading B01 ...
B01 shape: (3660, 3660)
Downloading B03 ...
B03 shape: (3660, 3660)
Downloading B04 ...
B04 shape: (3660, 3660)
Downloading B8A ...
B8A shape: (3660, 3660)
Downloading B06 ...
B06 shape: (3660, 3660)
Downloading B11 ...
B11 shape: (3660, 3660)
Downloading B09 ...
B09 shape: (3660, 3660)
Downloading B12 ...
B12 shape: (3660, 3660)
