The notebook exploring this issue can be found here.
I am trying to create a STAC catalog for a gridded geotiff dataset (flood model predictions at various times, types, climate scenarios, etc.). However, whenever I attempt to save the STAC catalog, I get an erro about the item datetime object. I am posting the error below, and then the code where I create the STAC catalog below that; any suggestions would be appreciated!
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[7], line 6
3 print(f"Found item: {item.id} in catalog {root.id}: {item.datetime}")
5 catalog.validate()
----> 6 catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)
File c:\WBG\Anaconda3\envs\s2s_ingest\lib\site-packages\pystac\catalog.py:986, in Catalog.save(self, catalog_type, dest_href, stac_io)
981 child.save(
982 dest_href=os.path.dirname(child_dest_href),
983 stac_io=stac_io,
984 )
985 else:
--> 986 child.save(stac_io=stac_io)
988 for item_link in self.get_item_links():
989 if item_link.is_resolved():
File c:\WBG\Anaconda3\envs\s2s_ingest\lib\site-packages\pystac\catalog.py:1022, in Catalog.save(self, catalog_type, dest_href, stac_io)
1018 rel_href = make_relative_href(self.self_href, self.self_href)
1019 catalog_dest_href = make_absolute_href(
1020 rel_href, dest_href, start_is_dir=True
1021 )
-> 1022 self.save_object(
1023 include_self_link=include_self_link,
1024 dest_href=catalog_dest_href,
...
--> 412 if dt.tzinfo is None:
413 dt = dt.replace(tzinfo=timezone.utc)
415 timestamp = dt.isoformat(timespec=timespec)
AttributeError: 'str' object has no attribute 'tzinfo'
The errors seems pretty straightforward: the time objects in the stac Items need to be datetime objects ... but they are; you can see that I set them as datetime objects and then validate them as datetime objects (by printing out the year).
catalog = pystac.Catalog(
id="fathom-v31-catalog",
description="STAC Catalog for FATHOM 3.1 global flood hazard data",
)
cur_model = 'FLOOD_MAP-1ARCSEC-NW_OFFSET-1in100-FLUVIAL-UNDEFENDED-DEPTH-2020-PERCENTILE50-v3.1' #new_fathom_models[0]
model_path = f"FATHOM/v31/{cur_model}/"
all_tiles = get_tile_list(model_path) # this gets a list of all tile in our S3 bucket for the current model
deets = cur_model.split("-")
return_period = deets[3]
type = deets[4]
defended = deets[5]
date = deets[7]
scenario = deets[8] if deets[8] != "PERCENTILE50" else "Baseline"
spatial_extent = pystac.SpatialExtent(bboxes=[[-180.0, -90.0, 180.0, 90.0]])
temporal_extent = pystac.TemporalExtent(intervals=[[f"{date}-01-01T00:00:00Z", f"{date}-12-31T00:00:00Z"]])
c_collection = pystac.Collection(
id=cur_model,
description=f"FATHOM 3.1 Flood Hazard Model: {type} flood, {defended}, {return_period} return period, {scenario} scenario, {date} data",
title=f"FATHOM 3.1 - {cur_model}",
extent=pystac.Extent(spatial=spatial_extent, temporal=temporal_extent),
extra_fields={
"model_type": type,
"defended_status": defended,
"return_period": return_period,
"scenario": scenario,
"data_year": date,
}
)
for c_tile in tqdm(all_tiles[:10]): #Limiting to first 10 for testing
raster_s3_uri = f"s3://{s3_bucket}/{model_path}{c_tile}"
bbox, footprint = get_bbox_and_footprint_dumb(c_tile) # tiles are named by their lat/long so we can infer bbox and footprint from another function
item = pystac.Item(
id=c_tile.replace('.tif', ''),
geometry=footprint,
bbox=bbox,
datetime=pystac.utils.str_to_datetime(f"{date}-01-01T00:00:00Z"),
properties={},
)
asset = pystac.Asset(
href=raster_s3_uri,
media_type=pystac.MediaType.COG,
roles=["data"],
title=c_tile,
)
item.add_asset("raster", asset)
c_collection.add_item(item)
catalog.add_child(c_collection)
catalog.normalize_hrefs("")
for root, catalogs, items in catalog.walk():
for item in items:
print(f"Found item: {item.id} in catalog {root.id}: {item.datetime.year}")
catalog.validate()
catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)
From the call stack:
self.extent.to_dict()
-> self.temporal.to_dict()
-> datetime_to_str(i[0])
The problem comes from extent. This variable comes from the constructor of Collection.
def __init__(
self,
id: str,
description: str,
extent: Extent, # HERE
title: str | None = None,
stac_extensions: list[str] | None = None,
href: str | None = None,
extra_fields: dict[str, Any] | None = None,
catalog_type: CatalogType | None = None,
license: str = "other",
keywords: list[str] | None = None,
providers: list[Provider] | None = None,
summaries: Summaries | None = None,
assets: dict[str, Asset] | None = None,
strategy: HrefLayoutStrategy | None = None,
)
Your code pass it from here:
c_collection = pystac.Collection(
id=cur_model,
description=f"FATHOM 3.1 Flood Hazard Model: {type} flood, {defended}, {return_period} return period, {scenario} scenario, {date} data",
title=f"FATHOM 3.1 - {cur_model}",
extent=pystac.Extent(spatial=spatial_extent, temporal=temporal_extent), # HERE
extra_fields={
"model_type": type,
"defended_status": defended,
"return_period": return_period,
"scenario": scenario,
"data_year": date,
}
)
So, it looks like the error comes from:
temporal_extent = pystac.TemporalExtent(intervals=[[f"{date}-01-01T00:00:00Z", f"{date}-12-31T00:00:00Z"]])
Are you sure the variable date only represents the year?
There may be a formatting error here.