Skip to content

Instantly share code, notes, and snippets.

@d-v-b
Last active November 6, 2025 13:08
Show Gist options
  • Select an option

  • Save d-v-b/6e7321d0570ecdb1d0ce520d83375d9f to your computer and use it in GitHub Desktop.

Select an option

Save d-v-b/6e7321d0570ecdb1d0ce520d83375d9f to your computer and use it in GitHub Desktop.
Dump the schema of a zarr group or array to stdout
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "pydantic-zarr==0.8.4",
# "zarr[remote]>=3.1.3",
# "fsspec[s3, http]",
# "typer==0.20.0",
# "rich==14.2.0"
# ]
# ///
"""
Retrieve the structure of a Zarr hierarchy as a JSON document
"""
from typing import Annotated, Literal
import zarr
from zarr.storage._common import make_store_path
import pydantic_zarr.v2 as v2
import pydantic_zarr.v3 as v3
import typer
import asyncio
from rich import print_json
import json
# Zarr format selector accepted by get_spec and the CLI: the literal versions
# 2 or 3, or "auto" to let get_spec probe for the format itself.
ZarrFormat = Literal[2, 3, "auto"]
def get_spec(url: str, zarr_format: ZarrFormat, consolidated_meta: bool):
"""
Get a groupspec or arrayspec from zarr data at a URL
"""
store = asyncio.run(make_store_path(url, mode='r'))
node: zarr.Group | zarr.Array
if zarr_format == "auto":
try:
node = zarr.core.sync_group.get_node(store=store.store, path=store.path, zarr_format=3)
except Exception:
try:
node = zarr.core.sync_group.get_node(store=store.store, path=store.path, zarr_format=2)
except Exception:
raise FileNotFoundError(f"No Zarr V3 or V2 data found at {url}.")
else:
try:
node = zarr.core.sync_group.get_node(store=store.store, path=store.path, zarr_format=zarr_format)
except Exception:
raise FileNotFoundError(f"No Zarr V{zarr_format} data found at {url}.")
if isinstance(node, zarr.Group) and consolidated_meta == True:
node = zarr.open_group(store, mode='r', use_consolidated=True, zarr_format=node.metadata.zarr_format)
match (node.metadata.zarr_format, node):
case (2, zarr.Group()):
spec = v2.GroupSpec.from_zarr(node)
case (2, zarr.Array()):
spec = v2.ArraySpec.from_zarr(node)
case (3, zarr.Group()):
spec = v3.GroupSpec.from_zarr(node)
case (3, zarr.Array()):
spec = v3.ArraySpec.from_zarr(node)
case _:
raise ValueError(f'The object {node} is invalid.')
return spec
def main(
    *,
    url: Annotated[
        str,
        typer.Option(help='The URL for the Zarr array or group to retrieve.'),
    ],
    zarr_format: Annotated[
        ZarrFormat,
        typer.Option(help='The specific Zarr format version to use, or "auto", which will try Zarr V2 and V3.'),
    ] = "auto",
    consolidated_meta: Annotated[
        bool,
        typer.Option(help="Use consolidated metadata, if available."),
    ] = False,
) -> None:
    # CLI entry point: look up the spec for the requested Zarr node and print
    # it as JSON via rich.
    # NOTE: documented with comments rather than a docstring on purpose, so
    # the text typer shows for --help stays exactly as it was.
    schema = get_spec(
        url=url,
        zarr_format=zarr_format,
        consolidated_meta=consolidated_meta,
    )
    # Dump the pydantic model to plain Python data, then serialize it to the
    # JSON string that print_json renders.
    serialized = json.dumps(schema.model_dump())
    print_json(serialized)
# When executed as a script (not imported), hand main to typer.run so its
# keyword-only parameters are exposed as command-line options.
if __name__ == "__main__":
    typer.run(main)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment