Created
February 12, 2026 01:43
-
-
Save mdsumner/b526797a561002902833aa8bf5291eda to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| # test_gdal_zarr_v3_datetime64.sh | |
| # | |
| # Reproducer for GDAL Zarr V3 numpy.datetime64 extension data type issue. | |
| # Creates a minimal local Zarr V3 store with a datetime64 time coordinate, | |
| # then exercises GDAL classic 2D raster API access patterns to confirm | |
| # which operations are affected by the unsupported extension type. | |
| # | |
| # Requirements: | |
| # - GDAL >= 3.8 (with Zarr V3 support) | |
| # - Python 3 with zarr >= 3.0 and numpy | |
| # | |
| # Usage: | |
| # chmod +x test_gdal_zarr_v3_datetime64.sh | |
| # ./test_gdal_zarr_v3_datetime64.sh | |
| set -euo pipefail | |
| STORE="/tmp/test_zarr_v3_datetime64" | |
| echo "=== Step 1: Create minimal Zarr V3 store with numpy.datetime64 time coordinate ===" | |
| echo "" | |
| python3 << 'PYEOF' | |
| import zarr | |
| import numpy as np | |
| import json | |
| from pathlib import Path | |
| store_path = Path("/tmp/test_zarr_v3_datetime64") | |
| # Clean slate | |
| if store_path.exists(): | |
| import shutil | |
| shutil.rmtree(store_path) | |
| # Create Zarr V3 group | |
| root = zarr.open_group(store_path, mode="w", zarr_format=3) | |
| # Time coordinate: 3 monthly timestamps | |
| # zarr-python 3.x writes this as numpy.datetime64 extension type | |
| times = np.array(['2024-01-01', '2024-02-01', '2024-03-01'], dtype='datetime64[ns]') | |
| root.create_array("time", data=times, chunks=(3,)) | |
| # Depth coordinate: plain float64 | |
| depths = np.array([5.0, 10.0], dtype='float64') | |
| root.create_array("z", data=depths, chunks=(2,)) | |
| # Spatial coordinates: plain float64 (no issue for GDAL) | |
| lats = np.array([-60.0, -50.0, -40.0, -30.0], dtype='float64') | |
| lons = np.array([100.0, 110.0, 120.0, 130.0, 140.0], dtype='float64') | |
| root.create_array("lat", data=lats, chunks=(4,)) | |
| root.create_array("lon", data=lons, chunks=(5,)) | |
| # 3D data array: Float32, shape (time=3, lat=4, lon=5) — 1 extra dim | |
| data_3d = np.random.rand(3, 4, 5).astype('float32') * 10 | |
| root.create_array("sst", data=data_3d, chunks=(1, 4, 5)) | |
| # 4D data array: Float32, shape (time=3, z=2, lat=4, lon=5) — 2 extra dims | |
| # This mimics the CHLA-Z structure that failed on GCS | |
| data_4d = np.random.rand(3, 2, 4, 5).astype('float32') * 5 | |
| root.create_array("chla", data=data_4d, chunks=(1, 1, 4, 5)) | |
| # Add xarray-style _ARRAY_DIMENSIONS so GDAL can resolve dimensions | |
| for name, dims in [ | |
| ("time", ["time"]), | |
| ("z", ["z"]), | |
| ("lat", ["lat"]), | |
| ("lon", ["lon"]), | |
| ("sst", ["time", "lat", "lon"]), | |
| ("chla", ["time", "z", "lat", "lon"]), | |
| ]: | |
| zj_path = store_path / name / "zarr.json" | |
| with open(zj_path) as f: | |
| zj = json.load(f) | |
| zj["attributes"]["_ARRAY_DIMENSIONS"] = dims | |
| with open(zj_path, "w") as f: | |
| json.dump(zj, f, indent=2) | |
| # Show what zarr-python wrote | |
| print("time/zarr.json data_type field:") | |
| with open(store_path / "time" / "zarr.json") as f: | |
| zj = json.load(f) | |
| print(json.dumps(zj["data_type"], indent=2)) | |
| print() | |
| print("sst: shape (3,4,5) dims [time,lat,lon] data_type:", end=" ") | |
| with open(store_path / "sst" / "zarr.json") as f: | |
| print(json.dumps(json.load(f)["data_type"])) | |
| print("chla: shape (3,2,4,5) dims [time,z,lat,lon] data_type:", end=" ") | |
| with open(store_path / "chla" / "zarr.json") as f: | |
| print(json.dumps(json.load(f)["data_type"])) | |
| PYEOF | |
| echo "" | |
| echo "Store created at: ${STORE}" | |
| echo "" | |
| echo "=== Step 2: GDAL version ===" | |
| gdalinfo --version | |
| echo "" | |
| # Helper to run a command, capture exit code and output | |
| run_test() { | |
| local label="$1" | |
| shift | |
| echo "--- TEST: ${label} ---" | |
| echo " CMD: $*" | |
| if output=$("$@" 2>&1); then | |
| echo " RESULT: SUCCESS" | |
| echo "$output" | head -30 | sed 's/^/ | /' | |
| if [ "$(echo "$output" | wc -l)" -gt 30 ]; then | |
| echo " | ... (truncated)" | |
| fi | |
| else | |
| echo " RESULT: FAILED (exit code $?)" | |
| echo "$output" | head -15 | sed 's/^/ | /' | |
| fi | |
| echo "" | |
| } | |
| echo "=== Step 3: Test GDAL access patterns ===" | |
| echo "" | |
| echo "~~~ 3a: Coordinates (no datetime involved) ~~~" | |
| echo "" | |
| run_test "lat coordinate (float64)" \ | |
| gdalinfo "ZARR:${STORE}":/lat | |
| run_test "z coordinate (float64)" \ | |
| gdalinfo "ZARR:${STORE}":/z | |
| echo "~~~ 3b: 3D sst (time=3, lat=4, lon=5) — 1 extra dim ~~~" | |
| echo "" | |
| run_test "3D bare array (extra dims become bands)" \ | |
| gdalinfo "ZARR:${STORE}/sst" | |
| run_test "3D dimension index :0 (first time step)" \ | |
| gdalinfo "ZARR:${STORE}":/sst:0 | |
| run_test "3D gdal_translate :0 to GeoTIFF" \ | |
| gdal_translate -q "ZARR:${STORE}":/sst:0 /tmp/test_sst_slice.tif | |
| run_test "3D VRT band selection (bypasses dimension resolution)" \ | |
| gdalinfo "vrt://ZARR:${STORE}/sst?bands=1" | |
| echo "~~~ 3c: 4D chla (time=3, z=2, lat=4, lon=5) — 2 extra dims ~~~" | |
| echo "~~~ This mimics the CHLA-Z structure that fails on GCS ~~~" | |
| echo "" | |
| run_test "4D bare array (extra dims become bands)" \ | |
| gdalinfo "ZARR:${STORE}/chla" | |
| run_test "4D dimension index :0,0 (first time, first depth)" \ | |
| gdalinfo "ZARR:${STORE}":/chla:0,0 | |
| run_test "4D gdal_translate :0,0 to GeoTIFF" \ | |
| gdal_translate -q "ZARR:${STORE}":/chla:0,0 /tmp/test_chla_slice.tif | |
| run_test "4D VRT band selection (bypasses dimension resolution)" \ | |
| gdalinfo "vrt://ZARR:${STORE}/chla?bands=1" | |
| echo "~~~ 3d: Group-level and time array access ~~~" | |
| echo "" | |
| run_test "gdalmdiminfo on group (multidim API)" \ | |
| gdalmdiminfo "${STORE}" | |
| run_test "gdalinfo on group (classic API, subdataset listing)" \ | |
| gdalinfo "ZARR:${STORE}" | |
| run_test "gdalinfo on time array directly (numpy.datetime64 type)" \ | |
| gdalinfo "ZARR:${STORE}/time" | |
| echo "=== Step 4: Observations ===" | |
| echo "" | |
| echo "Check for these effects:" | |
| echo "" | |
| echo " 1. HARD FAILURE: Can the time array be opened at all?" | |
| echo " 2. DIMENSION NAMES: Are they 'time','lat','lon' or 'dim0','dim1','dim2'?" | |
| echo " (Look for DIM_time_INDEX vs DIM_dim0_INDEX in band metadata)" | |
| echo " 3. DIMENSION VALUES: Are time coordinate values in band metadata?" | |
| echo " (Look for DIM_time_VALUE=2024-01-01T... in band metadata)" | |
| echo " 4. 4D vs 3D: Does the 4D :0,0 access succeed or fail with" | |
| echo " 'Wrong number of indices of extra dimensions'?" | |
| echo " 5. STDERR: Are ERROR lines emitted even for operations that succeed?" | |
| echo "" | |
| echo "Store left at: ${STORE}" | |
| echo "Clean up with: rm -rf ${STORE} /tmp/test_sst_slice.tif /tmp/test_chla_slice.tif" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment