Skip to content

Instantly share code, notes, and snippets.

@bmorris3
Last active December 9, 2025 17:34
Show Gist options
  • Select an option

  • Save bmorris3/6b110f19cc21f0ec56c4cbf99acdaa40 to your computer and use it in GitHub Desktop.

Select an option

Save bmorris3/6b110f19cc21f0ec56c4cbf99acdaa40 to your computer and use it in GitHub Desktop.
example for building MAESTRO Zarr arrays
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "8f988550-1925-4851-9f0d-3ecd939257ae",
"metadata": {},
"source": [
"# Minimal Example: MAESTRO HDF5 to Zarr"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "95a4b1a1-f682-400e-99e4-2ef3e6274327",
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"from tqdm.auto import tqdm\n",
"\n",
"import h5py\n",
"import numpy as np\n",
"import zarr\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from astropy.table import Table"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6c8cb17f-ca8d-4e13-9d5d-9c9cb2655f7c",
"metadata": {},
"outputs": [],
"source": [
"grid = Table.read('grid1460.csv')\n",
"grid['index'] = grid['file_number'] - 1\n",
"grid.add_index('index')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d68cac62-2d88-4054-bac6-6ace55100060",
"metadata": {},
"outputs": [],
"source": [
"archive_path = '../12C-H4.h5'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "423508c0-d36c-4b46-aeb9-70ebdc38a87f",
"metadata": {},
"outputs": [],
"source": [
"h5_file = h5py.File(archive_path, 'r')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d54ea68a-041e-4e3e-9242-899801a88752",
"metadata": {},
"outputs": [],
"source": [
"press_coords = h5_file['pressure_coords'][:]\n",
"temp_coords = h5_file['temperature_coords'][:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "557aa663-760a-452e-8160-596e38513d7b",
"metadata": {},
"outputs": [],
"source": [
"# Molecule name: keep what follows the last '/' of the archive path,\n",
"# then drop everything from the first '.h5' onward.\n",
"path = (\n",
"    archive_path\n",
"    .rsplit('/', 1)[-1]\n",
"    .partition('.h5')[0]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "799245d8-2991-45a9-bba3-c62331bf5f6e",
"metadata": {},
"outputs": [],
"source": [
"# Natasha's recommended uniform wavenumber sampling\n",
"# (MAESTRO slack Nov 25 2025): rebuild the native wavenumber grid of\n",
"# the first grid row, then keep every second point of it.\n",
"row = grid[0]\n",
"start, number_wave_pts, delta_wavenumber = (\n",
"    row[column]\n",
"    for column in ('start_wavenumber', 'number_wave_pts', 'delta_wavenumber')\n",
")\n",
"\n",
"# uniform grid: start + k * delta for k = 0 .. number_wave_pts - 1\n",
"new_wvno_grid = start + delta_wavenumber * np.arange(number_wave_pts)\n",
"\n",
"# output sampling: every other native point, beginning with the first\n",
"wavenumber_sampling = new_wvno_grid[0::2]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "081a96ac-976d-4eb5-9cb1-4e2c08ba7bde",
"metadata": {},
"outputs": [],
"source": [
"# Build a Zarr v3 store named after the molecule that holds the cross\n",
"# sections on a (wavenumber, temperature, pressure) cube, resampled onto\n",
"# the common `wavenumber_sampling` grid.\n",
"compressed_path = f'{path}.zarr'\n",
"# smallest native wavenumber count in the grid (not used further in this cell):\n",
"min_wavenumber_pts = int(grid['number_wave_pts'].min())\n",
"\n",
"# sorted, de-duplicated coordinate axes of the output cube:\n",
"temperatures = np.unique(temp_coords)\n",
"pressures = np.unique(press_coords)\n",
"\n",
"# these compression settings will apply to every array:\n",
"compression = dict(\n",
"    compressors=zarr.codecs.BloscCodec(\n",
"        cname=\"zstd\",\n",
"        # here we use the maximum compression level,\n",
"        # which takes longer to compress (one time)\n",
"        # but not more time to decompress (many times)\n",
"        clevel=9,\n",
"        shuffle=zarr.codecs.BloscShuffle.shuffle\n",
"    )\n",
")\n",
"\n",
"store = zarr.storage.LocalStore(compressed_path)\n",
"# `overwrite` is a boolean flag, not a file-mode string:\n",
"root = zarr.create_group(store, overwrite=True, zarr_format=3)\n",
"\n",
"# one 1-D coordinate array per axis, each named after its own dimension:\n",
"root.create_array(\n",
"    'temperature',\n",
"    data=temperatures,\n",
"    dimension_names=('temperature',),\n",
"    attributes=dict(coordinates='temperature'),\n",
"    **compression\n",
")\n",
"root.create_array(\n",
"    'pressure',\n",
"    data=pressures,\n",
"    dimension_names=('pressure',),\n",
"    attributes=dict(coordinates='pressure'),\n",
"    **compression\n",
")\n",
"root.create_array(\n",
"    'wavenumber',\n",
"    data=wavenumber_sampling,\n",
"    dimension_names=('wavenumber',),\n",
"    attributes=dict(coordinates='wavenumber'),\n",
"    **compression\n",
")\n",
"\n",
"arr_shape = (wavenumber_sampling.size, temperatures.size, pressures.size)\n",
"arr_attrs = dict(\n",
"    # `coordinates` is required for remote indexing with xarray:\n",
"    coordinates='wavenumber temperature pressure',\n",
"\n",
"    # for MAESTRO versioning:\n",
"    molecule=path,\n",
"    source=dict(\n",
"        database='MAESTRO',\n",
"        created=str(datetime.datetime.now()),\n",
"        version=0.1  # or pick your number\n",
"    ),\n",
"    compression=dict(\n",
"        codec=compression['compressors'].__class__.__name__,\n",
"        algorithm=compression['compressors'].cname.name,\n",
"        clevel=compression['compressors'].clevel,\n",
"        # record the shuffle mode itself (the enum member name), the same\n",
"        # way `algorithm` records the member name of `cname`:\n",
"        shuffle=compression['compressors'].shuffle.name,\n",
"    )\n",
")\n",
"dimension_names = 'wavenumber temperature pressure'.split()\n",
"\n",
"# NOTE(review): the output array is named 'csx' while the HDF5 dataset\n",
"# read below is 'cxs' - confirm that the differing spelling is intended.\n",
"arr = root.create_array(\n",
"    'csx',\n",
"    shape=arr_shape,\n",
"    dtype=np.float64,\n",
"    dimension_names=dimension_names,\n",
"    attributes=arr_attrs,\n",
"    **compression\n",
")\n",
"\n",
"# this promotes efficient remote indexing:\n",
"zarr.consolidate_metadata(store)\n",
"\n",
"# look the dataset up once instead of once per (temperature, pressure) pair:\n",
"cross_sections = h5_file['cxs']\n",
"\n",
"# loop over temperature and pressure to downsample the\n",
"# wavenumber grid and store the results in the zarr array:\n",
"for i, temperature in tqdm(enumerate(temperatures), total=len(temperatures)):\n",
"    for j, pressure in enumerate(pressures):\n",
"        # np.argmin returns a row *position* in `grid`, so the row is\n",
"        # fetched positionally; `grid.loc[...]` would instead treat the\n",
"        # number as a value of the `index` column.\n",
"        nearest_coord = int(np.argmin(np.hypot(\n",
"            grid['pressure_bar'] - pressure,\n",
"            grid['temperature_K'] - temperature\n",
"        )))\n",
"        grid_point = grid[nearest_coord]\n",
"\n",
"        # `index` (file_number - 1) selects the matching archive row.\n",
"        # NOTE(review): assumes 'cxs' is ordered by file number - confirm.\n",
"        cross_section = cross_sections[int(grid_point['index'])]\n",
"\n",
"        # native uniform wavenumber grid of this grid point:\n",
"        wavenumber = (\n",
"            np.arange(grid_point['number_wave_pts']) * grid_point['delta_wavenumber']\n",
"            + grid_point['start_wavenumber']\n",
"        )\n",
"\n",
"        # np.interp holds the first/last tabulated value for requested\n",
"        # wavenumbers that fall outside this grid point's native range.\n",
"        arr[:, i, j] = np.interp(wavenumber_sampling, wavenumber, cross_section)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3657e9cc-f2cb-4359-8677-eba8a70e0d28",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment