Skip to content

Instantly share code, notes, and snippets.

@michaelaye
Last active December 18, 2025 00:44
Show Gist options
  • Select an option

  • Save michaelaye/5ddb550bf50727ee7a0d0f7f91aebd57 to your computer and use it in GitHub Desktop.

Select an option

Save michaelaye/5ddb550bf50727ee7a0d0f7f91aebd57 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Extract polygon footprint from ISIS cube file and convert to GML or GeoPackage format.
Requires: pvl, geopandas, shapely
"""
import argparse
import sys
import geopandas as gpd
import pvl
from shapely import wkt
from shapely.geometry import MultiPolygon, Polygon
def find_footprint_location(cub_file):
"""Find the footprint polygon location from the PVL label.
Returns tuple (start_byte, num_bytes) or None if not found.
"""
label = pvl.load(cub_file)
# Search for Polygon object with Name="Footprint"
if isinstance(label, dict):
# PVL might return a dict or a list of objects
objects = label.get("Object", [])
if not isinstance(objects, list):
objects = [objects]
else:
# If label is a list or other structure
objects = label if isinstance(label, list) else [label]
# First try direct search through objects
for obj in objects:
if isinstance(obj, dict) and obj.get("Name") == "Footprint":
start_byte = obj.get("StartByte")
num_bytes = obj.get("Bytes")
if start_byte is not None and num_bytes is not None:
return int(start_byte), int(num_bytes)
# If not found in direct search, try recursive search through entire label
def find_footprint_recursive(data):
"""Recursively search for Footprint object in PVL structure."""
if isinstance(data, dict):
# Check if this dict has Name="Footprint" and required fields
if (
data.get("Name") == "Footprint"
and "StartByte" in data
and "Bytes" in data
):
return data
# Recursively search all values
for value in data.values():
result = find_footprint_recursive(value)
if result:
return result
elif isinstance(data, list):
# Search through list items
for item in data:
result = find_footprint_recursive(item)
if result:
return result
return None
footprint_obj = find_footprint_recursive(label)
if footprint_obj:
start_byte = footprint_obj.get("StartByte")
num_bytes = footprint_obj.get("Bytes")
if start_byte is not None and num_bytes is not None:
return int(start_byte), int(num_bytes)
return None
def extract_polygon_wkt(cub_file, start_byte, num_bytes):
"""Extract WKT polygon text from cube file.
Note: PVL StartByte uses 1-based indexing (first byte is 1), while Python's
f.seek() uses 0-based indexing (first byte is 0). Therefore, we subtract 1
from the PVL StartByte value before using f.seek().
Args:
cub_file: Path to the ISIS cube file
start_byte: Start byte position from PVL (1-based)
num_bytes: Number of bytes to read
"""
with open(cub_file, "rb") as f:
# Convert PVL 1-based indexing to Python 0-based indexing
# PVL StartByte = 1 means first byte, f.seek(0) means first byte
seek_position = start_byte - 1
f.seek(seek_position)
data = f.read(num_bytes)
wkt_text = data.decode("ascii", errors="replace").strip()
return wkt_text
def parse_wkt_polygon(wkt_text):
"""Parse WKT polygon string and extract coordinates.
Supports both POLYGON and MULTIPOLYGON formats.
MULTIPOLYGON format: MULTIPOLYGON (((lon lat, ...)), ((lon lat, ...)), ...)
POLYGON format: POLYGON ((lon lat, lon lat, ...))
Returns a list of polygons, where each polygon is a list of (lon, lat) tuples.
For POLYGON, returns a list with one polygon.
For MULTIPOLYGON, returns a list with all polygons.
"""
wkt_text = wkt_text.strip()
# Parse using Shapely's WKT parser
geom = wkt.loads(wkt_text)
polygons = []
if geom.geom_type == "Polygon":
# Single polygon
coords = list(geom.exterior.coords)
polygons.append(coords)
elif geom.geom_type == "MultiPolygon":
# Multiple polygons
for poly in geom.geoms:
coords = list(poly.exterior.coords)
polygons.append(coords)
else:
raise ValueError(f"Unsupported geometry type: {geom.geom_type}")
return polygons
def create_geodataframe(polygons, crs="EPSG:4326"):
"""Create a GeoDataFrame from polygons.
Args:
polygons: List of polygons, where each polygon is a list of (lon, lat) tuples
crs: Coordinate reference system
Returns:
GeoDataFrame
"""
# Create Shapely geometry from polygons
if len(polygons) == 1:
# Single polygon
# Note: coordinates are already closed (first == last)
geometry = Polygon(polygons[0])
else:
# Multiple polygons - create MultiPolygon
shapely_polygons = [Polygon(coords) for coords in polygons]
geometry = MultiPolygon(shapely_polygons)
# Create GeoDataFrame
gdf = gpd.GeoDataFrame(
[{"id": 1, "name": "footprint", "geometry": geometry}], crs=crs
)
return gdf
def create_geopackage(polygons, output_file, layer_name="footprint", crs="EPSG:4326"):
"""Create a GeoPackage file from polygons using geopandas exporter.
Args:
polygons: List of polygons, where each polygon is a list of (lon, lat) tuples
output_file: Output file path
layer_name: Layer name for GeoPackage
crs: Coordinate reference system
Returns:
GeoDataFrame
"""
gdf = create_geodataframe(polygons, crs=crs)
# Update layer name if needed
if layer_name != "footprint":
gdf["name"] = layer_name
# Write to GeoPackage using geopandas exporter
gdf.to_file(output_file, driver="GPKG", layer=layer_name)
return gdf
def create_gml(polygons, output_file, crs="EPSG:4326"):
"""Create a GML file from polygons using geopandas exporter.
Args:
polygons: List of polygons, where each polygon is a list of (lon, lat) tuples
output_file: Output file path
crs: Coordinate reference system
Returns:
GeoDataFrame
"""
gdf = create_geodataframe(polygons, crs=crs)
# Write to GML using geopandas exporter
gdf.to_file(output_file, driver="GML")
return gdf
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Extract polygon footprint from ISIS cube file and convert to GML or GeoPackage format."
)
parser.add_argument(
"--format",
choices=["gml", "gpkg", "both"],
default="gml",
help="Output format: gml, gpkg, or both (default: gml). Both formats use geopandas exporters.",
)
parser.add_argument(
"--output", type=str, help="Output file path (default: based on format)"
)
parser.add_argument(
"--cub-file",
type=str,
default="hirise40.cal.map.cub",
help="Input ISIS cube file (default: hirise40.cal.map.cub)",
)
parser.add_argument(
"--start-byte",
type=int,
default=None,
help="Start byte offset (overrides PVL label). Note: If WKT starts with 'ULTIPOLYGON', the script will automatically include the preceding 'M'.",
)
parser.add_argument(
"--num-bytes",
type=int,
default=None,
help="Number of bytes to read (overrides PVL label)",
)
parser.add_argument(
"--layer-name",
type=str,
default="footprint",
help="Layer name for GeoPackage (default: footprint)",
)
parser.add_argument(
"--crs",
type=str,
default="EPSG:4326",
help="CRS for GeoPackage (default: EPSG:4326)",
)
args = parser.parse_args()
cub_file = args.cub_file
# Try to parse footprint location from PVL label if not provided
if args.start_byte is None or args.num_bytes is None:
print("Parsing PVL label to find footprint location...")
footprint_location = find_footprint_location(cub_file)
if footprint_location:
pvl_start_byte, pvl_num_bytes = footprint_location
print("Found footprint in PVL label:")
print(f" StartByte: {pvl_start_byte}")
print(f" Bytes: {pvl_num_bytes}")
start_byte = (
args.start_byte if args.start_byte is not None else pvl_start_byte
)
num_bytes = args.num_bytes if args.num_bytes is not None else pvl_num_bytes
else:
if args.start_byte is None or args.num_bytes is None:
print("Error: Could not find footprint location in PVL label.")
print("Please provide --start-byte and --num-bytes arguments.")
sys.exit(1)
start_byte = args.start_byte
num_bytes = args.num_bytes
else:
start_byte = args.start_byte
num_bytes = args.num_bytes
print()
print("Extracting polygon from ISIS cube file...")
print(f"File: {cub_file}")
print(f"Start byte: {start_byte}")
print(f"Number of bytes: {num_bytes}")
print()
# Extract WKT text
wkt_text = extract_polygon_wkt(cub_file, start_byte, num_bytes)
print(f"WKT text (first 200 chars): {wkt_text[:200]}...")
print(f"WKT text (last 200 chars): ...{wkt_text[-200:]}")
print()
# Parse coordinates
print("Parsing coordinates...")
polygons = parse_wkt_polygon(wkt_text)
print(f"Found {len(polygons)} polygon(s) in MULTIPOLYGON")
for i, poly_coords in enumerate(polygons):
print(f" Polygon {i + 1}: {len(poly_coords)} coordinate pairs")
print(f" First coordinate: {poly_coords[0]}")
print(f" Last coordinate: {poly_coords[-1]}")
print(f" Closed: {poly_coords[0] == poly_coords[-1]}")
print()
# Determine output format(s)
formats_to_create = []
if args.format == "both":
formats_to_create = ["gml", "gpkg"]
else:
formats_to_create = [args.format]
# Create GML if requested
if "gml" in formats_to_create:
output_file = args.output or "hirise40_footprint.gml"
print("Creating GML file...")
gdf = create_gml(polygons, output_file, crs=args.crs)
print(f"Successfully created {output_file}")
print(f" Geometry type: {gdf.geometry.iloc[0].geom_type}")
if len(polygons) > 1:
print(f" Contains {len(polygons)} polygons")
# Create GeoPackage if requested
if "gpkg" in formats_to_create:
output_file = args.output or "hirise40_footprint.gpkg"
print("Creating GeoPackage...")
gdf = create_geopackage(
polygons, output_file, layer_name=args.layer_name, crs=args.crs
)
print(f"Successfully created {output_file}")
print(f" Layer: {args.layer_name}")
print(f" CRS: {args.crs}")
print(f" Geometry type: {gdf.geometry.iloc[0].geom_type}")
if len(polygons) > 1:
print(f" Contains {len(polygons)} polygons")
total_coords = sum(len(poly) for poly in polygons)
print(f"\nTotal coordinates across all polygons: {total_coords}")
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

Polygon Storage Format in ISIS Cube Files

Summary

The polygon footprint data in the ISIS cube file hirise40.cal.map.cub is stored as ASCII text in WKT (Well-Known Text) format, not as binary data.

Input Data

The example cube file hirise40.cal.map.cub was created from the following HiRISE data processing pipeline:

  1. Source Data: ESP_039983_1390_RED4_0.IMG (raw HiRISE image data)
  2. Import to ISIS: Used hi2isis to convert the IMG file to ISIS cube format
  3. Calibration: Applied radiometric calibration using hical
  4. Map Projection: Applied default map projection using cam2map
  5. Footprint Generation: Generated the spatial footprint using footprintinit

The resulting cube file (hirise40.cal.map.cub) contains the calibrated, map-projected image data along with the polygon footprint stored in the PVL label and as attached binary data.

Location Information

The polygon footprint location is stored in the PVL (Planetary Data System Label) header of the .cub file. The extraction script automatically parses the PVL label to find:

  • Object Name: Polygon
  • Name: Footprint
  • StartByte: Location in the file (parsed from PVL)
  • Bytes: Size of the footprint data (parsed from PVL)

Indexing Note: PVL uses 1-based indexing (first byte is position 1), while Python's f.seek() uses 0-based indexing (first byte is position 0). The script automatically converts PVL StartByte values to 0-based by subtracting 1 before using f.seek().

Storage Format

The polygon data is stored as a plain ASCII text string in WKT (Well-Known Text) format:

MULTIPOLYGON (((30.107598435886608 -40.35696177712009, 30.108625875765043 -40.356868167155994, ...)))

WKT Format Notes:

  • MULTIPOLYGON is a valid WKT geometry type (OGC standard) used to represent collections of polygons
  • This particular footprint is stored as a MULTIPOLYGON containing a single polygon, which is valid WKT
  • The structure has three levels of parentheses: outer (MULTIPOLYGON), middle (polygon), inner (coordinate ring)

Indexing Conversion: When PVL reports StartByte = 225143647, this means the 225143647th byte (1-based). To read from Python using f.seek(), we convert to 0-based indexing: f.seek(225143647 - 1), which correctly positions us at the start of the MULTIPOLYGON string.

Coordinate Format

  • Coordinates are stored as longitude latitude pairs (not latitude longitude)
  • Each coordinate pair is separated by commas
  • Coordinates are floating-point numbers with high precision (~15 decimal places)
  • The polygon is closed (first and last coordinates are identical)

Extracted Data

  • Total coordinate pairs: 420
  • Coordinate range:
    • Longitude: ~30.107° to ~30.138° (East)
    • Latitude: ~-40.357° to ~-40.526° (South)
  • Projection: Sinusoidal (based on cube file metadata)

Conversion to GML or GeoPackage

The polygon can be extracted and converted to either GML (Geography Markup Language) or GeoPackage format using the extract_isis_footprint.py script. The script uses geopandas exporters for both formats, ensuring compatibility with GIS software.

GML Format

The GML format is created using geopandas.GeoDataFrame.to_file() with driver="GML":

  • GML 3.2 namespace
  • Proper geometry encoding for Polygon/MultiPolygon
  • Coordinates stored as space-separated "lon lat" pairs
  • Compatible with OGC GML standards

GeoPackage Format

The GeoPackage format is created using geopandas.GeoDataFrame.to_file() with driver="GPKG":

  • SQLite-based format (OGC GeoPackage standard)
  • Single polygon feature in a named layer (default: "footprint")
  • Configurable CRS (default: EPSG:4326)
  • Compatible with QGIS, ArcGIS, and other GIS software

Usage

The script automatically parses the PVL label to find the footprint location - no manual byte offsets needed:

# Extract to GML (default) - automatically finds footprint location
python3 extract_isis_footprint.py --format gml

# Extract to GeoPackage - automatically finds footprint location
python3 extract_isis_footprint.py --format gpkg

# Extract to both formats
python3 extract_isis_footprint.py --format both

# Custom options
python3 extract_isis_footprint.py --format gpkg --output my_footprint.gpkg \
    --layer-name "hirise_footprint" --crs "EPSG:4326"

# Override PVL parsing with manual byte offsets (if needed)
python3 extract_isis_footprint.py --format gml --start-byte 225143647 --num-bytes 16318

Required Libraries: The script requires pvl, geopandas, and shapely. Install with:

pip install pvl geopandas shapely

Extraction Method

The extraction process works as follows:

  1. Parse PVL Label: The script reads the PVL label from the beginning of the .cub file using the pvl library
  2. Find Footprint Location: Searches for an object with Object = Polygon and Name = Footprint to extract StartByte and Bytes values
    • Uses direct search through label.get("Object", []) first
    • Falls back to recursive search through the entire PVL structure if needed
  3. Convert Indexing: Converts PVL 1-based StartByte to Python 0-based indexing by subtracting 1
  4. Read WKT Data: Seeks to the calculated position and reads the specified number of bytes
  5. Parse WKT: Uses shapely.wkt.loads() to parse the WKT text into geometric objects
    • Handles both POLYGON and MULTIPOLYGON formats
    • Extracts all polygons if MULTIPOLYGON contains multiple polygons
  6. Export: Uses geopandas.GeoDataFrame.to_file() to export to GML or GeoPackage format
    • GML: gdf.to_file(output_file, driver="GML")
    • GeoPackage: gdf.to_file(output_file, driver="GPKG", layer=layer_name)

Key Implementation Details:

  • PVL parsing handles different label structures (dict, list, nested objects)
  • Recursive search ensures footprint is found even in complex PVL hierarchies
  • Indexing conversion is automatic and transparent to the user
  • WKT parsing uses Shapely for robustness and standards compliance
  • Export uses geopandas exporters for compatibility and maintainability
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment