Skip to content

Instantly share code, notes, and snippets.

@mgd020
Created February 6, 2026 00:45
Show Gist options
  • Select an option

  • Save mgd020/c9666d53a829d0ece9d7c78159e92906 to your computer and use it in GitHub Desktop.

Select an option

Save mgd020/c9666d53a829d0ece9d7c78159e92906 to your computer and use it in GitHub Desktop.
Serialize a Python value into PostgreSQL text input
import json
import re
from collections.abc import Sequence
from dataclasses import dataclass
from datetime import date, datetime, time, timedelta
from decimal import Decimal
from typing import Any
from uuid import UUID
_PG_ARRAY_ELEMENT_NEEDS_QUOTE = re.compile(r"^NULL$|[\s{},\"\\]", re.IGNORECASE)
def _type_error(v: Any) -> Any:
raise TypeError(f"Object of type {v.__class__.__name__} is not PG serializable")
@dataclass(frozen=True)
class PgJson:
value: Any
def pg_text(v, *, null=r"\N", default=_type_error) -> str:
"""
Serialize a Python value into PostgreSQL text input.
The returned string is suitable as a field value for:
- COPY ... FORMAT csv (with a matching NULL marker)
- COPY ... FORMAT text
- PostgreSQL array literals (as elements)
This function does NOT perform CSV quoting or SQL quoting.
"""
if v is None:
return null
t = type(v)
if t is str:
return v
if t is bool:
return "t" if v else "f"
if t is float:
# repr() is round-trippier than str() for some floats
# includes nan/inf/-inf (which doesn't work for numeric columns)
return repr(v)
if t is int or t is Decimal or t is UUID:
return str(v)
if t is date or t is time or t is datetime:
return v.isoformat()
if t is timedelta:
total_us = v.days * 86_400_000_000 + v.seconds * 1_000_000 + v.microseconds
return f"{total_us} us"
if t is PgJson:
return json.dumps(v.value, ensure_ascii=False, allow_nan=False, separators=(",", ":"))
if t is dict:
return json.dumps(v, ensure_ascii=False, allow_nan=False, separators=(",", ":"))
if t is list or t is tuple:
return pg_array_text(v)
if t is bytes or t is bytearray or t is memoryview:
return f"\\x{bytes(v).hex()}"
return pg_text(default(v), null=null)
def pg_array_text(v: Sequence) -> str:
"""Serialize a Python sequence into PostgreSQL array text syntax."""
parts = []
for e in v:
if e is None:
# Arrays use a NULL keyword
parts.append("NULL")
continue
t = type(e)
if t is list or t is tuple:
parts.append(pg_array_text(e))
continue
s = pg_text(e)
# Quote element if it contains special chars for array text format: { } , " \ whitespace
if _PG_ARRAY_ELEMENT_NEEDS_QUOTE.search(s) is not None:
s = '"' + s.replace("\\", "\\\\").replace('"', '\\"') + '"'
parts.append(s)
return f"{{{','.join(parts)}}}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment