Created
February 6, 2026 00:45
-
-
Save mgd020/c9666d53a829d0ece9d7c78159e92906 to your computer and use it in GitHub Desktop.
Serialize a Python value into PostgreSQL text input
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| import re | |
| from collections.abc import Sequence | |
| from dataclasses import dataclass | |
| from datetime import date, datetime, time, timedelta | |
| from decimal import Decimal | |
| from typing import Any | |
| from uuid import UUID | |
| _PG_ARRAY_ELEMENT_NEEDS_QUOTE = re.compile(r"^NULL$|[\s{},\"\\]", re.IGNORECASE) | |
| def _type_error(v: Any) -> Any: | |
| raise TypeError(f"Object of type {v.__class__.__name__} is not PG serializable") | |
| @dataclass(frozen=True) | |
| class PgJson: | |
| value: Any | |
| def pg_text(v, *, null=r"\N", default=_type_error) -> str: | |
| """ | |
| Serialize a Python value into PostgreSQL text input. | |
| The returned string is suitable as a field value for: | |
| - COPY ... FORMAT csv (with a matching NULL marker) | |
| - COPY ... FORMAT text | |
| - PostgreSQL array literals (as elements) | |
| This function does NOT perform CSV quoting or SQL quoting. | |
| """ | |
| if v is None: | |
| return null | |
| t = type(v) | |
| if t is str: | |
| return v | |
| if t is bool: | |
| return "t" if v else "f" | |
| if t is float: | |
| # repr() is round-trippier than str() for some floats | |
| # includes nan/inf/-inf (which doesn't work for numeric columns) | |
| return repr(v) | |
| if t is int or t is Decimal or t is UUID: | |
| return str(v) | |
| if t is date or t is time or t is datetime: | |
| return v.isoformat() | |
| if t is timedelta: | |
| total_us = v.days * 86_400_000_000 + v.seconds * 1_000_000 + v.microseconds | |
| return f"{total_us} us" | |
| if t is PgJson: | |
| return json.dumps(v.value, ensure_ascii=False, allow_nan=False, separators=(",", ":")) | |
| if t is dict: | |
| return json.dumps(v, ensure_ascii=False, allow_nan=False, separators=(",", ":")) | |
| if t is list or t is tuple: | |
| return pg_array_text(v) | |
| if t is bytes or t is bytearray or t is memoryview: | |
| return f"\\x{bytes(v).hex()}" | |
| return pg_text(default(v), null=null) | |
| def pg_array_text(v: Sequence) -> str: | |
| """Serialize a Python sequence into PostgreSQL array text syntax.""" | |
| parts = [] | |
| for e in v: | |
| if e is None: | |
| # Arrays use a NULL keyword | |
| parts.append("NULL") | |
| continue | |
| t = type(e) | |
| if t is list or t is tuple: | |
| parts.append(pg_array_text(e)) | |
| continue | |
| s = pg_text(e) | |
| # Quote element if it contains special chars for array text format: { } , " \ whitespace | |
| if _PG_ARRAY_ELEMENT_NEEDS_QUOTE.search(s) is not None: | |
| s = '"' + s.replace("\\", "\\\\").replace('"', '\\"') + '"' | |
| parts.append(s) | |
| return f"{{{','.join(parts)}}}" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment