# Source: GitHub Gist MaxGhenis/9cd49188caf9276cc11d0108aa96042f (last active December 26, 2025)
# 85th Percentile Household Income in Los Angeles City (2023-2024) using ACS 1-Year PUMS
"""
85th Percentile Household Income in Los Angeles City
Using 1-year ACS PUMS data for 2023 and 2024
LA City PUMAs identified from Census ACS API by name matching.
These 23 PUMAs are explicitly labeled as LA City areas.
Note: Some LA City areas may be in shared PUMAs (e.g., 03707, 03748) which
include portions of other cities - these are excluded to avoid overestimation.
"""
import pandas as pd
import numpy as np
from io import BytesIO
from zipfile import ZipFile
import requests
# PUMA codes (2020 geography) treated as Los Angeles City by this script.
# Selected via the Census ACS 1-year API: PUMAs whose names contain both
# "LA City" and "Los Angeles County". Codes are 5-character, zero-padded strings.
LA_CITY_PUMAS = [
    "03705",  # LA City (Northwest/Chatsworth & Porter Ranch)
    "03706",  # LA City (North Central/Granada Hills & Sylmar)
    "03708",  # LA City (Northeast/Sunland, Sun Valley & Tujunga)
    "03721",  # LA City (Northeast/North Hollywood & Valley Village)
    "03722",  # LA City (North Central/Van Nuys & North Sherman Oaks)
    "03723",  # LA City (North Central/Mission Hills & Panorama City)
    "03724",  # LA City (Northwest/Encino & Tarzana)
    "03725",  # LA City (Northwest/Canoga Park, Winnetka & Woodland Hills)
    "03730",  # LA City (Central/Hancock Park & Mid-Wilshire)
    "03732",  # LA City (East Central & Hollywood)
    "03733",  # LA City (Central/Koreatown)
    "03734",  # LA City (East Central/Silver Lake, Echo Park & Westlake)
    "03735",  # LA City (Mount Washington, Highland Park & Glassell Park)
    "03744",  # LA City (East Central/Central City & Boyle Heights)
    "03745",  # LA City (Southeast/East Vernon)
    "03746",  # LA City (Central/Univ. of Southern California & Exposition Park)
    "03747",  # LA City (Central/West Adams & Baldwin Hills)
    "03750",  # LA City (South Central/Westmont)
    "03751",  # LA City (South Central/Watts)
    "03767",  # LA City (South/San Pedro)
    "03770",  # LA City (West Los Angeles, Century City & Palms)
    "03775",  # LA City (Central)
    "03776",  # LA City (Central/Westwood & West Los Angeles)
]
def download_pums_data(year: int) -> pd.DataFrame:
    """Download ACS 1-year PUMS household data for California.

    Fetches ``csv_hca.zip`` for ``year`` from www2.census.gov, finds the
    ``psam_h*.csv`` member inside the archive, and loads only the three
    columns this script uses.

    Args:
        year: ACS survey year, interpolated into the download URL.

    Returns:
        DataFrame with columns ``PUMA`` (read as strings), ``HINCP`` and
        ``WGTP``.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.Timeout: If connecting or reading stalls past the timeout.
        FileNotFoundError: If the archive has no ``psam_h*.csv`` member.
    """
    url = f"https://www2.census.gov/programs-surveys/acs/data/pums/{year}/1-Year/csv_hca.zip"
    print(f"Downloading {year} PUMS data from {url}...")
    # requests has no default timeout; without one a stalled connection
    # blocks forever. (connect, read) seconds; read applies between chunks.
    response = requests.get(url, timeout=(10, 120))
    response.raise_for_status()
    with ZipFile(BytesIO(response.content)) as z:
        csv_name = next(
            (n for n in z.namelist() if n.startswith("psam_h") and n.endswith(".csv")),
            None,
        )
        if csv_name is None:
            raise FileNotFoundError(
                f"No psam_h*.csv member found in {url}; archive contains {z.namelist()}"
            )
        print(f"Reading {csv_name}...")
        with z.open(csv_name) as f:
            # PUMA is read as str so leading zeros are not lost to int parsing.
            df = pd.read_csv(f, usecols=["PUMA", "HINCP", "WGTP"], dtype={"PUMA": str})
    return df
def weighted_percentile(values: np.ndarray, weights: np.ndarray, percentile: float) -> float:
    """Calculate a weighted percentile.

    Observations whose value or weight is NaN, or whose weight is not
    positive, are ignored. The result is the smallest observed value whose
    cumulative weight reaches ``percentile`` percent of the total weight
    (no interpolation between observations).

    Args:
        values: Observed values (array-like, coerced to float).
        weights: Weight of each observation, same length as ``values``.
        percentile: Percentile on a 0-100 scale. Out-of-range inputs clamp
            to the smallest / largest observation.

    Returns:
        The weighted percentile as a plain ``float``.

    Raises:
        ValueError: If no observation survives the NaN / non-positive-weight
            filter, since a percentile is undefined in that case.
    """
    # Coerce so lists and integer arrays are accepted alongside float arrays.
    values = np.asarray(values, dtype=float)
    weights = np.asarray(weights, dtype=float)

    mask = ~np.isnan(values) & ~np.isnan(weights) & (weights > 0)
    values = values[mask]
    weights = weights[mask]
    if values.size == 0:
        raise ValueError(
            "weighted_percentile: no observations with a non-NaN value and a positive weight"
        )

    sorted_indices = np.argsort(values)
    sorted_values = values[sorted_indices]
    sorted_weights = weights[sorted_indices]

    cumsum = np.cumsum(sorted_weights)
    cutoff = percentile / 100.0 * cumsum[-1]
    # First position whose cumulative weight is >= cutoff.
    idx = np.searchsorted(cumsum, cutoff)
    # Clamp: a cutoff above the total weight would index past the end.
    return float(sorted_values[min(idx, len(sorted_values) - 1)])
def calculate_la_city_85th_percentile(year: int) -> dict:
    """Calculate 85th percentile household income for LA City.

    Downloads the California household PUMS file for ``year``, keeps the
    records whose PUMA is in ``LA_CITY_PUMAS``, and computes the
    WGTP-weighted 85th percentile of HINCP.

    Only records with a reported HINCP and a positive WGTP are counted.
    Per the ACS PUMS data dictionary, the household file also carries vacant
    housing units (blank HINCP, positive WGTP) and group-quarters placeholder
    records (WGTP of 0); including them would overstate the household total
    and the sample size.

    NOTE(review): HINCP is used as reported; the ADJINC inflation factor is
    not loaded or applied - confirm whether adjusted dollars are wanted.

    Args:
        year: ACS survey year to download and analyze.

    Returns:
        Dict with keys ``year``, ``p85_income`` (weighted 85th percentile of
        HINCP), ``total_households`` (sum of WGTP over counted households)
        and ``sample_size`` (number of counted household records).

    Raises:
        ValueError: If no household records match the LA City PUMAs.
    """
    df = download_pums_data(year)
    # Pad PUMA codes to 5 digits for matching
    df["PUMA"] = df["PUMA"].str.zfill(5)

    in_city = df["PUMA"].isin(LA_CITY_PUMAS)
    # Same rows weighted_percentile would keep, so the percentile itself is
    # unaffected; this only corrects the household total and sample size.
    is_household = df["HINCP"].notna() & (df["WGTP"] > 0)
    la_city = df[in_city & is_household].copy()
    if la_city.empty:
        raise ValueError(
            f"No household records with income found for LA City PUMAs in {year} PUMS data"
        )

    p85 = weighted_percentile(
        la_city["HINCP"].values,
        la_city["WGTP"].values,
        85,
    )
    return {
        "year": year,
        "p85_income": p85,
        "total_households": la_city["WGTP"].sum(),
        "sample_size": len(la_city),
    }
if __name__ == "__main__":
    # Run the calculation for each survey year, printing a summary as we go.
    results = []
    for survey_year in (2023, 2024):
        stats = calculate_la_city_85th_percentile(survey_year)
        results.append(stats)

        p85_line = f"  85th Percentile Household Income: ${stats['p85_income']:,.0f}"
        households_line = f"  Total Households (weighted): {stats['total_households']:,.0f}"
        sample_line = f"  Sample Size: {stats['sample_size']:,}"
        print(f"\n{survey_year} Results:")
        print(p85_line)
        print(households_line)
        print(sample_line)

    # Compare the second year against the first.
    baseline_p85 = results[0]["p85_income"]
    latest_p85 = results[1]["p85_income"]
    change = latest_p85 - baseline_p85
    pct_change = (change / baseline_p85) * 100

    print(f"\nYear-over-year change (2023 to 2024):")
    print(f"  Absolute change: ${change:,.0f}")
    print(f"  Percent change: {pct_change:.1f}%")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment