Last active
December 26, 2025 19:34
-
-
Save MaxGhenis/9cd49188caf9276cc11d0108aa96042f to your computer and use it in GitHub Desktop.
85th Percentile Household Income in Los Angeles City (2023-2024) using ACS 1-Year PUMS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| 85th Percentile Household Income in Los Angeles City | |
| Using 1-year ACS PUMS data for 2023 and 2024 | |
| LA City PUMAs identified from Census ACS API by name matching. | |
| These 23 PUMAs are explicitly labeled as LA City areas. | |
| Note: Some LA City areas may be in shared PUMAs (e.g., 03707, 03748) which | |
| include portions of other cities - these are excluded to avoid overestimation. | |
| """ | |
| import pandas as pd | |
| import numpy as np | |
| from io import BytesIO | |
| from zipfile import ZipFile | |
| import requests | |
# PUMA codes (2020 geography) that this script treats as Los Angeles City.
# Selected by name matching against the Census ACS 1-year API: every PUMA
# listed here has a name containing both "LA City" and "Los Angeles County".
# The trailing comment on each entry is the area name used for that match.
LA_CITY_PUMAS = [
    "03705",  # LA City (Northwest/Chatsworth & Porter Ranch)
    "03706",  # LA City (North Central/Granada Hills & Sylmar)
    "03708",  # LA City (Northeast/Sunland, Sun Valley & Tujunga)
    "03721",  # LA City (Northeast/North Hollywood & Valley Village)
    "03722",  # LA City (North Central/Van Nuys & North Sherman Oaks)
    "03723",  # LA City (North Central/Mission Hills & Panorama City)
    "03724",  # LA City (Northwest/Encino & Tarzana)
    "03725",  # LA City (Northwest/Canoga Park, Winnetka & Woodland Hills)
    "03730",  # LA City (Central/Hancock Park & Mid-Wilshire)
    "03732",  # LA City (East Central & Hollywood)
    "03733",  # LA City (Central/Koreatown)
    "03734",  # LA City (East Central/Silver Lake, Echo Park & Westlake)
    "03735",  # LA City (Mount Washington, Highland Park & Glassell Park)
    "03744",  # LA City (East Central/Central City & Boyle Heights)
    "03745",  # LA City (Southeast/East Vernon)
    "03746",  # LA City (Central/Univ. of Southern California & Exposition Park)
    "03747",  # LA City (Central/West Adams & Baldwin Hills)
    "03750",  # LA City (South Central/Westmont)
    "03751",  # LA City (South Central/Watts)
    "03767",  # LA City (South/San Pedro)
    "03770",  # LA City (West Los Angeles, Century City & Palms)
    "03775",  # LA City (Central)
    "03776",  # LA City (Central/Westwood & West Los Angeles)
]
def download_pums_data(year: int, *, timeout: float = 60.0) -> pd.DataFrame:
    """Download the ACS 1-year PUMS housing-unit file for California.

    Fetches ``csv_hca.zip`` for ``year`` from www2.census.gov, opens the first
    ``psam_h*.csv`` member in memory, and loads only the columns this script
    uses.

    Args:
        year: ACS survey year; it is substituted into the download URL.
        timeout: Seconds ``requests`` waits for the connection and for each
            chunk of the response before giving up. Without a timeout a
            stalled connection would block the script indefinitely.

    Returns:
        DataFrame with columns ``PUMA`` (read as ``str`` so leading zeros are
        not lost), ``HINCP`` and ``WGTP``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
        ValueError: If the archive has no ``psam_h*.csv`` member.
    """
    url = f"https://www2.census.gov/programs-surveys/acs/data/pums/{year}/1-Year/csv_hca.zip"
    print(f"Downloading {year} PUMS data from {url}...")
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    with ZipFile(BytesIO(response.content)) as archive:
        csv_name = next(
            (
                name
                for name in archive.namelist()
                if name.startswith("psam_h") and name.endswith(".csv")
            ),
            None,
        )
        if csv_name is None:
            # Fail with the archive contents instead of a bare IndexError.
            raise ValueError(
                f"No psam_h*.csv file found in {url}; "
                f"archive contains: {archive.namelist()}"
            )
        print(f"Reading {csv_name}...")
        with archive.open(csv_name) as csv_file:
            return pd.read_csv(
                csv_file,
                usecols=["PUMA", "HINCP", "WGTP"],
                dtype={"PUMA": str},
            )
def weighted_percentile(values: np.ndarray, weights: np.ndarray, percentile: float) -> float:
    """Return the weighted percentile of ``values``.

    Observations whose value or weight is NaN, or whose weight is not
    positive, are ignored. The remaining observations are sorted by value and
    the result is the first value at which the cumulative weight reaches
    ``percentile`` percent of the total weight (no interpolation).

    Args:
        values: Observed values; any array-like convertible to float.
        weights: One weight per observation, same length as ``values``.
        percentile: Percentile on a 0-100 scale. Requests below 0 yield the
            smallest value and requests above 100 the largest.

    Returns:
        The selected value as a ``float``, or NaN when no observation
        survives the filtering (for example, empty input).
    """
    # Accept lists/Series as well as arrays; float dtype makes isnan safe.
    values = np.asarray(values, dtype=float)
    weights = np.asarray(weights, dtype=float)

    usable = ~np.isnan(values) & ~np.isnan(weights) & (weights > 0)
    values = values[usable]
    weights = weights[usable]

    # Nothing left to rank: report "no data" rather than failing on cumsum[-1].
    if values.size == 0:
        return float("nan")

    order = np.argsort(values, kind="stable")
    sorted_values = values[order]
    cumulative = np.cumsum(weights[order])

    cutoff = percentile / 100.0 * cumulative[-1]
    idx = np.searchsorted(cumulative, cutoff)
    # Clamp: rounding (or percentile > 100) can push idx one past the end.
    return float(sorted_values[min(idx, sorted_values.size - 1)])
def calculate_la_city_85th_percentile(year: int) -> dict:
    """Calculate the 85th percentile household income for LA City.

    Downloads the California PUMS housing file for ``year``, keeps the
    records whose PUMA is listed in ``LA_CITY_PUMAS``, and computes the
    WGTP-weighted 85th percentile of HINCP.

    Only records that have a household income and a positive weight are
    counted. Records with no HINCP are dropped before the totals are
    computed, so ``total_households`` and ``sample_size`` describe exactly
    the population the percentile is based on.

    NOTE(review): the housing file also contains vacant units and
    group-quarters placeholders, which are presumably the records with blank
    HINCP; confirm against the PUMS data dictionary.

    NOTE(review): HINCP is used as reported. Census guidance is to scale it
    by ADJINC / 1,000,000 to express it in survey-year dollars; confirm
    whether that adjustment is wanted for this comparison.

    Args:
        year: ACS survey year to download and summarise.

    Returns:
        dict with keys ``year``, ``p85_income``, ``total_households`` (sum of
        WGTP over the households used) and ``sample_size`` (number of
        household records used).

    Raises:
        ValueError: If no household record for ``year`` matches
            ``LA_CITY_PUMAS``.
    """
    df = download_pums_data(year)

    # Pad PUMA codes to 5 digits so they compare equal to LA_CITY_PUMAS.
    puma = df["PUMA"].str.zfill(5)
    in_la_city = puma.isin(LA_CITY_PUMAS)

    # Same filter the percentile applies: a reported income and a positive
    # weight. Applying it here keeps the totals consistent with the estimate.
    is_household = df["HINCP"].notna() & (df["WGTP"] > 0)
    households = df[in_la_city & is_household]

    if households.empty:
        raise ValueError(
            f"No household records matched LA_CITY_PUMAS for {year}; "
            "check that the PUMA codes match this year's geography."
        )

    p85 = weighted_percentile(
        households["HINCP"].values,
        households["WGTP"].values,
        85,
    )

    return {
        "year": year,
        "p85_income": p85,
        "total_households": households["WGTP"].sum(),
        "sample_size": len(households),
    }
if __name__ == "__main__":
    # Summarise each survey year in turn, keeping every summary so the two
    # years can be compared once both have been reported.
    yearly = []
    for survey_year in [2023, 2024]:
        summary = calculate_la_city_85th_percentile(survey_year)
        yearly.append(summary)

        print(f"\n{survey_year} Results:")
        print(f" 85th Percentile Household Income: ${summary['p85_income']:,.0f}")
        print(f" Total Households (weighted): {summary['total_households']:,.0f}")
        print(f" Sample Size: {summary['sample_size']:,}")

    # Year-over-year movement of the 85th percentile, first year as baseline.
    baseline = yearly[0]["p85_income"]
    latest = yearly[1]["p85_income"]
    change = latest - baseline
    pct_change = (change / baseline) * 100

    print("\nYear-over-year change (2023 to 2024):")
    print(f" Absolute change: ${change:,.0f}")
    print(f" Percent change: {pct_change:.1f}%")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment