MaxGhenis · December 26, 2025 19:34
diff --git a/la_city_income_85th_percentile.py b/la_city_income_85th_percentile.py
 """
 85th Percentile Household Income in Los Angeles City
 Using 1-year ACS PUMS data for 2023 and 2024

 LA City PUMAs identified from Census ACS API by name matching.
 These 23 PUMAs are explicitly labeled as LA City areas.
 Note: Some LA City areas fall in PUMAs shared with other cities (e.g., 03707,
 03748). Those shared PUMAs are excluded so that households outside LA City
 are not counted, which means the estimate does not cover every part of the city.
 """

 import pandas as pd
 import numpy as np
 from io import BytesIO
 from zipfile import ZipFile
 import requests

 # Los Angeles City PUMAs (2020 geography), keyed by PUMA code; each value is
 # the area description that follows "LA City" in the Census PUMA name.
 # Source: Census ACS 1-year API, PUMAs containing "LA City" and "Los Angeles County"
 _LA_CITY_PUMA_LABELS = {
     "03705": "Northwest/Chatsworth & Porter Ranch",
     "03706": "North Central/Granada Hills & Sylmar",
     "03708": "Northeast/Sunland, Sun Valley & Tujunga",
     "03721": "Northeast/North Hollywood & Valley Village",
     "03722": "North Central/Van Nuys & North Sherman Oaks",
     "03723": "North Central/Mission Hills & Panorama City",
     "03724": "Northwest/Encino & Tarzana",
     "03725": "Northwest/Canoga Park, Winnetka & Woodland Hills",
     "03730": "Central/Hancock Park & Mid-Wilshire",
     "03732": "East Central & Hollywood",
     "03733": "Central/Koreatown",
     "03734": "East Central/Silver Lake, Echo Park & Westlake",
     "03735": "Mount Washington, Highland Park & Glassell Park",
     "03744": "East Central/Central City & Boyle Heights",
     "03745": "Southeast/East Vernon",
     "03746": "Central/Univ. of Southern California & Exposition Park",
     "03747": "Central/West Adams & Baldwin Hills",
     "03750": "South Central/Westmont",
     "03751": "South Central/Watts",
     "03767": "South/San Pedro",
     "03770": "West Los Angeles, Century City & Palms",
     "03775": "Central",
     "03776": "Central/Westwood & West Los Angeles",
 }

 # The 5-digit PUMA codes, in the order listed above.
 LA_CITY_PUMAS = list(_LA_CITY_PUMA_LABELS)


 def download_pums_data(year: int) -> pd.DataFrame:
     """Download ACS 1-year PUMS household data for California.

     Fetches ``csv_hca.zip`` for ``year`` from www2.census.gov, keeps the
     archive in memory, and loads the first member whose name starts with
     ``psam_h`` and ends with ``.csv``.

     Args:
         year: Survey year inserted into the download URL.

     Returns:
         DataFrame holding only the ``PUMA``, ``HINCP`` and ``WGTP`` columns.
         ``PUMA`` is read as a string so the codes are not parsed as numbers.

     Raises:
         requests.HTTPError: If the server responds with an error status.
         requests.Timeout: If connecting, or waiting for data, takes too long.
         IndexError: If the archive contains no ``psam_h*.csv`` member.
     """
     url = f"https://www2.census.gov/programs-surveys/acs/data/pums/{year}/1-Year/csv_hca.zip"
     print(f"Downloading {year} PUMS data from {url}...")

     # (connect, read) timeouts in seconds. Without a timeout, requests waits
     # indefinitely on a stalled connection.
     response = requests.get(url, timeout=(10, 120))
     response.raise_for_status()

     with ZipFile(BytesIO(response.content)) as archive:
         csv_members = [
             name
             for name in archive.namelist()
             if name.startswith("psam_h") and name.endswith(".csv")
         ]
         if not csv_members:
             # Same exception type the bare ``[0]`` lookup produced, but with
             # a message that says what was missing and where.
             raise IndexError(f"No psam_h*.csv member found in archive from {url}")
         csv_name = csv_members[0]
         print(f"Reading {csv_name}...")
         with archive.open(csv_name) as f:
             df = pd.read_csv(f, usecols=["PUMA", "HINCP", "WGTP"], dtype={"PUMA": str})

     return df


 def weighted_percentile(values: np.ndarray, weights: np.ndarray, percentile: float) -> float:
     """Calculate a weighted percentile without interpolation.

     Observations with a NaN value, a NaN weight, or a non-positive weight are
     dropped. The remaining values are sorted, their weights accumulated, and
     the result is the first value whose cumulative weight reaches
     ``percentile`` percent of the total weight.

     Args:
         values: Observations; any array-like convertible to float.
         weights: Weights with the same shape as ``values``.
         percentile: Target percentile on a 0-100 scale. Out-of-range inputs
             are not rejected: they resolve to the smallest or largest
             remaining value.

     Returns:
         The selected value as a float, or NaN when no usable observation
         remains after filtering.

     Raises:
         ValueError: If ``values`` and ``weights`` have different shapes.
     """
     values = np.asarray(values, dtype=float)
     weights = np.asarray(weights, dtype=float)
     if values.shape != weights.shape:
         raise ValueError(
             f"values and weights must have the same shape, got {values.shape} and {weights.shape}"
         )

     mask = ~np.isnan(values) & ~np.isnan(weights) & (weights > 0)
     values = values[mask]
     weights = weights[mask]

     # Nothing left to rank: report NaN instead of failing on cumsum[-1].
     if values.size == 0:
         return float("nan")

     sorted_indices = np.argsort(values)
     sorted_values = values[sorted_indices]
     sorted_weights = weights[sorted_indices]

     cumsum = np.cumsum(sorted_weights)
     cutoff = percentile / 100.0 * cumsum[-1]

     # First position whose cumulative weight is >= cutoff; the clamp keeps the
     # index in range when the cutoff exceeds the total weight.
     idx = np.searchsorted(cumsum, cutoff)
     return float(sorted_values[min(idx, len(sorted_values) - 1)])


 def calculate_la_city_85th_percentile(year: int) -> dict:
     """Calculate 85th percentile household income for LA City.

     Downloads the California household PUMS file for ``year``, keeps the
     records whose PUMA code is in ``LA_CITY_PUMAS``, and computes the
     WGTP-weighted 85th percentile of HINCP.

     The household total and sample size are taken over the same records that
     enter the percentile (HINCP present and WGTP > 0), so all three figures
     describe one population. In the PUMS household file, HINCP is blank for
     vacant units and group quarters (see the PUMS data dictionary); counting
     those rows would overstate the number of households.

     NOTE(review): HINCP is used as reported, without applying the ADJINC
     adjustment factor. Confirm that is intended.

     Args:
         year: Survey year passed to ``download_pums_data``.

     Returns:
         Dict with keys ``year``, ``p85_income``, ``total_households`` (sum of
         WGTP over the records used) and ``sample_size`` (number of records
         used).
     """
     df = download_pums_data(year)

     # Pad PUMA codes to 5 digits for matching
     df["PUMA"] = df["PUMA"].str.zfill(5)
     la_city = df[df["PUMA"].isin(LA_CITY_PUMAS)]

     # Same inclusion rule weighted_percentile applies internally.
     has_income_and_weight = la_city["HINCP"].notna() & (la_city["WGTP"] > 0)
     households = la_city[has_income_and_weight]

     p85 = weighted_percentile(
         households["HINCP"].values,
         households["WGTP"].values,
         85,
     )

     return {
         "year": year,
         "p85_income": p85,
         "total_households": households["WGTP"].sum(),
         "sample_size": len(households),
     }


 if __name__ == "__main__":
     # Estimate each survey year in turn, printing its summary right after
     # it has been computed.
     results = []
     for year in [2023, 2024]:
         summary = calculate_la_city_85th_percentile(year)
         results.append(summary)

         p85_income = summary["p85_income"]
         total_households = summary["total_households"]
         sample_size = summary["sample_size"]

         print(f"\n{year} Results:")
         print(f"  85th Percentile Household Income: ${p85_income:,.0f}")
         print(f"  Total Households (weighted): {total_households:,.0f}")
         print(f"  Sample Size: {sample_size:,}")

     # Compare the second year against the first.
     earlier, later = results
     change = later["p85_income"] - earlier["p85_income"]
     pct_change = (change / earlier["p85_income"]) * 100

     print(f"\nYear-over-year change (2023 to 2024):")
     print(f"  Absolute change: ${change:,.0f}")
     print(f"  Percent change: {pct_change:.1f}%")
	"""
	85th Percentile Household Income in Los Angeles City
	Using 1-year ACS PUMS data for 2023 and 2024

	LA City PUMAs identified from Census ACS API by name matching.
	These 23 PUMAs are explicitly labeled as LA City areas.
	Note: Some LA City areas fall in PUMAs shared with other cities (e.g., 03707,
	03748). Those shared PUMAs are excluded so that households outside LA City
	are not counted, which means the estimate does not cover every part of the city.
	"""

	import pandas as pd
	import numpy as np
	from io import BytesIO
	from zipfile import ZipFile
	import requests

	# Los Angeles City PUMAs (2020 geography), keyed by PUMA code; each value is
	# the area description that follows "LA City" in the Census PUMA name.
	# Source: Census ACS 1-year API, PUMAs containing "LA City" and "Los Angeles County"
	_LA_CITY_PUMA_LABELS = {
	    "03705": "Northwest/Chatsworth & Porter Ranch",
	    "03706": "North Central/Granada Hills & Sylmar",
	    "03708": "Northeast/Sunland, Sun Valley & Tujunga",
	    "03721": "Northeast/North Hollywood & Valley Village",
	    "03722": "North Central/Van Nuys & North Sherman Oaks",
	    "03723": "North Central/Mission Hills & Panorama City",
	    "03724": "Northwest/Encino & Tarzana",
	    "03725": "Northwest/Canoga Park, Winnetka & Woodland Hills",
	    "03730": "Central/Hancock Park & Mid-Wilshire",
	    "03732": "East Central & Hollywood",
	    "03733": "Central/Koreatown",
	    "03734": "East Central/Silver Lake, Echo Park & Westlake",
	    "03735": "Mount Washington, Highland Park & Glassell Park",
	    "03744": "East Central/Central City & Boyle Heights",
	    "03745": "Southeast/East Vernon",
	    "03746": "Central/Univ. of Southern California & Exposition Park",
	    "03747": "Central/West Adams & Baldwin Hills",
	    "03750": "South Central/Westmont",
	    "03751": "South Central/Watts",
	    "03767": "South/San Pedro",
	    "03770": "West Los Angeles, Century City & Palms",
	    "03775": "Central",
	    "03776": "Central/Westwood & West Los Angeles",
	}

	# The 5-digit PUMA codes, in the order listed above.
	LA_CITY_PUMAS = list(_LA_CITY_PUMA_LABELS)


	def download_pums_data(year: int) -> pd.DataFrame:
	    """Download ACS 1-year PUMS household data for California.

	    Fetches ``csv_hca.zip`` for ``year`` from www2.census.gov, keeps the
	    archive in memory, and loads the first member whose name starts with
	    ``psam_h`` and ends with ``.csv``.

	    Args:
	        year: Survey year inserted into the download URL.

	    Returns:
	        DataFrame holding only the ``PUMA``, ``HINCP`` and ``WGTP`` columns.
	        ``PUMA`` is read as a string so the codes are not parsed as numbers.

	    Raises:
	        requests.HTTPError: If the server responds with an error status.
	        requests.Timeout: If connecting, or waiting for data, takes too long.
	        IndexError: If the archive contains no ``psam_h*.csv`` member.
	    """
	    url = f"https://www2.census.gov/programs-surveys/acs/data/pums/{year}/1-Year/csv_hca.zip"
	    print(f"Downloading {year} PUMS data from {url}...")

	    # (connect, read) timeouts in seconds. Without a timeout, requests waits
	    # indefinitely on a stalled connection.
	    response = requests.get(url, timeout=(10, 120))
	    response.raise_for_status()

	    with ZipFile(BytesIO(response.content)) as archive:
	        csv_members = [
	            name
	            for name in archive.namelist()
	            if name.startswith("psam_h") and name.endswith(".csv")
	        ]
	        if not csv_members:
	            # Same exception type a bare ``[0]`` lookup would produce, but
	            # with a message that says what was missing and where.
	            raise IndexError(f"No psam_h*.csv member found in archive from {url}")
	        csv_name = csv_members[0]
	        print(f"Reading {csv_name}...")
	        with archive.open(csv_name) as f:
	            df = pd.read_csv(f, usecols=["PUMA", "HINCP", "WGTP"], dtype={"PUMA": str})

	    return df


	def weighted_percentile(values: np.ndarray, weights: np.ndarray, percentile: float) -> float:
	    """Calculate a weighted percentile without interpolation.

	    Observations with a NaN value, a NaN weight, or a non-positive weight are
	    dropped. The remaining values are sorted, their weights accumulated, and
	    the result is the first value whose cumulative weight reaches
	    ``percentile`` percent of the total weight.

	    Args:
	        values: Observations; any array-like convertible to float.
	        weights: Weights with the same shape as ``values``.
	        percentile: Target percentile on a 0-100 scale. Out-of-range inputs
	            are not rejected: they resolve to the smallest or largest
	            remaining value.

	    Returns:
	        The selected value as a float, or NaN when no usable observation
	        remains after filtering.

	    Raises:
	        ValueError: If ``values`` and ``weights`` have different shapes.
	    """
	    values = np.asarray(values, dtype=float)
	    weights = np.asarray(weights, dtype=float)
	    if values.shape != weights.shape:
	        raise ValueError(
	            f"values and weights must have the same shape, got {values.shape} and {weights.shape}"
	        )

	    mask = ~np.isnan(values) & ~np.isnan(weights) & (weights > 0)
	    values = values[mask]
	    weights = weights[mask]

	    # Nothing left to rank: report NaN instead of failing on cumsum[-1].
	    if values.size == 0:
	        return float("nan")

	    sorted_indices = np.argsort(values)
	    sorted_values = values[sorted_indices]
	    sorted_weights = weights[sorted_indices]

	    cumsum = np.cumsum(sorted_weights)
	    cutoff = percentile / 100.0 * cumsum[-1]

	    # First position whose cumulative weight is >= cutoff; the clamp keeps the
	    # index in range when the cutoff exceeds the total weight.
	    idx = np.searchsorted(cumsum, cutoff)
	    return float(sorted_values[min(idx, len(sorted_values) - 1)])


	def calculate_la_city_85th_percentile(year: int) -> dict:
	    """Calculate 85th percentile household income for LA City.

	    Downloads the California household PUMS file for ``year``, keeps the
	    records whose PUMA code is in ``LA_CITY_PUMAS``, and computes the
	    WGTP-weighted 85th percentile of HINCP.

	    The household total and sample size are taken over the same records that
	    enter the percentile (HINCP present and WGTP > 0), so all three figures
	    describe one population. In the PUMS household file, HINCP is blank for
	    vacant units and group quarters (see the PUMS data dictionary); counting
	    those rows would overstate the number of households.

	    NOTE(review): HINCP is used as reported, without applying the ADJINC
	    adjustment factor. Confirm that is intended.

	    Args:
	        year: Survey year passed to ``download_pums_data``.

	    Returns:
	        Dict with keys ``year``, ``p85_income``, ``total_households`` (sum of
	        WGTP over the records used) and ``sample_size`` (number of records
	        used).
	    """
	    df = download_pums_data(year)

	    # Pad PUMA codes to 5 digits for matching
	    df["PUMA"] = df["PUMA"].str.zfill(5)
	    la_city = df[df["PUMA"].isin(LA_CITY_PUMAS)]

	    # Same inclusion rule weighted_percentile applies internally.
	    has_income_and_weight = la_city["HINCP"].notna() & (la_city["WGTP"] > 0)
	    households = la_city[has_income_and_weight]

	    p85 = weighted_percentile(
	        households["HINCP"].values,
	        households["WGTP"].values,
	        85,
	    )

	    return {
	        "year": year,
	        "p85_income": p85,
	        "total_households": households["WGTP"].sum(),
	        "sample_size": len(households),
	    }


	if __name__ == "__main__":
	    # Estimate each survey year in turn, printing its summary right after
	    # it has been computed.
	    results = []
	    for year in [2023, 2024]:
	        result = calculate_la_city_85th_percentile(year)
	        results.append(result)
	        print(f"\n{year} Results:")
	        print(f" 85th Percentile Household Income: ${result['p85_income']:,.0f}")
	        print(f" Total Households (weighted): {result['total_households']:,.0f}")
	        print(f" Sample Size: {result['sample_size']:,}")

	    # Compare the second year against the first.
	    change = results[1]["p85_income"] - results[0]["p85_income"]
	    pct_change = (change / results[0]["p85_income"]) * 100
	    print(f"\nYear-over-year change (2023 to 2024):")
	    print(f" Absolute change: ${change:,.0f}")
	    print(f" Percent change: {pct_change:.1f}%")
No results found