Skip to content

Instantly share code, notes, and snippets.

@kliu04
Last active November 24, 2025 00:42
Show Gist options
  • Select an option

  • Save kliu04/71f083ea08663a3974dc9b73b8cb89d0 to your computer and use it in GitHub Desktop.

Select an option

Save kliu04/71f083ea08663a3974dc9b73b8cb89d0 to your computer and use it in GitHub Desktop.
Count rejection emails!
from functools import partial
import mailbox
import email.utils
from datetime import datetime
from multiprocessing import Pool
import os
from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel
class Result(BaseModel):
    """Structured-output schema the model's reply is parsed into."""

    # True when the model classifies the email as a job rejection.
    is_rejection: bool
def parse(mbox: mailbox.mbox, cutoff: datetime):
    """Collect the messages in *mbox* dated on or after *cutoff*.

    The ``Date`` header is parsed with ``email.utils.parsedate``, which
    discards the UTC offset, so the comparison uses the sender's wall-clock
    time as a naive ``datetime``. *cutoff* must therefore be naive as well.

    Messages are skipped (never raised on) when the ``Date`` header is
    missing, unparseable, or parses to an impossible calendar value such as
    day 32.

    Args:
        mbox: Any iterable of messages exposing ``.get("Date")``.
        cutoff: Naive datetime; messages strictly older than this are dropped.

    Returns:
        A set of the messages that passed the filter.
    """
    filtered_emails: set[mailbox.mboxMessage] = set()
    for msg in mbox:
        date_header = msg.get("Date")
        if not date_header:
            continue
        # str(): a header containing raw non-ASCII bytes can come back as an
        # email.header.Header object, which parsedate() cannot split.
        raw_date = email.utils.parsedate(str(date_header))
        if not raw_date:
            continue
        try:
            msg_dt = datetime(*raw_date[:6])
        except (ValueError, OverflowError):
            # parsedate() does not range-check its fields; a bogus date in
            # one message must not abort the whole scan.
            continue
        if msg_dt < cutoff:
            # skip old emails
            continue
        filtered_emails.add(msg)
    return filtered_emails
def _decode_text_part(part):
    """Decode one message part to ``str``; return ``None`` if it has no payload.

    Falls back to UTF-8 when the declared charset is unknown or is not a
    text encoding. Undecodable bytes are replaced rather than raising.
    """
    payload = part.get_payload(decode=True)
    if payload is None:
        # get_payload(decode=True) yields None for container parts.
        return None
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="replace")
    except (LookupError, ValueError):
        return payload.decode("utf-8", errors="replace")


def get_text_body(msg: mailbox.mboxMessage) -> str:
    """Extract the plain-text body of an email message, ignoring attachments.

    For multipart messages every ``text/plain`` part whose
    ``Content-Disposition`` does not mention ``attachment`` is decoded and
    the pieces are joined with newlines. For single-part messages the
    payload is returned only when the content type is ``text/plain``.

    Args:
        msg: The message to read.

    Returns:
        The decoded text, or ``""`` when the message has no plain-text part.
    """
    body_parts = []
    if msg.is_multipart():
        for part in msg.walk():
            if part.get_content_type() != "text/plain":
                continue
            content_disposition = str(part.get("Content-Disposition", ""))
            if "attachment" in content_disposition.lower():
                continue
            text = _decode_text_part(part)
            if text is not None:
                body_parts.append(text)
    elif msg.get_content_type() == "text/plain":
        # single-part message
        text = _decode_text_part(msg)
        if text is not None:
            body_parts.append(text)
    return "\n".join(body_parts)
def process_email(msg: mailbox.mboxMessage) -> bool:
    """Ask the model whether *msg* is a job rejection email.

    The plain-text body is sent to the module-level ``client`` and the reply
    is parsed into ``Result``. Messages without any plain-text content are
    reported as non-rejections without making an API call.

    This is deliberately best-effort: any API or parsing failure is printed
    and counted as ``False`` so one bad message does not stop the batch.

    Args:
        msg: The message to classify.

    Returns:
        True only when the model reports the message as a rejection.
    """
    prompt = (
        "Return True if and only if this email is a job rejection email.\n"
        "Only return True if the email explicitly states that the applicant "
        "was not selected for the position they applied for, and not for any "
        "other reason."
    )
    text = get_text_body(msg)
    if not text.strip():
        # Nothing to classify (e.g. HTML-only mail); don't pay for a request.
        print(f"Skipped email with no plain-text body: {msg.get('Subject')}")
        return False
    try:
        response = client.responses.parse(
            model="gpt-5-nano",
            input=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": text},
            ],
            text_format=Result,
        )
        event = response.output_parsed
        if not event:
            return False
        print(
            f"Processed email: {msg.get('Subject')} with result: {event.is_rejection}"
        )
        return event.is_rejection
    except Exception as e:
        print("API error:", e)
        return False
# The client is a module-level global on purpose: per the original author,
# passing it to the pool workers as an argument (directly or through
# functools.partial) means it gets pickled, and that fails. Building it at
# import time sidesteps that.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("OPENAI_API_KEY not set in environment variables")
# Hand the key to the constructor instead of patching the instance afterwards.
client = OpenAI(api_key=api_key)
def main():
    """Count job-rejection emails in ``all_mail.mbox`` received since the cutoff.

    Messages dated on or after the cutoff are classified in parallel by a
    process pool, then the rejection count and the total number of messages
    considered are printed.

    Raises:
        mailbox.NoSuchMailboxError: If the mbox file does not exist.
    """
    cutoff_date = datetime(2025, 9, 1)
    filename = "all_mail.mbox"
    # create=False: the default would silently create an empty mailbox file
    # and report zero emails when the path is wrong.
    mbox = mailbox.mbox(filename, create=False)
    try:
        filtered_emails = parse(mbox, cutoff_date)
    finally:
        # Messages are already loaded in memory, so the file can be released.
        mbox.close()
    with Pool() as pool:
        # True/False mapped to 1/0
        result = sum(pool.imap_unordered(process_email, filtered_emails))
    print(f"Total rejection emails: {result}")
    print(f"Total emails past cutoff: {len(filtered_emails)}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment