Last active
November 24, 2025 00:42
-
-
Save kliu04/71f083ea08663a3974dc9b73b8cb89d0 to your computer and use it in GitHub Desktop.
Count rejection emails!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from functools import partial | |
| import mailbox | |
| import email.utils | |
| from datetime import datetime | |
| from multiprocessing import Pool | |
| import os | |
| from dotenv import load_dotenv | |
| from openai import OpenAI | |
| from pydantic import BaseModel | |
class Result(BaseModel):
    """Structured-output schema for the classifier's answer on one email."""

    # True when the model judged the email to be a job rejection.
    is_rejection: bool
def parse(mbox: mailbox.mbox, cutoff: datetime) -> set[mailbox.mboxMessage]:
    """Collect the messages in *mbox* dated on or after *cutoff*.

    The ``Date`` header is interpreted as naive wall-clock time: any UTC
    offset in the header is ignored, so *cutoff* must be a naive
    ``datetime`` as well. Messages whose ``Date`` header is missing,
    unparseable, or names an impossible calendar date are skipped instead of
    aborting the scan.

    Args:
        mbox: Mailbox (or any iterable of messages) to scan.
        cutoff: Earliest timestamp to keep; the comparison is inclusive.

    Returns:
        The set of messages that passed the date filter.
    """
    filtered_emails: set[mailbox.mboxMessage] = set()
    for msg in mbox:
        date_header = msg.get("Date")
        if not date_header:
            continue

        # msg.get() may hand back an email.header.Header instead of a str;
        # parsedate() only understands strings, so normalise first.
        raw_date = email.utils.parsedate(str(date_header))
        if not raw_date:
            continue

        try:
            msg_dt = datetime(*raw_date[:6])
        except ValueError:
            # parsedate() does not range-check its fields, so headers such as
            # "30 Feb 2026" parse fine but are rejected by datetime().
            continue

        if msg_dt < cutoff:
            # skip old emails
            continue
        filtered_emails.add(msg)
    return filtered_emails
def _decode_text_payload(payload: bytes, charset) -> str:
    """Decode *payload* with *charset*, falling back to UTF-8 if the label is unusable."""
    try:
        return payload.decode(charset or "utf-8", errors="replace")
    except (LookupError, ValueError):
        # Unknown, misspelt, or non-text charset label in the email headers.
        return payload.decode("utf-8", errors="replace")


def get_text_body(msg: mailbox.mboxMessage) -> str:
    """Extract the plain-text body of an email message.

    For multipart messages every ``text/plain`` part that is not marked as an
    attachment is decoded; for single-part messages the payload is decoded
    when the message itself is ``text/plain``. Undecodable bytes are replaced
    rather than raising, and an unknown charset label falls back to UTF-8 in
    both cases.

    Args:
        msg: The message to read.

    Returns:
        The decoded text parts joined with newlines, or ``""`` when the
        message has no plain-text content.
    """
    if msg.is_multipart():
        # Attachments are only filtered out inside multipart messages; a
        # single-part message is taken as-is.
        candidates = [
            part
            for part in msg.walk()
            # str() because the header value may be a Header object, not a str.
            if "attachment" not in str(part.get("Content-Disposition", "")).lower()
        ]
    else:
        candidates = [msg]

    body_parts = []
    for part in candidates:
        if part.get_content_type() != "text/plain":
            continue
        payload = part.get_payload(decode=True)
        if not isinstance(payload, bytes):
            # Nothing decodable in this part; skip it instead of crashing.
            continue
        body_parts.append(_decode_text_payload(payload, part.get_content_charset()))
    return "\n".join(body_parts)
def process_email(msg: mailbox.mboxMessage) -> bool:
    """Ask the model whether *msg* is a job rejection email.

    The plain-text body is sent to the module-level ``client`` together with
    a classification prompt, and the reply is parsed into ``Result``.
    Messages with no plain-text body are reported as non-rejections without
    calling the API, since there is nothing for the model to classify.

    Args:
        msg: The email to classify.

    Returns:
        ``True`` if the model flagged the email as a rejection. ``False``
        otherwise, including when the body is empty, the reply could not be
        parsed, or the API call raised (the error is printed, not re-raised,
        so one failure does not abort the whole run).
    """
    prompt = """Return True if this email if and only if the email is a job rejection email.
Only return True if the email explicitly states that the applicant was not selected for the position they applied for, and not for any other reason."""
    text = get_text_body(msg)
    if not text.strip():
        # e.g. HTML-only emails: avoid a pointless API request on empty input.
        return False

    try:
        response = client.responses.parse(
            model="gpt-5-nano",
            input=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": text},
            ],
            text_format=Result,
        )
        event = response.output_parsed
        if not event:
            return False
        print(
            f"Processed email: {msg.get('Subject')} with result: {event.is_rejection}"
        )
        return event.is_rejection
    except Exception as e:
        # Deliberately best-effort: count a failed call as "not a rejection".
        print("API error:", e)
        return False
# The client is a module-level global on purpose (author's note): the
# multiprocessing pool needs it there. Passing the client to the workers as
# an argument means pickling it, which fails, and currying it in with
# functools.partial does not help either. A closure-based approach might
# work but was not pursued.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("OPENAI_API_KEY not set in environment variables")
# Hand the key to the constructor rather than patching the attribute afterwards.
client = OpenAI(api_key=api_key)
def main(
    filename: str = "all_mail.mbox",
    cutoff_date: datetime = datetime(2025, 9, 1),
) -> None:
    """Count rejection emails in a mailbox and print the totals.

    Args:
        filename: Path to the mbox file to scan.
        cutoff_date: Only emails dated on or after this naive timestamp are
            classified.

    Raises:
        mailbox.NoSuchMailboxError: If *filename* does not exist.
    """
    # create=False: the default would silently create an empty mailbox for a
    # mistyped path and report zero emails instead of failing.
    mbox = mailbox.mbox(filename, create=False)
    try:
        filtered_emails = parse(mbox, cutoff_date)
    finally:
        # The messages are already loaded into memory; release the file handle.
        mbox.close()

    with Pool() as pool:
        # True/False mapped to 1/0
        result = sum(pool.imap_unordered(process_email, filtered_emails))

    print(f"Total rejection emails: {result}")
    print(f"Total emails past cutoff: {len(filtered_emails)}")


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment