AI Document Assistant
| # /// script | |
| # requires-python = ">=3.10" | |
| # dependencies = [ | |
| # "streamlit>=1.54", | |
| # "langchain-core>=1.2", | |
| # "langchain-classic>=1.0", | |
| # "langchain-openai>=1.1", | |
| # "langchain-chroma>=1.1", | |
| # "langchain-community>=0.4", | |
| # "langchain-text-splitters>=1.1", | |
| # "tiktoken>=0.12", | |
| # "pypdf>=6.0", | |
| # "watchdog>=6.0", | |
| # ] | |
| # /// | |
| # BUSINESS SCIENCE UNIVERSITY | |
| # PYTHON FOR GENERATIVE AI COURSE | |
| # DYNAMIC RAG APPLICATION CHALLENGE | |
| # *** | |
| # | |
| # Document Reference Assistant — A Dynamic RAG Agent | |
| # | |
| # WHY THIS MATTERS (Big Picture): | |
| # This is a complete, single-file RAG application that lets you upload any PDF | |
| # and immediately chat with it. It demonstrates how to build an AI agent that: | |
| # 1. INGESTS a PDF — extracts text, splits into chunks, generates embeddings | |
| # 2. INDEXES chunks — stores vectors in a Chroma database for similarity search | |
| # 3. RETRIEVES context — finds the most relevant chunks for each user question | |
| # 4. GENERATES answers — uses an LLM with retrieved context to produce cited responses | |
| # 5. MANAGES conversation — maintains chat history for multi-turn dialogue | |
| # | |
| # Architecture: | |
| # Ingestion: Upload PDF → Extract Pages → Chunk Text → Embed & Store → Chat Loop | |
| # Query loop: User Question → Retrieve Chunks → LLM → Answer | |
| # | |
| # Key concepts covered: | |
| # - Self-launching Streamlit app (no separate "streamlit run" command needed) | |
| # - Preflight validation (API key + model availability checks) | |
| # - PDF loading and token-aware text chunking | |
| # - AI-powered metadata extraction (title, topics, suggested questions) | |
| # - Vector embeddings and Chroma vector store | |
| # - History-aware retrieval (reformulates questions using chat context) | |
| # - RAG chain assembly with LangChain | |
| # - Session persistence (resume previous document on reload) | |
| # - Dynamic follow-up question generation | |
| # | |
| # Run with uv (no conda needed): | |
| # uv run --script document-agent.py | |
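| # | |
| # Example session (the key export is required — see the preflight check below; the | |
| # launch command serves the app on port 8501): | |
| # export OPENAI_API_KEY="sk-..." | |
| # uv run --script document-agent.py | |
| # # then open http://localhost:8501 in a browser | |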
| # ============================================================================= | |
| # 0.0 SELF-LAUNCH BOOTSTRAP | |
| # ============================================================================= | |
| # WHY? Normally you'd run "streamlit run app.py" in the terminal. This bootstrap | |
| # lets you just run "uv run --script document-agent.py" — the script detects it's | |
| # being run directly (not by Streamlit) and re-launches itself via Streamlit. | |
| # | |
| # HOW IT WORKS: | |
| # 1. First run: __name__ == "__main__" and IN_STREAMLIT is not set | |
| # 2. We do preflight checks (API key, model access) | |
| # 3. We set IN_STREAMLIT=1 in the environment to prevent infinite recursion | |
| # 4. We call "streamlit run <this_file>" as a subprocess | |
| # 5. Streamlit runs this file again, but now IN_STREAMLIT=1, so we skip this block | |
| # and fall through to the actual app code below | |
| import os | |
| import sys | |
| import subprocess | |
| if __name__ == "__main__" and os.environ.get("IN_STREAMLIT") != "1": | |
| # -- Preflight Checks ----------------------------------------------------- | |
| # Before starting the app, verify we have a valid OpenAI API key and that | |
| # the required models are accessible. This catches configuration errors | |
| # early with clear messages instead of cryptic runtime failures. | |
| # | |
| # Required models: | |
| # - text-embedding-ada-002 (embedding model for vectorizing document chunks and queries) | |
| # - gpt-4.1-mini (metadata extraction, follow-up question generation) | |
| api_key = os.environ.get("OPENAI_API_KEY") | |
| if not api_key: | |
| print("ERROR: OPENAI_API_KEY environment variable is not set.") | |
| print("Export it before running: export OPENAI_API_KEY='sk-...'") | |
| raise SystemExit(1) | |
| try: | |
| from openai import OpenAI | |
| client = OpenAI(api_key=api_key) | |
| # client.models.list() calls the OpenAI API to get all models your key can access | |
| available = {m.id for m in client.models.list()} | |
| required = {"text-embedding-ada-002", "gpt-4.1-mini"} | |
| missing = required - available | |
| if missing: | |
| print(f"ERROR: Required model(s) not available: {', '.join(sorted(missing))}") | |
| print("Check your OpenAI plan/access and try again.") | |
| raise SystemExit(1) | |
| print("Preflight OK — API key valid, required models available.") | |
| except SystemExit: | |
| raise | |
| except Exception as e: | |
| print(f"ERROR: Failed to verify OpenAI API access — {e}") | |
| raise SystemExit(1) | |
| # -- Streamlit Theme Configuration ---------------------------------------- | |
| # Create a .streamlit/config.toml file for the dark theme if it doesn't exist. | |
| # Streamlit reads this file automatically on startup. | |
| script_dir = os.path.dirname(os.path.abspath(__file__)) | |
| st_config_dir = os.path.join(script_dir, ".streamlit") | |
| st_config_file = os.path.join(st_config_dir, "config.toml") | |
| if not os.path.exists(st_config_file): | |
| os.makedirs(st_config_dir, exist_ok=True) | |
| with open(st_config_file, "w") as f: | |
| f.write('[theme]\nbase="dark"\nprimaryColor="#d33682"\n' | |
| 'backgroundColor="#000000"\nsecondaryBackgroundColor="#586e75"\n' | |
| 'textColor="#fafafa"\nfont="sans serif"\n') | |
| # -- Launch Streamlit ----------------------------------------------------- | |
| # subprocess.call() runs Streamlit as a child process and waits for it to exit. | |
| # We pass --server.headless=true so it doesn't try to open a browser automatically. | |
| # SystemExit propagates the child's exit code to the parent process. | |
| env = os.environ.copy() | |
| env["IN_STREAMLIT"] = "1" | |
| raise SystemExit( | |
| subprocess.call( | |
| [sys.executable, "-m", "streamlit", "run", __file__, | |
| "--server.port", "8501", "--server.headless", "true"], | |
| env=env, | |
| cwd=script_dir, | |
| ) | |
| ) | |
| # ============================================================================= | |
| # 1.0 IMPORTS | |
| # ============================================================================= | |
| # Everything below this point runs INSIDE Streamlit (the bootstrap above is skipped). | |
| # Chroma: Open-source vector database. Stores document embeddings on disk and | |
| # lets you do similarity search (find the most relevant documents for a query). | |
| from langchain_chroma import Chroma | |
| # ChatPromptTemplate: Defines structured prompts with system/human/AI message roles. | |
| # MessagesPlaceholder: A slot in a prompt template where chat history gets injected. | |
| from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder | |
| # Document: LangChain's standard container — holds page_content (text) + metadata (dict). | |
| from langchain_core.documents import Document | |
| # ChatOpenAI: Wrapper around OpenAI's chat completion API (gpt-4, gpt-4o, etc.). | |
| # OpenAIEmbeddings: Wrapper around OpenAI's embedding API — converts text into vectors | |
| # (lists of 1536 numbers for text-embedding-ada-002) that capture semantic meaning. Similar text → similar vectors. | |
| from langchain_openai import ChatOpenAI, OpenAIEmbeddings | |
| # create_history_aware_retriever: Wraps a retriever so it reformulates the user's | |
| # question using chat history BEFORE searching. This way "Tell me more about that" | |
| # becomes "Tell me more about [the topic from the previous answer]". | |
| # create_retrieval_chain: Combines a retriever + an LLM chain into a single pipeline: | |
| # question → retrieve docs → pass docs + question to LLM → answer. | |
| from langchain_classic.chains import create_history_aware_retriever, create_retrieval_chain | |
| # create_stuff_documents_chain: The simplest document combination strategy — "stuff" | |
| # all retrieved documents into the prompt context at once (vs. map-reduce or refine). | |
| # Works well when chunks are small enough to fit in the context window. | |
| from langchain_classic.chains.combine_documents import create_stuff_documents_chain | |
| # RunnableWithMessageHistory: Wraps any chain to automatically load/save chat history. | |
| # Each invocation reads prior messages, appends the new exchange, and persists them. | |
| from langchain_core.runnables.history import RunnableWithMessageHistory | |
| # StreamlitChatMessageHistory: Stores chat messages in Streamlit's session_state. | |
| # This means history survives re-runs (Streamlit re-executes the script on every | |
| # interaction) but is lost when the browser tab closes. | |
| from langchain_community.chat_message_histories import StreamlitChatMessageHistory | |
| # PyPDFLoader: Reads a PDF file and returns a list of Document objects (one per page). | |
| from langchain_community.document_loaders import PyPDFLoader | |
| # RecursiveCharacterTextSplitter: Splits long documents into smaller chunks for embedding. | |
| # "Recursive" means it tries splitting on paragraphs first, then sentences, then words — | |
| # preserving natural boundaries. from_tiktoken_encoder() counts tokens (not characters) | |
| # so chunks respect the embedding model's token limits. | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| # Streamlit: The web framework that powers the UI. It re-runs this entire script from | |
| # top to bottom on every user interaction (button click, text input, etc.). | |
| # st.session_state persists data across these re-runs. | |
| import streamlit as st | |
| import os | |
| import json | |
| import re | |
| import html as html_module # renamed to avoid collision with streamlit's html support | |
| import tempfile | |
| import shutil | |
| import time | |
| from pathlib import Path | |
| # ============================================================================= | |
| # 2.0 CONFIGURATION | |
| # ============================================================================= | |
| # Central place for all tunable parameters. Change these to experiment with | |
| # different models, chunk sizes, or storage locations. | |
| BASE_DIR = Path(__file__).parent | |
| # Where the Chroma vector database is stored on disk. | |
| # This directory contains the embedded chunks from the uploaded PDF. | |
| VECTOR_DATABASE = str(BASE_DIR / "data" / "chroma_document.db") | |
| # JSON file that persists document metadata between browser sessions. | |
| # When you reload the page, the app reads this to skip re-uploading. | |
| DOCUMENT_STATE_FILE = str(BASE_DIR / "data" / "document_state.json") | |
| # EMBEDDING MODEL: text-embedding-ada-002 | |
| # - Outputs a 1536-dimensional vector for each text input | |
| # - Used for BOTH indexing documents AND embedding user queries | |
| # - CRITICAL: The same model must be used for indexing and querying, | |
| # otherwise the vectors won't be in the same "space" and search fails | |
| # - Resource: https://platform.openai.com/docs/models | |
| EMBEDDING_MODEL = "text-embedding-ada-002" | |
| # METADATA MODEL: Used for lightweight AI tasks (extracting document metadata, | |
| # generating follow-up questions). A small, fast, cheap model works best here | |
| # since these tasks don't need deep reasoning. | |
| METADATA_MODEL = "gpt-4.1-mini" | |
| # CHAT MODELS — Three tiers the user can select in the sidebar. | |
| # Each tier offers a meaningful difference in quality/cost/speed: | |
| # | |
| # Standard models (accept temperature parameter): | |
| # gpt-4o-mini — Cheapest & fastest. Good for simple Q&A. ~$0.15/$0.60 per 1M tokens. | |
| # gpt-4.1-mini — Best balance of quality and cost. ~$0.40/$1.60 per 1M tokens. (DEFAULT) | |
| # gpt-4.1 — Most capable standard model. ~$2.00/$8.00 per 1M tokens. | |
| # | |
| # Reasoning models (do NOT accept temperature — they self-regulate): | |
| # o3-mini — Cheapest reasoning model. Good for basic analysis. ~$1.10/$4.40 per 1M tokens. | |
| # o4-mini — Best balance for reasoning tasks. ~$1.10/$4.40 per 1M tokens. (DEFAULT) | |
| # o3 — Deepest reasoning. Slow but thorough. ~$2.00/$8.00 per 1M tokens. | |
| STANDARD_MODELS = ["gpt-4o-mini", "gpt-4.1-mini", "gpt-4.1"] | |
| REASONING_MODELS = ["o3-mini", "o4-mini", "o3"] | |
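| # | |
| # Rough cost illustration using the prices above (back-of-envelope, not a quote): | |
| # a Standard-mode question stuffs 4 chunks of ~500 tokens plus roughly 300 tokens of | |
| # prompt and chat history ≈ 2,300 input tokens; with a ~400-token answer on | |
| # gpt-4.1-mini that is 2,300/1M × $0.40 + 400/1M × $1.60 ≈ $0.0016 per exchange. | |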
| # CHUNKING PARAMETERS: | |
| # CHUNK_SIZE: Target size of each text chunk in tokens (not characters). | |
| # Smaller chunks = more precise retrieval but less context per chunk. | |
| # Larger chunks = more context but may include irrelevant text. | |
| # 500 tokens is a good default (~375 words, ~1/3 of a page). | |
| # CHUNK_OVERLAP: How many tokens overlap between consecutive chunks. | |
| # This prevents information from being split across chunk boundaries. | |
| # 50 tokens of overlap means the end of chunk N appears at the start of chunk N+1. | |
| CHUNK_SIZE = 500 | |
| CHUNK_OVERLAP = 50 | |
| OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") | |
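| # --------------------------------------------------------------------------- | |
| # Illustrative sketch (defined here but never called by the app): how the | |
| # chunking parameters above behave on a plain string. It builds the same | |
| # token-aware splitter used in section 5.4 and uses tiktoken to report each | |
| # chunk's token count. Run it by hand from a REPL to see the numbers; e.g. a | |
| # 20-page PDF with ~600 tokens per page (~12,000 tokens) yields roughly | |
| # 12,000 / (500 - 50) ≈ 27 chunks. | |
| def _demo_token_chunking(sample_text: str) -> None: | |
|     import tiktoken | |
|     splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( | |
|         model_name=EMBEDDING_MODEL, | |
|         chunk_size=CHUNK_SIZE, | |
|         chunk_overlap=CHUNK_OVERLAP, | |
|     ) | |
|     enc = tiktoken.encoding_for_model(EMBEDDING_MODEL) | |
|     for i, chunk in enumerate(splitter.split_text(sample_text)): | |
|         # Each chunk should land at or under CHUNK_SIZE tokens. | |
|         print(f"chunk {i}: {len(enc.encode(chunk))} tokens") | |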
| # ============================================================================= | |
| # 3.0 PAGE SETUP & STYLING | |
| # ============================================================================= | |
| # Streamlit page configuration must be the first st.* call in the script. | |
| # layout="wide" uses the full browser width instead of a narrow centered column. | |
| st.set_page_config( | |
| page_title="Document Assistant", | |
| page_icon="docs", | |
| layout="wide", | |
| initial_sidebar_state="collapsed" | |
| ) | |
| # Custom CSS injected via st.markdown with unsafe_allow_html=True. | |
| # Streamlit's default styling is functional but generic — this CSS gives the app | |
| # a polished, professional look with a dark-themed header, styled source cards, etc. | |
| st.markdown(""" | |
| <style> | |
| /* Layout */ | |
| .block-container { max-width: 1400px; padding-top: 1rem; } | |
| /* Header bar — gradient background with accent border */ | |
| .app-header { | |
| background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%); | |
| color: #e8e8e8; | |
| padding: 20px 28px; | |
| border-radius: 8px; | |
| margin-bottom: 4px; | |
| border-left: 4px solid #4a90d9; | |
| } | |
| .app-header h1 { margin: 0; font-size: 1.6rem; font-weight: 600; } | |
| .app-header p { margin: 4px 0 0 0; opacity: 0.85; font-size: 0.9rem; } | |
| /* Document status row — shows filename, chunk count, model */ | |
| .doc-status-row { | |
| display: flex; | |
| flex-wrap: wrap; | |
| gap: 20px; | |
| padding: 10px 16px; | |
| background: #f0f4f8; | |
| border-radius: 6px; | |
| margin-bottom: 16px; | |
| font-size: 0.82rem; | |
| color: #495057; | |
| border-left: 3px solid #4a90d9; | |
| } | |
| .doc-status-row .status-item { white-space: nowrap; } | |
| .doc-status-row .status-label { color: #6c757d; margin-right: 4px; } | |
| /* Question chips — clickable suggested/follow-up questions */ | |
| .chip-container { display: flex; flex-wrap: wrap; gap: 8px; margin: 8px 0 16px 0; } | |
| .q-chip { | |
| display: inline-block; | |
| background: #e9ecef; | |
| color: #343a40; | |
| padding: 6px 14px; | |
| border-radius: 20px; | |
| font-size: 0.82rem; | |
| cursor: pointer; | |
| border: 1px solid #dee2e6; | |
| transition: all 0.15s ease; | |
| } | |
| .q-chip:hover { background: #4a90d9; color: white; border-color: #4a90d9; } | |
| /* Source card — displays retrieved chunk content */ | |
| .source-card { | |
| background: #f8f9fa; | |
| border-left: 3px solid #4a90d9; | |
| border-radius: 6px; | |
| padding: 10px 14px; | |
| margin-bottom: 10px; | |
| font-size: 0.82rem; | |
| line-height: 1.5; | |
| } | |
| .source-card .source-meta { | |
| color: #6c757d; | |
| font-size: 0.75rem; | |
| margin-bottom: 4px; | |
| } | |
| /* Processing log — terminal-style output during PDF indexing */ | |
| .process-log { | |
| background: #1a1a2e; | |
| color: #a8d8a8; | |
| border-radius: 6px; | |
| padding: 12px 16px; | |
| font-family: 'Menlo', 'Consolas', monospace; | |
| font-size: 0.78rem; | |
| line-height: 1.8; | |
| max-height: 180px; | |
| overflow-y: auto; | |
| } | |
| /* Info card on upload page */ | |
| .info-card { | |
| background: #f8f9fa; | |
| border-radius: 8px; | |
| padding: 14px 18px; | |
| margin: 8px 0; | |
| border-left: 3px solid #4a90d9; | |
| font-size: 0.88rem; | |
| } | |
| /* Sidebar compact */ | |
| section[data-testid="stSidebar"] { width: 280px !important; } | |
| section[data-testid="stSidebar"] .block-container { padding-top: 1rem; } | |
| /* Footer */ | |
| .app-footer { | |
| text-align: center; color: #6c757d; font-size: 0.75rem; | |
| padding: 12px 0; border-top: 1px solid #dee2e6; margin-top: 24px; | |
| } | |
| /* Highlighted search terms in source cards */ | |
| .source-card mark { | |
| background: #fff3cd; | |
| padding: 1px 3px; | |
| border-radius: 2px; | |
| font-weight: 500; | |
| } | |
| /* Source summary bar — "Retrieved N chunks covering pages X, Y, Z" */ | |
| .source-summary { | |
| background: #e8f4f8; | |
| border-radius: 6px; | |
| padding: 8px 12px; | |
| margin-bottom: 12px; | |
| font-size: 0.78rem; | |
| color: #495057; | |
| } | |
| /* Low confidence warning */ | |
| .confidence-warning { | |
| background: #fff3cd; | |
| border-left: 3px solid #ffc107; | |
| border-radius: 6px; | |
| padding: 10px 14px; | |
| margin: 8px 0; | |
| font-size: 0.82rem; | |
| color: #856404; | |
| } | |
| /* Hide default Streamlit branding */ | |
| #MainMenu {visibility: hidden;} | |
| footer {visibility: hidden;} | |
| </style> | |
| """, unsafe_allow_html=True) | |
| # ============================================================================= | |
| # 4.0 SESSION STATE | |
| # ============================================================================= | |
| # Streamlit re-runs the entire script on every interaction. Session state is how | |
| # we persist data across those re-runs. Think of it as a per-user dictionary that | |
| # survives button clicks and text input but resets when the browser tab closes. | |
| # | |
| # DEFAULTS defines every key we use and its initial value. On each re-run, we | |
| # only set keys that don't exist yet (preserving values from previous interactions). | |
| DEFAULTS = { | |
| "document_ready": False, # True once a PDF has been indexed | |
| "document_metadata": None, # AI-extracted: title, topics, questions, etc. | |
| "uploaded_filename": None, # Original filename of the uploaded PDF | |
| "uploaded_filesize": None, # File size in bytes | |
| "chunk_count": None, # Number of chunks in the vector store | |
| "page_count": None, # Number of pages in the PDF | |
| "last_sources": [], # Retrieved documents for the latest answer | |
| "last_query": None, # Last user question (for highlighting terms) | |
| "chip_question": None, # Set when user clicks a suggested question chip | |
| "followup_questions": [], # AI-generated follow-up questions after each answer | |
| } | |
| for key, val in DEFAULTS.items(): | |
| if key not in st.session_state: | |
| st.session_state[key] = val | |
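| # | |
| # Minimal illustration of the session-state pattern (generic Streamlit, not part | |
| # of this app): | |
| # | |
| #   if "clicks" not in st.session_state: | |
| #       st.session_state.clicks = 0 | |
| #   if st.button("Click me"): | |
| #       st.session_state.clicks += 1   # survives the rerun the click triggers | |
| #   st.write("Clicked", st.session_state.clicks, "times") | |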
| # -- Session Restore ---------------------------------------------------------- | |
| # WHY? Without this, refreshing the browser would lose the loaded document and | |
| # force re-uploading. We persist document metadata to a JSON file on disk (alongside | |
| # the Chroma vector DB). On startup, if both exist, we restore session state so | |
| # the user goes straight to the chat screen. | |
| if ( | |
| not st.session_state.document_ready | |
| and os.path.exists(VECTOR_DATABASE) | |
| and os.path.exists(DOCUMENT_STATE_FILE) | |
| ): | |
| try: | |
| with open(DOCUMENT_STATE_FILE) as f: | |
| saved = json.load(f) | |
| st.session_state.document_ready = True | |
| st.session_state.document_metadata = saved.get("document_metadata") | |
| st.session_state.uploaded_filename = saved.get("uploaded_filename") | |
| st.session_state.uploaded_filesize = saved.get("uploaded_filesize") | |
| st.session_state.chunk_count = saved.get("chunk_count") | |
| st.session_state.page_count = saved.get("page_count") | |
| except Exception: | |
| pass # corrupt file — fall through to upload screen | |
| # ============================================================================= | |
| # 5.0 CORE FUNCTIONS — RAG Pipeline | |
| # ============================================================================= | |
| # These functions implement the RAG pipeline: | |
| # get_embedding_function() → creates the embedding model (cached) | |
| # get_vectorstore() → connects to the Chroma vector database | |
| # extract_document_metadata() → uses AI to extract title, topics, questions from PDF | |
| # process_uploaded_pdf() → full ingestion pipeline (load → chunk → embed → store) | |
| # create_rag_chain() → assembles the retrieval + generation chain | |
| # generate_followup_questions() → generates contextual follow-up suggestions | |
| # highlight_terms() → highlights search terms in source text | |
| # reset_document() → clears everything for a fresh upload | |
| # * 5.1 Embedding Function (Cached) | |
| # ----------------------------------- | |
| # @st.cache_resource means this function runs ONCE and the result is reused across | |
| # all re-runs and all users. This avoids creating a new OpenAIEmbeddings object | |
| # (and its HTTP connection pool) on every single Streamlit re-run. | |
| # | |
| # CRITICAL: The same embedding model must be used for BOTH indexing and querying. | |
| @st.cache_resource(show_spinner=False) | |
| def get_embedding_function(): | |
| return OpenAIEmbeddings(model=EMBEDDING_MODEL) | |
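| # Illustrative sketch (defined here but never called by the app): what "similar | |
| # text → similar vectors" means concretely. Embedding two related sentences and | |
| # comparing them with cosine similarity gives a noticeably higher score than an | |
| # unrelated pair — the same comparison Chroma performs at query time. The example | |
| # sentences are arbitrary; calling this by hand costs a few embedding API requests. | |
| def _demo_embedding_similarity() -> None: | |
|     import math | |
|     emb = get_embedding_function() | |
|     related_a = emb.embed_query("How do I request a refund?") | |
|     related_b = emb.embed_query("What is the money-back policy?") | |
|     unrelated = emb.embed_query("The weather was sunny all weekend.") | |
|     def cosine(a, b): | |
|         dot = sum(x * y for x, y in zip(a, b)) | |
|         return dot / (math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(x * x for x in b))) | |
|     print("related pair:  ", round(cosine(related_a, related_b), 3)) | |
|     print("unrelated pair:", round(cosine(related_a, unrelated), 3)) | |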
| # * 5.2 Vector Store Connection | |
| # ------------------------------ | |
| # Connects to an existing Chroma database on disk. Does NOT create a new one — | |
| # that happens in process_uploaded_pdf(). Returns None if no document is loaded. | |
| def get_vectorstore(): | |
| if not st.session_state.document_ready: | |
| return None | |
| return Chroma( | |
| persist_directory=VECTOR_DATABASE, | |
| embedding_function=get_embedding_function() | |
| ) | |
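| # Illustrative sketch (defined here but never called by the app): the raw | |
| # similarity search the RAG chain performs under the hood. Each hit is a Document | |
| # whose metadata carries the page number later used for [p.X] citations. Returns | |
| # nothing useful until a document has been indexed by section 5.4. | |
| def _demo_similarity_search(query: str = "What is this document about?") -> None: | |
|     vs = get_vectorstore() | |
|     if vs is None: | |
|         print("No document indexed yet.") | |
|         return | |
|     for doc in vs.similarity_search(query, k=3): | |
|         print(doc.metadata.get("page"), "-", doc.page_content[:80]) | |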
| # * 5.3 AI-Powered Metadata Extraction | |
| # -------------------------------------- | |
| # WHY? Instead of showing the raw filename as the app title, we use a small LLM | |
| # to analyze the first few pages and extract structured metadata: a professional | |
| # title, topics, suggested questions, and a welcome message. This makes the chat | |
| # experience feel tailored to each document. | |
| # | |
| # HOW: We take the first ~4000 chars from the first 5 pages, send them to | |
| # gpt-4.1-mini with a structured prompt, and parse the JSON response. | |
| # If anything fails, we fall back to sensible defaults derived from the filename. | |
| def extract_document_metadata(documents: list, filename: str) -> dict: | |
| sample_content = "" | |
| for doc in documents[:5]: | |
| sample_content += doc.page_content[:1000] + "\n\n" | |
| sample_content = sample_content[:4000] | |
| llm = ChatOpenAI(model=METADATA_MODEL, temperature=0.3) | |
| prompt = ChatPromptTemplate.from_template("""Analyze this document excerpt and extract metadata. | |
| Document filename: {filename} | |
| Content sample: | |
| {content} | |
| Return a JSON object with these exact keys: | |
| {{ | |
| "title": "A concise, professional title for this document (max 50 chars)", | |
| "subtitle": "A brief description of what this document covers (max 100 chars)", | |
| "topics": ["topic1", "topic2", ...], // 8-15 key topics/concepts covered | |
| "chapters": ["Chapter 1: Title", "Chapter 2: Title", ...], // Main sections/chapters if identifiable from TOC or headings. Empty list if none. | |
| "example_questions": [ | |
| "Question 1 that a reader might ask about this content", | |
| "Question 2...", | |
| ... | |
| ], // 6-8 diverse example questions | |
| "welcome_message": "A brief welcome message for a chat assistant (max 150 chars)" | |
| }} | |
| Return ONLY valid JSON, no other text.""") | |
| try: | |
| response = llm.invoke(prompt.format(filename=filename, content=sample_content)) | |
| # Strip markdown code fences if the LLM wraps the JSON in ```json ... ``` | |
| json_str = response.content.strip() | |
| if json_str.startswith("```"): | |
| json_str = json_str.split("```")[1] | |
| if json_str.startswith("json"): | |
| json_str = json_str[4:] | |
| return json.loads(json_str) | |
| except Exception: | |
| # Fallback: derive basic metadata from the filename | |
| return { | |
| "title": filename.replace(".pdf", "").replace("_", " ").title(), | |
| "subtitle": "Document Reference Assistant", | |
| "topics": ["Overview", "Key Concepts", "Details", "Summary"], | |
| "example_questions": [ | |
| "What is the main topic of this document?", | |
| "Can you summarize the key points?", | |
| "What are the important concepts covered?", | |
| "How does this relate to practical applications?" | |
| ], | |
| "welcome_message": f"I can answer questions about {filename}. What would you like to know?" | |
| } | |
| # * 5.4 PDF Ingestion Pipeline | |
| # ------------------------------ | |
| # This is the main ingestion function. It takes a raw uploaded PDF and produces | |
| # a fully searchable vector store. The pipeline: | |
| # | |
| # 1. Save uploaded bytes to a temp file (PyPDFLoader needs a file path) | |
| # 2. Extract text from every page → list of Document objects | |
| # 3. AI metadata extraction (title, topics, questions) | |
| # 4. Token-based chunking (split pages into ~500 token chunks) | |
| # 5. Create a TOC document (synthetic document with document overview) | |
| # 6. Generate embeddings via OpenAI and store in Chroma | |
| # 7. Persist document state to disk for session restore | |
| # | |
| # The log_container and progress_bar provide real-time feedback in the UI. | |
| def process_uploaded_pdf(uploaded_file, log_container, progress_bar) -> tuple[bool, str]: | |
| logs = [] | |
| def log(msg, pct): | |
| logs.append(msg) | |
| log_container.markdown( | |
| '<div class="process-log">' + "<br>".join(f"> {l}" for l in logs) + "</div>", | |
| unsafe_allow_html=True | |
| ) | |
| progress_bar.progress(pct) | |
| try: | |
| # Step 1: Save to temp file | |
| log("Saving uploaded file...", 0.05) | |
| with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp: | |
| tmp.write(uploaded_file.getvalue()) | |
| tmp_path = tmp.name | |
| # Step 2: Extract text from PDF | |
| # PyPDFLoader reads each page into a Document with page_content and metadata (page number). | |
| log("Extracting text from PDF...", 0.15) | |
| loader = PyPDFLoader(tmp_path) | |
| documents = loader.load() | |
| if not documents: | |
| return False, "No content could be extracted from the PDF." | |
| log(f" Found {len(documents)} pages", 0.20) | |
| # Step 3: AI metadata extraction | |
| log("Analyzing document with AI...", 0.25) | |
| metadata = extract_document_metadata(documents, uploaded_file.name) | |
| log(f' Title: "{metadata.get("title", "?")}"', 0.35) | |
| # Step 4: Token-based chunking | |
| # WHY tokens, not characters? LLMs and embedding models have TOKEN limits. | |
| # 500 tokens ≈ 375 words ≈ 1/3 of a page. Overlap of 50 tokens ensures | |
| # information at chunk boundaries isn't lost. | |
| log("Token-based chunking...", 0.40) | |
| splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( | |
| model_name=EMBEDDING_MODEL, | |
| chunk_size=CHUNK_SIZE, | |
| chunk_overlap=CHUNK_OVERLAP, | |
| ) | |
| chunks = splitter.split_documents(documents) | |
| for i, chunk in enumerate(chunks): | |
| chunk.metadata["source_file"] = uploaded_file.name | |
| chunk.metadata["chunk_index"] = i | |
| log(f" {len(chunks)} chunks created ({CHUNK_SIZE} tokens each)", 0.50) | |
| # Step 5: Create a synthetic TOC document | |
| # WHY? When users ask broad questions like "What is this document about?", | |
| # the individual chunks may not contain a good overview. This TOC document | |
| # acts as a summary that the retriever can find for high-level questions. | |
| toc_doc = Document( | |
| page_content=( | |
| f"Document: {metadata.get('title', uploaded_file.name)}\n" | |
| f"Description: {metadata.get('subtitle', '')}\n" | |
| f"Pages: {len(documents)}, Chunks: {len(chunks)}\n" | |
| f"Topics: {', '.join(metadata.get('topics', []))}" | |
| ), | |
| metadata={"source": "Table of Contents", "source_file": uploaded_file.name} | |
| ) | |
| chunks.insert(0, toc_doc) | |
| # Step 6: Generate embeddings and store in Chroma | |
| # Chroma.from_documents() sends all chunk texts to OpenAI's embedding API, | |
| # gets back vectors, and stores everything in the persist_directory folder. | |
| # If a previous database exists, we close its connections and remove it first. | |
| log("Generating embeddings & indexing...", 0.55) | |
| if os.path.exists(VECTOR_DATABASE): | |
| try: | |
| old_store = Chroma( | |
| persist_directory=VECTOR_DATABASE, | |
| embedding_function=get_embedding_function() | |
| ) | |
| if hasattr(old_store, "_client"): | |
| old_store._client.clear_system_cache() | |
| del old_store | |
| except Exception: | |
| pass | |
| shutil.rmtree(VECTOR_DATABASE, ignore_errors=True) | |
| Chroma.from_documents( | |
| chunks, | |
| embedding=get_embedding_function(), | |
| persist_directory=VECTOR_DATABASE | |
| ) | |
| log(" Vector store created", 0.90) | |
| os.unlink(tmp_path) | |
| # Step 7: Persist to session state (for this browser session) and | |
| # to disk (for surviving page refreshes / new browser sessions) | |
| st.session_state.document_ready = True | |
| st.session_state.uploaded_filename = uploaded_file.name | |
| st.session_state.uploaded_filesize = uploaded_file.size | |
| st.session_state.document_metadata = metadata | |
| st.session_state.chunk_count = len(chunks) | |
| st.session_state.page_count = len(documents) | |
| if "langchain_messages" in st.session_state: | |
| st.session_state.langchain_messages = [] | |
| with open(DOCUMENT_STATE_FILE, "w") as f: | |
| json.dump({ | |
| "uploaded_filename": uploaded_file.name, | |
| "uploaded_filesize": uploaded_file.size, | |
| "document_metadata": metadata, | |
| "chunk_count": len(chunks), | |
| "page_count": len(documents), | |
| }, f) | |
| log("Done.", 1.0) | |
| return True, f"{len(documents)} pages | {len(chunks)} chunks indexed" | |
| except Exception as e: | |
| return False, f"Error: {e}" | |
| # * 5.5 RAG Chain Assembly | |
| # -------------------------- | |
| # This is the heart of the application — it assembles the full RAG pipeline: | |
| # | |
| # User Question | |
| # ↓ | |
| # [History-Aware Retriever] | |
| # Uses chat history to reformulate the question into a standalone query. | |
| # Example: "Tell me more" → "Tell me more about vector embeddings" | |
| # Then searches Chroma for the k most relevant chunks. | |
| # ↓ | |
| # [Stuff Documents Chain] | |
| # Takes the retrieved chunks and "stuffs" them into the system prompt | |
| # as context for the LLM. | |
| # ↓ | |
| # [LLM (ChatOpenAI)] | |
| # Generates an answer using the context + question + chat history. | |
| # ↓ | |
| # [RunnableWithMessageHistory] | |
| # Automatically loads/saves chat history to StreamlitChatMessageHistory. | |
| # This wraps everything so history management is transparent. | |
| # | |
| # Two modes: | |
| # Standard: Fast, concise answers with 4 retrieved chunks | |
| # Reasoning: Deeper analysis with 6 retrieved chunks (uses reasoning models like o3) | |
| def create_rag_chain(model_name: str, temp: float = None, is_reasoning: bool = False): | |
| vectorstore = get_vectorstore() | |
| if vectorstore is None: | |
| return None, None | |
| metadata = st.session_state.document_metadata or {} | |
| source_name = metadata.get("title", st.session_state.uploaded_filename or "Document") | |
| # Retrieve more chunks for reasoning mode (deeper analysis needs more context) | |
| k_docs = 6 if is_reasoning else 4 | |
| retriever = vectorstore.as_retriever(search_kwargs={"k": k_docs}) | |
| # Reasoning models (o3, o4-mini) don't accept a temperature parameter | |
| llm = ChatOpenAI(model=model_name) if is_reasoning else ChatOpenAI(model=model_name, temperature=temp) | |
| # -- Stage 1: Question Reformulation -------------------------------------- | |
| # WHY? In a multi-turn conversation, the user might say "Tell me more about that". | |
| # The retriever needs a standalone question to search effectively. This prompt | |
| # asks the LLM to reformulate the question using chat history context. | |
| contextualize_q_prompt = ChatPromptTemplate.from_messages([ | |
| ("system", "Given a chat history and the latest user question, " | |
| "formulate a standalone question. Do NOT answer it, just reformulate if needed."), | |
| MessagesPlaceholder("chat_history"), | |
| ("human", "{input}"), | |
| ]) | |
| msgs = StreamlitChatMessageHistory(key="langchain_messages") | |
| history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt) | |
| # -- Stage 2: Answer Generation ------------------------------------------- | |
| # The system prompt tells the LLM its role, injects retrieved context via {context}, | |
| # and instructs it to cite page numbers using [p.X] format. | |
| mode_text = ( | |
| "Provide clear, concise answers. " | |
| "If the user asks you to summarize, find, or explain, follow their instruction naturally." | |
| ) | |
| if is_reasoning: | |
| mode_text = ( | |
| "ANALYSIS MODE: Provide deep, thorough explanations. " | |
| "Connect concepts across sections, provide examples, explain underlying principles. " | |
| "If the user asks you to summarize, find, or explain, follow their instruction naturally." | |
| ) | |
| citation_instruction = ( | |
| "IMPORTANT: Include page references in your answer using [p.X] format " | |
| "(where X is the page number from the source metadata). " | |
| "Place citations at the end of sentences that reference specific information. " | |
| "If the retrieved context does not contain enough information to answer confidently, " | |
| "clearly state that the answer may not be fully covered in the document." | |
| ) | |
| qa_prompt = ChatPromptTemplate.from_messages([ | |
| ("system", | |
| f"You are an expert assistant for: {source_name}\n\n" | |
| "{context}\n\n" | |
| f"{mode_text}\n\n" | |
| f"{citation_instruction}\n" | |
| "Use markdown formatting for readability."), | |
| MessagesPlaceholder("chat_history"), | |
| ("human", "{input}") | |
| ]) | |
| # -- Stage 3: Chain Assembly ---------------------------------------------- | |
| # create_stuff_documents_chain: LLM + prompt that expects {context} filled with docs | |
| # create_retrieval_chain: retriever → stuff chain (auto-fills {context} with results) | |
| # RunnableWithMessageHistory: wraps everything with automatic chat history | |
| qa_chain = create_stuff_documents_chain(llm, qa_prompt) | |
| rag_chain = create_retrieval_chain(history_aware_retriever, qa_chain) | |
| chain_with_history = RunnableWithMessageHistory( | |
| rag_chain, | |
| lambda session_id: msgs, | |
| input_messages_key="input", | |
| history_messages_key="chat_history", | |
| output_messages_key="answer", | |
| ) | |
| return chain_with_history, retriever | |
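| # Illustrative sketch (defined here but never called): the shape of a call to the | |
| # assembled chain, mirroring the real invocation in section 6.0. The result is a | |
| # dict whose "answer" key holds the generated text and whose "context" key holds | |
| # the retrieved Documents. Only meaningful inside the running Streamlit app, since | |
| # the chain reads chat history from st.session_state. | |
| def _demo_rag_invocation() -> None: | |
|     chain, _retriever = create_rag_chain("gpt-4.1-mini", temp=0.3, is_reasoning=False) | |
|     if chain is None: | |
|         return | |
|     result = chain.invoke( | |
|         {"input": "What is this document about?"}, | |
|         config={"configurable": {"session_id": "doc_chat"}}, | |
|     ) | |
|     print(result["answer"]) | |
|     print("retrieved pages:", [d.metadata.get("page") for d in result.get("context", [])]) | |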
| # * 5.6 Source Text Highlighting | |
| # -------------------------------- | |
| # When displaying retrieved source chunks, we highlight words from the user's | |
| # query so they can quickly see why each chunk was relevant. | |
| # Common stop words are excluded to avoid highlighting "what", "the", etc. | |
| def highlight_terms(text: str, query: str | None) -> str: | |
| text = html_module.escape(text) | |
| text = text.replace("\n", "<br>") | |
| if not query: | |
| return text | |
| stop_words = { | |
| "what", "where", "when", "which", "that", "this", "with", "from", | |
| "have", "does", "about", "how", "the", "and", "for", "are", "but", | |
| "not", "you", "all", "can", "was", "one", "our", "out", "into", | |
| } | |
| words = [w for w in query.split() if len(w) > 3 and w.lower() not in stop_words] | |
| for word in words: | |
| pattern = re.compile(f"({re.escape(word)})", re.IGNORECASE) | |
| text = pattern.sub(r"<mark>\1</mark>", text) | |
| return text | |
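| # | |
| # Worked example: | |
| #   highlight_terms("Refund requests are reviewed weekly.", "What is the refund policy?") | |
| #   → "<mark>Refund</mark> requests are reviewed weekly." | |
| # ("what" is a stop word, "is"/"the" are too short; "policy?" is kept as a term but | |
| # only matches the literal string "policy?" in the chunk text, so nothing else is marked.) | |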
| # * 5.7 Follow-Up Question Generation | |
| # -------------------------------------- | |
| # WHY? After each answer, we generate contextual follow-up questions so the user | |
| # doesn't have to think of what to ask next. This creates a guided exploration | |
| # experience — the AI suggests logical next questions based on what was just discussed. | |
| # | |
| # HOW: Send the Q&A pair to a small LLM and ask for 4 follow-up questions as JSON. | |
| def generate_followup_questions(question: str, answer: str) -> list[str]: | |
| llm = ChatOpenAI(model=METADATA_MODEL, temperature=0.5) | |
| prompt = ChatPromptTemplate.from_template( | |
| "Based on this Q&A exchange, suggest 4 brief follow-up questions the user might ask next.\n\n" | |
| "Question: {question}\n\n" | |
| "Answer: {answer}\n\n" | |
| "Return ONLY a JSON array of 4 short question strings, no other text." | |
| ) | |
| try: | |
| response = llm.invoke(prompt.format(question=question, answer=answer[:2000])) | |
| text = response.content.strip() | |
| if text.startswith("```"): | |
| text = text.split("```")[1] | |
| if text.startswith("json"): | |
| text = text[4:] | |
| return json.loads(text)[:4] | |
| except Exception: | |
| return [] | |
| # * 5.8 Document Reset | |
| # ---------------------- | |
| # Clears all session state, chat history, the vector database, and the persisted | |
| # state file. Used when the user clicks "New Document" to start fresh. | |
| def reset_document(): | |
| for key in DEFAULTS: | |
| st.session_state[key] = DEFAULTS[key] | |
| if "langchain_messages" in st.session_state: | |
| st.session_state.langchain_messages = [] | |
| # Close Chroma's SQLite connection before deleting the database directory, | |
| # otherwise the OS may refuse to remove locked files. | |
| if os.path.exists(VECTOR_DATABASE): | |
| try: | |
| old_store = Chroma( | |
| persist_directory=VECTOR_DATABASE, | |
| embedding_function=get_embedding_function() | |
| ) | |
| if hasattr(old_store, "_client"): | |
| old_store._client.clear_system_cache() | |
| del old_store | |
| except Exception: | |
| pass | |
| shutil.rmtree(VECTOR_DATABASE, ignore_errors=True) | |
| if os.path.exists(DOCUMENT_STATE_FILE): | |
| os.remove(DOCUMENT_STATE_FILE) | |
| # ============================================================================= | |
| # 6.0 MAIN APPLICATION — Two-Stage UI | |
| # ============================================================================= | |
| # The app has two stages: | |
| # Stage 1 (Upload): Shown when no document is loaded. User uploads a PDF, | |
| # which is automatically processed and indexed. | |
| # Stage 2 (Chat): Shown after a document is indexed. Two-column layout with | |
| # chat on the left and retrieved sources on the right. | |
| # | |
| # Streamlit re-runs this entire script on every interaction. The if/else below | |
| # controls which stage is displayed based on session state. | |
| # ───────────────────────────────────────────────────────────────────────────── | |
| # STAGE 1: UPLOAD & INDEX | |
| # ───────────────────────────────────────────────────────────────────────────── | |
| if not st.session_state.document_ready: | |
| st.markdown(""" | |
| <div class="app-header" style="text-align:center;"> | |
| <h1>Document Reference Assistant</h1> | |
| </div> | |
| """, unsafe_allow_html=True) | |
| # Center the upload widget using a 3-column layout (empty | content | empty) | |
| col_l, col_c, col_r = st.columns([1, 2, 1]) | |
| with col_c: | |
| st.markdown("<h4 style='text-align:center;'>Add a Document</h4>", unsafe_allow_html=True) | |
| uploaded_file = st.file_uploader( | |
| "Choose a PDF file", type=["pdf"], | |
| help="PDF documents up to 200 MB", | |
| label_visibility="collapsed" | |
| ) | |
| # Auto-process: as soon as a file is selected, start the ingestion pipeline. | |
| # No "Process" button needed — reduces friction. | |
| if uploaded_file is not None: | |
| progress_bar = st.progress(0) | |
| log_area = st.empty() | |
| success, message = process_uploaded_pdf(uploaded_file, log_area, progress_bar) | |
| if success: | |
| st.success(message) | |
| time.sleep(0.6) | |
| st.rerun() # Trigger a re-run to switch to Stage 2 | |
| else: | |
| st.error(message) | |
| else: | |
| st.caption("Drag-and-drop or browse to select a PDF file.") | |
| # ───────────────────────────────────────────────────────────────────────────── | |
| # STAGE 2: CHAT WITH SOURCES | |
| # ───────────────────────────────────────────────────────────────────────────── | |
| else: | |
| # Extract metadata for display throughout the chat UI | |
| meta = st.session_state.document_metadata or {} | |
| title = meta.get("title", "Document Reference") | |
| subtitle = meta.get("subtitle", "") | |
| topics = meta.get("topics", []) | |
| chapters = meta.get("chapters", []) | |
| example_questions = meta.get("example_questions", []) | |
| welcome = meta.get("welcome_message", "How can I help you?") | |
| # -- Sidebar: Document Info + Settings ------------------------------------ | |
| # The sidebar shows the current document, model settings, and topic pills. | |
| with st.sidebar: | |
| st.markdown(f"**{st.session_state.uploaded_filename}**") | |
| size_mb = (st.session_state.uploaded_filesize or 0) / 1024 / 1024 | |
| st.caption( | |
| f"{size_mb:.1f} MB | " | |
| f"{st.session_state.page_count or '?'} pages | " | |
| f"{st.session_state.chunk_count or '?'} chunks" | |
| ) | |
| if st.button("New Document", use_container_width=True): | |
| reset_document() | |
| st.rerun() | |
| st.divider() | |
| # Model selection — Standard mode allows temperature control, | |
| # Reasoning mode uses models like o3/o4-mini that don't accept temperature. | |
| with st.expander("Settings", expanded=False): | |
| model_type = st.radio("Mode", ["Standard", "Reasoning"], index=0, | |
| help="Reasoning = deeper analysis, slower") | |
| if model_type == "Standard": | |
| selected_model = st.selectbox("Model", STANDARD_MODELS, index=1) | |
| temperature = st.slider("Temperature", 0.0, 1.0, 0.3, 0.1) | |
| else: | |
| selected_model = st.selectbox("Model", REASONING_MODELS, index=1) | |
| temperature = None | |
| # Defensive fallback — Streamlit executes the expander body on every rerun even when | |
| # collapsed (expanded=False only hides it visually), so these defaults should rarely, if ever, be used | |
| if "model_type" not in dir(): | |
| model_type = "Standard" | |
| selected_model = STANDARD_MODELS[0] | |
| temperature = 0.3 | |
| st.divider() | |
| if st.button("Clear Chat", use_container_width=True): | |
| st.session_state.langchain_messages = [] | |
| st.session_state.last_sources = [] | |
| st.session_state.last_query = None | |
| st.rerun() | |
| st.divider() | |
| # Topic pills — visual summary of document topics from AI metadata extraction | |
| if topics: | |
| st.markdown("**Topics**") | |
| pills_html = " ".join( | |
| f'<span style="display:inline-block;background:#e9ecef;color:#495057;' | |
| f'padding:3px 10px;border-radius:14px;font-size:0.75rem;margin:2px;">' | |
| f'{t}</span>' | |
| for t in topics | |
| ) | |
| st.markdown(pills_html, unsafe_allow_html=True) | |
| # -- Header — Document title and subtitle --------------------------------- | |
| st.markdown( | |
| f'<div class="app-header"><h1>{title}</h1><p>{subtitle}</p></div>', | |
| unsafe_allow_html=True | |
| ) | |
| # -- Status Row — Compact bar showing document, chunk count, and model ---- | |
| is_reasoning = (model_type == "Reasoning") | |
| status_items = [ | |
| f'<span class="status-item"><span class="status-label">Document:</span> {st.session_state.uploaded_filename}</span>', | |
| f'<span class="status-item"><span class="status-label">Indexed:</span> {st.session_state.chunk_count or "?"} chunks</span>', | |
| f'<span class="status-item"><span class="status-label">Model:</span> {selected_model}</span>', | |
| ] | |
| st.markdown( | |
| '<div class="doc-status-row">' + " ".join(status_items) + '</div>', | |
| unsafe_allow_html=True | |
| ) | |
| # -- Two-Column Layout: Chat (70%) | Sources (30%) ------------------------ | |
| chat_col, sources_col = st.columns([7, 3], gap="medium") | |
| # ---- CHAT COLUMN -------------------------------------------------------- | |
| with chat_col: | |
| # StreamlitChatMessageHistory stores messages in st.session_state["langchain_messages"]. | |
| # On first load, we seed it with the AI-generated welcome message. | |
| msgs = StreamlitChatMessageHistory(key="langchain_messages") | |
| if len(msgs.messages) == 0: | |
| msgs.add_ai_message(welcome) | |
| # Build the RAG chain for the currently selected model | |
| rag_chain, retriever = create_rag_chain( | |
| selected_model, temperature, is_reasoning | |
| ) | |
| # Render the full chat history | |
| for msg in msgs.messages: | |
| role = "assistant" if msg.type == "ai" else "user" | |
| st.chat_message(role).write(msg.content) | |
| # -- Suggested / Follow-Up Question Chips ----------------------------- | |
| # Before any exchange: show static example questions from metadata. | |
| # After an exchange: show AI-generated follow-up questions that are | |
| # contextual to the last answer. Falls back to static examples if | |
| # follow-up generation failed. | |
| followups = st.session_state.followup_questions | |
| if len(msgs.messages) <= 1: | |
| if example_questions: | |
| st.markdown("**Suggested questions**") | |
| chip_cols = st.columns(min(len(example_questions), 4)) | |
| for idx, q in enumerate(example_questions): | |
| col = chip_cols[idx % len(chip_cols)] | |
| if col.button(q, key=f"chip_{idx}", use_container_width=True): | |
| st.session_state.chip_question = q | |
| st.rerun() | |
| else: | |
| chips = followups if followups else example_questions | |
| if chips: | |
| st.markdown("**Suggested questions**") | |
| chip_cols = st.columns(min(len(chips), 4)) | |
| for idx, q in enumerate(chips): | |
| col = chip_cols[idx % len(chip_cols)] | |
| if col.button(q, key=f"followup_{idx}", use_container_width=True): | |
| st.session_state.chip_question = q | |
| st.rerun() | |
| # -- Conversation Utility Buttons ------------------------------------- | |
| # Visible after at least one full Q&A exchange. These inject pre-written | |
| # prompts as if the user typed them: | |
| # Regenerate: re-asks the last question (removes the last Q&A pair first) | |
| # More detail: asks for an expanded version of the last answer | |
| # Shorter: asks for a condensed version | |
| if len(msgs.messages) > 2: | |
| u1, u2, u3, _ = st.columns([2, 2, 2, 6]) | |
| if u1.button("Regenerate", key="util_regen", use_container_width=True): | |
| last_q = st.session_state.last_query | |
| if last_q and len(st.session_state.get("langchain_messages", [])) >= 2: | |
| st.session_state.langchain_messages.pop() # remove AI answer | |
| st.session_state.langchain_messages.pop() # remove user question | |
| st.session_state.chip_question = last_q | |
| st.session_state.last_sources = [] | |
| st.rerun() | |
| if u2.button("More detail", key="util_more", use_container_width=True): | |
| st.session_state.chip_question = ( | |
| "Please expand on your previous answer with more detail and page citations." | |
| ) | |
| st.rerun() | |
| if u3.button("Shorter", key="util_shorter", use_container_width=True): | |
| st.session_state.chip_question = ( | |
| "Please give a shorter, more concise version of your previous answer." | |
| ) | |
| st.rerun() | |
| # -- Question Input & RAG Invocation ---------------------------------- | |
| # st.chat_input provides the text box at the bottom of the chat. | |
| # chip_question is set when the user clicks a suggested question button. | |
| # Either source triggers the RAG chain. | |
| typed_question = st.chat_input("Ask a question about the document...") | |
| question = st.session_state.chip_question or typed_question | |
| if st.session_state.chip_question: | |
| st.session_state.chip_question = None # consume the chip question | |
| if question: | |
| st.chat_message("user").write(question) | |
| st.session_state.last_query = question | |
| spinner_text = "Analyzing..." if is_reasoning else "Searching..." | |
| with st.spinner(spinner_text): | |
| try: | |
| if rag_chain: | |
| # Invoke the full RAG pipeline: | |
| # 1. Reformulate question using chat history | |
| # 2. Retrieve relevant chunks from Chroma | |
| # 3. Generate answer with retrieved context | |
| # 4. Save Q&A to chat history automatically | |
| response = rag_chain.invoke( | |
| {"input": question}, | |
| config={"configurable": {"session_id": "doc_chat"}} | |
| ) | |
| st.chat_message("assistant").write(response["answer"]) | |
| st.session_state.last_sources = response.get("context", []) | |
| # Generate contextual follow-up questions based on this exchange | |
| st.session_state.followup_questions = generate_followup_questions( | |
| question, response["answer"] | |
| ) | |
| # Rerun to display the new follow-up chips (they render above | |
| # the input area, which has already been drawn by this point) | |
| st.rerun() | |
| else: | |
| st.error("RAG chain not available. Try re-uploading the document.") | |
| except Exception as e: | |
| st.error(f"Error: {e}") | |
| # ---- SOURCES COLUMN ----------------------------------------------------- | |
| # Shows the retrieved chunks that the LLM used to generate its answer. | |
| # Each chunk is in a collapsible expander labeled with its page and chunk number. | |
| # Search terms from the user's query are highlighted with <mark> tags. | |
| with sources_col: | |
| st.markdown("**Sources**") | |
| sources = st.session_state.last_sources | |
| if sources: | |
| # Summary bar — shows how many chunks were retrieved and which pages | |
| # Sort pages numerically — sorting their string forms would put page 10 before page 2. | |
| pages_cited = sorted({ | |
| d.metadata.get("page") | |
| for d in sources | |
| if d.metadata.get("page") is not None | |
| }) | |
| pages_str = ", ".join(str(p) for p in pages_cited) if pages_cited else "N/A" | |
| st.markdown( | |
| f'<div class="source-summary">' | |
| f'Retrieved <strong>{len(sources)}</strong> chunks ' | |
| f'covering pages <strong>{pages_str}</strong></div>', | |
| unsafe_allow_html=True, | |
| ) | |
| for i, doc in enumerate(sources): | |
| page = doc.metadata.get("page", "?") | |
| chunk_idx = doc.metadata.get("chunk_index", "?") | |
| with st.expander(f"Page {page} · Chunk {chunk_idx}"): | |
| full_hl = highlight_terms( | |
| doc.page_content, st.session_state.last_query | |
| ) | |
| st.markdown(full_hl, unsafe_allow_html=True) | |
| else: | |
| st.caption("Sources for the latest answer will appear here.") | |
| # ============================================================================= | |
| # 7.0 FOOTER | |
| # ============================================================================= | |
| st.markdown( | |
| '<div class="app-footer">Document Reference Assistant — ' | |
| 'Built with LangChain + Streamlit — Business Science University</div>', | |
| unsafe_allow_html=True | |
| ) |