import os

# Prerequisite: set your OpenAI API key
os.environ["OPENAI_API_KEY"] = "your-api-key"

# =========================================
# Step 1: Load and Split Documents
# Load data and split it into ~1,000-character chunks with overlap for context
# (the splitter below counts characters, not tokens).

from langchain_community.document_loaders import WebBaseLoader  # Or TextLoader for local files
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load example data (e.g., a blog post)
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
docs = loader.load()

# Split into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
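
# Optional sanity check: confirm how many chunks were produced and peek at the first one.
print(f"{len(splits)} chunks; first chunk starts with: {splits[0].page_content[:100]!r}")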

# For PDFs or custom files, use PyPDFLoader or similar.
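
# A minimal sketch of the PDF route, assuming the pypdf package is installed and
# "example.pdf" stands in for a hypothetical local file; the resulting pages can be
# split with the same text_splitter as above.
#
# from langchain_community.document_loaders import PyPDFLoader
# pdf_loader = PyPDFLoader("example.pdf")
# pdf_docs = pdf_loader.load()
# pdf_splits = text_splitter.split_documents(pdf_docs)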

# =========================================
# Step 2: Embed and Index in Vector DB
# Use an embedding model to vectorize the chunks and store them.

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import SingleStoreDB

embeddings = OpenAIEmbeddings()

# SingleStore integrates well for production-scale apps with SQL querying alongside vectors.
# It requires a running SingleStore instance; configure the connection first
# (e.g., via the SINGLESTOREDB_URL environment variable or the host argument).
vector_store = SingleStoreDB.from_documents(splits, embeddings, table_name="my_rag_table")

# =========================================
# Step 3: Retrieval
# Embed the query and fetch relevant chunks.

query = "What is task decomposition?"
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
retrieved_docs = retriever.invoke(query)

# Combine retrieved content
context = "\n\n".join(doc.page_content for doc in retrieved_docs)

# =========================================
# Step 4: Augment and Generate
# Craft a prompt with the context and query, then generate.

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

llm = ChatOpenAI(model="gpt-3.5-turbo")

prompt_template = ChatPromptTemplate.from_template(
    "Answer the question based on this context: {context}\n\nQuestion: {query}"
)
chain = prompt_template | llm

response = chain.invoke({"context": context, "query": query})
print(response.content)

# =========================================
# This creates a basic RAG chain: retrieve → augment → generate.
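
# The same steps can also be wired into a single runnable with LCEL. A sketch, reusing
# the retriever, prompt_template, and llm defined above (format_docs is a small helper
# introduced here, not part of the original steps):
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    # Join retrieved chunks into one context string
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "query": RunnablePassthrough()}
    | prompt_template
    | llm
)
print(rag_chain.invoke("What is task decomposition?").content)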

# For local LLMs (e.g., Llama 3 via Ollama):
# Install Ollama and pull a model:
#   ollama pull llama3
# Replace ChatOpenAI with ChatOllama(model="llama3").
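#
# A minimal sketch of that swap, assuming Ollama is running locally with llama3 pulled;
# the rest of the pipeline stays the same.
#
# from langchain_community.chat_models import ChatOllama
# llm = ChatOllama(model="llama3")
# chain = prompt_template | llm
# response = chain.invoke({"context": context, "query": query})
# print(response.content)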