#!/usr/bin/env python3
"""
Single-file RAG pipeline using:

- LlamaIndex (open source)
- Local Ollama (llama3 for both embeddings + generation)
- SingleStore DB (local vector store)

No internet, no paid APIs required.
"""
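
# Prerequisites (a sketch; exact package names are assumptions, so check them
# against your installed LlamaIndex version):
#   pip install llama-index llama-index-embeddings-ollama \
#       llama-index-llms-ollama llama-index-vector-stores-singlestoredb
# Ollama must be running locally (default port 11434) with the model pulled:
#   ollama pull llama3
# A SingleStore instance must be reachable with the credentials configured below.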
|
|
import os |
|
from llama_index.core import ( |
|
Settings, |
|
SimpleDirectoryReader, |
|
VectorStoreIndex, |
|
StorageContext, |
|
) |
|
from llama_index.core.node_parsers import SentenceSplitter |
|
from llama_index.embeddings.ollama import OllamaEmbedding |
|
from llama_index.vector_stores.singlestore import SingleStoreVectorStore |
|
from llama_index.llms.ollama import Ollama |
|
|
|

# ========================= CONFIGURATION =========================
# Change these to match your environment
DOCS_DIR = "/path/to/your/documents"        # ← your PDF/TXT/DOCX folder
OLLAMA_BASE_URL = "http://localhost:11434"  # default Ollama port
SINGLESTORE_CONFIG = {
    "host": "localhost",
    "port": 3306,
    "user": "root",
    "password": "your_password",  # ← change this
    "database": "rag_db",         # ← change this
    "table": "vector_table",
}
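
# The database configured above must already exist. Something along these lines
# in any SQL client will create it (standard SQL, supported by SingleStore):
#   CREATE DATABASE IF NOT EXISTS rag_db;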

EMBEDDING_MODEL = "llama3"  # must be pulled with `ollama pull llama3`
LLM_MODEL = "llama3"
CHUNK_SIZE = 1024
CHUNK_OVERLAP = 20
TOP_K = 3

# Sample query (change as needed)
QUERY = "What is the main topic discussed in these documents?"

# ========================= STEP 1: LOAD & SPLIT =========================
print("🔄 Step 1: Loading and splitting documents...")
documents = SimpleDirectoryReader(
    input_dir=DOCS_DIR,
    required_exts=[".pdf", ".txt", ".docx", ".doc"],  # supported formats
).load_data()

parser = SentenceSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
nodes = parser.get_nodes_from_documents(documents)

print(f"✅ Loaded {len(documents)} documents → {len(nodes)} nodes")
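
# Optional guard (not part of the original flow): stop early if nothing was
# loaded, so the embedding step below never runs on an empty node list.
if not nodes:
    raise SystemExit(f"❌ No supported documents found in {DOCS_DIR}")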

# ========================= STEP 2: EMBED & INDEX IN SINGLESTORE =========================
print("🔄 Step 2: Setting up embeddings + SingleStore vector store...")

Settings.embed_model = OllamaEmbedding(
    model_name=EMBEDDING_MODEL,
    base_url=OLLAMA_BASE_URL,
)

vector_store = SingleStoreVectorStore(
    host=SINGLESTORE_CONFIG["host"],
    port=SINGLESTORE_CONFIG["port"],
    user=SINGLESTORE_CONFIG["user"],
    password=SINGLESTORE_CONFIG["password"],
    database=SINGLESTORE_CONFIG["database"],
    table_name=SINGLESTORE_CONFIG["table"],
    embedding_dimension=4096,  # llama3 embedding dimension
)

storage_context = StorageContext.from_defaults(vector_store=vector_store)
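
# Building the index from the nodes is where the embedding calls happen: each
# node is embedded via Settings.embed_model (Ollama/llama3) and the resulting
# vectors are written to the SingleStore table configured above. With a local
# model this can take a while for large document sets.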
index = VectorStoreIndex(nodes, storage_context=storage_context)

print("✅ Documents embedded and indexed in SingleStore")

# ========================= STEP 3: RETRIEVAL =========================
print("\n🔄 Step 3: Retrieval demo...")
retriever = index.as_retriever(similarity_top_k=TOP_K)
retrieved_nodes = retriever.retrieve(QUERY)

print(f"Top {TOP_K} retrieved nodes:")
for i, node in enumerate(retrieved_nodes, 1):
    print(f"  {i}. {node.text[:200]}...")
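
# Illustrative addition (not in the original flow): the retriever returns
# NodeWithScore objects, so each hit's similarity score can be printed
# alongside its text.
for i, node in enumerate(retrieved_nodes, 1):
    print(f"     score {i}: {node.score:.3f}")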

# ========================= STEP 4: AUGMENT + GENERATE =========================
print("\n🔄 Step 4: Augment & Generate with local Ollama...")
Settings.llm = Ollama(
    model=LLM_MODEL,
    base_url=OLLAMA_BASE_URL,
    temperature=0.7,
    request_timeout=120.0,
)
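
# The query engine wires retrieval and generation together: it retrieves the
# top-k nodes for the query and passes them as context to the Ollama LLM
# configured above, which produces the final answer.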
query_engine = index.as_query_engine(similarity_top_k=TOP_K)

response = query_engine.query(QUERY)

print("\n" + "="*60)
print("FINAL RAG RESPONSE")
print("="*60)
print(response)
print("="*60)

# Optional: show source nodes used for the final answer
print("\nSources used:")
for i, node in enumerate(response.source_nodes, 1):
    print(f"  {i}. {node.node.metadata.get('file_name', 'unknown')} "
          f"(score: {node.score:.3f})")