-
-
Save ryzn0518/7bffa5dda582fa2483470d337ea3bf35 to your computer and use it in GitHub Desktop.
Building a question-answering bot over private data entirely with open-source LLM tooling
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from langchain import PromptTemplate, LLMChain | |
| from langchain.document_loaders import UnstructuredHTMLLoader | |
| from langchain.embeddings import LlamaCppEmbeddings | |
| from langchain.llms import LlamaCpp | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.vectorstores.faiss import FAISS | |
# Load the locally built HTML docs of LangChain as the private knowledge source.
loader = UnstructuredHTMLLoader("langchain/docs/_build/html/index.html")
# Local quantized llama.cpp model, used both for embeddings and for generation.
# NOTE(review): the same .bin file serves both roles — confirm this is intended.
embedding = LlamaCppEmbeddings(model_path="path/models/ggml-model-q4_0.bin")
llm = LlamaCpp(model_path="path/models/ggml-model-q4_0.bin")
def split_chunks(sources: list) -> list:
    """Split loaded documents into small overlapping chunks.

    Args:
        sources: Document objects produced by a LangChain loader.

    Returns:
        A list of chunked Document objects, up to 256 characters each
        with a 16-character overlap between neighbours.
    """
    # BUG FIX: RecursiveCharacterTextSplitter takes `separators` (a list of
    # candidate separators), not `separator` — the original keyword argument
    # is not a valid parameter of this class.
    splitter = RecursiveCharacterTextSplitter(
        separators=[""], chunk_size=256, chunk_overlap=16
    )
    # split_documents already returns a list — no need to copy element-wise.
    return splitter.split_documents(sources)
def generate_embedding(chunks: list):
    """Embed every chunk and build a FAISS search index over them.

    Each chunk's text is embedded with the module-level llama.cpp
    embedder; per-chunk metadata is stored alongside for retrieval.
    """
    return FAISS.from_texts(
        [doc.page_content for doc in chunks],
        embedding,
        metadatas=[doc.metadata for doc in chunks],
    )
def similarity_search(
    query: str, index: "FAISS", k: int = 4
) -> tuple[list, list]:
    """Retrieve the *k* chunks most similar to *query* from the index.

    Args:
        query: Natural-language question to match against the index.
        index: A FAISS vector store (anything exposing ``similarity_search``).
        k: Number of nearest chunks to return (default 4, as before).

    Returns:
        A ``(matched_docs, sources)`` pair: the raw matched Document objects,
        and a plain-dict projection of each (``page_content`` + ``metadata``).
    """
    # BUG FIX: the original return annotation `(list, list)` was a tuple
    # literal, not a type — `tuple[list, list]` is the valid spelling.
    # The hard-coded k=4 is now a backward-compatible keyword parameter.
    matched_docs = index.similarity_search(query, k=k)
    sources = [
        {"page_content": doc.page_content, "metadata": doc.metadata}
        for doc in matched_docs
    ]
    return matched_docs, sources
# --- Pipeline: load -> chunk -> embed/index -> retrieve -> answer ---
docs = loader.load()
chunks = split_chunks(docs)
embeddings = generate_embedding(chunks)
question = "What are the use cases of LangChain?"
matched_docs, sources = similarity_search(question, embeddings)
# Prompt with a chain-of-thought nudge; retrieved chunks fill {context}.
template = """
Please use the following context to answer questions.
Context: {context}
---
Question: {question}
Answer: Let's think step by step."""
# Join the retrieved chunks into one context string and pre-fill the
# {context} slot, so the chain only needs the question at run time.
context = "\n".join([doc.page_content for doc in matched_docs])
prompt = PromptTemplate(template=template, input_variables=["context", "question"]).partial(context=context)
llm_chain = LLMChain(prompt=prompt, llm=llm)
print(llm_chain.run(question))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment