import os
import uuid

import faiss
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Module-level vector store, populated by load_vectorstore().
vectorstore = None


def load_vectorstore(pdf_path):
    """Extract text from a PDF, split it into chunks, and index the chunks in FAISS."""
    global vectorstore

    # Read the PDF and concatenate the text of every page.
    reader = PdfReader(pdf_path)
    text = "".join(page.extract_text() or "" for page in reader.pages)

    # Split the text into overlapping chunks sized for the embedding model.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_text(text)

    # Embed with a sentence-transformers model and build a flat L2 FAISS index
    # whose dimensionality matches the embedding size.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    dim = len(embeddings.embed_query("test"))
    index = faiss.IndexFlatL2(dim)

    vectorstore = FAISS(
        embedding_function=embeddings,
        index=index,
        docstore=InMemoryDocstore({}),
        index_to_docstore_id={},
    )

    # Store each chunk under a UUID so entries can be referenced or deleted later.
    uuids = [str(uuid.uuid4()) for _ in chunks]
    vectorstore.add_texts(chunks, ids=uuids)


def ask_question(query):
    """Answer a question against the indexed document using retrieval-augmented generation."""
    global vectorstore
    if not vectorstore:
        return "Please upload and index a document first."

    # Hosted Mistral instruct model served through the Hugging Face Hub inference API.
    llm = HuggingFaceHub(
        repo_id="mistralai/Mistral-7B-Instruct-v0.1",
        huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
        model_kwargs={"temperature": 0.7, "max_length": 512},
    )

    # Retrieve the 3 chunks most similar to the query.
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    prompt = PromptTemplate(
        template=(
            "Use the context to answer the question: "
            "Context: {context} Question: {question} Answer:"
        ),
        input_variables=["context", "question"],
    )

    # Stuff the retrieved chunks into the prompt and generate an answer.
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": prompt},
    )
    return chain({"query": query})["result"]
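

# Minimal usage sketch. Assumptions not in the original code: "example.pdf" is a
# hypothetical local file, and HUGGINGFACEHUB_API_TOKEN is already set in the environment.
if __name__ == "__main__":
    load_vectorstore("example.pdf")
    print(ask_question("What is this document about?"))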