# app.py
"""Streamlit RAG Q&A app.

Upload a PDF, index it into a FAISS vector store with HuggingFace
embeddings, then answer questions over it with a Hugging Face-hosted
Mistral model via a LangChain RetrievalQA chain.
"""

import os
import uuid
from io import BytesIO

import faiss
import streamlit as st
from PyPDF2 import PdfReader
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceHub
from langchain_community.vectorstores import FAISS

# Load secrets from Streamlit's secrets manager (.streamlit/secrets.toml).
HUGGINGFACEHUB_API_TOKEN = st.secrets["HUGGINGFACEHUB_API_TOKEN"]
RAG_ACCESS_KEY = st.secrets["RAG_ACCESS_KEY"]

# Initialize session state so keys exist on the first script run.
if "vectorstore" not in st.session_state:
    st.session_state.vectorstore = None
if "history" not in st.session_state:
    st.session_state.history = []
if "authenticated" not in st.session_state:
    st.session_state.authenticated = False


# --- Core logic (defined BEFORE the module-level sidebar code below:
# Streamlit re-executes the script top-to-bottom on every interaction, and
# the original file called process_input() before its def, raising NameError
# on the "Process File" click).

def process_input(input_data):
    """Read an uploaded PDF, split it into chunks, embed them, and build a
    FAISS vector store.

    Args:
        input_data: A Streamlit UploadedFile for a PDF.

    Returns:
        The populated FAISS vector store (also persisted to
        ``vectorstore/faiss_index``).

    Raises:
        ValueError: If the PDF yields no extractable text (e.g. scanned
            images only).
    """
    os.makedirs("vectorstore", exist_ok=True)
    # NOTE(review): world-writable permissions are a security smell —
    # confirm the deployment environment actually requires 0o777.
    os.chmod("vectorstore", 0o777)

    progress_bar = st.progress(0)
    status = st.status("Processing PDF file...", expanded=True)

    status.update(label="Reading PDF file...")
    progress_bar.progress(0.2)
    pdf_reader = PdfReader(BytesIO(input_data.read()))
    # extract_text() returns None for pages without a text layer; treat
    # those as empty rather than crashing the join.
    full_text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    status.update(label="Splitting text...")
    progress_bar.progress(0.4)
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = splitter.split_text(full_text)
    if not texts:
        # Guard against silently building an empty, useless index.
        raise ValueError("No extractable text found in the PDF.")

    status.update(label="Creating embeddings...")
    progress_bar.progress(0.6)
    hf_embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={"device": "cpu"},
    )

    status.update(label="Building vector store...")
    progress_bar.progress(0.8)
    # Probe the embedding size once to dimension the FAISS index correctly.
    dimension = len(hf_embeddings.embed_query("test"))
    index = faiss.IndexFlatL2(dimension)
    vector_store = FAISS(
        embedding_function=hf_embeddings,
        index=index,
        docstore=InMemoryDocstore({}),
        index_to_docstore_id={},
    )
    uuids = [str(uuid.uuid4()) for _ in texts]
    vector_store.add_texts(texts, ids=uuids)

    status.update(label="Saving vector store...")
    progress_bar.progress(0.9)
    vector_store.save_local("vectorstore/faiss_index")

    status.update(label="Done!", state="complete")
    progress_bar.progress(1.0)
    return vector_store


def answer_question(vectorstore, query):
    """Answer *query* using a RetrievalQA chain over *vectorstore*.

    Args:
        vectorstore: A FAISS vector store built by :func:`process_input`.
        query: The user's question string.

    Returns:
        The model's answer with any echoed "Answer:" prefix stripped.

    Raises:
        RuntimeError: If the Hugging Face LLM cannot be initialized
            (bad/missing API token or no model access).
    """
    try:
        llm = HuggingFaceHub(
            repo_id="mistralai/Mistral-7B-Instruct-v0.1",
            model_kwargs={"temperature": 0.7, "max_length": 512},
            huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
        )
    except Exception as e:
        raise RuntimeError(
            "Failed to load LLM. Check Hugging Face API key and access rights."
        ) from e

    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    prompt_template = PromptTemplate(
        template=(
            "Use the context to answer the question concisely:\n\n"
            "Context: {context}\n\nQuestion: {question}\n\nAnswer:"
        ),
        input_variables=["context", "question"],
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": prompt_template},
    )
    result = qa_chain({"query": query})
    # Some instruct models echo the "Answer:" marker; keep only what follows.
    return result["result"].split("Answer:")[-1].strip()


# --- Sidebar with logo, authentication, upload, and chat history.
# Runs at module level on every rerun, matching the original behavior.
with st.sidebar:
    try:
        st.image("bsnl_logo.png", width=200)
    except FileNotFoundError:
        st.warning("BSNL logo not found.")

    st.header("RAG Control Panel")
    api_key_input = st.text_input("Enter RAG Access Key", type="password")

    # NOTE(review): the original custom CSS/HTML payloads for the styled
    # Authenticate button were lost when this file was mangled; placeholders
    # are kept so the layout hooks remain — restore the markup from VCS.
    st.markdown("", unsafe_allow_html=True)
    with st.container():
        st.markdown("", unsafe_allow_html=True)
        if st.button("Authenticate"):
            if api_key_input == RAG_ACCESS_KEY:
                st.session_state.authenticated = True
                st.success("Authentication successful!")
            else:
                st.error("Invalid API key.")
        st.markdown("", unsafe_allow_html=True)

    if st.session_state.authenticated:
        input_data = st.file_uploader("Upload a PDF file", type=["pdf"])
        if st.button("Process File") and input_data is not None:
            try:
                vector_store = process_input(input_data)
                st.session_state.vectorstore = vector_store
                st.success("File processed successfully. You can now ask questions.")
            except Exception as e:
                st.error(f"Processing failed: {str(e)}")

        st.subheader("Chat History")
        for i, (q, a) in enumerate(st.session_state.history):
            st.write(f"**Q{i+1}:** {q}")
            st.write(f"**A{i+1}:** {a}")
            st.markdown("---")


def main():
    """Render the main Q&A interface (guarded by sidebar authentication)."""
    # NOTE(review): original page-level CSS payload was lost; placeholder kept.
    st.markdown("", unsafe_allow_html=True)

    st.title("RAG Q&A App with Mistral AI")
    st.markdown("Welcome to the BSNL RAG App! Upload a PDF and ask questions.")

    # Guard clauses: require auth, then a processed document.
    if not st.session_state.authenticated:
        st.warning("Please authenticate using the sidebar.")
        return
    if st.session_state.vectorstore is None:
        st.info("Please upload and process a PDF file.")
        return

    query = st.text_input("Enter your question:")
    if st.button("Submit") and query:
        with st.spinner("Generating answer..."):
            try:
                answer = answer_question(st.session_state.vectorstore, query)
                st.session_state.history.append((query, answer))
                st.write("**Answer:**", answer)
            except Exception as e:
                st.error(f"Error generating answer: {str(e)}")


# Run the app (true under `streamlit run app.py`).
if __name__ == "__main__":
    main()