Update app.py
app.py CHANGED
@@ -1,3 +1,4 @@
+# app.py
 import streamlit as st
 import os
 from io import BytesIO
@@ -13,16 +14,13 @@ import faiss
 import uuid
 from dotenv import load_dotenv
 
-# Load
+# Load environment variables
 load_dotenv()
-
-# Load keys
-RAG_ACCESS_KEY = os.getenv("RAG_ACCESS_KEY")
 HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "").strip()
+RAG_ACCESS_KEY = os.getenv("RAG_ACCESS_KEY")
 
 if not HUGGINGFACEHUB_API_TOKEN:
-    st.warning("Hugging Face API token not found in
-               "Please set it in your Hugging Face Secrets or your .env file.")
+    st.warning("Hugging Face API token not found! Please set HUGGINGFACEHUB_API_TOKEN in your .env file.")
 
 # Initialize session state
 if "vectorstore" not in st.session_state:
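Review note on the token handling above: on Hugging Face Spaces the token normally arrives as a repository secret exposed through the environment, which `os.getenv` already covers. If the app should also honor Streamlit's own secrets store, a minimal sketch follows; the `st.secrets` fallback is an assumption, not part of this commit:

    # Sketch: prefer Streamlit secrets, fall back to the environment (assumed setup).
    token = ""
    try:
        token = st.secrets.get("HUGGINGFACEHUB_API_TOKEN", "")
    except Exception:
        pass  # no secrets.toml configured; rely on the environment instead
    HUGGINGFACEHUB_API_TOKEN = (token or os.getenv("HUGGINGFACEHUB_API_TOKEN", "")).strip()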
@@ -38,10 +36,10 @@ with st.sidebar:
         st.image("bsnl_logo.png", width=200)
     except Exception:
         st.warning("BSNL logo not found.")
-
+
     st.header("RAG Control Panel")
     api_key_input = st.text_input("Enter RAG Access Key", type="password")
-
+
     # Blue authenticate button style
     st.markdown("""
     <style>
@@ -61,7 +59,7 @@ with st.sidebar:
     }
     </style>
     """, unsafe_allow_html=True)
-
+
     with st.container():
        st.markdown('<div class="auth-button">', unsafe_allow_html=True)
        if st.button("Authenticate"):
@@ -71,18 +69,22 @@ with st.sidebar:
            else:
                st.error("Invalid API key.")
        st.markdown('</div>', unsafe_allow_html=True)
-
+
    if st.session_state.authenticated:
        input_data = st.file_uploader("Upload a PDF file", type=["pdf"])
-
+
        if st.button("Process File") and input_data is not None:
            try:
                vector_store = process_input(input_data)
                st.session_state.vectorstore = vector_store
                st.success("File processed successfully. You can now ask questions.")
+            except st.StreamlitAPIException as e:
+                st.error(f"File upload failed: Streamlit API error - {str(e)}. Check server configuration.")
+            except (PermissionError, OSError) as e:
+                st.error(f"File upload failed: Permission or OS error - {str(e)}. Check file system access.")
            except Exception as e:
-                st.error(f"
-
+                st.error(f"File upload failed: Unexpected error - {str(e)}. Please try again or check server logs.")
+
        st.subheader("Chat History")
        for i, (q, a) in enumerate(st.session_state.history):
            st.write(f"**Q{i+1}:** {q}")
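Review note on the new exception handlers: `StreamlitAPIException` is defined in `streamlit.errors`, and not every Streamlit release re-exports it as `st.StreamlitAPIException`. A defensive import guard, as a sketch:

    # Sketch: make the handler robust across Streamlit versions.
    try:
        from streamlit.errors import StreamlitAPIException
    except ImportError:
        StreamlitAPIException = Exception  # degrade to the broad handler

With that alias in scope, the first `except` clause can read `except StreamlitAPIException as e:` regardless of version.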
@@ -93,25 +95,52 @@ with st.sidebar:
 def main():
     st.markdown("""
     <style>
+    @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
     .stApp {
+        background-color: #FFFFFF;
         font-family: 'Roboto', sans-serif;
+        color: #333333;
+    }
+    .stTextInput > div > div > input {
         background-color: #FFFFFF;
-        color: #
+        color: #333333;
+        border-radius: 8px;
+        border: 1px solid #007BFF;
+        padding: 10px;
+        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+    }
+    .stButton > button {
+        background-color: #007BFF;
+        color: white;
+        border-radius: 8px;
+        padding: 10px 20px;
+        border: none;
+        transition: all 0.3s ease;
+        box-shadow: 0 2px 4px rgba(0,0,0,0.2);
+    }
+    .stButton > button:hover {
+        background-color: #0056b3;
+        transform: scale(1.05);
+    }
+    .stSidebar {
+        background-color: #F5F5F5;
+        padding: 20px;
+        border-right: 2px solid #007BFF;
     }
     </style>
     """, unsafe_allow_html=True)
-
+
     st.title("RAG Q&A App with Mistral AI")
-    st.markdown("Welcome to the BSNL RAG App! Upload a PDF and ask questions.")
-
+    st.markdown("Welcome to the BSNL RAG App! Upload a PDF file and ask questions.", unsafe_allow_html=True)
+
     if not st.session_state.authenticated:
         st.warning("Please authenticate using the sidebar.")
         return
-
+
     if st.session_state.vectorstore is None:
         st.info("Please upload and process a PDF file.")
         return
-
+
     query = st.text_input("Enter your question:")
     if st.button("Submit") and query:
         with st.spinner("Generating answer..."):
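Review note on the stylesheet: recent Streamlit builds render the sidebar as a `section` tagged with a `data-testid` attribute, so the `.stSidebar` class selector may not match anything. A hedged alternative, assuming a current Streamlit DOM:

    # Sketch: target the sidebar via its data-testid attribute instead.
    st.markdown("""
    <style>
    section[data-testid="stSidebar"] {
        background-color: #F5F5F5;
        border-right: 2px solid #007BFF;
    }
    </style>
    """, unsafe_allow_html=True)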
@@ -124,31 +153,37 @@ def main():
 
 # PDF processing logic
 def process_input(input_data):
-
-    os.chmod("vectorstore", 0o777)
-
+    # Initialize progress bar and status
     progress_bar = st.progress(0)
     status = st.empty()
-
+
+    # Step 1: Read PDF file in memory
     status.text("Reading PDF file...")
-    progress_bar.progress(0.
+    progress_bar.progress(0.20)
+
     pdf_reader = PdfReader(BytesIO(input_data.read()))
     documents = "".join([page.extract_text() or "" for page in pdf_reader.pages])
-
-
-
+
+    # Step 2: Split text
+    status.text("Splitting text into chunks...")
+    progress_bar.progress(0.40)
+
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
     texts = text_splitter.split_text(documents)
-
+
+    # Step 3: Create embeddings
     status.text("Creating embeddings...")
-    progress_bar.progress(0.
+    progress_bar.progress(0.60)
+
     hf_embeddings = HuggingFaceEmbeddings(
         model_name="sentence-transformers/all-mpnet-base-v2",
         model_kwargs={'device': 'cpu'}
     )
-
+
+    # Step 4: Initialize FAISS vector store
     status.text("Building vector store...")
-    progress_bar.progress(0.
+    progress_bar.progress(0.80)
+
     dimension = len(hf_embeddings.embed_query("test"))
     index = faiss.IndexFlatL2(dimension)
     vector_store = FAISS(
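For reviewers unsure what `chunk_size=1000, chunk_overlap=100` yields in practice, a standalone check (the sample text is made up):

    # Sketch: inspect the splitter's output sizes outside the app.
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    sample = "BSNL provides telecom services across India. " * 200
    chunks = splitter.split_text(sample)
    print(len(chunks), max(len(c) for c in chunks))  # chunk lengths stay <= 1000

Adjacent chunks share up to 100 characters of overlap, so answers that span a chunk boundary keep their surrounding context.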
@@ -157,35 +192,34 @@ def process_input(input_data):
         docstore=InMemoryDocstore({}),
         index_to_docstore_id={}
     )
-
+
+    # Add texts to vector store
     uuids = [str(uuid.uuid4()) for _ in texts]
     vector_store.add_texts(texts, ids=uuids)
-
-
-
-    vector_store.save_local("vectorstore/faiss_index")
-
-    status.text("Done!")
+
+    # Step 5: Complete processing
+    status.text("Processing complete!")
     progress_bar.progress(1.0)
+
     return vector_store
 
 # Question-answering logic
 def answer_question(vectorstore, query):
     if not HUGGINGFACEHUB_API_TOKEN:
         raise RuntimeError("Missing Hugging Face API token. Please set it in your secrets.")
-
+
     llm = HuggingFaceHub(
         repo_id="mistralai/Mistral-7B-Instruct-v0.1",
         model_kwargs={"temperature": 0.7, "max_length": 512},
         huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
     )
-
+
     retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
     prompt_template = PromptTemplate(
         template="Use the context to answer the question concisely:\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:",
         input_variables=["context", "question"]
     )
-
+
     qa_chain = RetrievalQA.from_chain_type(
         llm=llm,
         chain_type="stuff",
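Note that this commit drops `os.chmod("vectorstore", 0o777)` and `vector_store.save_local("vectorstore/faiss_index")`, so the FAISS index now lives only in `st.session_state` and is rebuilt on each upload. If disk persistence is ever wanted again, a writable spot on Spaces is the temp directory; a sketch, where the path is an assumption and newer LangChain additionally requires `allow_dangerous_deserialization=True` on reload:

    # Sketch: optional persistence to a writable temp dir (assumed path).
    import os
    import tempfile

    persist_dir = os.path.join(tempfile.gettempdir(), "faiss_index")
    vector_store.save_local(persist_dir)
    restored = FAISS.load_local(persist_dir, hf_embeddings,
                                allow_dangerous_deserialization=True)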
@@ -193,7 +227,7 @@ def answer_question(vectorstore, query):
         return_source_documents=False,
         chain_type_kwargs={"prompt": prompt_template}
     )
-
+
     result = qa_chain({"query": query})
     return result["result"].split("Answer:")[-1].strip()
 
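One forward-looking note: `HuggingFaceHub` is deprecated in newer LangChain releases. If this Space ever bumps its pins, the roughly equivalent construction is the following sketch (parameter names per `langchain_community`, untested against this app's pinned versions):

    # Sketch: deprecated HuggingFaceHub swapped for HuggingFaceEndpoint.
    from langchain_community.llms import HuggingFaceEndpoint

    llm = HuggingFaceEndpoint(
        repo_id="mistralai/Mistral-7B-Instruct-v0.1",
        temperature=0.7,
        max_new_tokens=512,
        huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
    )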