samim2024 committed on
Commit
5d008ae
·
verified ·
1 Parent(s): 7edfd17

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -136
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import streamlit as st
2
  import os
3
  from io import BytesIO
@@ -28,175 +29,118 @@ if "authenticated" not in st.session_state:
28
 
29
  # Sidebar
30
  with st.sidebar:
31
- # BSNL Logo (local file with error handling)
32
  try:
33
- st.image(
34
- "bsnl_logo.png",
35
- width=200
36
- )
37
  except FileNotFoundError:
38
- st.warning("BSNL logo not found. Please ensure 'bsnl_logo.png' exists in the project root.")
 
39
  st.header("RAG Control Panel")
40
  api_key_input = st.text_input("Enter RAG Access Key", type="password")
41
-
42
- # Authentication
43
  if st.button("Authenticate"):
44
  if api_key_input == RAG_ACCESS_KEY:
45
  st.session_state.authenticated = True
46
- st.success("Authentication successful!")
47
  else:
48
- st.error("Invalid API key.")
49
-
50
- # File uploader
51
  if st.session_state.authenticated:
52
- input_data = st.file_uploader("Upload a PDF file", type=["pdf"])
53
-
54
- if st.button("Process File") and input_data is not None:
55
- try:
56
- vector_store = process_input(input_data)
57
- st.session_state.vectorstore = vector_store
58
- st.success("File processed successfully. You can now ask questions.")
59
- except (PermissionError, OSError) as e:
60
- st.error(f"File upload failed (Permission or OS error): {str(e)}. Check server permissions or file system access.")
61
- except Exception as e:
62
- st.error(f"File upload failed (Unexpected error): {str(e)}. Please try again or check server logs.")
63
-
64
- # Display chat history
65
- st.subheader("Chat History")
66
- for i, (q, a) in enumerate(st.session_state.history):
67
- st.write(f"**Q{i+1}:** {q}")
68
- st.write(f"**A{i+1}:** {a}")
69
- st.markdown("---")
70
-
71
- # Main app
 
 
 
 
 
 
72
  def main():
73
- # Inject CSS for simple color scheme and clean styling
74
- st.markdown("""
75
- <style>
76
- @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
77
- .stApp {
78
- background-color: #FFFFFF; /* White background */
79
- font-family: 'Roboto', sans-serif;
80
- color: #333333;
81
- }
82
- .stTextInput > div > div > input {
83
- background-color: #FFFFFF;
84
- color: #333333;
85
- border-radius: 8px;
86
- border: 1px solid #007BFF;
87
- padding: 10px;
88
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
89
- }
90
- .stButton > button {
91
- background-color: #007BFF;
92
- color: white;
93
- border-radius: 8px;
94
- padding: 10px 20px;
95
- border: none;
96
- transition: all 0.3s ease;
97
- box-shadow: 0 2px 4px rgba(0,0,0,0.2);
98
- }
99
- .stButton > button:hover {
100
- background-color: #0056b3;
101
- transform: scale(1.05);
102
- }
103
- .stSidebar {
104
- background-color: #F5F5F5; /* Light gray */
105
- padding: 20px;
106
- border-right: 2px solid #007BFF;
107
- }
108
- h1, h2, h3 {
109
- color: #333333;
110
- }
111
- .stSpinner > div > div {
112
- border-color: #007BFF transparent transparent transparent;
113
- }
114
- </style>
115
- """, unsafe_allow_html=True)
116
-
117
  st.title("RAG Q&A App with Mistral AI")
118
- st.markdown("Welcome to the BSNL RAG App! Upload your PDF files and ask questions with ease.", unsafe_allow_html=True)
119
-
120
  if not st.session_state.authenticated:
121
- st.warning("Please authenticate with your API key in the sidebar.")
122
  return
123
-
124
  if st.session_state.vectorstore is None:
125
  st.info("Please upload and process a PDF file in the sidebar.")
126
  return
127
-
128
  query = st.text_input("Enter your question:")
129
  if st.button("Submit") and query:
130
  with st.spinner("Generating answer..."):
131
- answer = answer_question(st.session_state.vectorstore, query)
132
- st.session_state.history.append((query, answer))
133
- st.write("**Answer:**", answer)
 
 
 
134
 
135
  def process_input(input_data):
136
- # Create vectorstore directory for FAISS index
137
- try:
138
- os.makedirs("vectorstore", exist_ok=True)
139
- os.chmod("vectorstore", 0o777) # Ensure write permissions
140
- except PermissionError as e:
141
- st.error(f"Failed to create vectorstore directory: {str(e)}")
142
- raise
143
-
144
- # Initialize progress bar and status
145
  progress_bar = st.progress(0)
146
- status = st.status("Processing PDF file...", expanded=True)
147
-
148
- # Step 1: Read PDF file in memory
149
  status.update(label="Reading PDF file...")
150
- progress_bar.progress(0.20)
151
-
152
  pdf_reader = PdfReader(BytesIO(input_data.read()))
153
- documents = ""
154
- for page in pdf_reader.pages:
155
- documents += page.extract_text() or ""
156
-
157
- # Step 2: Split text
158
- status.update(label="Splitting text into chunks...")
159
- progress_bar.progress(0.40)
160
-
161
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
162
- texts = text_splitter.split_text(documents)
163
-
164
- # Step 3: Create embeddings
165
  status.update(label="Creating embeddings...")
166
- progress_bar.progress(0.60)
167
-
168
- hf_embeddings = HuggingFaceEmbeddings(
169
  model_name="sentence-transformers/all-mpnet-base-v2",
170
- model_kwargs={'device': 'cpu'}
171
  )
172
-
173
- # Step 4: Initialize FAISS vector store
174
- status.update(label="Building vector store...")
175
- progress_bar.progress(0.80)
176
-
177
- dimension = len(hf_embeddings.embed_query("sample text"))
178
  index = faiss.IndexFlatL2(dimension)
179
  vector_store = FAISS(
180
- embedding_function=hf_embeddings,
181
  index=index,
182
  docstore=InMemoryDocstore({}),
183
  index_to_docstore_id={}
184
  )
185
-
186
- # Add texts to vector store
187
- uuids = [str(uuid.uuid4()) for _ in range(len(texts))]
188
  vector_store.add_texts(texts, ids=uuids)
189
-
190
- # Save vector store locally
191
  status.update(label="Saving vector store...")
192
- progress_bar.progress(0.90)
193
-
194
  vector_store.save_local("vectorstore/faiss_index")
195
-
196
- # Complete processing
197
- status.update(label="Processing complete!", state="complete")
198
  progress_bar.progress(1.0)
199
-
200
  return vector_store
201
 
202
  def answer_question(vectorstore, query):
@@ -205,14 +149,14 @@ def answer_question(vectorstore, query):
205
  model_kwargs={"temperature": 0.7, "max_length": 512},
206
  huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
207
  )
208
-
209
  retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
210
-
211
  prompt_template = PromptTemplate(
212
  template="Use the provided context to answer the question concisely:\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:",
213
  input_variables=["context", "question"]
214
  )
215
-
216
  qa_chain = RetrievalQA.from_chain_type(
217
  llm=llm,
218
  chain_type="stuff",
@@ -220,7 +164,7 @@ def answer_question(vectorstore, query):
220
  return_source_documents=False,
221
  chain_type_kwargs={"prompt": prompt_template}
222
  )
223
-
224
  result = qa_chain({"query": query})
225
  return result["result"].split("Answer:")[-1].strip()
226
 
 
1
+ # app.py
2
  import streamlit as st
3
  import os
4
  from io import BytesIO
 
29
 
30
  # Sidebar
31
  with st.sidebar:
 
32
  try:
33
+ st.image("bsnl_logo.png", width=200)
 
 
 
34
  except FileNotFoundError:
35
+ st.warning("Logo missing: 'bsnl_logo.png' not found.")
36
+
37
  st.header("RAG Control Panel")
38
  api_key_input = st.text_input("Enter RAG Access Key", type="password")
39
+
 
40
  if st.button("Authenticate"):
41
  if api_key_input == RAG_ACCESS_KEY:
42
  st.session_state.authenticated = True
43
+ st.success("Authenticated successfully!")
44
  else:
45
+ st.error("Invalid RAG Access Key.")
46
+
 
47
  if st.session_state.authenticated:
48
+ input_data = st.file_uploader("Upload PDF file", type=["pdf"])
49
+
50
+ if input_data:
51
+ if input_data.type != "application/pdf":
52
+ st.error("Only PDF files are allowed.")
53
+ elif st.button("Process File"):
54
+ try:
55
+ vector_store = process_input(input_data)
56
+ st.session_state.vectorstore = vector_store
57
+ st.success("File processed successfully. You can now ask questions.")
58
+ except (PermissionError, OSError) as e:
59
+ st.error(f"File error: {str(e)}")
60
+ except Exception as e:
61
+ if "403" in str(e):
62
+ st.error("Access Denied (403): Check Hugging Face API token.")
63
+ else:
64
+ st.error(f"Unexpected error: {str(e)}")
65
+
66
+ # Chat History
67
+ st.subheader("Chat History")
68
+ for i, (q, a) in enumerate(st.session_state.history):
69
+ st.write(f"**Q{i+1}:** {q}")
70
+ st.write(f"**A{i+1}:** {a}")
71
+ st.markdown("---")
72
+
73
+ # Main area
74
  def main():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  st.title("RAG Q&A App with Mistral AI")
76
+ st.markdown("Welcome to the BSNL RAG App. Upload a PDF and ask questions!")
77
+
78
  if not st.session_state.authenticated:
79
+ st.warning("Please authenticate using your RAG Access Key in the sidebar.")
80
  return
81
+
82
  if st.session_state.vectorstore is None:
83
  st.info("Please upload and process a PDF file in the sidebar.")
84
  return
85
+
86
  query = st.text_input("Enter your question:")
87
  if st.button("Submit") and query:
88
  with st.spinner("Generating answer..."):
89
+ try:
90
+ answer = answer_question(st.session_state.vectorstore, query)
91
+ st.session_state.history.append((query, answer))
92
+ st.write("**Answer:**", answer)
93
+ except Exception as e:
94
+ st.error(f"Failed to generate answer: {str(e)}")
95
 
96
  def process_input(input_data):
97
+ os.makedirs("vectorstore", exist_ok=True)
98
+ os.chmod("vectorstore", 0o777)
99
+
 
 
 
 
 
 
100
  progress_bar = st.progress(0)
101
+ status = st.status("Processing PDF...", expanded=True)
102
+
 
103
  status.update(label="Reading PDF file...")
104
+ progress_bar.progress(0.2)
105
+
106
  pdf_reader = PdfReader(BytesIO(input_data.read()))
107
+ documents = "".join(page.extract_text() or "" for page in pdf_reader.pages)
108
+
109
+ status.update(label="Splitting text...")
110
+ progress_bar.progress(0.4)
111
+
112
+ splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
113
+ texts = splitter.split_text(documents)
114
+
 
 
 
 
115
  status.update(label="Creating embeddings...")
116
+ progress_bar.progress(0.6)
117
+
118
+ embeddings = HuggingFaceEmbeddings(
119
  model_name="sentence-transformers/all-mpnet-base-v2",
120
+ model_kwargs={"device": "cpu"}
121
  )
122
+
123
+ status.update(label="Building FAISS index...")
124
+ progress_bar.progress(0.8)
125
+
126
+ dimension = len(embeddings.embed_query("sample text"))
 
127
  index = faiss.IndexFlatL2(dimension)
128
  vector_store = FAISS(
129
+ embedding_function=embeddings,
130
  index=index,
131
  docstore=InMemoryDocstore({}),
132
  index_to_docstore_id={}
133
  )
134
+ uuids = [str(uuid.uuid4()) for _ in texts]
 
 
135
  vector_store.add_texts(texts, ids=uuids)
136
+
 
137
  status.update(label="Saving vector store...")
138
+ progress_bar.progress(0.9)
 
139
  vector_store.save_local("vectorstore/faiss_index")
140
+
141
+ status.update(label="Done!", state="complete")
 
142
  progress_bar.progress(1.0)
143
+
144
  return vector_store
145
 
146
  def answer_question(vectorstore, query):
 
149
  model_kwargs={"temperature": 0.7, "max_length": 512},
150
  huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
151
  )
152
+
153
  retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
154
+
155
  prompt_template = PromptTemplate(
156
  template="Use the provided context to answer the question concisely:\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:",
157
  input_variables=["context", "question"]
158
  )
159
+
160
  qa_chain = RetrievalQA.from_chain_type(
161
  llm=llm,
162
  chain_type="stuff",
 
164
  return_source_documents=False,
165
  chain_type_kwargs={"prompt": prompt_template}
166
  )
167
+
168
  result = qa_chain({"query": query})
169
  return result["result"].split("Answer:")[-1].strip()
170