samim2024 committed on
Commit
0a1db48
·
verified ·
1 Parent(s): 4a31251

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -42
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import streamlit as st
2
  import os
3
  from io import BytesIO
@@ -13,16 +14,13 @@ import faiss
13
  import uuid
14
  from dotenv import load_dotenv
15
 
16
- # Load local .env (only useful locally)
17
  load_dotenv()
18
-
19
- # Load keys
20
- RAG_ACCESS_KEY = os.getenv("RAG_ACCESS_KEY")
21
  HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "").strip()
 
22
 
23
  if not HUGGINGFACEHUB_API_TOKEN:
24
- st.warning("Hugging Face API token not found in environment variables! "
25
- "Please set it in your Hugging Face Secrets or your .env file.")
26
 
27
  # Initialize session state
28
  if "vectorstore" not in st.session_state:
@@ -38,10 +36,10 @@ with st.sidebar:
38
  st.image("bsnl_logo.png", width=200)
39
  except Exception:
40
  st.warning("BSNL logo not found.")
41
-
42
  st.header("RAG Control Panel")
43
  api_key_input = st.text_input("Enter RAG Access Key", type="password")
44
-
45
  # Blue authenticate button style
46
  st.markdown("""
47
  <style>
@@ -61,7 +59,7 @@ with st.sidebar:
61
  }
62
  </style>
63
  """, unsafe_allow_html=True)
64
-
65
  with st.container():
66
  st.markdown('<div class="auth-button">', unsafe_allow_html=True)
67
  if st.button("Authenticate"):
@@ -71,18 +69,22 @@ with st.sidebar:
71
  else:
72
  st.error("Invalid API key.")
73
  st.markdown('</div>', unsafe_allow_html=True)
74
-
75
  if st.session_state.authenticated:
76
  input_data = st.file_uploader("Upload a PDF file", type=["pdf"])
77
-
78
  if st.button("Process File") and input_data is not None:
79
  try:
80
  vector_store = process_input(input_data)
81
  st.session_state.vectorstore = vector_store
82
  st.success("File processed successfully. You can now ask questions.")
 
 
 
 
83
  except Exception as e:
84
- st.error(f"Processing failed: {str(e)}")
85
-
86
  st.subheader("Chat History")
87
  for i, (q, a) in enumerate(st.session_state.history):
88
  st.write(f"**Q{i+1}:** {q}")
@@ -93,25 +95,52 @@ with st.sidebar:
93
  def main():
94
  st.markdown("""
95
  <style>
 
96
  .stApp {
 
97
  font-family: 'Roboto', sans-serif;
 
 
 
98
  background-color: #FFFFFF;
99
- color: #333;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  }
101
  </style>
102
  """, unsafe_allow_html=True)
103
-
104
  st.title("RAG Q&A App with Mistral AI")
105
- st.markdown("Welcome to the BSNL RAG App! Upload a PDF and ask questions.")
106
-
107
  if not st.session_state.authenticated:
108
  st.warning("Please authenticate using the sidebar.")
109
  return
110
-
111
  if st.session_state.vectorstore is None:
112
  st.info("Please upload and process a PDF file.")
113
  return
114
-
115
  query = st.text_input("Enter your question:")
116
  if st.button("Submit") and query:
117
  with st.spinner("Generating answer..."):
@@ -124,31 +153,37 @@ def main():
124
 
125
  # PDF processing logic
126
  def process_input(input_data):
127
- os.makedirs("vectorstore", exist_ok=True)
128
- os.chmod("vectorstore", 0o777)
129
-
130
  progress_bar = st.progress(0)
131
  status = st.empty()
132
-
 
133
  status.text("Reading PDF file...")
134
- progress_bar.progress(0.2)
 
135
  pdf_reader = PdfReader(BytesIO(input_data.read()))
136
  documents = "".join([page.extract_text() or "" for page in pdf_reader.pages])
137
-
138
- status.text("Splitting text...")
139
- progress_bar.progress(0.4)
 
 
140
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
141
  texts = text_splitter.split_text(documents)
142
-
 
143
  status.text("Creating embeddings...")
144
- progress_bar.progress(0.6)
 
145
  hf_embeddings = HuggingFaceEmbeddings(
146
  model_name="sentence-transformers/all-mpnet-base-v2",
147
  model_kwargs={'device': 'cpu'}
148
  )
149
-
 
150
  status.text("Building vector store...")
151
- progress_bar.progress(0.8)
 
152
  dimension = len(hf_embeddings.embed_query("test"))
153
  index = faiss.IndexFlatL2(dimension)
154
  vector_store = FAISS(
@@ -157,35 +192,34 @@ def process_input(input_data):
157
  docstore=InMemoryDocstore({}),
158
  index_to_docstore_id={}
159
  )
160
-
 
161
  uuids = [str(uuid.uuid4()) for _ in texts]
162
  vector_store.add_texts(texts, ids=uuids)
163
-
164
- status.text("Saving vector store...")
165
- progress_bar.progress(0.9)
166
- vector_store.save_local("vectorstore/faiss_index")
167
-
168
- status.text("Done!")
169
  progress_bar.progress(1.0)
 
170
  return vector_store
171
 
172
  # Question-answering logic
173
  def answer_question(vectorstore, query):
174
  if not HUGGINGFACEHUB_API_TOKEN:
175
  raise RuntimeError("Missing Hugging Face API token. Please set it in your secrets.")
176
-
177
  llm = HuggingFaceHub(
178
  repo_id="mistralai/Mistral-7B-Instruct-v0.1",
179
  model_kwargs={"temperature": 0.7, "max_length": 512},
180
  huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
181
  )
182
-
183
  retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
184
  prompt_template = PromptTemplate(
185
  template="Use the context to answer the question concisely:\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:",
186
  input_variables=["context", "question"]
187
  )
188
-
189
  qa_chain = RetrievalQA.from_chain_type(
190
  llm=llm,
191
  chain_type="stuff",
@@ -193,7 +227,7 @@ def answer_question(vectorstore, query):
193
  return_source_documents=False,
194
  chain_type_kwargs={"prompt": prompt_template}
195
  )
196
-
197
  result = qa_chain({"query": query})
198
  return result["result"].split("Answer:")[-1].strip()
199
 
 
1
+ # app.py
2
  import streamlit as st
3
  import os
4
  from io import BytesIO
 
14
  import uuid
15
  from dotenv import load_dotenv
16
 
17
+ # Load environment variables
18
  load_dotenv()
 
 
 
19
  HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "").strip()
20
+ RAG_ACCESS_KEY = os.getenv("RAG_ACCESS_KEY")
21
 
22
  if not HUGGINGFACEHUB_API_TOKEN:
23
+ st.warning("Hugging Face API token not found! Please set HUGGINGFACEHUB_API_TOKEN in your .env file.")
 
24
 
25
  # Initialize session state
26
  if "vectorstore" not in st.session_state:
 
36
  st.image("bsnl_logo.png", width=200)
37
  except Exception:
38
  st.warning("BSNL logo not found.")
39
+
40
  st.header("RAG Control Panel")
41
  api_key_input = st.text_input("Enter RAG Access Key", type="password")
42
+
43
  # Blue authenticate button style
44
  st.markdown("""
45
  <style>
 
59
  }
60
  </style>
61
  """, unsafe_allow_html=True)
62
+
63
  with st.container():
64
  st.markdown('<div class="auth-button">', unsafe_allow_html=True)
65
  if st.button("Authenticate"):
 
69
  else:
70
  st.error("Invalid API key.")
71
  st.markdown('</div>', unsafe_allow_html=True)
72
+
73
  if st.session_state.authenticated:
74
  input_data = st.file_uploader("Upload a PDF file", type=["pdf"])
75
+
76
  if st.button("Process File") and input_data is not None:
77
  try:
78
  vector_store = process_input(input_data)
79
  st.session_state.vectorstore = vector_store
80
  st.success("File processed successfully. You can now ask questions.")
81
+ except st.StreamlitAPIException as e:
82
+ st.error(f"File upload failed: Streamlit API error - {str(e)}. Check server configuration.")
83
+ except (PermissionError, OSError) as e:
84
+ st.error(f"File upload failed: Permission or OS error - {str(e)}. Check file system access.")
85
  except Exception as e:
86
+ st.error(f"File upload failed: Unexpected error - {str(e)}. Please try again or check server logs.")
87
+
88
  st.subheader("Chat History")
89
  for i, (q, a) in enumerate(st.session_state.history):
90
  st.write(f"**Q{i+1}:** {q}")
 
95
  def main():
96
  st.markdown("""
97
  <style>
98
+ @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
99
  .stApp {
100
+ background-color: #FFFFFF;
101
  font-family: 'Roboto', sans-serif;
102
+ color: #333333;
103
+ }
104
+ .stTextInput > div > div > input {
105
  background-color: #FFFFFF;
106
+ color: #333333;
107
+ border-radius: 8px;
108
+ border: 1px solid #007BFF;
109
+ padding: 10px;
110
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
111
+ }
112
+ .stButton > button {
113
+ background-color: #007BFF;
114
+ color: white;
115
+ border-radius: 8px;
116
+ padding: 10px 20px;
117
+ border: none;
118
+ transition: all 0.3s ease;
119
+ box-shadow: 0 2px 4px rgba(0,0,0,0.2);
120
+ }
121
+ .stButton > button:hover {
122
+ background-color: #0056b3;
123
+ transform: scale(1.05);
124
+ }
125
+ .stSidebar {
126
+ background-color: #F5F5F5;
127
+ padding: 20px;
128
+ border-right: 2px solid #007BFF;
129
  }
130
  </style>
131
  """, unsafe_allow_html=True)
132
+
133
  st.title("RAG Q&A App with Mistral AI")
134
+ st.markdown("Welcome to the BSNL RAG App! Upload a PDF file and ask questions.", unsafe_allow_html=True)
135
+
136
  if not st.session_state.authenticated:
137
  st.warning("Please authenticate using the sidebar.")
138
  return
139
+
140
  if st.session_state.vectorstore is None:
141
  st.info("Please upload and process a PDF file.")
142
  return
143
+
144
  query = st.text_input("Enter your question:")
145
  if st.button("Submit") and query:
146
  with st.spinner("Generating answer..."):
 
153
 
154
# PDF processing logic
def process_input(input_data):
    """Read an uploaded PDF, chunk its text, embed the chunks, and build a FAISS store.

    Args:
        input_data: Streamlit UploadedFile holding the PDF bytes (read once, in memory).

    Returns:
        A langchain FAISS vector store populated with the document's text chunks.

    Raises:
        ValueError: If no extractable text is found (e.g. a scanned, image-only PDF).
    """
    # Initialize progress bar and status placeholder for step-by-step feedback
    progress_bar = st.progress(0)
    status = st.empty()

    # Step 1: Read PDF file entirely in memory
    status.text("Reading PDF file...")
    progress_bar.progress(0.20)

    pdf_reader = PdfReader(BytesIO(input_data.read()))
    # extract_text() can return None for image-only pages; treat those as empty
    documents = "".join([page.extract_text() or "" for page in pdf_reader.pages])

    # Fail loudly instead of silently building an empty, unanswerable store
    if not documents.strip():
        raise ValueError("No extractable text found in the PDF (is it a scanned image?).")

    # Step 2: Split text into overlapping chunks
    status.text("Splitting text into chunks...")
    progress_bar.progress(0.40)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = text_splitter.split_text(documents)

    # Step 3: Create embeddings (CPU-only to avoid GPU requirements on the host)
    status.text("Creating embeddings...")
    progress_bar.progress(0.60)

    hf_embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={'device': 'cpu'}
    )

    # Step 4: Initialize FAISS vector store
    status.text("Building vector store...")
    progress_bar.progress(0.80)

    # Probe the embedding dimension with a dummy query, then build an L2 flat index
    dimension = len(hf_embeddings.embed_query("test"))
    index = faiss.IndexFlatL2(dimension)
    vector_store = FAISS(
        embedding_function=hf_embeddings,  # NOTE(review): kwarg reconstructed from omitted diff context — confirm
        index=index,
        docstore=InMemoryDocstore({}),
        index_to_docstore_id={}
    )

    # Add texts to the vector store under fresh UUID document ids
    uuids = [str(uuid.uuid4()) for _ in texts]
    vector_store.add_texts(texts, ids=uuids)

    # Step 5: Complete processing
    status.text("Processing complete!")
    progress_bar.progress(1.0)

    return vector_store
205
 
206
# Question-answering logic
def answer_question(vectorstore, query):
    """Answer a question against the FAISS store via a Mistral model on HuggingFaceHub.

    Args:
        vectorstore: FAISS vector store built by process_input().
        query: The user's question string.

    Returns:
        The model's answer text, with any echoed prompt stripped.

    Raises:
        RuntimeError: If HUGGINGFACEHUB_API_TOKEN is not configured.
    """
    if not HUGGINGFACEHUB_API_TOKEN:
        raise RuntimeError("Missing Hugging Face API token. Please set it in your secrets.")

    llm = HuggingFaceHub(
        repo_id="mistralai/Mistral-7B-Instruct-v0.1",
        model_kwargs={"temperature": 0.7, "max_length": 512},
        huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
    )

    # Retrieve the 3 most similar chunks as context for the prompt
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    prompt_template = PromptTemplate(
        template="Use the context to answer the question concisely:\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:",
        input_variables=["context", "question"]
    )

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,  # NOTE(review): line restored from omitted diff context — RetrievalQA requires it
        return_source_documents=False,
        chain_type_kwargs={"prompt": prompt_template}
    )

    result = qa_chain({"query": query})
    # Hub text-generation models echo the prompt; keep only text after the final "Answer:"
    return result["result"].split("Answer:")[-1].strip()
233