samim2024 committed on
Commit
6f96a50
·
verified ·
1 Parent(s): 117edbd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -79
app.py CHANGED
@@ -31,6 +31,85 @@ if "history" not in st.session_state:
31
  if "authenticated" not in st.session_state:
32
  st.session_state.authenticated = False
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  # Sidebar with BSNL logo and authentication
35
  with st.sidebar:
36
  try:
@@ -152,84 +231,5 @@ def main():
152
  except Exception as e:
153
  st.error(f"Error generating answer: {str(e)}")
154
 
155
- # PDF processing logic
156
- def process_input(input_data):
157
- # Initialize progress bar and status
158
- progress_bar = st.progress(0)
159
- status = st.empty()
160
-
161
- # Step 1: Read PDF file in memory
162
- status.text("Reading PDF file...")
163
- progress_bar.progress(0.25)
164
-
165
- pdf_reader = PdfReader(BytesIO(input_data.read()))
166
- documents = "".join([page.extract_text() or "" for page in pdf_reader.pages])
167
-
168
- # Step 2: Split text
169
- status.text("Splitting text into chunks...")
170
- progress_bar.progress(0.50)
171
-
172
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
173
- texts = text_splitter.split_text(documents)
174
-
175
- # Step 3: Create embeddings
176
- status.text("Creating embeddings...")
177
- progress_bar.progress(0.75)
178
-
179
- hf_embeddings = HuggingFaceEmbeddings(
180
- model_name="sentence-transformers/all-mpnet-base-v2",
181
- model_kwargs={'device': 'cpu'}
182
- )
183
-
184
- # Step 4: Initialize FAISS vector store
185
- status.text("Building vector store...")
186
- progress_bar.progress(1.0)
187
-
188
- dimension = len(hf_embeddings.embed_query("test"))
189
- index = faiss.IndexFlatL2(dimension)
190
- vector_store = FAISS(
191
- embedding_function=hf_embeddings,
192
- index=index,
193
- docstore=InMemoryDocstore({}),
194
- index_to_docstore_id={}
195
- )
196
-
197
- # Add texts to vector store
198
- uuids = [str(uuid.uuid4()) for _ in texts]
199
- vector_store.add_texts(texts, ids=uuids)
200
-
201
- # Complete processing
202
- status.text("Processing complete!")
203
-
204
- return vector_store
205
-
206
- # Question-answering logic
207
- def answer_question(vectorstore, query):
208
- if not HUGGINGFACEHUB_API_TOKEN:
209
- raise RuntimeError("Missing Hugging Face API token. Please set it in your secrets.")
210
-
211
- llm = HuggingFaceHub(
212
- repo_id="mistralai/Mistral-7B-Instruct-v0.1",
213
- model_kwargs={"temperature": 0.7, "max_length": 512},
214
- huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
215
- )
216
-
217
- retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
218
- prompt_template = PromptTemplate(
219
- template="Use the context to answer the question concisely:\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:",
220
- input_variables=["context", "question"]
221
- )
222
-
223
- qa_chain = RetrievalQA.from_chain_type(
224
- llm=llm,
225
- chain_type="stuff",
226
- retriever=retriever,
227
- return_source_documents=False,
228
- chain_type_kwargs={"prompt": prompt_template}
229
- )
230
-
231
- result = qa_chain({"query": query})
232
- return result["result"].split("Answer:")[-1].strip()
233
-
234
  if __name__ == "__main__":
235
  main()
 
31
  if "authenticated" not in st.session_state:
32
  st.session_state.authenticated = False
33
 
# PDF processing logic
def process_input(input_data):
    """Build an in-memory FAISS vector store from a PDF.

    The PDF is read fully into memory, the text of every page is extracted,
    split into overlapping chunks, embedded with a sentence-transformers
    model on CPU, and added to a fresh flat L2 FAISS index. Progress is
    reported through a Streamlit progress bar and status placeholder.

    Args:
        input_data: Object exposing ``read()`` that returns the PDF bytes
            (presumably a Streamlit uploaded file; confirm against caller).

    Returns:
        The populated ``FAISS`` vector store.

    Raises:
        ValueError: If no text could be extracted from the PDF (for example
            a scanned, image-only document), since there would be nothing
            to index.
    """
    # Initialize progress bar and status
    progress_bar = st.progress(0)
    status = st.empty()

    # Step 1: Read PDF file in memory
    status.text("Reading PDF file...")
    progress_bar.progress(0.25)

    pdf_reader = PdfReader(BytesIO(input_data.read()))
    # Join pages with a newline so the last word of one page is not fused
    # with the first word of the next before chunking.
    documents = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

    if not documents.strip():
        # Fail early with a clear message instead of handing an empty chunk
        # list to the vector store.
        raise ValueError("No extractable text found in the PDF.")

    # Step 2: Split text
    status.text("Splitting text into chunks...")
    progress_bar.progress(0.50)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = text_splitter.split_text(documents)

    # Step 3: Create embeddings
    status.text("Creating embeddings...")
    progress_bar.progress(0.75)

    hf_embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={'device': 'cpu'}
    )

    # Step 4: Initialize FAISS vector store
    status.text("Building vector store...")

    # Embed a probe string once to learn the embedding dimension the index
    # must be created with.
    dimension = len(hf_embeddings.embed_query("test"))
    index = faiss.IndexFlatL2(dimension)
    vector_store = FAISS(
        embedding_function=hf_embeddings,
        index=index,
        docstore=InMemoryDocstore({}),
        index_to_docstore_id={}
    )

    # Add texts to vector store, one fresh UUID per chunk
    uuids = [str(uuid.uuid4()) for _ in texts]
    vector_store.add_texts(texts, ids=uuids)

    # Complete processing: only report 100% once the store is populated
    progress_bar.progress(1.0)
    status.text("Processing complete!")

    return vector_store
84
+
# Question-answering logic
def answer_question(vectorstore, query):
    """Answer ``query`` using chunks retrieved from ``vectorstore``.

    A hosted Mistral-7B-Instruct model is wrapped in a "stuff" RetrievalQA
    chain that retrieves the top 3 chunks and fills them into a fixed
    prompt. The text after the last "Answer:" marker in the chain output is
    returned, stripped of surrounding whitespace.

    Args:
        vectorstore: Vector store exposing ``as_retriever``.
        query: The user's question.

    Returns:
        The answer text as a string.

    Raises:
        RuntimeError: If ``HUGGINGFACEHUB_API_TOKEN`` is not set.
    """
    if not HUGGINGFACEHUB_API_TOKEN:
        raise RuntimeError("Missing Hugging Face API token. Please set it in your secrets.")

    generation_options = {"temperature": 0.7, "max_length": 512}
    model = HuggingFaceHub(
        repo_id="mistralai/Mistral-7B-Instruct-v0.1",
        model_kwargs=generation_options,
        huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
    )

    chain = RetrievalQA.from_chain_type(
        llm=model,
        chain_type="stuff",
        # Retrieve the three closest chunks as context.
        retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=False,
        chain_type_kwargs={
            "prompt": PromptTemplate(
                template="Use the context to answer the question concisely:\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:",
                input_variables=["context", "question"],
            )
        },
    )

    response = chain({"query": query})
    raw_output = response["result"]
    # Keep only what follows the last "Answer:" marker; if the marker is
    # absent, rpartition leaves the whole output in the tail.
    _, _, answer = raw_output.rpartition("Answer:")
    return answer.strip()
112
+
113
  # Sidebar with BSNL logo and authentication
114
  with st.sidebar:
115
  try:
 
231
  except Exception as e:
232
  st.error(f"Error generating answer: {str(e)}")
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  if __name__ == "__main__":
235
  main()