samim2024 committed on
Commit
7edfd17
·
verified ·
1 Parent(s): 38af0d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -30
app.py CHANGED
@@ -1,7 +1,5 @@
1
- # app.py
2
  import streamlit as st
3
  import os
4
- import tempfile
5
  from io import BytesIO
6
  from PyPDF2 import PdfReader
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -59,7 +57,9 @@ with st.sidebar:
59
  st.session_state.vectorstore = vector_store
60
  st.success("File processed successfully. You can now ask questions.")
61
  except (PermissionError, OSError) as e:
62
- st.error(f"Error processing file: {str(e)}. Check file permissions or server configuration.")
 
 
63
 
64
  # Display chat history
65
  st.subheader("Chat History")
@@ -133,57 +133,46 @@ def main():
133
  st.write("**Answer:**", answer)
134
 
135
  def process_input(input_data):
136
- # Create uploads directory with proper permissions
137
  try:
138
- os.makedirs("uploads", exist_ok=True)
139
- os.chmod("uploads", 0o777) # Ensure write permissions
140
  except PermissionError as e:
141
- st.error(f"Failed to create uploads directory: {str(e)}")
142
  raise
143
 
144
  # Initialize progress bar and status
145
  progress_bar = st.progress(0)
146
  status = st.status("Processing PDF file...", expanded=True)
147
 
148
- # Step 1: Save file temporarily
149
- status.update(label="Saving PDF file...")
150
  progress_bar.progress(0.20)
151
 
152
- with tempfile.NamedTemporaryFile(delete=False, dir="uploads", suffix=".pdf") as tmp_file:
153
- tmp_file.write(input_data.read())
154
- tmp_file_path = tmp_file.name
155
-
156
- # Step 2: Read PDF file
157
- status.update(label="Reading PDF file...")
158
- progress_bar.progress(0.40)
159
 
160
- try:
161
- pdf_reader = PdfReader(tmp_file_path)
162
- documents = ""
163
- for page in pdf_reader.pages:
164
- documents += page.extract_text() or ""
165
- finally:
166
- os.remove(tmp_file_path) # Clean up temporary file
167
-
168
- # Step 3: Split text
169
  status.update(label="Splitting text into chunks...")
170
- progress_bar.progress(0.60)
171
 
172
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
173
  texts = text_splitter.split_text(documents)
174
 
175
- # Step 4: Create embeddings
176
  status.update(label="Creating embeddings...")
177
- progress_bar.progress(0.80)
178
 
179
  hf_embeddings = HuggingFaceEmbeddings(
180
  model_name="sentence-transformers/all-mpnet-base-v2",
181
  model_kwargs={'device': 'cpu'}
182
  )
183
 
184
- # Step 5: Initialize FAISS vector store
185
  status.update(label="Building vector store...")
186
- progress_bar.progress(0.90)
187
 
188
  dimension = len(hf_embeddings.embed_query("sample text"))
189
  index = faiss.IndexFlatL2(dimension)
@@ -199,6 +188,9 @@ def process_input(input_data):
199
  vector_store.add_texts(texts, ids=uuids)
200
 
201
  # Save vector store locally
 
 
 
202
  vector_store.save_local("vectorstore/faiss_index")
203
 
204
  # Complete processing
 
 
1
  import streamlit as st
2
  import os
 
3
  from io import BytesIO
4
  from PyPDF2 import PdfReader
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
57
  st.session_state.vectorstore = vector_store
58
  st.success("File processed successfully. You can now ask questions.")
59
  except (PermissionError, OSError) as e:
60
+ st.error(f"File upload failed (Permission or OS error): {str(e)}. Check server permissions or file system access.")
61
+ except Exception as e:
62
+ st.error(f"File upload failed (Unexpected error): {str(e)}. Please try again or check server logs.")
63
 
64
  # Display chat history
65
  st.subheader("Chat History")
 
133
  st.write("**Answer:**", answer)
134
 
135
  def process_input(input_data):
136
+ # Create vectorstore directory for FAISS index
137
  try:
138
+ os.makedirs("vectorstore", exist_ok=True)
139
+ os.chmod("vectorstore", 0o777) # Ensure write permissions
140
  except PermissionError as e:
141
+ st.error(f"Failed to create vectorstore directory: {str(e)}")
142
  raise
143
 
144
  # Initialize progress bar and status
145
  progress_bar = st.progress(0)
146
  status = st.status("Processing PDF file...", expanded=True)
147
 
148
+ # Step 1: Read PDF file in memory
149
+ status.update(label="Reading PDF file...")
150
  progress_bar.progress(0.20)
151
 
152
+ pdf_reader = PdfReader(BytesIO(input_data.read()))
153
+ documents = ""
154
+ for page in pdf_reader.pages:
155
+ documents += page.extract_text() or ""
 
 
 
156
 
157
+ # Step 2: Split text
 
 
 
 
 
 
 
 
158
  status.update(label="Splitting text into chunks...")
159
+ progress_bar.progress(0.40)
160
 
161
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
162
  texts = text_splitter.split_text(documents)
163
 
164
+ # Step 3: Create embeddings
165
  status.update(label="Creating embeddings...")
166
+ progress_bar.progress(0.60)
167
 
168
  hf_embeddings = HuggingFaceEmbeddings(
169
  model_name="sentence-transformers/all-mpnet-base-v2",
170
  model_kwargs={'device': 'cpu'}
171
  )
172
 
173
+ # Step 4: Initialize FAISS vector store
174
  status.update(label="Building vector store...")
175
+ progress_bar.progress(0.80)
176
 
177
  dimension = len(hf_embeddings.embed_query("sample text"))
178
  index = faiss.IndexFlatL2(dimension)
 
188
  vector_store.add_texts(texts, ids=uuids)
189
 
190
  # Save vector store locally
191
+ status.update(label="Saving vector store...")
192
+ progress_bar.progress(0.90)
193
+
194
  vector_store.save_local("vectorstore/faiss_index")
195
 
196
  # Complete processing