inam09 committed
Commit 3b50de3 · verified · Parent: 33a7b36

Update src/streamlit_app.py

Files changed (1):
  1. src/streamlit_app.py  +302 -308
src/streamlit_app.py CHANGED
@@ -21,247 +21,241 @@ import torch # Import torch
  # nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir, quiet=True)
  # nltk.data.path.append(nltk_data_dir)

- nltk_data_dir = "/tmp/nltk_data"
- os.makedirs(nltk_data_dir, exist_ok=True)
- nltk.data.path.append(nltk_data_dir)
-
- # nltk.download('punkt', download_dir=nltk_data_dir)
- # nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir)
- nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir, quiet=True)
- nltk.download('punkt', download_dir=nltk_data_dir, quiet=True)
- nltk.download('punkt_tab', download_dir=nltk_data_dir, quiet=True)
-
-
- # Load models
- # summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=torch.device("cpu"))
- # summarizer = pipeline(
- #     "summarization",
- #     model="sshleifer/distilbart-cnn-12-6",
- #     from_flax=True,
- #     device=-1  # CPU mode, use device=0 for GPU
- # )
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
- qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", device=torch.device("cpu"))
- embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
- vector_dbs = {}  # Dictionary to store multiple vector databases, keyed by document title
- extracted_texts = {}  # Dictionary to store extracted text, keyed by document title
- current_doc_title = None
-
- # ---------------------------------------------
- # Extract text from PDF or Image
- # ---------------------------------------------
- def extract_text(uploaded_file):
-     global current_doc_title
-     current_doc_title = uploaded_file.name
-     suffix = uploaded_file.name.lower()
-     with tempfile.NamedTemporaryFile(delete=False) as tmp:
-         tmp.write(uploaded_file.read())
-         path = tmp.name
-
-     text = ""
-     if suffix.endswith(".pdf"):
-         with pdfplumber.open(path) as pdf:
-             for page in pdf.pages:
-                 page_text = page.extract_text()
-                 if page_text:
-                     text += page_text + "\n"
-     else:
-         try:
-             text = pytesseract.image_to_string(Image.open(path))
-         except Exception as e:
-             st.error(f"Error during OCR for {uploaded_file.name}: {e}")
-             text = ""
-
-     os.remove(path)
-     return text.strip()
-
- # ---------------------------------------------
- # Store Embeddings in FAISS
- # ---------------------------------------------
- def store_vector(text):
-     global vector_dbs, current_doc_title
-     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
-     docs = text_splitter.create_documents([text])
-     for doc in docs:
-         doc.metadata = {"title": current_doc_title}  # Add document title as metadata
-
-     if current_doc_title in vector_dbs:
-         vector_dbs[current_doc_title].add_documents(docs)  # Append to existing DB
-     else:
-         vector_dbs[current_doc_title] = FAISS.from_documents(docs, embedding_model)
-
- # ---------------------------------------------
- # Summarize Text
- # ---------------------------------------------
- def summarize(text):
-     if len(text.split()) < 100:
-         return "Text too short to summarize."
-     chunks = [text[i : i + 1024] for i in range(0, len(text), 1024)]
-     summaries = []
-     for chunk in chunks:
-         max_len = int(len(chunk.split()) * 0.6)
-         max_len = max(30, min(max_len, 150))
-         try:
-             summary = summarizer(chunk, max_length=max_len, min_length=20)[0]["summary_text"]
-             summaries.append(summary)
-         except Exception as e:
-             st.error(f"Error during summarization: {e}")
-             return "An error occurred during summarization."
-     return " ".join(summaries)
-
- # ---------------------------------------------
- # Question Answering
- # ---------------------------------------------
- def topic_search(question, doc_title=None):
-     global vector_dbs
-     if not vector_dbs:
-         st.warning("Please upload and process a file first in the 'Upload & Extract' tab.")
-         return ""
-
-     try:
-         if doc_title and doc_title in vector_dbs:
-             retriever = vector_dbs[doc_title].as_retriever(search_kwargs={"k": 3})
-         else:
-             combined_docs = []
-             for db in vector_dbs.values():
-                 combined_docs.extend(db.get_relevant_documents(question))
-             if not combined_docs:
-                 return "No relevant information found across uploaded documents."
-             temp_db = FAISS.from_documents(combined_docs, embedding_model)
-             retriever = temp_db.as_retriever(search_kwargs={"k": 3})
-
-         relevant_docs = retriever.get_relevant_documents(question)
-         context = "\n\n".join([doc.page_content for doc in relevant_docs])
-         answer = qa_pipeline(question=question, context=context)["answer"]
-         return answer.strip()
-     except Exception as e:
-         st.error(f"Error during question answering: {e}")
-         return "An error occurred while trying to answer the question."
-
- # ---------------------------------------------
- # Flashcard Generation
- # ---------------------------------------------
- def generate_flashcards(text):
-     flashcards = []
-     seen_terms = set()
-     sentences = nltk.sent_tokenize(text)
-
-     for i, sent in enumerate(sentences):
-         words = nltk.word_tokenize(sent)
-         tagged_words = nltk.pos_tag(words)
-
-         potential_terms = [word for word, tag in tagged_words if tag.startswith('NN') or tag.startswith('NP')]
-
-         for term in potential_terms:
-             if term in seen_terms:
-                 continue
-
-             defining_patterns = [r"\b" + re.escape(term) + r"\b\s+is\s+(?:a|an|the)\s+(.+?)(?:\.|,|\n|$)",
-                                  r"\b" + re.escape(term) + r"\b\s+refers\s+to\s+(.+?)(?:\.|,|\n|$)",
-                                  r"\b" + re.escape(term) + r"\b\s+means\s+(.+?)(?:\.|,|\n|$)",
-                                  r"\b" + re.escape(term) + r"\b,\s+defined\s+as\s+(.+?)(?:\.|,|\n|$)",
-                                  r"\b" + re.escape(term) + r"\b:\s+(.+?)(?:\.|,|\n|$)"]
-
-             potential_definitions = []
-             for pattern in defining_patterns:
-                 match = re.search(pattern, sent, re.IGNORECASE)
-                 if match and len(match.groups()) >= 1:
-                     potential_definitions.append(match.group(1).strip())
-
-             for definition in potential_definitions:
-                 if 2 <= len(definition.split()) <= 30:
-                     flashcards.append({"term": term, "definition": definition})
-                     seen_terms.add(term)
-                     break
-
-             if term not in seen_terms and i > 0:
-                 prev_sent = sentences[i-1]
-                 defining_patterns_prev = [r"The\s+\b" + re.escape(term) + r"\b\s+is\s+(.+?)(?:\.|,|\n|$)",
-                                           r"This\s+\b" + re.escape(term) + r"\b\s+refers\s+to\s+(.+?)(?:\.|,|\n|$)",
-                                           r"It\s+means\s+the\s+\b" + re.escape(term) + r"\b\s+(.+?)(?:\.|,|\n|$)"]
-                 for pattern in defining_patterns_prev:
-                     match = re.search(pattern, prev_sent, re.IGNORECASE)
-                     if match and term in sent and len(match.groups()) >= 1:
-                         definition = match.group(1).strip()
-                         if 2 <= len(definition.split()) <= 30:
-                             flashcards.append({"term": term, "definition": definition})
-                             seen_terms.add(term)
-                             break
-
-     return flashcards
-
- # ---------------------------------------------
- # Text to Speech
- # ---------------------------------------------
- def read_aloud(text):
-     try:
-         tts = gTTS(text)
-         audio_path = os.path.join(tempfile.gettempdir(), "summary.mp3")
-         tts.save(audio_path)
-         return audio_path
-     except Exception as e:
-         st.error(f"Error during text-to-speech: {e}")
-         return None

- # ---------------------------------------------
- # Quiz Generation and Handling
- # ---------------------------------------------
- def generate_quiz_questions(text, num_questions=5):
-     flashcards = generate_flashcards(text)  # Reuse flashcard logic for potential terms/definitions
-     if not flashcards:
-         return []
-
-     questions = []
-     used_indices = set()
-     num_available = len(flashcards)
-
-     while len(questions) < num_questions and len(used_indices) < num_available:
-         index = random.randint(0, num_available - 1)
-         if index in used_indices:
-             continue
-         used_indices.add(index)
-         card = flashcards[index]
-         correct_answer = card['term']
-         definition = card['definition']
-
-         # Generate incorrect answers (very basic for now)
-         incorrect_options = random.sample([c['term'] for i, c in enumerate(flashcards) if i != index], 3)
-         options = [correct_answer] + incorrect_options
-         random.shuffle(options)
-
-         questions.append({
-             "question": f"What is the term for: {definition}",
-             "options": options,
-             "correct_answer": correct_answer,
-             "user_answer": None  # To store user's choice
-         })
-
-     return questions
-
- def display_quiz(questions):
-     st.session_state.quiz_questions = questions
-     st.session_state.user_answers = {}
-     st.session_state.quiz_submitted = False
-     for i, q in enumerate(st.session_state.quiz_questions):
-         st.subheader(f"Question {i + 1}:")
-         st.write(q["question"])
-         st.session_state.user_answers[i] = st.radio(f"Answer for Question {i + 1}", q["options"])
-     st.button("Submit Quiz", on_click=submit_quiz)
-
- def submit_quiz():
-     st.session_state.quiz_submitted = True
-
- def grade_quiz():
-     if st.session_state.quiz_submitted:
-         score = 0
-         for i, q in enumerate(st.session_state.quiz_questions):
-             user_answer = st.session_state.user_answers.get(i)
-             if user_answer == q["correct_answer"]:
-                 score += 1
-                 st.success(f"Question {i + 1}: Correct!")
-             else:
-                 st.error(f"Question {i + 1}: Incorrect. Correct answer was: {q['correct_answer']}")
-         st.write(f"## Your Score: {score} / {len(st.session_state.quiz_questions)}")

  # ---------------------------------------------
  # Streamlit Interface with Tabs
@@ -305,71 +299,71 @@ with tab2:
      else:
          st.info("Please upload and extract a file in the 'Upload & Extract' tab first.")

- with tab3:
-     st.header("❓ Question Answering")
-     doc_titles = list(vector_dbs.keys())
-     if doc_titles:
-         doc_title = st.selectbox("Search within document:", ["All Documents"] + doc_titles)
-         question = st.text_input("Ask a question about the content:")
-         if question:
-             with st.spinner("Searching for answer..."):
-                 if doc_title == "All Documents":
-                     answer = topic_search(question)
-                 else:
-                     answer = topic_search(question, doc_title=doc_title)
-             if answer:
-                 st.subheader("Answer:")
-                 st.write(answer)
-             else:
-                 st.warning("Could not find an answer in the selected document(s).")
-     else:
-         st.info("Please upload and extract a file in the 'Upload & Extract' tab first.")
-
- with tab4:
-     st.header("🧠 Interactive Learning: Flashcards")
-     doc_titles = list(extracted_texts.keys())
-     if doc_titles:
-         selected_doc_title_flashcard = st.selectbox("Generate flashcards from document:", doc_titles)
-         if st.button("Generate Flashcards"):
-             if selected_doc_title_flashcard in extracted_texts:
-                 with st.spinner(f"Generating flashcards from {selected_doc_title_flashcard}..."):
-                     flashcards = generate_flashcards(extracted_texts[selected_doc_title_flashcard])
-                 if flashcards:
-                     st.subheader("Flashcards")
-                     for i, card in enumerate(flashcards):
-                         with st.expander(f"Card {i+1}"):
-                             st.markdown(f"*Term:* {card['term']}")
-                             st.markdown(f"*Definition:* {card['definition']}")
-                 else:
-                     st.info("No flashcards could be generated from this document using the current method.")
-             else:
-                 st.warning(f"Original text for {selected_doc_title_flashcard} not found. Please re-upload.")
-     else:
-         st.info("Please upload and extract a file in the 'Upload & Extract' tab first.")
-
- with tab5:
-     st.header("📝 Quiz Yourself!")
-     doc_titles = list(extracted_texts.keys())
-     if doc_titles:
-         selected_doc_title_quiz = st.selectbox("Generate quiz from document:", doc_titles)
-         if selected_doc_title_quiz in extracted_texts:
-             text_for_quiz = extracted_texts[selected_doc_title_quiz]
-             if "quiz_questions" not in st.session_state:
-                 st.session_state.quiz_questions = generate_quiz_questions(text_for_quiz)
-
-             if st.session_state.quiz_questions:
-                 display_quiz(st.session_state.quiz_questions)
-                 if st.session_state.quiz_submitted:
-                     grade_quiz()
-
-                 if st.button("Refresh Questions"):
-                     st.session_state.quiz_questions = generate_quiz_questions(text_for_quiz)
-                     st.session_state.quiz_submitted = False
-                     st.session_state.user_answers = {}
-                     st.rerun()  # Force a re-render to show new questions
-             else:
-                 st.info("Could not generate quiz questions from the current document.")
-         else:
-             st.warning(f"Original text for {selected_doc_title_quiz} not found. Please re-upload.")
-     else:
-         st.info("Please upload and extract a file in the 'Upload & Extract' tab first.")
  # nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir, quiet=True)
  # nltk.data.path.append(nltk_data_dir)

+ # nltk_data_dir = "/tmp/nltk_data"
+ # os.makedirs(nltk_data_dir, exist_ok=True)
+ # nltk.data.path.append(nltk_data_dir)

+ # # nltk.download('punkt', download_dir=nltk_data_dir)
+ # # nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir)
+ # nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir, quiet=True)
+ # nltk.download('punkt', download_dir=nltk_data_dir, quiet=True)
+
+
+
+ # # Load models
+
+ # summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+ # qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", device=torch.device("cpu"))
+ # embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+ # vector_dbs = {}  # Dictionary to store multiple vector databases, keyed by document title
+ # extracted_texts = {}  # Dictionary to store extracted text, keyed by document title
+ # current_doc_title = None
+
+ # # ---------------------------------------------
+ # # Extract text from PDF or Image
+ # # ---------------------------------------------
+ # def extract_text(uploaded_file):
+ #     global current_doc_title
+ #     current_doc_title = uploaded_file.name
+ #     suffix = uploaded_file.name.lower()
+ #     with tempfile.NamedTemporaryFile(delete=False) as tmp:
+ #         tmp.write(uploaded_file.read())
+ #         path = tmp.name
+
+ #     text = ""
+ #     if suffix.endswith(".pdf"):
+ #         with pdfplumber.open(path) as pdf:
+ #             for page in pdf.pages:
+ #                 page_text = page.extract_text()
+ #                 if page_text:
+ #                     text += page_text + "\n"
+ #     else:
+ #         try:
+ #             text = pytesseract.image_to_string(Image.open(path))
+ #         except Exception as e:
+ #             st.error(f"Error during OCR for {uploaded_file.name}: {e}")
+ #             text = ""
+
+ #     os.remove(path)
+ #     return text.strip()
+
+ # # ---------------------------------------------
+ # # Store Embeddings in FAISS
+ # # ---------------------------------------------
+ # def store_vector(text):
+ #     global vector_dbs, current_doc_title
+ #     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+ #     docs = text_splitter.create_documents([text])
+ #     for doc in docs:
+ #         doc.metadata = {"title": current_doc_title}  # Add document title as metadata
+
+ #     if current_doc_title in vector_dbs:
+ #         vector_dbs[current_doc_title].add_documents(docs)  # Append to existing DB
+ #     else:
+ #         vector_dbs[current_doc_title] = FAISS.from_documents(docs, embedding_model)
+
+ # # ---------------------------------------------
+ # # Summarize Text
+ # # ---------------------------------------------
+ # def summarize(text):
+ #     if len(text.split()) < 100:
+ #         return "Text too short to summarize."
+ #     chunks = [text[i : i + 1024] for i in range(0, len(text), 1024)]
+ #     summaries = []
+ #     for chunk in chunks:
+ #         max_len = int(len(chunk.split()) * 0.6)
+ #         max_len = max(30, min(max_len, 150))
+ #         try:
+ #             summary = summarizer(chunk, max_length=max_len, min_length=20)[0]["summary_text"]
+ #             summaries.append(summary)
+ #         except Exception as e:
+ #             st.error(f"Error during summarization: {e}")
+ #             return "An error occurred during summarization."
+ #     return " ".join(summaries)
+
+ # # ---------------------------------------------
+ # # Question Answering
+ # # ---------------------------------------------
+ # def topic_search(question, doc_title=None):
+ #     global vector_dbs
+ #     if not vector_dbs:
+ #         st.warning("Please upload and process a file first in the 'Upload & Extract' tab.")
+ #         return ""
+
+ #     try:
+ #         if doc_title and doc_title in vector_dbs:
+ #             retriever = vector_dbs[doc_title].as_retriever(search_kwargs={"k": 3})
+ #         else:
+ #             combined_docs = []
+ #             for db in vector_dbs.values():
+ #                 combined_docs.extend(db.get_relevant_documents(question))
+ #             if not combined_docs:
+ #                 return "No relevant information found across uploaded documents."
+ #             temp_db = FAISS.from_documents(combined_docs, embedding_model)
+ #             retriever = temp_db.as_retriever(search_kwargs={"k": 3})
+
+ #         relevant_docs = retriever.get_relevant_documents(question)
+ #         context = "\n\n".join([doc.page_content for doc in relevant_docs])
+ #         answer = qa_pipeline(question=question, context=context)["answer"]
+ #         return answer.strip()
+ #     except Exception as e:
+ #         st.error(f"Error during question answering: {e}")
+ #         return "An error occurred while trying to answer the question."
+
+ # # ---------------------------------------------
+ # # Flashcard Generation
+ # # ---------------------------------------------
+ # def generate_flashcards(text):
+ #     flashcards = []
+ #     seen_terms = set()
+ #     sentences = nltk.sent_tokenize(text)
+
+ #     for i, sent in enumerate(sentences):
+ #         words = nltk.word_tokenize(sent)
+ #         tagged_words = nltk.pos_tag(words)
+
+ #         potential_terms = [word for word, tag in tagged_words if tag.startswith('NN') or tag.startswith('NP')]
+
+ #         for term in potential_terms:
+ #             if term in seen_terms:
+ #                 continue
+
+ #             defining_patterns = [r"\b" + re.escape(term) + r"\b\s+is\s+(?:a|an|the)\s+(.+?)(?:\.|,|\n|$)",
+ #                                  r"\b" + re.escape(term) + r"\b\s+refers\s+to\s+(.+?)(?:\.|,|\n|$)",
+ #                                  r"\b" + re.escape(term) + r"\b\s+means\s+(.+?)(?:\.|,|\n|$)",
+ #                                  r"\b" + re.escape(term) + r"\b,\s+defined\s+as\s+(.+?)(?:\.|,|\n|$)",
+ #                                  r"\b" + re.escape(term) + r"\b:\s+(.+?)(?:\.|,|\n|$)"]
+
+ #             potential_definitions = []
+ #             for pattern in defining_patterns:
+ #                 match = re.search(pattern, sent, re.IGNORECASE)
+ #                 if match and len(match.groups()) >= 1:
+ #                     potential_definitions.append(match.group(1).strip())
+
+ #             for definition in potential_definitions:
+ #                 if 2 <= len(definition.split()) <= 30:
+ #                     flashcards.append({"term": term, "definition": definition})
+ #                     seen_terms.add(term)
+ #                     break
+
+ #             if term not in seen_terms and i > 0:
+ #                 prev_sent = sentences[i-1]
+ #                 defining_patterns_prev = [r"The\s+\b" + re.escape(term) + r"\b\s+is\s+(.+?)(?:\.|,|\n|$)",
+ #                                           r"This\s+\b" + re.escape(term) + r"\b\s+refers\s+to\s+(.+?)(?:\.|,|\n|$)",
+ #                                           r"It\s+means\s+the\s+\b" + re.escape(term) + r"\b\s+(.+?)(?:\.|,|\n|$)"]
+ #                 for pattern in defining_patterns_prev:
+ #                     match = re.search(pattern, prev_sent, re.IGNORECASE)
+ #                     if match and term in sent and len(match.groups()) >= 1:
+ #                         definition = match.group(1).strip()
+ #                         if 2 <= len(definition.split()) <= 30:
+ #                             flashcards.append({"term": term, "definition": definition})
+ #                             seen_terms.add(term)
+ #                             break
+
+
+ #     return flashcards
+
+ # # ---------------------------------------------
+ # # Text to Speech
+ # # ---------------------------------------------
+ # def read_aloud(text):
+ #     try:
+ #         tts = gTTS(text)
+ #         audio_path = os.path.join(tempfile.gettempdir(), "summary.mp3")
+ #         tts.save(audio_path)
+ #         return audio_path
+ #     except Exception as e:
+ #         st.error(f"Error during text-to-speech: {e}")
+ #         return None
+
+ # # ---------------------------------------------
+ # # Quiz Generation and Handling
+ # # ---------------------------------------------
+ # def generate_quiz_questions(text, num_questions=5):
+ #     flashcards = generate_flashcards(text)  # Reuse flashcard logic for potential terms/definitions
+ #     if not flashcards:
+ #         return []
+
+ #     questions = []
+ #     used_indices = set()
+ #     num_available = len(flashcards)
+
+ #     while len(questions) < num_questions and len(used_indices) < num_available:
+ #         index = random.randint(0, num_available - 1)
+ #         if index in used_indices:
+ #             continue
+ #         used_indices.add(index)
+ #         card = flashcards[index]
+ #         correct_answer = card['term']
+ #         definition = card['definition']
+
+ #         # Generate incorrect answers (very basic for now)
+ #         incorrect_options = random.sample([c['term'] for i, c in enumerate(flashcards) if i != index], 3)
+ #         options = [correct_answer] + incorrect_options
+ #         random.shuffle(options)
+
+ #         questions.append({
+ #             "question": f"What is the term for: {definition}",
+ #             "options": options,
+ #             "correct_answer": correct_answer,
+ #             "user_answer": None  # To store user's choice
+ #         })
+
+ #     return questions
+
+ # def display_quiz(questions):
+ #     st.session_state.quiz_questions = questions
+ #     st.session_state.user_answers = {}
+ #     st.session_state.quiz_submitted = False
+ #     for i, q in enumerate(st.session_state.quiz_questions):
+ #         st.subheader(f"Question {i + 1}:")
+ #         st.write(q["question"])
+ #         st.session_state.user_answers[i] = st.radio(f"Answer for Question {i + 1}", q["options"])
+ #     st.button("Submit Quiz", on_click=submit_quiz)
+
+ # def submit_quiz():
+ #     st.session_state.quiz_submitted = True
+
+ # def grade_quiz():
+ #     if st.session_state.quiz_submitted:
+ #         score = 0
+ #         for i, q in enumerate(st.session_state.quiz_questions):
+ #             user_answer = st.session_state.user_answers.get(i)
+ #             if user_answer == q["correct_answer"]:
+ #                 score += 1
+ #                 st.success(f"Question {i + 1}: Correct!")
+ #             else:
+ #                 st.error(f"Question {i + 1}: Incorrect. Correct answer was: {q['correct_answer']}")
+ #         st.write(f"## Your Score: {score} / {len(st.session_state.quiz_questions)}")

  # ---------------------------------------------
  # Streamlit Interface with Tabs
      else:
          st.info("Please upload and extract a file in the 'Upload & Extract' tab first.")

+ # with tab3:
+ #     st.header("❓ Question Answering")
+ #     doc_titles = list(vector_dbs.keys())
+ #     if doc_titles:
+ #         doc_title = st.selectbox("Search within document:", ["All Documents"] + doc_titles)
+ #         question = st.text_input("Ask a question about the content:")
+ #         if question:
+ #             with st.spinner("Searching for answer..."):
+ #                 if doc_title == "All Documents":
+ #                     answer = topic_search(question)
+ #                 else:
+ #                     answer = topic_search(question, doc_title=doc_title)
+ #             if answer:
+ #                 st.subheader("Answer:")
+ #                 st.write(answer)
+ #             else:
+ #                 st.warning("Could not find an answer in the selected document(s).")
+ #     else:
+ #         st.info("Please upload and extract a file in the 'Upload & Extract' tab first.")
+
+ # with tab4:
+ #     st.header("🧠 Interactive Learning: Flashcards")
+ #     doc_titles = list(extracted_texts.keys())
+ #     if doc_titles:
+ #         selected_doc_title_flashcard = st.selectbox("Generate flashcards from document:", doc_titles)
+ #         if st.button("Generate Flashcards"):
+ #             if selected_doc_title_flashcard in extracted_texts:
+ #                 with st.spinner(f"Generating flashcards from {selected_doc_title_flashcard}..."):
+ #                     flashcards = generate_flashcards(extracted_texts[selected_doc_title_flashcard])
+ #                 if flashcards:
+ #                     st.subheader("Flashcards")
+ #                     for i, card in enumerate(flashcards):
+ #                         with st.expander(f"Card {i+1}"):
+ #                             st.markdown(f"*Term:* {card['term']}")
+ #                             st.markdown(f"*Definition:* {card['definition']}")
+ #                 else:
+ #                     st.info("No flashcards could be generated from this document using the current method.")
+ #             else:
+ #                 st.warning(f"Original text for {selected_doc_title_flashcard} not found. Please re-upload.")
+ #     else:
+ #         st.info("Please upload and extract a file in the 'Upload & Extract' tab first.")
+
+ # with tab5:
+ #     st.header("📝 Quiz Yourself!")
+ #     doc_titles = list(extracted_texts.keys())
+ #     if doc_titles:
+ #         selected_doc_title_quiz = st.selectbox("Generate quiz from document:", doc_titles)
+ #         if selected_doc_title_quiz in extracted_texts:
+ #             text_for_quiz = extracted_texts[selected_doc_title_quiz]
+ #             if "quiz_questions" not in st.session_state:
+ #                 st.session_state.quiz_questions = generate_quiz_questions(text_for_quiz)
+
+ #             if st.session_state.quiz_questions:
+ #                 display_quiz(st.session_state.quiz_questions)
+ #                 if st.session_state.quiz_submitted:
+ #                     grade_quiz()
+
+ #                 if st.button("Refresh Questions"):
+ #                     st.session_state.quiz_questions = generate_quiz_questions(text_for_quiz)
+ #                     st.session_state.quiz_submitted = False
+ #                     st.session_state.user_answers = {}
+ #                     st.rerun()  # Force a re-render to show new questions
+ #             else:
+ #                 st.info("Could not generate quiz questions from the current document.")
+ #         else:
+ #             st.warning(f"Original text for {selected_doc_title_quiz} not found. Please re-upload.")
+ #     else:
+ #         st.info("Please upload and extract a file in the 'Upload & Extract' tab first.")
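
The model loading this commit comments out runs at module import, so every Streamlit rerun re-executes the pipeline constructors. A minimal sketch of loading the same models once per process, assuming Streamlit 1.18+ for st.cache_resource; the langchain_community import path and the load_models name are assumptions, not part of this commit:

    import streamlit as st
    from transformers import pipeline
    from langchain_community.embeddings import HuggingFaceEmbeddings

    @st.cache_resource
    def load_models():
        # Cached once per process; later reruns and sessions reuse these objects.
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        qa_pipeline = pipeline("question-answering",
                               model="distilbert-base-cased-distilled-squad")
        embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        return summarizer, qa_pipeline, embedding_model

    summarizer, qa_pipeline, embedding_model = load_models()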
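In topic_search, the "All Documents" branch calls db.get_relevant_documents(question) directly on each FAISS store. In the LangChain versions I am aware of, that method belongs to the retriever returned by as_retriever(), while the store itself exposes similarity_search. A sketch of that branch against the store-level API; the helper name and the k=3 default are illustrative:

    # vector_dbs maps document titles to LangChain FAISS stores, as in the diff.
    def search_all_documents(question, vector_dbs, k=3):
        combined_docs = []
        for db in vector_dbs.values():
            # similarity_search returns the k nearest Documents for the query.
            combined_docs.extend(db.similarity_search(question, k=k))
        return combined_docs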
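generate_quiz_questions draws distractors with random.sample(..., 3), which raises ValueError whenever fewer than four flashcards exist. A defensive variant of that step; the helper name is hypothetical:

    import random

    def pick_distractors(flashcards, index, n=3):
        # Terms from every card except the one being asked about.
        other_terms = [c["term"] for i, c in enumerate(flashcards) if i != index]
        # Cap the sample size so short decks do not raise ValueError.
        return random.sample(other_terms, min(n, len(other_terms)))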
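Finally, note the ordering in the tab5 flow: display_quiz resets st.session_state.quiz_submitted = False on every rerun, and since Streamlit runs on_click callbacks before the script re-executes, the Submit Quiz callback's effect appears to be overwritten before grade_quiz() is reached. One possible fix is to initialize those keys only once, outside display_quiz:

    import streamlit as st

    # Sketch: initialize session-state keys once instead of on every rerun,
    # so the value set by the Submit Quiz on_click callback survives.
    if "quiz_submitted" not in st.session_state:
        st.session_state.quiz_submitted = False
    if "user_answers" not in st.session_state:
        st.session_state.user_answers = {}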