Spaces:

inam09
/

dummy_test

Sleeping

App Files Community

inam09 committed on May 18

Commit

3b50de3

verified ·

1 Parent(s): 33a7b36

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +302 -308

src/streamlit_app.py CHANGED Viewed

@@ -21,247 +21,241 @@ import torch  # Import torch
 # nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir, quiet=True)
 # nltk.data.path.append(nltk_data_dir)
-nltk_data_dir = "/tmp/nltk_data"
-os.makedirs(nltk_data_dir, exist_ok=True)
-nltk.data.path.append(nltk_data_dir)
-# nltk.download('punkt', download_dir=nltk_data_dir)
-# nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir)
-nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir, quiet=True)
-nltk.download('punkt', download_dir=nltk_data_dir, quiet=True)
-nltk.download('punkt_tab', download_dir=nltk_data_dir, quiet=True)
-# Load models
-# summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=torch.device("cpu"))
-# summarizer = pipeline(
-#     "summarization",
-#     model="sshleifer/distilbart-cnn-12-6",
-#     from_flax=True,
-#     device=-1  # CPU mode, use device=0 for GPU
-# )
-summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", device=torch.device("cpu"))
-embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
-vector_dbs = {}  # Dictionary to store multiple vector databases, keyed by document title
-extracted_texts = {} # Dictionary to store extracted text, keyed by document title
-current_doc_title = None
-# ---------------------------------------------
-# Extract text from PDF or Image
-# ---------------------------------------------
-def extract_text(uploaded_file):
-    global current_doc_title
-    current_doc_title = uploaded_file.name
-    suffix = uploaded_file.name.lower()
-    with tempfile.NamedTemporaryFile(delete=False) as tmp:
-        tmp.write(uploaded_file.read())
-        path = tmp.name
-    text = ""
-    if suffix.endswith(".pdf"):
-        with pdfplumber.open(path) as pdf:
-            for page in pdf.pages:
-                page_text = page.extract_text()
-                if page_text:
-                    text += page_text + "\n"
-    else:
-        try:
-            text = pytesseract.image_to_string(Image.open(path))
-        except Exception as e:
-            st.error(f"Error during OCR for {uploaded_file.name}: {e}")
-            text = ""
-    os.remove(path)
-    return text.strip()
-# ---------------------------------------------
-# Store Embeddings in FAISS
-# ---------------------------------------------
-def store_vector(text):
-    global vector_dbs, current_doc_title
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
-    docs = text_splitter.create_documents([text])
-    for doc in docs:
-        doc.metadata = {"title": current_doc_title}  # Add document title as metadata
-    if current_doc_title in vector_dbs:
-        vector_dbs[current_doc_title].add_documents(docs)  # Append to existing DB
-    else:
-        vector_dbs[current_doc_title] = FAISS.from_documents(docs, embedding_model)
-# ---------------------------------------------
-# Summarize Text
-# ---------------------------------------------
-def summarize(text):
-    if len(text.split()) < 100:
-        return "Text too short to summarize."
-    chunks = [text[i : i + 1024] for i in range(0, len(text), 1024)]
-    summaries = []
-    for chunk in chunks:
-        max_len = int(len(chunk.split()) * 0.6)
-        max_len = max(30, min(max_len, 150))
-        try:
-            summary = summarizer(chunk, max_length=max_len, min_length=20)[0]["summary_text"]
-            summaries.append(summary)
-        except Exception as e:
-            st.error(f"Error during summarization: {e}")
-            return "An error occurred during summarization."
-    return " ".join(summaries)
-# ---------------------------------------------
-# Question Answering
-# ---------------------------------------------
-def topic_search(question, doc_title=None):
-    global vector_dbs
-    if not vector_dbs:
-        st.warning("Please upload and process a file first in the 'Upload & Extract' tab.")
-        return ""
-    try:
-        if doc_title and doc_title in vector_dbs:
-            retriever = vector_dbs[doc_title].as_retriever(search_kwargs={"k": 3})
-        else:
-            combined_docs = []
-            for db in vector_dbs.values():
-                combined_docs.extend(db.get_relevant_documents(question))
-            if not combined_docs:
-                 return "No relevant information found across uploaded documents."
-            temp_db = FAISS.from_documents(combined_docs, embedding_model)
-            retriever = temp_db.as_retriever(search_kwargs={"k": 3})
-        relevant_docs = retriever.get_relevant_documents(question)
-        context = "\n\n".join([doc.page_content for doc in relevant_docs])
-        answer = qa_pipeline(question=question, context=context)["answer"]
-        return answer.strip()
-    except Exception as e:
-        st.error(f"Error during question answering: {e}")
-        return "An error occurred while trying to answer the question."
-# ---------------------------------------------
-# Flashcard Generation
-# ---------------------------------------------
-def generate_flashcards(text):
-    flashcards = []
-    seen_terms = set()
-    sentences = nltk.sent_tokenize(text)
-    for i, sent in enumerate(sentences):
-        words = nltk.word_tokenize(sent)
-        tagged_words = nltk.pos_tag(words)
-        potential_terms = [word for word, tag in tagged_words if tag.startswith('NN') or tag.startswith('NP')]
-        for term in potential_terms:
-            if term in seen_terms:
-                continue
-            defining_patterns = [r"\b" + re.escape(term) + r"\b\s+is\s+(?:a|an|the)\s+(.+?)(?:\.|,|\n|$)",
-                                 r"\b" + re.escape(term) + r"\b\s+refers\s+to\s+(.+?)(?:\.|,|\n|$)",
-                                 r"\b" + re.escape(term) + r"\b\s+means\s+(.+?)(?:\.|,|\n|$)",
-                                 r"\b" + re.escape(term) + r"\b,\s+defined\s+as\s+(.+?)(?:\.|,|\n|$)",
-                                 r"\b" + re.escape(term) + r"\b:\s+(.+?)(?:\.|,|\n|$)"]
-            potential_definitions = []
-            for pattern in defining_patterns:
-                match = re.search(pattern, sent, re.IGNORECASE)
-                if match and len(match.groups()) >= 1:
-                    potential_definitions.append(match.group(1).strip())
-            for definition in potential_definitions:
-                if 2 <= len(definition.split()) <= 30:
-                    flashcards.append({"term": term, "definition": definition})
-                    seen_terms.add(term)
-                    break
-            if term not in seen_terms and i > 0:
-                prev_sent = sentences[i-1]
-                defining_patterns_prev = [r"The\s+\b" + re.escape(term) + r"\b\s+is\s+(.+?)(?:\.|,|\n|$)",
-                                          r"This\s+\b" + re.escape(term) + r"\b\s+refers\s+to\s+(.+?)(?:\.|,|\n|$)",
-                                          r"It\s+means\s+the\s+\b" + re.escape(term) + r"\b\s+(.+?)(?:\.|,|\n|$)"]
-                for pattern in defining_patterns_prev:
-                    match = re.search(pattern, prev_sent, re.IGNORECASE)
-                    if match and term in sent and len(match.groups()) >= 1:
-                        definition = match.group(1).strip()
-                        if 2 <= len(definition.split()) <= 30:
-                            flashcards.append({"term": term, "definition": definition})
-                            seen_terms.add(term)
-                            break
-    return flashcards
-# ---------------------------------------------
-# Text to Speech
-# ---------------------------------------------
-def read_aloud(text):
-    try:
-        tts = gTTS(text)
-        audio_path = os.path.join(tempfile.gettempdir(), "summary.mp3")
-        tts.save(audio_path)
-        return audio_path
-    except Exception as e:
-        st.error(f"Error during text-to-speech: {e}")
-        return None
-# ---------------------------------------------
-# Quiz Generation and Handling
-# ---------------------------------------------
-def generate_quiz_questions(text, num_questions=5):
-    flashcards = generate_flashcards(text) # Reuse flashcard logic for potential terms/definitions
-    if not flashcards:
-        return []
-    questions = []
-    used_indices = set()
-    num_available = len(flashcards)
-    while len(questions) < num_questions and len(used_indices) < num_available:
-        index = random.randint(0, num_available - 1)
-        if index in used_indices:
-            continue
-        used_indices.add(index)
-        card = flashcards[index]
-        correct_answer = card['term']
-        definition = card['definition']
-        # Generate incorrect answers (very basic for now)
-        incorrect_options = random.sample([c['term'] for i, c in enumerate(flashcards) if i != index], 3)
-        options = [correct_answer] + incorrect_options
-        random.shuffle(options)
-        questions.append({
-            "question": f"What is the term for: {definition}",
-            "options": options,
-            "correct_answer": correct_answer,
-            "user_answer": None # To store user's choice
-        })
-    return questions
-def display_quiz(questions):
-    st.session_state.quiz_questions = questions
-    st.session_state.user_answers = {}
-    st.session_state.quiz_submitted = False
-    for i, q in enumerate(st.session_state.quiz_questions):
-        st.subheader(f"Question {i + 1}:")
-        st.write(q["question"])
-        st.session_state.user_answers[i] = st.radio(f"Answer for Question {i + 1}", q["options"])
-    st.button("Submit Quiz", on_click=submit_quiz)
-def submit_quiz():
-    st.session_state.quiz_submitted = True
-def grade_quiz():
-    if st.session_state.quiz_submitted:
-        score = 0
-        for i, q in enumerate(st.session_state.quiz_questions):
-            user_answer = st.session_state.user_answers.get(i)
-            if user_answer == q["correct_answer"]:
-                score += 1
-                st.success(f"Question {i + 1}: Correct!")
-            else:
-                st.error(f"Question {i + 1}: Incorrect. Correct answer was: {q['correct_answer']}")
-        st.write(f"## Your Score: {score} / {len(st.session_state.quiz_questions)}")
 # ---------------------------------------------
 # Streamlit Interface with Tabs
@@ -305,71 +299,71 @@ with tab2:
     else:
         st.info("Please upload and extract a file in the 'Upload & Extract' tab first.")
-with tab3:
-    st.header("❓ Question Answering")
-    doc_titles = list(vector_dbs.keys())
-    if doc_titles:
-        doc_title = st.selectbox("Search within document:", ["All Documents"] + doc_titles)
-        question = st.text_input("Ask a question about the content:")
-        if question:
-            with st.spinner("Searching for answer..."):
-                if doc_title == "All Documents":
-                    answer = topic_search(question)
-                else:
-                    answer = topic_search(question, doc_title=doc_title)
-            if answer:
-                st.subheader("Answer:")
-                st.write(answer)
-            else:
-                st.warning("Could not find an answer in the selected document(s).")
-    else:
-        st.info("Please upload and extract a file in the 'Upload & Extract' tab first.")
-with tab4:
-    st.header("🧠 Interactive Learning: Flashcards")
-    doc_titles = list(extracted_texts.keys())
-    if doc_titles:
-        selected_doc_title_flashcard = st.selectbox("Generate flashcards from document:", doc_titles)
-        if st.button("Generate Flashcards"):
-            if selected_doc_title_flashcard in extracted_texts:
-                with st.spinner(f"Generating flashcards from {selected_doc_title_flashcard}..."):
-                    flashcards = generate_flashcards(extracted_texts[selected_doc_title_flashcard])
-                if flashcards:
-                    st.subheader("Flashcards")
-                    for i, card in enumerate(flashcards):
-                        with st.expander(f"Card {i+1}"):
-                            st.markdown(f"*Term:* {card['term']}")
-                            st.markdown(f"*Definition:* {card['definition']}")
-                else:
-                    st.info("No flashcards could be generated from this document using the current method.")
-            else:
-                st.warning(f"Original text for {selected_doc_title_flashcard} not found. Please re-upload.")
-    else:
-        st.info("Please upload and extract a file in the 'Upload & Extract' tab first.")
-with tab5:
-    st.header("📝 Quiz Yourself!")
-    doc_titles = list(extracted_texts.keys())
-    if doc_titles:
-        selected_doc_title_quiz = st.selectbox("Generate quiz from document:", doc_titles)
-        if selected_doc_title_quiz in extracted_texts:
-            text_for_quiz =extracted_texts[selected_doc_title_quiz]
-            if "quiz_questions" not in st.session_state:
-                st.session_state.quiz_questions = generate_quiz_questions(text_for_quiz)
-            if st.session_state.quiz_questions:
-                display_quiz(st.session_state.quiz_questions)
-                if st.session_state.quiz_submitted:
-                    grade_quiz()
-                if st.button("Refresh Questions"):
-                    st.session_state.quiz_questions = generate_quiz_questions(text_for_quiz)
-                    st.session_state.quiz_submitted = False
-                    st.session_state.user_answers = {}
-                    st.rerun() # Force a re-render to show new questions
-            else:
-                st.info("Could not generate quiz questions from the current document.")
-        else:
-            st.warning(f"Original text for {selected_doc_title_quiz} not found. Please re-upload.")
-    else:
-        st.info("Please upload and extract a file in the 'Upload & Extract' tab first.")

 # nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir, quiet=True)
 # nltk.data.path.append(nltk_data_dir)
+# nltk_data_dir = "/tmp/nltk_data"
+# os.makedirs(nltk_data_dir, exist_ok=True)
+# nltk.data.path.append(nltk_data_dir)
+# # nltk.download('punkt', download_dir=nltk_data_dir)
+# # nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir)
+# nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir, quiet=True)
+# nltk.download('punkt', download_dir=nltk_data_dir, quiet=True)
+# # Load models
+# summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+# qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", device=torch.device("cpu"))
+# embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+# vector_dbs = {}  # Dictionary to store multiple vector databases, keyed by document title
+# extracted_texts = {} # Dictionary to store extracted text, keyed by document title
+# current_doc_title = None
+# # ---------------------------------------------
+# # Extract text from PDF or Image
+# # ---------------------------------------------
+# def extract_text(uploaded_file):
+#     global current_doc_title
+#     current_doc_title = uploaded_file.name
+#     suffix = uploaded_file.name.lower()
+#     with tempfile.NamedTemporaryFile(delete=False) as tmp:
+#         tmp.write(uploaded_file.read())
+#         path = tmp.name
+#     text = ""
+#     if suffix.endswith(".pdf"):
+#         with pdfplumber.open(path) as pdf:
+#             for page in pdf.pages:
+#                 page_text = page.extract_text()
+#                 if page_text:
+#                     text += page_text + "\n"
+#     else:
+#         try:
+#             text = pytesseract.image_to_string(Image.open(path))
+#         except Exception as e:
+#             st.error(f"Error during OCR for {uploaded_file.name}: {e}")
+#             text = ""
+#     os.remove(path)
+#     return text.strip()
+# # ---------------------------------------------
+# # Store Embeddings in FAISS
+# # ---------------------------------------------
+# def store_vector(text):
+#     global vector_dbs, current_doc_title
+#     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+#     docs = text_splitter.create_documents([text])
+#     for doc in docs:
+#         doc.metadata = {"title": current_doc_title}  # Add document title as metadata
+#     if current_doc_title in vector_dbs:
+#         vector_dbs[current_doc_title].add_documents(docs)  # Append to existing DB
+#     else:
+#         vector_dbs[current_doc_title] = FAISS.from_documents(docs, embedding_model)
+# # ---------------------------------------------
+# # Summarize Text
+# # ---------------------------------------------
+# def summarize(text):
+#     if len(text.split()) < 100:
+#         return "Text too short to summarize."
+#     chunks = [text[i : i + 1024] for i in range(0, len(text), 1024)]
+#     summaries = []
+#     for chunk in chunks:
+#         max_len = int(len(chunk.split()) * 0.6)
+#         max_len = max(30, min(max_len, 150))
+#         try:
+#             summary = summarizer(chunk, max_length=max_len, min_length=20)[0]["summary_text"]
+#             summaries.append(summary)
+#         except Exception as e:
+#             st.error(f"Error during summarization: {e}")
+#             return "An error occurred during summarization."
+#     return " ".join(summaries)
+# # ---------------------------------------------
+# # Question Answering
+# # ---------------------------------------------
+# def topic_search(question, doc_title=None):
+#     global vector_dbs
+#     if not vector_dbs:
+#         st.warning("Please upload and process a file first in the 'Upload & Extract' tab.")
+#         return ""
+#     try:
+#         if doc_title and doc_title in vector_dbs:
+#             retriever = vector_dbs[doc_title].as_retriever(search_kwargs={"k": 3})
+#         else:
+#             combined_docs = []
+#             for db in vector_dbs.values():
+#                 combined_docs.extend(db.get_relevant_documents(question))
+#             if not combined_docs:
+#                  return "No relevant information found across uploaded documents."
+#             temp_db = FAISS.from_documents(combined_docs, embedding_model)
+#             retriever = temp_db.as_retriever(search_kwargs={"k": 3})
+#         relevant_docs = retriever.get_relevant_documents(question)
+#         context = "\n\n".join([doc.page_content for doc in relevant_docs])
+#         answer = qa_pipeline(question=question, context=context)["answer"]
+#         return answer.strip()
+#     except Exception as e:
+#         st.error(f"Error during question answering: {e}")
+#         return "An error occurred while trying to answer the question."
+# # ---------------------------------------------
+# # Flashcard Generation
+# # ---------------------------------------------
+# def generate_flashcards(text):
+#     flashcards = []
+#     seen_terms = set()
+#     sentences = nltk.sent_tokenize(text)
+#     for i, sent in enumerate(sentences):
+#         words = nltk.word_tokenize(sent)
+#         tagged_words = nltk.pos_tag(words)
+#         potential_terms = [word for word, tag in tagged_words if tag.startswith('NN') or tag.startswith('NP')]
+#         for term in potential_terms:
+#             if term in seen_terms:
+#                 continue
+#             defining_patterns = [r"\b" + re.escape(term) + r"\b\s+is\s+(?:a|an|the)\s+(.+?)(?:\.|,|\n|$)",
+#                                  r"\b" + re.escape(term) + r"\b\s+refers\s+to\s+(.+?)(?:\.|,|\n|$)",
+#                                  r"\b" + re.escape(term) + r"\b\s+means\s+(.+?)(?:\.|,|\n|$)",
+#                                  r"\b" + re.escape(term) + r"\b,\s+defined\s+as\s+(.+?)(?:\.|,|\n|$)",
+#                                  r"\b" + re.escape(term) + r"\b:\s+(.+?)(?:\.|,|\n|$)"]
+#             potential_definitions = []
+#             for pattern in defining_patterns:
+#                 match = re.search(pattern, sent, re.IGNORECASE)
+#                 if match and len(match.groups()) >= 1:
+#                     potential_definitions.append(match.group(1).strip())
+#             for definition in potential_definitions:
+#                 if 2 <= len(definition.split()) <= 30:
+#                     flashcards.append({"term": term, "definition": definition})
+#                     seen_terms.add(term)
+#                     break
+#             if term not in seen_terms and i > 0:
+#                 prev_sent = sentences[i-1]
+#                 defining_patterns_prev = [r"The\s+\b" + re.escape(term) + r"\b\s+is\s+(.+?)(?:\.|,|\n|$)",
+#                                           r"This\s+\b" + re.escape(term) + r"\b\s+refers\s+to\s+(.+?)(?:\.|,|\n|$)",
+#                                           r"It\s+means\s+the\s+\b" + re.escape(term) + r"\b\s+(.+?)(?:\.|,|\n|$)"]
+#                 for pattern in defining_patterns_prev:
+#                     match = re.search(pattern, prev_sent, re.IGNORECASE)
+#                     if match and term in sent and len(match.groups()) >= 1:
+#                         definition = match.group(1).strip()
+#                         if 2 <= len(definition.split()) <= 30:
+#                             flashcards.append({"term": term, "definition": definition})
+#                             seen_terms.add(term)
+#                             break
+#     return flashcards
+# # ---------------------------------------------
+# # Text to Speech
+# # ---------------------------------------------
+# def read_aloud(text):
+#     try:
+#         tts = gTTS(text)
+#         audio_path = os.path.join(tempfile.gettempdir(), "summary.mp3")
+#         tts.save(audio_path)
+#         return audio_path
+#     except Exception as e:
+#         st.error(f"Error during text-to-speech: {e}")
+#         return None
+# # ---------------------------------------------
+# # Quiz Generation and Handling
+# # ---------------------------------------------
+# def generate_quiz_questions(text, num_questions=5):
+#     flashcards = generate_flashcards(text) # Reuse flashcard logic for potential terms/definitions
+#     if not flashcards:
+#         return []
+#     questions = []
+#     used_indices = set()
+#     num_available = len(flashcards)
+#     while len(questions) < num_questions and len(used_indices) < num_available:
+#         index = random.randint(0, num_available - 1)
+#         if index in used_indices:
+#             continue
+#         used_indices.add(index)
+#         card = flashcards[index]
+#         correct_answer = card['term']
+#         definition = card['definition']
+#         # Generate incorrect answers (very basic for now)
+#         incorrect_options = random.sample([c['term'] for i, c in enumerate(flashcards) if i != index], 3)
+#         options = [correct_answer] + incorrect_options
+#         random.shuffle(options)
+#         questions.append({
+#             "question": f"What is the term for: {definition}",
+#             "options": options,
+#             "correct_answer": correct_answer,
+#             "user_answer": None # To store user's choice
+#         })
+#     return questions
+# def display_quiz(questions):
+#     st.session_state.quiz_questions = questions
+#     st.session_state.user_answers = {}
+#     st.session_state.quiz_submitted = False
+#     for i, q in enumerate(st.session_state.quiz_questions):
+#         st.subheader(f"Question {i + 1}:")
+#         st.write(q["question"])
+#         st.session_state.user_answers[i] = st.radio(f"Answer for Question {i + 1}", q["options"])
+#     st.button("Submit Quiz", on_click=submit_quiz)
+# def submit_quiz():
+#     st.session_state.quiz_submitted = True
+# def grade_quiz():
+#     if st.session_state.quiz_submitted:
+#         score = 0
+#         for i, q in enumerate(st.session_state.quiz_questions):
+#             user_answer = st.session_state.user_answers.get(i)
+#             if user_answer == q["correct_answer"]:
+#                 score += 1
+#                 st.success(f"Question {i + 1}: Correct!")
+#             else:
+#                 st.error(f"Question {i + 1}: Incorrect. Correct answer was: {q['correct_answer']}")
+#         st.write(f"## Your Score: {score} / {len(st.session_state.quiz_questions)}")
 # ---------------------------------------------
 # Streamlit Interface with Tabs
     else:
         st.info("Please upload and extract a file in the 'Upload & Extract' tab first.")
+# with tab3:
+#     st.header("❓ Question Answering")
+#     doc_titles = list(vector_dbs.keys())
+#     if doc_titles:
+#         doc_title = st.selectbox("Search within document:", ["All Documents"] + doc_titles)
+#         question = st.text_input("Ask a question about the content:")
+#         if question:
+#             with st.spinner("Searching for answer..."):
+#                 if doc_title == "All Documents":
+#                     answer = topic_search(question)
+#                 else:
+#                     answer = topic_search(question, doc_title=doc_title)
+#             if answer:
+#                 st.subheader("Answer:")
+#                 st.write(answer)
+#             else:
+#                 st.warning("Could not find an answer in the selected document(s).")
+#     else:
+#         st.info("Please upload and extract a file in the 'Upload & Extract' tab first.")
+# with tab4:
+#     st.header("🧠 Interactive Learning: Flashcards")
+#     doc_titles = list(extracted_texts.keys())
+#     if doc_titles:
+#         selected_doc_title_flashcard = st.selectbox("Generate flashcards from document:", doc_titles)
+#         if st.button("Generate Flashcards"):
+#             if selected_doc_title_flashcard in extracted_texts:
+#                 with st.spinner(f"Generating flashcards from {selected_doc_title_flashcard}..."):
+#                     flashcards = generate_flashcards(extracted_texts[selected_doc_title_flashcard])
+#                 if flashcards:
+#                     st.subheader("Flashcards")
+#                     for i, card in enumerate(flashcards):
+#                         with st.expander(f"Card {i+1}"):
+#                             st.markdown(f"*Term:* {card['term']}")
+#                             st.markdown(f"*Definition:* {card['definition']}")
+#                 else:
+#                     st.info("No flashcards could be generated from this document using the current method.")
+#             else:
+#                 st.warning(f"Original text for {selected_doc_title_flashcard} not found. Please re-upload.")
+#     else:
+#         st.info("Please upload and extract a file in the 'Upload & Extract' tab first.")
+# with tab5:
+#     st.header("📝 Quiz Yourself!")
+#     doc_titles = list(extracted_texts.keys())
+#     if doc_titles:
+#         selected_doc_title_quiz = st.selectbox("Generate quiz from document:", doc_titles)
+#         if selected_doc_title_quiz in extracted_texts:
+#             text_for_quiz =extracted_texts[selected_doc_title_quiz]
+#             if "quiz_questions" not in st.session_state:
+#                 st.session_state.quiz_questions = generate_quiz_questions(text_for_quiz)
+#             if st.session_state.quiz_questions:
+#                 display_quiz(st.session_state.quiz_questions)
+#                 if st.session_state.quiz_submitted:
+#                     grade_quiz()
+#                 if st.button("Refresh Questions"):
+#                     st.session_state.quiz_questions = generate_quiz_questions(text_for_quiz)
+#                     st.session_state.quiz_submitted = False
+#                     st.session_state.user_answers = {}
+#                     st.rerun() # Force a re-render to show new questions
+#             else:
+#                 st.info("Could not generate quiz questions from the current document.")
+#         else:
+#             st.warning(f"Original text for {selected_doc_title_quiz} not found. Please re-upload.")
+#     else:
+#         st.info("Please upload and extract a file in the 'Upload & Extract' tab first.")