"""Streamlit app that rewrites academic text in simpler language.

The user picks one of several Hugging Face models, then either types text
or uploads a PDF whose extracted text (truncated for performance) is sent
to the model.
"""

import fitz  # PyMuPDF for PDF text extraction
import streamlit as st
import torch  # not referenced directly in this script; kept from the original imports
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

# Generation limits shared by every model.
MAX_OUTPUT_TOKENS = 256
MIN_OUTPUT_TOKENS = 30
# Only the first part of a PDF is simplified, to keep inference time bounded.
MAX_PDF_CHARS = 2000

# Streamlit UI setup (set_page_config must be the first Streamlit call).
st.set_page_config(page_title="Text Simplifier", layout="centered")
st.title("🧠 Academic Text Simplifier")

# Model selection
# NOTE(review): the "BART" label points at a Pegasus paraphrase checkpoint and
# the "T5 (Simplification finetuned)" label at a common_gen checkpoint --
# confirm that the labels/checkpoints are the intended ones.
model_options = {
    "Mistral (Instruction-tuned)": "mistralai/Mistral-7B-Instruct-v0.1",
    "T5 (Simplification finetuned)": "mrm8488/t5-base-finetuned-common_gen",
    "BART (Paraphrasing/Simplification)": "tuner007/pegasus_paraphrase",
}
model_choice = st.selectbox("Choose a simplification model:", list(model_options.keys()))
model_name = model_options[model_choice]


def _is_seq2seq(name):
    """Return True when *name* is routed to the text2text-generation pipeline."""
    lowered = name.lower()
    return "t5" in lowered or "pegasus" in lowered


@st.cache_resource(show_spinner=True)
def load_model(name):
    """Build (and cache per model name) the Hugging Face pipeline for *name*.

    T5 checkpoints are loaded with an explicit slow tokenizer; Pegasus
    checkpoints use the default text2text-generation loader; every other
    name is loaded as a causal text-generation pipeline.
    """
    if "t5" in name.lower():
        tokenizer = AutoTokenizer.from_pretrained(name, use_fast=False)
        model = AutoModelForSeq2SeqLM.from_pretrained(name)
        return pipeline("text2text-generation", model=model, tokenizer=tokenizer)
    if _is_seq2seq(name):
        return pipeline("text2text-generation", model=name)
    return pipeline("text-generation", model=name)


simplifier = load_model(model_name)


def _build_prompt(text):
    """Wrap *text* in the prompt format used for the selected model family."""
    lowered = model_name.lower()
    if "t5" in lowered:
        return f"simplify: {text}"
    if "mistral" in lowered or "instruct" in lowered:
        return f"Rewrite the following text using simpler vocabulary and structure:\n{text}"
    if "pegasus" in lowered:
        return f"paraphrase: {text}"
    return text


def simplify_text(text):
    """Return a simplified version of *text* produced by the selected model.

    Returns "(No output)" for blank input or an empty model response, and an
    "Error simplifying text: ..." message if the pipeline raises, so the
    caller can always display the returned string.
    """
    if not text or not text.strip():
        return "(No output)"

    if _is_seq2seq(model_name):
        # Encoder-decoder models: max_length bounds the generated sequence;
        # truncation keeps over-long inputs within the model's input limit.
        generation_kwargs = {
            "max_length": MAX_OUTPUT_TOKENS,
            "min_length": MIN_OUTPUT_TOKENS,
            "do_sample": False,
            "truncation": True,
        }
    else:
        # Causal models: max_length would count the prompt tokens too, so a
        # long prompt would leave no room to generate. Bound only the new
        # tokens, and do not echo the prompt back in the result.
        generation_kwargs = {
            "max_new_tokens": MAX_OUTPUT_TOKENS,
            "do_sample": False,
            "return_full_text": False,
        }

    try:
        output = simplifier(_build_prompt(text), **generation_kwargs)[0]
    except Exception as e:  # UI boundary: report the failure instead of crashing
        return f"Error simplifying text: {e}"

    generated = output.get("summary_text") or output.get("generated_text") or ""
    return generated.strip() or "(No output)"


def extract_text_from_pdf(pdf_file):
    """Return the plain text of every page of *pdf_file*, joined by newlines.

    *pdf_file* is a binary file-like object (e.g. a Streamlit upload). The
    PyMuPDF document is closed before returning.
    """
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "\n".join(page.get_text("text") for page in doc)


# Input options
option = st.radio("Choose input type:", ("Text Input", "Upload PDF"))

if option == "Text Input":
    user_text = st.text_area("Enter your complex academic text here:", height=200)
    if st.button("Simplify Text"):
        if user_text and user_text.strip():
            simplified_text = simplify_text(user_text)
            st.text_area("🔽 Simplified Text:", simplified_text, height=200)
        else:
            st.warning("Please enter some text to simplify.")

elif option == "Upload PDF":
    uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])
    if uploaded_file and st.button("Simplify Extracted Text"):
        try:
            extracted_text = extract_text_from_pdf(uploaded_file)
        except Exception as e:  # unreadable or corrupt upload
            extracted_text = None
            st.error(f"Could not read the PDF: {e}")

        if extracted_text is not None:
            if extracted_text.strip():
                # limit for performance
                simplified_text = simplify_text(extracted_text[:MAX_PDF_CHARS])
                st.text_area("🔽 Simplified Text from PDF:", simplified_text, height=200)
            else:
                st.warning("No extractable text was found in this PDF.")

st.markdown("---")
st.markdown("Made with ❤️ by Harshitha")