|
import torch |
|
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM |
|
import streamlit as st |
|
import fitz |
|
|
|
|
|
# --- Page setup ---------------------------------------------------------------
st.set_page_config(page_title="Text Simplifier", layout="centered")
st.title("🧠 Academic Text Simplifier")

# Display label -> Hugging Face Hub checkpoint id.
# load_model() and simplify_text() branch on substrings of the checkpoint id
# ("t5", "pegasus", "mistral"/"instruct"), so keep those substrings intact when
# swapping a checkpoint.
model_options = {
    "Mistral (Instruction-tuned)": "mistralai/Mistral-7B-Instruct-v0.1",
    # NOTE(review): the checkpoint id mentions "common_gen" rather than a
    # simplification dataset — confirm it really matches this label.
    "T5 (Simplification finetuned)": "mrm8488/t5-base-finetuned-common_gen",
    # The checkpoint is a Pegasus model, so the label names Pegasus (it used to
    # say BART, which did not match the checkpoint).
    "Pegasus (Paraphrasing/Simplification)": "tuner007/pegasus_paraphrase",
}

# Iterating a dict yields its keys, in insertion order.
model_choice = st.selectbox("Choose a simplification model:", list(model_options))
model_name = model_options[model_choice]
|
|
|
@st.cache_resource(show_spinner=True)
def load_model(name):
    """Build the Hugging Face pipeline for the checkpoint ``name``.

    The pipeline task is chosen from the checkpoint id, case-insensitively:
    ids containing "t5" or "pegasus" get a ``text2text-generation`` pipeline,
    every other id gets a ``text-generation`` pipeline.

    Args:
        name: Hugging Face Hub checkpoint id.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    checkpoint = name.lower()

    if "t5" in checkpoint:
        # T5 is assembled by hand so the slow tokenizer can be requested.
        t5_tokenizer = AutoTokenizer.from_pretrained(name, use_fast=False)
        t5_model = AutoModelForSeq2SeqLM.from_pretrained(name)
        return pipeline("text2text-generation", model=t5_model, tokenizer=t5_tokenizer)

    task = "text2text-generation" if "pegasus" in checkpoint else "text-generation"
    return pipeline(task, model=name)


# Pipeline for the checkpoint currently selected in the UI.
simplifier = load_model(model_name)
|
|
|
def _build_prompt(text, checkpoint):
    """Wrap ``text`` in the prompt used for ``checkpoint`` (a lower-cased id)."""
    if "t5" in checkpoint:
        return f"simplify: {text}"
    if "mistral" in checkpoint or "instruct" in checkpoint:
        # NOTE(review): instruction-tuned checkpoints often expect a specific
        # chat template — confirm whether this one needs it.
        return f"Rewrite the following text using simpler vocabulary and structure:\n{text}"
    if "pegasus" in checkpoint:
        return f"paraphrase: {text}"
    return text


def simplify_text(text):
    """Run the selected model on ``text`` and return the rewritten text.

    Reads the module-level ``model_name`` (checkpoint id) and ``simplifier``
    (the loaded pipeline).

    Args:
        text: The text to simplify.

    Returns:
        The generated text; ``"(No output)"`` when the input is empty or
        whitespace-only or the model produced nothing; or a string starting
        with ``"Error simplifying text:"`` when anything raised. This function
        never raises, so the UI can always display its result.
    """
    try:
        # Nothing to simplify: skip the model instead of feeding it a bare prompt.
        if not text or not text.strip():
            return "(No output)"

        checkpoint = model_name.lower()
        prompt = _build_prompt(text, checkpoint)

        if "t5" in checkpoint or "pegasus" in checkpoint:
            # text2text-generation (same split as load_model): the length
            # limits apply to the generated sequence; truncation keeps long
            # inputs within the tokenizer's maximum length.
            generation_kwargs = {"max_length": 256, "min_length": 30, "truncation": True}
        else:
            # text-generation: max_length/min_length would count the prompt
            # tokens too, so a long prompt leaves little or no room for new
            # text. max_new_tokens bounds only the generated part, and
            # return_full_text=False stops the prompt being echoed back.
            generation_kwargs = {"max_new_tokens": 256, "return_full_text": False}

        output = simplifier(prompt, do_sample=False, **generation_kwargs)[0]
        return output.get("summary_text") or output.get("generated_text") or "(No output)"
    except Exception as e:
        # UI boundary: report the failure as text rather than crash the app.
        return f"Error simplifying text: {e}"
|
|
|
def extract_text_from_pdf(pdf_file):
    """Return the plain text of every page of a PDF, joined with newlines.

    Args:
        pdf_file: Binary file-like object exposing ``read()`` that yields the
            raw PDF bytes (here, the value returned by ``st.file_uploader``).

    Returns:
        The text of all pages in page order, separated by ``"\\n"``.
    """
    pdf_bytes = pdf_file.read()
    # Open the document as a context manager so it is closed even if text
    # extraction fails part-way through.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        # join() consumes the generator while the document is still open.
        return "\n".join(page.get_text("text") for page in doc)
|
|
|
|
|
# Only the first MAX_PDF_CHARS characters of an uploaded PDF are sent to the model.
MAX_PDF_CHARS = 2000

option = st.radio("Choose input type:", ("Text Input", "Upload PDF"))

if option == "Text Input":
    user_text = st.text_area("Enter your complex academic text here:", height=200)
    if st.button("Simplify Text"):
        if user_text and user_text.strip():
            simplified_text = simplify_text(user_text)
            st.text_area("🔽 Simplified Text:", simplified_text, height=200)
        else:
            # Tell the user why nothing happened instead of ignoring the click.
            st.warning("Please enter some text to simplify.")

elif option == "Upload PDF":
    uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])
    # Short-circuit on purpose: the button is only rendered once a file exists.
    if uploaded_file and st.button("Simplify Extracted Text"):
        extracted_text = extract_text_from_pdf(uploaded_file)
        if extracted_text.strip():
            simplified_text = simplify_text(extracted_text[:MAX_PDF_CHARS])
            st.text_area("🔽 Simplified Text from PDF:", simplified_text, height=200)
        else:
            # No extractable text: do not run the model on an empty prompt.
            st.warning("No extractable text was found in this PDF.")

st.markdown("---")
st.markdown("Made with ❤️ by Harshitha")
|
|