File size: 2,874 Bytes
65bd869
d151635
65bd869
 
 
884190d
 
 
 
 
 
 
 
 
877c158
 
884190d
 
 
 
 
d151635
 
 
 
 
 
 
 
884190d
 
877c158
884190d
941e747
 
884190d
 
 
 
 
 
 
877c158
884190d
 
941e747
 
877c158
884190d
 
 
65bd869
 
884190d
65bd869
 
 
884190d
 
 
 
65bd869
 
884190d
 
 
 
 
877c158
 
941e747
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import streamlit as st
import fitz  # PyMuPDF for PDF text extraction

# Streamlit UI setup
st.set_page_config(page_title="Text Simplifier", layout="centered")
st.title("🧠 Academic Text Simplifier")

# Model selection
model_options = {
    "Mistral (Instruction-tuned)": "mistralai/Mistral-7B-Instruct-v0.1",
    "T5 (Simplification finetuned)": "mrm8488/t5-base-finetuned-common_gen",
    "BART (Paraphrasing/Simplification)": "tuner007/pegasus_paraphrase"
}

model_choice = st.selectbox("Choose a simplification model:", list(model_options.keys()))
model_name = model_options[model_choice]

@st.cache_resource(show_spinner=True)
def load_model(name):
    if "t5" in name.lower():
        tokenizer = AutoTokenizer.from_pretrained(name, use_fast=False)
        model = AutoModelForSeq2SeqLM.from_pretrained(name)
        return pipeline("text2text-generation", model=model, tokenizer=tokenizer)
    elif "pegasus" in name.lower():
        return pipeline("text2text-generation", model=name)
    else:
        return pipeline("text-generation", model=name)

simplifier = load_model(model_name)

def simplify_text(text):
    try:
        if "t5" in model_name.lower():
            prompt = f"simplify: {text}"
        elif "mistral" in model_name.lower() or "instruct" in model_name.lower():
            prompt = f"Rewrite the following text using simpler vocabulary and structure:\n{text}"
        elif "pegasus" in model_name.lower():
            prompt = f"paraphrase: {text}"
        else:
            prompt = text

        output = simplifier(prompt, max_length=256, min_length=30, do_sample=False)[0]
        return output.get('summary_text') or output.get('generated_text') or "(No output)"
    except Exception as e:
        return f"Error simplifying text: {e}"

def extract_text_from_pdf(pdf_file):
    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    text = "\n".join(page.get_text("text") for page in doc)
    return text

# Input options
option = st.radio("Choose input type:", ("Text Input", "Upload PDF"))

if option == "Text Input":
    user_text = st.text_area("Enter your complex academic text here:", height=200)
    if st.button("Simplify Text") and user_text:
        simplified_text = simplify_text(user_text)
        st.text_area("🔽 Simplified Text:", simplified_text, height=200)

elif option == "Upload PDF":
    uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])
    if uploaded_file and st.button("Simplify Extracted Text"):
        extracted_text = extract_text_from_pdf(uploaded_file)
        simplified_text = simplify_text(extracted_text[:2000])  # limit for performance
        st.text_area("🔽 Simplified Text from PDF:", simplified_text, height=200)

st.markdown("---")
st.markdown("Made with ❤️ by Harshitha")