Spaces:

harshithasudhakar
/

text-simplification

Sleeping

App Files Community

harshithasudhakar committed on Apr 4

Commit

877c158

verified ·

1 Parent(s): d5e24d9

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -32

app.py CHANGED Viewed

@@ -3,43 +3,61 @@ from transformers import pipeline
 import streamlit as st
 import fitz  # PyMuPDF for PDF text extraction
-# Load pretrained model for simplification
-simplifier = pipeline("summarization", model="facebook/bart-large-cnn")
-def simplify_text(text):
-    """Simplifies a given academic text using a pretrained model."""
-    simplified = simplifier(text, max_length=96, min_length=30, do_sample=False)
-    return simplified[0]['summary_text']
-def extract_text_from_pdf(pdf_file):
-    """Extracts text from an uploaded PDF file stream."""
-    text = ""
-    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
-        for page in doc:
-            text += page.get_text()
     return text
-# Streamlit UI
-st.title("Text Simplification with Pretrained Model")
 option = st.radio("Choose input type:", ("Text Input", "Upload PDF"))
 if option == "Text Input":
-    user_text = st.text_area("Enter your text:")
-    if st.button("Simplify") and user_text:
-        simplified_text = simplify_text(user_text)
-        st.subheader("Simplified Text:")
-        st.text_area("Simplified Output", simplified_text, height=150)
 elif option == "Upload PDF":
-    uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
     if uploaded_file:
-        extracted_text = extract_text_from_pdf(uploaded_file)
-        st.subheader("Extracted Text from PDF:")
-        st.text_area("Extracted Text", extracted_text, height=200)
-        if st.button("Simplify Extracted Text"):
-            simplified_text = simplify_text(extracted_text[:1000])  # Limit length for model input
-            st.subheader("Simplified Text:")
-            st.text_area("Simplified Output", simplified_text, height=150)
-st.write("\nMade by Harshitha")

 import streamlit as st
 import fitz  # PyMuPDF for PDF text extraction
+st.set_page_config(page_title="Text Simplifier", layout="centered")
+st.title("📚 Jargon Simplifier")
+st.write("This tool simplifies complex or academic text into easier, plain language.")
+# ---------------------------- Available Models ----------------------------
+MODEL_OPTIONS = {
+    "PEGASUS (Simplification - pszemraj)": "pszemraj/pegasus-xsum-simplify",
+    "T5 Small (Prompted Simplify)": "t5-small",
+    "T5 Base (Prompted Simplify)": "t5-base"
+}
+# ---------------------------- Model Selection ----------------------------
+selected_model = st.selectbox("Choose a model:", list(MODEL_OPTIONS.keys()))
+model_name = MODEL_OPTIONS[selected_model]
+@st.cache_resource(show_spinner=True)
+def load_model(name):
+    return pipeline("text2text-generation", model=name)
+simplifier = load_model(model_name)
+# ---------------------------- Simplification Function ----------------------------
+def simplify_text(text, model_name):
+    if "t5" in model_name:
+        text = "simplify: " + text  # T5 needs task prefix
+    output = simplifier(text, max_length=256, min_length=30, do_sample=False)
+    return output[0]['generated_text']
+# ---------------------------- PDF Extraction ----------------------------
+def extract_text_from_pdf(uploaded_file):
+    with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
+        text = "\n".join(page.get_text("text") for page in doc)
     return text
+# ---------------------------- UI ----------------------------
 option = st.radio("Choose input type:", ("Text Input", "Upload PDF"))
 if option == "Text Input":
+    user_text = st.text_area("✍️ Enter complex text here:")
+    if st.button("Simplify") and user_text.strip():
+        simplified_text = simplify_text(user_text.strip(), model_name)
+        st.text_area("✅ Simplified Output:", value=simplified_text, height=200)
 elif option == "Upload PDF":
+    uploaded_file = st.file_uploader("📄 Upload a PDF file", type=["pdf"])
     if uploaded_file:
+        try:
+            extracted_text = extract_text_from_pdf(uploaded_file)
+            preview = st.text_area("📄 Extracted Text Preview (first 1000 chars):", value=extracted_text[:1000], height=200)
+            if st.button("Simplify Extracted Text"):
+                simplified_text = simplify_text(extracted_text[:1000], model_name)
+                st.text_area("✅ Simplified Output:", value=simplified_text, height=200)
+        except Exception as e:
+            st.error(f"❌ Error reading PDF: {e}")
+st.markdown("---")
+st.caption("Made with ❤️ using HuggingFace Transformers and Streamlit.")