harshithasudhakar committed on
Commit
941e747
·
verified ·
1 Parent(s): 877c158

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -38
app.py CHANGED
@@ -3,61 +3,59 @@ from transformers import pipeline
3
  import streamlit as st
4
  import fitz # PyMuPDF for PDF text extraction
5
 
6
- st.set_page_config(page_title="Text Simplifier", layout="centered")
7
-
8
- st.title("📚 Jargon Simplifier")
9
- st.write("This tool simplifies complex or academic text into easier, plain language.")
10
-
11
- # ---------------------------- Available Models ----------------------------
12
  MODEL_OPTIONS = {
13
- "PEGASUS (Simplification - pszemraj)": "pszemraj/pegasus-xsum-simplify",
14
- "T5 Small (Prompted Simplify)": "t5-small",
15
- "T5 Base (Prompted Simplify)": "t5-base"
16
  }
17
 
18
- # ---------------------------- Model Selection ----------------------------
19
- selected_model = st.selectbox("Choose a model:", list(MODEL_OPTIONS.keys()))
20
- model_name = MODEL_OPTIONS[selected_model]
21
-
22
- @st.cache_resource(show_spinner=True)
23
- def load_model(name):
24
- return pipeline("text2text-generation", model=name)
25
 
26
- simplifier = load_model(model_name)
 
 
 
 
27
 
28
- # ---------------------------- Simplification Function ----------------------------
29
- def simplify_text(text, model_name):
30
- if "t5" in model_name:
31
- text = "simplify: " + text # T5 needs task prefix
32
- output = simplifier(text, max_length=256, min_length=30, do_sample=False)
33
- return output[0]['generated_text']
34
 
35
- # ---------------------------- PDF Extraction ----------------------------
36
  def extract_text_from_pdf(uploaded_file):
37
  with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
38
- text = "\n".join(page.get_text("text") for page in doc)
39
  return text
40
 
41
- # ---------------------------- UI ----------------------------
 
 
 
 
 
 
 
 
42
  option = st.radio("Choose input type:", ("Text Input", "Upload PDF"))
43
 
44
  if option == "Text Input":
45
- user_text = st.text_area("✍️ Enter complex text here:")
46
  if st.button("Simplify") and user_text.strip():
47
- simplified_text = simplify_text(user_text.strip(), model_name)
48
- st.text_area("✅ Simplified Output:", value=simplified_text, height=200)
 
49
 
50
  elif option == "Upload PDF":
51
- uploaded_file = st.file_uploader("📄 Upload a PDF file", type=["pdf"])
52
  if uploaded_file:
53
- try:
54
  extracted_text = extract_text_from_pdf(uploaded_file)
55
- preview = st.text_area("📄 Extracted Text Preview (first 1000 chars):", value=extracted_text[:1000], height=200)
56
- if st.button("Simplify Extracted Text"):
57
- simplified_text = simplify_text(extracted_text[:1000], model_name)
58
- st.text_area("✅ Simplified Output:", value=simplified_text, height=200)
59
- except Exception as e:
60
- st.error(f"❌ Error reading PDF: {e}")
61
 
62
  st.markdown("---")
63
- st.caption("Made with ❤️ using HuggingFace Transformers and Streamlit.")
 
3
  import streamlit as st
4
  import fitz # PyMuPDF for PDF text extraction
5
 
6
+ # ------------------------------
7
+ # Supported models
 
 
 
 
8
  MODEL_OPTIONS = {
9
+ "Long T5 (Scientific Simplifier)": "pszemraj/long-t5-tglobal-base-sci-simplify",
10
+ "T5 Base (General Simplifier)": "t5-base"
 
11
  }
12
 
13
+ @st.cache_resource
14
+ def load_model(model_name):
15
+ return pipeline("summarization", model=model_name)
 
 
 
 
16
 
17
+ def simplify_text(text, simplifier, model_name):
18
+ try:
19
+ # T5 expects a "summarize: " prefix
20
+ if "t5" in model_name.lower():
21
+ text = "summarize: " + text
22
 
23
+ simplified = simplifier(text, max_length=256, min_length=30, do_sample=False)
24
+ return simplified[0]['summary_text']
25
+ except Exception as e:
26
+ return f"Error simplifying text: {e}"
 
 
27
 
 
28
  def extract_text_from_pdf(uploaded_file):
29
  with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
30
+ text = "\n".join(page.get_text() for page in doc)
31
  return text
32
 
33
+ # ------------------------------
34
+ # Streamlit UI
35
+ st.set_page_config(page_title="Jargon Simplifier", layout="centered")
36
+ st.title("🧠 Jargon to Simple: Academic Text Simplifier")
37
+
38
+ selected_model_name = st.selectbox("Choose a simplification model:", list(MODEL_OPTIONS.keys()))
39
+ model_id = MODEL_OPTIONS[selected_model_name]
40
+ simplifier = load_model(model_id)
41
+
42
  option = st.radio("Choose input type:", ("Text Input", "Upload PDF"))
43
 
44
  if option == "Text Input":
45
+ user_text = st.text_area("Enter complex academic text:")
46
  if st.button("Simplify") and user_text.strip():
47
+ with st.spinner("Simplifying..."):
48
+ simplified_output = simplify_text(user_text, simplifier, model_id)
49
+ st.text_area("Simplified Output:", value=simplified_output, height=200)
50
 
51
  elif option == "Upload PDF":
52
+ uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
53
  if uploaded_file:
54
+ with st.spinner("Extracting and simplifying text..."):
55
  extracted_text = extract_text_from_pdf(uploaded_file)
56
+ truncated_text = extracted_text[:2000] # Trim for model input
57
+ simplified_output = simplify_text(truncated_text, simplifier, model_id)
58
+ st.text_area("Simplified Output:", value=simplified_output, height=200)
 
 
 
59
 
60
  st.markdown("---")
61
+ st.markdown("Made with ❤️ by Harshitha")