harshithasudhakar commited on
Commit
884190d
·
verified ·
1 Parent(s): 941e747

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -37
app.py CHANGED
@@ -3,59 +3,63 @@ from transformers import pipeline
3
  import streamlit as st
4
  import fitz # PyMuPDF for PDF text extraction
5
 
6
- # ------------------------------
7
- # Supported models
8
- MODEL_OPTIONS = {
9
- "Long T5 (Scientific Simplifier)": "pszemraj/long-t5-tglobal-base-sci-simplify",
10
- "T5 Base (General Simplifier)": "t5-base"
 
 
 
 
11
  }
12
 
13
- @st.cache_resource
14
- def load_model(model_name):
15
- return pipeline("summarization", model=model_name)
 
 
 
 
 
 
16
 
17
- def simplify_text(text, simplifier, model_name):
18
  try:
19
- # T5 expects a "summarize: " prefix
20
  if "t5" in model_name.lower():
21
- text = "summarize: " + text
 
 
 
 
 
 
22
 
23
- simplified = simplifier(text, max_length=256, min_length=30, do_sample=False)
24
- return simplified[0]['summary_text']
25
  except Exception as e:
26
  return f"Error simplifying text: {e}"
27
 
28
- def extract_text_from_pdf(uploaded_file):
29
- with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
30
- text = "\n".join(page.get_text() for page in doc)
31
  return text
32
 
33
- # ------------------------------
34
- # Streamlit UI
35
- st.set_page_config(page_title="Jargon Simplifier", layout="centered")
36
- st.title("🧠 Jargon to Simple: Academic Text Simplifier")
37
-
38
- selected_model_name = st.selectbox("Choose a simplification model:", list(MODEL_OPTIONS.keys()))
39
- model_id = MODEL_OPTIONS[selected_model_name]
40
- simplifier = load_model(model_id)
41
-
42
  option = st.radio("Choose input type:", ("Text Input", "Upload PDF"))
43
 
44
  if option == "Text Input":
45
- user_text = st.text_area("Enter complex academic text:")
46
- if st.button("Simplify") and user_text.strip():
47
- with st.spinner("Simplifying..."):
48
- simplified_output = simplify_text(user_text, simplifier, model_id)
49
- st.text_area("Simplified Output:", value=simplified_output, height=200)
50
 
51
  elif option == "Upload PDF":
52
- uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
53
- if uploaded_file:
54
- with st.spinner("Extracting and simplifying text..."):
55
- extracted_text = extract_text_from_pdf(uploaded_file)
56
- truncated_text = extracted_text[:2000] # Trim for model input
57
- simplified_output = simplify_text(truncated_text, simplifier, model_id)
58
- st.text_area("Simplified Output:", value=simplified_output, height=200)
59
 
60
  st.markdown("---")
61
  st.markdown("Made with ❤️ by Harshitha")
 
3
  import streamlit as st
4
  import fitz # PyMuPDF for PDF text extraction
5
 
6
+ # Streamlit UI setup
7
+ st.set_page_config(page_title="Text Simplifier", layout="centered")
8
+ st.title("🧠 Academic Text Simplifier")
9
+
10
+ # Model selection
11
+ model_options = {
12
+ "Mistral (Instruction-tuned)": "mistralai/Mistral-7B-Instruct-v0.1",
13
+ "T5 (Simplification finetuned)": "mrm8488/t5-base-finetuned-common_gen",
14
+ "BART (Paraphrasing/Simplification)": "tuner007/pegasus_paraphrase"
15
  }
16
 
17
+ model_choice = st.selectbox("Choose a simplification model:", list(model_options.keys()))
18
+ model_name = model_options[model_choice]
19
+
20
+ @st.cache_resource(show_spinner=True)
21
+ def load_model(name):
22
+ task = "text2text-generation" if "t5" in name.lower() or "pegasus" in name.lower() else "text-generation"
23
+ return pipeline(task, model=name)
24
+
25
+ simplifier = load_model(model_name)
26
 
27
+ def simplify_text(text):
28
  try:
 
29
  if "t5" in model_name.lower():
30
+ prompt = f"simplify: {text}"
31
+ elif "mistral" in model_name.lower() or "instruct" in model_name.lower():
32
+ prompt = f"Rewrite the following text using simpler vocabulary and structure:\n{text}"
33
+ elif "pegasus" in model_name.lower():
34
+ prompt = f"paraphrase: {text}"
35
+ else:
36
+ prompt = text
37
 
38
+ output = simplifier(prompt, max_length=256, min_length=30, do_sample=False)[0]
39
+ return output.get('summary_text') or output.get('generated_text') or "(No output)"
40
  except Exception as e:
41
  return f"Error simplifying text: {e}"
42
 
43
+ def extract_text_from_pdf(pdf_file):
44
+ doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
45
+ text = "\n".join(page.get_text("text") for page in doc)
46
  return text
47
 
48
+ # Input options
 
 
 
 
 
 
 
 
49
  option = st.radio("Choose input type:", ("Text Input", "Upload PDF"))
50
 
51
  if option == "Text Input":
52
+ user_text = st.text_area("Enter your complex academic text here:", height=200)
53
+ if st.button("Simplify Text") and user_text:
54
+ simplified_text = simplify_text(user_text)
55
+ st.text_area("🔽 Simplified Text:", simplified_text, height=200)
 
56
 
57
  elif option == "Upload PDF":
58
+ uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])
59
+ if uploaded_file and st.button("Simplify Extracted Text"):
60
+ extracted_text = extract_text_from_pdf(uploaded_file)
61
+ simplified_text = simplify_text(extracted_text[:2000]) # limit for performance
62
+ st.text_area("🔽 Simplified Text from PDF:", simplified_text, height=200)
 
 
63
 
64
  st.markdown("---")
65
  st.markdown("Made with ❤️ by Harshitha")