|
import torch |
|
from transformers import pipeline |
|
import streamlit as st |
|
import fitz |
|
|
|
|
|
@st.cache_resource
def _load_simplifier():
    """Build the summarization pipeline once and reuse it across reruns.

    Streamlit re-executes the whole script on every widget interaction, so an
    uncached module-level ``pipeline(...)`` call would rebuild the model each
    time. Caching it as a resource hands back the same pipeline object.

    Returns:
        The ``facebook/bart-large-cnn`` summarization pipeline.
    """
    return pipeline("summarization", model="facebook/bart-large-cnn")


# Module-level handle to the shared summarization pipeline.
simplifier = _load_simplifier()
|
|
|
def simplify_text(text, max_length=96, min_length=30):
    """Simplify a given academic text using the pretrained summarization model.

    Args:
        text: The text to simplify.
        max_length: Upper bound on the summary length, forwarded to the
            summarization pipeline.
        min_length: Lower bound on the summary length, forwarded to the
            summarization pipeline.

    Returns:
        The generated summary string, or ``""`` when ``text`` is empty or
        contains only whitespace.
    """
    if not text or not text.strip():
        # Nothing to summarize: skip the model call instead of feeding it
        # empty input.
        return ""

    # truncation=True clips inputs that exceed the model's maximum input
    # length rather than letting the pipeline fail on them.
    result = simplifier(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        truncation=True,
    )
    return result[0]["summary_text"]
|
|
|
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page from an uploaded PDF file stream.

    Args:
        pdf_file: A binary file-like object holding the PDF data; it must
            provide ``read()``.

    Returns:
        The text of all pages concatenated in page order.
    """
    # Rewind when the stream supports it, so a stream that has already been
    # read is consumed from the start instead of yielding no bytes.
    seekable = getattr(pdf_file, "seekable", None)
    if callable(seekable) and seekable():
        pdf_file.seek(0)

    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        # A single join avoids repeated string rebuilding from `+=` per page.
        return "".join(page.get_text() for page in doc)
|
|
|
|
|
# Only this many leading characters of a PDF's extracted text are simplified.
PDF_SIMPLIFY_CHAR_LIMIT = 1000


def _show_simplified(text):
    """Simplify ``text`` and render the result in the output area."""
    simplified_text = simplify_text(text)
    st.subheader("Simplified Text:")
    st.text_area("Simplified Output", simplified_text, height=150)


st.title("Text Simplification with Pretrained Model")
option = st.radio("Choose input type:", ("Text Input", "Upload PDF"))

if option == "Text Input":
    user_text = st.text_area("Enter your text:")
    if st.button("Simplify"):
        # Whitespace-only input is treated as empty so the model is never
        # called with nothing to summarize.
        if user_text.strip():
            _show_simplified(user_text)
        else:
            st.warning("Please enter some text to simplify.")

elif option == "Upload PDF":
    uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
    if uploaded_file:
        extracted_text = extract_text_from_pdf(uploaded_file)
        st.subheader("Extracted Text from PDF:")
        st.text_area("Extracted Text", extracted_text, height=200)

        if st.button("Simplify Extracted Text"):
            snippet = extracted_text[:PDF_SIMPLIFY_CHAR_LIMIT]
            # A PDF may yield no text at all; report that instead of sending
            # an empty string to the model.
            if snippet.strip():
                _show_simplified(snippet)
            else:
                st.warning("No extractable text was found in this PDF.")

st.write("\nMade by Harshitha")
|
|