import streamlit as st
from PyPDF2 import PdfReader
from concurrent.futures import ThreadPoolExecutor
from base64 import b64encode
from fpdf import FPDF
import io, string, re, math
from io import StringIO
# Importing the Fastify Class
from fast_reader import Fastify_Reader
def pdf_extract_text(pdf_docs):
    '''
    Extract the text of every page of every given PDF and concatenate it.

    Parameters:
        pdf_docs: iterable of PDF sources; each item is handed unchanged to
            ``PdfReader``. ``None`` or an empty iterable is accepted.

    Returns:
        str: all page texts joined in document order, then page order, with
        no separator between pages or documents. Empty string when there is
        nothing to read.
    '''
    # Nothing uploaded (None or empty list) -> nothing to extract
    if not pdf_docs:
        return ""

    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        # `or ""` keeps the join safe should a page yield no text object
        page_texts.extend(page.extract_text() or "" for page in pdf_reader.pages)

    # Single join instead of repeated `+=` on a growing string
    return "".join(page_texts)
def text_to_pdf_fastify(text):
    '''
    Apply fastification to the input text and render the result as PDF bytes.

    Parameters:
        text: the raw input string to fastify.

    Returns:
        bytes: the rendered PDF document.
    '''
    # Applying the Fastify Logic
    # NOTE(review): fastify() presumably returns markdown-style bold markup,
    # since the result is rendered with markdown enabled - confirm in fast_reader.
    bold_text = Fastify_Reader(text).fastify()

    # text_to_pdf performs the same latin-1 clean-up and markdown-enabled
    # PDF layout this function used to duplicate line for line.
    return text_to_pdf(bold_text)
def text_to_pdf(text):
    '''
    Render the input text into a single PDF document and return it as bytes.

    Characters that cannot be represented in latin-1 are silently dropped
    before rendering. The text is laid out with markdown interpretation
    enabled, in 12pt Arial, using full-width cells of height 10.
    '''
    # fpdf works with latin-1 encoding, so strip everything outside it
    latin1_text = text.encode('latin-1', 'ignore').decode('latin-1')

    # Build a one-section document and flow the text into it
    document = FPDF()
    document.add_page()
    document.set_font("Arial", size=12)
    document.multi_cell(0, 10, txt=latin1_text, markdown=True)

    rendered = document.output()
    return bytes(rendered)
# Page configuration: browser-tab title and icon, wide layout
st.set_page_config(
    page_title="Fastify Reader",
    page_icon=":books:",
    layout="wide",
)

# Guidance text for the case where the PDF is not rendered properly,
# which sometimes happens due to browser cache and a Streamlit issue.
note_text = """
If the PDF is not being rendered by your browser,
1. Try this link - (https://akarshrajsingh7-fastify-reader.hf.space/)
2. Download the PDF and open it in a PDF viewer.
"""
# Sidebar: logo image followed by the tagline
with st.sidebar:
    st.image("Logo.jpg")
    # The tagline literal spans several lines, so it must be triple-quoted;
    # a single-quoted string broken across lines is a SyntaxError.
    # NOTE(review): unsafe_allow_html=True suggests HTML markup once wrapped
    # this text - restore it here if the original styling is known.
    st.markdown("""
How fast can you read?
""", unsafe_allow_html=True)
# Main Page
tab1, tab2 = st.tabs(["Input Text", "PDF file"])


def _pdf_viewer_html(encoded_pdf):
    '''
    Build the HTML snippet that embeds a base64-encoded PDF as an inline viewer.
    '''
    return (
        f'<iframe src="data:application/pdf;base64,{encoded_pdf}" '
        'width="100%" height="800" type="application/pdf"></iframe>'
    )


# First tab where Text is input
with tab1:
    user_input = st.text_input("Enter some text")

    # Compare Check Box
    compare = st.checkbox('Compare with Fastified Text', value=False, key='compare')

    # Submit Button
    if st.button("Submit", key="input-text"):
        # Spinner shown while the PDFs are being generated
        with st.spinner("Processing"):
            text = user_input

            # Render the fastified PDF once; the same bytes feed both the
            # embedded viewer and the download button.
            fastified_bytes = text_to_pdf_fastify(text)
            base64_pdf = b64encode(fastified_bytes).decode("utf-8")

            # Embedding the PDF in the HTML
            pdf_display = _pdf_viewer_html(base64_pdf)

            # The original rendering is only needed for the comparison view
            if compare:
                original_pdf = b64encode(text_to_pdf(text)).decode("utf-8")
                original_display = _pdf_viewer_html(original_pdf)

        # Compare Logic implementation
        if compare:
            # Download button sits in the right-most of three columns
            _, _, download_col = st.columns(3)
            with download_col:
                st.download_button(label="Download Fastified PDF", data=fastified_bytes, file_name='output.pdf', mime='application/pdf')

            # Side by Side comparison
            original_col, fastified_col = st.columns([1, 1], gap="small")
            with original_col:
                with st.container(border=True):
                    st.markdown("Original PDF viewer\n", unsafe_allow_html=True)
                    st.markdown(original_display, unsafe_allow_html=True)
            with fastified_col:
                with st.container(border=True):
                    st.markdown("Fastified PDF viewer\n", unsafe_allow_html=True)
                    st.markdown(pdf_display, unsafe_allow_html=True)
        else:
            # No Comparisons: download button in the middle of three columns
            _, download_col, _ = st.columns(3)
            with download_col:
                st.download_button(label="Download Fastified PDF", data=fastified_bytes, file_name='output.pdf', mime='application/pdf')
            with st.container(border=True):
                st.markdown("Fastified PDF viewer\n", unsafe_allow_html=True)
                st.markdown(pdf_display, unsafe_allow_html=True)

        # Browser Cache Note: tell the user what to do if the viewer stays blank
        st.markdown(note_text, unsafe_allow_html=True)
# Added support for PDFs having text
with tab2:
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf", accept_multiple_files=True)
# Compare Check Box
compare = st.checkbox('Compare with Fastified Text', value=False, key='compare_pdf')
# Submit Button
if st.button("Submit", key="pdf"):
#Progress Bar for the processing
with st.spinner("Processing"):
text = pdf_extract_text(uploaded_file)
# Generating base64 encoded text bytes for PDF rendering
original_pdf = b64encode(text_to_pdf(text)).decode("utf-8")
base64_pdf = b64encode(text_to_pdf_fastify(text)).decode("utf-8")
# Embedding the PDFs in the HTML
# original_display = f"""