Spaces:

intuitive262
/

Doc_Reader

Sleeping

File size: 3,177 Bytes

3bc9acc
 
 
 
10c178b
3bc9acc
 
10c178b
 
 
3bc9acc
10c178b
3bc9acc
 
 
 
 
 
 
10c178b
3bc9acc
 
10c178b
3bc9acc
10c178b
3bc9acc
10c178b
 
3bc9acc
10c178b
3bc9acc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10c178b
3bc9acc
 
10c178b
3bc9acc

import gradio as gr
import numpy as np
from PIL import Image
import torch
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import re

# Checkpoint identifiers handed to ``from_pretrained`` for the two OCR engines.
_MS_CHECKPOINT = "microsoft/trocr-base-handwritten"
_SURYA_CHECKPOINT = "suryavarmaaddala/suryaocr"

# First engine: Microsoft's TrOCR. Processor and model come from the same
# checkpoint so that preprocessing and decoding match the weights.
ms_processor = TrOCRProcessor.from_pretrained(_MS_CHECKPOINT)
ms_model = VisionEncoderDecoderModel.from_pretrained(_MS_CHECKPOINT)

# Second engine: Surya-OCR.
# NOTE(review): this checkpoint is loaded through the TrOCR processor class,
# so it is presumably TrOCR-compatible -- confirm against the model card.
surya_processor = TrOCRProcessor.from_pretrained(_SURYA_CHECKPOINT)
surya_model = VisionEncoderDecoderModel.from_pretrained(_SURYA_CHECKPOINT)

def preprocess_image(image):
    """Normalise an input image to an RGB ``PIL.Image.Image``.

    Args:
        image: A filesystem path (``str``), a NumPy array, or a PIL image.
            Any other non-``None`` object is returned unchanged so callers
            that already hold a model-ready input keep working.

    Returns:
        An RGB PIL image for the supported input kinds; otherwise the input
        object as it was passed in.

    Raises:
        ValueError: If ``image`` is ``None`` (no image was supplied).
    """
    # Fail early with a readable message instead of letting the OCR
    # processor choke on ``None`` further down the pipeline.
    if image is None:
        raise ValueError("No image provided.")

    if isinstance(image, str):
        # The context manager closes the underlying file handle; convert()
        # loads the pixel data and returns an independent image first.
        with Image.open(image) as opened:
            return opened.convert("RGB")

    if isinstance(image, np.ndarray):
        return Image.fromarray(image).convert("RGB")

    if isinstance(image, Image.Image):
        # PIL inputs may be grayscale, palette or RGBA; make them RGB like
        # the other input kinds.
        return image.convert("RGB")

    return image

def microsoft_ocr(image):
    """Recognise text in ``image`` with the Microsoft TrOCR engine.

    Args:
        image: Any input accepted by ``preprocess_image``.

    Returns:
        The decoded text of the first (and only) generated sequence, with
        special tokens removed.
    """
    rgb_image = preprocess_image(image)

    # The processor turns the image into the tensor batch the model expects.
    encoded = ms_processor(rgb_image, return_tensors="pt")
    token_ids = ms_model.generate(encoded.pixel_values)

    # One image in, so one decoded string out: take the first entry.
    decoded = ms_processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0]

def surya_ocr(image):
    """Recognise text in ``image`` with the Surya-OCR engine.

    Args:
        image: Any input accepted by ``preprocess_image``.

    Returns:
        The decoded text of the first (and only) generated sequence, with
        special tokens removed.
    """
    rgb_image = preprocess_image(image)

    # The processor turns the image into the tensor batch the model expects.
    encoded = surya_processor(rgb_image, return_tensors="pt")
    token_ids = surya_model.generate(encoded.pixel_values)

    # One image in, so one decoded string out: take the first entry.
    decoded = surya_processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0]

def post_process_text(text):
    """Break ``text`` into lines at sentence boundaries.

    Every occurrence of a period followed by a space is replaced with a
    newline, so the period itself is dropped along with the space.

    Args:
        text: The raw text to reflow.

    Returns:
        The text with each ``'. '`` separator turned into ``'\\n'``.
    """
    sentence_break = '. '
    return text.replace(sentence_break, '\n')

def search_text(text, query):
    """Return the lines of ``text`` that match the regular expression ``query``.

    Matching is case-insensitive and is done line by line, where lines are
    delimited by ``'\\n'``.

    Args:
        text: The text to search.
        query: A regular expression pattern.

    Returns:
        The matching lines joined by newlines, ``"No matches found."`` when
        nothing matches, or an error message when ``query`` is not a valid
        regular expression.
    """
    try:
        matcher = re.compile(query, re.IGNORECASE)
    except re.error:
        return "Invalid regex pattern. Please try again."

    hits = [candidate for candidate in text.split('\n') if matcher.search(candidate)]
    if not hits:
        return "No matches found."
    return '\n'.join(hits)

def process_and_search(image, search_query):
    """Run both OCR engines on an image and optionally search the result.

    Args:
        image: The image to read, in any form the OCR helpers accept.
            ``None`` means no image was supplied.
        search_query: Regular expression to look for in the OCR output; a
            falsy value (empty string or ``None``) skips the search.

    Returns:
        A 3-tuple ``(image, text, search)``:

        * ``image`` -- the input image echoed back, or ``None`` on failure.
        * ``text`` -- the labelled, line-split output of both engines, or a
          human-readable error message on failure.
        * ``search`` -- the search result, or ``None`` when no query was
          given or processing failed.
    """
    # Nothing to OCR: answer with a clear prompt rather than surfacing an
    # opaque exception message from deep inside the OCR pipeline.
    if image is None:
        return None, "Please upload an image before submitting.", None

    try:
        ms_text = microsoft_ocr(image)
        surya_text = surya_ocr(image)

        result = f"Microsoft OCR Result:\n{ms_text}\n\nSurya OCR Result:\n{surya_text}"
        processed_text = post_process_text(result)

        # NOTE(review): the search runs over the labelled text, so the
        # "... OCR Result:" heading lines can match a query as well.
        search = None
        if search_query:
            search = search_text(processed_text, search_query)
        return image, processed_text, search
    except Exception as e:
        # UI boundary: report the failure in the result box instead of
        # letting the exception propagate to the caller.
        return None, f"An error occurred: {str(e)}", None

# Gradio UI: one row with two columns -- inputs in the first column, results
# in the second, which is given twice the relative width (scale=2 vs scale=1).
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath" asks Gradio to hand the uploaded image to the
            # handler as a path on disk.
            image_input = gr.Image(type="filepath", label="Upload your image")
            # The query is compiled as a regular expression by search_text.
            search_query_input = gr.Textbox(label="Enter search query")
            submit_button = gr.Button("Submit")
        
        with gr.Column(scale=2):
            # Output widgets, filled from the 3-tuple process_and_search returns.
            displayed_image = gr.Image(label="Uploaded Image")
            ocr_result = gr.Textbox(label="OCR Result", lines=10)
            search_result = gr.Textbox(label="Search Result", lines=5)

    # Wire the button: the order of `outputs` must match the order of the
    # values returned by process_and_search (image, OCR text, search result).
    submit_button.click(
        fn=process_and_search,
        inputs=[image_input, search_query_input],
        outputs=[displayed_image, ocr_result, search_result]
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()