Spaces:

yuragoithf
/

OCRLLM

Sleeping

App Files Files Community

yuragoithf committed on Jul 28

Commit

e74b235

verified ·

1 Parent(s): 0fbcc15

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -28

app.py CHANGED Viewed

@@ -11,35 +11,43 @@ processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
 trocr_model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
 def recognize_handwritten_text(image):
-    # Convert Gradio image to format compatible with hezar
-    image_np = np.array(image)
-    processed_image = load_image(image_np)
-    # Detect text regions with CRAFT
-    outputs = craft_model.predict(processed_image)
-    if not outputs or "boxes" not in outputs[0]:
-        return Image.fromarray(processed_image), "No text detected"
-    boxes = outputs[0]["boxes"]
-    pil_image = Image.fromarray(processed_image)
-    texts = []
-    # Recognize text in each detected region
-    for box in boxes:
-        x_min, y_min, x_max, y_max = box[0][0], box[0][1], box[2][0], box[2][1]
-        crop = pil_image.crop((x_min, y_min, x_max, y_max))
-        pixel_values = processor(images=crop, return_tensors="pt").pixel_values
-        generated_ids = trocr_model.generate(pixel_values)
-        text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        texts.append(text)
-    # Draw boxes on the image
-    result_image = draw_boxes(processed_image, boxes)
-    result_pil = Image.fromarray(result_image)
-    # Join recognized texts
-    text_data = " ".join(texts) if texts else "No text recognized"
-    return result_pil, f"Recognized text: {text_data}"
 # Create Gradio interface
 interface = gr.Interface(

 trocr_model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
 def recognize_handwritten_text(image):
+    try:
+        # Ensure image is a PIL image and convert to NumPy array
+        if not isinstance(image, Image.Image):
+            image = Image.fromarray(np.array(image)).convert("RGB")
+        image_np = np.array(image)
+        # Load image with hezar utils
+        processed_image = load_image(image_np)
+        # Detect text regions with CRAFT
+        outputs = craft_model.predict(processed_image)
+        if not outputs or "boxes" not in outputs[0]:
+            return Image.fromarray(processed_image), "No text detected"
+        boxes = outputs[0]["boxes"]
+        pil_image = Image.fromarray(processed_image)
+        texts = []
+        # Recognize text in each detected region
+        for box in boxes:
+            x_min, y_min, x_max, y_max = box[0][0], box[0][1], box[2][0], box[2][1]
+            crop = pil_image.crop((x_min, y_min, x_max, y_max))
+            pixel_values = processor(images=crop, return_tensors="pt").pixel_values
+            generated_ids = trocr_model.generate(pixel_values)
+            text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+            texts.append(text)
+        # Draw boxes on the image
+        result_image = draw_boxes(processed_image, boxes)
+        result_pil = Image.fromarray(result_image)
+        # Join recognized texts
+        text_data = " ".join(texts) if texts else "No text recognized"
+        return result_pil, f"Recognized text: {text_data}"
+    except Exception as e:
+        return Image.fromarray(image_np), f"Error: {str(e)}"
 # Create Gradio interface
 interface = gr.Interface(