Spaces:

banao-tech
/

omniapi

Sleeping

App Files Community

banao-tech committed on Feb 4

Commit

00bda1b

verified ·

1 Parent(s): 3d30079

Update main.py

Browse files

Files changed (1) hide show

main.py +40 -27

main.py CHANGED Viewed

@@ -3,11 +3,9 @@ from pydantic import BaseModel
 import base64
 import io
 import os
 from PIL import Image
 import torch
-import numpy as np
-import logging
 # Existing imports
 from utils import (
@@ -16,19 +14,13 @@ from utils import (
     get_caption_model_processor,
     get_som_labeled_img,
 )
-from ultralytics import YOLO
 from transformers import AutoProcessor, AutoModelForCausalLM
 # Configure logging
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# main.py (YOLO loading fix)
-from utils import get_yolo_model
-import torch
-# Load YOLO model using official method
 yolo_model = get_yolo_model(model_path="weights/best.pt")
 # Handle device placement
@@ -70,7 +62,10 @@ def process(image_input: Image.Image, box_threshold: float, iou_threshold: float
     image_save_path = "imgs/saved_image_demo.png"
     os.makedirs(os.path.dirname(image_save_path), exist_ok=True)
     image_input.save(image_save_path)
     image = Image.open(image_save_path)
     box_overlay_ratio = image.size[0] / 3200
     draw_bbox_config = {
@@ -80,6 +75,7 @@ def process(image_input: Image.Image, box_threshold: float, iou_threshold: float
         "thickness": max(int(3 * box_overlay_ratio), 1),
     }
     ocr_bbox_rslt, is_goal_filtered = check_ocr_box(
         image_save_path,
         display_img=False,
@@ -90,19 +86,27 @@ def process(image_input: Image.Image, box_threshold: float, iou_threshold: float
     )
     text, ocr_bbox = ocr_bbox_rslt
-    dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(
-        image_save_path,
-        yolo_model,
-        BOX_TRESHOLD=box_threshold,
-        output_coord_in_ratio=True,
-        ocr_bbox=ocr_bbox,
-        draw_bbox_config=draw_bbox_config,
-        caption_model_processor=caption_model_processor,
-        ocr_text=text,
-        iou_threshold=iou_threshold,
-    )
     image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
-    print("Finish processing")
     parsed_content_list_str = "\n".join(parsed_content_list)
     buffered = io.BytesIO()
@@ -125,16 +129,25 @@ async def process_image(
         contents = await image_file.read()
         image_input = Image.open(io.BytesIO(contents)).convert("RGB")
-        print(f"Processing image: {image_file.filename}")
-        print(f"Image size: {image_input.size}")
         response = process(image_input, box_threshold, iou_threshold)
         if not response.image:
             raise ValueError("Empty image in response")
         return response
     except Exception as e:
         import traceback
         traceback.print_exc()
         raise HTTPException(status_code=500, detail=str(e))

 import base64
 import io
 import os
+import logging
 from PIL import Image
 import torch
 # Existing imports
 from utils import (
     get_caption_model_processor,
     get_som_labeled_img,
 )
 from transformers import AutoProcessor, AutoModelForCausalLM
 # Configure logging
+logging.basicConfig(level=logging.DEBUG)  # Changed to DEBUG for more verbosity
 logger = logging.getLogger(__name__)
+# Load YOLO model
 yolo_model = get_yolo_model(model_path="weights/best.pt")
 # Handle device placement
     image_save_path = "imgs/saved_image_demo.png"
     os.makedirs(os.path.dirname(image_save_path), exist_ok=True)
     image_input.save(image_save_path)
+    logger.info(f"Saved image for processing: {image_save_path}")
+    # Open image and prepare it for further processing
     image = Image.open(image_save_path)
     box_overlay_ratio = image.size[0] / 3200
     draw_bbox_config = {
         "thickness": max(int(3 * box_overlay_ratio), 1),
     }
+    # OCR and YOLO box processing
     ocr_bbox_rslt, is_goal_filtered = check_ocr_box(
         image_save_path,
         display_img=False,
     )
     text, ocr_bbox = ocr_bbox_rslt
+    # Process image and get result
+    try:
+        dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(
+            image_save_path,
+            yolo_model,
+            BOX_TRESHOLD=box_threshold,
+            output_coord_in_ratio=True,
+            ocr_bbox=ocr_bbox,
+            draw_bbox_config=draw_bbox_config,
+            caption_model_processor=caption_model_processor,
+            ocr_text=text,
+            iou_threshold=iou_threshold,
+        )
+    except Exception as e:
+        logger.error(f"Error during labeling and captioning: {e}")
+        raise
+    logger.info("Finished processing image with YOLO and captioning.")
+    # Convert the image to base64 string
     image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
     parsed_content_list_str = "\n".join(parsed_content_list)
     buffered = io.BytesIO()
         contents = await image_file.read()
         image_input = Image.open(io.BytesIO(contents)).convert("RGB")
+        logger.info(f"Processing image: {image_file.filename}")
+        logger.info(f"Image size: {image_input.size}")
+        # Debugging the input image
+        if not image_input:
+            raise ValueError("Image input is empty or invalid.")
         response = process(image_input, box_threshold, iou_threshold)
+        # Ensure the response contains an image
         if not response.image:
             raise ValueError("Empty image in response")
+        logger.info("Processing complete, returning response.")
         return response
     except Exception as e:
+        logger.error(f"Error processing image: {e}")
         import traceback
         traceback.print_exc()
         raise HTTPException(status_code=500, detail=str(e))