Spaces:

banao-tech
/

omniapi

Sleeping

App Files Files Community

banao-tech committed on Feb 8

Commit

ab332bc

verified ·

1 Parent(s): 13c1ab1

Update main.py

Browse files

Files changed (1) hide show

main.py +43 -32

main.py CHANGED Viewed

@@ -6,6 +6,7 @@ import os
 import logging
 from PIL import Image
 import torch
 # Existing imports
 from utils import (
@@ -58,11 +59,17 @@ class ProcessResponse(BaseModel):
     parsed_content_list: str
     label_coordinates: str
-def process(image_input: Image.Image, box_threshold: float, iou_threshold: float) -> ProcessResponse:
     image_save_path = "imgs/saved_image_demo.png"
     os.makedirs(os.path.dirname(image_save_path), exist_ok=True)
-    image_input.save(image_save_path)
     image = Image.open(image_save_path)
     box_overlay_ratio = image.size[0] / 3200
     draw_bbox_config = {
@@ -72,40 +79,46 @@ def process(image_input: Image.Image, box_threshold: float, iou_threshold: float
         "thickness": max(int(3 * box_overlay_ratio), 1),
     }
-    ocr_bbox_rslt, is_goal_filtered = check_ocr_box(
         image_save_path,
-        display_img=False,
-        output_bb_format="xyxy",
-        goal_filtering=None,
-        easyocr_args={"paragraph": False, "text_threshold": 0.9},
-        use_paddleocr=True,
     )
     text, ocr_bbox = ocr_bbox_rslt
-    dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(
-        image_save_path,
-        yolo_model,
-        BOX_TRESHOLD=box_threshold,
-        output_coord_in_ratio=True,
-        ocr_bbox=ocr_bbox,
-        draw_bbox_config=draw_bbox_config,
-        caption_model_processor=caption_model_processor,
-        ocr_text=text,
-        iou_threshold=iou_threshold,
-    )
-    # Log parsed_content_list to inspect its structure before joining
-    logger.info(f"Parsed content list before join: {parsed_content_list}")
-    # Ensure parsed_content_list is a list of strings, not dictionaries
-    parsed_content_list_str = "\n".join([str(item) for item in parsed_content_list])
     image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
-    print("Finish processing")
-    # Convert the image to base64
     buffered = io.BytesIO()
-    image.save(buffered, format="PNG")
     img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
     return ProcessResponse(
@@ -114,7 +127,6 @@ def process(image_input: Image.Image, box_threshold: float, iou_threshold: float
         label_coordinates=str(label_coordinates),
     )
 @app.post("/process_image", response_model=ProcessResponse)
 async def process_image(
     image_file: UploadFile = File(...),
@@ -132,7 +144,7 @@ async def process_image(
         if not image_input:
             raise ValueError("Image input is empty or invalid.")
-        response = process(image_input, box_threshold, iou_threshold)
         # Ensure the response contains an image
         if not response.image:
@@ -145,5 +157,4 @@ async def process_image(
         logger.error(f"Error processing image: {e}")
         import traceback
         traceback.print_exc()
-        raise HTTPException(status_code=500, detail=str(e))

 import logging
 from PIL import Image
 import torch
+import asyncio  # Import asyncio for asynchronous operations
 # Existing imports
 from utils import (
     parsed_content_list: str
     label_coordinates: str
+async def process(image_input: Image.Image, box_threshold: float, iou_threshold: float) -> ProcessResponse:
     image_save_path = "imgs/saved_image_demo.png"
     os.makedirs(os.path.dirname(image_save_path), exist_ok=True)
+    # Save the image asynchronously
+    loop = asyncio.get_event_loop()
+    await loop.run_in_executor(None, image_input.save, image_save_path)
+    logger.info(f"Saved image for processing: {image_save_path}")
+    # Open image and prepare it for further processing
     image = Image.open(image_save_path)
     box_overlay_ratio = image.size[0] / 3200
     draw_bbox_config = {
         "thickness": max(int(3 * box_overlay_ratio), 1),
     }
+    # OCR and YOLO box processing (run in a thread pool to avoid blocking the event loop)
+    ocr_bbox_rslt, is_goal_filtered = await loop.run_in_executor(
+        None,
+        check_ocr_box,
         image_save_path,
+        False,  # display_img
+        "xyxy",  # output_bb_format
+        None,  # goal_filtering
+        {"paragraph": False, "text_threshold": 0.9},  # easyocr_args
+        True,  # use_paddleocr
     )
     text, ocr_bbox = ocr_bbox_rslt
+    # Process image and get result (run in a thread pool)
+    try:
+        dino_labled_img, label_coordinates, parsed_content_list = await loop.run_in_executor(
+            None,
+            get_som_labeled_img,
+            image_save_path,
+            yolo_model,
+            box_threshold,  # BOX_TRESHOLD
+            True,  # output_coord_in_ratio
+            ocr_bbox,  # ocr_bbox
+            draw_bbox_config,  # draw_bbox_config
+            caption_model_processor,  # caption_model_processor
+            text,  # ocr_text
+            iou_threshold,  # iou_threshold
+        )
+    except Exception as e:
+        logger.error(f"Error during labeling and captioning: {e}")
+        raise
+    logger.info("Finished processing image with YOLO and captioning.")
+    # Convert the image to base64 string
     image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
+    parsed_content_list_str = "\n".join(parsed_content_list)
     buffered = io.BytesIO()
+    await loop.run_in_executor(None, image.save, buffered, "PNG")
     img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
     return ProcessResponse(
         label_coordinates=str(label_coordinates),
     )
 @app.post("/process_image", response_model=ProcessResponse)
 async def process_image(
     image_file: UploadFile = File(...),
         if not image_input:
             raise ValueError("Image input is empty or invalid.")
+        response = await process(image_input, box_threshold, iou_threshold)
         # Ensure the response contains an image
         if not response.image:
         logger.error(f"Error processing image: {e}")
         import traceback
         traceback.print_exc()
+        raise HTTPException(status_code=500, detail=str(e))