Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -6,8 +6,7 @@ import os
|
|
6 |
import logging
|
7 |
from PIL import Image
|
8 |
import torch
|
9 |
-
|
10 |
-
# Existing imports
|
11 |
from utils import (
|
12 |
check_ocr_box,
|
13 |
get_yolo_model,
|
@@ -17,7 +16,7 @@ from utils import (
|
|
17 |
from transformers import AutoProcessor, AutoModelForCausalLM
|
18 |
|
19 |
# Configure logging
|
20 |
-
logging.basicConfig(level=logging.DEBUG)
|
21 |
logger = logging.getLogger(__name__)
|
22 |
|
23 |
# Load YOLO model
|
@@ -58,62 +57,72 @@ class ProcessResponse(BaseModel):
|
|
58 |
parsed_content_list: str
|
59 |
label_coordinates: str
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
os.makedirs(os.path.dirname(image_save_path), exist_ok=True)
|
64 |
-
image_input.save(image_save_path)
|
65 |
-
|
66 |
-
image = Image.open(image_save_path)
|
67 |
-
box_overlay_ratio = image.size[0] / 3200
|
68 |
-
draw_bbox_config = {
|
69 |
-
"text_scale": 0.8 * box_overlay_ratio,
|
70 |
-
"text_thickness": max(int(2 * box_overlay_ratio), 1),
|
71 |
-
"text_padding": max(int(3 * box_overlay_ratio), 1),
|
72 |
-
"thickness": max(int(3 * box_overlay_ratio), 1),
|
73 |
-
}
|
74 |
-
|
75 |
-
ocr_bbox_rslt, is_goal_filtered = check_ocr_box(
|
76 |
-
image_save_path,
|
77 |
-
display_img=False,
|
78 |
-
output_bb_format="xyxy",
|
79 |
-
goal_filtering=None,
|
80 |
-
easyocr_args={"paragraph": False, "text_threshold": 0.9},
|
81 |
-
use_paddleocr=True,
|
82 |
-
)
|
83 |
-
text, ocr_bbox = ocr_bbox_rslt
|
84 |
-
|
85 |
-
dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(
|
86 |
-
image_save_path,
|
87 |
-
yolo_model,
|
88 |
-
BOX_TRESHOLD=box_threshold,
|
89 |
-
output_coord_in_ratio=True,
|
90 |
-
ocr_bbox=ocr_bbox,
|
91 |
-
draw_bbox_config=draw_bbox_config,
|
92 |
-
caption_model_processor=caption_model_processor,
|
93 |
-
ocr_text=text,
|
94 |
-
iou_threshold=iou_threshold,
|
95 |
-
)
|
96 |
-
|
97 |
-
# Log parsed_content_list to inspect its structure before joining
|
98 |
-
logger.info(f"Parsed content list before join: {parsed_content_list}")
|
99 |
-
|
100 |
-
# Ensure parsed_content_list is a list of strings, not dictionaries
|
101 |
-
parsed_content_list_str = "\n".join([str(item) for item in parsed_content_list])
|
102 |
-
|
103 |
-
image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
|
104 |
-
print("Finish processing")
|
105 |
-
|
106 |
-
# Convert the image to base64
|
107 |
-
buffered = io.BytesIO()
|
108 |
-
image.save(buffered, format="PNG")
|
109 |
-
img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
110 |
-
|
111 |
-
return ProcessResponse(
|
112 |
-
image=img_str,
|
113 |
-
parsed_content_list=parsed_content_list_str,
|
114 |
-
label_coordinates=str(label_coordinates),
|
115 |
-
)
|
116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
|
118 |
@app.post("/process_image", response_model=ProcessResponse)
|
119 |
async def process_image(
|
@@ -122,28 +131,22 @@ async def process_image(
|
|
122 |
iou_threshold: float = 0.1,
|
123 |
):
|
124 |
try:
|
|
|
125 |
contents = await image_file.read()
|
126 |
image_input = Image.open(io.BytesIO(contents)).convert("RGB")
|
127 |
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
raise ValueError("Image input is empty or invalid.")
|
134 |
-
|
135 |
-
response = process(image_input, box_threshold, iou_threshold)
|
136 |
|
137 |
-
#
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
return response
|
143 |
-
|
144 |
except Exception as e:
|
145 |
logger.error(f"Error processing image: {e}")
|
146 |
-
import traceback
|
147 |
-
traceback.print_exc()
|
148 |
raise HTTPException(status_code=500, detail=str(e))
|
149 |
-
|
|
|
6 |
import logging
|
7 |
from PIL import Image
|
8 |
import torch
|
9 |
+
import asyncio
|
|
|
10 |
from utils import (
|
11 |
check_ocr_box,
|
12 |
get_yolo_model,
|
|
|
16 |
from transformers import AutoProcessor, AutoModelForCausalLM
|
17 |
|
18 |
# Configure logging
|
19 |
+
logging.basicConfig(level=logging.DEBUG)
|
20 |
logger = logging.getLogger(__name__)
|
21 |
|
22 |
# Load YOLO model
|
|
|
57 |
parsed_content_list: str
|
58 |
label_coordinates: str
|
59 |
|
60 |
+
# Module-level FIFO queue intended to serialize image-processing requests.
# NOTE(review): as used by the /process_image endpoint, each request puts its
# own already-started task on this queue and then immediately gets one back,
# so it does not actually enforce sequential execution — and under concurrency
# a request may await (and return) a task created by a DIFFERENT request.
# Confirm the intended concurrency model; a worker-task pattern or an
# asyncio.Lock around the pipeline would serialize correctly.
request_queue = asyncio.Queue()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
+
async def process(image_input: Image.Image, box_threshold: float, iou_threshold: float) -> ProcessResponse:
    """Run the OCR + set-of-marks labeling pipeline on an image.

    Args:
        image_input: Source image (caller is expected to have converted to RGB).
        box_threshold: Confidence threshold forwarded to the YOLO detector
            (passed through as the utils API's ``BOX_TRESHOLD`` keyword).
        iou_threshold: IoU threshold used when merging overlapping boxes.

    Returns:
        ProcessResponse carrying the labeled image as a base64 PNG string,
        the parsed content list joined into one newline-separated string,
        and the stringified label coordinates.

    Raises:
        Re-raises any exception from the pipeline after logging it.
    """
    try:
        image_save_path = "imgs/saved_image_demo.png"
        os.makedirs(os.path.dirname(image_save_path), exist_ok=True)

        # BUG FIX: the image must actually be written to image_save_path —
        # both check_ocr_box and get_som_labeled_img below read from that
        # path. Previously the image was only saved into an in-memory
        # BytesIO buffer that was never used, so the pipeline operated on a
        # stale or missing file. Saving runs in a worker thread so the
        # event loop is not blocked by PIL's synchronous I/O.
        await asyncio.to_thread(image_input.save, image_save_path)

        # Scale annotation drawing parameters with image width so labels
        # stay legible across resolutions (3200 px is the reference width).
        box_overlay_ratio = image_input.size[0] / 3200
        draw_bbox_config = {
            "text_scale": 0.8 * box_overlay_ratio,
            "text_thickness": max(int(2 * box_overlay_ratio), 1),
            "text_padding": max(int(3 * box_overlay_ratio), 1),
            "thickness": max(int(3 * box_overlay_ratio), 1),
        }

        # OCR pass, off the event loop. Second tuple element (goal filtering
        # flag) is unused here.
        ocr_bbox_rslt, _is_goal_filtered = await asyncio.to_thread(
            check_ocr_box,
            image_save_path,
            display_img=False,
            output_bb_format="xyxy",
            goal_filtering=None,
            easyocr_args={"paragraph": False, "text_threshold": 0.9},
            use_paddleocr=True,
        )
        text, ocr_bbox = ocr_bbox_rslt

        # YOLO detection + captioning pass, also in a worker thread.
        # NOTE: "BOX_TRESHOLD" (sic) is the keyword name the utils API expects.
        dino_labled_img, label_coordinates, parsed_content_list = await asyncio.to_thread(
            get_som_labeled_img,
            image_save_path,
            yolo_model,
            BOX_TRESHOLD=box_threshold,
            output_coord_in_ratio=True,
            ocr_bbox=ocr_bbox,
            draw_bbox_config=draw_bbox_config,
            caption_model_processor=caption_model_processor,
            ocr_text=text,
            iou_threshold=iou_threshold,
        )

        # get_som_labeled_img returns the labeled image base64-encoded;
        # decode it and re-encode as PNG base64 for the response payload.
        image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

        # Items may be dicts or strings; stringify each before joining.
        parsed_content_list_str = "\n".join(str(item) for item in parsed_content_list)

        return ProcessResponse(
            image=img_str,
            parsed_content_list=parsed_content_list_str,
            label_coordinates=str(label_coordinates),
        )
    except Exception as e:
        logger.error(f"Error in process function: {e}")
        raise
|
126 |
|
127 |
@app.post("/process_image", response_model=ProcessResponse)
|
128 |
async def process_image(
|
|
|
131 |
iou_threshold: float = 0.1,
|
132 |
):
|
133 |
try:
|
134 |
+
# Read the image file
|
135 |
contents = await image_file.read()
|
136 |
image_input = Image.open(io.BytesIO(contents)).convert("RGB")
|
137 |
|
138 |
+
# Add the task to the queue
|
139 |
+
task = asyncio.create_task(
|
140 |
+
process(image_input, box_threshold, iou_threshold)
|
141 |
+
)
|
142 |
+
await request_queue.put(task)
|
|
|
|
|
|
|
143 |
|
144 |
+
# Process the next task in the queue
|
145 |
+
task = await request_queue.get()
|
146 |
+
response = await task
|
147 |
+
request_queue.task_done()
|
148 |
+
|
149 |
return response
|
|
|
150 |
except Exception as e:
|
151 |
logger.error(f"Error processing image: {e}")
|
|
|
|
|
152 |
raise HTTPException(status_code=500, detail=str(e))
|
|