Spaces:

banao-tech
/

omniapi

Sleeping

App Files Files Community

banao-tech committed on Feb 9

Commit

70f32bc

verified ·

1 Parent(s): b7016df

Update main.py

Browse files

Files changed (1) hide show

main.py +32 -43

main.py CHANGED Viewed

@@ -6,7 +6,6 @@ import os
 import logging
 from PIL import Image
 import torch
-import asyncio  # Import asyncio for asynchronous operations
 # Existing imports
 from utils import (
@@ -59,17 +58,11 @@ class ProcessResponse(BaseModel):
     parsed_content_list: str
     label_coordinates: str
-async def process(image_input: Image.Image, box_threshold: float, iou_threshold: float) -> ProcessResponse:
     image_save_path = "imgs/saved_image_demo.png"
     os.makedirs(os.path.dirname(image_save_path), exist_ok=True)
-    # Save the image asynchronously
-    loop = asyncio.get_event_loop()
-    await loop.run_in_executor(None, image_input.save, image_save_path)
-    logger.info(f"Saved image for processing: {image_save_path}")
-    # Open image and prepare it for further processing
     image = Image.open(image_save_path)
     box_overlay_ratio = image.size[0] / 3200
     draw_bbox_config = {
@@ -79,46 +72,40 @@ async def process(image_input: Image.Image, box_threshold: float, iou_threshold:
         "thickness": max(int(3 * box_overlay_ratio), 1),
     }
-    # OCR and YOLO box processing (run in a thread pool to avoid blocking the event loop)
-    ocr_bbox_rslt, is_goal_filtered = await loop.run_in_executor(
-        None,
-        check_ocr_box,
         image_save_path,
-        False,  # display_img
-        "xyxy",  # output_bb_format
-        None,  # goal_filtering
-        {"paragraph": False, "text_threshold": 0.9},  # easyocr_args
-        True,  # use_paddleocr
     )
     text, ocr_bbox = ocr_bbox_rslt
-    # Process image and get result (run in a thread pool)
-    try:
-        dino_labled_img, label_coordinates, parsed_content_list = await loop.run_in_executor(
-            None,
-            get_som_labeled_img,
-            image_save_path,
-            yolo_model,
-            box_threshold,  # BOX_TRESHOLD
-            True,  # output_coord_in_ratio
-            ocr_bbox,  # ocr_bbox
-            draw_bbox_config,  # draw_bbox_config
-            caption_model_processor,  # caption_model_processor
-            text,  # ocr_text
-            iou_threshold,  # iou_threshold
-        )
-    except Exception as e:
-        logger.error(f"Error during labeling and captioning: {e}")
-        raise
-    logger.info("Finished processing image with YOLO and captioning.")
-    # Convert the image to base64 string
     image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
-    parsed_content_list_str = "\n".join(parsed_content_list)
     buffered = io.BytesIO()
-    await loop.run_in_executor(None, image.save, buffered, "PNG")
     img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
     return ProcessResponse(
@@ -127,6 +114,7 @@ async def process(image_input: Image.Image, box_threshold: float, iou_threshold:
         label_coordinates=str(label_coordinates),
     )
 @app.post("/process_image", response_model=ProcessResponse)
 async def process_image(
     image_file: UploadFile = File(...),
@@ -144,7 +132,7 @@ async def process_image(
         if not image_input:
             raise ValueError("Image input is empty or invalid.")
-        response = await process(image_input, box_threshold, iou_threshold)
         # Ensure the response contains an image
         if not response.image:
@@ -157,4 +145,5 @@ async def process_image(
         logger.error(f"Error processing image: {e}")
         import traceback
         traceback.print_exc()
-        raise HTTPException(status_code=500, detail=str(e))

 import logging
 from PIL import Image
 import torch
 # Existing imports
 from utils import (
     parsed_content_list: str
     label_coordinates: str
+def process(image_input: Image.Image, box_threshold: float, iou_threshold: float) -> ProcessResponse:
     image_save_path = "imgs/saved_image_demo.png"
     os.makedirs(os.path.dirname(image_save_path), exist_ok=True)
+    image_input.save(image_save_path)
     image = Image.open(image_save_path)
     box_overlay_ratio = image.size[0] / 3200
     draw_bbox_config = {
         "thickness": max(int(3 * box_overlay_ratio), 1),
     }
+    ocr_bbox_rslt, is_goal_filtered = check_ocr_box(
         image_save_path,
+        display_img=False,
+        output_bb_format="xyxy",
+        goal_filtering=None,
+        easyocr_args={"paragraph": False, "text_threshold": 0.9},
+        use_paddleocr=True,
     )
     text, ocr_bbox = ocr_bbox_rslt
+    dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(
+        image_save_path,
+        yolo_model,
+        BOX_TRESHOLD=box_threshold,
+        output_coord_in_ratio=True,
+        ocr_bbox=ocr_bbox,
+        draw_bbox_config=draw_bbox_config,
+        caption_model_processor=caption_model_processor,
+        ocr_text=text,
+        iou_threshold=iou_threshold,
+    )
+    # Log parsed_content_list to inspect its structure before joining
+    logger.info(f"Parsed content list before join: {parsed_content_list}")
+    # Ensure parsed_content_list is a list of strings, not dictionaries
+    parsed_content_list_str = "\n".join([str(item) for item in parsed_content_list])
     image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
+    print("Finish processing")
+    # Convert the image to base64
     buffered = io.BytesIO()
+    image.save(buffered, format="PNG")
     img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
     return ProcessResponse(
         label_coordinates=str(label_coordinates),
     )
 @app.post("/process_image", response_model=ProcessResponse)
 async def process_image(
     image_file: UploadFile = File(...),
         if not image_input:
             raise ValueError("Image input is empty or invalid.")
+        response = process(image_input, box_threshold, iou_threshold)
         # Ensure the response contains an image
         if not response.image:
         logger.error(f"Error processing image: {e}")
         import traceback
         traceback.print_exc()
+        raise HTTPException(status_code=500, detail=str(e))