Spaces:

banao-tech
/

omniapi

Sleeping

App Files Community

banao-tech committed on Feb 4

Commit

9c616dc

verified ·

1 Parent(s): 99cd6de

Update main.py

Browse files

Files changed (1) hide show

main.py +51 -72

main.py CHANGED Viewed

@@ -1,71 +1,76 @@
 from fastapi import FastAPI, File, UploadFile, HTTPException
 from pydantic import BaseModel
 import base64
 import io
-import os
-import logging
 from PIL import Image
 import torch
 # Existing imports
 from utils import (
     check_ocr_box,
     get_yolo_model,
     get_caption_model_processor,
     get_som_labeled_img,
 )
-from transformers import AutoProcessor, AutoModelForCausalLM
-# Configure logging
-logging.basicConfig(level=logging.DEBUG)  # Changed to DEBUG for more verbosity
-logger = logging.getLogger(__name__)
-# Load YOLO model
-yolo_model = get_yolo_model(model_path="weights/best.pt")
-# Handle device placement
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-if str(device) == "cuda":
-    yolo_model = yolo_model.cuda()
-else:
-    yolo_model = yolo_model.cpu()
-# Load caption model and processor
 try:
-    processor = AutoProcessor.from_pretrained(
-        "microsoft/Florence-2-base", trust_remote_code=True
-    )
     model = AutoModelForCausalLM.from_pretrained(
         "weights/icon_caption_florence",
         torch_dtype=torch.float16,
         trust_remote_code=True,
     ).to("cuda")
-except Exception as e:
-    logger.warning(f"Failed to load caption model on GPU: {e}. Falling back to CPU.")
     model = AutoModelForCausalLM.from_pretrained(
         "weights/icon_caption_florence",
         torch_dtype=torch.float16,
         trust_remote_code=True,
     )
 caption_model_processor = {"processor": processor, "model": model}
-logger.info("Finished loading models!!!")
 app = FastAPI()
 class ProcessResponse(BaseModel):
     image: str  # Base64 encoded image
     parsed_content_list: str
     label_coordinates: str
-def process(image_input: Image.Image, box_threshold: float, iou_threshold: float) -> ProcessResponse:
     image_save_path = "imgs/saved_image_demo.png"
-    os.makedirs(os.path.dirname(image_save_path), exist_ok=True)
     image_input.save(image_save_path)
-    logger.info(f"Saved image for processing: {image_save_path}")
-    # Open image and prepare it for further processing
     image = Image.open(image_save_path)
     box_overlay_ratio = image.size[0] / 3200
     draw_bbox_config = {
@@ -75,7 +80,6 @@ def process(image_input: Image.Image, box_threshold: float, iou_threshold: float
         "thickness": max(int(3 * box_overlay_ratio), 1),
     }
-    # OCR and YOLO box processing
     ocr_bbox_rslt, is_goal_filtered = check_ocr_box(
         image_save_path,
         display_img=False,
@@ -85,40 +89,33 @@ def process(image_input: Image.Image, box_threshold: float, iou_threshold: float
         use_paddleocr=True,
     )
     text, ocr_bbox = ocr_bbox_rslt
-    # Process image and get result
-    try:
-        dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(
-            image_save_path,
-            yolo_model,
-            BOX_TRESHOLD=box_threshold,
-            output_coord_in_ratio=True,
-            ocr_bbox=ocr_bbox,
-            draw_bbox_config=draw_bbox_config,
-            caption_model_processor=caption_model_processor,
-            ocr_text=text,
-            iou_threshold=iou_threshold,
-        )
-    except Exception as e:
-        logger.error(f"Error during labeling and captioning: {e}")
-        raise
-    logger.info("Finished processing image with YOLO and captioning.")
-    # Convert the image to base64 string
     image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
     parsed_content_list_str = "\n".join(parsed_content_list)
     buffered = io.BytesIO()
     image.save(buffered, format="PNG")
     img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
     return ProcessResponse(
         image=img_str,
-        parsed_content_list=parsed_content_list_str,
         label_coordinates=str(label_coordinates),
     )
 @app.post("/process_image", response_model=ProcessResponse)
 async def process_image(
     image_file: UploadFile = File(...),
@@ -128,26 +125,8 @@ async def process_image(
     try:
         contents = await image_file.read()
         image_input = Image.open(io.BytesIO(contents)).convert("RGB")
-        logger.info(f"Processing image: {image_file.filename}")
-        logger.info(f"Image size: {image_input.size}")
-        # Debugging the input image
-        if not image_input:
-            raise ValueError("Image input is empty or invalid.")
-        response = process(image_input, box_threshold, iou_threshold)
-        # Ensure the response contains an image
-        if not response.image:
-            raise ValueError("Empty image in response")
-        logger.info("Processing complete, returning response.")
-        return response
     except Exception as e:
-        logger.error(f"Error processing image: {e}")
-        import traceback
-        traceback.print_exc()
-        raise HTTPException(status_code=500, detail=str(e))

 from fastapi import FastAPI, File, UploadFile, HTTPException
+from fastapi.responses import JSONResponse
 from pydantic import BaseModel
+from typing import Optional
 import base64
 import io
 from PIL import Image
 import torch
+import numpy as np
+import os
 # Existing imports
+import numpy as np
+import torch
+from PIL import Image
+import io
 from utils import (
     check_ocr_box,
     get_yolo_model,
     get_caption_model_processor,
     get_som_labeled_img,
 )
+import torch
+# yolo_model = get_yolo_model(model_path='/data/icon_detect/best.pt')
+# caption_model_processor = get_caption_model_processor(model_name="florence2", model_name_or_path="/data/icon_caption_florence")
+from ultralytics import YOLO
+# if not os.path.exists("/data/icon_detect"):
+#     os.makedirs("/data/icon_detect")
+try:
+    yolo_model = YOLO("weights/icon_detect/best.pt").to("cuda")
+except:
+    yolo_model = YOLO("weights/icon_detect/best.pt")
+from transformers import AutoProcessor, AutoModelForCausalLM
+processor = AutoProcessor.from_pretrained(
+    "microsoft/Florence-2-base", trust_remote_code=True
+)
 try:
     model = AutoModelForCausalLM.from_pretrained(
         "weights/icon_caption_florence",
         torch_dtype=torch.float16,
         trust_remote_code=True,
     ).to("cuda")
+except:
     model = AutoModelForCausalLM.from_pretrained(
         "weights/icon_caption_florence",
         torch_dtype=torch.float16,
         trust_remote_code=True,
     )
 caption_model_processor = {"processor": processor, "model": model}
+print("finish loading model!!!")
 app = FastAPI()
 class ProcessResponse(BaseModel):
     image: str  # Base64 encoded image
     parsed_content_list: str
     label_coordinates: str
+def process(
+    image_input: Image.Image, box_threshold: float, iou_threshold: float
+) -> ProcessResponse:
     image_save_path = "imgs/saved_image_demo.png"
     image_input.save(image_save_path)
     image = Image.open(image_save_path)
     box_overlay_ratio = image.size[0] / 3200
     draw_bbox_config = {
         "thickness": max(int(3 * box_overlay_ratio), 1),
     }
     ocr_bbox_rslt, is_goal_filtered = check_ocr_box(
         image_save_path,
         display_img=False,
         use_paddleocr=True,
     )
     text, ocr_bbox = ocr_bbox_rslt
+    dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(
+        image_save_path,
+        yolo_model,
+        BOX_TRESHOLD=box_threshold,
+        output_coord_in_ratio=True,
+        ocr_bbox=ocr_bbox,
+        draw_bbox_config=draw_bbox_config,
+        caption_model_processor=caption_model_processor,
+        ocr_text=text,
+        iou_threshold=iou_threshold,
+    )
     image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
+    print("finish processing")
     parsed_content_list_str = "\n".join(parsed_content_list)
+    # Encode image to base64
     buffered = io.BytesIO()
     image.save(buffered, format="PNG")
     img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
     return ProcessResponse(
         image=img_str,
+        parsed_content_list=str(parsed_content_list_str),
         label_coordinates=str(label_coordinates),
     )
 @app.post("/process_image", response_model=ProcessResponse)
 async def process_image(
     image_file: UploadFile = File(...),
     try:
         contents = await image_file.read()
         image_input = Image.open(io.BytesIO(contents)).convert("RGB")
     except Exception as e:
+        raise HTTPException(status_code=400, detail="Invalid image file")
+    response = process(image_input, box_threshold, iou_threshold)
+    return response