banao-tech committed on
Commit
00bda1b
·
verified ·
1 Parent(s): 3d30079

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +40 -27
main.py CHANGED
@@ -3,11 +3,9 @@ from pydantic import BaseModel
3
  import base64
4
  import io
5
  import os
6
-
7
  from PIL import Image
8
  import torch
9
- import numpy as np
10
- import logging
11
 
12
  # Existing imports
13
  from utils import (
@@ -16,19 +14,13 @@ from utils import (
16
  get_caption_model_processor,
17
  get_som_labeled_img,
18
  )
19
- from ultralytics import YOLO
20
  from transformers import AutoProcessor, AutoModelForCausalLM
21
 
22
  # Configure logging
23
- logging.basicConfig(level=logging.INFO)
24
  logger = logging.getLogger(__name__)
25
 
26
-
27
- # main.py (YOLO loading fix)
28
- from utils import get_yolo_model
29
- import torch
30
-
31
- # Load YOLO model using official method
32
  yolo_model = get_yolo_model(model_path="weights/best.pt")
33
 
34
  # Handle device placement
@@ -70,7 +62,10 @@ def process(image_input: Image.Image, box_threshold: float, iou_threshold: float
70
  image_save_path = "imgs/saved_image_demo.png"
71
  os.makedirs(os.path.dirname(image_save_path), exist_ok=True)
72
  image_input.save(image_save_path)
73
-
 
 
 
74
  image = Image.open(image_save_path)
75
  box_overlay_ratio = image.size[0] / 3200
76
  draw_bbox_config = {
@@ -80,6 +75,7 @@ def process(image_input: Image.Image, box_threshold: float, iou_threshold: float
80
  "thickness": max(int(3 * box_overlay_ratio), 1),
81
  }
82
 
 
83
  ocr_bbox_rslt, is_goal_filtered = check_ocr_box(
84
  image_save_path,
85
  display_img=False,
@@ -90,19 +86,27 @@ def process(image_input: Image.Image, box_threshold: float, iou_threshold: float
90
  )
91
  text, ocr_bbox = ocr_bbox_rslt
92
 
93
- dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(
94
- image_save_path,
95
- yolo_model,
96
- BOX_TRESHOLD=box_threshold,
97
- output_coord_in_ratio=True,
98
- ocr_bbox=ocr_bbox,
99
- draw_bbox_config=draw_bbox_config,
100
- caption_model_processor=caption_model_processor,
101
- ocr_text=text,
102
- iou_threshold=iou_threshold,
103
- )
 
 
 
 
 
 
 
 
 
104
  image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
105
- print("Finish processing")
106
  parsed_content_list_str = "\n".join(parsed_content_list)
107
 
108
  buffered = io.BytesIO()
@@ -125,16 +129,25 @@ async def process_image(
125
  contents = await image_file.read()
126
  image_input = Image.open(io.BytesIO(contents)).convert("RGB")
127
 
128
- print(f"Processing image: {image_file.filename}")
129
- print(f"Image size: {image_input.size}")
 
 
 
 
130
 
131
  response = process(image_input, box_threshold, iou_threshold)
 
 
132
  if not response.image:
133
  raise ValueError("Empty image in response")
134
-
 
135
  return response
136
 
137
  except Exception as e:
 
138
  import traceback
139
  traceback.print_exc()
140
  raise HTTPException(status_code=500, detail=str(e))
 
 
3
  import base64
4
  import io
5
  import os
6
+ import logging
7
  from PIL import Image
8
  import torch
 
 
9
 
10
  # Existing imports
11
  from utils import (
 
14
  get_caption_model_processor,
15
  get_som_labeled_img,
16
  )
 
17
  from transformers import AutoProcessor, AutoModelForCausalLM
18
 
19
  # Configure logging
20
+ logging.basicConfig(level=logging.DEBUG) # Changed to DEBUG for more verbosity
21
  logger = logging.getLogger(__name__)
22
 
23
+ # Load YOLO model
 
 
 
 
 
24
  yolo_model = get_yolo_model(model_path="weights/best.pt")
25
 
26
  # Handle device placement
 
62
  image_save_path = "imgs/saved_image_demo.png"
63
  os.makedirs(os.path.dirname(image_save_path), exist_ok=True)
64
  image_input.save(image_save_path)
65
+
66
+ logger.info(f"Saved image for processing: {image_save_path}")
67
+
68
+ # Open image and prepare it for further processing
69
  image = Image.open(image_save_path)
70
  box_overlay_ratio = image.size[0] / 3200
71
  draw_bbox_config = {
 
75
  "thickness": max(int(3 * box_overlay_ratio), 1),
76
  }
77
 
78
+ # Run OCR to extract text and bounding boxes (YOLO labeling happens below)
79
  ocr_bbox_rslt, is_goal_filtered = check_ocr_box(
80
  image_save_path,
81
  display_img=False,
 
86
  )
87
  text, ocr_bbox = ocr_bbox_rslt
88
 
89
+ # Process image and get result
90
+ try:
91
+ dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(
92
+ image_save_path,
93
+ yolo_model,
94
+ BOX_TRESHOLD=box_threshold,
95
+ output_coord_in_ratio=True,
96
+ ocr_bbox=ocr_bbox,
97
+ draw_bbox_config=draw_bbox_config,
98
+ caption_model_processor=caption_model_processor,
99
+ ocr_text=text,
100
+ iou_threshold=iou_threshold,
101
+ )
102
+ except Exception as e:
103
+ logger.error(f"Error during labeling and captioning: {e}")
104
+ raise
105
+
106
+ logger.info("Finished processing image with YOLO and captioning.")
107
+
108
+ # Decode the base64-encoded labeled image into a PIL image
109
  image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
 
110
  parsed_content_list_str = "\n".join(parsed_content_list)
111
 
112
  buffered = io.BytesIO()
 
129
  contents = await image_file.read()
130
  image_input = Image.open(io.BytesIO(contents)).convert("RGB")
131
 
132
+ logger.info(f"Processing image: {image_file.filename}")
133
+ logger.info(f"Image size: {image_input.size}")
134
+
135
+ # Validate the input image before processing
136
+ if not image_input:
137
+ raise ValueError("Image input is empty or invalid.")
138
 
139
  response = process(image_input, box_threshold, iou_threshold)
140
+
141
+ # Ensure the response contains an image
142
  if not response.image:
143
  raise ValueError("Empty image in response")
144
+
145
+ logger.info("Processing complete, returning response.")
146
  return response
147
 
148
  except Exception as e:
149
+ logger.error(f"Error processing image: {e}")
150
  import traceback
151
  traceback.print_exc()
152
  raise HTTPException(status_code=500, detail=str(e))
153
+