Spaces:

ginigen
/

OmniParser-v2-pro

Running on Zero

App Files Files Community

ginipick committed on Aug 19

Commit

2187315

verified ·

1 Parent(s): c2f47fd

Update app.py

Browse files

Files changed (1) hide show

app.py +142 -27

app.py CHANGED Viewed

@@ -8,6 +8,11 @@ import io
 import base64, os
 from huggingface_hub import snapshot_download
 import traceback
 # Import utility functions
 from util.utils import check_ocr_box, get_yolo_model, get_caption_model_processor, get_som_labeled_img
@@ -23,17 +28,99 @@ if not os.path.exists(local_dir):
 else:
     print(f"Weights already exist at: {local_dir}")
 # Load models with error handling
 try:
     yolo_model = get_yolo_model(model_path='weights/icon_detect/model.pt')
-    caption_model_processor = get_caption_model_processor(
-        model_name="florence2",
-        model_name_or_path="weights/icon_caption"
-    )
-    print("Models loaded successfully")
 except Exception as e:
-    print(f"Error loading models: {e}")
-    raise
 # Markdown header text
 MARKDOWN = """
@@ -62,6 +149,22 @@ button:hover { transform: translateY(-2px); box-shadow: 0 4px 12px rgba(0,0,0,0.
 .gr-padded { padding: 16px; }
 """
 @spaces.GPU
 @torch.inference_mode()
 def process(
@@ -77,6 +180,10 @@ def process(
     if image_input is None:
         return None, "⚠️ Please upload an image for processing."
     try:
         # Log processing parameters
         print(f"Processing with parameters: box_threshold={box_threshold}, "
@@ -125,6 +232,12 @@ def process(
         # Get labeled image and parsed content via SOM (YOLO + caption model)
         try:
             dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(
                 image_input,
                 yolo_model,
@@ -141,6 +254,21 @@ def process(
             if dino_labled_img is None:
                 raise ValueError("Failed to generate labeled image")
         except Exception as e:
             print(f"Error in SOM processing: {e}")
             # Return original image with error message if SOM fails
@@ -258,7 +386,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="OmniParser V2 Pro"
     # Button click event with loading spinner
     submit_button_component.click(
-        fn=process,
         inputs=[
             image_input_component,
             box_threshold_component,
@@ -269,29 +397,15 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="OmniParser V2 Pro"
         outputs=[image_output_component, text_output_component],
         show_progress=True
     )
-    # Add sample images if available
-    if os.path.exists("samples"):
-        gr.Examples(
-            examples=[
-                ["samples/mobile_app.png", 0.05, 0.1, True, 640],
-                ["samples/desktop_app.png", 0.05, 0.1, True, 1280],
-            ],
-            inputs=[
-                image_input_component,
-                box_threshold_component,
-                iou_threshold_component,
-                use_paddleocr_component,
-                imgsz_component
-            ],
-            outputs=[image_output_component, text_output_component],
-            fn=process,
-            cache_examples=False
-        )
 # Launch with queue support and error handling
 if __name__ == "__main__":
     try:
         demo.queue(max_size=10)
         demo.launch(
             share=False,
@@ -301,4 +415,5 @@ if __name__ == "__main__":
         )
     except Exception as e:
         print(f"Failed to launch app: {e}")
         raise

 import base64, os
 from huggingface_hub import snapshot_download
 import traceback
+import warnings
+# Suppress specific warnings
+warnings.filterwarnings("ignore", category=FutureWarning)
+warnings.filterwarnings("ignore", message=".*_supports_sdpa.*")
 # Import utility functions
 from util.utils import check_ocr_box, get_yolo_model, get_caption_model_processor, get_som_labeled_img
 else:
     print(f"Weights already exist at: {local_dir}")
+# Monkey patch for Florence2 model compatibility
def patch_florence2_model():
    """Wrap ``AutoModelForCausalLM.from_pretrained`` with Florence2 fixes.

    The wrapper installed on the class:

    * forces ``trust_remote_code=True``, ``attn_implementation="eager"`` and
      ``use_cache=False`` when the model identifier contains "florence"
      (case-insensitive);
    * sets ``_supports_sdpa = False`` on any returned model lacking that
      attribute, whether or not the identifier matched.

    The model identifier may be passed positionally or as the
    ``pretrained_model_name_or_path`` keyword, and may be a ``str`` or a
    path-like object.

    This is best-effort: any failure while installing the wrapper is reported
    with a printed warning and is not raised. Calling the function again once
    the wrapper is installed is a no-op.
    """
    try:
        from transformers import AutoModelForCausalLM

        original_from_pretrained = AutoModelForCausalLM.from_pretrained
        if getattr(original_from_pretrained, "_florence2_patched", False):
            # Already wrapped; wrapping again would only stack redundant layers.
            return

        def patched_from_pretrained(model_name_or_path=None, *args, **kwargs):
            # Accept the identifier by keyword as well as positionally, so
            # callers using the upstream parameter name keep working.
            if model_name_or_path is None:
                model_name_or_path = kwargs.pop(
                    "pretrained_model_name_or_path", None
                )

            # str() so that pathlib.Path identifiers are matched too.
            # NOTE(review): matching is by name only; a local directory whose
            # path does not contain "florence" gets only the _supports_sdpa
            # fallback below, not the forced keyword arguments.
            if "florence" in str(model_name_or_path).lower():
                kwargs["trust_remote_code"] = True
                # Eager attention avoids the SDPA code path.
                kwargs["attn_implementation"] = "eager"
                kwargs["use_cache"] = False

            model = original_from_pretrained(model_name_or_path, *args, **kwargs)

            # Add the attribute when the loaded model does not define it.
            if not hasattr(model, "_supports_sdpa"):
                model._supports_sdpa = False
            return model

        # Marker consulted by the idempotence check above.
        patched_from_pretrained._florence2_patched = True
        AutoModelForCausalLM.from_pretrained = patched_from_pretrained
        print("Applied Florence2 compatibility patch")
    except Exception as e:
        # Deliberately broad: the patch is optional and must not stop startup.
        print(f"Warning: Could not apply Florence2 patch: {e}")
+# Apply the patch before loading models
+patch_florence2_model()
 # Load models with error handling
 try:
+    print("Loading YOLO model...")
     yolo_model = get_yolo_model(model_path='weights/icon_detect/model.pt')
+    print("YOLO model loaded successfully")
+    print("Loading caption model...")
+    # Try loading with fallback options
+    try:
+        caption_model_processor = get_caption_model_processor(
+            model_name="florence2",
+            model_name_or_path="weights/icon_caption"
+        )
+        print("Florence2 caption model loaded successfully")
+    except Exception as e:
+        print(f"Error loading Florence2, trying alternative approach: {e}")
+        # Alternative loading method
+        import sys
+        sys.path.insert(0, "weights/icon_caption")
+        from transformers import AutoProcessor, AutoModelForCausalLM
+        # Load with specific configurations to avoid SDPA issues
+        processor = AutoProcessor.from_pretrained(
+            "weights/icon_caption",
+            trust_remote_code=True,
+            revision="main"
+        )
+        model = AutoModelForCausalLM.from_pretrained(
+            "weights/icon_caption",
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+            trust_remote_code=True,
+            revision="main",
+            attn_implementation="eager",  # Avoid SDPA issues
+            device_map="auto" if torch.cuda.is_available() else None
+        )
+        # Add missing attribute
+        if not hasattr(model, '_supports_sdpa'):
+            model._supports_sdpa = False
+        caption_model_processor = {'model': model, 'processor': processor}
+        print("Caption model loaded with alternative method")
 except Exception as e:
+    print(f"Critical error loading models: {e}")
+    print(traceback.format_exc())
+    # Try to continue with a dummy model for testing
+    caption_model_processor = None
+    raise RuntimeError(f"Failed to load models: {e}")
 # Markdown header text
 MARKDOWN = """
 .gr-padded { padding: 16px; }
 """
def safe_process_wrapper(*args, **kwargs):
    """Call ``process`` and retry once after an ``_supports_sdpa`` error.

    All arguments are forwarded unchanged to ``process`` and its result is
    returned as-is.

    If ``process`` raises an ``AttributeError`` whose message mentions
    ``_supports_sdpa``, the caption model held in the module-level
    ``caption_model_processor`` dict (under the ``'model'`` key) gets
    ``_supports_sdpa = False`` when it lacks the attribute, and ``process`` is
    called a second time. Any other ``AttributeError`` propagates, as does an
    error raised by the retry.
    """
    try:
        return process(*args, **kwargs)
    except AttributeError as e:
        if '_supports_sdpa' not in str(e):
            raise
        # isinstance guard: a non-dict value must not raise a TypeError here
        # and hide the original error. The module-level name is only read, so
        # no ``global`` declaration is needed.
        if isinstance(caption_model_processor, dict) and 'model' in caption_model_processor:
            model = caption_model_processor['model']
            if not hasattr(model, '_supports_sdpa'):
                model._supports_sdpa = False
        return process(*args, **kwargs)
 @spaces.GPU
 @torch.inference_mode()
 def process(
     if image_input is None:
         return None, "⚠️ Please upload an image for processing."
+    # Check if caption model is loaded
+    if caption_model_processor is None:
+        return None, "⚠️ Caption model not loaded. Please restart the application."
     try:
         # Log processing parameters
         print(f"Processing with parameters: box_threshold={box_threshold}, "
         # Get labeled image and parsed content via SOM (YOLO + caption model)
         try:
+            # Fix model attributes before calling
+            if isinstance(caption_model_processor, dict) and 'model' in caption_model_processor:
+                model = caption_model_processor['model']
+                if not hasattr(model, '_supports_sdpa'):
+                    model._supports_sdpa = False
             dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(
                 image_input,
                 yolo_model,
             if dino_labled_img is None:
                 raise ValueError("Failed to generate labeled image")
+        except AttributeError as e:
+            if '_supports_sdpa' in str(e):
+                print(f"SDPA attribute error, attempting to fix: {e}")
+                # Try to fix and retry
+                if isinstance(caption_model_processor, dict) and 'model' in caption_model_processor:
+                    caption_model_processor['model']._supports_sdpa = False
+                # Retry the operation
+                dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(
+                    image_input, yolo_model, BOX_TRESHOLD=box_threshold,
+                    output_coord_in_ratio=True, ocr_bbox=ocr_bbox if ocr_bbox else [],
+                    draw_bbox_config=draw_bbox_config, caption_model_processor=caption_model_processor,
+                    ocr_text=text if text else [], iou_threshold=iou_threshold, imgsz=imgsz
+                )
+            else:
+                raise
         except Exception as e:
             print(f"Error in SOM processing: {e}")
             # Return original image with error message if SOM fails
     # Button click event with loading spinner
     submit_button_component.click(
+        fn=safe_process_wrapper,  # Use wrapper function
         inputs=[
             image_input_component,
             box_threshold_component,
         outputs=[image_output_component, text_output_component],
         show_progress=True
     )
 # Launch with queue support and error handling
 if __name__ == "__main__":
     try:
+        # Set environment variables for better compatibility
+        os.environ['TRANSFORMERS_OFFLINE'] = '0'
+        os.environ['HF_HUB_OFFLINE'] = '0'
+        os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # For better error messages
         demo.queue(max_size=10)
         demo.launch(
             share=False,
         )
     except Exception as e:
         print(f"Failed to launch app: {e}")
+        print(traceback.format_exc())
         raise