Spaces:

vsaez
/

object-detection-app

Running

App Files Files Community

Víctor Sáez committed on Jul 20

Commit

4a473ee

1 Parent(s): 0b1e00c

Restoring: adding multilanguage support

Browse files

Files changed (1) hide show

app.py +189 -133

app.py CHANGED Viewed

@@ -2,19 +2,17 @@ import gradio as gr
 import torch
 from PIL import Image, ImageDraw, ImageFont
 from transformers import DetrImageProcessor, DetrForObjectDetection
-# Only import pipeline if translation is enabled
-ENABLE_TRANSLATION = False  # Cambia a True solo si puedes cargar modelos Helsinki localmente
-if ENABLE_TRANSLATION:
-    from transformers import pipeline
-# Global variables
 current_model = None
 current_processor = None
 current_model_name = None
 available_models = {
     "DETR ResNet-50": "facebook/detr-resnet-50",
     "DETR ResNet-101": "facebook/detr-resnet-101",
     "DETR DC5": "facebook/detr-resnet-50-dc5",
@@ -23,23 +21,37 @@ available_models = {
 def load_model(model_key):
     global current_model, current_processor, current_model_name
     model_name = available_models[model_key]
     if current_model_name != model_name:
         print(f"Loading model: {model_name}")
         current_processor = DetrImageProcessor.from_pretrained(model_name)
         current_model = DetrForObjectDetection.from_pretrained(model_name)
         current_model_name = model_name
     return current_model, current_processor
-def get_font(size=12):
-    try:
-        return ImageFont.truetype("arial.ttf", size=size)
-    except:
-        return ImageFont.load_default()
 translations = {
     "English": {
         "title": "## Enhanced Object Detection App\nUpload an image to detect objects using various DETR models.",
@@ -91,131 +103,186 @@ def t(language, key):
 def get_translated_model_choices(language):
     model_mapping = {
         "DETR ResNet-50": "model_fast",
         "DETR ResNet-101": "model_precision",
         "DETR DC5": "model_small",
         "DETR ResNet-50 Face Only": "model_faces"
     }
     translated_choices = []
     for model_key in available_models.keys():
         if model_key in model_mapping:
             translation_key = model_mapping[model_key]
             translated_name = t(language, translation_key)
         else:
-            translated_name = model_key
         translated_choices.append(translated_name)
     return translated_choices
 def get_model_key_from_translation(translated_name, language):
     model_mapping = {
         "DETR ResNet-50": "model_fast",
         "DETR ResNet-101": "model_precision",
         "DETR DC5": "model_small",
         "DETR ResNet-50 Face Only": "model_faces"
     }
     for model_key, translation_key in model_mapping.items():
         if t(language, translation_key) == translated_name:
             return model_key
     if translated_name in available_models:
         return translated_name
     return "DETR ResNet-50"
-# Translation logic (only if ENABLE_TRANSLATION and model is local)
 translation_cache = {}
 def translate_label(language_label, label):
-    if language_label == "English" or not ENABLE_TRANSLATION:
-        return label
     cache_key = f"{language_label}_{label}"
     if cache_key in translation_cache:
         return translation_cache[cache_key]
-    # Dummy fallback in Spaces, or if not preloaded, just warn
-    translation_cache[cache_key] = f"{label} (no translation)"
-    return translation_cache[cache_key]
-def detect_objects(image, language_selector, translated_model_selector, threshold):
     try:
-        if image is None:
-            return None, "Please upload an image before detecting objects."
-        model_selector = get_model_key_from_translation(translated_model_selector, language_selector)
-        model, processor = load_model(model_selector)
-        inputs = processor(images=image, return_tensors="pt")
-        outputs = model(**inputs)
-        target_sizes = torch.tensor([image.size[::-1]])
-        results = processor.post_process_object_detection(
-            outputs, threshold=threshold, target_sizes=target_sizes
-        )[0]
-        image_with_boxes = image.copy()
-        draw = ImageDraw.Draw(image_with_boxes)
-        detection_info = f"Detected {len(results['scores'])} objects with threshold {threshold}\n"
-        detection_info += f"Model: {translated_model_selector} ({model_selector})\n\n"
-        colors = {
-            'high': 'red',
-            'medium': 'orange',
-            'low': 'yellow'
-        }
-        detected_objects = []
-        for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
-            confidence = score.item()
-            box = [round(x, 2) for x in box.tolist()]
-            if confidence > 0.8:
-                color = colors['high']
-            elif confidence > 0.5:
-                color = colors['medium']
-            else:
-                color = colors['low']
-            draw.rectangle(box, outline=color, width=3)
-            label_text = model.config.id2label[label.item()]
-            translated_label = translate_label(language_selector, label_text)
-            display_text = f"{translated_label}: {round(confidence, 3)}"
-            detected_objects.append({
-                'label': label_text,
-                'translated': translated_label,
-                'confidence': confidence,
-                'box': box
-            })
-            try:
-                image_width = image.size[0]
-                font_size = max(image_width // 40, 12)
-                font = get_font(font_size)
-                text_bbox = draw.textbbox((0, 0), display_text, font=font)
-                text_width = text_bbox[2] - text_bbox[0]
-                text_height = text_bbox[3] - text_bbox[1]
-            except:
-                font = get_font(12)
-                text_width = 50
-                text_height = 20
-            text_bg = [
-                box[0], box[1] - text_height - 4,
-                        box[0] + text_width + 4, box[1]
-            ]
-            draw.rectangle(text_bg, fill="black")
-            draw.text((box[0] + 2, box[1] - text_height - 2), display_text, fill="white", font=font)
-        if detected_objects:
-            detection_info += "Objects found:\n"
-            for obj in sorted(detected_objects, key=lambda x: x['confidence'], reverse=True):
-                detection_info += f"- {obj['translated']} ({obj['label']}): {obj['confidence']:.3f}\n"
-        else:
-            detection_info += "No objects detected. Try lowering the threshold."
-        return image_with_boxes, detection_info
     except Exception as e:
-        import traceback
-        print("ERROR EN DETECT_OBJECTS:", e)
-        traceback.print_exc()
-        return None, f"Error detecting objects: {e}"
-def build_app():
-    # Crear componentes con referencias globales
-    title = gr.Markdown(t("English", "title"))
     with gr.Blocks(theme=gr.themes.Soft()) as app:
-        title.render()
         with gr.Row():
             with gr.Column(scale=1):
@@ -227,14 +294,14 @@ def build_app():
             with gr.Column(scale=1):
                 model_selector = gr.Dropdown(
                     choices=get_translated_model_choices("English"),
-                    value=t("English", "model_fast"),
                     label=t("English", "dropdown_detection_model_label")
                 )
             with gr.Column(scale=1):
                 threshold_slider = gr.Slider(
                     minimum=0.1,
                     maximum=0.95,
-                    value=0.5,
                     step=0.05,
                     label=t("English", "threshold_label")
                 )
@@ -251,56 +318,36 @@ def build_app():
                     max_lines=15
                 )
         def update_interface(selected_language):
-            try:
-                translated_choices = get_translated_model_choices(selected_language)
-                default_model = t(selected_language, "model_fast")
-                # Asegurar que default_model está en las opciones
-                if default_model not in translated_choices:
-                    default_model = translated_choices[0] if translated_choices else "General Objects (fast)"
-                updates = []
-                updates.append(gr.update(value=t(selected_language, "title")))  # title
-                updates.append(gr.update(label=t(selected_language, "dropdown_label")))  # language_selector
-                updates.append(gr.update(
                     choices=translated_choices,
                     value=default_model,
                     label=t(selected_language, "dropdown_detection_model_label")
-                ))  # model_selector
-                updates.append(gr.update(label=t(selected_language, "threshold_label")))  # threshold_slider
-                updates.append(gr.update(label=t(selected_language, "input_label")))  # input_image
-                updates.append(gr.update(value=t(selected_language, "button")))  # button
-                updates.append(gr.update(label=t(selected_language, "output_label")))  # output_image
-                updates.append(gr.update(label=t(selected_language, "info_label")))  # detection_info
-                return updates
-            except Exception as e:
-                print(f"Error in update_interface: {e}")
-                import traceback
-                traceback.print_exc()
-                # Retornar valores por defecto en caso de error
-                return [
-                    gr.update(),  # title
-                    gr.update(),  # language_selector
-                    gr.update(),  # model_selector
-                    gr.update(),  # threshold_slider
-                    gr.update(),  # input_image
-                    gr.update(),  # button
-                    gr.update(),  # output_image
-                    gr.update()  # detection_info
-                ]
-        # Configurar el evento de cambio de idioma
         language_selector.change(
             fn=update_interface,
-            inputs=[language_selector],
             outputs=[title, language_selector, model_selector, threshold_slider,
                      input_image, button, output_image, detection_info],
             queue=False
         )
-        # Configurar el botón de detección
         button.click(
             fn=detect_objects,
             inputs=[input_image, language_selector, model_selector, threshold_slider],
@@ -310,9 +357,18 @@ def build_app():
     return app
-# Precargar modelo por defecto
 load_model("DETR ResNet-50")
 if __name__ == "__main__":
     app = build_app()
     app.launch()

 import torch
 from PIL import Image, ImageDraw, ImageFont
 from transformers import DetrImageProcessor, DetrForObjectDetection
+from pathlib import Path
+import transformers
+# Global variables to cache models
 current_model = None
 current_processor = None
 current_model_name = None
+# Available models with better selection
 available_models = {
+    # DETR Models
     "DETR ResNet-50": "facebook/detr-resnet-50",
     "DETR ResNet-101": "facebook/detr-resnet-101",
     "DETR DC5": "facebook/detr-resnet-50-dc5",
 def load_model(model_key):
     """Return the ``(model, processor)`` pair for ``model_key``, loading on demand.
     The most recently loaded checkpoint is kept in module-level globals, so a
     repeated request for the same model reuses it instead of loading again.
     A ``model_key`` that is not in ``available_models`` raises ``KeyError``.
     """
     global current_model, current_processor, current_model_name
     checkpoint = available_models[model_key]
     if current_model_name == checkpoint:
         # Requested checkpoint is already the cached one; nothing to load.
         return current_model, current_processor
     print(f"Loading model: {checkpoint}")
     current_processor = DetrImageProcessor.from_pretrained(checkpoint)
     current_model = DetrForObjectDetection.from_pretrained(checkpoint)
     current_model_name = checkpoint
     print(f"Model loaded: {checkpoint}")
     print(f"Available labels: {list(current_model.config.id2label.values())}")
     return current_model, current_processor
# Font used when drawing detection labels: prefer the bundled Arial file and
# fall back to Pillow's built-in default font when it is missing.
# NOTE(review): the previous comment called size=100 a "reduced" font size;
# confirm that 100 is really the intended size.
font_path = Path("assets/fonts/arial.ttf")
if font_path.exists():
    font = ImageFont.truetype(str(font_path), size=100)
else:
    print(f"Font file {font_path} not found. Using default font.")
    font = ImageFont.load_default()
+# Set up translations for the app
 translations = {
     "English": {
         "title": "## Enhanced Object Detection App\nUpload an image to detect objects using various DETR models.",
 def get_translated_model_choices(language):
     """Build the model dropdown entries localized for ``language``.
     One entry is produced per key of ``available_models``, in dictionary order:
     the translated display name when the key has a translation id, otherwise
     the raw model key as a fallback.
     """
     translation_ids = {
         "DETR ResNet-50": "model_fast",
         "DETR ResNet-101": "model_precision",
         "DETR DC5": "model_small",
         "DETR ResNet-50 Face Only": "model_faces"
     }
     return [
         t(language, translation_ids[name]) if name in translation_ids else name
         for name in available_models
     ]
 def get_model_key_from_translation(translated_name, language):
     """Map a localized dropdown entry back to its ``available_models`` key.
     Resolution order: the first model whose translated name in ``language``
     equals ``translated_name``; then ``translated_name`` itself when it already
     is a model key; finally the default ``"DETR ResNet-50"``.
     """
     translation_ids = {
         "DETR ResNet-50": "model_fast",
         "DETR ResNet-101": "model_precision",
         "DETR DC5": "model_small",
         "DETR ResNet-50 Face Only": "model_faces"
     }
     # Reverse lookup; the generator stops translating at the first match.
     matched_key = next(
         (
             key
             for key, translation_id in translation_ids.items()
             if t(language, translation_id) == translated_name
         ),
         None,
     )
     if matched_key is not None:
         return matched_key
     return translated_name if translated_name in available_models else "DETR ResNet-50"
def get_helsinki_model(language_label):
    """Return the Helsinki-NLP model name for English -> ``language_label``.

    ``None`` is returned for English (no translation needed) and for any
    language label without a known target code.
    """
    # English is intentionally absent: an English target needs no model.
    target_codes = {"Spanish": "es", "French": "fr"}
    code = target_codes.get(language_label)
    if code is None:
        return None
    return f"Helsinki-NLP/opus-mt-en-{code}"
# Finished translations, keyed by "<language>_<label>".
translation_cache = {}
# Loaded translation pipelines, keyed by Helsinki-NLP model name, so the
# model is built once per language instead of once per untranslated label.
_translator_cache = {}


def translate_label(language_label, label):
    """Translate an English detection ``label`` into ``language_label``.

    Returns the cached translation when one exists. Returns ``label``
    unchanged when no translation model is defined for the language
    (including English) or when translation fails for any reason; failures
    are printed and not cached, so a later call tries again.
    """
    cache_key = f"{language_label}_{label}"
    if cache_key in translation_cache:
        return translation_cache[cache_key]

    model_name = get_helsinki_model(language_label)
    if not model_name:
        return label

    try:
        translator = _translator_cache.get(model_name)
        if translator is None:
            translator = transformers.pipeline("translation", model=model_name)
            _translator_cache[model_name] = translator
        result = translator(label, max_length=40)
        translated = result[0]['translation_text']
    except Exception as e:
        # Best effort: model loading or inference problems must not break
        # detection, so fall back to the untranslated label.
        print(f"Translation error (429 or other): {e}")
        return label

    translation_cache[cache_key] = translated
    return translated
def detect_objects(image, language_selector, translated_model_selector, threshold):
    """Detect objects in ``image`` and return an annotated copy plus a text report.

    Args:
        image: PIL image to analyse, or ``None`` when nothing was uploaded.
        language_selector: UI language; used to resolve the model name and to
            translate detected labels.
        translated_model_selector: Model name as displayed in the dropdown.
        threshold: Minimum confidence passed to the post-processing step.

    Returns:
        ``(annotated_image, report)``. When ``image`` is ``None`` the result is
        ``(None, message)`` asking the user to upload an image.
    """
    # Without this guard the processor raises on a missing upload.
    if image is None:
        return None, "Please upload an image before detecting objects."

    # Get the actual model key from the translated name
    model_selector = get_model_key_from_translation(translated_model_selector, language_selector)
    print(f"Processing image. Language: {language_selector}, Model: {model_selector}, Threshold: {threshold}")
    model, processor = load_model(model_selector)

    inputs = processor(images=image, return_tensors="pt")
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # image.size is reversed before being handed to the post-processor.
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(
        outputs, threshold=threshold, target_sizes=target_sizes
    )[0]

    # Draw on a copy so the caller's image stays untouched.
    image_with_boxes = image.copy()
    draw = ImageDraw.Draw(image_with_boxes)

    detected_objects = []
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        confidence = score.item()
        box = [round(x, 2) for x in box.tolist()]

        # Outline colour encodes confidence: > 0.8 red, > 0.5 orange, else yellow.
        if confidence > 0.8:
            color = 'red'
        elif confidence > 0.5:
            color = 'orange'
        else:
            color = 'yellow'
        draw.rectangle(box, outline=color, width=3)

        label_text = model.config.id2label[label.item()]
        translated_label = translate_label(language_selector, label_text)
        display_text = f"{translated_label}: {round(confidence, 3)}"
        detected_objects.append({
            'label': label_text,
            'translated': translated_label,
            'confidence': confidence,
            'box': box
        })

        try:
            text_bbox = draw.textbbox((0, 0), display_text, font=font)
            text_width = text_bbox[2] - text_bbox[0]
            text_height = text_bbox[3] - text_bbox[1]
        except AttributeError:
            # Fallback for older PIL versions that lack ImageDraw.textbbox.
            text_width, text_height = draw.textsize(display_text, font=font)

        # Place the label just above the box, but never above the image's top
        # edge, where it would be drawn out of view.
        text_top = max(box[1] - text_height - 4, 0)
        text_bg = [box[0], text_top, box[0] + text_width + 4, text_top + text_height + 4]
        draw.rectangle(text_bg, fill="black")
        draw.text((box[0] + 2, text_top + 2), display_text, fill="white", font=font)

    report = [
        f"Detected {len(results['scores'])} objects with threshold {threshold}\n",
        f"Model: {translated_model_selector} ({model_selector})\n\n",
    ]
    if detected_objects:
        report.append("Objects found:\n")
        # Most confident detections first.
        for obj in sorted(detected_objects, key=lambda x: x['confidence'], reverse=True):
            report.append(f"- {obj['translated']} ({obj['label']}): {obj['confidence']:.3f}\n")
    else:
        report.append("No objects detected. Try lowering the threshold.")
    return image_with_boxes, "".join(report)
+def build_app():
     with gr.Blocks(theme=gr.themes.Soft()) as app:
+        with gr.Row():
+            title = gr.Markdown(t("English", "title"))
         with gr.Row():
             with gr.Column(scale=1):
             with gr.Column(scale=1):
                 model_selector = gr.Dropdown(
                     choices=get_translated_model_choices("English"),
+                    value=t("English", "model_fast"),  # Default to translated "fast" option
                     label=t("English", "dropdown_detection_model_label")
                 )
             with gr.Column(scale=1):
                 threshold_slider = gr.Slider(
                     minimum=0.1,
                     maximum=0.95,
+                    value=0.5,  # Lowered default threshold
                     step=0.05,
                     label=t("English", "threshold_label")
                 )
                     max_lines=15
                 )
+        # Function to update interface when language changes
         def update_interface(selected_language):
+            translated_choices = get_translated_model_choices(selected_language)
+            default_model = t(selected_language, "model_fast")
+            return [
+                gr.update(value=t(selected_language, "title")),
+                gr.update(label=t(selected_language, "dropdown_label")),
+                gr.update(
                     choices=translated_choices,
                     value=default_model,
                     label=t(selected_language, "dropdown_detection_model_label")
+                ),
+                gr.update(label=t(selected_language, "threshold_label")),
+                gr.update(label=t(selected_language, "input_label")),
+                gr.update(value=t(selected_language, "button")),
+                gr.update(label=t(selected_language, "output_label")),
+                gr.update(label=t(selected_language, "info_label"))
+            ]
+        # Connect language change event
         language_selector.change(
             fn=update_interface,
+            inputs=language_selector,
             outputs=[title, language_selector, model_selector, threshold_slider,
                      input_image, button, output_image, detection_info],
             queue=False
         )
+        # Connect detection button click event
         button.click(
             fn=detect_objects,
             inputs=[input_image, language_selector, model_selector, threshold_slider],
     return app
+# Initialize with default model
 load_model("DETR ResNet-50")
+# Launch the application
 if __name__ == "__main__":
     app = build_app()
     app.launch()