Spaces:

cella110n
/

cl_tagger

Running on Zero

App Files Files Community

cella110n committed on Apr 28

Commit

29788a3

verified ·

1 Parent(s): fe88ff7

Upload app.py

Browse files

Files changed (1) hide show

app.py +339 -44

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ import os
 import io
 import requests
 # import matplotlib.pyplot as plt # No plotting yet
-# import matplotlib # No plotting yet
 from huggingface_hub import hf_hub_download
 from dataclasses import dataclass
 from typing import List, Dict, Optional, Tuple
@@ -103,6 +103,187 @@ def preprocess_image(image: Image.Image, target_size=(448, 448)):
     img_array = np.expand_dims(img_array, axis=0) # Add batch dimension
     return image, img_array
 # --- Constants ---
 REPO_ID = "cella110n/cl_tagger"
 # Use the specified ONNX model filename
@@ -142,66 +323,180 @@ def initialize_onnx_paths():
         # Raise Gradio error to make it visible in the UI
         raise gr.Error(f"Initialization failed: {e}. Check logs and HF_TOKEN.")
-# --- ONNX Loading Test Function ---
 @spaces.GPU()
-def test_onnx_load():
-    print("--- test_onnx_load function started (GPU worker) ---")
-    if g_onnx_model_path is None:
-        message = "Error: ONNX model path not initialized. Check startup logs."
         print(message)
-        return message
-    if not os.path.exists(g_onnx_model_path):
-         message = f"Error: ONNX file not found at {g_onnx_model_path}. Check download."
-         print(message)
-         return message
     try:
-        print(f"Attempting to load ONNX session from: {g_onnx_model_path}")
-        # Determine providers (GPU if available)
         available_providers = ort.get_available_providers()
-        print(f"Available ORT providers: {available_providers}")
         providers = []
-        # Prioritize GPU providers
         if 'CUDAExecutionProvider' in available_providers:
-            print("CUDAExecutionProvider found.")
             providers.append('CUDAExecutionProvider')
-        elif 'DmlExecutionProvider' in available_providers: # For Windows with DirectML
-             print("DmlExecutionProvider found.")
-             providers.append('DmlExecutionProvider')
-        # Always include CPU as fallback
         providers.append('CPUExecutionProvider')
         print(f"Attempting to load session with providers: {providers}")
         session = ort.InferenceSession(g_onnx_model_path, providers=providers)
-        active_provider = session.get_providers()[0]
-        message = f"ONNX session loaded successfully on GPU worker using provider: {active_provider}"
         print(message)
-        # Clean up session immediately after test?
-        # del session # Optional, depends if we want to keep it loaded
     except Exception as e:
-        message = f"Error loading ONNX session: {e}"
         print(message)
         import traceback; traceback.print_exc()
-    return message
-# --- Gradio Interface Definition (Minimal for ONNX Load Test) ---
-with gr.Blocks() as demo:
-    gr.Markdown("""
-    # ONNX Model Load Test
-    Downloads ONNX model and tag mapping, then attempts to load the ONNX session on the GPU worker when the button is clicked.
-    Check logs for download and loading messages.
-    """)
-    with gr.Column():
-        test_button = gr.Button("Test ONNX Load on GPU")
-        output_text = gr.Textbox(label="Output")
-    test_button.click(
-        fn=test_onnx_load,
-        inputs=[],
-        outputs=[output_text]
     )
 # --- Main Block ---

 import io
 import requests
 # import matplotlib.pyplot as plt # No plotting yet
+import matplotlib # For backend setting
 from huggingface_hub import hf_hub_download
 from dataclasses import dataclass
 from typing import List, Dict, Optional, Tuple
     img_array = np.expand_dims(img_array, axis=0) # Add batch dimension
     return image, img_array
+# Add get_tags function (from onnx_predict.py)
+def get_tags(probs, labels: LabelData, gen_threshold, char_threshold):
+    result = {
+        "rating": [],
+        "general": [],
+        "character": [],
+        "copyright": [],
+        "artist": [],
+        "meta": [],
+        "quality": []
+    }
+    # Rating (select max)
+    if len(labels.rating) > 0:
+        # Ensure indices are within bounds
+        valid_indices = labels.rating[labels.rating < len(probs)]
+        if len(valid_indices) > 0:
+            rating_probs = probs[valid_indices]
+            if len(rating_probs) > 0:
+                rating_idx_local = np.argmax(rating_probs)
+                rating_idx_global = valid_indices[rating_idx_local]
+                # Check if global index is valid for names list
+                if rating_idx_global < len(labels.names) and labels.names[rating_idx_global] is not None:
+                    rating_name = labels.names[rating_idx_global]
+                    rating_conf = float(rating_probs[rating_idx_local])
+                    result["rating"].append((rating_name, rating_conf))
+                else:
+                    print(f"Warning: Invalid global index {rating_idx_global} for rating tag.")
+            else:
+                 print("Warning: rating_probs became empty after filtering.")
+        else:
+            print("Warning: No valid indices found for rating tags within probs length.")
+    # Quality (select max)
+    if len(labels.quality) > 0:
+        valid_indices = labels.quality[labels.quality < len(probs)]
+        if len(valid_indices) > 0:
+            quality_probs = probs[valid_indices]
+            if len(quality_probs) > 0:
+                quality_idx_local = np.argmax(quality_probs)
+                quality_idx_global = valid_indices[quality_idx_local]
+                if quality_idx_global < len(labels.names) and labels.names[quality_idx_global] is not None:
+                    quality_name = labels.names[quality_idx_global]
+                    quality_conf = float(quality_probs[quality_idx_local])
+                    result["quality"].append((quality_name, quality_conf))
+                else:
+                     print(f"Warning: Invalid global index {quality_idx_global} for quality tag.")
+            else:
+                print("Warning: quality_probs became empty after filtering.")
+        else:
+            print("Warning: No valid indices found for quality tags within probs length.")
+    # Threshold-based categories
+    category_map = {
+        "general": (labels.general, gen_threshold),
+        "character": (labels.character, char_threshold),
+        "copyright": (labels.copyright, char_threshold),
+        "artist": (labels.artist, char_threshold),
+        "meta": (labels.meta, gen_threshold) # Use gen_threshold for meta as per original code
+    }
+    for category, (indices, threshold) in category_map.items():
+        if len(indices) > 0:
+            valid_indices = indices[(indices < len(probs))] # Check index bounds first
+            if len(valid_indices) > 0:
+                category_probs = probs[valid_indices]
+                mask = category_probs >= threshold
+                selected_indices_local = np.where(mask)[0]
+                if len(selected_indices_local) > 0:
+                    selected_indices_global = valid_indices[selected_indices_local]
+                    selected_probs = category_probs[selected_indices_local]
+                    for idx_global, prob_val in zip(selected_indices_global, selected_probs):
+                        # Check if global index is valid for names list
+                        if idx_global < len(labels.names) and labels.names[idx_global] is not None:
+                             result[category].append((labels.names[idx_global], float(prob_val)))
+                        else:
+                             print(f"Warning: Invalid global index {idx_global} for {category} tag.")
+                # else: print(f"No tags found for category '{category}' above threshold {threshold}")
+            # else: print(f"No valid indices found for category '{category}' within probs length.")
+        # else: print(f"No indices defined for category '{category}'")
+    # Sort by probability (descending)
+    for k in result:
+        result[k] = sorted(result[k], key=lambda x: x[1], reverse=True)
+    return result
+# Add visualize_predictions function (Adapted from onnx_predict.py and previous versions)
+def visualize_predictions(image: Image.Image, predictions: Dict, threshold: float):
+    # Filter out unwanted meta tags (e.g., id, commentary, request, mismatch)
+    filtered_meta = []
+    excluded_meta_patterns = ['id', 'commentary', 'request', 'mismatch']
+    for tag, prob in predictions.get("meta", []):
+        if not any(pattern in tag.lower() for pattern in excluded_meta_patterns):
+            filtered_meta.append((tag, prob))
+    predictions["meta"] = filtered_meta # Use filtered list for visualization
+    # --- Plotting Setup ---
+    plt.rcParams['font.family'] = 'DejaVu Sans' # Ensure font compatibility
+    fig = plt.figure(figsize=(20, 12), dpi=100)
+    gs = fig.add_gridspec(1, 2, width_ratios=[1.2, 1])
+    # Left side: Image
+    ax_img = fig.add_subplot(gs[0, 0])
+    ax_img.imshow(image)
+    ax_img.set_title("Original Image")
+    ax_img.axis('off')
+    # Right side: Tags
+    ax_tags = fig.add_subplot(gs[0, 1])
+    all_tags, all_probs, all_colors = [], [], []
+    color_map = {
+        'rating': 'red', 'character': 'blue', 'copyright': 'purple',
+        'artist': 'orange', 'general': 'green', 'meta': 'gray', 'quality': 'yellow'
+    }
+    # Aggregate tags from predictions dictionary
+    for cat, prefix, color in [
+        ('rating', 'R', color_map['rating']), ('quality', 'Q', color_map['quality']),
+        ('character', 'C', color_map['character']), ('copyright', '©', color_map['copyright']),
+        ('artist', 'A', color_map['artist']), ('general', 'G', color_map['general']),
+        ('meta', 'M', color_map['meta'])
+    ]:
+        # Sort within category by probability before adding
+        sorted_tags = sorted(predictions.get(cat, []), key=lambda x: x[1], reverse=True)
+        for tag, prob in sorted_tags:
+            # Add prefix to tag name for display
+            all_tags.append(f"[{prefix}] {tag.replace('_', ' ')}") # Replace underscores for display
+            all_probs.append(prob)
+            all_colors.append(color)
+    if not all_tags:
+        ax_tags.text(0.5, 0.5, "No tags found above threshold", ha='center', va='center')
+        ax_tags.set_title(f"Tags (Thresholds: Gen/Meta={threshold:.2f}, Char/Art/Copy={threshold:.2f})") # Assuming same threshold for now
+        ax_tags.axis('off')
+    else:
+        # Sort all aggregated tags by probability (descending) for plotting order
+        # Plotting from bottom up, so we want highest probability at the top
+        sorted_indices = sorted(range(len(all_probs)), key=lambda i: all_probs[i]) # Sort ascending for barh
+        all_tags = [all_tags[i] for i in sorted_indices]
+        all_probs = [all_probs[i] for i in sorted_indices]
+        all_colors = [all_colors[i] for i in sorted_indices]
+        num_tags = len(all_tags)
+        bar_height = min(0.8, max(0.1, 0.8 * (30 / num_tags))) if num_tags > 30 else 0.8
+        y_positions = np.arange(num_tags)
+        bars = ax_tags.barh(y_positions, all_probs, height=bar_height, color=all_colors)
+        ax_tags.set_yticks(y_positions)
+        ax_tags.set_yticklabels(all_tags)
+        fontsize = 10 if num_tags <= 40 else 8 if num_tags <= 60 else 6
+        for label in ax_tags.get_yticklabels():
+            label.set_fontsize(fontsize)
+        # Add probability text next to bars
+        for i, (bar, prob) in enumerate(zip(bars, all_probs)):
+             # Position text slightly outside the bar, ensuring it stays within plot bounds
+             text_x = min(prob + 0.02, 0.98) # Adjust x position
+             ax_tags.text(text_x, y_positions[i], f"{prob:.3f}", va='center', fontsize=fontsize)
+        ax_tags.set_xlim(0, 1)
+        ax_tags.set_title(f"Tags (Thresholds approx: {threshold:.2f})") # Indicate threshold used
+        # Add legend
+        from matplotlib.patches import Patch
+        legend_elements = [
+             Patch(facecolor=color, label=cat.capitalize()) for cat, color in color_map.items()
+             if any(t.startswith(f"[{cat[0].upper() if cat != 'copyright' else '©'}]") for t in all_tags)
+        ]
+        if legend_elements:
+             ax_tags.legend(handles=legend_elements, loc='lower right', fontsize=8)
+    plt.tight_layout()
+    plt.subplots_adjust(bottom=0.05)
+    # Save plot to buffer
+    buf = io.BytesIO()
+    plt.savefig(buf, format='png', dpi=100)
+    plt.close(fig)
+    buf.seek(0)
+    viz_image = Image.open(buf)
+    return viz_image
 # --- Constants ---
 REPO_ID = "cella110n/cl_tagger"
 # Use the specified ONNX model filename
         # Raise Gradio error to make it visible in the UI
         raise gr.Error(f"Initialization failed: {e}. Check logs and HF_TOKEN.")
+# --- Main Prediction Function (ONNX) ---
 @spaces.GPU()
+def predict_onnx(image_input, gen_threshold, char_threshold, output_mode):
+    print("--- predict_onnx function started (GPU worker) ---")
+    # --- 1. Ensure paths and labels are loaded ---
+    if g_onnx_model_path is None or g_labels_data is None:
+        message = "Error: Paths or labels not initialized. Check startup logs."
         print(message)
+        # Return error message and None for the image output
+        return message, None
+    # --- 2. Load ONNX Session (inside worker) ---
+    session = None
     try:
+        print(f"Loading ONNX session from: {g_onnx_model_path}")
         available_providers = ort.get_available_providers()
         providers = []
         if 'CUDAExecutionProvider' in available_providers:
             providers.append('CUDAExecutionProvider')
         providers.append('CPUExecutionProvider')
         print(f"Attempting to load session with providers: {providers}")
         session = ort.InferenceSession(g_onnx_model_path, providers=providers)
+        print(f"ONNX session loaded using: {session.get_providers()[0]}")
+    except Exception as e:
+        message = f"Error loading ONNX session in worker: {e}"
         print(message)
+        import traceback; traceback.print_exc()
+        return message, None
+    # --- 3. Process Input Image ---
+    if image_input is None:
+        return "Please upload an image.", None
+    print(f"Processing image with thresholds: gen={gen_threshold}, char={char_threshold}")
+    try:
+        # Handle different input types (PIL, numpy, URL, file path)
+        if isinstance(image_input, str):
+            if image_input.startswith("http"): # URL
+                response = requests.get(image_input, timeout=10)
+                response.raise_for_status()
+                image = Image.open(io.BytesIO(response.content))
+            elif os.path.exists(image_input): # File path
+                image = Image.open(image_input)
+            else:
+                 raise ValueError(f"Invalid image input string: {image_input}")
+        elif isinstance(image_input, np.ndarray):
+             image = Image.fromarray(image_input)
+        elif isinstance(image_input, Image.Image):
+             image = image_input # Already a PIL image
+        else:
+             raise TypeError(f"Unsupported image input type: {type(image_input)}")
+        # Preprocess the PIL image
+        original_pil_image, input_tensor = preprocess_image(image)
+        # Ensure input tensor is float32, as expected by most ONNX models
+        # (even if the model internally uses float16)
+        input_tensor = input_tensor.astype(np.float32)
+    except Exception as e:
+        message = f"Error processing input image: {e}"
+        print(message)
+        return message, None
+    # --- 4. Run Inference ---
+    try:
+        input_name = session.get_inputs()[0].name
+        output_name = session.get_outputs()[0].name
+        print(f"Running inference with input '{input_name}', output '{output_name}'")
+        start_time = time.time()
+        outputs = session.run([output_name], {input_name: input_tensor})[0]
+        inference_time = time.time() - start_time
+        print(f"Inference completed in {inference_time:.3f} seconds")
+        # Check for NaN/Inf in outputs
+        if np.isnan(outputs).any() or np.isinf(outputs).any():
+            print("Warning: NaN or Inf detected in model output. Clamping...")
+            outputs = np.nan_to_num(outputs, nan=0.0, posinf=1.0, neginf=0.0) # Clamp to 0-1 range
+        # Apply sigmoid (outputs are likely logits)
+        # Use a stable sigmoid implementation
+        def stable_sigmoid(x):
+            return 1 / (1 + np.exp(-np.clip(x, -30, 30))) # Clip to avoid overflow
+        probs = stable_sigmoid(outputs[0]) # Assuming batch size 1
     except Exception as e:
+        message = f"Error during ONNX inference: {e}"
         print(message)
         import traceback; traceback.print_exc()
+        return message, None
+    finally:
+        # Clean up session if needed (might reduce memory usage between clicks)
+        del session
+    # --- 5. Post-process and Format Output ---
+    try:
+        print("Post-processing results...")
+        # Use the correct global variable for labels
+        predictions = get_tags(probs, g_labels_data, gen_threshold, char_threshold)
+        # Format output text string
+        output_tags = []
+        if predictions.get("rating"): output_tags.append(predictions["rating"][0][0].replace("_", " "))
+        if predictions.get("quality"): output_tags.append(predictions["quality"][0][0].replace("_", " "))
+        # Add other categories, respecting order and filtering meta if needed
+        for category in ["artist", "character", "copyright", "general", "meta"]:
+            tags_in_category = predictions.get(category, [])
+            for tag, prob in tags_in_category:
+                # Basic meta tag filtering for text output
+                if category == "meta" and any(p in tag.lower() for p in ['id', 'commentary', 'request', 'mismatch']):
+                    continue
+                output_tags.append(tag.replace("_", " "))
+        output_text = ", ".join(output_tags)
+        # Generate visualization if requested
+        viz_image = None
+        if output_mode == "Tags + Visualization":
+            print("Generating visualization...")
+            # Pass the correct threshold for display title (can pass both if needed)
+            # For simplicity, passing gen_threshold as a representative value
+            viz_image = visualize_predictions(original_pil_image, predictions, gen_threshold)
+            print("Visualization generated.")
+        else:
+            print("Visualization skipped.")
+        print("Prediction complete.")
+        return output_text, viz_image
+    except Exception as e:
+        message = f"Error during post-processing: {e}"
+        print(message)
+        import traceback; traceback.print_exc()
+        return message, None
+# --- Gradio Interface Definition (Full ONNX Version) ---
+# Page-level CSS: sets the container font, hides the footer and lets
+# `.gr-prose` blocks use the full width.
+css = """
+.gradio-container { font-family: 'IBM Plex Sans', sans-serif; }
+footer { display: none !important; }
+.gr-prose { max-width: 100% !important; }
+"""
+# js = """ /* Keep existing JS */ """ # No JS needed currently
+# Components are created in layout order inside the nested contexts below:
+# a header, then one row with an input column and an output column.
+with gr.Blocks(css=css) as demo:
+    gr.Markdown("# WD EVA02 ONNX Tagger")
+    gr.Markdown("Upload an image or paste an image URL to predict tags using the fine-tuned WD EVA02 Tagger model (ONNX).")
+    gr.Markdown(f"Model Repository: [{REPO_ID}](https://huggingface.co/{REPO_ID}) - Using ONNX file: `{ONNX_FILENAME}`")
+    with gr.Row():
+        # Left column: image, the two thresholds, output mode and the trigger button.
+        with gr.Column(scale=1):
+            image_input = gr.Image(type="pil", label="Input Image", elem_id="input-image")
+            # Add back URL input capability if desired (needs JS or separate component)
+            # gr.HTML("<div id='url-input-container'></div>")
+            gen_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, value=0.55, label="General/Meta Tag Threshold")
+            char_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, value=0.60, label="Character/Copyright/Artist Tag Threshold")
+            output_mode = gr.Radio(choices=["Tags Only", "Tags + Visualization"], value="Tags + Visualization", label="Output Mode")
+            predict_button = gr.Button("Predict", variant="primary")
+        # Right column: the two values returned by predict_onnx (text, image).
+        with gr.Column(scale=1):
+            output_tags = gr.Textbox(label="Predicted Tags", lines=10, interactive=False)
+            output_visualization = gr.Image(type="pil", label="Prediction Visualization", interactive=False)
+    # Each example row is [image URL, general threshold, character threshold,
+    # output mode], in the same order as `inputs`.
+    gr.Examples(
+        examples=[
+            ["https://pbs.twimg.com/media/GXBXsRvbQAAg1kp.jpg", 0.55, 0.60, "Tags + Visualization"],
+            ["https://pbs.twimg.com/media/GjlX0gibcAA4EJ4.jpg", 0.50, 0.50, "Tags Only"],
+            ["https://pbs.twimg.com/media/Gj4nQbjbEAATeoH.jpg", 0.55, 0.60, "Tags + Visualization"],
+            ["https://pbs.twimg.com/media/GkbtX0GaoAMlUZt.jpg", 0.45, 0.45, "Tags + Visualization"]
+        ],
+        inputs=[image_input, gen_threshold, char_threshold, output_mode],
+        outputs=[output_tags, output_visualization],
+        fn=predict_onnx, # Use the ONNX prediction function
+        cache_examples=False # Disable caching for examples during testing
+    )
+    # The button passes the four inputs to predict_onnx and routes its two
+    # return values to the text box and the visualization image.
+    predict_button.click(
+        fn=predict_onnx, # Use the ONNX prediction function
+        inputs=[image_input, gen_threshold, char_threshold, output_mode],
+        outputs=[output_tags, output_visualization]
     )
 # --- Main Block ---