Upload 25 files
- README.md +49 -4
- app.py +956 -0
- cdl_smoothing.py +497 -0
- clothes_segmentation.py +292 -0
- color_matching.py +698 -0
- core.py +356 -0
- examples/beach.jpg +0 -0
- examples/field.jpg +0 -0
- examples/sky.jpg +0 -0
- face_comparison.py +246 -0
- folder_paths.py +22 -0
- human_parts_segmentation.py +322 -0
- models/RMBG/segformer_clothes/.cache/huggingface/.gitignore +1 -0
- models/RMBG/segformer_clothes/.cache/huggingface/download/config.json.lock +0 -0
- models/RMBG/segformer_clothes/.cache/huggingface/download/config.json.metadata +3 -0
- models/RMBG/segformer_clothes/.cache/huggingface/download/model.safetensors.lock +0 -0
- models/RMBG/segformer_clothes/.cache/huggingface/download/model.safetensors.metadata +3 -0
- models/RMBG/segformer_clothes/.cache/huggingface/download/preprocessor_config.json.lock +0 -0
- models/RMBG/segformer_clothes/.cache/huggingface/download/preprocessor_config.json.metadata +3 -0
- models/RMBG/segformer_clothes/config.json +110 -0
- models/RMBG/segformer_clothes/model.safetensors +3 -0
- models/RMBG/segformer_clothes/preprocessor_config.json +23 -0
- models/onnx/human-parts/deeplabv3p-resnet50-human.onnx +3 -0
- requirements.txt +30 -0
- spaces.py +12 -0
README.md
CHANGED
@@ -1,12 +1,57 @@
 ---
-title:
-emoji:
+title: LaDeco
+emoji: 👀
 colorFrom: gray
-colorTo:
+colorTo: blue
 sdk: gradio
-sdk_version: 5.
+sdk_version: 5.31.0
 app_file: app.py
 pinned: false
+short_description: 'LaDeco: A tool to analyze visual landscape elements'
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+# LaDeco - Landscape Environment Semantic Analysis Model
+
+LaDeco is a tool that analyzes landscape images, performs semantic segmentation to identify the different elements in a scene (sky, vegetation, buildings, etc.), and enables region-based color matching between images.
+
+## Features
+
+### Semantic Segmentation
+- Analyzes landscape images and segments them into different semantic regions
+- Provides area-ratio analysis for each landscape element
+
+### Region-Based Color Matching
+- Matches colors between corresponding semantic regions of two images
+- Visualizes which regions are being matched between the images
+- Offers multiple color matching algorithms:
+  - **adain**: Adaptive Instance Normalization - matches the mean and standard deviation of colors
+  - **mkl**: Monge-Kantorovich Linearization - linear transformation of color statistics
+  - **reinhard**: Reinhard color transfer - simple statistical approach that matches mean and standard deviation
+  - **mvgd**: Multi-Variate Gaussian Distribution - uses color covariance matrices for more accurate matching
+  - **hm**: Histogram Matching - matches the full color distribution histograms
+  - **hm-mvgd-hm**: Histogram + MVGD + Histogram compound method
+  - **hm-mkl-hm**: Histogram + MKL + Histogram compound method
+
+## Installation
+
+1. Clone this repository
+2. Create a virtual environment: `python3 -m venv .venv`
+3. Activate the virtual environment: `source .venv/bin/activate`
+4. Install the requirements: `pip install -r requirements.txt`
+5. Run the application: `python app.py`
+
+## Usage
+
+1. Upload two landscape images - the first is the color reference; the second is color-matched to the first
+2. Choose a color matching method from the dropdown menu
+3. Click "Start Analysis" to process the images
+4. View the results in the Segmentation and Color Matching tabs
+   - The Segmentation tab shows the semantic segmentation and area ratios for both images
+   - The Color Matching tab shows the matched-regions visualization and the color matching result
+
+## Reference
+
+Li-Chih Ho (2023), "LaDeco: A tool to analyze visual landscape elements", Ecological Informatics, vol. 78.
+https://www.sciencedirect.com/science/article/pii/S1574954123003187
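The adain and reinhard options listed above both work by aligning per-channel statistics inside a semantic region. The repository's RegionColorMatcher implementation is not shown in this commit view, so purely as an illustration of that masked mean/std transfer, here is a minimal, hypothetical sketch (the function name and the `factor` blending parameter are assumptions, not the project's API):

```python
import numpy as np

def match_region_mean_std(src: np.ndarray, ref: np.ndarray,
                          src_mask: np.ndarray, ref_mask: np.ndarray,
                          factor: float = 1.0) -> np.ndarray:
    """Shift the masked pixels of `src` toward the per-channel mean/std of the
    masked pixels of `ref` (the adain/reinhard-style statistic transfer)."""
    out = src.astype(np.float32).copy()
    src_px = out[src_mask > 0.5]                      # (N, 3) pixels inside the source mask
    ref_px = ref.astype(np.float32)[ref_mask > 0.5]   # (M, 3) pixels inside the reference mask
    if len(src_px) == 0 or len(ref_px) == 0:
        return src                                    # nothing to match in one of the regions
    s_mean, s_std = src_px.mean(axis=0), src_px.std(axis=0) + 1e-6
    r_mean, r_std = ref_px.mean(axis=0), ref_px.std(axis=0)
    matched = (src_px - s_mean) / s_std * r_std + r_mean
    out[src_mask > 0.5] = src_px + factor * (matched - src_px)  # blend toward the matched statistics
    return np.clip(out, 0, 255).astype(np.uint8)
```

The covariance-based options (mvgd, mkl) follow the same masked-region idea but align full covariance matrices rather than independent per-channel standard deviations.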
app.py
ADDED
@@ -0,0 +1,956 @@
import gradio as gr
from core import Ladeco
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
import matplotlib as mpl
import spaces
from PIL import Image
import numpy as np
from color_matching import RegionColorMatcher, create_comparison_figure
from face_comparison import FaceComparison
from cdl_smoothing import cdl_edge_smoothing, get_smoothing_stats, cdl_edge_smoothing_apply_to_source
import tempfile
import os
import cv2


plt.rcParams['figure.facecolor'] = '#0b0f19'
plt.rcParams['text.color'] = '#aab6cc'
ladeco = Ladeco()


@spaces.GPU
def infer_two_images(img1: str, img2: str, method: str, enable_face_matching: bool, enable_edge_smoothing: bool) -> tuple[Figure, Figure, Figure, Figure, Figure, Figure, str, str, str]:
    """
    Clean 4-step approach:
    1. Segment both images identically
    2. Determine segment correspondences
    3. Match each segment pair in isolation
    4. Composite all matched segments
    """

    cdl_display = ""  # Initialize CDL display string

    # STEP 1: SEGMENT BOTH IMAGES IDENTICALLY
    # This step is always identical regardless of face matching
    print("Step 1: Segmenting both images...")
    out1 = ladeco.predict(img1)
    out2 = ladeco.predict(img2)

    # Extract visualization and stats (unchanged)
    seg1 = out1.visualize(level=2)[0].image
    colormap1 = out1.color_map(level=2)
    area1 = out1.area()[0]

    seg2 = out2.visualize(level=2)[0].image
    colormap2 = out2.color_map(level=2)
    area2 = out2.area()[0]

    # Process areas for pie charts
    colors1, l2_area1 = [], {}
    for labelname, area_ratio in area1.items():
        if labelname.startswith("l2") and area_ratio > 0:
            colors1.append(colormap1[labelname])
            labelname = labelname.replace("l2_", "").capitalize()
            l2_area1[labelname] = area_ratio

    colors2, l2_area2 = [], {}
    for labelname, area_ratio in area2.items():
        if labelname.startswith("l2") and area_ratio > 0:
            colors2.append(colormap2[labelname])
            labelname = labelname.replace("l2_", "").capitalize()
            l2_area2[labelname] = area_ratio

    pie1 = plot_pie(l2_area1, colors=colors1)
    pie2 = plot_pie(l2_area2, colors=colors2)

    # Set plot sizes
    for fig in [seg1, seg2, pie1, pie2]:
        fig.set_dpi(96)
        fig.set_size_inches(256/96, 256/96)

    # Extract semantic masks - IDENTICAL for both images regardless of face matching
    masks1 = extract_semantic_masks(out1)
    masks2 = extract_semantic_masks(out2)

    print(f"Extracted {len(masks1)} masks from img1, {len(masks2)} masks from img2")

    # STEP 2: DETERMINE SEGMENT CORRESPONDENCES
    print("Step 2: Determining segment correspondences...")
    face_log = ["Step 2: Determining segment correspondences"]

    # Find common segments between both images
    common_segments = set(masks1.keys()).intersection(set(masks2.keys()))
    face_log.append(f"Found {len(common_segments)} common segments: {sorted(common_segments)}")

    # Determine which segments to match based on face matching logic
    segments_to_match = determine_segments_to_match(img1, img2, common_segments, enable_face_matching, face_log)

    face_log.append(f"Final segments to match: {sorted(segments_to_match)}")

    # STEP 3: MATCH EACH SEGMENT PAIR IN ISOLATION
    print("Step 3: Matching each segment pair in isolation...")
    face_log.append("\nStep 3: Color matching each segment independently")

    matched_regions = {}
    segment_masks = {}  # Store masks for all segments being matched

    for segment_name in segments_to_match:
        if segment_name in masks1 and segment_name in masks2:
            face_log.append(f"  Processing {segment_name}...")

            # Match this segment in complete isolation
            matched_region, final_mask1, final_mask2 = match_single_segment(
                img1, img2,
                masks1[segment_name], masks2[segment_name],
                segment_name, method, face_log
            )

            if matched_region is not None:
                matched_regions[segment_name] = matched_region
                segment_masks[segment_name] = final_mask2  # Use mask from target image for compositing
                face_log.append(f"  ✅ {segment_name} matched successfully")
            else:
                face_log.append(f"  ❌ {segment_name} matching failed")
        elif segment_name.startswith('l4_'):
            # Handle fine-grained segments that need to be generated
            face_log.append(f"  Processing fine-grained {segment_name}...")

            matched_region, final_mask1, final_mask2 = match_single_segment(
                img1, img2, None, None, segment_name, method, face_log
            )

            if matched_region is not None:
                matched_regions[segment_name] = matched_region
                segment_masks[segment_name] = final_mask2  # Store the generated mask
                face_log.append(f"  ✅ {segment_name} matched successfully")
            else:
                face_log.append(f"  ❌ {segment_name} matching failed")

    # STEP 4: COMPOSITE ALL MATCHED SEGMENTS
    print("Step 4: Compositing all matched segments...")
    face_log.append(f"\nStep 4: Compositing {len(matched_regions)} matched segments")

    final_image = composite_matched_segments(img2, matched_regions, segment_masks, face_log)

    # STEP 5: OPTIONAL CDL-BASED EDGE SMOOTHING
    if enable_edge_smoothing:
        print("Step 5: Applying CDL-based edge smoothing...")
        face_log.append("\nStep 5: CDL edge smoothing - applying CDL transform to image 2 based on composited result")

        try:
            # Save the composited result temporarily for CDL calculation
            temp_dir = tempfile.gettempdir()
            temp_composite_path = os.path.join(temp_dir, "temp_composite_for_cdl.png")
            final_image.save(temp_composite_path, "PNG")

            # Calculate CDL parameters to transform image 2 → composited result
            cdl_stats = get_smoothing_stats(img2, temp_composite_path)

            # Log the CDL values
            slope = cdl_stats['cdl_slope']
            offset = cdl_stats['cdl_offset']
            power = cdl_stats['cdl_power']

            # Format CDL values for display
            cdl_display = f"""📊 CDL Parameters (Image 2 → Composited Result):

🔧 Method: Simple Mean/Std Matching (basic statistical approach)

🔸 Slope (Gain):
    Red: {slope[0]:.6f}
    Green: {slope[1]:.6f}
    Blue: {slope[2]:.6f}

🔸 Offset:
    Red: {offset[0]:.6f}
    Green: {offset[1]:.6f}
    Blue: {offset[2]:.6f}

🔸 Power (Gamma):
    Red: {power[0]:.6f}
    Green: {power[1]:.6f}
    Blue: {power[2]:.6f}

These CDL values represent the color transformation needed to convert Image 2 into the composited result.

The CDL calculation uses the simplest possible approach: it matches the mean and standard deviation
of each color channel between the original and composited images, with a simple gamma calculation
based on brightness relationships.
"""

            face_log.append(f"📊 CDL Parameters (image 2 → composited result):")
            face_log.append(f"  Method: Simple mean/std matching")
            face_log.append(f"  Slope (R,G,B): [{slope[0]:.4f}, {slope[1]:.4f}, {slope[2]:.4f}]")
            face_log.append(f"  Offset (R,G,B): [{offset[0]:.4f}, {offset[1]:.4f}, {offset[2]:.4f}]")
            face_log.append(f"  Power (R,G,B): [{power[0]:.4f}, {power[1]:.4f}, {power[2]:.4f}]")

            # Apply CDL transformation to image 2 to approximate the composited result
            final_image = cdl_edge_smoothing_apply_to_source(img2, temp_composite_path, factor=1.0)

            # Clean up temp file
            if os.path.exists(temp_composite_path):
                os.remove(temp_composite_path)

            face_log.append("✅ CDL edge smoothing completed - transformed image 2 using calculated CDL parameters")

        except Exception as e:
            face_log.append(f"❌ CDL edge smoothing failed: {e}")
            cdl_display = f"❌ CDL calculation failed: {e}"
    else:
        face_log.append("\nStep 5: CDL edge smoothing disabled")
        cdl_display = "CDL edge smoothing is disabled. Enable it to see CDL parameters."

    # Save result
    temp_dir = tempfile.gettempdir()
    filename = os.path.basename(img2).split('.')[0]
    temp_filename = f"color_matched_{method}_{filename}.png"
    temp_path = os.path.join(temp_dir, temp_filename)
    final_image.save(temp_path, "PNG")

    # Create visualizations
    # For visualization, we need to collect the masks that were actually used
    vis_masks1 = {}
    vis_masks2 = {}

    for segment_name in segments_to_match:
        if segment_name in segment_masks:
            if segment_name.startswith('l4_'):
                # Fine-grained segments - we'll regenerate for visualization
                part_name = segment_name.replace('l4_', '')
                if part_name in ['face', 'hair']:
                    from human_parts_segmentation import HumanPartsSegmentation
                    segmenter = HumanPartsSegmentation()
                    masks_dict1 = segmenter.segment_parts(img1, [part_name])
                    masks_dict2 = segmenter.segment_parts(img2, [part_name])
                    if part_name in masks_dict1 and part_name in masks_dict2:
                        vis_masks1[segment_name] = masks_dict1[part_name]
                        vis_masks2[segment_name] = masks_dict2[part_name]
                elif part_name == 'upper_clothes':
                    from clothes_segmentation import ClothesSegmentation
                    segmenter = ClothesSegmentation()
                    mask1 = segmenter.segment_clothes(img1, ["Upper-clothes"])
                    mask2 = segmenter.segment_clothes(img2, ["Upper-clothes"])
                    if mask1 is not None and mask2 is not None:
                        vis_masks1[segment_name] = mask1
                        vis_masks2[segment_name] = mask2
            else:
                # Regular segments - use original masks
                if segment_name in masks1 and segment_name in masks2:
                    vis_masks1[segment_name] = masks1[segment_name]
                    vis_masks2[segment_name] = masks2[segment_name]

    mask_vis = visualize_matching_masks(img1, img2, vis_masks1, vis_masks2)

    comparison = create_comparison_figure(Image.open(img2), final_image, f"Color Matching Result ({method})")

    face_log_text = "\n".join(face_log)

    return seg1, pie1, seg2, pie2, comparison, mask_vis, temp_path, face_log_text, cdl_display


def determine_segments_to_match(img1: str, img2: str, common_segments: set, enable_face_matching: bool, log: list) -> set:
    """
    Determine which segments should be matched based on face matching logic.
    Returns the set of segment names to process.
    """
    if not enable_face_matching:
        log.append("Face matching disabled - matching all common segments")
        return common_segments

    log.append("Face matching enabled - checking faces...")

    # Run face comparison
    face_comparator = FaceComparison()
    faces_match, face_log = face_comparator.run_face_comparison(img1, img2)
    log.extend(face_log)

    if not faces_match:
        # Remove human/bio segments from matching
        log.append("No face match - excluding human/bio segments")
        non_human_segments = set()
        for segment in common_segments:
            if not any(term in segment.lower() for term in ['l3_human', 'l2_bio']):
                non_human_segments.add(segment)
            else:
                log.append(f"  Excluding human segment: {segment}")

        log.append(f"Matching {len(non_human_segments)} non-human segments")
        return non_human_segments

    else:
        # Faces match - include all segments + add fine-grained if possible
        log.append("Faces match - including all segments + fine-grained")

        segments_to_match = common_segments.copy()

        # Add fine-grained human parts if bio regions exist
        bio_segments = [s for s in common_segments if 'l2_bio' in s.lower()]
        if bio_segments:
            fine_grained_segments = add_fine_grained_segments(img1, img2, common_segments, log)
            segments_to_match.update(fine_grained_segments)

        return segments_to_match


def add_fine_grained_segments(img1: str, img2: str, common_segments: set, log: list) -> set:
    """
    Add fine-grained human parts segments when faces match.
    Returns set of fine-grained segment names that were successfully added.
    """
    fine_grained_segments = set()

    try:
        from human_parts_segmentation import HumanPartsSegmentation
        from clothes_segmentation import ClothesSegmentation

        log.append("  Adding fine-grained human parts...")

        # Get face and hair masks
        human_segmenter = HumanPartsSegmentation()
        face_hair_masks1 = human_segmenter.segment_parts(img1, ['face', 'hair'])
        face_hair_masks2 = human_segmenter.segment_parts(img2, ['face', 'hair'])

        # Get clothes masks
        clothes_segmenter = ClothesSegmentation()
        clothes_mask1 = clothes_segmenter.segment_clothes(img1, ["Upper-clothes"])
        clothes_mask2 = clothes_segmenter.segment_clothes(img2, ["Upper-clothes"])

        # Process face/hair
        for part_name, mask1 in face_hair_masks1.items():
            if (mask1 is not None and part_name in face_hair_masks2 and
                    face_hair_masks2[part_name] is not None):

                if np.sum(mask1 > 0) > 0 and np.sum(face_hair_masks2[part_name] > 0) > 0:
                    fine_grained_segments.add(f'l4_{part_name}')
                    log.append(f"    Added fine-grained: {part_name}")

        # Process clothes
        if (clothes_mask1 is not None and clothes_mask2 is not None and
                np.sum(clothes_mask1 > 0) > 0 and np.sum(clothes_mask2 > 0) > 0):
            fine_grained_segments.add('l4_upper_clothes')
            log.append(f"    Added fine-grained: upper_clothes")

    except Exception as e:
        log.append(f"  Error adding fine-grained segments: {e}")

    return fine_grained_segments


def match_single_segment(img1_path: str, img2_path: str, mask1: np.ndarray, mask2: np.ndarray,
                         segment_name: str, method: str, log: list) -> tuple[Image.Image, np.ndarray, np.ndarray]:
    """
    Match colors of a single segment in complete isolation from other segments.
    Each segment is processed independently with no knowledge of other segments.
    Returns: (matched_image, final_mask1, final_mask2)
    """
    try:
        # Load images
        img1 = Image.open(img1_path).convert("RGB")
        img2 = Image.open(img2_path).convert("RGB")

        # Convert to numpy
        img1_np = np.array(img1)
        img2_np = np.array(img2)

        # Handle fine-grained segments
        if segment_name.startswith('l4_'):
            part_name = segment_name.replace('l4_', '')
            if part_name in ['face', 'hair']:
                from human_parts_segmentation import HumanPartsSegmentation
                segmenter = HumanPartsSegmentation()
                masks_dict1 = segmenter.segment_parts(img1_path, [part_name])
                masks_dict2 = segmenter.segment_parts(img2_path, [part_name])

                if part_name in masks_dict1 and part_name in masks_dict2:
                    mask1 = masks_dict1[part_name]
                    mask2 = masks_dict2[part_name]
                else:
                    return None, None, None

            elif part_name == 'upper_clothes':
                from clothes_segmentation import ClothesSegmentation
                segmenter = ClothesSegmentation()
                mask1 = segmenter.segment_clothes(img1_path, ["Upper-clothes"])
                mask2 = segmenter.segment_clothes(img2_path, ["Upper-clothes"])

                if mask1 is None or mask2 is None:
                    return None, None, None

        # Ensure masks are same size as images
        if mask1.shape != img1_np.shape[:2]:
            mask1 = cv2.resize(mask1.astype(np.float32), (img1_np.shape[1], img1_np.shape[0]),
                               interpolation=cv2.INTER_NEAREST)
        if mask2.shape != img2_np.shape[:2]:
            mask2 = cv2.resize(mask2.astype(np.float32), (img2_np.shape[1], img2_np.shape[0]),
                               interpolation=cv2.INTER_NEAREST)

        # Convert to binary masks
        mask1_binary = (mask1 > 0.5).astype(np.float32)
        mask2_binary = (mask2 > 0.5).astype(np.float32)

        # Check if masks have content
        pixels1 = np.sum(mask1_binary > 0)
        pixels2 = np.sum(mask2_binary > 0)

        if pixels1 == 0 or pixels2 == 0:
            log.append(f"  No pixels in {segment_name}: img1={pixels1}, img2={pixels2}")
            return None, None, None

        log.append(f"  {segment_name}: img1={pixels1} pixels, img2={pixels2} pixels")

        # Create single-segment masks dictionary for color matcher
        masks1_dict = {segment_name: mask1_binary}
        masks2_dict = {segment_name: mask2_binary}

        # Apply color matching to this segment only
        color_matcher = RegionColorMatcher(factor=0.8, preserve_colors=True,
                                           preserve_luminance=True, method=method)

        matched_img = color_matcher.match_regions(img1_path, img2_path, masks1_dict, masks2_dict)

        return matched_img, mask1_binary, mask2_binary

    except Exception as e:
        log.append(f"  Error matching {segment_name}: {e}")
        return None, None, None


def composite_matched_segments(base_img_path: str, matched_regions: dict, segment_masks: dict, log: list) -> Image.Image:
    """
    Composite all matched segments back together using simple alpha compositing.
    Each matched segment is completely independent and overlaid on the base image.
    """
    # Start with base image
    result = Image.open(base_img_path).convert("RGBA")
    result_np = np.array(result)

    log.append(f"Compositing {len(matched_regions)} segments onto base image")

    for segment_name, matched_img in matched_regions.items():
        if segment_name in segment_masks:
            mask = segment_masks[segment_name]

            # Ensure mask is right size
            if mask.shape != result_np.shape[:2]:
                mask = cv2.resize(mask.astype(np.float32),
                                  (result_np.shape[1], result_np.shape[0]),
                                  interpolation=cv2.INTER_NEAREST)

            # Convert matched image to numpy
            matched_np = np.array(matched_img.convert("RGB"))

            # Ensure matched image is right size
            if matched_np.shape[:2] != result_np.shape[:2]:
                matched_pil = Image.fromarray(matched_np)
                matched_pil = matched_pil.resize((result_np.shape[1], result_np.shape[0]), Image.LANCZOS)
                matched_np = np.array(matched_pil)

            # Apply mask with alpha blending
            mask_binary = (mask > 0.5).astype(np.float32)
            alpha = np.expand_dims(mask_binary, axis=2)

            # Blend: result = result * (1 - alpha) + matched * alpha
            result_np[:, :, :3] = (result_np[:, :, :3] * (1 - alpha) +
                                   matched_np * alpha).astype(np.uint8)

            pixels = np.sum(mask_binary > 0)
            log.append(f"  Composited {segment_name}: {pixels} pixels")

    return Image.fromarray(result_np).convert("RGB")


def visualize_matching_masks(img1_path, img2_path, masks1, masks2):
    """
    Create a visualization of the masks being matched between two images.

    Args:
        img1_path: Path to first image
        img2_path: Path to second image
        masks1: Dictionary of masks for first image {label: binary_mask}
        masks2: Dictionary of masks for second image {label: binary_mask}

    Returns:
        A matplotlib Figure showing the matched masks
    """
    # Load images
    img1 = Image.open(img1_path).convert("RGB")
    img2 = Image.open(img2_path).convert("RGB")

    # Convert to numpy arrays
    img1_np = np.array(img1)
    img2_np = np.array(img2)

    # Separate fine-grained human parts from regular masks
    fine_grained_masks = {}
    regular_masks = {}

    for label, mask in masks1.items():
        if label.startswith('l4_'):  # Fine-grained human parts
            fine_grained_masks[label] = mask
        else:
            regular_masks[label] = mask

    # Find common labels in both regular and fine-grained masks
    common_regular = set(regular_masks.keys()).intersection(set(masks2.keys()))

    # Count fine-grained masks that are in both masks1 and masks2
    common_fine_grained = set()
    for label in fine_grained_masks.keys():
        if label.startswith('l4_') and label in masks2:
            part_name = label.replace('l4_', '')
            common_fine_grained.add(part_name)

    # Count total rows needed
    n_regular_rows = len(common_regular)
    n_fine_rows = len(common_fine_grained)
    n_rows = n_regular_rows + n_fine_rows

    if n_rows == 0:
        # No common regions found
        fig, ax = plt.subplots(1, 1, figsize=(10, 5))
        ax.text(0.5, 0.5, "No matching regions found between images",
                ha='center', va='center', fontsize=14, color='white')
        ax.axis('off')
        return fig

    fig, axes = plt.subplots(n_rows, 2, figsize=(12, 3 * n_rows))

    # If only one row, reshape axes
    if n_rows == 1:
        axes = np.array([axes])

    row_idx = 0

    # Visualize regular semantic regions
    for label in sorted(common_regular):
        # Get label display name
        display_name = label.replace("l2_", "").capitalize()

        # Get masks and resize them to match the image dimensions
        mask1 = regular_masks[label]
        mask2 = masks2[label]

        # Create visualizations
        masked_img1, masked_img2 = create_mask_overlay(img1_np, img2_np, mask1, mask2, [255, 0, 0])  # Red

        # Plot the masked images
        axes[row_idx, 0].imshow(masked_img1)
        axes[row_idx, 0].set_title(f"Image 1: {display_name}")
        axes[row_idx, 0].axis('off')

        axes[row_idx, 1].imshow(masked_img2)
        axes[row_idx, 1].set_title(f"Image 2: {display_name}")
        axes[row_idx, 1].axis('off')

        row_idx += 1

    # Visualize fine-grained human parts
    part_colors = {
        'face': [255, 0, 0],          # Red (like other masks)
        'hair': [255, 0, 0],          # Red (like other masks)
        'upper_clothes': [255, 0, 0]  # Red (like other masks)
    }

    for part_name in sorted(common_fine_grained):
        label = f'l4_{part_name}'

        if label in fine_grained_masks and label in masks2:
            mask1 = fine_grained_masks[label]
            mask2 = masks2[label]

            color = part_colors.get(part_name, [255, 0, 0])  # Default to red

            # Create visualizations
            masked_img1, masked_img2 = create_mask_overlay(img1_np, img2_np, mask1, mask2, color)

            # Plot the masked images
            display_name = part_name.replace('_', ' ').title()
            axes[row_idx, 0].imshow(masked_img1)
            axes[row_idx, 0].set_title(f"Image 1: {display_name} (Fine-grained)")
            axes[row_idx, 0].axis('off')

            axes[row_idx, 1].imshow(masked_img2)
            axes[row_idx, 1].set_title(f"Image 2: {display_name} (Fine-grained)")
            axes[row_idx, 1].axis('off')

            row_idx += 1

    plt.suptitle("Matched Regions (highlighted with different colors)", fontsize=16, color='white')
    plt.tight_layout()

    return fig


def create_mask_overlay(img1_np, img2_np, mask1, mask2, overlay_color):
    """
    Create mask overlays on images with the specified color.

    Args:
        img1_np: First image as numpy array
        img2_np: Second image as numpy array
        mask1: Mask for first image
        mask2: Mask for second image
        overlay_color: RGB color for overlay [R, G, B]

    Returns:
        Tuple of (masked_img1, masked_img2)
    """
    # Resize masks to match image dimensions if needed
    if mask1.shape != img1_np.shape[:2]:
        mask1_img = Image.fromarray((mask1 * 255).astype(np.uint8))
        mask1_img = mask1_img.resize((img1_np.shape[1], img1_np.shape[0]), Image.NEAREST)
        mask1 = np.array(mask1_img).astype(np.float32) / 255.0

    if mask2.shape != img2_np.shape[:2]:
        mask2_img = Image.fromarray((mask2 * 255).astype(np.uint8))
        mask2_img = mask2_img.resize((img2_np.shape[1], img2_np.shape[0]), Image.NEAREST)
        mask2 = np.array(mask2_img).astype(np.float32) / 255.0

    # Create masked versions of the images
    masked_img1 = img1_np.copy()
    masked_img2 = img2_np.copy()

    # Apply a semi-transparent colored overlay to show the masked region
    overlay_color = np.array(overlay_color, dtype=np.uint8)

    # Create alpha channel based on the mask (with transparency)
    alpha1 = mask1 * 0.6  # Increased opacity for better visibility
    alpha2 = mask2 * 0.6

    # Apply the colored overlay to masked regions
    for c in range(3):
        masked_img1[:, :, c] = masked_img1[:, :, c] * (1 - alpha1) + overlay_color[c] * alpha1
        masked_img2[:, :, c] = masked_img2[:, :, c] * (1 - alpha2) + overlay_color[c] * alpha2

    return masked_img1, masked_img2


def extract_semantic_masks(output):
    """
    Extract binary masks for each semantic region from the LadecoOutput.

    Args:
        output: LadecoOutput from Ladeco.predict()

    Returns:
        Dictionary mapping label names to binary masks
    """
    masks = {}

    # Get the segmentation mask
    seg_mask = output.masks[0].cpu().numpy()

    # Process each label in level 2 (as we're visualizing at level 2)
    for label, indices in output.ladeco2ade.items():
        if label.startswith("l2_"):
            # Create a binary mask for this label
            binary_mask = np.zeros_like(seg_mask, dtype=np.float32)

            # Set 1 for pixels matching this label
            for idx in indices:
                binary_mask[seg_mask == idx] = 1.0

            # Only include labels that have some pixels in the image
            if np.any(binary_mask):
                masks[label] = binary_mask

    return masks


def plot_pie(data: dict[str, float], colors=None) -> Figure:
    fig, ax = plt.subplots()

    labels = list(data.keys())
    sizes = list(data.values())

    *_, autotexts = ax.pie(sizes, labels=labels, autopct="%1.1f%%", colors=colors)

    for percent_text in autotexts:
        percent_text.set_color("k")

    ax.axis("equal")

    return fig


def choose_example(imgpath: str, target_component) -> gr.Image:
    img = Image.open(imgpath)
    width, height = img.size
    ratio = 512 / max(width, height)
    img = img.resize((int(width * ratio), int(height * ratio)))
    return gr.Image(value=img, label="Input Image (SVG format not supported)", type="filepath")


css = """
.reference {
    text-align: center;
    font-size: 1.2em;
    color: #d1d5db;
    margin-bottom: 20px;
}
.reference a {
    color: #FB923C;
    text-decoration: none;
}
.reference a:hover {
    text-decoration: underline;
    color: #FB923C;
}
.description {
    text-align: center;
    font-size: 1.1em;
    color: #d1d5db;
    margin-bottom: 25px;
}
.footer {
    text-align: center;
    margin-top: 30px;
    padding-top: 20px;
    border-top: 1px solid #ddd;
    color: #d1d5db;
    font-size: 14px;
}
.main-title {
    font-size: 24px;
    font-weight: bold;
    text-align: center;
    margin-bottom: 20px;
}
.selected-image {
    height: 756px;
}
.example-image {
    height: 220px;
    padding: 25px;
}
""".strip()
theme = gr.themes.Base(
    primary_hue="orange",
    secondary_hue="cyan",
    neutral_hue="gray",
).set(
    body_text_color='*neutral_100',
    body_text_color_subdued='*neutral_600',
    background_fill_primary='*neutral_950',
    background_fill_secondary='*neutral_600',
    border_color_accent='*secondary_800',
    color_accent='*primary_50',
    color_accent_soft='*secondary_800',
    code_background_fill='*neutral_700',
    block_background_fill_dark='*body_background_fill',
    block_info_text_color='#6b7280',
    block_label_text_color='*neutral_300',
    block_label_text_weight='700',
    block_title_text_color='*block_label_text_color',
    block_title_text_weight='300',
    panel_background_fill='*neutral_800',
    table_text_color_dark='*secondary_800',
    checkbox_background_color_selected='*primary_500',
    checkbox_label_background_fill='*neutral_500',
    checkbox_label_background_fill_hover='*neutral_700',
    checkbox_label_text_color='*neutral_200',
    input_background_fill='*neutral_700',
    input_background_fill_focus='*neutral_600',
    slider_color='*primary_500',
    table_even_background_fill='*neutral_700',
    table_odd_background_fill='*neutral_600',
    table_row_focus='*neutral_800'
)
with gr.Blocks(css=css, theme=theme) as demo:
    gr.HTML(
        """
        <div class="main-title">SegMatch – Zero Shot Segmentation-based color matching</div>
        <div class="description">
            Advanced region-based color matching using semantic segmentation and fine-grained human parts detection for precise, contextually-aware color transfer between images.
        </div>
        """.strip()
    )

    with gr.Row():
        # First image inputs
        with gr.Column():
            img1 = gr.Image(
                label="First Input Image - Color Reference (SVG not supported)",
                type="filepath",
                height="256px",
            )
            gr.Label("Example Images for First Input", show_label=False)
            with gr.Row():
                ex1_1 = gr.Image(
                    value="examples/beach.jpg",
                    show_label=False,
                    type="filepath",
                    elem_classes="example-image",
                    interactive=False,
                    show_download_button=False,
                    show_fullscreen_button=False,
                    show_share_button=False,
                )
                ex1_2 = gr.Image(
                    value="examples/field.jpg",
                    show_label=False,
                    type="filepath",
                    elem_classes="example-image",
                    interactive=False,
                    show_download_button=False,
                    show_fullscreen_button=False,
                    show_share_button=False,
                )

        # Second image inputs
        with gr.Column():
            img2 = gr.Image(
                label="Second Input Image - To Be Color Matched (SVG not supported)",
                type="filepath",
                height="256px",
            )
            gr.Label("Example Images for Second Input", show_label=False)
            with gr.Row():
                ex2_1 = gr.Image(
                    value="examples/field.jpg",
                    show_label=False,
                    type="filepath",
                    elem_classes="example-image",
                    interactive=False,
                    show_download_button=False,
                    show_fullscreen_button=False,
                    show_share_button=False,
                )
                ex2_2 = gr.Image(
                    value="examples/sky.jpg",
                    show_label=False,
                    type="filepath",
                    elem_classes="example-image",
                    interactive=False,
                    show_download_button=False,
                    show_fullscreen_button=False,
                    show_share_button=False,
                )

    with gr.Row():
        with gr.Column():
            method = gr.Dropdown(
                label="Color Matching Method",
                choices=["adain", "mkl", "hm", "reinhard", "mvgd", "hm-mvgd-hm", "hm-mkl-hm", "coral"],
                value="adain",
                info="Choose the algorithm for color matching between regions"
            )

        with gr.Column():
            enable_face_matching = gr.Checkbox(
                label="Enable Face Matching for Human Regions",
                value=True,
                info="Only match human regions if faces are similar (requires DeepFace)"
            )

    with gr.Row():
        with gr.Column():
            enable_edge_smoothing = gr.Checkbox(
                label="Enable CDL Edge Smoothing",
                value=False,
                info="Apply CDL transform to original image using calculated parameters (see log for values)"
            )

    start = gr.Button("Start Analysis", variant="primary")

    # Download button positioned right after the start button
    download_btn = gr.File(
        label="📥 Download Color-Matched Image",
        visible=True,
        interactive=False
    )

    with gr.Tabs():
        with gr.TabItem("Segmentation Results"):
            with gr.Row():
                # First image results
                with gr.Column():
                    gr.Label("Results for First Image", show_label=True)
                    seg1 = gr.Plot(label="Semantic Segmentation")
                    pie1 = gr.Plot(label="Element Area Ratio")

                # Second image results
                with gr.Column():
                    gr.Label("Results for Second Image", show_label=True)
                    seg2 = gr.Plot(label="Semantic Segmentation")
                    pie2 = gr.Plot(label="Element Area Ratio")

        with gr.TabItem("Color Matching"):
            gr.Markdown("""
### Region-Based Color Matching

This tab shows the result of matching the colors of the second image to the first image's colors,
but only within corresponding semantic regions. For example, sky areas in the second image are
matched to sky areas in the first image, while vegetation areas are matched separately.

#### Face Matching Feature:
When enabled, the system will detect faces within human/bio regions and only apply color matching
to human regions where similar faces are found in both images. This ensures that color transfer
only occurs between images of the same person.

#### CDL Edge Smoothing Feature:
When enabled, Color Decision List (CDL) parameters are calculated to transform the original target image
towards the segment-matched result, and those CDL parameters are then applied to the original image. This creates
a "smoothed" version that maintains the original image's overall characteristics while incorporating the
color relationships found through segment matching.

The CDL calculation uses the simplest possible approach: it matches the mean and standard deviation
of each color channel between the original and composited images, with a simple gamma calculation
based on brightness relationships.

#### Available Methods:
- **adain**: Adaptive Instance Normalization - matches the mean and standard deviation of colors
- **mkl**: Monge-Kantorovich Linearization - linear transformation of color statistics
- **reinhard**: Reinhard color transfer - simple statistical approach that matches mean and standard deviation
- **mvgd**: Multi-Variate Gaussian Distribution - uses color covariance matrices for more accurate matching
- **hm**: Histogram Matching - matches the full color distribution histograms
- **hm-mvgd-hm**: Histogram + MVGD + Histogram compound method
- **hm-mkl-hm**: Histogram + MKL + Histogram compound method
- **coral**: CORAL (Correlation Alignment) - covariance-based method for natural color transfer
            """)

            # CDL Parameters Display
            cdl_display = gr.Textbox(
                label="📊 CDL Parameters",
                lines=15,
                max_lines=20,
                interactive=False,
                info="Color Decision List parameters calculated when CDL edge smoothing is enabled"
            )

            face_log = gr.Textbox(
                label="Face Matching Log",
                lines=8,
                max_lines=15,
                interactive=False,
                info="Shows details of face detection and matching process"
            )

            mask_vis = gr.Plot(label="Matched Regions Visualization")
            comparison = gr.Plot(label="Region-Based Color Matching Result")

    gr.HTML(
        """
        <div class="footer">
            © 2024 SegMatch All Rights Reserved<br>
            Developer: Stefan Allen
        </div>
        """.strip()
    )

    # Connect the inference function
    start.click(
        fn=infer_two_images,
        inputs=[img1, img2, method, enable_face_matching, enable_edge_smoothing],
        outputs=[seg1, pie1, seg2, pie2, comparison, mask_vis, download_btn, face_log, cdl_display]
    )

    # Example image selection handlers
    ex1_1.select(fn=lambda x: choose_example(x, img1), inputs=ex1_1, outputs=img1)
    ex1_2.select(fn=lambda x: choose_example(x, img1), inputs=ex1_2, outputs=img1)
    ex2_1.select(fn=lambda x: choose_example(x, img2), inputs=ex2_1, outputs=img2)
    ex2_2.select(fn=lambda x: choose_example(x, img2), inputs=ex2_2, outputs=img2)

if __name__ == "__main__":
    demo.launch()
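Step 5 above reports ASC-CDL-style slope/offset/power triples and then calls cdl_edge_smoothing_apply_to_source, whose body is not included in this view. As a rough sketch of how such parameters are conventionally applied (out = clamp(in · slope + offset)^power on 0-1 RGB), with the helper name and signature being assumptions rather than the module's actual API:

```python
import numpy as np
from PIL import Image

def apply_cdl(img_path: str, slope, offset, power) -> Image.Image:
    """Apply an ASC-CDL-style transform per RGB channel (hypothetical helper)."""
    rgb = np.asarray(Image.open(img_path).convert("RGB"), dtype=np.float32) / 255.0
    out = np.clip(rgb * np.asarray(slope) + np.asarray(offset), 0.0, 1.0)  # slope then offset
    out = out ** np.asarray(power)                                          # per-channel gamma
    return Image.fromarray((np.clip(out, 0.0, 1.0) * 255).astype(np.uint8))

# e.g. with values like those printed in the "CDL Parameters" textbox:
# smoothed = apply_cdl("image2.png", slope=[1.02, 0.98, 1.01],
#                      offset=[0.01, -0.02, 0.00], power=[1.0, 1.0, 1.0])
```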
cdl_smoothing.py
ADDED
@@ -0,0 +1,497 @@
#!/usr/bin/env python3
"""
CDL (Color Decision List) based edge smoothing for SegMatch
"""

import numpy as np
from typing import Tuple, Optional
from PIL import Image
import cv2


def calculate_cdl_params_face_only(source: np.ndarray, target: np.ndarray,
                                   source_face_mask: np.ndarray, target_face_mask: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Calculate CDL parameters using only face pixels for focused accuracy.

    Args:
        source (np.ndarray): Source image as numpy array (0-1 range)
        target (np.ndarray): Target image as numpy array (0-1 range)
        source_face_mask (np.ndarray): Binary mask of face in source image
        target_face_mask (np.ndarray): Binary mask of face in target image

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray]: (slope, offset, power)
    """
    epsilon = 1e-6

    # Extract face pixels only
    source_face_pixels = source[source_face_mask > 0.5]
    target_face_pixels = target[target_face_mask > 0.5]

    # Ensure we have enough face pixels
    if len(source_face_pixels) < 100 or len(target_face_pixels) < 100:
        # Fallback to simple calculation if not enough face pixels
        return calculate_cdl_params_simple(source, target)

    slopes = []
    offsets = []
    powers = []

    for channel in range(3):
        src_channel = source_face_pixels[:, channel]
        tgt_channel = target_face_pixels[:, channel]

        # Use robust percentiles for face pixels
        percentiles = [10, 25, 50, 75, 90]
        src_percentiles = np.percentile(src_channel, percentiles)
        tgt_percentiles = np.percentile(tgt_channel, percentiles)

        # Calculate slope from face pixel range
        src_range = src_percentiles[4] - src_percentiles[0]  # 90th - 10th
        tgt_range = tgt_percentiles[4] - tgt_percentiles[0]
        slope = tgt_range / (src_range + epsilon)

        # Calculate offset using face median
        src_median = src_percentiles[2]
        tgt_median = tgt_percentiles[2]
        offset = tgt_median - (src_median * slope)

        # Calculate gamma from face brightness relationship
        src_mean = np.mean(src_channel)
        tgt_mean = np.mean(tgt_channel)

        if src_mean > epsilon:
            power = np.log(tgt_mean + epsilon) / np.log(src_mean + epsilon)
            power = np.clip(power, 0.3, 3.0)
        else:
            power = 1.0

        slopes.append(slope)
        offsets.append(offset)
        powers.append(power)

    return np.array(slopes), np.array(offsets), np.array(powers)


def calculate_cdl_params_simple(source: np.ndarray, target: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Simple CDL calculation as fallback method.

    Args:
        source (np.ndarray): Source image as numpy array (0-1 range)
        target (np.ndarray): Target image as numpy array (0-1 range)

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray]: (slope, offset, power)
    """
    epsilon = 1e-6

    # Calculate mean and standard deviation for each RGB channel
    source_mean = np.mean(source, axis=(0, 1))
    source_std = np.std(source, axis=(0, 1))
    target_mean = np.mean(target, axis=(0, 1))
    target_std = np.std(target, axis=(0, 1))

    # Calculate slope (gain)
    slope = target_std / (source_std + epsilon)

    # Calculate offset
    offset = target_mean - (source_mean * slope)

    # Set power to neutral
    power = np.ones(3)

    return slope, offset, power


def calculate_cdl_params_histogram(source: np.ndarray, target: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Calculate CDL parameters using histogram matching approach.

    Args:
        source (np.ndarray): Source image as numpy array (0-1 range)
        target (np.ndarray): Target image as numpy array (0-1 range)

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray]: (slope, offset, power)
    """
    epsilon = 1e-6

    # Convert to 0-255 range for histogram calculation
    source_255 = (source * 255).astype(np.uint8)
    target_255 = (target * 255).astype(np.uint8)

    slopes = []
    offsets = []
    powers = []

    for channel in range(3):
        # Calculate histograms
        hist_source = cv2.calcHist([source_255], [channel], None, [256], [0, 256])
        hist_target = cv2.calcHist([target_255], [channel], None, [256], [0, 256])

        # Calculate cumulative distributions
        cdf_source = np.cumsum(hist_source) / np.sum(hist_source)
        cdf_target = np.cumsum(hist_target) / np.sum(hist_target)

        # Find percentile mappings
        p25_src = np.percentile(source[:, :, channel], 25)
        p75_src = np.percentile(source[:, :, channel], 75)
        p25_tgt = np.percentile(target[:, :, channel], 25)
        p75_tgt = np.percentile(target[:, :, channel], 75)

        # Calculate slope from percentile mapping
        slope = (p75_tgt - p25_tgt) / (p75_src - p25_src + epsilon)

        # Calculate offset
        median_src = np.percentile(source[:, :, channel], 50)
        median_tgt = np.percentile(target[:, :, channel], 50)
        offset = median_tgt - (median_src * slope)

        # Estimate power/gamma from the histogram shape
        mean_src = np.mean(source[:, :, channel])
        mean_tgt = np.mean(target[:, :, channel])
        if mean_src > epsilon:
            power = np.log(mean_tgt + epsilon) / np.log(mean_src + epsilon)
            power = np.clip(power, 0.1, 10.0)  # Reasonable gamma range
        else:
            power = 1.0

        slopes.append(slope)
        offsets.append(offset)
        powers.append(power)

    return np.array(slopes), np.array(offsets), np.array(powers)


def calculate_cdl_params_mask_aware(source: np.ndarray, target: np.ndarray,
                                    changed_mask: Optional[np.ndarray] = None) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Calculate CDL parameters focusing only on changed regions.

    Args:
        source (np.ndarray): Source image as numpy array (0-1 range)
        target (np.ndarray): Target image as numpy array (0-1 range)
        changed_mask (np.ndarray, optional): Binary mask of changed regions

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray]: (slope, offset, power)
    """
    if changed_mask is not None:
        # Only use pixels where changes occurred
        mask_bool = changed_mask > 0.5
        if np.sum(mask_bool) > 100:  # Ensure enough pixels
            source_masked = source[mask_bool]
            target_masked = target[mask_bool]

            # Reshape back to have channel dimension
            source_masked = source_masked.reshape(-1, 3)
            target_masked = target_masked.reshape(-1, 3)

            # Calculate statistics on masked regions
            epsilon = 1e-6
            source_mean = np.mean(source_masked, axis=0)
            source_std = np.std(source_masked, axis=0)
            target_mean = np.mean(target_masked, axis=0)
            target_std = np.std(target_masked, axis=0)

            slope = target_std / (source_std + epsilon)
            offset = target_mean - (source_mean * slope)
            power = np.ones(3)

            return slope, offset, power

    # Fallback to simple method if mask is not useful
    return calculate_cdl_params_simple(source, target)


def calculate_cdl_params_lab(source: np.ndarray, target: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Calculate CDL parameters in LAB color space for better perceptual matching.

    Args:
        source (np.ndarray): Source image as numpy array (0-1 range)
        target (np.ndarray): Target image as numpy array (0-1 range)

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray]: (slope, offset, power)
    """
    # Convert to LAB color space
    source_lab = cv2.cvtColor((source * 255).astype(np.uint8), cv2.COLOR_RGB2LAB).astype(np.float32)
    target_lab = cv2.cvtColor((target * 255).astype(np.uint8), cv2.COLOR_RGB2LAB).astype(np.float32)

    # Normalize LAB values
    source_lab[:, :, 0] /= 100.0  # L: 0-100 -> 0-1
    source_lab[:, :, 1] = (source_lab[:, :, 1] + 128) / 255.0  # A: -128-127 -> 0-1
    source_lab[:, :, 2] = (source_lab[:, :, 2] + 128) / 255.0  # B: -128-127 -> 0-1

    target_lab[:, :, 0] /= 100.0
    target_lab[:, :, 1] = (target_lab[:, :, 1] + 128) / 255.0
    target_lab[:, :, 2] = (target_lab[:, :, 2] + 128) / 255.0

    # Calculate CDL in LAB space
    epsilon = 1e-6
    source_mean = np.mean(source_lab, axis=(0, 1))
    source_std = np.std(source_lab, axis=(0, 1))
    target_mean = np.mean(target_lab, axis=(0, 1))
    target_std = np.std(target_lab, axis=(0, 1))

    slope_lab = target_std / (source_std + epsilon)
    offset_lab = target_mean - (source_mean * slope_lab)

    # Convert back to RGB approximation
|
239 |
+
# This is a simplified conversion - for full accuracy we'd need to convert each pixel
|
240 |
+
slope = np.array([slope_lab[0], slope_lab[1], slope_lab[2]]) # Rough mapping
|
241 |
+
offset = np.array([offset_lab[0], offset_lab[1], offset_lab[2]])
|
242 |
+
power = np.ones(3)
|
243 |
+
|
244 |
+
return slope, offset, power
|
245 |
+
|
246 |
+
|
247 |
+
def calculate_cdl_params(source: np.ndarray, target: np.ndarray,
|
248 |
+
source_path: str = None, target_path: str = None) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
|
249 |
+
"""Calculate CDL parameters using simple mean/std matching - the most basic approach.
|
250 |
+
|
251 |
+
Args:
|
252 |
+
source (np.ndarray): Source image as numpy array (0-1 range)
|
253 |
+
target (np.ndarray): Target image as numpy array (0-1 range)
|
254 |
+
source_path (str, optional): Ignored - kept for compatibility
|
255 |
+
target_path (str, optional): Ignored - kept for compatibility
|
256 |
+
|
257 |
+
Returns:
|
258 |
+
Tuple[np.ndarray, np.ndarray, np.ndarray]: (slope, offset, power)
|
259 |
+
"""
|
260 |
+
epsilon = 1e-6
|
261 |
+
|
262 |
+
# Calculate simple mean and standard deviation for each RGB channel
|
263 |
+
source_mean = np.mean(source, axis=(0, 1))
|
264 |
+
source_std = np.std(source, axis=(0, 1))
|
265 |
+
target_mean = np.mean(target, axis=(0, 1))
|
266 |
+
target_std = np.std(target, axis=(0, 1))
|
267 |
+
|
268 |
+
# Calculate slope (gain) from std ratio
|
269 |
+
slope = target_std / (source_std + epsilon)
|
270 |
+
|
271 |
+
# Calculate offset from mean difference
|
272 |
+
offset = target_mean - (source_mean * slope)
|
273 |
+
|
274 |
+
# Calculate simple gamma from brightness relationship
|
275 |
+
power = []
|
276 |
+
for channel in range(3):
|
277 |
+
if source_mean[channel] > epsilon:
|
278 |
+
gamma = np.log(target_mean[channel] + epsilon) / np.log(source_mean[channel] + epsilon)
|
279 |
+
gamma = np.clip(gamma, 0.1, 10.0) # Keep within reasonable bounds
|
280 |
+
else:
|
281 |
+
gamma = 1.0
|
282 |
+
power.append(gamma)
|
283 |
+
|
284 |
+
power = np.array(power)
|
285 |
+
|
286 |
+
return slope, offset, power
|
287 |
+
|
288 |
+
|
289 |
+
def calculate_change_mask(original: np.ndarray, composited: np.ndarray, threshold: float = 0.05) -> np.ndarray:
|
290 |
+
"""Calculate a mask of significantly changed regions between original and composited images.
|
291 |
+
|
292 |
+
Args:
|
293 |
+
original (np.ndarray): Original image (0-1 range)
|
294 |
+
composited (np.ndarray): Composited result (0-1 range)
|
295 |
+
threshold (float): Threshold for detecting significant changes
|
296 |
+
|
297 |
+
Returns:
|
298 |
+
np.ndarray: Binary mask of changed regions
|
299 |
+
"""
|
300 |
+
# Calculate per-pixel difference
|
301 |
+
diff = np.sqrt(np.sum((composited - original) ** 2, axis=2))
|
302 |
+
|
303 |
+
# Create binary mask where changes exceed threshold
|
304 |
+
change_mask = (diff > threshold).astype(np.float32)
|
305 |
+
|
306 |
+
# Apply morphological operations to clean up the mask
|
307 |
+
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
|
308 |
+
change_mask = cv2.morphologyEx(change_mask, cv2.MORPH_CLOSE, kernel)
|
309 |
+
|
310 |
+
return change_mask
|
311 |
+
|
312 |
+
|
313 |
+
def calculate_channel_stats(array: np.ndarray) -> dict:
|
314 |
+
"""Calculate per-channel statistics for an image array.
|
315 |
+
|
316 |
+
Args:
|
317 |
+
array: Image array of shape (H, W, 3)
|
318 |
+
|
319 |
+
Returns:
|
320 |
+
dict: Dictionary containing mean, std, min, max for each channel
|
321 |
+
"""
|
322 |
+
stats = {
|
323 |
+
'mean': np.mean(array, axis=(0, 1)),
|
324 |
+
'std': np.std(array, axis=(0, 1)),
|
325 |
+
'min': np.min(array, axis=(0, 1)),
|
326 |
+
'max': np.max(array, axis=(0, 1))
|
327 |
+
}
|
328 |
+
return stats
|
329 |
+
|
330 |
+
|
331 |
+
def apply_cdl_transform(image: np.ndarray, slope: np.ndarray, offset: np.ndarray, power: np.ndarray,
|
332 |
+
factor: float = 0.3) -> np.ndarray:
|
333 |
+
"""Apply CDL transformation to an image.
|
334 |
+
|
335 |
+
Args:
|
336 |
+
image (np.ndarray): Input image (0-1 range)
|
337 |
+
slope (np.ndarray): CDL slope parameters for each channel
|
338 |
+
offset (np.ndarray): CDL offset parameters for each channel
|
339 |
+
power (np.ndarray): CDL power parameters for each channel
|
340 |
+
factor (float): Blending factor (0.0 = no change, 1.0 = full transform)
|
341 |
+
|
342 |
+
Returns:
|
343 |
+
np.ndarray: Transformed image
|
344 |
+
"""
|
345 |
+
# Apply CDL transform: out = ((in * slope) + offset) ** power
|
346 |
+
transformed = np.power(np.maximum(image * slope + offset, 0), power)
|
347 |
+
|
348 |
+
# Clamp to valid range
|
349 |
+
transformed = np.clip(transformed, 0.0, 1.0)
|
350 |
+
|
351 |
+
# Blend with original based on factor
|
352 |
+
result = (1 - factor) * image + factor * transformed
|
353 |
+
|
354 |
+
return result
|
355 |
+
|
356 |
+
|
357 |
+
def cdl_edge_smoothing(composited_image_path: str, original_image_path: str, factor: float = 0.3) -> Image.Image:
|
358 |
+
"""Apply CDL-based edge smoothing between composited result and original image.
|
359 |
+
|
360 |
+
Args:
|
361 |
+
composited_image_path (str): Path to the composited result image
|
362 |
+
original_image_path (str): Path to the original target image
|
363 |
+
factor (float): Smoothing strength (0.0 = no smoothing, 1.0 = full smoothing)
|
364 |
+
|
365 |
+
Returns:
|
366 |
+
Image.Image: Smoothed result image
|
367 |
+
"""
|
368 |
+
# Load images
|
369 |
+
composited_img = Image.open(composited_image_path).convert("RGB")
|
370 |
+
original_img = Image.open(original_image_path).convert("RGB")
|
371 |
+
|
372 |
+
# Ensure same dimensions
|
373 |
+
if composited_img.size != original_img.size:
|
374 |
+
composited_img = composited_img.resize(original_img.size, Image.LANCZOS)
|
375 |
+
|
376 |
+
# Convert to numpy arrays (0-1 range)
|
377 |
+
composited_np = np.array(composited_img).astype(np.float32) / 255.0
|
378 |
+
original_np = np.array(original_img).astype(np.float32) / 255.0
|
379 |
+
|
380 |
+
# Calculate CDL parameters to transform composited to match original
|
381 |
+
slope, offset, power = calculate_cdl_params(composited_np, original_np)
|
382 |
+
|
383 |
+
# Apply CDL transformation with blending
|
384 |
+
smoothed_np = apply_cdl_transform(composited_np, slope, offset, power, factor)
|
385 |
+
|
386 |
+
# Convert back to PIL Image
|
387 |
+
smoothed_img = Image.fromarray((smoothed_np * 255).astype(np.uint8))
|
388 |
+
|
389 |
+
return smoothed_img
|
390 |
+
|
391 |
+
|
392 |
+
def get_smoothing_stats(original_image_path: str, composited_image_path: str) -> dict:
|
393 |
+
"""Get statistics about the CDL transformation for debugging.
|
394 |
+
|
395 |
+
Args:
|
396 |
+
original_image_path (str): Path to the original target image
|
397 |
+
composited_image_path (str): Path to the composited result image
|
398 |
+
|
399 |
+
Returns:
|
400 |
+
dict: Statistics about the transformation
|
401 |
+
"""
|
402 |
+
# Load images
|
403 |
+
composited_img = Image.open(composited_image_path).convert("RGB")
|
404 |
+
original_img = Image.open(original_image_path).convert("RGB")
|
405 |
+
|
406 |
+
# Ensure same dimensions
|
407 |
+
if composited_img.size != original_img.size:
|
408 |
+
composited_img = composited_img.resize(original_img.size, Image.LANCZOS)
|
409 |
+
|
410 |
+
# Convert to numpy arrays (0-1 range)
|
411 |
+
composited_np = np.array(composited_img).astype(np.float32) / 255.0
|
412 |
+
original_np = np.array(original_img).astype(np.float32) / 255.0
|
413 |
+
|
414 |
+
# Calculate statistics
|
415 |
+
composited_stats = calculate_channel_stats(composited_np)
|
416 |
+
original_stats = calculate_channel_stats(original_np)
|
417 |
+
|
418 |
+
# Calculate CDL parameters using face-based method when possible
|
419 |
+
slope, offset, power = calculate_cdl_params(original_np, composited_np,
|
420 |
+
original_image_path, composited_image_path)
|
421 |
+
|
422 |
+
return {
|
423 |
+
'composited_stats': composited_stats,
|
424 |
+
'original_stats': original_stats,
|
425 |
+
'cdl_slope': slope,
|
426 |
+
'cdl_offset': offset,
|
427 |
+
'cdl_power': power
|
428 |
+
}
|
429 |
+
|
430 |
+
|
431 |
+
def cdl_edge_smoothing_apply_to_source(source_image_path: str, target_image_path: str, factor: float = 1.0) -> Image.Image:
|
432 |
+
"""Apply CDL transformation to source image using face-based parameters when possible.
|
433 |
+
|
434 |
+
This function:
|
435 |
+
1. Calculates CDL parameters to transform source to match target (using face pixels when available)
|
436 |
+
2. Applies those CDL parameters to the entire source image
|
437 |
+
3. Returns the transformed source image
|
438 |
+
|
439 |
+
Args:
|
440 |
+
source_image_path (str): Path to the source image (to be transformed)
|
441 |
+
target_image_path (str): Path to the target image (reference for CDL calculation)
|
442 |
+
factor (float): Transform strength (0.0 = no change, 1.0 = full transform)
|
443 |
+
|
444 |
+
Returns:
|
445 |
+
Image.Image: Source image with CDL transformation applied
|
446 |
+
"""
|
447 |
+
# Load images
|
448 |
+
source_img = Image.open(source_image_path).convert("RGB")
|
449 |
+
target_img = Image.open(target_image_path).convert("RGB")
|
450 |
+
|
451 |
+
# Ensure same dimensions
|
452 |
+
if source_img.size != target_img.size:
|
453 |
+
target_img = target_img.resize(source_img.size, Image.LANCZOS)
|
454 |
+
|
455 |
+
# Convert to numpy arrays (0-1 range)
|
456 |
+
source_np = np.array(source_img).astype(np.float32) / 255.0
|
457 |
+
target_np = np.array(target_img).astype(np.float32) / 255.0
|
458 |
+
|
459 |
+
# Calculate CDL parameters using face-based method when possible
|
460 |
+
slope, offset, power = calculate_cdl_params(source_np, target_np,
|
461 |
+
source_image_path, target_image_path)
|
462 |
+
|
463 |
+
# Apply CDL transformation to the entire source image
|
464 |
+
transformed_np = apply_cdl_transform(source_np, slope, offset, power, factor)
|
465 |
+
|
466 |
+
# Convert back to PIL Image
|
467 |
+
transformed_img = Image.fromarray((transformed_np * 255).astype(np.uint8))
|
468 |
+
|
469 |
+
return transformed_img
|
470 |
+
|
471 |
+
|
472 |
+
def extract_face_mask(image_path: str) -> Optional[np.ndarray]:
|
473 |
+
"""Extract face mask from an image using human parts segmentation.
|
474 |
+
|
475 |
+
Args:
|
476 |
+
image_path (str): Path to the image
|
477 |
+
|
478 |
+
Returns:
|
479 |
+
np.ndarray or None: Binary face mask, or None if no face found
|
480 |
+
"""
|
481 |
+
try:
|
482 |
+
from human_parts_segmentation import HumanPartsSegmentation
|
483 |
+
|
484 |
+
segmenter = HumanPartsSegmentation()
|
485 |
+
masks_dict = segmenter.segment_parts(image_path, ['face'])
|
486 |
+
|
487 |
+
if 'face' in masks_dict and masks_dict['face'] is not None:
|
488 |
+
face_mask = masks_dict['face']
|
489 |
+
# Ensure it's a proper binary mask
|
490 |
+
if np.sum(face_mask > 0.5) > 100: # At least 100 face pixels
|
491 |
+
return face_mask
|
492 |
+
|
493 |
+
return None
|
494 |
+
|
495 |
+
except Exception as e:
|
496 |
+
print(f"Face extraction failed: {e}")
|
497 |
+
return None
|
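The sketch below (not part of the upload) shows how the CDL helpers in cdl_smoothing.py fit together when called directly: the per-channel parameters follow the usual CDL form `out = clip((in * slope + offset) ** power)`, blended with the input by `factor`. The image paths and the output filename are hypothetical placeholders.

```python
import numpy as np
from PIL import Image

# assumes this file is importable as the module `cdl_smoothing`
from cdl_smoothing import calculate_cdl_params_simple, apply_cdl_transform

# Load two images as float arrays in the 0-1 range (paths are illustrative only)
source = np.asarray(Image.open("source.jpg").convert("RGB"), dtype=np.float32) / 255.0
target = np.asarray(Image.open("target.jpg").convert("RGB"), dtype=np.float32) / 255.0

# slope scales contrast, offset shifts brightness, power acts as a per-channel gamma
slope, offset, power = calculate_cdl_params_simple(source, target)

# Apply the grade at half strength and write the result out
graded = apply_cdl_transform(source, slope, offset, power, factor=0.5)
Image.fromarray((graded * 255).astype(np.uint8)).save("graded.jpg")
```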
clothes_segmentation.py
ADDED
@@ -0,0 +1,292 @@
import os
import torch
import torch.nn as nn
import numpy as np
from typing import Union, Tuple
from PIL import Image, ImageFilter
import cv2
from transformers import SegformerImageProcessor, AutoModelForSemanticSegmentation
from huggingface_hub import hf_hub_download
import shutil

# Device configuration
device = "cuda" if torch.cuda.is_available() else "cpu"

# Model configuration
AVAILABLE_MODELS = {
    "segformer_b2_clothes": "1038lab/segformer_clothes"
}

# Model paths
current_dir = os.path.dirname(os.path.abspath(__file__))
models_dir = os.path.join(current_dir, "models")


def pil2tensor(image: Image.Image) -> torch.Tensor:
    """Convert PIL Image to tensor."""
    return torch.from_numpy(np.array(image).astype(np.float32) / 255.0)[None,]


def tensor2pil(image: torch.Tensor) -> Image.Image:
    """Convert tensor to PIL Image."""
    return Image.fromarray(np.clip(255. * image.cpu().numpy(), 0, 255).astype(np.uint8))


def image2mask(image: Image.Image) -> torch.Tensor:
    """Convert image to mask tensor."""
    if isinstance(image, Image.Image):
        image = pil2tensor(image)
    return image.squeeze()[..., 0]


def mask2image(mask: torch.Tensor) -> Image.Image:
    """Convert mask tensor to PIL Image."""
    if len(mask.shape) == 2:
        mask = mask.unsqueeze(0)
    return tensor2pil(mask)


class ClothesSegmentation:
    """
    Standalone clothing segmentation using Segformer model.
    """

    def __init__(self):
        self.processor = None
        self.model = None
        self.cache_dir = os.path.join(models_dir, "RMBG", "segformer_clothes")

        # Class mapping for segmentation - consistent with latest repo
        self.class_map = {
            "Background": 0, "Hat": 1, "Hair": 2, "Sunglasses": 3,
            "Upper-clothes": 4, "Skirt": 5, "Pants": 6, "Dress": 7,
            "Belt": 8, "Left-shoe": 9, "Right-shoe": 10, "Face": 11,
            "Left-leg": 12, "Right-leg": 13, "Left-arm": 14, "Right-arm": 15,
            "Bag": 16, "Scarf": 17
        }

    def check_model_cache(self):
        """Check if model files exist in cache."""
        if not os.path.exists(self.cache_dir):
            return False, "Model directory not found"

        required_files = [
            'config.json',
            'model.safetensors',
            'preprocessor_config.json'
        ]

        missing_files = [f for f in required_files if not os.path.exists(os.path.join(self.cache_dir, f))]
        if missing_files:
            return False, f"Required model files missing: {', '.join(missing_files)}"
        return True, "Model cache verified"

    def clear_model(self):
        """Clear model from memory - improved version."""
        if self.model is not None:
            self.model.cpu()
            del self.model
            self.model = None
            self.processor = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    def download_model_files(self):
        """Download model files from Hugging Face - improved version."""
        model_id = AVAILABLE_MODELS["segformer_b2_clothes"]
        model_files = {
            'config.json': 'config.json',
            'model.safetensors': 'model.safetensors',
            'preprocessor_config.json': 'preprocessor_config.json'
        }

        os.makedirs(self.cache_dir, exist_ok=True)
        print("Downloading Clothes Segformer model files...")

        try:
            for save_name, repo_path in model_files.items():
                print(f"Downloading {save_name}...")
                downloaded_path = hf_hub_download(
                    repo_id=model_id,
                    filename=repo_path,
                    local_dir=self.cache_dir,
                    local_dir_use_symlinks=False
                )

                if os.path.dirname(downloaded_path) != self.cache_dir:
                    target_path = os.path.join(self.cache_dir, save_name)
                    shutil.move(downloaded_path, target_path)
            return True, "Model files downloaded successfully"
        except Exception as e:
            return False, f"Error downloading model files: {str(e)}"

    def load_model(self):
        """Load the clothing segmentation model - improved version."""
        try:
            # Check and download model if needed
            cache_status, message = self.check_model_cache()
            if not cache_status:
                print(f"Cache check: {message}")
                download_status, download_message = self.download_model_files()
                if not download_status:
                    print(f"❌ {download_message}")
                    return False

            # Load model if needed
            if self.processor is None:
                print("Loading clothes segmentation model...")
                self.processor = SegformerImageProcessor.from_pretrained(self.cache_dir)
                self.model = AutoModelForSemanticSegmentation.from_pretrained(self.cache_dir)
                self.model.eval()
                for param in self.model.parameters():
                    param.requires_grad = False
                self.model.to(device)
                print("✅ Clothes segmentation model loaded successfully")

            return True

        except Exception as e:
            print(f"❌ Error loading clothes model: {e}")
            self.clear_model()  # Cleanup on error
            return False

    def segment_clothes(self, image_path: str, target_classes: list = None, process_res: int = 512) -> np.ndarray:
        """
        Segment clothing from an image - improved version with process_res parameter.

        Args:
            image_path: Path to the image
            target_classes: List of clothing classes to segment (default: ["Upper-clothes"])
            process_res: Processing resolution (default: 512)

        Returns:
            Binary mask as numpy array
        """
        if target_classes is None:
            target_classes = ["Upper-clothes"]

        if not self.load_model():
            print("❌ Cannot load clothes segmentation model")
            return None

        try:
            # Load and preprocess image
            image = cv2.imread(image_path)
            if image is None:
                print(f"❌ Could not load image: {image_path}")
                return None

            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            original_size = image_rgb.shape[:2]

            # Preprocess image with custom resolution
            pil_image = Image.fromarray(image_rgb)

            # Resize for processing if needed
            if process_res != 512:
                pil_image = pil_image.resize((process_res, process_res), Image.Resampling.LANCZOS)

            inputs = self.processor(images=pil_image, return_tensors="pt")
            inputs = {k: v.to(device) for k, v in inputs.items()}

            # Run inference
            with torch.no_grad():
                outputs = self.model(**inputs)
                logits = outputs.logits.cpu()

            # Resize logits to original image size
            upsampled_logits = nn.functional.interpolate(
                logits,
                size=original_size,
                mode="bilinear",
                align_corners=False,
            )
            pred_seg = upsampled_logits.argmax(dim=1)[0]

            # Combine selected class masks
            combined_mask = None
            for class_name in target_classes:
                if class_name in self.class_map:
                    mask = (pred_seg == self.class_map[class_name]).float()
                    if combined_mask is None:
                        combined_mask = mask
                    else:
                        combined_mask = torch.clamp(combined_mask + mask, 0, 1)
                else:
                    print(f"⚠️ Unknown class: {class_name}")

            if combined_mask is None:
                print(f"❌ No valid classes found in: {target_classes}")
                return None

            # Convert to numpy
            mask_np = combined_mask.numpy().astype(np.float32)

            return mask_np

        except Exception as e:
            print(f"❌ Error in clothes segmentation: {e}")
            return None
        finally:
            # Clean up model if not training (consistent with updated repo)
            if self.model is not None and not self.model.training:
                self.clear_model()

    def segment_clothes_with_filters(self, image_path: str, target_classes: list = None,
                                     mask_blur: int = 0, mask_offset: int = 0,
                                     process_res: int = 512) -> np.ndarray:
        """
        Segment clothing with additional filtering options - new method from updated repo.

        Args:
            image_path: Path to the image
            target_classes: List of clothing classes to segment
            mask_blur: Blur amount for mask edges
            mask_offset: Expand/Shrink mask boundary
            process_res: Processing resolution

        Returns:
            Filtered binary mask as numpy array
        """
        # Get initial mask
        mask = self.segment_clothes(image_path, target_classes, process_res)
        if mask is None:
            return None

        try:
            # Convert to PIL for filtering
            mask_image = Image.fromarray((mask * 255).astype(np.uint8))

            # Apply blur if specified
            if mask_blur > 0:
                mask_image = mask_image.filter(ImageFilter.GaussianBlur(radius=mask_blur))

            # Apply offset if specified
            if mask_offset != 0:
                if mask_offset > 0:
                    mask_image = mask_image.filter(ImageFilter.MaxFilter(size=mask_offset * 2 + 1))
                else:
                    mask_image = mask_image.filter(ImageFilter.MinFilter(size=-mask_offset * 2 + 1))

            # Convert back to numpy
            filtered_mask = np.array(mask_image).astype(np.float32) / 255.0
            return filtered_mask

        except Exception as e:
            print(f"❌ Error applying filters: {e}")
            return mask


# Standalone function for easy usage
def segment_upper_clothes(image_path: str) -> np.ndarray:
    """
    Convenience function to segment upper clothes from an image.

    Args:
        image_path: Path to the image

    Returns:
        Binary mask as numpy array or None if failed
    """
    segmenter = ClothesSegmentation()
    return segmenter.segment_clothes(image_path, ["Upper-clothes"])
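A minimal usage sketch (not part of the upload) for the clothing segmentation module above; the input path and the output filename are hypothetical. It exercises both the convenience function and the filtered variant, whose `mask_offset` dilates (positive) or erodes (negative) the mask and whose `mask_blur` softens its edges.

```python
import numpy as np
from PIL import Image

# assumes this file is importable as the module `clothes_segmentation`
from clothes_segmentation import ClothesSegmentation, segment_upper_clothes

# Quick path: upper clothes only
mask = segment_upper_clothes("person.jpg")

# Full control: several classes, softened edges and a small dilation
segmenter = ClothesSegmentation()
detailed = segmenter.segment_clothes_with_filters(
    "person.jpg",
    target_classes=["Upper-clothes", "Dress", "Scarf"],
    mask_blur=2,     # Gaussian blur radius applied to the mask edges
    mask_offset=3,   # positive = dilate, negative = erode
)

if detailed is not None:
    Image.fromarray((detailed * 255).astype(np.uint8)).save("clothes_mask.png")
```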
color_matching.py
ADDED
@@ -0,0 +1,698 @@
import torch
|
2 |
+
import numpy as np
|
3 |
+
from PIL import Image
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import matplotlib.figure as figure
|
6 |
+
from matplotlib.figure import Figure
|
7 |
+
import numpy.typing as npt
|
8 |
+
import os
|
9 |
+
import sys
|
10 |
+
import tempfile
|
11 |
+
import time
|
12 |
+
|
13 |
+
class RegionColorMatcher:
|
14 |
+
def __init__(self, factor=1.0, preserve_colors=True, preserve_luminance=True, method="adain"):
|
15 |
+
"""
|
16 |
+
Initialize the RegionColorMatcher.
|
17 |
+
|
18 |
+
Args:
|
19 |
+
factor: Strength of the color matching (0.0 to 1.0)
|
20 |
+
preserve_colors: If True, convert to YUV and preserve color relationships
|
21 |
+
preserve_luminance: If True, preserve the luminance when in YUV mode
|
22 |
+
method: The color matching method to use (adain, mkl, hm, reinhard, mvgd, hm-mvgd-hm, hm-mkl-hm)
|
23 |
+
"""
|
24 |
+
self.factor = factor
|
25 |
+
self.preserve_colors = preserve_colors
|
26 |
+
self.preserve_luminance = preserve_luminance
|
27 |
+
self.method = method
|
28 |
+
|
29 |
+
def match_regions(self, img1_path, img2_path, masks1, masks2):
|
30 |
+
"""
|
31 |
+
Match colors between corresponding masked regions of two images.
|
32 |
+
|
33 |
+
Args:
|
34 |
+
img1_path: Path to first image
|
35 |
+
img2_path: Path to second image
|
36 |
+
masks1: Dictionary of masks for first image {label: binary_mask}
|
37 |
+
masks2: Dictionary of masks for second image {label: binary_mask}
|
38 |
+
|
39 |
+
Returns:
|
40 |
+
A PIL Image with the color-matched result
|
41 |
+
"""
|
42 |
+
print(f"🎨 Color matching with method: {self.method}")
|
43 |
+
print(f"📊 Processing {len(masks1)} regions from img1 and {len(masks2)} regions from img2")
|
44 |
+
|
45 |
+
# Load images
|
46 |
+
img1 = Image.open(img1_path).convert("RGB")
|
47 |
+
img2 = Image.open(img2_path).convert("RGB")
|
48 |
+
|
49 |
+
# Convert to numpy arrays and normalize to [0,1]
|
50 |
+
img1_np = np.array(img1).astype(np.float32) / 255.0
|
51 |
+
img2_np = np.array(img2).astype(np.float32) / 255.0
|
52 |
+
|
53 |
+
# Create a copy of the second image as our base for color matching
|
54 |
+
# We want to make img2 look like img1's colors
|
55 |
+
result_np = np.copy(img2_np)
|
56 |
+
|
57 |
+
# Convert images to PyTorch tensors
|
58 |
+
img1_tensor = torch.from_numpy(img1_np)
|
59 |
+
img2_tensor = torch.from_numpy(img2_np)
|
60 |
+
result_tensor = torch.from_numpy(result_np)
|
61 |
+
|
62 |
+
# Track coverage to ensure all regions are processed
|
63 |
+
total_coverage = np.zeros(img2_np.shape[:2], dtype=np.float32)
|
64 |
+
processed_regions = 0
|
65 |
+
|
66 |
+
# Process each mask region
|
67 |
+
for label, mask1 in masks1.items():
|
68 |
+
if label not in masks2:
|
69 |
+
print(f"⚠️ Skipping {label} - not found in masks2")
|
70 |
+
continue
|
71 |
+
|
72 |
+
mask2 = masks2[label]
|
73 |
+
|
74 |
+
# Resize masks to match image dimensions if needed
|
75 |
+
if mask1.shape != img1_np.shape[:2]:
|
76 |
+
mask1 = self._resize_mask(mask1, img1_np.shape[:2])
|
77 |
+
|
78 |
+
if mask2.shape != img2_np.shape[:2]:
|
79 |
+
mask2 = self._resize_mask(mask2, img2_np.shape[:2])
|
80 |
+
|
81 |
+
# Check mask coverage
|
82 |
+
mask1_pixels = np.sum(mask1 > 0)
|
83 |
+
mask2_pixels = np.sum(mask2 > 0)
|
84 |
+
print(f"🔍 Processing {label}: {mask1_pixels} pixels (img1) → {mask2_pixels} pixels (img2)")
|
85 |
+
|
86 |
+
if mask1_pixels == 0 or mask2_pixels == 0:
|
87 |
+
print(f"⚠️ Skipping {label} - no pixels in mask")
|
88 |
+
continue
|
89 |
+
|
90 |
+
# Track coverage
|
91 |
+
total_coverage += (mask2 > 0).astype(np.float32)
|
92 |
+
processed_regions += 1
|
93 |
+
|
94 |
+
# Convert masks to torch tensors
|
95 |
+
mask1_tensor = torch.from_numpy(mask1.astype(np.float32))
|
96 |
+
mask2_tensor = torch.from_numpy(mask2.astype(np.float32))
|
97 |
+
|
98 |
+
# Apply color matching for this region based on selected method
|
99 |
+
if self.method == "adain":
|
100 |
+
result_tensor = self._apply_adain_to_region(
|
101 |
+
img1_tensor,
|
102 |
+
img2_tensor,
|
103 |
+
result_tensor,
|
104 |
+
mask1_tensor,
|
105 |
+
mask2_tensor
|
106 |
+
)
|
107 |
+
else:
|
108 |
+
result_tensor = self._apply_color_matcher_to_region(
|
109 |
+
img1_tensor,
|
110 |
+
img2_tensor,
|
111 |
+
result_tensor,
|
112 |
+
mask1_tensor,
|
113 |
+
mask2_tensor,
|
114 |
+
self.method
|
115 |
+
)
|
116 |
+
|
117 |
+
print(f"✅ Completed color matching for {label}")
|
118 |
+
|
119 |
+
# Debug coverage
|
120 |
+
total_pixels = img2_np.shape[0] * img2_np.shape[1]
|
121 |
+
covered_pixels = np.sum(total_coverage > 0)
|
122 |
+
overlap_pixels = np.sum(total_coverage > 1)
|
123 |
+
|
124 |
+
print(f"📊 Coverage summary:")
|
125 |
+
print(f" Total image pixels: {total_pixels}")
|
126 |
+
print(f" Covered pixels: {covered_pixels} ({100*covered_pixels/total_pixels:.1f}%)")
|
127 |
+
print(f" Overlapping pixels: {overlap_pixels} ({100*overlap_pixels/total_pixels:.1f}%)")
|
128 |
+
print(f" Processed regions: {processed_regions}")
|
129 |
+
|
130 |
+
# Convert back to numpy, scale to [0,255] and convert to uint8
|
131 |
+
result_np = (result_tensor.numpy() * 255.0).astype(np.uint8)
|
132 |
+
|
133 |
+
# Convert to PIL Image
|
134 |
+
result_img = Image.fromarray(result_np)
|
135 |
+
|
136 |
+
return result_img
|
137 |
+
|
138 |
+
def _resize_mask(self, mask, target_shape):
|
139 |
+
"""
|
140 |
+
Resize a mask to match the target shape.
|
141 |
+
|
142 |
+
Args:
|
143 |
+
mask: Binary mask array
|
144 |
+
target_shape: Target shape (height, width)
|
145 |
+
|
146 |
+
Returns:
|
147 |
+
Resized mask array
|
148 |
+
"""
|
149 |
+
# Convert to PIL Image for resizing
|
150 |
+
mask_img = Image.fromarray((mask * 255).astype(np.uint8))
|
151 |
+
|
152 |
+
# Resize to target shape
|
153 |
+
mask_img = mask_img.resize((target_shape[1], target_shape[0]), Image.NEAREST)
|
154 |
+
|
155 |
+
# Convert back to numpy array and normalize to [0,1]
|
156 |
+
resized_mask = np.array(mask_img).astype(np.float32) / 255.0
|
157 |
+
|
158 |
+
return resized_mask
|
159 |
+
|
160 |
+
def _apply_adain_to_region(self, source_img, target_img, result_img, source_mask, target_mask):
|
161 |
+
"""
|
162 |
+
Apply AdaIN to match the statistics of the masked region in source to the target.
|
163 |
+
|
164 |
+
Args:
|
165 |
+
source_img: Source image tensor [H,W,3] (reference for color matching)
|
166 |
+
target_img: Target image tensor [H,W,3] (to be color matched)
|
167 |
+
result_img: Result image tensor to modify [H,W,3]
|
168 |
+
source_mask: Binary mask for source image [H,W]
|
169 |
+
target_mask: Binary mask for target image [H,W]
|
170 |
+
|
171 |
+
Returns:
|
172 |
+
Modified result tensor
|
173 |
+
"""
|
174 |
+
# Ensure masks are binary
|
175 |
+
source_mask_binary = (source_mask > 0.5).float()
|
176 |
+
target_mask_binary = (target_mask > 0.5).float()
|
177 |
+
|
178 |
+
# If preserving colors, convert to YUV
|
179 |
+
if self.preserve_colors:
|
180 |
+
# RGB to YUV conversion matrix
|
181 |
+
rgb_to_yuv = torch.tensor([
|
182 |
+
[0.299, 0.587, 0.114],
|
183 |
+
[-0.14713, -0.28886, 0.436],
|
184 |
+
[0.615, -0.51499, -0.10001]
|
185 |
+
])
|
186 |
+
|
187 |
+
# Convert to YUV
|
188 |
+
source_yuv = torch.matmul(source_img, rgb_to_yuv.T)
|
189 |
+
target_yuv = torch.matmul(target_img, rgb_to_yuv.T)
|
190 |
+
result_yuv = torch.matmul(result_img, rgb_to_yuv.T)
|
191 |
+
|
192 |
+
# Only normalize Y channel if preserving luminance is False
|
193 |
+
channels_to_process = [0] if not self.preserve_luminance else []
|
194 |
+
|
195 |
+
# Always process U and V channels (chroma)
|
196 |
+
channels_to_process.extend([1, 2])
|
197 |
+
|
198 |
+
# Process selected channels
|
199 |
+
for c in channels_to_process:
|
200 |
+
# Apply the color matching only to the masked region in the result
|
201 |
+
result_channel = result_yuv[:,:,c]
|
202 |
+
matched_channel = self._match_channel_statistics(
|
203 |
+
source_yuv[:,:,c],
|
204 |
+
target_yuv[:,:,c],
|
205 |
+
result_channel,
|
206 |
+
source_mask_binary,
|
207 |
+
target_mask_binary
|
208 |
+
)
|
209 |
+
|
210 |
+
# Only update the masked region in the result
|
211 |
+
mask_expanded = target_mask_binary.unsqueeze(-1).expand_as(result_yuv)[:,:,c]
|
212 |
+
result_yuv[:,:,c] = torch.where(
|
213 |
+
mask_expanded > 0.5,
|
214 |
+
matched_channel,
|
215 |
+
result_channel
|
216 |
+
)
|
217 |
+
|
218 |
+
# Convert back to RGB
|
219 |
+
yuv_to_rgb = torch.tensor([
|
220 |
+
[1.0, 0.0, 1.13983],
|
221 |
+
[1.0, -0.39465, -0.58060],
|
222 |
+
[1.0, 2.03211, 0.0]
|
223 |
+
])
|
224 |
+
|
225 |
+
result_rgb = torch.matmul(result_yuv, yuv_to_rgb.T)
|
226 |
+
|
227 |
+
# Only update the masked region in the result
|
228 |
+
mask_expanded = target_mask_binary.unsqueeze(-1).expand_as(result_img)
|
229 |
+
result_img = torch.where(
|
230 |
+
mask_expanded > 0.5,
|
231 |
+
result_rgb,
|
232 |
+
result_img
|
233 |
+
)
|
234 |
+
|
235 |
+
else:
|
236 |
+
# Process each RGB channel separately
|
237 |
+
for c in range(3):
|
238 |
+
# Apply the color matching only to the masked region in the result
|
239 |
+
result_channel = result_img[:,:,c]
|
240 |
+
matched_channel = self._match_channel_statistics(
|
241 |
+
source_img[:,:,c],
|
242 |
+
target_img[:,:,c],
|
243 |
+
result_channel,
|
244 |
+
source_mask_binary,
|
245 |
+
target_mask_binary
|
246 |
+
)
|
247 |
+
|
248 |
+
# Only update the masked region in the result
|
249 |
+
mask_expanded = target_mask_binary.unsqueeze(-1).expand_as(result_img)[:,:,c]
|
250 |
+
result_img[:,:,c] = torch.where(
|
251 |
+
mask_expanded > 0.5,
|
252 |
+
matched_channel,
|
253 |
+
result_channel
|
254 |
+
)
|
255 |
+
|
256 |
+
# Ensure values are in valid range [0, 1]
|
257 |
+
return torch.clamp(result_img, 0.0, 1.0)
|
258 |
+
|
259 |
+
def _apply_color_matcher_to_region(self, source_img, target_img, result_img, source_mask, target_mask, method):
|
260 |
+
"""
|
261 |
+
Apply color-matcher library methods to match the statistics of the masked region in source to the target.
|
262 |
+
|
263 |
+
Args:
|
264 |
+
source_img: Source image tensor [H,W,3] (reference for color matching)
|
265 |
+
target_img: Target image tensor [H,W,3] (to be color matched)
|
266 |
+
result_img: Result image tensor to modify [H,W,3]
|
267 |
+
source_mask: Binary mask for source image [H,W]
|
268 |
+
target_mask: Binary mask for target image [H,W]
|
269 |
+
method: The color matching method to use (mkl, hm, reinhard, mvgd, hm-mvgd-hm, hm-mkl-hm)
|
270 |
+
|
271 |
+
Returns:
|
272 |
+
Modified result tensor
|
273 |
+
"""
|
274 |
+
# Ensure masks are binary
|
275 |
+
source_mask_binary = (source_mask > 0.5).float()
|
276 |
+
target_mask_binary = (target_mask > 0.5).float()
|
277 |
+
|
278 |
+
# Convert tensors to numpy arrays
|
279 |
+
source_np = source_img.detach().cpu().numpy()
|
280 |
+
target_np = target_img.detach().cpu().numpy()
|
281 |
+
source_mask_np = source_mask_binary.detach().cpu().numpy()
|
282 |
+
target_mask_np = target_mask_binary.detach().cpu().numpy()
|
283 |
+
|
284 |
+
try:
|
285 |
+
# Try to import the color_matcher library
|
286 |
+
try:
|
287 |
+
from color_matcher import ColorMatcher
|
288 |
+
from color_matcher.normalizer import Normalizer
|
289 |
+
except ImportError:
|
290 |
+
self._install_package("color-matcher")
|
291 |
+
from color_matcher import ColorMatcher
|
292 |
+
from color_matcher.normalizer import Normalizer
|
293 |
+
|
294 |
+
# Extract only the masked pixels from both images
|
295 |
+
source_coords = np.where(source_mask_np > 0.5)
|
296 |
+
target_coords = np.where(target_mask_np > 0.5)
|
297 |
+
|
298 |
+
if len(source_coords[0]) == 0 or len(target_coords[0]) == 0:
|
299 |
+
return result_img
|
300 |
+
|
301 |
+
# Extract pixel values from masked regions
|
302 |
+
source_pixels = source_np[source_coords]
|
303 |
+
target_pixels = target_np[target_coords]
|
304 |
+
|
305 |
+
# Initialize color matcher
|
306 |
+
cm = ColorMatcher()
|
307 |
+
|
308 |
+
if method == "mkl":
|
309 |
+
# For MKL, calculate mean and standard deviation from masked regions
|
310 |
+
source_mean = np.mean(source_pixels, axis=0)
|
311 |
+
source_std = np.std(source_pixels, axis=0)
|
312 |
+
target_mean = np.mean(target_pixels, axis=0)
|
313 |
+
target_std = np.std(target_pixels, axis=0)
|
314 |
+
|
315 |
+
# Apply the transformation
|
316 |
+
result_np = np.copy(target_np)
|
317 |
+
for c in range(3):
|
318 |
+
# Normalize the target channel and scale by source statistics
|
319 |
+
normalized = (target_np[:,:,c] - target_mean[c]) / (target_std[c] + 1e-8) * source_std[c] + source_mean[c]
|
320 |
+
|
321 |
+
# Only apply to masked region
|
322 |
+
result_np[:,:,c] = np.where(target_mask_np > 0.5, normalized, target_np[:,:,c])
|
323 |
+
|
324 |
+
# Convert back to tensor
|
325 |
+
result_tensor = torch.from_numpy(result_np).to(result_img.device)
|
326 |
+
|
327 |
+
# Blend with original based on factor
|
328 |
+
result_img = torch.lerp(result_img, result_tensor, self.factor)
|
329 |
+
|
330 |
+
elif method == "reinhard":
|
331 |
+
# Similar to MKL but with a different normalization approach
|
332 |
+
source_mean = np.mean(source_pixels, axis=0)
|
333 |
+
source_std = np.std(source_pixels, axis=0)
|
334 |
+
target_mean = np.mean(target_pixels, axis=0)
|
335 |
+
target_std = np.std(target_pixels, axis=0)
|
336 |
+
|
337 |
+
# Apply the transformation
|
338 |
+
result_np = np.copy(target_np)
|
339 |
+
for c in range(3):
|
340 |
+
# Normalize the target channel and scale by source statistics
|
341 |
+
normalized = (target_np[:,:,c] - target_mean[c]) / (target_std[c] + 1e-8) * source_std[c] + source_mean[c]
|
342 |
+
|
343 |
+
# Only apply to masked region
|
344 |
+
result_np[:,:,c] = np.where(target_mask_np > 0.5, normalized, target_np[:,:,c])
|
345 |
+
|
346 |
+
# Convert back to tensor
|
347 |
+
result_tensor = torch.from_numpy(result_np).to(result_img.device)
|
348 |
+
|
349 |
+
# Blend with original based on factor
|
350 |
+
result_img = torch.lerp(result_img, result_tensor, self.factor)
|
351 |
+
|
352 |
+
elif method == "mvgd":
|
353 |
+
# For MVGD, we need mean and covariance matrices
|
354 |
+
source_mean = np.mean(source_pixels, axis=0)
|
355 |
+
source_cov = np.cov(source_pixels, rowvar=False)
|
356 |
+
target_mean = np.mean(target_pixels, axis=0)
|
357 |
+
target_cov = np.cov(target_pixels, rowvar=False)
|
358 |
+
|
359 |
+
# Check if covariance matrices are valid
|
360 |
+
if np.isnan(source_cov).any() or np.isnan(target_cov).any():
|
361 |
+
# Fallback to simple statistics matching
|
362 |
+
source_std = np.std(source_pixels, axis=0)
|
363 |
+
target_std = np.std(target_pixels, axis=0)
|
364 |
+
|
365 |
+
result_np = np.copy(target_np)
|
366 |
+
for c in range(3):
|
367 |
+
normalized = (target_np[:,:,c] - target_mean[c]) / (target_std[c] + 1e-8) * source_std[c] + source_mean[c]
|
368 |
+
result_np[:,:,c] = np.where(target_mask_np > 0.5, normalized, target_np[:,:,c])
|
369 |
+
else:
|
370 |
+
# Apply full MVGD transformation to masked pixels
|
371 |
+
# Reshape the masked pixels for matrix operations
|
372 |
+
target_flat = target_np.reshape(-1, 3)
|
373 |
+
result_np = np.copy(target_np)
|
374 |
+
|
375 |
+
try:
|
376 |
+
# Try to compute the full MVGD transformation
|
377 |
+
source_cov_sqrt = np.linalg.cholesky(source_cov)
|
378 |
+
target_cov_sqrt = np.linalg.cholesky(target_cov)
|
379 |
+
target_cov_sqrt_inv = np.linalg.inv(target_cov_sqrt)
|
380 |
+
|
381 |
+
# Compute the transformation matrix
|
382 |
+
temp = target_cov_sqrt_inv @ source_cov @ target_cov_sqrt_inv.T
|
383 |
+
temp_sqrt_inv = np.linalg.inv(np.linalg.cholesky(temp))
|
384 |
+
A = target_cov_sqrt @ temp_sqrt_inv @ target_cov_sqrt_inv
|
385 |
+
|
386 |
+
# Apply the transformation to all pixels
|
387 |
+
for i in range(target_np.shape[0]):
|
388 |
+
for j in range(target_np.shape[1]):
|
389 |
+
if target_mask_np[i, j] > 0.5:
|
390 |
+
# Only apply to masked pixels
|
391 |
+
pixel = target_np[i, j]
|
392 |
+
centered = pixel - target_mean
|
393 |
+
transformed = centered @ A.T + source_mean
|
394 |
+
result_np[i, j] = transformed
|
395 |
+
except np.linalg.LinAlgError:
|
396 |
+
# Fallback to simple statistics matching
|
397 |
+
source_std = np.std(source_pixels, axis=0)
|
398 |
+
target_std = np.std(target_pixels, axis=0)
|
399 |
+
|
400 |
+
for c in range(3):
|
401 |
+
normalized = (target_np[:,:,c] - target_mean[c]) / (target_std[c] + 1e-8) * source_std[c] + source_mean[c]
|
402 |
+
result_np[:,:,c] = np.where(target_mask_np > 0.5, normalized, target_np[:,:,c])
|
403 |
+
|
404 |
+
# Convert back to tensor
|
405 |
+
result_tensor = torch.from_numpy(result_np).to(result_img.device)
|
406 |
+
|
407 |
+
# Blend with original based on factor
|
408 |
+
result_img = torch.lerp(result_img, result_tensor, self.factor)
|
409 |
+
|
410 |
+
elif method in ["hm", "hm-mvgd-hm", "hm-mkl-hm"]:
|
411 |
+
# For histogram-based methods, we'll create temporary cropped images with just the masked regions
|
412 |
+
|
413 |
+
# Get the bounding box of the masked regions
|
414 |
+
source_min_y, source_min_x = np.min(source_coords[0]), np.min(source_coords[1])
|
415 |
+
source_max_y, source_max_x = np.max(source_coords[0]), np.max(source_coords[1])
|
416 |
+
target_min_y, target_min_x = np.min(target_coords[0]), np.min(target_coords[1])
|
417 |
+
target_max_y, target_max_x = np.max(target_coords[0]), np.max(target_coords[1])
|
418 |
+
|
419 |
+
# Create cropped images with just the masked regions
|
420 |
+
source_crop = source_np[source_min_y:source_max_y+1, source_min_x:source_max_x+1].copy()
|
421 |
+
target_crop = target_np[target_min_y:target_max_y+1, target_min_x:target_max_x+1].copy()
|
422 |
+
|
423 |
+
# Create cropped masks
|
424 |
+
source_mask_crop = source_mask_np[source_min_y:source_max_y+1, source_min_x:source_max_x+1]
|
425 |
+
target_mask_crop = target_mask_np[target_min_y:target_max_y+1, target_min_x:target_max_x+1]
|
426 |
+
|
427 |
+
# Apply the mask to the cropped images
|
428 |
+
# For non-masked areas, use the average color
|
429 |
+
source_avg_color = np.mean(source_pixels, axis=0)
|
430 |
+
target_avg_color = np.mean(target_pixels, axis=0)
|
431 |
+
|
432 |
+
for c in range(3):
|
433 |
+
source_crop[:, :, c] = np.where(source_mask_crop > 0.5, source_crop[:, :, c], source_avg_color[c])
|
434 |
+
target_crop[:, :, c] = np.where(target_mask_crop > 0.5, target_crop[:, :, c], target_avg_color[c])
|
435 |
+
|
436 |
+
try:
|
437 |
+
# Use the color matcher directly on the masked regions
|
438 |
+
matched_crop = cm.transfer(src=target_crop, ref=source_crop, method=method)
|
439 |
+
|
440 |
+
# Apply the matched colors back to the original image, only in the masked region
|
441 |
+
result_np = np.copy(target_np)
|
442 |
+
|
443 |
+
# Create a mapping from crop coordinates to original image coordinates
|
444 |
+
for i in range(target_crop.shape[0]):
|
445 |
+
for j in range(target_crop.shape[1]):
|
446 |
+
orig_i = target_min_y + i
|
447 |
+
orig_j = target_min_x + j
|
448 |
+
if orig_i < target_np.shape[0] and orig_j < target_np.shape[1] and target_mask_np[orig_i, orig_j] > 0.5:
|
449 |
+
result_np[orig_i, orig_j] = matched_crop[i, j]
|
450 |
+
|
451 |
+
# Convert back to tensor
|
452 |
+
result_tensor = torch.from_numpy(result_np).to(result_img.device)
|
453 |
+
|
454 |
+
# Blend with original based on factor
|
455 |
+
result_img = torch.lerp(result_img, result_tensor, self.factor)
|
456 |
+
|
457 |
+
except Exception as e:
|
458 |
+
# Fallback to AdaIN if color matcher fails
|
459 |
+
print(f"Color matcher failed for {method}, using fallback: {str(e)}")
|
460 |
+
result_img = self._apply_adain_to_region(
|
461 |
+
source_img,
|
462 |
+
target_img,
|
463 |
+
result_img,
|
464 |
+
source_mask_binary,
|
465 |
+
target_mask_binary
|
466 |
+
)
|
467 |
+
|
468 |
+
elif method == "coral":
|
469 |
+
# For CORAL method, extract masked regions and apply CORAL color transfer
|
470 |
+
try:
|
471 |
+
# Create masked versions of the images
|
472 |
+
source_masked = source_np.copy()
|
473 |
+
target_masked = target_np.copy()
|
474 |
+
|
475 |
+
# Apply masks - set non-masked areas to average color
|
476 |
+
source_avg_color = np.mean(source_pixels, axis=0)
|
477 |
+
target_avg_color = np.mean(target_pixels, axis=0)
|
478 |
+
|
479 |
+
for c in range(3):
|
480 |
+
source_masked[:, :, c] = np.where(source_mask_np > 0.5, source_masked[:, :, c], source_avg_color[c])
|
481 |
+
target_masked[:, :, c] = np.where(target_mask_np > 0.5, target_masked[:, :, c], target_avg_color[c])
|
482 |
+
|
483 |
+
# Convert to torch tensors and rearrange to [C, H, W]
|
484 |
+
source_tensor = torch.from_numpy(source_masked).permute(2, 0, 1).float()
|
485 |
+
target_tensor = torch.from_numpy(target_masked).permute(2, 0, 1).float()
|
486 |
+
|
487 |
+
# Apply CORAL color transfer
|
488 |
+
matched_tensor = coral(target_tensor, source_tensor) # target gets matched to source
|
489 |
+
|
490 |
+
# Convert back to [H, W, C] format
|
491 |
+
matched_np = matched_tensor.permute(1, 2, 0).numpy()
|
492 |
+
|
493 |
+
# Apply the matched colors back to the original image, only in the masked region
|
494 |
+
result_np = np.copy(target_np)
|
495 |
+
for c in range(3):
|
496 |
+
result_np[:, :, c] = np.where(target_mask_np > 0.5, matched_np[:, :, c], target_np[:, :, c])
|
497 |
+
|
498 |
+
# Convert back to tensor
|
499 |
+
result_tensor = torch.from_numpy(result_np).to(result_img.device)
|
500 |
+
|
501 |
+
# Blend with original based on factor
|
502 |
+
result_img = torch.lerp(result_img, result_tensor, self.factor)
|
503 |
+
|
504 |
+
except Exception as e:
|
505 |
+
# Fallback to AdaIN if CORAL fails
|
506 |
+
print(f"CORAL failed for {method}, using fallback: {str(e)}")
|
507 |
+
result_img = self._apply_adain_to_region(
|
508 |
+
source_img,
|
509 |
+
target_img,
|
510 |
+
result_img,
|
511 |
+
source_mask_binary,
|
512 |
+
target_mask_binary
|
513 |
+
)
|
514 |
+
else:
|
515 |
+
# Default to AdaIN for unsupported methods
|
516 |
+
result_img = self._apply_adain_to_region(
|
517 |
+
source_img,
|
518 |
+
target_img,
|
519 |
+
result_img,
|
520 |
+
source_mask_binary,
|
521 |
+
target_mask_binary
|
522 |
+
)
|
523 |
+
|
524 |
+
except Exception as e:
|
525 |
+
# If all fails, fallback to AdaIN
|
526 |
+
print(f"Error in color matching: {str(e)}, using AdaIN as fallback")
|
527 |
+
result_img = self._apply_adain_to_region(
|
528 |
+
source_img,
|
529 |
+
target_img,
|
530 |
+
result_img,
|
531 |
+
source_mask_binary,
|
532 |
+
target_mask_binary
|
533 |
+
)
|
534 |
+
|
535 |
+
return torch.clamp(result_img, 0.0, 1.0)
|
536 |
+
|
537 |
+
def _match_channel_statistics(self, source_channel, target_channel, result_channel, source_mask, target_mask):
|
538 |
+
"""
|
539 |
+
Match the statistics of a single channel.
|
540 |
+
|
541 |
+
Args:
|
542 |
+
source_channel: Source channel [H,W] (reference for color matching)
|
543 |
+
target_channel: Target channel [H,W] (to be color matched)
|
544 |
+
result_channel: Result channel to modify [H,W]
|
545 |
+
source_mask: Binary mask for source [H,W]
|
546 |
+
target_mask: Binary mask for target [H,W]
|
547 |
+
|
548 |
+
Returns:
|
549 |
+
Modified result channel
|
550 |
+
"""
|
551 |
+
# Count non-zero elements in masks
|
552 |
+
source_count = torch.sum(source_mask)
|
553 |
+
target_count = torch.sum(target_mask)
|
554 |
+
|
555 |
+
if source_count > 0 and target_count > 0:
|
556 |
+
# Calculate statistics only from masked regions
|
557 |
+
source_masked = source_channel * source_mask
|
558 |
+
target_masked = target_channel * target_mask
|
559 |
+
|
560 |
+
# Calculate mean
|
561 |
+
source_mean = torch.sum(source_masked) / source_count
|
562 |
+
target_mean = torch.sum(target_masked) / target_count
|
563 |
+
|
564 |
+
# Calculate variance
|
565 |
+
            source_var = torch.sum(((source_channel - source_mean) * source_mask) ** 2) / source_count
            target_var = torch.sum(((target_channel - target_mean) * target_mask) ** 2) / target_count

            # Calculate std (add small epsilon to avoid division by zero)
            source_std = torch.sqrt(source_var + 1e-8)
            target_std = torch.sqrt(target_var + 1e-8)

            # Apply AdaIN to the masked region
            normalized = ((target_channel - target_mean) / target_std) * source_std + source_mean

            # Blend with original based on factor
            result = torch.lerp(target_channel, normalized, self.factor)

            return result

        return result_channel

    def _install_package(self, package_name):
        """Install a package using pip."""
        import subprocess
        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])


def create_comparison_figure(original_img, matched_img, title="Color Matching Comparison"):
    """
    Create a matplotlib figure with the original and color-matched images.

    Args:
        original_img: Original PIL Image
        matched_img: Color-matched PIL Image
        title: Title for the figure

    Returns:
        matplotlib Figure
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

    ax1.imshow(original_img)
    ax1.set_title("Original")
    ax1.axis('off')

    ax2.imshow(matched_img)
    ax2.set_title("Color Matched")
    ax2.axis('off')

    plt.suptitle(title)
    plt.tight_layout()

    return fig


def coral(source, target):
    """
    CORAL (CORrelation ALignment) color transfer implementation.
    Based on the original ColorMatchImage approach.

    Args:
        source: Source image tensor [C, H, W] (to be color matched)
        target: Target image tensor [C, H, W] (reference for color matching)

    Returns:
        Color-matched source image tensor [C, H, W]
    """
    # Ensure tensors are float
    source = source.float()
    target = target.float()

    # Reshape to [C, N] where N is number of pixels
    C, H, W = source.shape
    source_flat = source.view(C, -1)  # [C, H*W]
    target_flat = target.view(C, -1)  # [C, H*W]

    # Compute means
    source_mean = torch.mean(source_flat, dim=1, keepdim=True)  # [C, 1]
    target_mean = torch.mean(target_flat, dim=1, keepdim=True)  # [C, 1]

    # Center the data
    source_centered = source_flat - source_mean  # [C, H*W]
    target_centered = target_flat - target_mean  # [C, H*W]

    # Compute covariance matrices
    N = source_centered.shape[1]
    source_cov = torch.mm(source_centered, source_centered.t()) / (N - 1)  # [C, C]
    target_cov = torch.mm(target_centered, target_centered.t()) / (N - 1)  # [C, C]

    # Add small epsilon to diagonal for numerical stability
    eps = 1e-5
    source_cov += eps * torch.eye(C, device=source.device)
    target_cov += eps * torch.eye(C, device=source.device)

    try:
        # Compute the transformation matrix using Cholesky decomposition
        # This is more stable than eigendecomposition for positive definite matrices

        # Cholesky decomposition: A = L * L^T
        source_chol = torch.linalg.cholesky(source_cov)  # Lower triangular
        target_chol = torch.linalg.cholesky(target_cov)  # Lower triangular

        # Compute the transformation matrix
        # We want to transform source covariance to target covariance
        # Transform = target_chol * source_chol^(-1)
        source_chol_inv = torch.linalg.inv(source_chol)
        transform_matrix = torch.mm(target_chol, source_chol_inv)

        # Apply transformation: result = transform_matrix * (source - source_mean) + target_mean
        result_centered = torch.mm(transform_matrix, source_centered)
        result_flat = result_centered + target_mean

        # Reshape back to original shape
        result = result_flat.view(C, H, W)

        # Clamp to valid range
        result = torch.clamp(result, 0.0, 1.0)

        return result

    except Exception as e:
        # Fallback to simple mean/std matching if Cholesky fails
        print(f"CORAL Cholesky failed, using simple statistics matching: {e}")

        # Simple per-channel statistics matching
        source_std = torch.std(source_centered, dim=1, keepdim=True)
        target_std = torch.std(target_centered, dim=1, keepdim=True)

        # Avoid division by zero
        source_std = torch.clamp(source_std, min=eps)

        # Apply simple transformation: (source - source_mean) / source_std * target_std + target_mean
        result_flat = (source_centered / source_std) * target_std + target_mean
        result = result_flat.view(C, H, W)

        # Clamp to valid range
        result = torch.clamp(result, 0.0, 1.0)

        return result
core.py
ADDED
@@ -0,0 +1,356 @@
"""Core part of LaDeco v2

Example usage:
>>> from core import Ladeco
>>> from PIL import Image
>>> from pathlib import Path
>>>
>>> # predict
>>> ldc = Ladeco()
>>> imgs = (thing for thing in Path("example").glob("*.jpg"))
>>> out = ldc.predict(imgs)
>>>
>>> # output - visualization
>>> segs = out.visualize(level=2)
>>> segs[0].image.show()
>>>
>>> # output - element area
>>> area = out.area()
>>> area[0]
{"fid": "example/.jpg", "l1_nature": 0.673, "l1_man_made": 0.241, ...}
"""
from matplotlib.patches import Rectangle
from pathlib import Path
from PIL import Image
from transformers import AutoModelForUniversalSegmentation, AutoProcessor
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import torch
from functools import lru_cache
from matplotlib.figure import Figure
import numpy.typing as npt
from typing import Iterable, NamedTuple, Generator
from tqdm import tqdm


class LadecoVisualization(NamedTuple):
    filename: str
    image: Figure


class Ladeco:

    def __init__(self,
        model_name: str = "shi-labs/oneformer_ade20k_swin_large",
        area_threshold: float = 0.01,
        device: str | None = None,
    ):
        if device is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device

        self.processor = AutoProcessor.from_pretrained(model_name)
        self.model = AutoModelForUniversalSegmentation.from_pretrained(model_name).to(self.device)

        self.area_threshold = area_threshold

        self.ade20k_labels = {
            name.strip(): int(idx)
            for name, idx in self.model.config.label2id.items()
        }
        self.ladeco2ade20k: dict[str, tuple[int]] = _get_ladeco_labels(self.ade20k_labels)

    def predict(
        self, image_paths: str | Path | Iterable[str | Path], show_progress: bool = False
    ) -> "LadecoOutput":
        if isinstance(image_paths, (str, Path)):
            imgpaths = [image_paths]
        else:
            imgpaths = list(image_paths)

        images = (
            Image.open(img_path).convert("RGB")
            for img_path in imgpaths
        )

        # batch inference functionality of OneFormer is broken
        masks: list[torch.Tensor] = []
        for img in tqdm(images, total=len(imgpaths), desc="Segmenting", disable=not show_progress):
            samples = self.processor(
                images=img, task_inputs=["semantic"], return_tensors="pt"
            ).to(self.device)

            with torch.no_grad():
                outputs = self.model(**samples)

            masks.append(
                self.processor.post_process_semantic_segmentation(outputs)[0]
            )

        return LadecoOutput(imgpaths, masks, self.ladeco2ade20k, self.area_threshold)


class LadecoOutput:

    def __init__(
        self,
        filenames: list[str | Path],
        masks: torch.Tensor,
        ladeco2ade: dict[str, tuple[int]],
        threshold: float,
    ):
        self.filenames = filenames
        self.masks = masks
        self.ladeco2ade: dict[str, tuple[int]] = ladeco2ade
        self.ade2ladeco: dict[int, str] = {
            idx: label
            for label, indices in self.ladeco2ade.items()
            for idx in indices
        }
        self.threshold = threshold

    def visualize(self, level: int) -> list[LadecoVisualization]:
        return list(self.ivisualize(level))

    def ivisualize(self, level: int) -> Generator[LadecoVisualization, None, None]:
        colormaps = self.color_map(level)
        labelnames = [name for name in self.ladeco2ade if name.startswith(f"l{level}")]

        for fname, mask in zip(self.filenames, self.masks):
            size = mask.shape + (3,)  # (H, W, RGB)
            vis = torch.zeros(size, dtype=torch.uint8)
            for name in labelnames:
                for idx in self.ladeco2ade[name]:
                    color = torch.tensor(colormaps[name] * 255, dtype=torch.uint8)
                    vis[mask == idx] = color

            with Image.open(fname) as img:
                target_size = img.size
            vis = Image.fromarray(vis.numpy(), mode="RGB").resize(target_size)

            fig, ax = plt.subplots()
            ax.imshow(vis)
            ax.axis('off')

            yield LadecoVisualization(filename=str(fname), image=fig)

    def area(self) -> list[dict[str, float | str]]:
        return list(self.iarea())

    def iarea(self) -> Generator[dict[str, float | str], None, None]:
        n_label_ADE20k = 150
        for filename, mask in zip(self.filenames, self.masks):
            ade_ratios = torch.tensor([(mask == i).count_nonzero() / mask.numel() for i in range(n_label_ADE20k)])
            #breakpoint()
            ldc_ratios: dict[str, float] = {
                label: round(ade_ratios[list(ade_indices)].sum().item(), 4)
                for label, ade_indices in self.ladeco2ade.items()
            }
            ldc_ratios: dict[str, float] = {
                label: 0 if ratio < self.threshold else ratio
                for label, ratio in ldc_ratios.items()
            }
            others = round(1 - ldc_ratios["l1_nature"] - ldc_ratios["l1_man_made"], 4)
            nfi = round(ldc_ratios["l1_nature"] / (ldc_ratios["l1_nature"] + ldc_ratios.get("l1_man_made", 0) + 1e-6), 4)

            yield {
                "fid": str(filename), **ldc_ratios, "others": others, "LC_NFI": nfi,
            }

    def color_map(self, level: int) -> dict[str, npt.NDArray[np.float64]]:
        "returns {'label_name': (R, G, B), ...}, where (R, G, B) in range [0, 1]"
        labels = [
            name for name in self.ladeco2ade.keys() if name.startswith(f"l{level}")
        ]
        if len(labels) == 0:
            raise RuntimeError(
                f"LaDeco only has 4 levels in 1, 2, 3, 4. You assigned {level}."
            )
        colormap = mpl.colormaps["viridis"].resampled(len(labels)).colors[:, :-1]
        # [:, :-1]: discard alpha channel
        return {name: color for name, color in zip(labels, colormap)}

    def color_legend(self, level: int) -> Figure:
        colors = self.color_map(level)

        match level:
            case 1:
                ncols = 1
            case 2:
                ncols = 1
            case 3:
                ncols = 2
            case 4:
                ncols = 5

        cell_width = 212
        cell_height = 22
        swatch_width = 48
        margin = 12

        nrows = math.ceil(len(colors) / ncols)

        width = cell_width * ncols + 2 * margin
        height = cell_height * nrows + 2 * margin
        dpi = 72

        fig, ax = plt.subplots(figsize=(width / dpi, height / dpi), dpi=dpi)
        fig.subplots_adjust(margin/width, margin/height,
                            (width-margin)/width, (height-margin*2)/height)
        ax.set_xlim(0, cell_width * ncols)
        ax.set_ylim(cell_height * (nrows-0.5), -cell_height/2.)
        ax.yaxis.set_visible(False)
        ax.xaxis.set_visible(False)
        ax.set_axis_off()

        for i, name in enumerate(colors):
            row = i % nrows
            col = i // nrows
            y = row * cell_height

            swatch_start_x = cell_width * col
            text_pos_x = cell_width * col + swatch_width + 7

            ax.text(text_pos_x, y, name, fontsize=14,
                    horizontalalignment='left',
                    verticalalignment='center')

            ax.add_patch(
                Rectangle(xy=(swatch_start_x, y-9), width=swatch_width,
                          height=18, facecolor=colors[name], edgecolor='0.7')
            )

        ax.set_title(f"LaDeco Color Legend - Level {level}")

        return fig


def _get_ladeco_labels(ade20k: dict[str, int]) -> dict[str, tuple[int]]:
    labels = {
        # level 4 labels
        # under l3_architecture
        "l4_hovel": (ade20k["hovel, hut, hutch, shack, shanty"],),
        "l4_building": (ade20k["building"], ade20k["house"]),
        "l4_skyscraper": (ade20k["skyscraper"],),
        "l4_tower": (ade20k["tower"],),
        # under l3_archi_parts
        "l4_step": (ade20k["step, stair"],),
        "l4_canopy": (ade20k["awning, sunshade, sunblind"], ade20k["canopy"]),
        "l4_arcade": (ade20k["arcade machine"],),
        "l4_door": (ade20k["door"],),
        "l4_window": (ade20k["window"],),
        "l4_wall": (ade20k["wall"],),
        # under l3_roadway
        "l4_stairway": (ade20k["stairway, staircase"],),
        "l4_sidewalk": (ade20k["sidewalk, pavement"],),
        "l4_road": (ade20k["road, route"],),
        # under l3_furniture
        "l4_sculpture": (ade20k["sculpture"],),
        "l4_flag": (ade20k["flag"],),
        "l4_can": (ade20k["trash can"],),
        "l4_chair": (ade20k["chair"],),
        "l4_pot": (ade20k["pot"],),
        "l4_booth": (ade20k["booth"],),
        "l4_streetlight": (ade20k["street lamp"],),
        "l4_bench": (ade20k["bench"],),
        "l4_fence": (ade20k["fence"],),
        "l4_table": (ade20k["table"],),
        # under l3_vehicle
        "l4_bike": (ade20k["bicycle"],),
        "l4_motorbike": (ade20k["minibike, motorbike"],),
        "l4_van": (ade20k["van"],),
        "l4_truck": (ade20k["truck"],),
        "l4_bus": (ade20k["bus"],),
        "l4_car": (ade20k["car"],),
        # under l3_sign
        "l4_traffic_sign": (ade20k["traffic light"],),
        "l4_poster": (ade20k["poster, posting, placard, notice, bill, card"],),
        "l4_signboard": (ade20k["signboard, sign"],),
        # under l3_vert_land
        "l4_rock": (ade20k["rock, stone"],),
        "l4_hill": (ade20k["hill"],),
        "l4_mountain": (ade20k["mountain, mount"],),
        # under l3_hori_land
        "l4_ground": (ade20k["earth, ground"], ade20k["land, ground, soil"]),
        "l4_field": (ade20k["field"],),
        "l4_sand": (ade20k["sand"],),
        "l4_dirt": (ade20k["dirt track"],),
        "l4_path": (ade20k["path"],),
        # under l3_flower
        "l4_flower": (ade20k["flower"],),
        # under l3_grass
        "l4_grass": (ade20k["grass"],),
        # under l3_shrub
        "l4_flora": (ade20k["plant"],),
        # under l3_arbor
        "l4_tree": (ade20k["tree"],),
        "l4_palm": (ade20k["palm, palm tree"],),
        # under l3_hori_water
        "l4_lake": (ade20k["lake"],),
        "l4_pool": (ade20k["pool"],),
        "l4_river": (ade20k["river"],),
        "l4_sea": (ade20k["sea"],),
        "l4_water": (ade20k["water"],),
        # under l3_vert_water
        "l4_fountain": (ade20k["fountain"],),
        "l4_waterfall": (ade20k["falls"],),
        # under l3_human
        "l4_person": (ade20k["person"],),
        # under l3_animal
        "l4_animal": (ade20k["animal"],),
        # under l3_sky
        "l4_sky": (ade20k["sky"],),
    }
    labels = labels | {
        # level 3 labels
        # under l2_landform
        "l3_hori_land": labels["l4_ground"] + labels["l4_field"] + labels["l4_sand"] + labels["l4_dirt"] + labels["l4_path"],
        "l3_vert_land": labels["l4_mountain"] + labels["l4_hill"] + labels["l4_rock"],
        # under l2_vegetation
        "l3_woody_plant": labels["l4_tree"] + labels["l4_palm"] + labels["l4_flora"],
        "l3_herb_plant": labels["l4_grass"],
        "l3_flower": labels["l4_flower"],
        # under l2_water
        "l3_hori_water": labels["l4_water"] + labels["l4_sea"] + labels["l4_river"] + labels["l4_pool"] + labels["l4_lake"],
        "l3_vert_water": labels["l4_fountain"] + labels["l4_waterfall"],
        # under l2_bio
        "l3_human": labels["l4_person"],
        "l3_animal": labels["l4_animal"],
        # under l2_sky
        "l3_sky": labels["l4_sky"],
        # under l2_archi
        "l3_architecture": labels["l4_building"] + labels["l4_hovel"] + labels["l4_tower"] + labels["l4_skyscraper"],
        "l3_archi_parts": labels["l4_wall"] + labels["l4_window"] + labels["l4_door"] + labels["l4_arcade"] + labels["l4_canopy"] + labels["l4_step"],
        # under l2_street
        "l3_roadway": labels["l4_road"] + labels["l4_sidewalk"] + labels["l4_stairway"],
        "l3_furniture": labels["l4_table"] + labels["l4_chair"] + labels["l4_fence"] + labels["l4_bench"] + labels["l4_streetlight"] + labels["l4_booth"] + labels["l4_pot"] + labels["l4_can"] + labels["l4_flag"] + labels["l4_sculpture"],
        "l3_vehicle": labels["l4_car"] + labels["l4_bus"] + labels["l4_truck"] + labels["l4_van"] + labels["l4_motorbike"] + labels["l4_bike"],
        "l3_sign": labels["l4_signboard"] + labels["l4_poster"] + labels["l4_traffic_sign"],
    }
    labels = labels | {
        # level 2 labels
        # under l1_nature
        "l2_landform": labels["l3_hori_land"] + labels["l3_vert_land"],
        "l2_vegetation": labels["l3_woody_plant"] + labels["l3_herb_plant"] + labels["l3_flower"],
        "l2_water": labels["l3_hori_water"] + labels["l3_vert_water"],
        "l2_bio": labels["l3_human"] + labels["l3_animal"],
        "l2_sky": labels["l3_sky"],
        # under l1_man_made
        "l2_archi": labels["l3_architecture"] + labels["l3_archi_parts"],
        "l2_street": labels["l3_roadway"] + labels["l3_furniture"] + labels["l3_vehicle"] + labels["l3_sign"],
    }
    labels = labels | {
        # level 1 labels
        "l1_nature": labels["l2_landform"] + labels["l2_vegetation"] + labels["l2_water"] + labels["l2_bio"] + labels["l2_sky"],
        "l1_man_made": labels["l2_archi"] + labels["l2_street"],
    }
    return labels


if __name__ == "__main__":
    ldc = Ladeco()
    image = Path("images") / "canyon_3011_00002354.jpg"
    out = ldc.predict(image)
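A small driver in the same spirit as the module docstring, run against the example images shipped in this upload (illustrative only; the output filenames are made up, and the OneFormer checkpoint is downloaded from the Hub on first use):

```python
from pathlib import Path

from core import Ladeco

ldc = Ladeco()
out = ldc.predict(sorted(Path("examples").glob("*.jpg")), show_progress=True)

# Area ratios per image, including LC_NFI ≈ l1_nature / (l1_nature + l1_man_made)
for row in out.iarea():
    print(row["fid"], row["l1_nature"], row["l1_man_made"], row["LC_NFI"])

# Level-2 segmentations and the matching color legend
for vis in out.ivisualize(level=2):
    vis.image.savefig(Path(vis.filename).stem + "_l2.png")
out.color_legend(level=2).savefig("legend_l2.png")
```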
examples/beach.jpg
ADDED
examples/field.jpg
ADDED
examples/sky.jpg
ADDED
face_comparison.py
ADDED
@@ -0,0 +1,246 @@
import cv2
import numpy as np
from PIL import Image
import tempfile
import os
import subprocess
import sys
import json
from typing import Dict, List, Tuple, Optional
import logging

# Set up logging to suppress DeepFace warnings
logging.getLogger('deepface').setLevel(logging.ERROR)

try:
    from deepface import DeepFace
    DEEPFACE_AVAILABLE = True
except ImportError:
    DEEPFACE_AVAILABLE = False
    print("Warning: DeepFace not available. Face comparison will be disabled.")


def run_deepface_in_subprocess(img1_path: str, img2_path: str) -> dict:
    """
    Run DeepFace verification in a separate process to avoid TensorFlow conflicts.
    """
    script_content = f'''
import sys
import json
from deepface import DeepFace

try:
    result = DeepFace.verify(img1_path="{img1_path}", img2_path="{img2_path}")
    print(json.dumps(result))
except Exception as e:
    print(json.dumps({{"error": str(e)}}))
'''

    try:
        # Write the script to a temporary file
        with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as script_file:
            script_file.write(script_content)
            script_path = script_file.name

        # Run the script in a subprocess
        result = subprocess.run([sys.executable, script_path],
                                capture_output=True, text=True, timeout=30)

        # Clean up the script file
        os.unlink(script_path)

        if result.returncode == 0:
            return json.loads(result.stdout.strip())
        else:
            return {"error": f"Subprocess failed: {result.stderr}"}

    except Exception as e:
        return {"error": str(e)}


class FaceComparison:
    """
    Handles face detection and comparison on full images.
    Only responsible for determining if faces match - does not handle segmentation.
    """

    def __init__(self):
        """
        Initialize face comparison using DeepFace's default verification threshold.
        """
        self.available = DEEPFACE_AVAILABLE
        self.face_match_result = None
        self.comparison_log = []

    def extract_faces(self, image_path: str) -> List[np.ndarray]:
        """
        Extract faces from the full image using DeepFace (exactly like the working script).

        Args:
            image_path: Path to the image

        Returns:
            List of face arrays
        """
        if not self.available:
            return []

        try:
            faces = DeepFace.extract_faces(img_path=image_path, detector_backend='opencv')
            if len(faces) == 0:
                return []
            return [f['face'] for f in faces]

        except Exception as e:
            print(f"Error extracting faces from {image_path}: {str(e)}")
            return []

    def compare_all_faces(self, image1_path: str, image2_path: str) -> Tuple[bool, List[str]]:
        """
        Compare all faces between two images (exactly like the working script).

        Args:
            image1_path: Path to first image
            image2_path: Path to second image

        Returns:
            Tuple of (match_found, log_messages)
        """
        if not self.available:
            return False, ["Face comparison not available - DeepFace not installed"]

        log_messages = []

        try:
            faces1 = self.extract_faces(image1_path)
            faces2 = self.extract_faces(image2_path)

            match_found = False

            log_messages.append(f"Found {len(faces1)} face(s) in Image 1 and {len(faces2)} face(s) in Image 2")

            if len(faces1) == 0 or len(faces2) == 0:
                log_messages.append("❌ No faces found in one or both images")
                return False, log_messages

            for idx1, face1 in enumerate(faces1):
                for idx2, face2 in enumerate(faces2):
                    # Create temporary files instead of permanent ones (exactly like original)
                    with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as temp1, \
                         tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as temp2:

                        # Convert faces to uint8 and save temporarily (exactly like original)
                        face1_uint8 = (face1 * 255).astype(np.uint8)
                        face2_uint8 = (face2 * 255).astype(np.uint8)

                        cv2.imwrite(temp1.name, cv2.cvtColor(face1_uint8, cv2.COLOR_RGB2BGR))
                        cv2.imwrite(temp2.name, cv2.cvtColor(face2_uint8, cv2.COLOR_RGB2BGR))

                        try:
                            # Try subprocess approach first to avoid TensorFlow conflicts
                            result = run_deepface_in_subprocess(temp1.name, temp2.name)

                            if "error" in result:
                                # If subprocess fails, try direct approach
                                result = DeepFace.verify(img1_path=temp1.name, img2_path=temp2.name)

                            similarity = 1 - result['distance']

                            log_messages.append(f"Comparing Face1-{idx1} to Face2-{idx2} | Similarity: {similarity:.3f}")

                            if result['verified']:
                                log_messages.append(f"✅ Match found between Face1-{idx1} and Face2-{idx2}")
                                match_found = True
                            else:
                                log_messages.append(f"❌ No match between Face1-{idx1} and Face2-{idx2}")

                        except Exception as e:
                            log_messages.append(f"❌ Error comparing Face1-{idx1} to Face2-{idx2}: {str(e)}")

                        # Clean up temporary files immediately
                        try:
                            os.unlink(temp1.name)
                            os.unlink(temp2.name)
                        except:
                            pass

            if not match_found:
                log_messages.append("❌ No matching faces found between the two images.")

            return match_found, log_messages

        except Exception as e:
            log_messages.append(f"Error in face comparison: {str(e)}")
            return False, log_messages

    def run_face_comparison(self, img1_path: str, img2_path: str) -> Tuple[bool, List[str]]:
        """
        Run face comparison and store results for later use.

        Args:
            img1_path: Path to first image
            img2_path: Path to second image

        Returns:
            Tuple of (faces_match, log_messages)
        """
        faces_match, log_messages = self.compare_all_faces(img1_path, img2_path)

        # Store results for later filtering
        self.face_match_result = faces_match
        self.comparison_log = log_messages

        return faces_match, log_messages

    def filter_human_regions_by_face_match(self, masks: Dict[str, np.ndarray]) -> Tuple[Dict[str, np.ndarray], List[str]]:
        """
        Filter human regions based on previously computed face comparison results.
        This only includes/excludes human regions - fine-grained segmentation happens elsewhere.

        Args:
            masks: Dictionary of semantic masks

        Returns:
            Tuple of (filtered_masks, log_messages)
        """
        if not self.available:
            return masks, ["Face comparison not available - DeepFace not installed"]

        if self.face_match_result is None:
            return masks, ["No face comparison results available. Run face comparison first."]

        filtered_masks = {}
        log_messages = []

        # Look for human-specific regions (l3_human, not l2_bio which includes animals)
        human_labels = [label for label in masks.keys() if 'l3_human' in label.lower()]
        bio_labels = [label for label in masks.keys() if 'l2_bio' in label.lower()]

        log_messages.append(f"Found human labels: {human_labels}")
        log_messages.append(f"Found bio labels: {bio_labels}")

        # Include all non-human regions regardless of face matching
        for label, mask in masks.items():
            if not any(human_term in label.lower() for human_term in ['l3_human', 'l2_bio']):
                filtered_masks[label] = mask
                log_messages.append(f"✅ Including non-human region: {label}")
            else:
                log_messages.append(f"🔍 Found human/bio region: {label}")

        # Handle human regions based on face matching results
        if self.face_match_result:
            log_messages.append("✅ Faces matched! Including human regions in color matching.")
            # Include human regions since faces matched
            for label in human_labels + bio_labels:
                if label in masks:
                    filtered_masks[label] = masks[label]
                    log_messages.append(f"✅ Including human region (faces matched): {label}")
        else:
            log_messages.append("❌ No face match found. Excluding human regions from color matching.")
            # Don't include human regions since faces didn't match
            for label in human_labels + bio_labels:
                log_messages.append(f"❌ Excluding human region (no face match): {label}")

        log_messages.append(f"📊 Final filtered masks: {list(filtered_masks.keys())}")

        return filtered_masks, log_messages
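To show how `FaceComparison` is meant to gate human regions (a sketch, not part of the commit; the portrait paths are placeholders and the mask dictionary is a dummy standing in for the LaDeco output):

```python
import numpy as np

from face_comparison import FaceComparison

fc = FaceComparison()
# Placeholder portrait paths; with DeepFace installed this checks whether the same person appears in both.
same_person, log = fc.run_face_comparison("person_a.jpg", "person_b.jpg")
print("\n".join(log))

# Gate human/bio regions on the stored verification result; keys follow the LaDeco level naming.
masks = {"l3_human": np.zeros((4, 4)), "l2_sky": np.ones((4, 4))}   # dummy masks for illustration
filtered, filter_log = fc.filter_human_regions_by_face_match(masks)
print(sorted(filtered))   # "l2_sky" is always kept; "l3_human" only survives when faces matched
```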
folder_paths.py
ADDED
@@ -0,0 +1,22 @@
import os

# Simple folder_paths module to replace ComfyUI's folder_paths
current_dir = os.path.dirname(os.path.abspath(__file__))
models_dir = os.path.join(current_dir, "models")

# Model folder mappings
model_folder_paths = {}

def add_model_folder_path(name, path):
    """Add a model folder path."""
    model_folder_paths[name] = path
    os.makedirs(path, exist_ok=True)

def get_full_path(dirname, filename):
    """Get the full path for a model file."""
    if dirname in model_folder_paths:
        return os.path.join(model_folder_paths[dirname], filename)
    return os.path.join(models_dir, dirname, filename)

# Initialize default paths
os.makedirs(models_dir, exist_ok=True)
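Usage of this shim mirrors ComfyUI's `folder_paths` module; a brief sketch (the "RMBG" registration below is only an example):

```python
import folder_paths

# Register a custom location for the clothes-segmentation weights, then resolve files inside it.
folder_paths.add_model_folder_path("RMBG", "./models/RMBG")
print(folder_paths.get_full_path("RMBG", "segformer_clothes/model.safetensors"))

# Unregistered names fall back to <repo>/models/<dirname>/<filename>.
print(folder_paths.get_full_path("onnx", "human-parts/deeplabv3p-resnet50-human.onnx"))
```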
human_parts_segmentation.py
ADDED
@@ -0,0 +1,322 @@
import os
import numpy as np
import torch
from PIL import Image, ImageFilter
import cv2
import requests
from typing import Dict, List, Tuple, Optional
import onnxruntime as ort

# Human parts labels based on CCIHP dataset - consistent with latest repo
HUMAN_PARTS_LABELS = {
    0: ("background", "Background"),
    1: ("hat", "Hat: Hat, helmet, cap, hood, veil, headscarf, part covering the skull and hair of a hood/balaclava, crown…"),
    2: ("hair", "Hair"),
    3: ("glove", "Glove"),
    4: ("glasses", "Sunglasses/Glasses: Sunglasses, eyewear, protective glasses…"),
    5: ("upper_clothes", "UpperClothes: T-shirt, shirt, tank top, sweater under a coat, top of a dress…"),
    6: ("face_mask", "Face Mask: Protective mask, surgical mask, carnival mask, facial part of a balaclava, visor of a helmet…"),
    7: ("coat", "Coat: Coat, jacket worn without anything on it, vest with nothing on it, a sweater with nothing on it…"),
    8: ("socks", "Socks"),
    9: ("pants", "Pants: Pants, shorts, tights, leggings, swimsuit bottoms… (clothing with 2 legs)"),
    10: ("torso-skin", "Torso-skin"),
    11: ("scarf", "Scarf: Scarf, bow tie, tie…"),
    12: ("skirt", "Skirt: Skirt, kilt, bottom of a dress…"),
    13: ("face", "Face"),
    14: ("left-arm", "Left-arm (naked part)"),
    15: ("right-arm", "Right-arm (naked part)"),
    16: ("left-leg", "Left-leg (naked part)"),
    17: ("right-leg", "Right-leg (naked part)"),
    18: ("left-shoe", "Left-shoe"),
    19: ("right-shoe", "Right-shoe"),
    20: ("bag", "Bag: Backpack, shoulder bag, fanny pack… (bag carried on oneself"),
    21: ("", "Others: Jewelry, tags, bibs, belts, ribbons, pins, head decorations, headphones…"),
}

# Model configuration - updated paths consistent with new repos
current_dir = os.path.dirname(os.path.abspath(__file__))
models_dir = os.path.join(current_dir, "models")
models_dir_path = os.path.join(models_dir, "onnx", "human-parts")
model_url = "https://huggingface.co/Metal3d/deeplabv3p-resnet50-human/resolve/main/deeplabv3p-resnet50-human.onnx"
model_name = "deeplabv3p-resnet50-human.onnx"
model_path = os.path.join(models_dir_path, model_name)


def get_class_index(class_name: str) -> int:
    """Return the index of the class name in the model."""
    if class_name == "":
        return -1

    for key, value in HUMAN_PARTS_LABELS.items():
        if value[0] == class_name:
            return key
    return -1


def download_model(model_url: str, model_path: str) -> bool:
    """Download the human parts segmentation model if not present - improved version."""
    if os.path.exists(model_path):
        return True

    try:
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        print(f"Downloading human parts model to {model_path}...")

        response = requests.get(model_url, stream=True)
        response.raise_for_status()

        total_size = int(response.headers.get('content-length', 0))
        downloaded = 0

        with open(model_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
                downloaded += len(chunk)
                if total_size > 0:
                    percent = (downloaded / total_size) * 100
                    print(f"\rDownload progress: {percent:.1f}%", end='', flush=True)

        print("\n✅ Model download completed")
        return True

    except Exception as e:
        print(f"\n❌ Error downloading model: {e}")
        return False


def get_human_parts_mask(image: torch.Tensor, model: ort.InferenceSession, rotation: float = 0, **kwargs) -> Tuple[torch.Tensor, int]:
    """
    Generate human parts mask using the ONNX model - improved version.

    Args:
        image: Input image tensor
        model: ONNX inference session
        rotation: Rotation angle (not used currently)
        **kwargs: Part-specific enable flags

    Returns:
        Tuple of (mask_tensor, score)
    """
    image = image.squeeze(0)
    image_np = image.numpy() * 255

    pil_image = Image.fromarray(image_np.astype(np.uint8))
    original_size = pil_image.size

    # Resize to 512x512 as the model expects
    pil_image = pil_image.resize((512, 512))
    center = (256, 256)

    if rotation != 0:
        pil_image = pil_image.rotate(rotation, center=center)

    # Normalize the image
    image_np = np.array(pil_image).astype(np.float32) / 127.5 - 1
    image_np = np.expand_dims(image_np, axis=0)

    # Use the ONNX model to get the segmentation
    input_name = model.get_inputs()[0].name
    output_name = model.get_outputs()[0].name
    result = model.run([output_name], {input_name: image_np})
    result = np.array(result[0]).argmax(axis=3).squeeze(0)

    # Debug: Check what classes the model actually detected
    unique_classes = np.unique(result)

    score = 0
    mask = np.zeros_like(result)

    # Combine masks for enabled classes
    for class_name, enabled in kwargs.items():
        class_index = get_class_index(class_name)
        if enabled and class_index != -1:
            detected = result == class_index
            mask[detected] = 255
            score += mask.sum()

    # Resize back to original size
    mask_image = Image.fromarray(mask.astype(np.uint8), mode="L")
    if rotation != 0:
        mask_image = mask_image.rotate(-rotation, center=center)

    mask_image = mask_image.resize(original_size)

    # Convert back to numpy - improved tensor handling
    mask = np.array(mask_image).astype(np.float32) / 255.0  # Normalize to 0-1 range

    # Add dimensions for torch tensor - consistent format
    mask = np.expand_dims(mask, axis=0)
    mask = np.expand_dims(mask, axis=0)

    return torch.from_numpy(mask), score


def numpy_to_torch_tensor(image_np: np.ndarray) -> torch.Tensor:
    """Convert numpy array to torch tensor in the format expected by the models."""
    if len(image_np.shape) == 3:
        return torch.from_numpy(image_np.astype(np.float32) / 255.0).unsqueeze(0)
    return torch.from_numpy(image_np.astype(np.float32) / 255.0)


def torch_tensor_to_numpy(tensor: torch.Tensor) -> np.ndarray:
    """Convert torch tensor back to numpy array - improved version."""
    if len(tensor.shape) == 4:
        tensor = tensor.squeeze(0)

    # Always handle as float32 tensor in 0-1 range then convert to binary
    tensor_np = tensor.numpy()
    if tensor_np.dtype == np.float32 and tensor_np.max() <= 1.0:
        return (tensor_np > 0.5).astype(np.float32)  # Binary threshold
    else:
        return tensor_np


class HumanPartsSegmentation:
    """
    Standalone human parts segmentation for face and hair using DeepLabV3+ ResNet50.
    """

    def __init__(self):
        self.model = None

    def check_model_cache(self):
        """Check if model file exists in cache - consistent with updated repos."""
        if not os.path.exists(model_path):
            return False, "Model file not found"
        return True, "Model cache verified"

    def clear_model(self):
        """Clear model from memory - improved version."""
        if self.model is not None:
            del self.model
            self.model = None

    def load_model(self):
        """Load the human parts segmentation model - improved version."""
        try:
            # Check and download model if needed
            cache_status, message = self.check_model_cache()
            if not cache_status:
                print(f"Cache check: {message}")
                if not download_model(model_url, model_path):
                    return False

            # Load model if needed
            if self.model is None:
                print("Loading human parts segmentation model...")
                self.model = ort.InferenceSession(model_path)
                print("✅ Human parts segmentation model loaded successfully")

            return True

        except Exception as e:
            print(f"❌ Error loading human parts model: {e}")
            self.clear_model()  # Cleanup on error
            return False

    def segment_parts(self, image_path: str, parts: List[str], mask_blur: int = 0, mask_offset: int = 0) -> Dict[str, np.ndarray]:
        """
        Segment specific human parts from an image - improved version with filtering.

        Args:
            image_path: Path to the image file
            parts: List of part names to segment (e.g., ['face', 'hair'])
            mask_blur: Blur amount for mask edges
            mask_offset: Expand/Shrink mask boundary

        Returns:
            Dictionary mapping part names to binary masks
        """
        if not self.load_model():
            print("❌ Cannot load human parts segmentation model")
            return {}

        try:
            # Load image
            image = cv2.imread(image_path)
            if image is None:
                print(f"❌ Could not load image: {image_path}")
                return {}

            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Convert to tensor format expected by the model
            image_tensor = numpy_to_torch_tensor(image_rgb)

            # Prepare kwargs for each part
            part_kwargs = {part: True for part in parts}

            # Get segmentation mask
            mask_tensor, score = get_human_parts_mask(image_tensor, self.model, **part_kwargs)

            # Convert back to numpy
            if len(mask_tensor.shape) == 4:
                mask_tensor = mask_tensor.squeeze(0).squeeze(0)
            elif len(mask_tensor.shape) == 3:
                mask_tensor = mask_tensor.squeeze(0)

            # Get the combined mask for all requested parts
            combined_mask = mask_tensor.numpy()

            # Generate individual masks for each part if multiple parts requested
            result_masks = {}
            if len(parts) == 1:
                # Single part - return the combined mask
                part_name = parts[0]
                final_mask = self._apply_filters(combined_mask, mask_blur, mask_offset)
                if np.sum(final_mask > 0) > 0:
                    result_masks[part_name] = final_mask
                else:
                    result_masks[part_name] = final_mask  # Return empty mask instead of None
            else:
                # Multiple parts - need to segment each individually
                for part in parts:
                    single_part_kwargs = {part: True}
                    single_mask_tensor, _ = get_human_parts_mask(image_tensor, self.model, **single_part_kwargs)

                    if len(single_mask_tensor.shape) == 4:
                        single_mask_tensor = single_mask_tensor.squeeze(0).squeeze(0)
                    elif len(single_mask_tensor.shape) == 3:
                        single_mask_tensor = single_mask_tensor.squeeze(0)

                    single_mask = single_mask_tensor.numpy()
                    final_mask = self._apply_filters(single_mask, mask_blur, mask_offset)

                    result_masks[part] = final_mask  # Always add mask, even if empty

            return result_masks

        except Exception as e:
            print(f"❌ Error in human parts segmentation: {e}")
            return {}
        finally:
            # Clean up model if not needed
            self.clear_model()

    def _apply_filters(self, mask: np.ndarray, mask_blur: int = 0, mask_offset: int = 0) -> np.ndarray:
        """Apply filtering to mask - new method from updated repo."""
        if mask_blur == 0 and mask_offset == 0:
            return mask

        try:
            # Convert to PIL for filtering
            mask_image = Image.fromarray((mask * 255).astype(np.uint8))

            # Apply blur if specified
            if mask_blur > 0:
                mask_image = mask_image.filter(ImageFilter.GaussianBlur(radius=mask_blur))

            # Apply offset if specified
            if mask_offset != 0:
                if mask_offset > 0:
                    mask_image = mask_image.filter(ImageFilter.MaxFilter(size=mask_offset * 2 + 1))
                else:
                    mask_image = mask_image.filter(ImageFilter.MinFilter(size=-mask_offset * 2 + 1))

            # Convert back to numpy
            filtered_mask = np.array(mask_image).astype(np.float32) / 255.0
            return filtered_mask

        except Exception as e:
            print(f"❌ Error applying filters: {e}")
            return mask
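A minimal sketch of calling the segmenter above (the portrait path is a placeholder; part names come from `HUMAN_PARTS_LABELS`, and the ONNX weights bundled under `models/onnx/human-parts` are downloaded automatically if missing):

```python
from human_parts_segmentation import HumanPartsSegmentation

seg = HumanPartsSegmentation()
masks = seg.segment_parts("portrait.jpg", parts=["face", "hair"], mask_blur=2, mask_offset=1)

for part, mask in masks.items():
    coverage = float(mask.mean()) if mask.size else 0.0   # masks are float arrays in [0, 1]
    print(f"{part}: {coverage:.1%} of pixels")
```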
models/RMBG/segformer_clothes/.cache/huggingface/.gitignore
ADDED
@@ -0,0 +1 @@
*
models/RMBG/segformer_clothes/.cache/huggingface/download/config.json.lock
ADDED
File without changes
models/RMBG/segformer_clothes/.cache/huggingface/download/config.json.metadata
ADDED
@@ -0,0 +1,3 @@
2634bcc40712620e414ffb0efd5f5e4ea732ec5d
8352c4562bb0e1f72767dcb170ad6f3f56007836
1748821507.461211
models/RMBG/segformer_clothes/.cache/huggingface/download/model.safetensors.lock
ADDED
File without changes
models/RMBG/segformer_clothes/.cache/huggingface/download/model.safetensors.metadata
ADDED
@@ -0,0 +1,3 @@
2634bcc40712620e414ffb0efd5f5e4ea732ec5d
f70ae566c5773fb335796ebaa8acc924ac25eb97222c2b2967d44d2fc11568e6
1748821512.848557
models/RMBG/segformer_clothes/.cache/huggingface/download/preprocessor_config.json.lock
ADDED
File without changes
models/RMBG/segformer_clothes/.cache/huggingface/download/preprocessor_config.json.metadata
ADDED
@@ -0,0 +1,3 @@
2634bcc40712620e414ffb0efd5f5e4ea732ec5d
b2340cf4e53b37fda4f5b92d28f11c0f33c3d0fd
1748821513.065366
models/RMBG/segformer_clothes/config.json
ADDED
@@ -0,0 +1,110 @@
{
  "_name_or_path": "nvidia/mit-b3",
  "architectures": [
    "SegformerForSemanticSegmentation"
  ],
  "attention_probs_dropout_prob": 0.0,
  "classifier_dropout_prob": 0.1,
  "decoder_hidden_size": 768,
  "depths": [
    3,
    4,
    18,
    3
  ],
  "downsampling_rates": [
    1,
    4,
    8,
    16
  ],
  "drop_path_rate": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_sizes": [
    64,
    128,
    320,
    512
  ],
  "id2label": {
    "0": "Background",
    "1": "Hat",
    "10": "Right-shoe",
    "11": "Face",
    "12": "Left-leg",
    "13": "Right-leg",
    "14": "Left-arm",
    "15": "Right-arm",
    "16": "Bag",
    "17": "Scarf",
    "2": "Hair",
    "3": "Sunglasses",
    "4": "Upper-clothes",
    "5": "Skirt",
    "6": "Pants",
    "7": "Dress",
    "8": "Belt",
    "9": "Left-shoe"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "label2id": {
    "Background": "0",
    "Bag": "16",
    "Belt": "8",
    "Dress": "7",
    "Face": "11",
    "Hair": "2",
    "Hat": "1",
    "Left-arm": "14",
    "Left-leg": "12",
    "Left-shoe": "9",
    "Pants": "6",
    "Right-arm": "15",
    "Right-leg": "13",
    "Right-shoe": "10",
    "Scarf": "17",
    "Skirt": "5",
    "Sunglasses": "3",
    "Upper-clothes": "4"
  },
  "layer_norm_eps": 1e-06,
  "mlp_ratios": [
    4,
    4,
    4,
    4
  ],
  "model_type": "segformer",
  "num_attention_heads": [
    1,
    2,
    5,
    8
  ],
  "num_channels": 3,
  "num_encoder_blocks": 4,
  "patch_sizes": [
    7,
    3,
    3,
    3
  ],
  "reshape_last_stage": true,
  "semantic_loss_ignore_index": 255,
  "sr_ratios": [
    8,
    4,
    2,
    1
  ],
  "strides": [
    4,
    2,
    2,
    2
  ],
  "torch_dtype": "float32",
  "transformers_version": "4.38.1"
}
models/RMBG/segformer_clothes/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f70ae566c5773fb335796ebaa8acc924ac25eb97222c2b2967d44d2fc11568e6
size 189029000
models/RMBG/segformer_clothes/preprocessor_config.json
ADDED
@@ -0,0 +1,23 @@
{
  "do_normalize": true,
  "do_reduce_labels": false,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.485,
    0.456,
    0.406
  ],
  "image_processor_type": "SegformerImageProcessor",
  "image_std": [
    0.229,
    0.224,
    0.225
  ],
  "resample": 2,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "height": 512,
    "width": 512
  }
}
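For reference, a sketch of loading the bundled SegFormer clothes checkpoint with the two config files above (the local path assumes the repository layout of this upload; the snippet itself is illustrative and not part of the commit):

```python
from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor

processor = SegformerImageProcessor.from_pretrained("models/RMBG/segformer_clothes")
model = SegformerForSemanticSegmentation.from_pretrained("models/RMBG/segformer_clothes")

print(model.config.id2label[4])   # "Upper-clothes"
```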
models/onnx/human-parts/deeplabv3p-resnet50-human.onnx
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a6e823a82da10ba24c29adfb544130684568c46bfac865e215bbace3b4035a71
size 47210581
requirements.txt
ADDED
@@ -0,0 +1,30 @@
# LaDeco requirements
torch==2.3.1
torchaudio
torchvision
tf-keras
transformers==4.42.4
diffusers
opencv-python
Pillow
numpy
matplotlib
scipy
scikit-learn

# For Gradio interface
gradio

# Face comparison
deepface

# Human parts segmentation
onnxruntime

# Clothing segmentation
huggingface-hub>=0.19.0
segment-anything>=1.0

# Color matching dependencies
color-matcher
spaces
spaces.py
ADDED
@@ -0,0 +1,12 @@
import functools
import os

def GPU(func):
    """
    A decorator to indicate that a function should use GPU acceleration if available.
    This is used specifically for Hugging Face Spaces.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)
    return wrapper
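The stub keeps Spaces-style code importable outside Hugging Face; note that, because it sits at the repository root, this `spaces.py` takes precedence over the `spaces` package pinned in requirements.txt for imports resolved from the repo. A tiny illustration (the decorated function is hypothetical):

```python
import spaces

@spaces.GPU
def run_inference(x):
    # On Spaces the real decorator requests GPU time; here it is a transparent pass-through.
    return x * 2

print(run_inference(21))   # 42
```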