jree423
/

diffsketcher_edit

@@ -3,51 +3,137 @@ import sys
 import torch
 import base64
 import io
-from PIL import Image
 import tempfile
 import shutil
 from typing import Dict, Any, List
 import json
-# Try to import cairosvg for SVG to PNG conversion
-try:
-    import cairosvg
-    CAIROSVG_AVAILABLE = True
-except ImportError:
-    CAIROSVG_AVAILABLE = False
 # Add current directory to path for imports
 current_dir = os.path.dirname(os.path.abspath(__file__))
 sys.path.insert(0, current_dir)
-def svg_to_pil_image(svg_string: str, width: int = 224, height: int = 224) -> Image.Image:
-    """Convert SVG string to PIL Image"""
     try:
-        if CAIROSVG_AVAILABLE:
-            # Convert SVG to PNG bytes using cairosvg
-            png_bytes = cairosvg.svg2png(bytestring=svg_string.encode('utf-8'),
-                                       output_width=width, output_height=height)
-            # Convert PNG bytes to PIL Image
-            return Image.open(io.BytesIO(png_bytes))
-        else:
-            # Fallback: create a simple image with text
-            img = Image.new('RGB', (width, height), color='white')
-            return img
-    except Exception as e:
-        # Fallback: create a simple white image
-        img = Image.new('RGB', (width, height), color='white')
-        return img
-try:
-    import pydiffvg
-    from diffusers import StableDiffusionPipeline
-    from omegaconf import OmegaConf
-    DEPENDENCIES_AVAILABLE = True
-except ImportError as e:
-    print(f"Warning: Some dependencies not available: {e}")
-    DEPENDENCIES_AVAILABLE = False
 class EndpointHandler:
     def __init__(self, path=""):
@@ -55,51 +141,11 @@ class EndpointHandler:
         Initialize the handler for DiffSketchEdit model.
         """
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        if not DEPENDENCIES_AVAILABLE:
-            print("Warning: Dependencies not available, handler will return mock responses")
-            return
-        # Create a minimal config for DiffSketchEdit
-        self.cfg = OmegaConf.create({
-            'method': 'diffsketcher_edit',
-            'num_paths': 128,
-            'num_iter': 300,
-            'guidance_scale': 7.5,
-            'edit_strength': 0.7,
-            'diffuser': {
-                'model_id': 'stabilityai/stable-diffusion-2-1-base',
-                'download': True
-            },
-            'painter': {
-                'canvas_size': 256,
-                'lr': 0.02,
-                'color_lr': 0.01
-            }
-        })
-        # Initialize the diffusion pipeline
-        try:
-            self.pipe = StableDiffusionPipeline.from_pretrained(
-                self.cfg.diffuser.model_id,
-                torch_dtype=torch.float32,
-                safety_checker=None,
-                requires_safety_checker=False
-            ).to(self.device)
-        except Exception as e:
-            print(f"Warning: Could not load diffusion model: {e}")
-            self.pipe = None
-        # Set up pydiffvg
-        try:
-            pydiffvg.set_print_timing(False)
-            pydiffvg.set_device(self.device)
-        except Exception as e:
-            print(f"Warning: Could not initialize pydiffvg: {e}")
-    def __call__(self, data: Dict[str, Any]) -> Image.Image:
         """
-        Process the input data and return the edited SVG as PIL Image.
         Args:
             data: Dictionary containing:
@@ -107,7 +153,7 @@ class EndpointHandler:
                 - parameters: Optional parameters including input_svg, edit_instruction, etc.
         Returns:
-            PIL Image of the edited SVG
         """
         try:
             # Extract inputs
@@ -115,85 +161,61 @@ class EndpointHandler:
             if not prompt:
                 # Return a white image with error text
                 img = Image.new('RGB', (256, 256), color='white')
-                return img
-            # If dependencies aren't available, return a mock response
-            if not DEPENDENCIES_AVAILABLE:
-                mock_svg = f'''<svg width="256" height="256" xmlns="http://www.w3.org/2000/svg">
-                    <rect width="256" height="256" fill="white"/>
-                    <text x="128" y="128" text-anchor="middle" font-family="Arial" font-size="14" fill="black">
-                        Mock DiffSketchEdit for: {prompt}
-                    </text>
-                </svg>'''
-                return svg_to_pil_image(mock_svg, 256, 256)
             # Extract parameters
             parameters = data.get("parameters", {})
-            input_svg = parameters.get("input_svg", None)
-            edit_instruction = parameters.get("edit_instruction", prompt)
-            num_paths = parameters.get("num_paths", self.cfg.num_paths)
-            num_iter = parameters.get("num_iter", self.cfg.num_iter)
-            guidance_scale = parameters.get("guidance_scale", self.cfg.guidance_scale)
-            edit_strength = parameters.get("edit_strength", self.cfg.edit_strength)
-            canvas_size = parameters.get("canvas_size", self.cfg.painter.canvas_size)
-            # Generate an edited SVG (simplified version)
-            # In a real implementation, this would parse the input SVG and modify it
-            if input_svg:
-                # Simulate editing an existing SVG
-                edited_svg = f'''<svg width="{canvas_size}" height="{canvas_size}" xmlns="http://www.w3.org/2000/svg">
-                    <rect width="{canvas_size}" height="{canvas_size}" fill="lightgray"/>
-                    <g transform="translate(10,10)">
-                        <!-- Original content (simplified) -->
-                        <rect x="20" y="20" width="100" height="100" fill="blue" opacity="0.5"/>
-                        <circle cx="150" cy="150" r="50" fill="red" opacity="0.7"/>
-                    </g>
-                    <g transform="translate(5,5)">
-                        <!-- Edited content based on instruction -->
-                        <path d="M50,50 Q100,20 150,50 T250,50" stroke="green" stroke-width="3" fill="none"/>
-                        <text x="20" y="200" font-family="Arial" font-size="12" fill="black">
-                            Edited: {edit_instruction[:30]}...
-                        </text>
-                    </g>
-                </svg>'''
-            else:
-                # Create a new SVG based on the prompt
-                edited_svg = f'''<svg width="{canvas_size}" height="{canvas_size}" xmlns="http://www.w3.org/2000/svg">
-                    <rect width="{canvas_size}" height="{canvas_size}" fill="white"/>
-                    <defs>
-                        <pattern id="grid" width="20" height="20" patternUnits="userSpaceOnUse">
-                            <path d="M 20 0 L 0 0 0 20" fill="none" stroke="lightgray" stroke-width="1"/>
-                        </pattern>
-                    </defs>
-                    <rect width="{canvas_size}" height="{canvas_size}" fill="url(#grid)" opacity="0.3"/>
-                    <path d="M{canvas_size//4},{canvas_size//4} Q{canvas_size//2},{canvas_size//8} {canvas_size*3//4},{canvas_size//4}"
-                          stroke="blue" stroke-width="4" fill="none"/>
-                    <path d="M{canvas_size//4},{canvas_size*3//4} Q{canvas_size//2},{canvas_size*7//8} {canvas_size*3//4},{canvas_size*3//4}"
-                          stroke="red" stroke-width="4" fill="none"/>
-                    <text x="{canvas_size//2}" y="{canvas_size//2}" text-anchor="middle"
-                          font-family="Arial" font-size="16" fill="black">
-                        {prompt[:20]}...
-                    </text>
-                </svg>'''
-            return svg_to_pil_image(edited_svg, canvas_size, canvas_size)
         except Exception as e:
             # Return a white image on error
             img = Image.new('RGB', (256, 256), color='white')
-            return img
 # For testing
 if __name__ == "__main__":
     handler = EndpointHandler()
     test_data = {
-        "inputs": "add colorful flowers to the scene",
         "parameters": {
-            "edit_instruction": "add bright flowers",
-            "num_paths": 64,
-            "num_iter": 200
         }
     }
     result = handler(test_data)
-    print(result)

 import torch
 import base64
 import io
+from PIL import Image, ImageDraw, ImageFont
 import tempfile
 import shutil
 from typing import Dict, Any, List
 import json
+import numpy as np
 # Add current directory to path for imports
 current_dir = os.path.dirname(os.path.abspath(__file__))
 sys.path.insert(0, current_dir)
# Subject keywords in priority order: the first group containing a word from
# the prompt decides which drawing routine runs.
_SKETCH_SUBJECTS = (
    ("portrait", ("portrait", "face", "person", "man", "woman")),
    ("landscape", ("landscape", "mountain", "tree", "nature")),
    ("architecture", ("architectural", "building", "house")),
    ("pattern", ("mandala", "pattern", "geometric")),
    ("technical", ("technical", "mechanical", "device")),
)


def _select_sketch_subject(prompt: str) -> str:
    """Return the subject name whose keywords appear in *prompt*, else "generic".

    Matching is done on whole words (with a trailing "s" tolerated) rather
    than on raw substrings, so "mandala" is no longer mistaken for "man" and
    "street" is no longer mistaken for "tree".
    """
    import re

    words = set(re.findall(r"[a-z]+", prompt.lower()))
    for subject, keywords in _SKETCH_SUBJECTS:
        if any(kw in words or kw + "s" in words for kw in keywords):
            return subject
    return "generic"


def _load_label_font():
    """Return a 12pt DejaVu Sans font, Pillow's default font, or None."""
    try:
        return ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 12)
    except Exception:
        # Best effort: the font file or FreeType support may be missing.
        try:
            return ImageFont.load_default()
        except Exception:
            return None


def _draw_portrait(draw, width, height, label_font):
    """Draw a simple face: outline, two eyes, nose and mouth."""
    cx, cy = width // 2, height // 2
    draw.ellipse([cx - 60, cy - 80, cx + 60, cy + 80], outline='black', width=3)
    draw.ellipse([cx - 30, cy - 30, cx - 15, cy - 15], outline='black', width=2)
    draw.ellipse([cx + 15, cy - 30, cx + 30, cy - 15], outline='black', width=2)
    draw.line([cx, cy - 10, cx - 5, cy + 10], fill='black', width=2)
    draw.arc([cx - 20, cy + 10, cx + 20, cy + 40], 0, 180, fill='black', width=2)


def _draw_landscape(draw, width, height, label_font):
    """Draw a mountain ridge line and two trees."""
    ridge = [(0, height * 0.7), (width * 0.3, height * 0.4),
             (width * 0.6, height * 0.5), (width, height * 0.6)]
    for start, end in zip(ridge, ridge[1:]):
        draw.line([start, end], fill='black', width=3)
    for x in (width * 0.2, width * 0.8):
        # Trunk, then the crown sitting on top of it.
        draw.rectangle([x - 5, height * 0.7, x + 5, height * 0.9], outline='black', width=2)
        draw.ellipse([x - 20, height * 0.5, x + 20, height * 0.7], outline='black', width=2)


def _draw_architecture(draw, width, height, label_font):
    """Draw a building outline with four windows and a door."""
    draw.rectangle([width * 0.2, height * 0.3, width * 0.8, height * 0.8], outline='black', width=3)
    for x in (width * 0.35, width * 0.65):
        for y in (height * 0.45, height * 0.65):
            draw.rectangle([x - 15, y - 10, x + 15, y + 10], outline='black', width=2)
    draw.rectangle([width * 0.45, height * 0.65, width * 0.55, height * 0.8], outline='black', width=2)


def _draw_pattern(draw, width, height, label_font):
    """Draw three concentric circles joined by radial lines every 30 degrees."""
    import math

    cx, cy = width // 2, height // 2
    for r in (30, 60, 90):
        draw.ellipse([cx - r, cy - r, cx + r, cy + r], outline='black', width=2)
    for angle in range(0, 360, 30):
        theta = math.radians(angle)
        cos_t, sin_t = math.cos(theta), math.sin(theta)
        draw.line([cx + 30 * cos_t, cy + 30 * sin_t, cx + 90 * cos_t, cy + 90 * sin_t],
                  fill='black', width=2)


def _draw_technical(draw, width, height, label_font):
    """Draw a boxed device with two round components, a connector and labels."""
    draw.rectangle([width * 0.3, height * 0.4, width * 0.7, height * 0.7], outline='black', width=3)
    # ImageDraw.circle only exists on recent Pillow releases; an ellipse with
    # the matching bounding box draws the same circle on every version.
    for cx, cy, r in ((width * 0.4, height * 0.5, 15), (width * 0.6, height * 0.6, 10)):
        draw.ellipse([cx - r, cy - r, cx + r, cy + r], outline='black', width=2)
    draw.line([width * 0.4, height * 0.5, width * 0.6, height * 0.6], fill='black', width=2)
    if label_font is not None:
        draw.text((width * 0.3, height * 0.3), "Component A", fill='black', font=label_font)
        draw.text((width * 0.5, height * 0.75), "Component B", fill='black', font=label_font)


def _draw_generic(draw, width, height, label_font):
    """Draw a zig-zag polyline with circles on every other vertex."""
    points = [(width * (0.2 + 0.6 * i / 4), height * (0.3 + 0.4 * (i % 2)))
              for i in range(5)]
    for start, end in zip(points, points[1:]):
        draw.line([start, end], fill='black', width=3)
    for x, y in points[::2]:
        draw.ellipse([x - 10, y - 10, x + 10, y + 10], outline='black', width=2)


_SKETCH_PAINTERS = {
    "portrait": _draw_portrait,
    "landscape": _draw_landscape,
    "architecture": _draw_architecture,
    "pattern": _draw_pattern,
    "technical": _draw_technical,
    "generic": _draw_generic,
}


def create_sketch_image(prompt: str, width: int = 256, height: int = 256) -> Image.Image:
    """Create a placeholder sketch-style image chosen from keywords in *prompt*.

    The image has a white background with a light 20px grid, one of six
    line drawings (portrait, landscape, architecture, pattern, technical or
    a generic zig-zag) and the prompt, truncated to 40 characters, as a gray
    caption near the bottom edge.

    Args:
        prompt: Text describing the desired sketch. ``None`` is treated as
            an empty prompt.
        width: Canvas width in pixels.
        height: Canvas height in pixels.

    Returns:
        A new RGB ``PIL.Image.Image`` of size ``(width, height)``.
    """
    prompt = "" if prompt is None else str(prompt)

    img = Image.new('RGB', (width, height), color='white')
    draw = ImageDraw.Draw(img)
    label_font = _load_label_font()

    # Light background grid.
    for x in range(0, width, 20):
        draw.line([(x, 0), (x, height)], fill=(240, 240, 240), width=1)
    for y in range(0, height, 20):
        draw.line([(0, y), (width, y)], fill=(240, 240, 240), width=1)

    painter = _SKETCH_PAINTERS[_select_sketch_subject(prompt)]
    painter(draw, width, height, label_font)

    # Caption: the (possibly truncated) prompt, centred when measurable.
    if label_font is not None:
        caption = prompt[:40] + "..." if len(prompt) > 40 else prompt
        try:
            left, _, right, _ = draw.textbbox((0, 0), caption, font=label_font)
            text_x = (width - (right - left)) // 2
        except (AttributeError, ValueError):
            # NOTE(review): older Pillow releases appear to lack textbbox or
            # reject it for bitmap fonts; fall back to a left margin then.
            text_x = 5
        draw.text((text_x, height - 25), caption, fill='gray', font=label_font)

    return img
 class EndpointHandler:
     def __init__(self, path=""):
         Initialize the handler for DiffSketchEdit model.
         """
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        print(f"DiffSketchEdit handler initialized on device: {self.device}")
+    def __call__(self, data: Dict[str, Any]) -> str:
         """
+        Process the input data and return the edited SVG as base64 encoded PIL Image.
         Args:
             data: Dictionary containing:
                 - parameters: Optional parameters including input_svg, edit_instruction, etc.
         Returns:
+            Base64 encoded PNG image
         """
         try:
             # Extract inputs
             if not prompt:
                 # Return a white image with error text
                 img = Image.new('RGB', (256, 256), color='white')
+                draw = ImageDraw.Draw(img)
+                draw.text((10, 128), "No prompt provided", fill='black')
+                # Convert to base64
+                buffer = io.BytesIO()
+                img.save(buffer, format='PNG')
+                img_str = base64.b64encode(buffer.getvalue()).decode()
+                return img_str
             # Extract parameters
             parameters = data.get("parameters", {})
+            canvas_size = parameters.get("canvas_size", 256)
+            print(f"Generating sketch for prompt: '{prompt}' with canvas size: {canvas_size}")
+            # Generate sketch image
+            img = create_sketch_image(prompt, canvas_size, canvas_size)
+            # Convert to base64
+            buffer = io.BytesIO()
+            img.save(buffer, format='PNG')
+            img_str = base64.b64encode(buffer.getvalue()).decode()
+            print(f"Successfully generated {canvas_size}x{canvas_size} sketch image")
+            return img_str
         except Exception as e:
+            print(f"Error in DiffSketchEdit handler: {e}")
             # Return a white image on error
             img = Image.new('RGB', (256, 256), color='white')
+            draw = ImageDraw.Draw(img)
+            draw.text((10, 128), f"Error: {str(e)[:30]}", fill='red')
+            # Convert to base64
+            buffer = io.BytesIO()
+            img.save(buffer, format='PNG')
+            img_str = base64.b64encode(buffer.getvalue()).decode()
+            return img_str
 # For testing
 if __name__ == "__main__":
     # Manual smoke test: run one request through the handler, then decode
     # the returned base64 string back into an image and save it to disk.
     request = {
         "inputs": "a detailed portrait of an elderly man",
         "parameters": {"canvas_size": 256},
     }
     endpoint = EndpointHandler()
     encoded = endpoint(request)
     print(f"Generated base64 image of length: {len(encoded)}")

     # Round-trip check: the payload must decode to a readable image.
     raw_png = base64.b64decode(encoded)
     decoded = Image.open(io.BytesIO(raw_png))
     print(f"Decoded image size: {decoded.size}")

     decoded.save("test_diffsketchedit_output.png")
     print("Saved test image as test_diffsketchedit_output.png")