img2img-turbo-sketch

Runtime error

App Files Files Community

Inmental committed on Aug 28, 2024

Commit

f59de63

verified ·

1 Parent(s): 6e73b66

Upload 4 files

Browse files

Files changed (4) hide show

draw.py +277 -0
flask_sketch2imagehd.py +462 -0
gradio_sketch2imagehd.py +222 -0
preview_server.py +19 -0

draw.py ADDED Viewed

	@@ -0,0 +1,277 @@

+import random
+import numpy as np
+from PIL import Image, ImageOps
+import base64
+from io import BytesIO
+import torch
+import torchvision.transforms.functional as F
+import gradio as gr
+from transformers import BlipProcessor, BlipForConditionalGeneration
+from flask import Flask, request, jsonify, render_template_string, send_file
+from flask_cors import CORS
+import threading
+import hashlib
+import signal
+import sys
+import os
+# Load models
+# The BLIP processor and captioning model are created at import time; the model is moved to "cuda".
+# NOTE(review): no code visible in this file reads `processor` or `blip_model` — confirm they are still needed here.
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to("cuda")
+# Pix2Pix model placeholder (Assume you have this model correctly implemented)
+class Pix2Pix_Turbo:
+    """Stand-in for the real Pix2Pix-Turbo model; it echoes its input."""
+    def __init__(self, mode):
+        """Accept ``mode`` for interface compatibility; nothing is stored."""
+    def __call__(self, c_t, prompt, deterministic, r, noise_map):
+        """Return ``c_t`` unchanged; every other argument is ignored."""
+        # Dummy image processing function for demonstration purposes
+        return c_t
+# Module-level model instance used by run(); with the placeholder class above, the mode string is ignored.
+pix2pix_model = Pix2Pix_Turbo("sketch_to_image_stochastic")
+# Flask application setup
+app = Flask(__name__)
+CORS(app)  # Handle CORS issues
+# Global Constants and Configuration
+# Each entry pairs a display name with a prompt template; run() substitutes the
+# user's text for the literal "{prompt}" placeholder with str.replace.
+STYLE_LIST = [
+    {"name": "Cinematic", "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy"},
+    {"name": "3D Model", "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting"},
+    {"name": "Anime", "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed"},
+    {"name": "Digital Art", "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed"},
+    {"name": "Photographic", "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed"},
+    {"name": "Pixel art", "prompt": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics"},
+    {"name": "Fantasy art", "prompt": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy"},
+    {"name": "Neonpunk", "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional"},
+    {"name": "Manga", "prompt": "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style"},
+]
+# Lookup table: style name -> prompt template.
+STYLES = {style["name"]: style["prompt"] for style in STYLE_LIST}
+STYLE_NAMES = list(STYLES.keys())
+# Style used by /process-image when the request carries no "style_name".
+DEFAULT_STYLE_NAME = "Fantasy art"
+# Upper bound for randomly drawn seeds (largest value of a signed 32-bit integer).
+MAX_SEED = np.iinfo(np.int32).max
+# Paths for storing sketches and outputs
+# NOTE(review): nothing visible in this file writes SKETCH_PATH; /get_sketch only reads it.
+SKETCH_PATH = "sketch.png"
+OUTPUT_PATH = "output.png"
+# Image processing function
+def run(image, prompt, prompt_template, style_name, seed, val_r):
+    """Turn a sketch into an image and return it as base64-encoded PNG text.
+
+    Args:
+        image: PIL image of the sketch.
+        prompt: User text; a blank or missing prompt is replaced by a fixed
+            placeholder.
+        prompt_template: Style template containing ``{prompt}``. When falsy
+            (for example the caller looked up an unknown style name), the
+            template of ``DEFAULT_STYLE_NAME`` is used instead of failing.
+        style_name: Not used by this function; kept for interface compatibility.
+        seed: Integer passed to ``torch.manual_seed``.
+        val_r: Forwarded to the model as ``r``.
+
+    Returns:
+        Base64 text (without a ``data:`` prefix) of the PNG that is also
+        written to ``OUTPUT_PATH``.
+    """
+    if not (prompt or "").strip():
+        prompt = "Generated by drawing tool"
+    if not prompt_template:
+        # STYLES.get() returns None for an unknown style; calling .replace on
+        # None used to raise AttributeError.
+        prompt_template = STYLES[DEFAULT_STYLE_NAME]
+    prompt = prompt_template.replace("{prompt}", prompt)
+    image = image.convert("RGB")
+    # Threshold every channel at 0.5 so the model input is strictly 0/1.
+    image_tensor = F.to_tensor(image) > 0.5
+    with torch.no_grad():
+        c_t = image_tensor.unsqueeze(0).to("cuda").float()
+        torch.manual_seed(seed)
+        # The noise map is 1/8 of the input resolution with 4 channels.
+        noise = torch.randn((1, 4, c_t.shape[2] // 8, c_t.shape[3] // 8), device=c_t.device)
+        output_image = pix2pix_model(c_t, prompt, deterministic=False, r=val_r, noise_map=noise)
+    output_pil = F.to_pil_image(output_image[0].cpu() * 0.5 + 0.5)
+    output_pil.save(OUTPUT_PATH)  # Save the output image
+    buffered = BytesIO()
+    output_pil.save(buffered, format="PNG")
+    return base64.b64encode(buffered.getvalue()).decode("utf-8")
+# Flask route to handle image processing
+@app.route('/process-image', methods=['POST'])
+def process_image():
+    """Decode the posted sketch, run the model and return the result as JSON.
+
+    Expects a JSON object whose ``image`` field is a data URI of the form
+    ``<header>,<base64 payload>``. Optional fields: ``prompt``,
+    ``style_name``, ``seed`` and ``val_r``.
+
+    Returns:
+        ``{"image": <base64 PNG>}`` on success, ``{"error": ...}`` with status
+        400 for a malformed request, or with status 500 when processing fails.
+    """
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        return jsonify({"error": "Request body must be a JSON object."}), 400
+    # Only the part after the first comma of a data URI is base64.
+    _, separator, image_data = str(data.get("image", "")).partition(",")
+    if not separator or not image_data:
+        return jsonify({"error": "Field 'image' must be a base64 data URI."}), 400
+    try:
+        image = Image.open(BytesIO(base64.b64decode(image_data))).convert("RGB")
+        seed = int(data.get("seed", random.randint(0, MAX_SEED)))
+        val_r = float(data.get("val_r", 0.4))
+    except (ValueError, TypeError, OSError) as e:
+        # Undecodable base64, unreadable image data or non-numeric parameters
+        # are client errors, not server failures.
+        return jsonify({"error": f"Invalid request: {e}"}), 400
+    try:
+        style_name = data.get("style_name", DEFAULT_STYLE_NAME)
+        # Process the image
+        output_image_uri = run(
+            image,
+            data.get("prompt", ""),
+            STYLES.get(style_name),
+            style_name,
+            seed,
+            val_r,
+        )
+        return jsonify({"image": output_image_uri})
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+# Flask route to serve the sketch image
+@app.route('/get_sketch', methods=['GET'])
+def get_sketch():
+    """Serve the file at ``SKETCH_PATH`` as a PNG, or a JSON 404 when it is absent."""
+    if not os.path.exists(SKETCH_PATH):
+        return jsonify({"status": "error", "message": "Sketch not found."}), 404
+    return send_file(SKETCH_PATH, mimetype='image/png')
+# Flask route to serve the output image
+@app.route('/get_output', methods=['GET'])
+def get_output():
+    """Serve the file at ``OUTPUT_PATH`` as a PNG, or a JSON 404 when it is absent."""
+    if not os.path.exists(OUTPUT_PATH):
+        return jsonify({"status": "error", "message": "Output not found."}), 404
+    return send_file(OUTPUT_PATH, mimetype='image/png')
+# HTML page for drawing
+@app.route('/')
+def draw_page():
+    """Serve the drawing page.
+
+    The page offers a toolbar and a canvas; on every ``mouseup`` the canvas
+    is exported as a PNG data URI and posted to ``/process-image``.
+
+    The canvas is painted white up front and the eraser paints white instead
+    of punching transparent holes: the backend converts the PNG to RGB, and
+    transparent pixels would otherwise arrive there as black.
+    """
+    # NOTE(review): the "Line" button only sets `tool`; draw() has no
+    # line-specific behaviour, so it currently acts like the brush.
+    html_template = """
+    <!doctype html>
+    <html lang="en">
+    <head>
+        <meta charset="utf-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1.0">
+        <title>Drawing Page</title>
+        <style>
+            body, html {
+                margin: 0;
+                padding: 0;
+                height: 100%;
+                display: flex;
+                justify-content: center;
+                align-items: center;
+                background-color: #f0f0f0;
+            }
+            .canvas-container {
+                border: 2px solid black;
+                position: relative;
+            }
+            .toolbar {
+                display: flex;
+                justify-content: center;
+                margin-bottom: 10px;
+            }
+            button {
+                margin-right: 5px;
+            }
+            canvas {
+                cursor: crosshair;
+            }
+        </style>
+    </head>
+    <body>
+        <div class="toolbar">
+            <button id="brush" onclick="setTool('brush')">Brush</button>
+            <button id="line" onclick="setTool('line')">Line</button>
+            <button id="eraser" onclick="setTool('eraser')">Eraser</button>
+            <button id="clear" onclick="clearCanvas()">Clear</button>
+            <input type="color" id="colorPicker" value="#000000">
+            <input type="range" id="brushSize" min="1" max="20" value="4">
+        </div>
+        <div class="canvas-container">
+            <canvas id="drawingCanvas" width="800" height="600"></canvas>
+        </div>
+        <script>
+            let canvas = document.getElementById('drawingCanvas');
+            let ctx = canvas.getContext('2d');
+            let drawing = false;
+            let tool = 'brush';
+            let lastX = 0, lastY = 0;
+            // Fill the canvas with white background
+            ctx.fillStyle = "#ffffff";
+            ctx.fillRect(0, 0, canvas.width, canvas.height);
+            canvas.addEventListener('mousedown', (e) => {
+                drawing = true;
+                [lastX, lastY] = [e.offsetX, e.offsetY];
+            });
+            canvas.addEventListener('mousemove', draw);
+            canvas.addEventListener('mouseup', () => {
+                drawing = false;
+                sendDrawingToBackend();
+            });
+            canvas.addEventListener('mouseout', () => drawing = false);
+            function draw(e) {
+                if (!drawing) return;
+                // The eraser paints with the background colour.
+                ctx.strokeStyle = (tool === 'eraser') ? "#ffffff" : document.getElementById('colorPicker').value;
+                ctx.lineWidth = document.getElementById('brushSize').value;
+                ctx.lineJoin = 'round';
+                ctx.lineCap = 'round';
+                ctx.beginPath();
+                ctx.moveTo(lastX, lastY);
+                ctx.lineTo(e.offsetX, e.offsetY);
+                ctx.stroke();
+                [lastX, lastY] = [e.offsetX, e.offsetY];
+            }
+            function setTool(selectedTool) {
+                tool = selectedTool;
+                ctx.globalCompositeOperation = 'source-over';
+            }
+            function clearCanvas() {
+                ctx.fillStyle = "#ffffff";
+                ctx.fillRect(0, 0, canvas.width, canvas.height);
+            }
+            function sendDrawingToBackend() {
+                let dataURL = canvas.toDataURL('image/png');
+                fetch('/process-image', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify({ image: dataURL }),
+                })
+                .then(response => response.json())
+                .then(data => console.log('Image processed', data))
+                .catch(error => console.error('Error processing image:', error));
+            }
+        </script>
+    </body>
+    </html>
+    """
+    return render_template_string(html_template)
+# HTML page for previewing the processed image
+@app.route('/preview')
+def preview_page():
+    """Serve a full-screen preview page that shows the image from ``/get_output``."""
+    # The page re-requests /get_output every 2 seconds; the appended timestamp
+    # makes each URL unique so the browser does not reuse a cached copy.
+    html_template = """
+    <!doctype html>
+    <html lang="en">
+    <head>
+        <meta charset="utf-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1.0">
+        <title>Preview Page</title>
+        <style>
+            body, html {
+                margin: 0;
+                padding: 0;
+                height: 100%;
+                background-color: black;
+            }
+            .full-screen-image {
+                width: 100%;
+                height: 100%;
+                object-fit: contain;
+            }
+        </style>
+        <script>
+            function refreshImage() {
+                var img = document.getElementById("output-image");
+                img.src = "/get_output?" + new Date().getTime();
+            }
+            // Auto-refresh every 2 seconds to show the latest image
+            setInterval(refreshImage, 2000);
+        </script>
+    </head>
+    <body>
+        <img id="output-image" src="/get_output" class="full-screen-image">
+    </body>
+    </html>
+    """
+    return render_template_string(html_template)
+def signal_handler(sig, frame):
+    """Announce the interrupt on stdout, then end the process with exit status 0."""
+    print("Ctrl+C pressed, shutting down.")
+    raise SystemExit(0)
+# Register the signal handler for Ctrl+C
+# This runs at import time, not only when the file is executed as a script.
+signal.signal(signal.SIGINT, signal_handler)
+# Start Flask's built-in server on all interfaces, port 2073, when run as a script.
+if __name__ == "__main__":
+    app.run(host='0.0.0.0', port=2073)

flask_sketch2imagehd.py ADDED Viewed

	@@ -0,0 +1,462 @@

+import random
+import numpy as np
+from PIL import Image, ImageOps
+import base64
+from io import BytesIO
+import torch
+import torchvision.transforms.functional as F
+from transformers import BlipProcessor, BlipForConditionalGeneration
+from src.pix2pix_turbo import Pix2Pix_Turbo
+import nltk
+from nltk import pos_tag
+from nltk.tokenize import word_tokenize
+import re
+import os
+import threading
+import hashlib
+from flask import Flask, request, send_file, jsonify, render_template_string
+from flask_cors import CORS
+import signal
+import sys
+import logging
+import json
+import gc
+from torch.cuda.amp import autocast
+# Set environment variable for better memory management
+# NOTE(review): presumably this must be set before the first CUDA allocation to have any effect — confirm.
+os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
+# Function to clear CUDA cache and collect garbage
+def clear_memory():
+    """Collect Python garbage, then release cached CUDA memory.
+
+    Garbage collection runs first so that tensors kept alive only by
+    reference cycles are freed before ``torch.cuda.empty_cache()`` is called;
+    the cache can only give back blocks that no tensor still occupies.
+    """
+    gc.collect()
+    torch.cuda.empty_cache()
+# Load the configuration from config.json
+# The path is relative, so the file is looked up in the current working directory.
+# A missing file or a missing key below raises at import time.
+with open('config.json', 'r') as config_file:
+    config = json.load(config_file)
+# Setup logging as per config
+logging.basicConfig(level=config["logging"]["level"], format=config["logging"]["format"])
+# Ensure NLTK resources are downloaded
+nltk.download('averaged_perceptron_tagger', quiet=True)
+nltk.download('punkt', quiet=True)
+# File paths for storing sketches and outputs
+SKETCH_PATH = config["file_paths"]["sketch_path"]
+OUTPUT_PATH = config["file_paths"]["output_path"]
+# Processing queue
+# run() appends one {"prompt", "status"} dict per request; /get_status returns the list.
+processing_queue = []
+# Global Constants and Configuration
+STYLE_LIST = config["style_list"]
+# Lookup table: style name -> prompt template.
+STYLES = {style["name"]: style["prompt"] for style in STYLE_LIST}
+DEFAULT_STYLE_NAME = config["default_style_name"]
+# Suffixes from which generate_prompt_from_sketch() picks one at random.
+RANDOM_VALUES = config["random_values"]
+PIX2PIX_MODEL_NAME = config["model_params"]["pix2pix_model_name"]
+DEVICE = config["model_params"]["device"]
+DEFAULT_SEED = config["model_params"]["default_seed"]
+VAL_R_DEFAULT = config["model_params"]["val_r_default"]
+MAX_SEED = config["model_params"]["max_seed"]
+# Canvas configuration
+CANVAS_WIDTH = config["canvas"]["width"]
+CANVAS_HEIGHT = config["canvas"]["height"]
+BACKGROUND_COLOR = config["canvas"]["background_color"]
+DEFAULT_BRUSH_COLOR = config["canvas"]["default_brush_color"]
+DEFAULT_BRUSH_SIZE = config["canvas"]["default_brush_size"]
+ERASER_COLOR = config["canvas"]["eraser_color"]
+MAX_BRUSH_SIZE = config["canvas"]["max_brush_size"]
+MIN_BRUSH_SIZE = config["canvas"]["min_brush_size"]
+# Preload Models
+# Both models are created at import time and moved to DEVICE.
+logging.debug("Loading BLIP and Pix2Pix models...")
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(DEVICE).eval()  # Set model to eval mode
+pix2pix_model = Pix2Pix_Turbo(PIX2PIX_MODEL_NAME).to(DEVICE).eval()  # Set model to eval mode
+logging.debug("Models loaded.")
+# NOTE(review): this lowercase table is separate from the config-driven STYLE_LIST / STYLES
+# defined earlier; in the visible code only STYLE_NAMES is derived from it.
+style_list = [
+    {
+        "name": "Cinematic",
+        "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
+    },
+    # Other styles...
+]
+styles = {k["name"]: k["prompt"] for k in style_list}
+STYLE_NAMES = list(styles.keys())
+# NOTE(review): the next two assignments overwrite the values read from config.json earlier
+# in this file (default_style_name and model_params.max_seed) — confirm this is intended.
+DEFAULT_STYLE_NAME = "Fantasy art"
+MAX_SEED = np.iinfo(np.int32).max
+# Shared flag and thread for managing the current processing
+# process_image() sets cancel_flag to ask a running worker to stop; run() checks it once,
+# immediately before calling the model.
+current_thread = None
+cancel_flag = threading.Event()
+def pil_image_to_data_uri(img: Image, format="PNG") -> str:
+    """Encode ``img`` in ``format`` and wrap the bytes in a base64 ``data:`` URI."""
+    with BytesIO() as stream:
+        img.save(stream, format=format)
+        payload = base64.b64encode(stream.getvalue())
+    return f"data:image/{format.lower()};base64,{payload.decode()}"
+def generate_prompt_from_sketch(image: Image) -> str:
+    """Build a text prompt for a sketch from a BLIP caption.
+
+    The sketch is fitted to the configured canvas size and captioned with
+    BLIP. The caption is split on ``", "``, each fragment is reduced to its
+    nouns, and the non-empty results are joined with ``" and "``. A random
+    entry of ``RANDOM_VALUES`` is appended.
+
+    Args:
+        image: PIL image of the sketch.
+
+    Returns:
+        A prompt of the form ``"a photo of a <nouns>, <random suffix>"``.
+    """
+    logging.debug("Generating prompt from sketch...")
+    image = ImageOps.fit(image, (CANVAS_WIDTH, CANVAS_HEIGHT), Image.LANCZOS)
+    inputs = processor(image, return_tensors="pt").to(DEVICE)
+    with torch.no_grad():
+        out = blip_model.generate(**inputs, max_new_tokens=50)
+    text_prompt = processor.decode(out[0], skip_special_tokens=True)
+    logging.debug(f"Generated prompt: {text_prompt}")
+    noun_groups = (extract_main_words(part) for part in text_prompt.split(', ') if part.strip())
+    # A fragment without nouns yields "", which used to leave gaps such as
+    # "a photo of a  and Cat" in the prompt.
+    recognized_items = [group for group in noun_groups if group]
+    if not recognized_items:
+        # No nouns were tagged at all: use the raw caption as the subject.
+        recognized_items = [text_prompt.strip()]
+    random_prefix = random.choice(RANDOM_VALUES)
+    prompt = f"a photo of a {' and '.join(recognized_items)}, {random_prefix}"
+    logging.debug(f"Final prompt: {prompt}")
+    return prompt
+def extract_main_words(item: str) -> str:
+    """Return the nouns found in ``item``, capitalised and joined by single spaces."""
+    noun_tags = ('NN', 'NNP', 'NNPS', 'NNS')
+    tagged_tokens = pos_tag(word_tokenize(item.strip()))
+    capitalised = []
+    for token, tag in tagged_tokens:
+        if tag in noun_tags:
+            capitalised.append(token.capitalize())
+    return ' '.join(capitalised)
+def run(image, prompt, prompt_template, style_name, seed, val_r):
+    """Run the sketch-to-image pipeline and write the sketch and output to disk.
+
+    Args:
+        image: PIL image of the sketch, or ``None``.
+        prompt: User text; when blank or missing, a prompt is generated from
+            the sketch.
+        prompt_template: Style template whose ``{prompt}`` is replaced.
+        style_name: Not used by this function; kept for interface compatibility.
+        seed: Integer passed to ``torch.manual_seed``.
+        val_r: Forwarded to the model as ``r``.
+
+    Returns:
+        ``(output_uri, inverted_sketch_uri, output_uri, original_prompt)``.
+        With ``image`` set to ``None`` a blank sketch is saved and four empty
+        strings are returned. When the cancel flag is set before the model
+        call, the first three items are empty strings.
+
+    The entry appended to ``processing_queue`` ends with status ``"done"``,
+    ``"canceled"`` or ``"failed"`` instead of staying ``"processing"``.
+    """
+    logging.debug("Running model inference...")
+    if image is None:
+        blank_image = Image.new("L", (CANVAS_WIDTH, CANVAS_HEIGHT), 255)
+        blank_image.save(SKETCH_PATH)  # Save blank image as sketch
+        logging.debug("No image provided. Saving blank image.")
+        return "", "", "", ""
+    if not (prompt or "").strip():
+        prompt = generate_prompt_from_sketch(image)
+    # Save the sketch to a file
+    image.save(SKETCH_PATH)
+    # Show the original prompt before processing
+    original_prompt = f"Original Prompt: {prompt}"
+    logging.debug(original_prompt)
+    # Keep a handle on the queue entry so its status can be finalised below.
+    task = {"prompt": prompt, "status": "processing"}
+    processing_queue.append(task)
+    final_status = "failed"
+    try:
+        prompt = prompt_template.replace("{prompt}", prompt)
+        logging.debug(f"Processing with prompt: {prompt}")
+        image = image.convert("RGB")
+        # Normalize to [-1, 1]; the CPU copy is kept for the out-of-memory fallback.
+        cpu_input = (F.to_tensor(image) * 2 - 1).unsqueeze(0).float()
+        clear_memory()  # Clear memory before running the model
+        noise = None
+        try:
+            with torch.no_grad():
+                c_t = cpu_input.to(DEVICE)
+                torch.manual_seed(seed)
+                _, _, H, W = c_t.shape
+                noise = torch.randn((1, 4, H // 8, W // 8), device=c_t.device)
+                logging.debug("Calling Pix2Pix model...")
+                # Enable mixed precision
+                with autocast():
+                    if cancel_flag.is_set():
+                        logging.debug("Processing canceled.")
+                        final_status = "canceled"
+                        return "", "", "", original_prompt
+                    output_image = pix2pix_model(c_t, prompt, deterministic=False, r=val_r, noise_map=noise)
+                logging.debug("Model inference completed.")
+        except RuntimeError as e:
+            if "CUDA out of memory" not in str(e):
+                raise
+            logging.warning("CUDA out of memory error. Falling back to CPU.")
+            with torch.no_grad():
+                if noise is None:
+                    # The failure happened before the noise map existed.
+                    torch.manual_seed(seed)
+                    noise = torch.randn((1, 4, cpu_input.shape[2] // 8, cpu_input.shape[3] // 8))
+                try:
+                    pix2pix_model_cpu = pix2pix_model.cpu()  # Move the model to CPU
+                    output_image = pix2pix_model_cpu(cpu_input, prompt, deterministic=False, r=val_r, noise_map=noise.cpu())
+                finally:
+                    # .cpu() moves the shared module itself; without moving it
+                    # back, the next request would feed DEVICE tensors to a
+                    # CPU model.
+                    clear_memory()
+                    try:
+                        pix2pix_model.to(DEVICE)
+                    except RuntimeError:
+                        logging.exception("Could not move the Pix2Pix model back to %s.", DEVICE)
+        output_pil = F.to_pil_image(output_image[0].cpu() * 0.5 + 0.5)
+        output_pil.save(OUTPUT_PATH)
+        logging.debug("Output image saved.")
+        input_sketch_uri = pil_image_to_data_uri(Image.fromarray(255 - np.array(image)))
+        output_image_uri = pil_image_to_data_uri(output_pil)
+        # Log only the size: the URI embeds the whole image as base64.
+        logging.debug("Generated output URI (%d characters).", len(output_image_uri))
+        clear_memory()  # Clear memory after running the model
+        final_status = "done"
+        return output_image_uri, input_sketch_uri, output_image_uri, original_prompt
+    finally:
+        task["status"] = final_status
+def process_image_task(image, prompt, style_name, seed, val_r):
+    """Worker-thread entry point: run the pipeline for one request.
+
+    An unknown ``style_name`` falls back to the default style's template, or
+    to the identity template ``"{prompt}"`` when that is not configured
+    either. The previous fallback passed the style *name* as the template.
+
+    Returns:
+        ``{"image": uri}`` on success or ``{"error": message}`` on failure.
+        A plain dict is returned because this function runs in a background
+        thread without a Flask application context, where ``jsonify`` raises;
+        the thread discards the return value.
+    """
+    try:
+        cancel_flag.clear()  # Clear any previous cancellation flag
+        prompt_template = STYLES.get(style_name) or STYLES.get(DEFAULT_STYLE_NAME) or "{prompt}"
+        output_image_uri, _, _, _ = run(image, prompt, prompt_template, style_name, seed, val_r)
+        logging.debug("Processed image URI has %d characters.", len(output_image_uri))
+        return {"image": output_image_uri}
+    except Exception as e:
+        # Top-level boundary of the worker thread: log with traceback so the
+        # failure is not lost.
+        logging.exception(f"Error processing image: {e}")
+        return {"error": str(e)}
+# Flask Server Setup for Preview and JSON endpoint
+# The app object must exist before the @app.route decorators below are evaluated.
+app = Flask(__name__)
+CORS(app)  # Enable CORS
+@app.route('/process-image', methods=['POST'])
+def process_image():
+    """Validate the posted sketch and start processing it in a worker thread.
+
+    Expects a JSON object whose ``image`` field is a data URI of the form
+    ``<header>,<base64 payload>``. Optional fields: ``prompt``,
+    ``style_name``, ``seed`` and ``val_r``.
+
+    The request is validated before any running job is cancelled, so a
+    malformed request no longer interrupts work in progress.
+
+    Returns:
+        ``{"status": "processing_started"}``, or ``{"error": ...}`` with
+        status 400 for a malformed request.
+    """
+    global current_thread
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        return jsonify({"error": "Request body must be a JSON object."}), 400
+    # Extract and decode the base64 image
+    _, separator, image_data = str(data.get("image", "")).partition(",")
+    if not separator or not image_data:
+        return jsonify({"error": "Field 'image' must be a base64 data URI."}), 400
+    try:
+        image = Image.open(BytesIO(base64.b64decode(image_data))).convert("RGB")
+        seed = int(data.get("seed", DEFAULT_SEED))
+        val_r = float(data.get("val_r", VAL_R_DEFAULT))
+    except (ValueError, TypeError, OSError) as e:
+        return jsonify({"error": f"Invalid request: {e}"}), 400
+    prompt = data.get("prompt", "")
+    style_name = data.get("style_name", DEFAULT_STYLE_NAME)
+    # Cancel any ongoing processing
+    if current_thread is not None and current_thread.is_alive():
+        logging.debug("Cancelling previous processing...")
+        cancel_flag.set()
+        # The flag is only checked right before the model call, so this can
+        # block until a running inference has finished.
+        current_thread.join()  # Wait for the thread to finish
+    # Start new processing in a separate thread
+    current_thread = threading.Thread(target=process_image_task, args=(image, prompt, style_name, seed, val_r))
+    current_thread.start()
+    return jsonify({"status": "processing_started"})
+@app.route('/get_sketch', methods=['GET'])
+def get_sketch():
+    """Serve the file at ``SKETCH_PATH`` as a PNG, or a JSON 404 when it is absent."""
+    if not os.path.exists(SKETCH_PATH):
+        return jsonify({"status": "error", "message": "Sketch not found."}), 404
+    return send_file(SKETCH_PATH, mimetype='image/png')
+@app.route('/get_output', methods=['GET'])
+def get_output():
+    """Serve the file at ``OUTPUT_PATH`` as a PNG, or a JSON 404 when it is absent."""
+    if not os.path.exists(OUTPUT_PATH):
+        return jsonify({"status": "error", "message": "Output not found."}), 404
+    return send_file(OUTPUT_PATH, mimetype='image/png')
+@app.route('/get_status', methods=['GET'])
+def get_status():
+    """Return the last output image, its checksum and the processing queue as JSON.
+
+    ``image_base64`` holds the base64 of the file at ``OUTPUT_PATH`` and
+    ``checksum`` its SHA-256 hex digest; both are empty strings when the file
+    does not exist.
+    """
+    try:
+        # Open directly instead of checking os.path.exists first:
+        # /clear_preview may delete the file between the check and the open.
+        with open(OUTPUT_PATH, "rb") as f:
+            img_data = f.read()
+    except FileNotFoundError:
+        base64_image = ""
+        checksum = ""
+    else:
+        base64_image = base64.b64encode(img_data).decode('utf-8')
+        checksum = hashlib.sha256(img_data).hexdigest()
+    return jsonify({
+        "image_base64": base64_image,
+        "checksum": checksum,
+        "processing_queue": processing_queue
+    })
+@app.route('/')
+def index():
+    """Serve a full-screen preview page that shows the image from ``/get_output``."""
+    # HTML template for the preview page
+    # The page re-requests /get_output every 2 seconds; the appended timestamp
+    # makes each URL unique so the browser does not reuse a cached copy.
+    html_template = """
+    <!doctype html>
+    <html lang="en">
+      <head>
+        <meta charset="utf-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1.0">
+        <title>Preview Page</title>
+        <style>
+          body, html {
+            margin: 0;
+            padding: 0;
+            height: 100%;
+            background-color: black;
+          }
+          .full-screen-image {
+            width: 100%;
+            height: 100%;
+            object-fit: contain;
+          }
+        </style>
+        <script>
+          function refreshImage() {
+            var img = document.getElementById("output-image");
+            img.src = "/get_output?" + new Date().getTime();
+          }
+          // Auto-refresh every 2 seconds to show the latest image
+          setInterval(refreshImage, 2000);
+        </script>
+      </head>
+      <body>
+        <img id="output-image" src="/get_output" class="full-screen-image">
+      </body>
+    </html>
+    """
+    return render_template_string(html_template)
+@app.route('/draw')
+def draw_page():
+    """Serve the drawing page at ``/draw``.
+
+    The page shows a 512x512 white canvas with a toolbar fixed to the bottom
+    of the window. Every ``mouseup`` posts the canvas as a PNG data URI to
+    ``/process-image``; "Clear" repaints the canvas white and posts to
+    ``/clear_preview``.
+
+    The stroke colour is chosen inside ``draw()`` from the active tool.
+    Previously ``draw()`` always took the colour-picker value, which
+    overwrote the white stroke set by ``setTool('eraser')``, so the eraser
+    drew with the brush colour.
+    """
+    # NOTE(review): the "Line" button only sets `tool`; draw() has no
+    # line-specific behaviour, so it currently acts like the brush.
+    # HTML template for the drawing page at /draw
+    html_template = """
+    <!doctype html>
+    <html lang="en">
+    <head>
+        <meta charset="utf-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1.0">
+        <title>Drawing Page</title>
+        <style>
+            body, html {
+                margin: 0;
+                padding: 0;
+                height: 100%;
+                display: flex;
+                justify-content: center;
+                align-items: center;
+                background-color: #f0f0f0;
+            }
+            .canvas-container {
+                border: none;
+                position: relative;
+            }
+            .toolbar {
+                display: flex;
+                justify-content: center;
+                margin-bottom: 10px;
+            }
+            button {
+                margin-right: 5px;
+            }
+            canvas {
+                cursor: crosshair;
+            }
+        </style>
+    </head>
+    <body>
+    <div style="position: fixed;
+    bottom: 0;
+    width: 100%;">
+        <div class="toolbar">
+            <button id="brush" onclick="setTool('brush')">Brush</button>
+            <button id="line" onclick="setTool('line')">Line</button>
+            <button id="eraser" onclick="setTool('eraser')">Eraser</button>
+            <button id="clear" onclick="clearCanvas()">Clear</button>
+            <input type="color" id="colorPicker" value="#000000">
+            <input type="range" id="brushSize" min="1" max="20" value="4">
+        </div>
+        </div>
+        <div class="canvas-container">
+            <canvas id="drawingCanvas" width="512" height="512"></canvas>
+        </div>
+        <script>
+            let canvas = document.getElementById('drawingCanvas');
+            let ctx = canvas.getContext('2d');
+            let drawing = false;
+            let tool = 'brush';
+            let lastX = 0, lastY = 0;
+            // Fill the canvas with white background
+            ctx.fillStyle = "#ffffff";
+            ctx.fillRect(0, 0, canvas.width, canvas.height);
+            canvas.addEventListener('mousedown', (e) => {
+                drawing = true;
+                [lastX, lastY] = [e.offsetX, e.offsetY];
+            });
+            canvas.addEventListener('mousemove', draw);
+            canvas.addEventListener('mouseup', () => {
+                drawing = false;
+                sendDrawingToBackend();
+            });
+            canvas.addEventListener('mouseout', () => drawing = false);
+            function draw(e) {
+                if (!drawing) return;
+                // Use white color for eraser, the picked color otherwise
+                ctx.strokeStyle = (tool === 'eraser') ? "#ffffff" : document.getElementById('colorPicker').value;
+                ctx.lineWidth = document.getElementById('brushSize').value;
+                ctx.lineJoin = 'round';
+                ctx.lineCap = 'round';
+                ctx.beginPath();
+                ctx.moveTo(lastX, lastY);
+                ctx.lineTo(e.offsetX, e.offsetY);
+                ctx.stroke();
+                [lastX, lastY] = [e.offsetX, e.offsetY];
+            }
+            function setTool(selectedTool) {
+                tool = selectedTool;
+                ctx.globalCompositeOperation = 'source-over';
+            }
+            function clearCanvas() {
+                ctx.fillStyle = "#ffffff";
+                ctx.fillRect(0, 0, canvas.width, canvas.height);
+                fetch('/clear_preview', { method: 'POST' })
+                .then(response => response.json())
+                .then(data => console.log('Cleared preview', data))
+                .catch(error => console.error('Error clearing preview:', error));
+            }
+            function sendDrawingToBackend() {
+                let dataURL = canvas.toDataURL('image/png');
+                fetch('/process-image', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify({ image: dataURL }),
+                })
+                .then(response => response.json())
+                .then(data => console.log('Image processed', data))
+                .catch(error => console.error('Error processing image:', error));
+            }
+        </script>
+    </body>
+    </html>
+    """
+    return render_template_string(html_template)
+@app.route('/clear_preview', methods=['POST'])
+def clear_preview():
+    """Delete the file at ``OUTPUT_PATH`` if present and report ``{"status": "cleared"}``."""
+    try:
+        # Remove directly instead of checking os.path.exists first: with the
+        # threaded server another request can delete or rewrite the file
+        # between the check and the removal.
+        os.remove(OUTPUT_PATH)
+    except FileNotFoundError:
+        pass  # Already cleared; the outcome is the same.
+    return jsonify({"status": "cleared"})
+def start_flask_app():
+    """Run the Flask app on the host and port from ``config["server"]`` with threading enabled."""
+    server_settings = config["server"]
+    app.run(host=server_settings["host"], port=server_settings["port"], threaded=True)
+def signal_handler(sig, frame):
+    """Announce the interrupt on stdout, then end the process with exit status 0."""
+    print("Ctrl+C pressed, shutting down.")
+    raise SystemExit(0)
+# Register the signal handler for Ctrl+C
+# This runs at import time, not only when the file is executed as a script.
+signal.signal(signal.SIGINT, signal_handler)
+# Start the server only when the file is run as a script.
+if __name__ == "__main__":
+    start_flask_app()

gradio_sketch2imagehd.py ADDED Viewed

	@@ -0,0 +1,222 @@

+import gradio as gr
+from fastapi import FastAPI, UploadFile, File
+from fastapi.responses import FileResponse, JSONResponse
+import os
+import random
+import torch
+from PIL import Image, ImageOps
+from io import BytesIO
+import base64
+import json
+import logging
+import gc
+from transformers import BlipProcessor, BlipForConditionalGeneration
+import torchvision.transforms.functional as F
+from src.pix2pix_turbo import Pix2Pix_Turbo  # Asegúrate de que esta ruta de importación sea correcta
+from fastapi.middleware.cors import CORSMiddleware
+# Logging configuration
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Load the configuration from config.json
+# The path is relative, so the file is looked up in the current working directory.
+logging.info("Cargando configuración desde config.json...")
+with open('config.json', 'r') as config_file:
+    config = json.load(config_file)
+# Global variables
+OUTPUT_PATH = "result.jpg"  # The resulting image is saved as result.jpg
+INPUT_PATH = "draw.jpg"     # The received image is saved as draw.jpg
+STYLE_LIST = config["style_list"]
+# Lookup table: style name -> prompt template.
+STYLES = {style["name"]: style["prompt"] for style in STYLE_LIST}
+DEVICE = config["model_params"]["device"]
+DEFAULT_SEED = config["model_params"]["default_seed"]
+VAL_R_DEFAULT = config["model_params"]["val_r_default"]
+CANVAS_WIDTH = config["canvas"]["width"]
+CANVAS_HEIGHT = config["canvas"]["height"]
+PIX2PIX_MODEL_NAME = config["model_params"]["pix2pix_model_name"]
+logging.info(f"Dispositivo seleccionado: {DEVICE}")
+# NOTE(review): this message says the model is loaded, but the model is only instantiated a few lines below.
+logging.info(f"Modelo Pix2Pix cargado: {PIX2PIX_MODEL_NAME}")
+# Load and configure the models
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(DEVICE)
+# NOTE(review): unlike blip_model, this model is not moved to DEVICE here — confirm that
+# Pix2Pix_Turbo places itself on the right device.
+pix2pix_model = Pix2Pix_Turbo(PIX2PIX_MODEL_NAME)
+def print_welcome_message(app):
+    """Print one line per route of ``app`` with its URL and, when present, its HTTP methods."""
+    for route in app.routes:
+        full_url = f"http://0.0.0.0:{app.server_port}{route.path}"
+        methods_text = route.methods if hasattr(route, 'methods') else "Not applicable"
+        print(f"URL: {full_url}, Methods: {methods_text}")
+def clear_memory():
+    """Collect Python garbage, then release cached CUDA memory.
+
+    Garbage collection runs first so that tensors kept alive only by
+    reference cycles are freed before ``torch.cuda.empty_cache()`` is called;
+    the cache can only give back blocks that no tensor still occupies.
+    """
+    logging.debug("Limpiando la memoria CUDA y recolectando basura...")
+    gc.collect()
+    torch.cuda.empty_cache()
+def generate_prompt_from_sketch(image: Image) -> str:
+    """Caption the sketch with BLIP and turn the caption into a prompt.
+
+    The caption is split on ``", "``; the stripped, non-empty fragments are
+    joined with ``" and "`` and a random entry of ``config["random_values"]``
+    is appended.
+    """
+    logging.debug("Generando el prompt desde el sketch...")
+    fitted = ImageOps.fit(image, (CANVAS_WIDTH, CANVAS_HEIGHT), Image.LANCZOS)
+    model_inputs = processor(fitted, return_tensors="pt").to(DEVICE)
+    with torch.no_grad():
+        generated = blip_model.generate(**model_inputs, max_new_tokens=50)
+    text_prompt = processor.decode(generated[0], skip_special_tokens=True)
+    logging.debug(f"Prompt generado: {text_prompt}")
+    recognized_items = []
+    for fragment in text_prompt.split(', '):
+        cleaned = fragment.strip()
+        if cleaned:
+            recognized_items.append(cleaned)
+    random_prefix = random.choice(config["random_values"])
+    subject = ' and '.join(recognized_items)
+    prompt = f"a photo of a {subject}, {random_prefix}"
+    logging.debug(f"Prompt final: {prompt}")
+    return prompt
+def normalize_image(image, range_from=(-1, 1)):
+    """Convert ``image`` to a tensor, applying ``x * 2 - 1`` when ``range_from`` equals ``(-1, 1)``."""
+    logging.debug("Normalizando la imagen...")
+    tensor = F.to_tensor(image)
+    return tensor * 2 - 1 if range_from == (-1, 1) else tensor
+def process_sketch(sketch_image, prompt=None, style_name=None, seed=DEFAULT_SEED, val_r=VAL_R_DEFAULT):
+    """Generate an image from a sketch with the Pix2Pix model and save it.
+
+    Args:
+        sketch_image: PIL image of the sketch.
+        prompt: User text; when empty, a prompt is generated from the sketch.
+        style_name: Key into ``STYLES``; unknown names use the configured
+            default style.
+        seed: Integer passed to ``torch.manual_seed``.
+        val_r: Forwarded to the model as ``r``.
+
+    Returns:
+        The generated PIL image, which is also written to ``OUTPUT_PATH``.
+
+    Raises:
+        RuntimeError: Any runtime error other than a CUDA out-of-memory
+            failure; an out-of-memory failure triggers a retry on the CPU.
+    """
+    logging.debug("Iniciando el procesamiento del sketch...")
+    if not prompt:
+        logging.info("Prompt no proporcionado, generando uno a partir del sketch...")
+        prompt = generate_prompt_from_sketch(sketch_image)
+    prompt_template = STYLES.get(style_name, STYLES[config["default_style_name"]])
+    prompt = prompt_template.replace("{prompt}", prompt)
+    sketch_image = sketch_image.convert("RGB")
+    # The CPU copy of the input is kept for the out-of-memory fallback.
+    cpu_input = normalize_image(sketch_image, range_from=(-1, 1)).unsqueeze(0).float()
+    noise = None
+    with torch.no_grad():
+        try:
+            logging.info("Iniciando la inferencia del modelo Pix2Pix...")
+            c_t = cpu_input.to(DEVICE)
+            torch.manual_seed(seed)
+            _, _, H, W = c_t.shape
+            # Create the noise on the input's own device. It used to be
+            # created on a device chosen from CUDA availability, which can
+            # differ from the configured DEVICE.
+            noise = torch.randn((1, 4, H // 8, W // 8), device=c_t.device)
+            with torch.cuda.amp.autocast():
+                output_image = pix2pix_model(c_t, prompt, deterministic=False, r=val_r, noise_map=noise)
+            success_message = "Imagen generada y guardada correctamente."
+        except RuntimeError as e:
+            logging.error(f"Error de runtime durante la inferencia: {str(e)}")
+            if "CUDA out of memory" not in str(e):
+                raise
+            logging.warning("Error de memoria CUDA. Cambiando a CPU.")
+            if noise is None:
+                # The failure happened before the noise map existed.
+                torch.manual_seed(seed)
+                noise = torch.randn((1, 4, cpu_input.shape[2] // 8, cpu_input.shape[3] // 8))
+            # Remember where the model lived: .cpu() moves the shared module
+            # itself, which would otherwise stay on the CPU for later calls.
+            first_param = next(pix2pix_model.parameters(), None)
+            try:
+                pix2pix_model_cpu = pix2pix_model.cpu()
+                output_image = pix2pix_model_cpu(cpu_input, prompt, deterministic=False, r=val_r, noise_map=noise.cpu())
+            finally:
+                if first_param is not None:
+                    try:
+                        pix2pix_model.to(first_param.device)
+                    except RuntimeError:
+                        logging.exception("Could not move the Pix2Pix model back to its original device.")
+            success_message = "Inferencia realizada en CPU y la imagen fue generada y guardada."
+        output_pil = F.to_pil_image(output_image[0].cpu() * 0.5 + 0.5)
+        output_pil.save(OUTPUT_PATH)
+        logging.info(success_message)
+        return output_pil
+def get_image_as_base64(image_path):
+    """Convertir una imagen a cadena base64."""
+    logging.debug(f"Convirtiendo la imagen {image_path} a base64...")
+    with open(image_path, "rb") as image_file:
+        encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
+    return encoded_string
+# Create the FastAPI application instance.
+app = FastAPI()
+# Configure the CORS middleware.
+logging.info("Configurando el middleware de CORS...")
+# NOTE(review): wildcard origins together with allow_credentials=True is very
+# permissive — confirm this is intended before exposing the service publicly.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Allow every origin; list specific origins here instead of "*" to restrict access
+    allow_credentials=True,
+    allow_methods=["*"],  # Allow every HTTP method (GET, POST, etc.)
+    allow_headers=["*"],  # Allow every request header
+)
+@app.get("/")
+def read_image():
+    """
+    Retorna el archivo 'result.jpg' si existe, o un mensaje de error si no.
+    """
+    logging.info("Petición GET recibida en '/'. Verificando si existe una imagen procesada...")
+    if os.path.exists(OUTPUT_PATH):
+        logging.info(f"Retornando la imagen {OUTPUT_PATH}.")
+        return FileResponse(OUTPUT_PATH, media_type='image/jpeg', filename="result.jpg")
+    else:
+        logging.warning("No se ha procesado ninguna imagen aún.")
+        return {"error": "No image processed yet."}
+@app.get("/image_base64")
+def get_image_base64():
+    """
+    Retorna la imagen procesada como una cadena en formato base64 dentro de un objeto JSON.
+    """
+    if os.path.exists(OUTPUT_PATH):
+        # Convertir la imagen en base64
+        base64_str = get_image_as_base64(OUTPUT_PATH)
+        logging.info(f"Imagen convertida a base64 y enviada como respuesta JSON.")
+        return JSONResponse(content={"image_base64": base64_str})
+    else:
+        logging.error("No se encontró ninguna imagen procesada.")
+        return JSONResponse(content={"error": "No image processed yet."})
+@app.post("/process_image")
+async def process_image(file: UploadFile = File(...)):
+    """
+    Procesa la imagen enviada y devuelve la imagen generada.
+    """
+    logging.info("Petición POST recibida en '/process_image'. Procesando imagen...")
+    image = Image.open(BytesIO(await file.read()))
+    # Guardar la imagen recibida como 'draw.png'
+    image.save("draw.png")  # Guardar en formato PNG
+    logging.info("Imagen recibida guardada como 'draw.png'.")
+    # Procesar la imagen y guardar el resultado
+    processed_image = process_sketch(image)
+    processed_image.save(OUTPUT_PATH)  # Guardar la imagen procesada como 'result.jpg'
+    logging.info("Imagen procesada y guardada correctamente.")
+    return {"status": f"Image processed and saved as {OUTPUT_PATH}"}
+# Build the Gradio interface around process_sketch and mount it on the FastAPI app.
+logging.info("Montando la interfaz de Gradio en la aplicación FastAPI...")
+# The inputs are passed positionally to process_sketch, i.e. in the order
+# (sketch_image, prompt, style_name, seed, val_r).
+# NOTE(review): gr.Image(source=...) may not be accepted by newer Gradio
+# releases — confirm against the installed Gradio version.
+interface = gr.Interface(
+    fn=process_sketch,
+    inputs=[gr.Image(source="upload", type="pil", label="Sketch Image"),
+            gr.Textbox(label="Prompt (optional)"),
+            gr.Dropdown(choices=list(STYLES.keys()), label="Style"),
+            gr.Slider(minimum=0, maximum=100, step=1, value=DEFAULT_SEED, label="Seed"),
+            gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=VAL_R_DEFAULT, label="Sketch Guidance")],
+    outputs=gr.Image(label="Generated Image"),
+    title="Sketch to Image HD",
+    description="Upload a sketch to generate an image."
+)
+# Serve the Gradio UI under the /gradio path of the FastAPI application.
+app = gr.mount_gradio_app(app, interface, path="/gradio")
+# Script entry point: serve the app with Uvicorn on all interfaces, port 7860.
+if __name__ == "__main__":
+    logging.info("Iniciando la aplicación en Uvicorn...")
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)
+    # NOTE(review): uvicorn.run() presumably blocks until the server stops, so
+    # this call would only execute at shutdown — confirm whether it was meant
+    # to run before the server starts.
+    print_welcome_message(interface.app)

preview_server.py ADDED Viewed

	@@ -0,0 +1,19 @@

+import gradio as gr
+# Global variable holding the last generated image as a base64 string.
+# NOTE(review): no assignment other than this initial None is visible in this
+# file — confirm where it is supposed to be updated.
+last_image_base64 = None
+def get_last_image():
+    return last_image_base64 if last_image_base64 else "No image processed yet."
+# Create the Gradio interface that exposes the last image as text; it is
+# launched on port 7861 by the entry point below.
+last_image_interface = gr.Interface(
+    fn=get_last_image,
+    inputs=[],
+    outputs="text",
+    title="Last Processed Image",
+    description="Retrieve the last processed image in base64 format."
+)
+# Script entry point: listen on all interfaces, port 7861, with share=True.
+if __name__ == "__main__":
+    last_image_interface.launch(server_name="0.0.0.0", server_port=7861, share=True)