# Source: Hugging Face Spaces - Inmental/img2img-turbo (Space status: Paused)
# File size: 5,733 Bytes

import base64
import logging
from io import BytesIO

import torch
import torchvision.transforms.functional as F
from flask import Flask, request, jsonify
from flask_cors import CORS  # Import CORS
from PIL import Image
from torch.cuda.amp import autocast

from src.pix2pix_turbo import Pix2Pix_Turbo

# Flask application object; the route defined further down registers on it.
app = Flask(__name__)
CORS(app)  # Enable CORS for all routes

# Configuration Variables
# Name handed to the Pix2Pix_Turbo constructor below.
model_type = "sketch_to_image_stochastic"
# Image format used when the generated picture is serialised for the response.
output_format = "PNG"
# NOTE(review): desired_size is not referenced anywhere else in the visible
# code, so incoming images are currently processed at their original size.
desired_size = (768, 768)  # Increased resolution for better quality

# Load the model when the app starts
# This runs at import time, so the single `model` instance is created once and
# shared by every request handled by this process.
print("Loading model...")
model = Pix2Pix_Turbo(model_type)
print("Model loaded successfully.")

# Example style presets (replace with your actual styles). Each entry pairs a
# display name with a prompt template containing a "{prompt}" placeholder,
# presumably meant to be filled with the user's own prompt text.
_STYLE_PRESETS = (
    ("Cinematic", "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy"),
    ("3D Model", "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting"),
    ("Anime", "anime artwork {prompt} . anime style, key visual, vibrant, studio anime,  highly detailed"),
    ("Digital Art", "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed"),
    ("Photographic", "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed"),
    ("Pixel art", "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics"),
    ("Fantasy art", "ethereal fantasy concept art of  {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy"),
    ("Neonpunk", "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional"),
    ("Manga", "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style"),
)

# List-of-dicts view of the presets, in declaration order.
style_list = [
    {"name": label, "prompt": template} for label, template in _STYLE_PRESETS
]

# Name -> template lookup table used when resolving a requested style.
styles = dict(_STYLE_PRESETS)

def process_image(image, prompt, prompt_template, style_name, seed, val_r):
    """Generate an image from a sketch with the module-level ``model``.

    Args:
        image: PIL image holding the input sketch. It is converted to RGB and
            binarised (values above 0.5 become 1.0, everything else 0.0).
        prompt: User-supplied text describing the desired result.
        prompt_template: Style template whose ``{prompt}`` placeholder is
            replaced by ``prompt``. A falsy template leaves ``prompt`` as is.
        style_name: Name of the selected style; only used in the debug log.
        seed: Value passed to ``torch.manual_seed`` before the noise map is
            sampled, so equal seeds give equal noise.
        val_r: Forwarded to the model as its ``r`` argument.

    Returns:
        A PIL image built from the first item of the model output after the
        output is rescaled with ``x * 0.5 + 0.5``.
    """
    # Apply the selected style. Previously the template was accepted but never
    # used, so choosing a style had no effect on the generated image.
    if prompt_template:
        prompt = prompt_template.replace("{prompt}", prompt)

    # Convert to a tensor, threshold to a binary sketch, add the batch axis.
    sketch = F.to_tensor(image.convert("RGB"))
    c_t = (sketch > 0.5).float().unsqueeze(0)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    c_t = c_t.to(device)

    # torch.cuda.amp.autocast only targets CUDA; enabling it solely when a GPU
    # is in use avoids the "CUDA is not available" warning on CPU-only hosts.
    with torch.no_grad(), autocast(enabled=device.type == "cuda"):
        torch.manual_seed(seed)
        height, width = c_t.shape[-2:]
        # NOTE(review): floor division means sides that are not multiples of 8
        # may not line up with the model's latent size - confirm against
        # Pix2Pix_Turbo before accepting arbitrary input sizes.
        noise = torch.randn((1, 4, height // 8, width // 8), device=device)
        logging.debug(
            "Calling Pix2Pix model... ct: %s, style: %s, prompt: %s, "
            "deterministic: False, r: %s, noise_map: %s",
            c_t.shape, style_name, prompt, val_r, noise.shape,
        )

        # Pass through the model
        output_image = model(c_t, prompt, deterministic=False, r=val_r, noise_map=noise)

    # Cast to float32 first: under autocast the output may be half precision.
    return F.to_pil_image(output_image[0].cpu().float() * 0.5 + 0.5)

@app.route('/process-image', methods=['POST'])
def process_image_route():
    """Handle ``POST /process-image``: run the model on an uploaded sketch.

    Expected JSON object keys:
        image (str, required): base64-encoded image, optionally wrapped in a
            ``data:<media-type>;base64,`` data URI.
        prompt (str, default ``"a cat"``): text describing the desired image.
        style_name (str, default ``"Fantasy art"``): matched against the keys
            of ``styles`` ignoring case and surrounding whitespace.
        seed (int-convertible, default ``42``).
        val_r (float-convertible, default ``0.8``).

    Returns:
        ``({"image": <base64 of the encoded result>}, 200)`` on success,
        ``({"error": ...}, 400)`` for missing or malformed input, and
        ``({"error": ...}, 500)`` when the model call fails.
    """
    # silent=True makes a missing or malformed JSON body come back as None, so
    # the client receives the JSON 400 below instead of Flask's default error.
    data = request.get_json(silent=True)

    if not isinstance(data, dict) or 'image' not in data:
        print("Error: No image provided")
        return jsonify({"error": "No image provided"}), 400

    # Log the keys only: the image payload can be megabytes of base64 text.
    print("Received JSON keys:", list(data))

    image_data = data['image']
    if not isinstance(image_data, str):
        print("Error: 'image' is not a base64 string")
        return jsonify({"error": "Invalid image data"}), 400
    print("Received base64 image data (truncated):", image_data[:100])  # Print first 100 chars of base64 data

    # Drop a data-URI header of any media type (PNG, JPEG, WebP, ...).
    if image_data.startswith('data:'):
        image_data = image_data.partition(',')[2]

    try:
        image_bytes = base64.b64decode(image_data)
        image = Image.open(BytesIO(image_bytes))
        # Image.open is lazy; force decoding now so corrupt data is reported
        # as a client error here rather than as a 500 during processing.
        image.load()
    except Exception as e:
        print("Error decoding base64 image:", str(e))
        return jsonify({"error": "Invalid image data"}), 400

    # Retrieve and validate the other parameters
    prompt = data.get('prompt', 'a cat')
    requested_style = data.get('style_name', 'Fantasy art')
    if not isinstance(prompt, str) or not isinstance(requested_style, str):
        print("Error: 'prompt' and 'style_name' must be strings")
        return jsonify({"error": "Invalid parameters"}), 400

    try:
        seed = int(data.get('seed', 42))
        val_r = float(data.get('val_r', 0.8))
    except (TypeError, ValueError, OverflowError) as e:
        print("Error parsing numeric parameters:", str(e))
        return jsonify({"error": "Invalid parameters"}), 400

    requested_style = requested_style.strip()  # Strip any leading/trailing whitespace

    # Debug: print available styles
    print(f"Available styles: {list(styles.keys())}")
    print(f"Received style name: {requested_style}")

    # Case-insensitive lookup
    wanted = requested_style.lower()
    style_name = next((key for key in styles if key.lower() == wanted), None)
    if not style_name:
        print(f"Error: Style '{data.get('style_name')}' not found")
        return jsonify({"error": f"Style '{data.get('style_name')}' not found"}), 400

    prompt_template = styles[style_name]

    print(f"Using style: {style_name} with prompt: {prompt}")

    # Process the image. This is the top-level boundary for model errors, so
    # any exception is reported to the client as a generic 500.
    try:
        processed_image = process_image(image, prompt, prompt_template, style_name, seed, val_r)
    except Exception as e:
        print("Error processing image:", str(e))
        return jsonify({"error": "Failed to process image"}), 500

    # Convert the processed image to base64
    img_io = BytesIO()
    processed_image.save(img_io, format=output_format)
    img_base64 = base64.b64encode(img_io.getvalue()).decode('utf-8')

    print("Processed image successfully, sending back to client")
    return jsonify({"image": img_base64})

# Script entry point: only when this file is executed directly (not imported)
# start the Flask server. host='0.0.0.0' listens on all network interfaces
# rather than just localhost; the server listens on port 5000.
if __name__ == "__main__":
    app.run(host='0.0.0.0', port=5000)