import os
import time
import spaces # Must be imported before torch/CUDA
import torch
from diffusers import StableDiffusionXLImg2ImgPipeline
from diffusers.utils import load_image
from PIL import Image
from PIL.PngImagePlugin import PngInfo
import json
import gradio as gr
import tempfile
# Set environment variable to reduce memory fragmentation
#os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
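# (expandable_segments lets the CUDA caching allocator grow existing memory
# segments instead of reserving new ones, which can reduce fragmentation-related
# OOMs; uncomment the line above if those show up in the logs)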
# Initialize pipeline as None - will be loaded in GPU function
pipe = None
def load_pipeline():
"""Load the pipeline on GPU when needed"""
global pipe
if pipe is None:
print("Loading pipeline...")
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-refiner-1.0",
torch_dtype=torch.float16,
variant="fp16",
use_safetensors=True,
device_map="balanced",
attn_implementation="eager"
)
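        # fp16 weights halve download size and VRAM use; device_map="balanced"
        # lets accelerate spread the pipeline's submodules across the visible devices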
        # Enable memory optimizations (model CPU offload is left disabled because
        # it conflicts with loading the pipeline under a device_map)
        #pipe.enable_model_cpu_offload()
# Try to enable memory efficient attention
try:
pipe.enable_xformers_memory_efficient_attention()
        except ImportError:  # ModuleNotFoundError is a subclass of ImportError
print("xformers not available, using attention slicing")
pipe.enable_attention_slicing()
print("Pipeline loaded successfully!")
return pipe
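# On ZeroGPU Spaces, @spaces.GPU attaches a GPU only for the duration of the
# decorated call, which is why the pipeline is loaded lazily inside the GPU
# context instead of at import time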
@spaces.GPU
def img2img(
uploaded_image,
image_url: str,
prompt: str,
negative_prompt: str = "",
strength: float = 0.7,
guidance_scale: float = 3.5,
num_inference_steps: int = 50,
seed: int = -1,
):
# Load pipeline inside GPU context
try:
pipe = load_pipeline()
except Exception as e:
return None, f"❌ Failed to load model: {str(e)}", None
try:
# Choose image source
if uploaded_image is not None:
init_image = Image.open(uploaded_image).convert("RGB")
        elif image_url and image_url.strip():
try:
init_image = load_image(image_url).convert("RGB")
except Exception as e:
return None, f"❌ Failed to load image from URL: {str(e)}", None
else:
return None, "❌ Please upload an image or enter a valid URL", None
# Resize image (keeping aspect ratio consideration for better results)
init_image.thumbnail((1024, 1024), Image.Resampling.LANCZOS)
# Ensure dimensions are multiples of 8 for SDXL
width, height = init_image.size
width = (width // 8) * 8
height = (height // 8) * 8
init_image = init_image.resize((width, height))
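        # (thumbnail() only ever downscales, so smaller inputs keep their
        # native size, still snapped to multiples of 8)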
        # Seed handling: a fresh torch.Generator is NOT randomly seeded (it uses
        # a fixed default), so explicitly randomize when seed == -1 so the seed
        # reported in the metadata is the one actually used
        generator = torch.Generator(device="cuda")
        if seed == -1:
            generator.seed()  # draws a random seed; initial_seed() reports it
        else:
            generator.manual_seed(seed)
# Validate inputs
if not prompt.strip():
return None, "❌ Please enter a prompt", None
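        # img2img runs roughly strength * num_inference_steps denoising steps,
        # so lower strength values also mean a shorter run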
        # Run inference with autograd disabled (inference_mode lowers memory use)
with torch.inference_mode():
result = pipe(
prompt=prompt,
negative_prompt=negative_prompt if negative_prompt.strip() else None,
image=init_image,
strength=max(0.1, min(1.0, strength)), # Clamp strength
guidance_scale=max(1.0, min(20.0, guidance_scale)), # Clamp guidance
num_inference_steps=max(10, min(100, num_inference_steps)), # Clamp steps
generator=generator
).images[0]
used_seed = generator.initial_seed()
# Create metadata dictionary
metadata = {
"prompt": prompt,
"negative_prompt": negative_prompt,
"seed": used_seed,
"model": "stabilityai/stable-diffusion-xl-refiner-1.0",
"pipeline": "StableDiffusionXLImg2ImgPipeline",
"guidance_scale": guidance_scale,
"strength": strength,
"steps": num_inference_steps,
"width": result.width,
"height": result.height,
"device": "cuda"
}
# Save metadata into PNG
png_info = PngInfo()
png_info.add_text("parameters", json.dumps(metadata))
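        # (PIL exposes PNG tEXt chunks via the .text dict, so
        # json.loads(Image.open(path).text["parameters"]) recovers these settings)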
# Use temporary file for HF Spaces
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_file:
output_path = tmp_file.name
result.save(output_path, format="PNG", pnginfo=png_info)
# Build markdown preview of metadata
metadata_str = (
f"**Prompt:** {metadata['prompt']}\n\n"
f"**Negative Prompt:** {metadata['negative_prompt']}\n\n"
f"**Seed:** {metadata['seed']}\n\n"
f"**Model:** {metadata['model']}\n\n"
f"**Guidance Scale:** {metadata['guidance_scale']}\n\n"
f"**Strength:** {metadata['strength']}\n\n"
f"**Steps:** {metadata['steps']}\n\n"
f"**Dimensions:** {metadata['width']}x{metadata['height']}\n\n"
f"**Device:** {metadata['device']}"
)
        return output_path, f"✅ **Generation Complete!**\n\n{metadata_str}", output_path
except torch.cuda.OutOfMemoryError:
return None, "❌ GPU out of memory. Try reducing image size or inference steps.", None
except Exception as e:
return None, f"❌ Error during generation: {str(e)}", None
# Define UI components with better styling
title = "🎨 SDXL Image-to-Image Editor"
description = """
Transform your images with AI! Upload an image and describe the changes you want to make.
**Tips:**
- Use detailed prompts for better results
- Lower strength values preserve more of the original image
- Higher guidance scale follows your prompt more closely
"""
# Custom CSS for better appearance
css = """
.gradio-container {
font-family: 'IBM Plex Sans', sans-serif;
}
.gr-button {
color: white;
background: linear-gradient(90deg, #4f46e5, #7c3aed);
border: none;
}
.gr-button:hover {
background: linear-gradient(90deg, #4338ca, #6d28d9);
}
"""
with gr.Blocks(title=title, css=css, theme=gr.themes.Soft()) as demo:
gr.Markdown(f"# {title}")
gr.Markdown(description)
with gr.Row():
with gr.Column(scale=1):
            gr.Markdown("### 📸 Input Image")
uploaded_image = gr.Image(
label="Upload Image",
type="filepath",
height=300
)
gr.Markdown("**Or**")
image_url = gr.Textbox(
label="Image URL",
placeholder="https://example.com/image.jpg",
info="Paste a direct link to an image"
)
gr.Markdown("### ✍️ Prompts")
prompt = gr.Textbox(
label="Prompt",
placeholder="a beautiful sunset over mountains, photorealistic, detailed",
lines=3,
info="Describe what you want to see"
)
negative_prompt = gr.Textbox(
label="Negative Prompt",
placeholder="blurry, low quality, distorted",
lines=2,
info="What to avoid in the image"
)
            gr.Markdown("### ⚙️ Settings")
with gr.Row():
strength = gr.Slider(
minimum=0.1, maximum=1.0, value=0.7, step=0.05,
label="Transformation Strength",
info="0.1 = subtle changes, 1.0 = major changes"
)
guidance_scale = gr.Slider(
minimum=1.0, maximum=20.0, value=7.5, step=0.5,
label="Guidance Scale",
info="How closely to follow the prompt"
)
with gr.Row():
num_inference_steps = gr.Slider(
minimum=10, maximum=50, step=5, value=30,
label="Quality Steps",
info="More steps = higher quality but slower"
)
seed = gr.Slider(
minimum=-1, maximum=999999, step=1, value=-1,
label="Seed",
info="-1 for random"
)
            submit_btn = gr.Button("🚀 Generate Image", variant="primary", size="lg")
with gr.Column(scale=1):
            gr.Markdown("### 🖼️ Result")
            image_output = gr.Image(label="Generated Image", height=400)
            download_button = gr.File(label="📥 Download Full Resolution", visible=False)
            gr.Markdown("### 📊 Generation Details")
metadata_output = gr.Markdown()
# Event handlers
submit_btn.click(
fn=img2img,
inputs=[
uploaded_image,
image_url,
prompt,
negative_prompt,
strength,
guidance_scale,
num_inference_steps,
seed
],
outputs=[image_output, metadata_output, download_button]
).then(
lambda x: gr.update(visible=x is not None),
inputs=[image_output],
outputs=[download_button]
)
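    # .then() runs after img2img completes, revealing the download component
    # only when an image was actually produced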
# Examples
gr.Markdown("### 🎯 Examples")
gr.Examples(
examples=[
[None, "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png", "make it a van gogh painting", "blurry, low quality", 0.8, 7.5, 30, 42],
[None, "https://picsum.photos/512/512?random=1", "turn into a cyberpunk cityscape", "blurry, distorted", 0.9, 8.0, 30, 123],
],
inputs=[uploaded_image, image_url, prompt, negative_prompt, strength, guidance_scale, num_inference_steps, seed],
)
# Launch configuration for HF Spaces
if __name__ == "__main__":
demo.queue(max_size=20) # Enable queuing for better performance
demo.launch(
show_error=True,
share=False, # Don't create gradio.live links in HF Spaces
inbrowser=False, # Don't try to open browser in cloud environment
quiet=False
)