# Hugging Face Spaces status banner (page residue, not program code): "Running on Zero".
import gradio as gr | |
import numpy as np | |
import random | |
from PIL import Image | |
import os | |
import spaces | |
from diffusers import StableDiffusion3Pipeline | |
import torch | |
from peft import PeftModel | |
# --- Base pipeline -----------------------------------------------------------
# Device and precision are chosen together: float16 on CUDA, float32 on CPU.
model_repo_id = "frankjoshua/stable-diffusion-3.5-medium"
device, torch_dtype = (
    ("cuda", torch.float16) if torch.cuda.is_available() else ("cpu", torch.float32)
)
pipe = StableDiffusion3Pipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)

# Upper bounds used by the seed and width/height sliders in the UI.
MAX_IMAGE_SIZE = 1024
MAX_SEED = np.iinfo(np.int32).max
# UI display name -> adapter repository id. The "None" entry has no adapter
# and stands for the plain base model.
lora_models = {
    "None": None,
    "GenEval": "jieliu/SD3.5M-FlowGRPO-GenEval",
    "Text Rendering": "jieliu/SD3.5M-FlowGRPO-Text",
    "Human Prefer": "jieliu/SD3.5M-FlowGRPO-PickScore",
}

# UI display name -> prompt file, resolved against the current working
# directory. There is deliberately no entry for "None".
lora_prompts = {
    task_name: os.path.join(os.getcwd(), "prompts/" + file_name)
    for task_name, file_name in (
        ("GenEval", "geneval.txt"),
        ("Text Rendering", "ocr.txt"),
        ("Human Prefer", "pickscore.txt"),
    )
}
# Wrap the transformer with the first adapter, then register the remaining
# adapters on the same wrapper under their UI display names.
pipe.transformer = PeftModel.from_pretrained(
    pipe.transformer,
    lora_models["GenEval"],
    adapter_name="GenEval",
)
for _extra_adapter in ("Text Rendering", "Human Prefer"):
    pipe.transformer.load_adapter(lora_models[_extra_adapter], adapter_name=_extra_adapter)

# The pipeline is moved to the selected device only after all adapters are attached.
pipe = pipe.to(device)
# File that persists the number of model calls across restarts.
# NOTE(review): "/data" is presumably the Space's persistent-storage mount - confirm.
# For a local run the earlier alternative was a file in the working directory:
#   os.path.join(os.getcwd(), "model_call_counter.txt")
COUNTER_FILE = "/data/model_call_counter.txt"
def get_call_count(counter_file=None):
    """Return the persisted number of model calls, or 0 when unavailable.

    Args:
        counter_file: Optional path of the counter file. When omitted, the
            module-level ``COUNTER_FILE`` is used.

    Returns:
        The integer stored in the file. 0 is returned when the file is
        missing or unreadable, or when its content is not a valid integer.
    """
    path = COUNTER_FILE if counter_file is None else counter_file
    try:
        with open(path, "r", encoding="utf-8") as f:
            return int(f.read().strip())
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers empty or
        # corrupt content (including undecodable bytes). Anything else, such
        # as KeyboardInterrupt, is intentionally allowed to propagate.
        return 0
def update_call_count():
    """Increment the persisted call counter by one and return the new total.

    The current value is read through ``get_call_count`` and the incremented
    value overwrites ``COUNTER_FILE``.
    """
    new_total = 1 + get_call_count()
    with open(COUNTER_FILE, 'w') as counter_fh:
        counter_fh.write(f"{new_total}")
    return new_total
def sample_prompt(lora_model):
    """Pick a random example prompt for the selected LoRA task.

    Args:
        lora_model: Display name of the task; a key of ``lora_models``.

    Returns:
        A prompt string with surrounding whitespace removed. Special cases:
        ``""`` when ``lora_model`` is ``"None"`` or unknown,
        ``"Prompt file not found."`` when the task's prompt file is missing,
        and ``"No prompts found in file."`` when it has no non-blank line.
    """
    # Checked first so the base-model choice never touches the lookup tables.
    if lora_model == "None" or lora_model not in lora_models:
        return ""

    try:
        with open(lora_prompts[lora_model], "r", encoding="utf-8") as file:
            # Blank lines are dropped so an empty string is never offered as a prompt.
            prompts = [line.strip() for line in file if line.strip()]
    except FileNotFoundError:
        return "Prompt file not found."

    # Applies to every task: random.choice would raise IndexError on an empty list.
    if not prompts:
        return "No prompts found in file."

    if lora_model == "GenEval":
        # Harmonic weights (1, 1/2, 1/3, ...) favour prompts near the top of
        # the file. random.choices works with relative weights, so no
        # normalisation step is needed.
        weights = [1 / rank for rank in range(1, len(prompts) + 1)]
        return random.choices(prompts, weights=weights, k=1)[0]
    return random.choice(prompts)
def create_grid_image(images):
    """Tile four images into a single 2x2 RGB grid.

    Args:
        images: Sequence whose first four items are pasted into the grid.
            The cell size is taken from ``images[0].size``.

    Returns:
        A new RGB image twice as wide and twice as tall as the first image.
    """
    cell_w, cell_h = images[0].size
    canvas = Image.new('RGB', (cell_w * 2, cell_h * 2))
    # Row-major placement: items 0 and 1 on the top row, 2 and 3 on the bottom row.
    for slot in range(4):
        row, col = divmod(slot, 2)
        canvas.paste(images[slot], (col * cell_w, row * cell_h))
    return canvas
# NOTE(review): `spaces` is imported at the top of the file but this handler is
# not wrapped in `@spaces.GPU` - confirm whether the deployment needs it.
def infer(
    prompt,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    lora_model,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate four images and return them as one 2x2 grid.

    Args:
        prompt: User prompt. When empty, a prompt is sampled for each image
            via ``sample_prompt(lora_model)``.
        seed: Starting seed; image ``i`` uses ``seed + i`` unless
            ``randomize_seed`` is set.
        randomize_seed: When true, each image gets an independent random seed
            in ``[0, MAX_SEED]`` and ``seed`` is ignored.
        width: Output width passed to the pipeline.
        height: Output height passed to the pipeline.
        guidance_scale: Guidance scale passed to the pipeline.
        num_inference_steps: Number of steps passed to the pipeline.
        lora_model: Adapter display name, or ``"None"`` to run the base model
            with adapters disabled.
        progress: Gradio progress tracker (tqdm tracking enabled); it is not
            referenced in the body.

    Returns:
        A tuple ``(grid_image, seeds_text, call_count_text)``: the 2x2 grid,
        the four seeds joined by ``", "``, and the call-counter message.
    """
    call_count = update_call_count()

    use_base_model = lora_model == "None"
    if not use_base_model:
        # Loop-invariant: activate the chosen adapter once for all four images.
        pipe.transformer.set_adapter(lora_model)

    images = []
    seeds = []
    for offset in range(4):
        if randomize_seed:
            current_seed = random.randint(0, MAX_SEED)
        else:
            # Sequential seeds; int() because manual_seed requires an integer.
            current_seed = int(seed) + offset
        seeds.append(current_seed)

        pipe_kwargs = {
            # The prompt file is only read when the user left the prompt empty.
            "prompt": prompt if prompt else sample_prompt(lora_model),
            "negative_prompt": "",
            "guidance_scale": guidance_scale,
            "num_inference_steps": num_inference_steps,
            "width": width,
            "height": height,
            "generator": torch.Generator().manual_seed(current_seed),
        }
        if use_base_model:
            # Temporarily bypass every loaded adapter to get base-model output.
            with pipe.transformer.disable_adapter():
                image = pipe(**pipe_kwargs).images[0]
        else:
            image = pipe(**pipe_kwargs).images[0]
        images.append(image)

    grid_image = create_grid_image(images)
    return grid_image, ", ".join(map(str, seeds)), f"Model has been called {call_count} times"
# Page-level CSS: centres the main column and caps its width.
css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""
# Gradio UI definition and event wiring.
# NOTE(review): the original indentation was lost; the nesting of rows/columns
# below (and the indentation inside the Markdown string) is reconstructed from
# the component arguments - confirm against the deployed layout.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        # Title and short user guide shown at the top of the page.
        gr.Markdown("""
        # SD3.5 Medium + Flow-GRPO
        Our model is trained separately for different tasks, so it’s best to use the corresponding prompt format for each task.
        **User Guide:**
        1. Select a LoRA model (choose “None” to use the base model)
        2. Click “Sample Prompt” to randomly select from ~1000 task-specific prompts, or write your own
        3. Click “Run” to generate images (a 2×2 grid of 4 images will be produced)
        **Note:**
        - For the *Text Rendering* task, please enclose the text to be displayed in **double quotes (`"`)**, not single quotes (`'`)
        """)
        # Single-line prompt input; submitting it also triggers generation (see gr.on below).
        with gr.Row():
            prompt = gr.Textbox(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
        # Task selection, prompt sampling and the run button.
        with gr.Row():
            lora_model = gr.Dropdown(
                label="LoRA Model",
                choices=list(lora_models.keys()),
                value="GenEval"
            )
            sample_prompt_button = gr.Button("Sample Prompt", scale=0, variant="secondary")
            # Thin wrapper used as the click handler: returns a sampled prompt
            # for the selected task, which is written into the prompt box.
            def update_sampled_prompt(lora_model):
                return sample_prompt(lora_model)
            sample_prompt_button.click(
                fn=update_sampled_prompt,
                inputs=[lora_model],
                outputs=[prompt]
            )
            run_button = gr.Button("Run", scale=0, variant="primary")
        # Outputs: the 2x2 image grid and the seeds used for its four cells.
        result = gr.Image(label="Results (2x2 Grid)", show_label=True)
        seed_display = gr.Textbox(label="Seeds Used", show_label=True)
        with gr.Accordion("Advanced Settings", open=False):
            # Starting seed; only used when "Randomize seeds" is unchecked.
            seed = gr.Slider(
                label="Starting Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="Randomize seeds", value=True)
            # Output resolution of each grid cell.
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=512,  # Replace with defaults that work for your model
                )
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=512,  # Replace with defaults that work for your model
                )
            # Sampler settings forwarded unchanged to the pipeline call.
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=4.5,  # Replace with defaults that work for your model
                )
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=40,  # Replace with defaults that work for your model
                )
        # Read-only counter; the initial text is computed once when the UI is
        # built and is refreshed by each infer() call.
        call_count_display = gr.Textbox(
            label="Model Call Count",
            value=f"Model has been called {get_call_count()} times",
            interactive=False
        )
    # Run generation on either a click of "Run" or Enter in the prompt box.
    # The order of `inputs` must match infer()'s positional parameters.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
            lora_model,
        ],
        outputs=[result, seed_display, call_count_display],
    )
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()