|
import os |
|
import shlex |
|
import spaces |
|
import subprocess |
|
def install_cuda_toolkit():
    """Download and silently install the CUDA 12.4 toolkit, then point the
    process environment at it.

    Side effects:
        - Writes the runfile installer to /tmp and executes it with
          ``--silent --toolkit``.
        - Sets CUDA_HOME, prepends ``$CUDA_HOME/bin`` to PATH, prepends
          ``$CUDA_HOME/lib`` to LD_LIBRARY_PATH, and sets
          TORCH_CUDA_ARCH_LIST so later extension builds (nvcc / setup.py)
          find the toolkit.

    Failures are deliberately not raised (``subprocess.call`` is best-effort),
    matching the original startup behavior of this script.
    """
    cuda_toolkit_url = "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run"
    installer_path = f"/tmp/{os.path.basename(cuda_toolkit_url)}"
    subprocess.call(["wget", "-q", cuda_toolkit_url, "-O", installer_path])
    subprocess.call(["chmod", "+x", installer_path])
    subprocess.call([installer_path, "--silent", "--toolkit"])

    cuda_home = "/usr/local/cuda"
    os.environ["CUDA_HOME"] = cuda_home
    os.environ["PATH"] = f"{cuda_home}/bin:{os.environ['PATH']}"
    # Fix: the original appended ":" even when LD_LIBRARY_PATH was unset,
    # leaving an empty entry (which the loader treats as the CWD).
    prev_ld_path = os.environ.get("LD_LIBRARY_PATH", "")
    cuda_lib = f"{cuda_home}/lib"
    os.environ["LD_LIBRARY_PATH"] = f"{cuda_lib}:{prev_ld_path}" if prev_ld_path else cuda_lib
    # Compute capabilities to build for — presumably matches the Space's GPUs
    # (8.0 = A100, 8.6 = A10/RTX 30xx); TODO confirm against deployment HW.
    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
|
# --- One-time environment setup; runs at import time, before model loading ---
install_cuda_toolkit()

# Log the installed torch packages and the nvcc version for build debugging.
os.system("pip list | grep torch")
os.system('nvcc -V')

# Build and install the differentiable-renderer CUDA extension in place
# (the print mirrors the command so it shows up in the Space logs).
print("cd /home/user/app/step1x3d_texture/differentiable_renderer/ && python setup.py install")
os.system("cd /home/user/app/step1x3d_texture/differentiable_renderer/ && python setup.py install")

# Install the prebuilt custom rasterizer wheel (cp310, linux x86_64);
# check=True aborts startup if this install fails.
subprocess.run(shlex.split("pip install custom_rasterizer-0.1-cp310-cp310-linux_x86_64.whl"), check=True)
|
import time |
|
import uuid |
|
import torch |
|
import trimesh |
|
import argparse |
|
import numpy as np |
|
import gradio as gr |
|
from gradio_client import Client |
|
from PIL import Image |
|
from step1x3d_geometry.models.pipelines.pipeline import Step1X3DGeometryPipeline |
|
from step1x3d_texture.pipelines.step1x_3d_texture_synthesis_pipeline import ( |
|
Step1X3DTexturePipeline, |
|
) |
|
from step1x3d_geometry.models.pipelines.pipeline_utils import reduce_face, remove_degenerate_face |
|
|
|
|
|
# CLI configuration: which model variants to load and where to cache outputs.
parser = argparse.ArgumentParser()
for _flag, _default in (
    ("--geometry_model", "Step1X-3D-Geometry-Label-1300m"),
    ("--texture_model", "Step1X-3D-Texture"),
    ("--cache_dir", "cache"),
):
    parser.add_argument(_flag, type=str, default=_default)
args = parser.parse_args()

# Generated .glb files are written here before being handed to the UI.
os.makedirs(args.cache_dir, exist_ok=True)
|
|
|
# Load the geometry diffusion pipeline onto the GPU.
geometry_model = Step1X3DGeometryPipeline.from_pretrained(
    "stepfun-ai/Step1X-3D", subfolder=args.geometry_model
).to("cuda")

# Texture pipeline is not moved to CUDA here — presumably it manages device
# placement internally; TODO confirm against Step1X3DTexturePipeline.
texture_model = Step1X3DTexturePipeline.from_pretrained("stepfun-ai/Step1X-3D", subfolder=args.texture_model)

# Remote text-to-image backend (a Gradio app); H100_3D_URL must be set in the
# environment, otherwise Client receives None and this will fail at startup.
t2i_client = Client(os.getenv("H100_3D_URL"))
|
|
|
|
|
def generate_image_from_text(prompt, height, width, steps, scales, seed):
    """Generate an image from *prompt* via the external text-to-image API.

    Args:
        prompt: text description of the image.
        height, width: output resolution in pixels.
        steps: diffusion steps for the remote model.
        scales: guidance scale for the remote model.
        seed: RNG seed; -1 means "let the backend pick" (sent as None).

    Returns:
        A local file path to the generated image, or None on any failure
        (network/API error or an unrecognized response payload).
    """
    # Keep the try body limited to the network call; the original raised a
    # generic Exception for "unexpected format" only to catch it one line
    # later — handle that case directly instead.
    try:
        result = t2i_client.predict(
            height=height,
            width=width,
            steps=steps,
            scales=scales,
            prompt=prompt,
            seed=seed if seed != -1 else None,
            api_name="/process_and_save_image"
        )
    except Exception as e:  # API boundary: log and degrade to None
        print(f"Error generating image from text: {e}")
        return None

    # The backend returns either a dict containing 'path' or a bare path string.
    if isinstance(result, dict) and 'path' in result:
        return result['path']
    if isinstance(result, str):
        return result
    print("Error generating image from text: Unexpected result format from text-to-image API")
    return None
|
|
|
|
|
def get_random_seed():
    """Fetch a fresh random seed from the external API; return -1 on failure."""
    try:
        return t2i_client.predict(api_name="/update_random_seed")
    except Exception as exc:  # API boundary: log and fall back to "random" sentinel
        print(f"Error getting random seed: {exc}")
        return -1
|
|
|
|
|
@spaces.GPU(duration=240)
def generate_3d_func(
    input_image_path, guidance_scale, inference_steps, max_facenum, symmetry, edge_type
):
    """Generate a geometry-only GLB and a textured GLB from one input image.

    Args:
        input_image_path: path to the conditioning image.
        guidance_scale: CFG scale for the geometry model (arrives as a string
            from gr.Number, hence the float() coercion below).
        inference_steps: diffusion steps for the geometry stage.
        max_facenum: upper bound on the mesh face count.
        symmetry: radio index 0 or 1 (the UI radio uses type="index").
        edge_type: "sharp" / "normal" / "smooth", passed through as a label.

    Returns:
        (geometry_glb_path, textured_glb_path), both inside args.cache_dir.
    """
    if "Label" in args.geometry_model:
        # Label-conditioned variant: index 0 -> "x" (presumably mirror
        # symmetry about the x axis — TODO confirm against the pipeline),
        # index 1 -> "asymmetry".
        symmetry_values = ["x", "asymmetry"]
        out = geometry_model(
            input_image_path,
            label={"symmetry": symmetry_values[int(symmetry)], "edge_type": edge_type},
            guidance_scale=float(guidance_scale),
            octree_resolution=384,
            max_facenum=int(max_facenum),
            num_inference_steps=int(inference_steps),
        )
    else:
        # Unconditioned variant: no label dict, default octree resolution.
        out = geometry_model(
            input_image_path,
            guidance_scale=float(guidance_scale),
            num_inference_steps=int(inference_steps),
            max_facenum=int(max_facenum),
        )

    # Unique basename so concurrent requests don't clobber each other's files.
    save_name = str(uuid.uuid4())
    print(save_name)
    geometry_save_path = f"{args.cache_dir}/{save_name}.glb"
    geometry_mesh = out.mesh[0]
    # NOTE: the raw (pre-cleanup) geometry is what gets exported for preview;
    # the cleanup below only affects the mesh handed to the texture stage.
    geometry_mesh.export(geometry_save_path)

    # Clean the mesh before texturing: drop degenerate faces, then decimate.
    geometry_mesh = remove_degenerate_face(geometry_mesh)
    geometry_mesh = reduce_face(geometry_mesh)
    textured_mesh = texture_model(input_image_path, geometry_mesh)
    textured_save_path = f"{args.cache_dir}/{save_name}-textured.glb"
    textured_mesh.export(textured_save_path)

    # Release cached GPU memory between requests.
    torch.cuda.empty_cache()
    print("Generate finish")
    return geometry_save_path, textured_save_path
|
|
|
|
|
def update_image_display(uploaded_image, generated_image):
    """Choose which image to display for the 3D stage.

    A text-to-image result takes precedence over an upload; returns None
    when neither source has content.
    """
    return generated_image if generated_image is not None else uploaded_image
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio UI: image input (upload or text-to-image), 3D generation settings,
# and 3D model previews. Event handlers and wiring are at the bottom of the
# `with` block; the app launches at module import.
# ---------------------------------------------------------------------------
with gr.Blocks(title="3D-LLAMA V2") as demo:
    gr.Markdown("# 3D-LLAMA V2 with Step1X-3D")

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("## Image Input")
            # Tab 1: user supplies an image file directly.
            with gr.Tab("Upload Image"):
                uploaded_image = gr.Image(label="Upload Image", type="filepath")

            # Tab 2: generate the input image from a text prompt via the
            # remote t2i_client backend.
            with gr.Tab("Generate from Text"):
                text_prompt = gr.Textbox(label="Image Description", placeholder="Enter your image description here...")
                with gr.Row():
                    t2i_height = gr.Slider(label="Height", minimum=512, maximum=2048, value=1024, step=64)
                    t2i_width = gr.Slider(label="Width", minimum=512, maximum=2048, value=1024, step=64)
                with gr.Row():
                    t2i_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, value=8, step=1)
                    t2i_scales = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=10.0, value=3.5, step=0.5)
                with gr.Row():
                    t2i_seed = gr.Number(label="Seed (optional, -1 for random)", value=-1)
                    random_seed_btn = gr.Button("Get Random Seed", scale=0)
                generate_image_btn = gr.Button("Generate Image", variant="primary")

            # Whichever source produced the latest image; this feeds btn_3d.
            current_image = gr.Image(label="Current Image (for 3D generation)", type="filepath", interactive=False)
            generated_image_path = gr.State(value=None)

            gr.Markdown("## 3D Generation Settings")
            # NOTE(review): these gr.Number defaults are strings ("7.5",
            # "400000"); generate_3d_func coerces them with float()/int().
            guidance_scale = gr.Number(label="3D Guidance Scale", value="7.5")
            inference_steps = gr.Slider(
                label="3D Inference Steps", minimum=1, maximum=100, value=50
            )
            max_facenum = gr.Number(label="Max Face Num", value="400000")
            # type="index" -> handler receives 0/1, which generate_3d_func
            # maps onto the pipeline's "x"/"asymmetry" labels.
            symmetry = gr.Radio(
                choices=["symmetry", "asymmetry"],
                label="Symmetry Type",
                value="symmetry",
                type="index",
            )
            edge_type = gr.Radio(
                choices=["sharp", "normal", "smooth"],
                label="Edge Type",
                value="sharp",
                type="value",
            )
            btn_3d = gr.Button("Generate 3D", variant="primary")

        with gr.Column(scale=4):
            # 3D viewers for the two artifacts returned by generate_3d_func.
            textured_preview = gr.Model3D(label="Textured", height=380)
            geometry_preview = gr.Model3D(label="Geometry", height=380)

        with gr.Column(scale=1):
            gr.Examples(
                examples=[
                    ["examples/images/000.png"],
                    ["examples/images/001.png"],
                    ["examples/images/004.png"],
                    ["examples/images/008.png"],
                    ["examples/images/028.png"],
                    ["examples/images/032.png"],
                    ["examples/images/061.png"],
                    ["examples/images/107.png"],
                ],
                inputs=[uploaded_image],
                cache_examples=False,
                label="Example Images"
            )

    def on_generate_image(prompt, height, width, steps, scales, seed):
        # Handler for generate_image_btn: produce an image from text and
        # publish it to both the State and the current_image display.
        if not prompt:
            gr.Warning("Please enter a text prompt")
            return None, None

        generated_path = generate_image_from_text(prompt, height, width, steps, scales, seed)
        if generated_path:
            return generated_path, generated_path
        else:
            gr.Warning("Failed to generate image from text")
            return None, None

    def on_upload_image(image_path):
        # Handler for uploaded_image.change: pass the path through to
        # current_image.
        return image_path

    def get_current_image(uploaded, generated):
        # Prefer the generated image over the upload; None when neither
        # is set. (Currently unused in the wiring below.)
        if generated is not None:
            return generated
        elif uploaded is not None:
            return uploaded
        else:
            return None

    # Text prompt -> image; updates the hidden State and the visible image.
    generate_image_btn.click(
        on_generate_image,
        inputs=[text_prompt, t2i_height, t2i_width, t2i_steps, t2i_scales, t2i_seed],
        outputs=[generated_image_path, current_image]
    )

    # Pull a backend-chosen seed into the seed box.
    random_seed_btn.click(
        get_random_seed,
        inputs=[],
        outputs=[t2i_seed]
    )

    # Any upload immediately becomes the current image.
    uploaded_image.change(
        on_upload_image,
        inputs=[uploaded_image],
        outputs=[current_image]
    )

    # Run the 3D pipeline; guard returns (None, None) when no image is set.
    btn_3d.click(
        lambda img, gs, is_, mf, sym, et: generate_3d_func(img, gs, is_, mf, sym, et) if img else (None, None),
        inputs=[
            current_image,
            guidance_scale,
            inference_steps,
            max_facenum,
            symmetry,
            edge_type,
        ],
        outputs=[geometry_preview, textured_preview],
    )

# ssr_mode=False: serve the classic client-rendered Gradio front end.
demo.launch(ssr_mode=False)