Spaces:

LTT
/

Kiss3DGen

Runtime error

Kiss3DGen / app_demo.py

JiantaoLin

new

d346594 7 months ago

15.1 kB

	import gradio as gr
	import os
	import subprocess
	import shlex
	import spaces
	import torch
	# Hugging Face access token taken from the environment; None when the variable is unset.
	access_token = os.environ.get("HUGGINGFACE_TOKEN")

	# Binary extensions are installed with pip at start-up, in this order. The exit
	# status of each command is not inspected, so a failed install is only visible
	# in pip's own output.
	_startup_pip_commands = (
	    "pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt240/download.html",
	    "pip install ./extension/nvdiffrast-0.3.1+torch-py3-none-any.whl --force-reinstall --no-deps",
	    "pip install ./extension/renderutils_plugin-0.1.0-cp310-cp310-linux_x86_64.whl --force-reinstall --no-deps",
	)
	for _pip_command in _startup_pip_commands:
	    subprocess.run(shlex.split(_pip_command))
	def install_cuda_toolkit():
	    """Download and run the CUDA 12.1 toolkit installer, then export CUDA variables.

	    The installer is fetched with ``wget`` into ``/tmp``, made executable and run
	    with ``--silent --toolkit``. Afterwards ``CUDA_HOME``, ``PATH``,
	    ``LD_LIBRARY_PATH`` and ``TORCH_CUDA_ARCH_LIST`` are set in ``os.environ``.

	    A step that exits with a non-zero status is reported on stdout but does not
	    abort the function, so start-up stays best-effort.

	    Raises:
	        OSError: if one of the commands cannot be started at all (for example
	            ``wget`` is not installed).
	    """
	    # Other toolkit builds that were tried previously:
	    # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
	    # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run"
	    CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run"
	    CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)

	    install_steps = (
	        ["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE],
	        ["chmod", "+x", CUDA_TOOLKIT_FILE],
	        [CUDA_TOOLKIT_FILE, "--silent", "--toolkit"],
	    )
	    for command in install_steps:
	        exit_code = subprocess.call(command)
	        if exit_code != 0:
	            # Surface the failure instead of dropping the exit status silently.
	            print(f"==> warning: {command[0]} exited with status {exit_code}")

	    cuda_home = "/usr/local/cuda"
	    os.environ["CUDA_HOME"] = cuda_home
	    for variable, subdir in (("PATH", "bin"), ("LD_LIBRARY_PATH", "lib")):
	        entry = f"{cuda_home}/{subdir}"
	        existing = os.environ.get(variable)
	        # Only add the ':' separator when there is something to append: an empty
	        # element in a search path is treated as the current working directory.
	        os.environ[variable] = f"{entry}:{existing}" if existing else entry

	    # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
	    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
	    print("==> finfish install")


	install_cuda_toolkit()
	@spaces.GPU
	def check_gpu():
	    """Point the CUDA environment variables at CUDA 12.1 and print GPU status.

	    Sets ``CUDA_HOME``, appends the CUDA ``bin`` directory to ``PATH`` and
	    prepends the CUDA ``lib64`` directory to ``LD_LIBRARY_PATH``. It then runs
	    ``nvidia-smi`` and prints ``torch.cuda.is_available()``.
	    """
	    os.environ['CUDA_HOME'] = '/usr/local/cuda-12.1'
	    os.environ['PATH'] = os.environ['PATH'] + ':/usr/local/cuda-12.1/bin'

	    inherited_library_path = os.environ.get('LD_LIBRARY_PATH', '')
	    os.environ['LD_LIBRARY_PATH'] = "/usr/local/cuda-12.1/lib64:" + inherited_library_path

	    # Check whether CUDA is usable.
	    subprocess.run(['nvidia-smi'])
	    cuda_ready = torch.cuda.is_available()
	    print(f"torch.cuda.is_available:{cuda_ready}")


	check_gpu()

	from PIL import Image
	from einops import rearrange
	from diffusers import FluxPipeline
	from models.lrm.utils.camera_util import get_flux_input_cameras
	from models.lrm.utils.infer_util import save_video
	from models.lrm.utils.mesh_util import save_obj, save_obj_with_mtl
	from models.lrm.utils.render_utils import rotate_x, rotate_y
	from models.lrm.utils.train_util import instantiate_from_config
	from models.ISOMER.reconstruction_func import reconstruction
	from models.ISOMER.projection_func import projection
	import os
	from einops import rearrange
	from omegaconf import OmegaConf
	import torch
	import numpy as np
	import trimesh
	import torchvision
	import torch.nn.functional as F
	from PIL import Image
	from torchvision import transforms
	from torchvision.transforms import v2
	from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler, AutoencoderTiny, AutoencoderKL
	from transformers import CLIPTextModel, CLIPTokenizer,T5EncoderModel, T5TokenizerFast
	from diffusers import FluxPipeline
	from pytorch_lightning import seed_everything
	import os
	from huggingface_hub import hf_hub_download


	from utils.tool import NormalTransfer, get_background, get_render_cameras_video, load_mipmap, render_frames

	def _float_vector_on_device(values, device):
	    """Return ``values`` as a 1-D float32 tensor moved to ``device``."""
	    return torch.tensor(values, dtype=torch.float32).to(device)


	# Two device names are kept, although both currently refer to the same device.
	device_0 = "cuda"
	device_1 = "cuda"
	resolution = 512
	save_dir = "./outputs"
	normal_transfer = NormalTransfer()

	# Per-view settings handed to the ISOMER reconstruction and projection calls:
	# four views, listed in the same order in every vector.
	isomer_radius = 4.5
	isomer_azimuths = _float_vector_on_device([0, 90, 180, 270], device_1)
	isomer_elevations = _float_vector_on_device([5, 5, 5, 5], device_1)
	isomer_geo_weights = _float_vector_on_device([1, 0.9, 1, 0.9], device_1)
	isomer_color_weights = _float_vector_on_device([1, 0.5, 1, 0.5], device_1)

	# model initialization and loading
	# flux
	# # taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=torch.bfloat16).to(device_0)
	# # good_vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", torch_dtype=torch.bfloat16, token=access_token).to(device_0)
	# flux_pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, token=access_token).to(device=device_0, dtype=torch.bfloat16)
	# # flux_pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, vae=taef1, token=access_token).to(device_0)
	# flux_lora_ckpt_path = hf_hub_download(repo_id="LTT/xxx-ckpt", filename="rgb_normal_large.safetensors", repo_type="model", token=access_token)
	# flux_pipe.load_lora_weights(flux_lora_ckpt_path)
	# flux_pipe.to(device=device_0, dtype=torch.bfloat16)
	# torch.cuda.empty_cache()
	# flux_pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(flux_pipe)


	# lrm: build the model from its inference config, then load the checkpoint
	# weights downloaded from the Hugging Face Hub.
	config = OmegaConf.load("./models/lrm/config/PRM_inference.yaml")
	model_config, infer_config = config.model_config, config.infer_config
	model = instantiate_from_config(model_config)

	model_ckpt_path = hf_hub_download(repo_id="LTT/PRM", filename="final_ckpt.ckpt", repo_type="model")

	# Keep only the entries stored under the 'lrm_generator.' prefix and strip
	# that prefix from their keys before the strict load.
	_generator_prefix = 'lrm_generator.'
	_checkpoint_weights = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
	state_dict = {}
	for _weight_name, _weight in _checkpoint_weights.items():
	    if _weight_name.startswith(_generator_prefix):
	        state_dict[_weight_name[len(_generator_prefix):]] = _weight

	model.load_state_dict(state_dict, strict=True)
	model = model.to(device_1)
	torch.cuda.empty_cache()
	@spaces.GPU
	def lrm_reconstructions(image, input_cameras, save_path=None, name="temp", export_texmap=False, if_save_video=False):
	    """Reconstruct a mesh from multi-view images with the LRM model and save it.

	    Args:
	        image: Image tensor. A leading axis is added with ``unsqueeze(0)`` before
	            it is moved to ``device_1``, resized to 512 and clamped to [0, 1].
	            NOTE(review): presumably shaped (views, 3, H, W) -- confirm with caller.
	        input_cameras: Passed unchanged to ``model.forward_planes``.
	        save_path: Directory for ``<name>.obj`` and, optionally, ``<name>.mp4``.
	            ``None`` falls back to the module-level ``save_dir``. The directory
	            is created when missing.
	        name: File stem used for the saved mesh and video.
	        export_texmap: When true, the mesh is extracted with a texture map and
	            written with ``save_obj_with_mtl``; otherwise vertex colours are
	            written with ``save_obj``.
	        if_save_video: When true, a turntable video of the rendered frames,
	            albedo, lighting passes and normals is also written.

	    Returns:
	        ``(vertices, faces)`` exactly as unpacked from ``model.extract_mesh``.
	        NOTE(review): the texture-map branch calls ``.data.cpu().numpy()`` on
	        them while the caller in this file applies ``torch.from_numpy`` to the
	        vertex-colour result, so the two branches may return different types.
	    """
	    # With the default save_path=None, os.path.join(None, ...) raised TypeError;
	    # use the module-wide output directory instead.
	    if save_path is None:
	        save_path = save_dir
	    os.makedirs(save_path, exist_ok=True)

	    images = image.unsqueeze(0).to(device_1)
	    images = v2.functional.resize(images, 512, interpolation=3, antialias=True).clamp(0, 1)

	    with torch.no_grad():
	        # get triplane
	        planes = model.forward_planes(images, input_cameras)

	        mesh_path_idx = os.path.join(save_path, f'{name}.obj')

	        mesh_out = model.extract_mesh(
	            planes,
	            use_texture_map=export_texmap,
	            **infer_config,
	        )
	        if export_texmap:
	            vertices, faces, uvs, mesh_tex_idx, tex_map = mesh_out
	            save_obj_with_mtl(
	                vertices.data.cpu().numpy(),
	                uvs.data.cpu().numpy(),
	                faces.data.cpu().numpy(),
	                mesh_tex_idx.data.cpu().numpy(),
	                tex_map.permute(1, 2, 0).data.cpu().numpy(),
	                mesh_path_idx,
	            )
	        else:
	            vertices, faces, vertex_colors = mesh_out
	            save_obj(vertices, faces, vertex_colors, mesh_path_idx)
	        print(f"Mesh saved to {mesh_path_idx}")

	        if if_save_video:
	            video_path_idx = os.path.join(save_path, f'{name}.mp4')
	            render_size = infer_config.render_resolution
	            env_map = load_mipmap("models/lrm/env_mipmap/6")
	            materials = (0.0, 0.9)

	            # Only the MVP matrices and camera positions are used for rendering.
	            _, all_mvp, all_campos = get_render_cameras_video(
	                batch_size=1,
	                M=24,
	                radius=4.5,
	                elevation=(90, 60.0),
	                is_flexicubes=True,
	                fov=30,
	            )

	            frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals, alphas = render_frames(
	                model,
	                planes,
	                render_cameras=all_mvp,
	                camera_pos=all_campos,
	                env=env_map,
	                materials=materials,
	                render_size=render_size,
	                chunk_size=20,
	                is_flexicubes=True,
	            )
	            # Map normals from [-1, 1] to [0, 1], then composite them over white
	            # using the alpha channel.
	            normals = (torch.nn.functional.normalize(normals) + 1) / 2
	            normals = normals * alphas + (1 - alphas)
	            # All render passes are concatenated along dim 3 into one frame.
	            all_frames = torch.cat([frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals], dim=3)

	            save_video(
	                all_frames,
	                video_path_idx,
	                fps=30,
	            )
	            print(f"Video saved to {video_path_idx}")

	    return vertices, faces


	def local_normal_global_transform(local_normal_images, azimuths_deg, elevations_deg):
	    """Convert per-view local normal maps to global normals stored in [0, 1].

	    Args:
	        local_normal_images: Normal maps. When their minimum is non-negative
	            they are rescaled with ``x * 2 - 1``; otherwise they are used as is.
	        azimuths_deg: Passed through to ``normal_transfer.trans_local_2_global``.
	        elevations_deg: Passed through to ``normal_transfer.trans_local_2_global``.

	    Returns:
	        The transformed normals with the first channel negated, mapped back with
	        ``(x + 1) / 2`` and permuted with ``(0, 3, 1, 2)``.
	    """
	    signed_normals = local_normal_images.float()
	    if local_normal_images.min() >= 0:
	        signed_normals = signed_normals * 2 - 1

	    world_normals = normal_transfer.trans_local_2_global(
	        signed_normals, azimuths_deg, elevations_deg, radius=4.5, for_lotus=False
	    )
	    # Negate the first component of every normal in place.
	    world_normals[..., 0] *= -1
	    return ((world_normals + 1) / 2).permute(0, 3, 1, 2)

	# Generate the multi-view image grid
	@spaces.GPU(duration=120)
	def generate_multi_view_images(prompt, seed):
	    """Run the FLUX pipeline once and return its ``.images`` output.

	    Args:
	        prompt: Text prompt forwarded to ``flux_pipe``.
	        seed: Seed for the ``torch.Generator`` that drives sampling.

	    Returns:
	        The ``images`` attribute of the pipeline result, requested with
	        ``output_type='np'``. The caller handles the final image and the seed.

	    NOTE(review): ``flux_pipe`` is not assigned anywhere in the visible file
	    (its construction is commented out), so calling this raises ``NameError``
	    until the pipeline is created.
	    """
	    rng = torch.Generator().manual_seed(seed)
	    sampling_args = dict(
	        prompt=prompt,
	        num_inference_steps=5,
	        guidance_scale=3.5,
	        num_images_per_prompt=1,
	        width=resolution * 2,
	        height=resolution * 1,
	        output_type='np',
	        generator=rng,
	    )
	    with torch.no_grad():
	        grid_images = flux_pipe(**sampling_args).images
	    return grid_images

	# Reconstruct the 3D model
	@spaces.GPU
	def reconstruct_3d_model(images, prompt):
	    """Turn an RGB/normal image grid into a textured mesh and return its path.

	    Steps: split the grid into four RGB views and four normal views, run the LRM
	    reconstruction for an initial mesh, refine it with ISOMER ``reconstruction``
	    using the global normals, then colour it with ``projection``.

	    Args:
	        images: NumPy array holding a 2 x 4 grid of tiles, RGB views in the top
	            row and normal maps in the bottom row. After ``squeeze(0)`` it is
	            read as (H, W, C); H must be divisible by 2 and W by 4.
	        prompt: User-supplied text, used only to name the output directory
	            under ``save_dir``.

	    Returns:
	        The value returned by ``projection``.
	        NOTE(review): presumably the path of the exported GLB file -- confirm.
	    """
	    global model
	    model.init_flexicubes_geometry(device_1, fovy=50.0)
	    model = model.eval()
	    rgb_normal_grid = images

	    # The prompt is untrusted text. Keeping only letters, digits, '-' and '_'
	    # stops it from escaping save_dir via '..', path separators or an absolute
	    # path, and the length cap avoids over-long file names.
	    safe_name = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in prompt)
	    safe_name = safe_name[:100] or "untitled"
	    save_dir_path = os.path.join(save_dir, safe_name)
	    os.makedirs(save_dir_path, exist_ok=True)

	    images = torch.from_numpy(rgb_normal_grid).squeeze(0).permute(2, 0, 1).contiguous().float()  # e.g. (3, 1024, 2048)
	    images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=2, m=4)  # e.g. (8, 3, 512, 512)
	    rgb_multi_view = images[:4, :3, :, :]
	    normal_multi_view = images[4:, :3, :, :]
	    multi_view_mask = get_background(normal_multi_view)
	    # Composite the RGB views over white with the mask, as is done for the
	    # normals below. The previous code multiplied the views by themselves.
	    rgb_multi_view = rgb_multi_view * multi_view_mask + (1 - multi_view_mask)
	    input_cameras = get_flux_input_cameras(batch_size=1, radius=4.2, fov=30).to(device_1)
	    vertices, faces = lrm_reconstructions(rgb_multi_view, input_cameras, save_path=save_dir_path, name='lrm', export_texmap=False, if_save_video=True)

	    # local normal to global normal
	    global_normal = local_normal_global_transform(normal_multi_view.permute(0, 2, 3, 1), isomer_azimuths, isomer_elevations)
	    global_normal = global_normal * multi_view_mask + (1 - multi_view_mask)

	    global_normal = global_normal.permute(0, 2, 3, 1)
	    rgb_multi_view = rgb_multi_view.permute(0, 2, 3, 1)
	    multi_view_mask = multi_view_mask.permute(0, 2, 3, 1).squeeze(-1)
	    vertices = torch.from_numpy(vertices).to(device_1)
	    faces = torch.from_numpy(faces).to(device_1)
	    # Rotate the LRM mesh by 90 degrees about x, then about y, before refinement.
	    vertices = vertices @ rotate_x(np.pi / 2, device=vertices.device)[:3, :3]
	    vertices = vertices @ rotate_y(np.pi / 2, device=vertices.device)[:3, :3]

	    # global_normal: B,H,W,3
	    # multi_view_mask: B,H,W
	    # rgb_multi_view: B,H,W,3

	    meshes = reconstruction(
	        normal_pils=global_normal,
	        masks=multi_view_mask,
	        weights=isomer_geo_weights,
	        fov=30,
	        radius=isomer_radius,
	        camera_angles_azi=isomer_azimuths,
	        camera_angles_ele=isomer_elevations,
	        expansion_weight_stage1=0.1,
	        init_type="file",
	        init_verts=vertices,
	        init_faces=faces,
	        stage1_steps=0,
	        stage2_steps=50,
	        start_edge_len_stage1=0.1,
	        end_edge_len_stage1=0.02,
	        start_edge_len_stage2=0.02,
	        end_edge_len_stage2=0.005,
	    )

	    save_glb_addr = projection(
	        meshes,
	        masks=multi_view_mask,
	        images=rgb_multi_view,
	        azimuths=isomer_azimuths,
	        elevations=isomer_elevations,
	        weights=isomer_color_weights,
	        fov=30,
	        radius=isomer_radius,
	        save_dir=f"{save_dir_path}/ISOMER/",
	    )

	    return save_glb_addr

	# Gradio interface function
	@spaces.GPU
	def gradio_pipeline(prompt, seed):
	    """Handle one "Generate" click: build a preview image and the 3D model.

	    The multi-view generation call is currently commented out. The image grid
	    is loaded from ``rgb_normal_grid.npy`` instead, so ``seed`` is unused.

	    Args:
	        prompt: Text from the prompt box, forwarded to ``reconstruct_3d_model``.
	        seed: Value of the seed field (not used at the moment).

	    Returns:
	        ``(image_preview, save_glb_addr)``: a PIL image of the first grid entry
	        and the result of ``reconstruct_3d_model``.
	    """
	    import ctypes

	    # Explicitly preload libnvrtc.so.12. A failure is only reported, not raised.
	    nvrtc_library = "/usr/local/cuda-12.1/lib64/libnvrtc.so.12"
	    try:
	        ctypes.CDLL(nvrtc_library, mode=ctypes.RTLD_GLOBAL)
	        print(f"Successfully preloaded {nvrtc_library}")
	    except OSError as load_error:
	        print(f"Failed to preload {nvrtc_library}: {load_error}")

	    # Generate the multi-view images (disabled; a saved grid is loaded instead).
	    # rgb_normal_grid = generate_multi_view_images(prompt, seed)
	    grid = np.load("rgb_normal_grid.npy")
	    preview_pixels = (grid[0] * 255).astype(np.uint8)
	    preview = Image.fromarray(preview_pixels)

	    # Reconstruct the 3D model and return the preview with the GLB path.
	    glb_path = reconstruct_3d_model(grid, prompt)
	    return preview, glb_path

	# Gradio Blocks application
	# Builds the UI: prompt, seed and "Generate" button as inputs; an image preview
	# and a 3D model viewer as outputs. Clicking the button runs gradio_pipeline.
	# NOTE(review): the indentation of the nested `with` blocks is missing in this
	# copy of the file; restore it before running, since the nesting decides the
	# layout.
	with gr.Blocks() as demo:
	with gr.Row(variant="panel"):
	# Left-hand input area
	with gr.Column():
	with gr.Row():
	prompt_input = gr.Textbox(
	label="Enter Prompt",
	placeholder="Describe your 3D model...",
	lines=2,
	elem_id="prompt_input"
	)

	with gr.Row():
	# precision=0 makes the seed field an integer input.
	sample_seed = gr.Number(value=42, label="Seed Value", precision=0)

	with gr.Row():
	submit = gr.Button("Generate", elem_id="generate", variant="primary")

	with gr.Row(variant="panel"):
	gr.Markdown("Examples:")
	# Each example fills only the prompt box.
	gr.Examples(
	examples=[
	["a castle on a hill"],
	["an owl wearing a hat"],
	["a futuristic car"]
	],
	inputs=[prompt_input],
	label="Prompt Examples"
	)

	# Right-hand output area
	with gr.Column():
	with gr.Row():
	rgb_normal_grid_image = gr.Image(
	label="RGB Normal Grid",
	type="pil",
	interactive=False
	)

	with gr.Row():
	with gr.Tab("GLB"):
	output_glb_model = gr.Model3D(
	label="Generated 3D Model (GLB Format)",
	interactive=False
	)
	gr.Markdown("Download the model for proper visualization.")

	# Event handling: the two outputs receive gradio_pipeline's return values in
	# order (preview image, GLB path).
	submit.click(
	fn=gradio_pipeline, inputs=[prompt_input, sample_seed],
	outputs=[rgb_normal_grid_image, output_glb_model]
	)

	# Launch the app
	# demo.queue(max_size=10)
	demo.launch()