TRELLIS-Texto3D

Running on Zero

File size: 7,871 Bytes

e67f19a
2b5a4ed
 
e67f19a
 
0fcfda4
e67f19a
988b15f
e67f19a
 
 
 
2b5a4ed
 
 
f456f77
e67f19a
 
 
 
 
2b5a4ed
 
bcc350d
e67f19a
2b5a4ed
 
 
61e4d45
e67f19a
 
 
 
 
 
 
 
 
 
 
 
 
 
2b5a4ed
 
e67f19a
 
 
 
 
 
 
 
2b5a4ed
 
 
 
 
61e4d45
 
2b5a4ed
 
61e4d45
b9a0ac3
2b5a4ed
 
b9a0ac3
 
e67f19a
df3e7f6
a4d8888
e67f19a
 
 
 
 
 
 
 
2b5a4ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e67f19a
 
df3e7f6
e67f19a
 
 
 
 
 
2b5a4ed
 
 
 
 
 
e67f19a
070e8ae
2b5a4ed
e67f19a
372c8f9
 
 
 
 
e67f19a
2b5a4ed
e67f19a
 
070e8ae
372c8f9
070e8ae
 
 
 
 
 
 
 
 
 
 
372c8f9
070e8ae
 
 
372c8f9
070e8ae
 
372c8f9
e17ff7a
 
 
01dac1f
 
 
 
 
 
 
 
 
 
372c8f9
a4d8888
e17ff7a
 
 
 
 
 
070e8ae
 
a4d8888
2b5a4ed
e67f19a
2b5a4ed
a4d8888
2b5a4ed
273779b
 
2b5a4ed
 
 
 
e67f19a
2b5a4ed
 
 
e67f19a
a4d8888
2b5a4ed
 
 
7792d1e
a4d8888
4ec1d3d
2b5a4ed
e67f19a
372c8f9
2b5a4ed
 
 
e67f19a
b8c58d3
2b5a4ed
 
 
7792d1e
33b5608
2b5a4ed
e67f19a
a76d2a3
2b5a4ed

import gradio as gr
import spaces 
from gradio_litmodel3d import LitModel3D
import os
import shutil
os.environ['TOKENIZERS_PARALLELISM'] = 'true'
os.environ['SPCONV_ALGO'] = 'native'
from typing import *
import torch
import numpy as np
import imageio
from easydict import EasyDict as edict
from trellis.pipelines import TrellisTextTo3DPipeline
from trellis.representations import Gaussian, MeshExtractResult
from trellis.utils import render_utils, postprocessing_utils

# Largest 32-bit signed integer; used as the upper bound of the seed slider
# and of the random seed drawn in get_seed.
MAX_SEED = np.iinfo(np.int32).max
# Scratch directory located next to this file; each session gets its own
# sub-folder underneath it (see start_session / end_session).
TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
os.makedirs(TMP_DIR, exist_ok=True)

def start_session(req: gr.Request):
    """Create the scratch folder for the session attached to ``req``.

    The folder is named after ``req.session_hash`` and lives under
    ``TMP_DIR``. An already existing folder is left as is.
    """
    os.makedirs(os.path.join(TMP_DIR, str(req.session_hash)), exist_ok=True)
    
def end_session(req: gr.Request):
    """Delete the scratch folder of the session attached to ``req``.

    The folder is ``TMP_DIR/<session_hash>``. A folder that does not exist
    (never created, or already removed) is not treated as an error, so
    session teardown cannot fail just because there is nothing to clean up.
    """
    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
    try:
        shutil.rmtree(user_dir)
    except FileNotFoundError:
        # Nothing to remove; other failures (e.g. permissions) still surface.
        pass

def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
    """Convert a Gaussian and a mesh into a dictionary of NumPy arrays.

    Args:
        gs: Gaussian whose ``init_params`` and tensor attributes are saved.
        mesh: Mesh result whose ``vertices`` and ``faces`` are saved.

    Returns:
        A dict with two entries: ``'gaussian'`` (the init params plus the
        tensor attributes moved to CPU as NumPy arrays) and ``'mesh'``
        (vertices and faces as NumPy arrays).
    """
    gaussian_state = {**gs.init_params}
    # Tensor attributes are stored under their own attribute names.
    for attr in ('_xyz', '_features_dc', '_scaling', '_rotation', '_opacity'):
        gaussian_state[attr] = getattr(gs, attr).cpu().numpy()

    mesh_state = {
        'vertices': mesh.vertices.cpu().numpy(),
        'faces': mesh.faces.cpu().numpy(),
    }

    return {'gaussian': gaussian_state, 'mesh': mesh_state}
    
def unpack_state(state: dict) -> Tuple[Gaussian, edict]:
    """Rebuild a Gaussian and a mesh on the GPU from a packed state dict.

    The expected layout is the one produced by ``pack_state``: a
    ``'gaussian'`` entry holding constructor parameters plus array fields,
    and a ``'mesh'`` entry holding ``'vertices'`` and ``'faces'``.

    Args:
        state: Dictionary with ``'gaussian'`` and ``'mesh'`` entries.

    Returns:
        ``(gs, mesh)`` where ``gs`` is a ``Gaussian`` whose tensor
        attributes live on the ``'cuda'`` device and ``mesh`` is an
        ``edict`` with ``vertices`` and ``faces`` tensors on ``'cuda'``.
    """
    gaussian_state = state['gaussian']
    gs = Gaussian(
        aabb=gaussian_state['aabb'],
        sh_degree=gaussian_state['sh_degree'],
        # NOTE(review): 'mininum' spelling is kept on purpose; it is the
        # keyword/key used by this file -- confirm against the Gaussian class.
        mininum_kernel_size=gaussian_state['mininum_kernel_size'],
        scaling_bias=gaussian_state['scaling_bias'],
        opacity_bias=gaussian_state['opacity_bias'],
        scaling_activation=gaussian_state['scaling_activation'],
    )
    # Restore every array field as a CUDA tensor under the same attribute name.
    for attr in ('_xyz', '_features_dc', '_scaling', '_rotation', '_opacity'):
        setattr(gs, attr, torch.tensor(gaussian_state[attr], device='cuda'))

    mesh = edict(
        vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
        faces=torch.tensor(state['mesh']['faces'], device='cuda'),
    )

    return gs, mesh

def get_seed(randomize_seed: bool, seed: int) -> int:
    """Pick the seed to use for a generation run.

    Args:
        randomize_seed: When true, ignore ``seed`` and draw a random one.
        seed: Seed returned unchanged when ``randomize_seed`` is false.

    Returns:
        ``seed`` itself, or a value drawn with
        ``np.random.randint(0, MAX_SEED)``.
    """
    if not randomize_seed:
        return seed
    return np.random.randint(0, MAX_SEED)

@spaces.GPU
def text_to_3d(
    prompt: str,
    seed: int,
    ss_guidance_strength: float,
    ss_sampling_steps: int,
    slat_guidance_strength: float,
    slat_sampling_steps: int,
    req: gr.Request,
) -> Tuple[dict, str]:
    """Generate a 3D asset from a text prompt and render a preview video.

    Args:
        prompt: Text description passed to the pipeline.
        seed: Seed passed to the pipeline.
        ss_guidance_strength: ``cfg_strength`` for the sparse-structure sampler.
        ss_sampling_steps: ``steps`` for the sparse-structure sampler.
        slat_guidance_strength: ``cfg_strength`` for the SLAT sampler.
        slat_sampling_steps: ``steps`` for the SLAT sampler.
        req: Current request; its ``session_hash`` selects the output folder.

    Returns:
        ``(state, video_path)``: the packed Gaussian/mesh state (see
        ``pack_state``) and the path of the rendered ``sample.mp4``.
    """
    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
    # The folder is normally created by start_session; recreate it here so
    # writing the video cannot fail if it is missing for any reason.
    os.makedirs(user_dir, exist_ok=True)

    outputs = pipeline.run(
        prompt,
        seed=seed,
        formats=["gaussian", "mesh"],
        sparse_structure_sampler_params={
            "steps": ss_sampling_steps,
            "cfg_strength": ss_guidance_strength,
        },
        slat_sampler_params={
            "steps": slat_sampling_steps,
            "cfg_strength": slat_guidance_strength,
        },
    )
    gaussian = outputs['gaussian'][0]
    mesh = outputs['mesh'][0]

    color_frames = render_utils.render_video(gaussian, num_frames=120)['color']
    normal_frames = render_utils.render_video(mesh, num_frames=120)['normal']
    # Join each color frame with the matching normal frame along axis 1
    # (presumably width, i.e. side by side -- confirm frame layout).
    video = [
        np.concatenate([color, normal], axis=1)
        for color, normal in zip(color_frames, normal_frames)
    ]

    video_path = os.path.join(user_dir, 'sample.mp4')
    imageio.mimsave(video_path, video, fps=15)

    state = pack_state(gaussian, mesh)
    torch.cuda.empty_cache()
    return state, video_path

@spaces.GPU
def extract_glb(
    state: dict,
    mesh_simplify: float,
    texture_size: int,
    req: gr.Request,
) -> Tuple[str, str]:
    """Export the generated asset stored in ``state`` as a GLB file.

    Args:
        state: Packed state as returned by ``text_to_3d``.
        mesh_simplify: Passed to ``to_glb`` as ``simplify``.
        texture_size: Passed to ``to_glb`` as ``texture_size``.
        req: Current request; its ``session_hash`` selects the output folder.

    Returns:
        The path of the written ``sample.glb`` twice, once for each of the
        two output components wired to this handler.
    """
    gaussian, surface = unpack_state(state)
    glb_model = postprocessing_utils.to_glb(
        gaussian,
        surface,
        simplify=mesh_simplify,
        texture_size=texture_size,
        verbose=False,
    )

    session_dir = os.path.join(TMP_DIR, str(req.session_hash))
    out_path = os.path.join(session_dir, 'sample.glb')
    glb_model.export(out_path)

    torch.cuda.empty_cache()
    return out_path, out_path

# Build the UI: a settings column on the left, preview/viewer/download on the
# right, followed by the event wiring that connects them.
# NOTE(review): per the Gradio docs, delete_cache=(frequency, age) is given in
# seconds -- confirm the intended 600 s / 600 s values.
with gr.Blocks(delete_cache=(600, 600)) as demo:
    gr.Markdown("""
    # UTPL - Conversión de Texto a objetos 3D usando IA  
    ### Tesis: *"Objetos tridimensionales creados por IA: Innovación en entornos virtuales"*  
    **Autor:** Carlos Vargas  
    **Base técnica:** Adaptación de [TRELLIS](https://trellis3d.github.io/) (herramienta de código abierto para generación 3D)  
    **Propósito educativo:** Demostraciones académicas e Investigación en modelado 3D automático  
    """)
    
    with gr.Row():
        # Left column: prompt, generation settings and GLB extraction settings.
        with gr.Column():
            text_prompt = gr.Textbox(label="Text Prompt", lines=5)
            
            with gr.Accordion(label="Generation Settings", open=False):
                seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                # First row feeds the ss_* arguments of text_to_3d.
                with gr.Row():
                    ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
                    ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=25, step=1)
                # Second row feeds the slat_* arguments of text_to_3d.
                with gr.Row():
                    slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
                    slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=25, step=1)

            generate_btn = gr.Button("Generate")
            
            with gr.Accordion(label="GLB Extraction Settings", open=False):
                mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
                texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
            
            with gr.Row():
                # Starts disabled; enabled by the generate_btn chain below.
                extract_glb_btn = gr.Button("Extract GLB", interactive=False)

        # Right column: preview video, 3D model viewer and download button.
        with gr.Column(scale=3, min_width=600):
            with gr.Group():
                video_output = gr.Video(
                    label="3D Preview", 
                    autoplay=True, 
                    loop=True, 
                    height=300,
                    show_label=False
                )
                model_output = gr.Model3D(
                    label="3D Model Viewer", 
                    height=400
                )
            
            with gr.Row():
                # Starts disabled; enabled by the extract_glb_btn chain below.
                download_glb = gr.DownloadButton(
                    label="Download GLB File", 
                    interactive=False,
                    variant="secondary",
                    size="lg"
                )
    
    # Holds the packed state dict returned by text_to_3d so extract_glb can
    # read it later.
    output_buf = gr.State()

    # Handlers
    # Create / remove the per-session scratch folder on page load / unload.
    demo.load(start_session)
    demo.unload(end_session)

    # Generate: choose the seed, run generation, then enable "Extract GLB".
    # NOTE(review): per the Gradio docs, .then() steps run even when the
    # previous step raised; .success() may be the intended behavior -- confirm.
    generate_btn.click(
        get_seed,
        inputs=[randomize_seed, seed],
        outputs=[seed],
    ).then(
        text_to_3d,
        inputs=[text_prompt, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
        outputs=[output_buf, video_output],
    ).then(
        lambda: gr.Button(interactive=True),
        outputs=[extract_glb_btn],
    )

    # Clearing the preview video disables "Extract GLB" again.
    video_output.clear(
        lambda: gr.Button(interactive=False),
        outputs=[extract_glb_btn],
    )

    # Extract: write the GLB, feed its path to the viewer and to the download
    # button, then enable the download button.
    extract_glb_btn.click(
        extract_glb,
        inputs=[output_buf, mesh_simplify, texture_size],
        outputs=[model_output, download_glb],
    ).then(
        lambda: gr.Button(interactive=True),
        outputs=[download_glb],
    )

    # Clearing the model viewer disables the download button again.
    model_output.clear(
        lambda: gr.Button(interactive=False),
        outputs=[download_glb],
    )

# Launch the Gradio app
# `pipeline` is bound at module level here because text_to_3d reads it as a
# global; it only exists when this file is run as a script.
if __name__ == "__main__":
    pipeline = TrellisTextTo3DPipeline.from_pretrained("cavargas10/TRELLIS-text-xlarge")
    # Move the pipeline to the GPU before the UI starts serving requests.
    pipeline.cuda()
    demo.launch()