syedMohib44 committed on
Commit 4ebc49e · 1 Parent(s): 3a804d3

Changed path of cache

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitignore +3 -1
  2. dist/hy3dgen-2.0.0-py3.12.egg +0 -0
  3. hy3dgen/__init__.py +0 -23
  4. hy3dgen/rembg.py +0 -36
  5. hy3dgen/shapegen/__init__.py +0 -27
  6. hy3dgen/shapegen/models/__init__.py +0 -28
  7. hy3dgen/shapegen/models/conditioner.py +0 -165
  8. hy3dgen/shapegen/models/hunyuan3ddit.py +0 -390
  9. hy3dgen/shapegen/models/vae.py +0 -636
  10. hy3dgen/shapegen/pipelines.py +0 -589
  11. hy3dgen/shapegen/postprocessors.py +0 -175
  12. hy3dgen/shapegen/preprocessors.py +0 -127
  13. hy3dgen/shapegen/schedulers.py +0 -307
  14. hy3dgen/texgen/__init__.py +0 -26
  15. hy3dgen/texgen/custom_rasterizer/custom_rasterizer/__init__.py +0 -32
  16. hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_glb.py +0 -248
  17. hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_obj.py +0 -76
  18. hy3dgen/texgen/custom_rasterizer/custom_rasterizer/render.py +0 -41
  19. hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/__init__.py +0 -23
  20. hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/grid_neighbor.cpp +0 -575
  21. hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp +0 -139
  22. hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h +0 -54
  23. hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu +0 -127
  24. hy3dgen/texgen/custom_rasterizer/setup.py +0 -26
  25. hy3dgen/texgen/differentiable_renderer/__init__.py +0 -23
  26. hy3dgen/texgen/differentiable_renderer/build/temp.win-amd64-cpython-310/Release/mesh_processor.cp310-win_amd64.exp +0 -0
  27. hy3dgen/texgen/differentiable_renderer/build/temp.win-amd64-cpython-310/Release/mesh_processor.cp310-win_amd64.lib +0 -0
  28. hy3dgen/texgen/differentiable_renderer/build/temp.win-amd64-cpython-310/Release/mesh_processor.obj +0 -3
  29. hy3dgen/texgen/differentiable_renderer/camera_utils.py +0 -116
  30. hy3dgen/texgen/differentiable_renderer/compile_mesh_painter.bat +0 -3
  31. hy3dgen/texgen/differentiable_renderer/dist/mesh_processor-0.0.0-py3.10-win-amd64.egg +0 -0
  32. hy3dgen/texgen/differentiable_renderer/mesh_processor.cpp +0 -161
  33. hy3dgen/texgen/differentiable_renderer/mesh_processor.egg-info/PKG-INFO +0 -7
  34. hy3dgen/texgen/differentiable_renderer/mesh_processor.egg-info/SOURCES.txt +0 -7
  35. hy3dgen/texgen/differentiable_renderer/mesh_processor.egg-info/dependency_links.txt +0 -1
  36. hy3dgen/texgen/differentiable_renderer/mesh_processor.egg-info/requires.txt +0 -1
  37. hy3dgen/texgen/differentiable_renderer/mesh_processor.egg-info/top_level.txt +0 -1
  38. hy3dgen/texgen/differentiable_renderer/mesh_processor.py +0 -70
  39. hy3dgen/texgen/differentiable_renderer/mesh_render.py +0 -833
  40. hy3dgen/texgen/differentiable_renderer/mesh_utils.py +0 -44
  41. hy3dgen/texgen/differentiable_renderer/setup.py +0 -48
  42. hy3dgen/texgen/hunyuanpaint/__init__.py +0 -23
  43. hy3dgen/texgen/hunyuanpaint/pipeline.py +0 -554
  44. hy3dgen/texgen/hunyuanpaint/unet/__init__.py +0 -23
  45. hy3dgen/texgen/hunyuanpaint/unet/modules.py +0 -440
  46. hy3dgen/texgen/pipelines.py +0 -227
  47. hy3dgen/texgen/utils/__init__.py +0 -23
  48. hy3dgen/texgen/utils/alignImg4Tex_utils.py +0 -132
  49. hy3dgen/texgen/utils/counter_utils.py +0 -58
  50. hy3dgen/texgen/utils/dehighlight_utils.py +0 -84
.gitignore CHANGED
@@ -7,6 +7,8 @@ __pycache__/
  venv/
  env/
  .venv/
+ build/
+ dist/

  # Jupyter Notebook checkpoints
  .ipynb_checkpoints/
@@ -25,4 +27,4 @@ env/
  .vscode/

  # Hugging Face cache (optional)
- /content/huggingface/
+ ~/.cache/huggingface/
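
The ignore rule now points at ~/.cache/huggingface/, the default Hugging Face cache location, instead of the Colab-style /content/huggingface/ path, which matches the commit message about changing the cache path. Below is a minimal sketch of how such a cache location is typically pinned down before any downloads run; the environment-variable approach and the repo id are illustrative and not taken from this commit:

    import os

    # HF_HOME controls where huggingface_hub keeps its cache; when unset it
    # defaults to ~/.cache/huggingface/ (the directory ignored above).
    os.environ["HF_HOME"] = os.path.expanduser("~/.cache/huggingface")

    from huggingface_hub import snapshot_download

    # Example download; the repo id here is only illustrative.
    local_path = snapshot_download("tencent/Hunyuan3D-2")
    print(local_path)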
dist/hy3dgen-2.0.0-py3.12.egg DELETED
Binary file (189 kB)
 
hy3dgen/__init__.py DELETED
@@ -1,23 +0,0 @@
- # Open Source Model Licensed under the Apache License Version 2.0
- # and Other Licenses of the Third-Party Components therein:
- # The below Model in this distribution may have been modified by THL A29 Limited
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
- # The below software and/or models in this distribution may have been
- # modified by THL A29 Limited ("Tencent Modifications").
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
-
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
- # except for the third-party components listed below.
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
- # in the repsective licenses of these third-party components.
- # Users must comply with all terms and conditions of original licenses of these third-party
- # components and must ensure that the usage of the third party components adheres to
- # all relevant laws and regulations.
-
- # For avoidance of doubts, Hunyuan 3D means the large language models and
- # their software and algorithms, including trained model weights, parameters (including
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
- # fine-tuning enabling code and other elements of the foregoing made publicly available
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
 
hy3dgen/rembg.py DELETED
@@ -1,36 +0,0 @@
- # Open Source Model Licensed under the Apache License Version 2.0
- # and Other Licenses of the Third-Party Components therein:
- # The below Model in this distribution may have been modified by THL A29 Limited
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
- # The below software and/or models in this distribution may have been
- # modified by THL A29 Limited ("Tencent Modifications").
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
-
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
- # except for the third-party components listed below.
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
- # in the repsective licenses of these third-party components.
- # Users must comply with all terms and conditions of original licenses of these third-party
- # components and must ensure that the usage of the third party components adheres to
- # all relevant laws and regulations.
-
- # For avoidance of doubts, Hunyuan 3D means the large language models and
- # their software and algorithms, including trained model weights, parameters (including
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
- # fine-tuning enabling code and other elements of the foregoing made publicly available
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-
-
- from PIL import Image
- from rembg import remove, new_session
-
-
- class BackgroundRemover():
-     def __init__(self):
-         self.session = new_session()
-
-     def __call__(self, image: Image.Image):
-         output = remove(image, session=self.session, bgcolor=[255, 255, 255, 0])
-         return output
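
For context, the class removed above was used as a one-call background remover. A minimal usage sketch, assuming Pillow and rembg are installed and with purely illustrative file names:

    from PIL import Image
    from hy3dgen.rembg import BackgroundRemover  # module deleted by this commit

    remover = BackgroundRemover()
    image = Image.open("input.png").convert("RGB")
    cutout = remover(image)  # returns the image with its background removed
    cutout.save("output.png")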
 
hy3dgen/shapegen/__init__.py DELETED
@@ -1,27 +0,0 @@
- # Open Source Model Licensed under the Apache License Version 2.0
- # and Other Licenses of the Third-Party Components therein:
- # The below Model in this distribution may have been modified by THL A29 Limited
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
- # The below software and/or models in this distribution may have been
- # modified by THL A29 Limited ("Tencent Modifications").
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
-
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
- # except for the third-party components listed below.
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
- # in the repsective licenses of these third-party components.
- # Users must comply with all terms and conditions of original licenses of these third-party
- # components and must ensure that the usage of the third party components adheres to
- # all relevant laws and regulations.
-
- # For avoidance of doubts, Hunyuan 3D means the large language models and
- # their software and algorithms, including trained model weights, parameters (including
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
- # fine-tuning enabling code and other elements of the foregoing made publicly available
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-
- from .pipelines import Hunyuan3DDiTPipeline, Hunyuan3DDiTFlowMatchingPipeline
- from .postprocessors import FaceReducer, FloaterRemover, DegenerateFaceRemover
- from .preprocessors import ImageProcessorV2, IMAGE_PROCESSORS, DEFAULT_IMAGEPROCESSOR
 
hy3dgen/shapegen/models/__init__.py DELETED
@@ -1,28 +0,0 @@
- # Open Source Model Licensed under the Apache License Version 2.0
- # and Other Licenses of the Third-Party Components therein:
- # The below Model in this distribution may have been modified by THL A29 Limited
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
- # The below software and/or models in this distribution may have been
- # modified by THL A29 Limited ("Tencent Modifications").
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
-
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
- # except for the third-party components listed below.
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
- # in the repsective licenses of these third-party components.
- # Users must comply with all terms and conditions of original licenses of these third-party
- # components and must ensure that the usage of the third party components adheres to
- # all relevant laws and regulations.
-
- # For avoidance of doubts, Hunyuan 3D means the large language models and
- # their software and algorithms, including trained model weights, parameters (including
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
- # fine-tuning enabling code and other elements of the foregoing made publicly available
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-
-
- from .conditioner import DualImageEncoder, SingleImageEncoder, DinoImageEncoder, CLIPImageEncoder
- from .hunyuan3ddit import Hunyuan3DDiT
- from .vae import ShapeVAE
 
hy3dgen/shapegen/models/conditioner.py DELETED
@@ -1,165 +0,0 @@
- # Open Source Model Licensed under the Apache License Version 2.0
- # and Other Licenses of the Third-Party Components therein:
- # The below Model in this distribution may have been modified by THL A29 Limited
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
- # The below software and/or models in this distribution may have been
- # modified by THL A29 Limited ("Tencent Modifications").
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
-
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
- # except for the third-party components listed below.
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
- # in the repsective licenses of these third-party components.
- # Users must comply with all terms and conditions of original licenses of these third-party
- # components and must ensure that the usage of the third party components adheres to
- # all relevant laws and regulations.
-
- # For avoidance of doubts, Hunyuan 3D means the large language models and
- # their software and algorithms, including trained model weights, parameters (including
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
- # fine-tuning enabling code and other elements of the foregoing made publicly available
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-
- import torch
- import torch.nn as nn
- from torchvision import transforms
- from transformers import (
-     CLIPVisionModelWithProjection,
-     CLIPVisionConfig,
-     Dinov2Model,
-     Dinov2Config,
- )
-
-
- class ImageEncoder(nn.Module):
-     def __init__(
-         self,
-         version=None,
-         config=None,
-         use_cls_token=True,
-         image_size=224,
-         **kwargs,
-     ):
-         super().__init__()
-
-         if config is None:
-             self.model = self.MODEL_CLASS.from_pretrained(version)
-         else:
-             self.model = self.MODEL_CLASS(self.MODEL_CONFIG_CLASS.from_dict(config))
-         self.model.eval()
-         self.model.requires_grad_(False)
-         self.use_cls_token = use_cls_token
-         self.size = image_size // 14
-         self.num_patches = (image_size // 14) ** 2
-         if self.use_cls_token:
-             self.num_patches += 1
-
-         self.transform = transforms.Compose(
-             [
-                 transforms.Resize(image_size, transforms.InterpolationMode.BILINEAR, antialias=True),
-                 transforms.CenterCrop(image_size),
-                 transforms.Normalize(
-                     mean=self.mean,
-                     std=self.std,
-                 ),
-             ]
-         )
-
-     def forward(self, image, mask=None, value_range=(-1, 1)):
-         if value_range is not None:
-             low, high = value_range
-             image = (image - low) / (high - low)
-
-         image = image.to(self.model.device, dtype=self.model.dtype)
-         inputs = self.transform(image)
-         outputs = self.model(inputs)
-
-         last_hidden_state = outputs.last_hidden_state
-         if not self.use_cls_token:
-             last_hidden_state = last_hidden_state[:, 1:, :]
-
-         return last_hidden_state
-
-     def unconditional_embedding(self, batch_size):
-         device = next(self.model.parameters()).device
-         dtype = next(self.model.parameters()).dtype
-         zero = torch.zeros(
-             batch_size,
-             self.num_patches,
-             self.model.config.hidden_size,
-             device=device,
-             dtype=dtype,
-         )
-
-         return zero
-
-
- class CLIPImageEncoder(ImageEncoder):
-     MODEL_CLASS = CLIPVisionModelWithProjection
-     MODEL_CONFIG_CLASS = CLIPVisionConfig
-     mean = [0.48145466, 0.4578275, 0.40821073]
-     std = [0.26862954, 0.26130258, 0.27577711]
-
-
- class DinoImageEncoder(ImageEncoder):
-     MODEL_CLASS = Dinov2Model
-     MODEL_CONFIG_CLASS = Dinov2Config
-     mean = [0.485, 0.456, 0.406]
-     std = [0.229, 0.224, 0.225]
-
-
- def build_image_encoder(config):
-     if config['type'] == 'CLIPImageEncoder':
-         return CLIPImageEncoder(**config['kwargs'])
-     elif config['type'] == 'DinoImageEncoder':
-         return DinoImageEncoder(**config['kwargs'])
-     else:
-         raise ValueError(f'Unknown image encoder type: {config["type"]}')
-
-
- class DualImageEncoder(nn.Module):
-     def __init__(
-         self,
-         main_image_encoder,
-         additional_image_encoder,
-     ):
-         super().__init__()
-         self.main_image_encoder = build_image_encoder(main_image_encoder)
-         self.additional_image_encoder = build_image_encoder(additional_image_encoder)
-
-     def forward(self, image, mask=None):
-         outputs = {
-             'main': self.main_image_encoder(image, mask=mask),
-             'additional': self.additional_image_encoder(image, mask=mask),
-         }
-         return outputs
-
-     def unconditional_embedding(self, batch_size):
-         outputs = {
-             'main': self.main_image_encoder.unconditional_embedding(batch_size),
-             'additional': self.additional_image_encoder.unconditional_embedding(batch_size),
-         }
-         return outputs
-
-
- class SingleImageEncoder(nn.Module):
-     def __init__(
-         self,
-         main_image_encoder,
-     ):
-         super().__init__()
-         self.main_image_encoder = build_image_encoder(main_image_encoder)
-
-     def forward(self, image, mask=None):
-         outputs = {
-             'main': self.main_image_encoder(image, mask=mask),
-         }
-         return outputs
-
-     def unconditional_embedding(self, batch_size):
-         outputs = {
-             'main': self.main_image_encoder.unconditional_embedding(batch_size),
-         }
-         return outputs
 
hy3dgen/shapegen/models/hunyuan3ddit.py DELETED
@@ -1,390 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the repsective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
- import math
26
- from dataclasses import dataclass
27
- from typing import List, Tuple, Optional
28
-
29
- import torch
30
- from einops import rearrange
31
- from torch import Tensor, nn
32
-
33
-
34
- def attention(q: Tensor, k: Tensor, v: Tensor, **kwargs) -> Tensor:
35
- x = torch.nn.functional.scaled_dot_product_attention(q, k, v)
36
- x = rearrange(x, "B H L D -> B L (H D)")
37
- return x
38
-
39
-
40
- def timestep_embedding(t: Tensor, dim, max_period=10000, time_factor: float = 1000.0):
41
- """
42
- Create sinusoidal timestep embeddings.
43
- :param t: a 1-D Tensor of N indices, one per batch element.
44
- These may be fractional.
45
- :param dim: the dimension of the output.
46
- :param max_period: controls the minimum frequency of the embeddings.
47
- :return: an (N, D) Tensor of positional embeddings.
48
- """
49
- t = time_factor * t
50
- half = dim // 2
51
- freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half).to(
52
- t.device
53
- )
54
-
55
- args = t[:, None].float() * freqs[None]
56
- embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
57
- if dim % 2:
58
- embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1)
59
- if torch.is_floating_point(t):
60
- embedding = embedding.to(t)
61
- return embedding
62
-
63
-
64
- class MLPEmbedder(nn.Module):
65
- def __init__(self, in_dim: int, hidden_dim: int):
66
- super().__init__()
67
- self.in_layer = nn.Linear(in_dim, hidden_dim, bias=True)
68
- self.silu = nn.SiLU()
69
- self.out_layer = nn.Linear(hidden_dim, hidden_dim, bias=True)
70
-
71
- def forward(self, x: Tensor) -> Tensor:
72
- return self.out_layer(self.silu(self.in_layer(x)))
73
-
74
-
75
- class RMSNorm(torch.nn.Module):
76
- def __init__(self, dim: int):
77
- super().__init__()
78
- self.scale = nn.Parameter(torch.ones(dim))
79
-
80
- def forward(self, x: Tensor):
81
- x_dtype = x.dtype
82
- x = x.float()
83
- rrms = torch.rsqrt(torch.mean(x ** 2, dim=-1, keepdim=True) + 1e-6)
84
- return (x * rrms).to(dtype=x_dtype) * self.scale
85
-
86
-
87
- class QKNorm(torch.nn.Module):
88
- def __init__(self, dim: int):
89
- super().__init__()
90
- self.query_norm = RMSNorm(dim)
91
- self.key_norm = RMSNorm(dim)
92
-
93
- def forward(self, q: Tensor, k: Tensor, v: Tensor) -> Tuple[Tensor, Tensor]:
94
- q = self.query_norm(q)
95
- k = self.key_norm(k)
96
- return q.to(v), k.to(v)
97
-
98
-
99
- class SelfAttention(nn.Module):
100
- def __init__(
101
- self,
102
- dim: int,
103
- num_heads: int = 8,
104
- qkv_bias: bool = False,
105
- ):
106
- super().__init__()
107
- self.num_heads = num_heads
108
- head_dim = dim // num_heads
109
-
110
- self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
111
- self.norm = QKNorm(head_dim)
112
- self.proj = nn.Linear(dim, dim)
113
-
114
- def forward(self, x: Tensor, pe: Tensor) -> Tensor:
115
- qkv = self.qkv(x)
116
- q, k, v = rearrange(qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
117
- q, k = self.norm(q, k, v)
118
- x = attention(q, k, v, pe=pe)
119
- x = self.proj(x)
120
- return x
121
-
122
-
123
- @dataclass
124
- class ModulationOut:
125
- shift: Tensor
126
- scale: Tensor
127
- gate: Tensor
128
-
129
-
130
- class Modulation(nn.Module):
131
- def __init__(self, dim: int, double: bool):
132
- super().__init__()
133
- self.is_double = double
134
- self.multiplier = 6 if double else 3
135
- self.lin = nn.Linear(dim, self.multiplier * dim, bias=True)
136
-
137
- def forward(self, vec: Tensor) -> Tuple[ModulationOut, Optional[ModulationOut]]:
138
- out = self.lin(nn.functional.silu(vec))[:, None, :]
139
- out = out.chunk(self.multiplier, dim=-1)
140
-
141
- return (
142
- ModulationOut(*out[:3]),
143
- ModulationOut(*out[3:]) if self.is_double else None,
144
- )
145
-
146
-
147
- class DoubleStreamBlock(nn.Module):
148
- def __init__(
149
- self,
150
- hidden_size: int,
151
- num_heads: int,
152
- mlp_ratio: float,
153
- qkv_bias: bool = False,
154
- ):
155
- super().__init__()
156
- mlp_hidden_dim = int(hidden_size * mlp_ratio)
157
- self.num_heads = num_heads
158
- self.hidden_size = hidden_size
159
- self.img_mod = Modulation(hidden_size, double=True)
160
- self.img_norm1 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
161
- self.img_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias)
162
-
163
- self.img_norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
164
- self.img_mlp = nn.Sequential(
165
- nn.Linear(hidden_size, mlp_hidden_dim, bias=True),
166
- nn.GELU(approximate="tanh"),
167
- nn.Linear(mlp_hidden_dim, hidden_size, bias=True),
168
- )
169
-
170
- self.txt_mod = Modulation(hidden_size, double=True)
171
- self.txt_norm1 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
172
- self.txt_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias)
173
-
174
- self.txt_norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
175
- self.txt_mlp = nn.Sequential(
176
- nn.Linear(hidden_size, mlp_hidden_dim, bias=True),
177
- nn.GELU(approximate="tanh"),
178
- nn.Linear(mlp_hidden_dim, hidden_size, bias=True),
179
- )
180
-
181
- def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor) -> Tuple[Tensor, Tensor]:
182
- img_mod1, img_mod2 = self.img_mod(vec)
183
- txt_mod1, txt_mod2 = self.txt_mod(vec)
184
-
185
- img_modulated = self.img_norm1(img)
186
- img_modulated = (1 + img_mod1.scale) * img_modulated + img_mod1.shift
187
- img_qkv = self.img_attn.qkv(img_modulated)
188
- img_q, img_k, img_v = rearrange(img_qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
189
- img_q, img_k = self.img_attn.norm(img_q, img_k, img_v)
190
-
191
- txt_modulated = self.txt_norm1(txt)
192
- txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift
193
- txt_qkv = self.txt_attn.qkv(txt_modulated)
194
- txt_q, txt_k, txt_v = rearrange(txt_qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
195
- txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)
196
-
197
- q = torch.cat((txt_q, img_q), dim=2)
198
- k = torch.cat((txt_k, img_k), dim=2)
199
- v = torch.cat((txt_v, img_v), dim=2)
200
-
201
- attn = attention(q, k, v, pe=pe)
202
- txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:]
203
-
204
- img = img + img_mod1.gate * self.img_attn.proj(img_attn)
205
- img = img + img_mod2.gate * self.img_mlp((1 + img_mod2.scale) * self.img_norm2(img) + img_mod2.shift)
206
-
207
- txt = txt + txt_mod1.gate * self.txt_attn.proj(txt_attn)
208
- txt = txt + txt_mod2.gate * self.txt_mlp((1 + txt_mod2.scale) * self.txt_norm2(txt) + txt_mod2.shift)
209
- return img, txt
210
-
211
-
212
- class SingleStreamBlock(nn.Module):
213
- """
214
- A DiT block with parallel linear layers as described in
215
- https://arxiv.org/abs/2302.05442 and adapted modulation interface.
216
- """
217
-
218
- def __init__(
219
- self,
220
- hidden_size: int,
221
- num_heads: int,
222
- mlp_ratio: float = 4.0,
223
- qk_scale: Optional[float] = None,
224
- ):
225
- super().__init__()
226
-
227
- self.hidden_dim = hidden_size
228
- self.num_heads = num_heads
229
- head_dim = hidden_size // num_heads
230
- self.scale = qk_scale or head_dim ** -0.5
231
-
232
- self.mlp_hidden_dim = int(hidden_size * mlp_ratio)
233
- # qkv and mlp_in
234
- self.linear1 = nn.Linear(hidden_size, hidden_size * 3 + self.mlp_hidden_dim)
235
- # proj and mlp_out
236
- self.linear2 = nn.Linear(hidden_size + self.mlp_hidden_dim, hidden_size)
237
-
238
- self.norm = QKNorm(head_dim)
239
-
240
- self.hidden_size = hidden_size
241
- self.pre_norm = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
242
-
243
- self.mlp_act = nn.GELU(approximate="tanh")
244
- self.modulation = Modulation(hidden_size, double=False)
245
-
246
- def forward(self, x: Tensor, vec: Tensor, pe: Tensor) -> Tensor:
247
- mod, _ = self.modulation(vec)
248
-
249
- x_mod = (1 + mod.scale) * self.pre_norm(x) + mod.shift
250
- qkv, mlp = torch.split(self.linear1(x_mod), [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1)
251
-
252
- q, k, v = rearrange(qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
253
- q, k = self.norm(q, k, v)
254
-
255
- # compute attention
256
- attn = attention(q, k, v, pe=pe)
257
- # compute activation in mlp stream, cat again and run second linear layer
258
- output = self.linear2(torch.cat((attn, self.mlp_act(mlp)), 2))
259
- return x + mod.gate * output
260
-
261
-
262
- class LastLayer(nn.Module):
263
- def __init__(self, hidden_size: int, patch_size: int, out_channels: int):
264
- super().__init__()
265
- self.norm_final = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
266
- self.linear = nn.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True)
267
- self.adaLN_modulation = nn.Sequential(nn.SiLU(), nn.Linear(hidden_size, 2 * hidden_size, bias=True))
268
-
269
- def forward(self, x: Tensor, vec: Tensor) -> Tensor:
270
- shift, scale = self.adaLN_modulation(vec).chunk(2, dim=1)
271
- x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :]
272
- x = self.linear(x)
273
- return x
274
-
275
-
276
- class Hunyuan3DDiT(nn.Module):
277
- def __init__(
278
- self,
279
- in_channels: int = 64,
280
- context_in_dim: int = 1536,
281
- hidden_size: int = 1024,
282
- mlp_ratio: float = 4.0,
283
- num_heads: int = 16,
284
- depth: int = 16,
285
- depth_single_blocks: int = 32,
286
- axes_dim: List[int] = [64],
287
- theta: int = 10_000,
288
- qkv_bias: bool = True,
289
- time_factor: float = 1000,
290
- ckpt_path: Optional[str] = None,
291
- **kwargs,
292
- ):
293
- super().__init__()
294
- self.in_channels = in_channels
295
- self.context_in_dim = context_in_dim
296
- self.hidden_size = hidden_size
297
- self.mlp_ratio = mlp_ratio
298
- self.num_heads = num_heads
299
- self.depth = depth
300
- self.depth_single_blocks = depth_single_blocks
301
- self.axes_dim = axes_dim
302
- self.theta = theta
303
- self.qkv_bias = qkv_bias
304
- self.time_factor = time_factor
305
- self.out_channels = self.in_channels
306
-
307
- if hidden_size % num_heads != 0:
308
- raise ValueError(
309
- f"Hidden size {hidden_size} must be divisible by num_heads {num_heads}"
310
- )
311
- pe_dim = hidden_size // num_heads
312
- if sum(axes_dim) != pe_dim:
313
- raise ValueError(f"Got {axes_dim} but expected positional dim {pe_dim}")
314
- self.hidden_size = hidden_size
315
- self.num_heads = num_heads
316
- self.latent_in = nn.Linear(self.in_channels, self.hidden_size, bias=True)
317
- self.time_in = MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size)
318
- self.cond_in = nn.Linear(context_in_dim, self.hidden_size)
319
-
320
- self.double_blocks = nn.ModuleList(
321
- [
322
- DoubleStreamBlock(
323
- self.hidden_size,
324
- self.num_heads,
325
- mlp_ratio=mlp_ratio,
326
- qkv_bias=qkv_bias,
327
- )
328
- for _ in range(depth)
329
- ]
330
- )
331
-
332
- self.single_blocks = nn.ModuleList(
333
- [
334
- SingleStreamBlock(
335
- self.hidden_size,
336
- self.num_heads,
337
- mlp_ratio=mlp_ratio,
338
- )
339
- for _ in range(depth_single_blocks)
340
- ]
341
- )
342
-
343
- self.final_layer = LastLayer(self.hidden_size, 1, self.out_channels)
344
-
345
- if ckpt_path is not None:
346
- print('restored denoiser ckpt', ckpt_path)
347
-
348
- ckpt = torch.load(ckpt_path, map_location="cpu")
349
- if 'state_dict' not in ckpt:
350
- # deepspeed ckpt
351
- state_dict = {}
352
- for k in ckpt.keys():
353
- new_k = k.replace('_forward_module.', '')
354
- state_dict[new_k] = ckpt[k]
355
- else:
356
- state_dict = ckpt["state_dict"]
357
-
358
- final_state_dict = {}
359
- for k, v in state_dict.items():
360
- if k.startswith('model.'):
361
- final_state_dict[k.replace('model.', '')] = v
362
- else:
363
- final_state_dict[k] = v
364
- missing, unexpected = self.load_state_dict(final_state_dict, strict=False)
365
- print('unexpected keys:', unexpected)
366
- print('missing keys:', missing)
367
-
368
- def forward(
369
- self,
370
- x,
371
- t,
372
- contexts,
373
- **kwargs,
374
- ) -> Tensor:
375
- cond = contexts['main']
376
- latent = self.latent_in(x)
377
- vec = self.time_in(timestep_embedding(t, 256, self.time_factor).to(dtype=latent.dtype))
378
- cond = self.cond_in(cond)
379
- pe = None
380
-
381
- for block in self.double_blocks:
382
- latent, cond = block(img=latent, txt=cond, vec=vec, pe=pe)
383
-
384
- latent = torch.cat((cond, latent), 1)
385
- for block in self.single_blocks:
386
- latent = block(latent, vec=vec, pe=pe)
387
-
388
- latent = latent[:, cond.shape[1]:, ...]
389
- latent = self.final_layer(latent, vec)
390
- return latent
 
hy3dgen/shapegen/models/vae.py DELETED
@@ -1,636 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the repsective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
- from typing import Tuple, List, Union, Optional
26
-
27
- import numpy as np
28
- import torch
29
- import torch.nn as nn
30
- import torch.nn.functional as F
31
- from einops import rearrange, repeat
32
- from skimage import measure
33
- from tqdm import tqdm
34
-
35
-
36
- class FourierEmbedder(nn.Module):
37
- """The sin/cosine positional embedding. Given an input tensor `x` of shape [n_batch, ..., c_dim], it converts
38
- each feature dimension of `x[..., i]` into:
39
- [
40
- sin(x[..., i]),
41
- sin(f_1*x[..., i]),
42
- sin(f_2*x[..., i]),
43
- ...
44
- sin(f_N * x[..., i]),
45
- cos(x[..., i]),
46
- cos(f_1*x[..., i]),
47
- cos(f_2*x[..., i]),
48
- ...
49
- cos(f_N * x[..., i]),
50
- x[..., i] # only present if include_input is True.
51
- ], here f_i is the frequency.
52
-
53
- Denote the space is [0 / num_freqs, 1 / num_freqs, 2 / num_freqs, 3 / num_freqs, ..., (num_freqs - 1) / num_freqs].
54
- If logspace is True, then the frequency f_i is [2^(0 / num_freqs), ..., 2^(i / num_freqs), ...];
55
- Otherwise, the frequencies are linearly spaced between [1.0, 2^(num_freqs - 1)].
56
-
57
- Args:
58
- num_freqs (int): the number of frequencies, default is 6;
59
- logspace (bool): If logspace is True, then the frequency f_i is [..., 2^(i / num_freqs), ...],
60
- otherwise, the frequencies are linearly spaced between [1.0, 2^(num_freqs - 1)];
61
- input_dim (int): the input dimension, default is 3;
62
- include_input (bool): include the input tensor or not, default is True.
63
-
64
- Attributes:
65
- frequencies (torch.Tensor): If logspace is True, then the frequency f_i is [..., 2^(i / num_freqs), ...],
66
- otherwise, the frequencies are linearly spaced between [1.0, 2^(num_freqs - 1);
67
-
68
- out_dim (int): the embedding size, if include_input is True, it is input_dim * (num_freqs * 2 + 1),
69
- otherwise, it is input_dim * num_freqs * 2.
70
-
71
- """
72
-
73
- def __init__(self,
74
- num_freqs: int = 6,
75
- logspace: bool = True,
76
- input_dim: int = 3,
77
- include_input: bool = True,
78
- include_pi: bool = True) -> None:
79
-
80
- """The initialization"""
81
-
82
- super().__init__()
83
-
84
- if logspace:
85
- frequencies = 2.0 ** torch.arange(
86
- num_freqs,
87
- dtype=torch.float32
88
- )
89
- else:
90
- frequencies = torch.linspace(
91
- 1.0,
92
- 2.0 ** (num_freqs - 1),
93
- num_freqs,
94
- dtype=torch.float32
95
- )
96
-
97
- if include_pi:
98
- frequencies *= torch.pi
99
-
100
- self.register_buffer("frequencies", frequencies, persistent=False)
101
- self.include_input = include_input
102
- self.num_freqs = num_freqs
103
-
104
- self.out_dim = self.get_dims(input_dim)
105
-
106
- def get_dims(self, input_dim):
107
- temp = 1 if self.include_input or self.num_freqs == 0 else 0
108
- out_dim = input_dim * (self.num_freqs * 2 + temp)
109
-
110
- return out_dim
111
-
112
- def forward(self, x: torch.Tensor) -> torch.Tensor:
113
- """ Forward process.
114
-
115
- Args:
116
- x: tensor of shape [..., dim]
117
-
118
- Returns:
119
- embedding: an embedding of `x` of shape [..., dim * (num_freqs * 2 + temp)]
120
- where temp is 1 if include_input is True and 0 otherwise.
121
- """
122
-
123
- if self.num_freqs > 0:
124
- embed = (x[..., None].contiguous() * self.frequencies).view(*x.shape[:-1], -1)
125
- if self.include_input:
126
- return torch.cat((x, embed.sin(), embed.cos()), dim=-1)
127
- else:
128
- return torch.cat((embed.sin(), embed.cos()), dim=-1)
129
- else:
130
- return x
131
-
132
-
133
- class DropPath(nn.Module):
134
- """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
135
- """
136
-
137
- def __init__(self, drop_prob: float = 0., scale_by_keep: bool = True):
138
- super(DropPath, self).__init__()
139
- self.drop_prob = drop_prob
140
- self.scale_by_keep = scale_by_keep
141
-
142
- def forward(self, x):
143
- """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
144
-
145
- This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
146
- the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
147
- See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
148
- changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
149
- 'survival rate' as the argument.
150
-
151
- """
152
- if self.drop_prob == 0. or not self.training:
153
- return x
154
- keep_prob = 1 - self.drop_prob
155
- shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets
156
- random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
157
- if keep_prob > 0.0 and self.scale_by_keep:
158
- random_tensor.div_(keep_prob)
159
- return x * random_tensor
160
-
161
- def extra_repr(self):
162
- return f'drop_prob={round(self.drop_prob, 3):0.3f}'
163
-
164
-
165
- class MLP(nn.Module):
166
- def __init__(
167
- self, *,
168
- width: int,
169
- output_width: int = None,
170
- drop_path_rate: float = 0.0
171
- ):
172
- super().__init__()
173
- self.width = width
174
- self.c_fc = nn.Linear(width, width * 4)
175
- self.c_proj = nn.Linear(width * 4, output_width if output_width is not None else width)
176
- self.gelu = nn.GELU()
177
- self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
178
-
179
- def forward(self, x):
180
- return self.drop_path(self.c_proj(self.gelu(self.c_fc(x))))
181
-
182
-
183
- class QKVMultiheadCrossAttention(nn.Module):
184
- def __init__(
185
- self,
186
- *,
187
- heads: int,
188
- n_data: Optional[int] = None,
189
- width=None,
190
- qk_norm=False,
191
- norm_layer=nn.LayerNorm
192
- ):
193
- super().__init__()
194
- self.heads = heads
195
- self.n_data = n_data
196
- self.q_norm = norm_layer(width // heads, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity()
197
- self.k_norm = norm_layer(width // heads, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity()
198
-
199
- def forward(self, q, kv):
200
- _, n_ctx, _ = q.shape
201
- bs, n_data, width = kv.shape
202
- attn_ch = width // self.heads // 2
203
- q = q.view(bs, n_ctx, self.heads, -1)
204
- kv = kv.view(bs, n_data, self.heads, -1)
205
- k, v = torch.split(kv, attn_ch, dim=-1)
206
-
207
- q = self.q_norm(q)
208
- k = self.k_norm(k)
209
-
210
- q, k, v = map(lambda t: rearrange(t, 'b n h d -> b h n d', h=self.heads), (q, k, v))
211
- out = F.scaled_dot_product_attention(q, k, v).transpose(1, 2).reshape(bs, n_ctx, -1)
212
-
213
- return out
214
-
215
-
216
- class MultiheadCrossAttention(nn.Module):
217
- def __init__(
218
- self,
219
- *,
220
- width: int,
221
- heads: int,
222
- qkv_bias: bool = True,
223
- n_data: Optional[int] = None,
224
- data_width: Optional[int] = None,
225
- norm_layer=nn.LayerNorm,
226
- qk_norm: bool = False
227
- ):
228
- super().__init__()
229
- self.n_data = n_data
230
- self.width = width
231
- self.heads = heads
232
- self.data_width = width if data_width is None else data_width
233
- self.c_q = nn.Linear(width, width, bias=qkv_bias)
234
- self.c_kv = nn.Linear(self.data_width, width * 2, bias=qkv_bias)
235
- self.c_proj = nn.Linear(width, width)
236
- self.attention = QKVMultiheadCrossAttention(
237
- heads=heads,
238
- n_data=n_data,
239
- width=width,
240
- norm_layer=norm_layer,
241
- qk_norm=qk_norm
242
- )
243
-
244
- def forward(self, x, data):
245
- x = self.c_q(x)
246
- data = self.c_kv(data)
247
- x = self.attention(x, data)
248
- x = self.c_proj(x)
249
- return x
250
-
251
-
252
- class ResidualCrossAttentionBlock(nn.Module):
253
- def __init__(
254
- self,
255
- *,
256
- n_data: Optional[int] = None,
257
- width: int,
258
- heads: int,
259
- data_width: Optional[int] = None,
260
- qkv_bias: bool = True,
261
- norm_layer=nn.LayerNorm,
262
- qk_norm: bool = False
263
- ):
264
- super().__init__()
265
-
266
- if data_width is None:
267
- data_width = width
268
-
269
- self.attn = MultiheadCrossAttention(
270
- n_data=n_data,
271
- width=width,
272
- heads=heads,
273
- data_width=data_width,
274
- qkv_bias=qkv_bias,
275
- norm_layer=norm_layer,
276
- qk_norm=qk_norm
277
- )
278
- self.ln_1 = norm_layer(width, elementwise_affine=True, eps=1e-6)
279
- self.ln_2 = norm_layer(data_width, elementwise_affine=True, eps=1e-6)
280
- self.ln_3 = norm_layer(width, elementwise_affine=True, eps=1e-6)
281
- self.mlp = MLP(width=width)
282
-
283
- def forward(self, x: torch.Tensor, data: torch.Tensor):
284
- x = x + self.attn(self.ln_1(x), self.ln_2(data))
285
- x = x + self.mlp(self.ln_3(x))
286
- return x
287
-
288
-
289
- class QKVMultiheadAttention(nn.Module):
290
- def __init__(
291
- self,
292
- *,
293
- heads: int,
294
- n_ctx: int,
295
- width=None,
296
- qk_norm=False,
297
- norm_layer=nn.LayerNorm
298
- ):
299
- super().__init__()
300
- self.heads = heads
301
- self.n_ctx = n_ctx
302
- self.q_norm = norm_layer(width // heads, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity()
303
- self.k_norm = norm_layer(width // heads, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity()
304
-
305
- def forward(self, qkv):
306
- bs, n_ctx, width = qkv.shape
307
- attn_ch = width // self.heads // 3
308
- qkv = qkv.view(bs, n_ctx, self.heads, -1)
309
- q, k, v = torch.split(qkv, attn_ch, dim=-1)
310
-
311
- q = self.q_norm(q)
312
- k = self.k_norm(k)
313
-
314
- q, k, v = map(lambda t: rearrange(t, 'b n h d -> b h n d', h=self.heads), (q, k, v))
315
- out = F.scaled_dot_product_attention(q, k, v).transpose(1, 2).reshape(bs, n_ctx, -1)
316
- return out
317
-
318
-
319
- class MultiheadAttention(nn.Module):
320
- def __init__(
321
- self,
322
- *,
323
- n_ctx: int,
324
- width: int,
325
- heads: int,
326
- qkv_bias: bool,
327
- norm_layer=nn.LayerNorm,
328
- qk_norm: bool = False,
329
- drop_path_rate: float = 0.0
330
- ):
331
- super().__init__()
332
- self.n_ctx = n_ctx
333
- self.width = width
334
- self.heads = heads
335
- self.c_qkv = nn.Linear(width, width * 3, bias=qkv_bias)
336
- self.c_proj = nn.Linear(width, width)
337
- self.attention = QKVMultiheadAttention(
338
- heads=heads,
339
- n_ctx=n_ctx,
340
- width=width,
341
- norm_layer=norm_layer,
342
- qk_norm=qk_norm
343
- )
344
- self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
345
-
346
- def forward(self, x):
347
- x = self.c_qkv(x)
348
- x = self.attention(x)
349
- x = self.drop_path(self.c_proj(x))
350
- return x
351
-
352
-
353
- class ResidualAttentionBlock(nn.Module):
354
- def __init__(
355
- self,
356
- *,
357
- n_ctx: int,
358
- width: int,
359
- heads: int,
360
- qkv_bias: bool = True,
361
- norm_layer=nn.LayerNorm,
362
- qk_norm: bool = False,
363
- drop_path_rate: float = 0.0,
364
- ):
365
- super().__init__()
366
- self.attn = MultiheadAttention(
367
- n_ctx=n_ctx,
368
- width=width,
369
- heads=heads,
370
- qkv_bias=qkv_bias,
371
- norm_layer=norm_layer,
372
- qk_norm=qk_norm,
373
- drop_path_rate=drop_path_rate
374
- )
375
- self.ln_1 = norm_layer(width, elementwise_affine=True, eps=1e-6)
376
- self.mlp = MLP(width=width, drop_path_rate=drop_path_rate)
377
- self.ln_2 = norm_layer(width, elementwise_affine=True, eps=1e-6)
378
-
379
- def forward(self, x: torch.Tensor):
380
- x = x + self.attn(self.ln_1(x))
381
- x = x + self.mlp(self.ln_2(x))
382
- return x
383
-
384
-
385
- class Transformer(nn.Module):
386
- def __init__(
387
- self,
388
- *,
389
- n_ctx: int,
390
- width: int,
391
- layers: int,
392
- heads: int,
393
- qkv_bias: bool = True,
394
- norm_layer=nn.LayerNorm,
395
- qk_norm: bool = False,
396
- drop_path_rate: float = 0.0
397
- ):
398
- super().__init__()
399
- self.n_ctx = n_ctx
400
- self.width = width
401
- self.layers = layers
402
- self.resblocks = nn.ModuleList(
403
- [
404
- ResidualAttentionBlock(
405
- n_ctx=n_ctx,
406
- width=width,
407
- heads=heads,
408
- qkv_bias=qkv_bias,
409
- norm_layer=norm_layer,
410
- qk_norm=qk_norm,
411
- drop_path_rate=drop_path_rate
412
- )
413
- for _ in range(layers)
414
- ]
415
- )
416
-
417
- def forward(self, x: torch.Tensor):
418
- for block in self.resblocks:
419
- x = block(x)
420
- return x
421
-
422
-
423
- class CrossAttentionDecoder(nn.Module):
424
-
425
- def __init__(
426
- self,
427
- *,
428
- num_latents: int,
429
- out_channels: int,
430
- fourier_embedder: FourierEmbedder,
431
- width: int,
432
- heads: int,
433
- qkv_bias: bool = True,
434
- qk_norm: bool = False,
435
- label_type: str = "binary"
436
- ):
437
- super().__init__()
438
-
439
- self.fourier_embedder = fourier_embedder
440
-
441
- self.query_proj = nn.Linear(self.fourier_embedder.out_dim, width)
442
-
443
- self.cross_attn_decoder = ResidualCrossAttentionBlock(
444
- n_data=num_latents,
445
- width=width,
446
- heads=heads,
447
- qkv_bias=qkv_bias,
448
- qk_norm=qk_norm
449
- )
450
-
451
- self.ln_post = nn.LayerNorm(width)
452
- self.output_proj = nn.Linear(width, out_channels)
453
- self.label_type = label_type
454
-
455
- def forward(self, queries: torch.FloatTensor, latents: torch.FloatTensor):
456
- queries = self.query_proj(self.fourier_embedder(queries).to(latents.dtype))
457
- x = self.cross_attn_decoder(queries, latents)
458
- x = self.ln_post(x)
459
- occ = self.output_proj(x)
460
- return occ
461
-
462
-
463
- def generate_dense_grid_points(bbox_min: np.ndarray,
464
- bbox_max: np.ndarray,
465
- octree_depth: int,
466
- indexing: str = "ij",
467
- octree_resolution: int = None,
468
- ):
469
- length = bbox_max - bbox_min
470
- num_cells = np.exp2(octree_depth)
471
- if octree_resolution is not None:
472
- num_cells = octree_resolution
473
-
474
- x = np.linspace(bbox_min[0], bbox_max[0], int(num_cells) + 1, dtype=np.float32)
475
- y = np.linspace(bbox_min[1], bbox_max[1], int(num_cells) + 1, dtype=np.float32)
476
- z = np.linspace(bbox_min[2], bbox_max[2], int(num_cells) + 1, dtype=np.float32)
477
- [xs, ys, zs] = np.meshgrid(x, y, z, indexing=indexing)
478
- xyz = np.stack((xs, ys, zs), axis=-1)
479
- xyz = xyz.reshape(-1, 3)
480
- grid_size = [int(num_cells) + 1, int(num_cells) + 1, int(num_cells) + 1]
481
-
482
- return xyz, grid_size, length
483
-
484
-
485
- def center_vertices(vertices):
486
- """Translate the vertices so that bounding box is centered at zero."""
487
- vert_min = vertices.min(dim=0)[0]
488
- vert_max = vertices.max(dim=0)[0]
489
- vert_center = 0.5 * (vert_min + vert_max)
490
- return vertices - vert_center
491
-
492
-
493
- class Latent2MeshOutput:
494
-
495
- def __init__(self, mesh_v=None, mesh_f=None):
496
- self.mesh_v = mesh_v
497
- self.mesh_f = mesh_f
498
-
499
-
500
- class ShapeVAE(nn.Module):
501
- def __init__(
502
- self,
503
- *,
504
- num_latents: int,
505
- embed_dim: int,
506
- width: int,
507
- heads: int,
508
- num_decoder_layers: int,
509
- num_freqs: int = 8,
510
- include_pi: bool = True,
511
- qkv_bias: bool = True,
512
- qk_norm: bool = False,
513
- label_type: str = "binary",
514
- drop_path_rate: float = 0.0,
515
- scale_factor: float = 1.0,
516
- ):
517
- super().__init__()
518
- self.fourier_embedder = FourierEmbedder(num_freqs=num_freqs, include_pi=include_pi)
519
-
520
- self.post_kl = nn.Linear(embed_dim, width)
521
-
522
- self.transformer = Transformer(
523
- n_ctx=num_latents,
524
- width=width,
525
- layers=num_decoder_layers,
526
- heads=heads,
527
- qkv_bias=qkv_bias,
528
- qk_norm=qk_norm,
529
- drop_path_rate=drop_path_rate
530
- )
531
-
532
- self.geo_decoder = CrossAttentionDecoder(
533
- fourier_embedder=self.fourier_embedder,
534
- out_channels=1,
535
- num_latents=num_latents,
536
- width=width,
537
- heads=heads,
538
- qkv_bias=qkv_bias,
539
- qk_norm=qk_norm,
540
- label_type=label_type,
541
- )
542
-
543
- self.scale_factor = scale_factor
544
- self.latent_shape = (num_latents, embed_dim)
545
-
546
- def forward(self, latents):
547
- latents = self.post_kl(latents)
548
- latents = self.transformer(latents)
549
- return latents
550
-
551
- @torch.no_grad()
552
- def latents2mesh(
553
- self,
554
- latents: torch.FloatTensor,
555
- bounds: Union[Tuple[float], List[float], float] = 1.1,
556
- octree_depth: int = 7,
557
- num_chunks: int = 10000,
558
- mc_level: float = -1 / 512,
559
- octree_resolution: int = None,
560
- mc_algo: str = 'dmc',
561
- ):
562
- device = latents.device
563
-
564
- # 1. generate query points
565
- if isinstance(bounds, float):
566
- bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds]
567
- bbox_min = np.array(bounds[0:3])
568
- bbox_max = np.array(bounds[3:6])
569
- bbox_size = bbox_max - bbox_min
570
- xyz_samples, grid_size, length = generate_dense_grid_points(
571
- bbox_min=bbox_min,
572
- bbox_max=bbox_max,
573
- octree_depth=octree_depth,
574
- octree_resolution=octree_resolution,
575
- indexing="ij"
576
- )
577
- xyz_samples = torch.FloatTensor(xyz_samples)
578
-
579
- # 2. latents to 3d volume
580
- batch_logits = []
581
- batch_size = latents.shape[0]
582
- for start in tqdm(range(0, xyz_samples.shape[0], num_chunks),
583
- desc=f"MC Level {mc_level} Implicit Function:"):
584
- queries = xyz_samples[start: start + num_chunks, :].to(device)
585
- queries = queries.half()
586
- batch_queries = repeat(queries, "p c -> b p c", b=batch_size)
587
-
588
- logits = self.geo_decoder(batch_queries.to(latents.dtype), latents)
589
- if mc_level == -1:
590
- mc_level = 0
591
- logits = torch.sigmoid(logits) * 2 - 1
592
- print(f'Training with soft labels, inference with sigmoid and marching cubes level 0.')
593
- batch_logits.append(logits)
594
- grid_logits = torch.cat(batch_logits, dim=1)
595
- grid_logits = grid_logits.view((batch_size, grid_size[0], grid_size[1], grid_size[2])).float()
596
-
597
- # 3. extract surface
598
- outputs = []
599
- for i in range(batch_size):
600
- try:
601
- if mc_algo == 'mc':
602
- vertices, faces, normals, _ = measure.marching_cubes(
603
- grid_logits[i].cpu().numpy(),
604
- mc_level,
605
- method="lewiner"
606
- )
607
- vertices = vertices / grid_size * bbox_size + bbox_min
608
- elif mc_algo == 'dmc':
609
- if not hasattr(self, 'dmc'):
610
- try:
611
- from diso import DiffDMC
612
- except:
613
- raise ImportError("Please install diso via `pip install diso`, or set mc_algo to 'mc'")
614
- self.dmc = DiffDMC(dtype=torch.float32).to(device)
615
- octree_resolution = 2 ** octree_depth if octree_resolution is None else octree_resolution
616
- sdf = -grid_logits[i] / octree_resolution
617
- verts, faces = self.dmc(sdf, deform=None, return_quads=False, normalize=True)
618
- verts = center_vertices(verts)
619
- vertices = verts.detach().cpu().numpy()
620
- faces = faces.detach().cpu().numpy()[:, ::-1]
621
- else:
622
- raise ValueError(f"mc_algo {mc_algo} not supported.")
623
-
624
- outputs.append(
625
- Latent2MeshOutput(
626
- mesh_v=vertices.astype(np.float32),
627
- mesh_f=np.ascontiguousarray(faces)
628
- )
629
- )
630
-
631
- except ValueError:
632
- outputs.append(None)
633
- except RuntimeError:
634
- outputs.append(None)
635
-
636
- return outputs
 
hy3dgen/shapegen/pipelines.py DELETED
@@ -1,589 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the repsective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
- import copy
26
- import importlib
27
- import inspect
28
- import logging
29
- import os
30
- from typing import List, Optional, Union
31
-
32
- import numpy as np
33
- import torch
34
- import trimesh
35
- import yaml
36
- from PIL import Image
37
- from diffusers.utils.torch_utils import randn_tensor
38
- from tqdm import tqdm
39
-
40
- logger = logging.getLogger(__name__)
41
-
42
-
43
- def retrieve_timesteps(
44
- scheduler,
45
- num_inference_steps: Optional[int] = None,
46
- device: Optional[Union[str, torch.device]] = None,
47
- timesteps: Optional[List[int]] = None,
48
- sigmas: Optional[List[float]] = None,
49
- **kwargs,
50
- ):
51
- """
52
- Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
53
- custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
54
-
55
- Args:
56
- scheduler (`SchedulerMixin`):
57
- The scheduler to get timesteps from.
58
- num_inference_steps (`int`):
59
- The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
60
- must be `None`.
61
- device (`str` or `torch.device`, *optional*):
62
- The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
63
- timesteps (`List[int]`, *optional*):
64
- Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
65
- `num_inference_steps` and `sigmas` must be `None`.
66
- sigmas (`List[float]`, *optional*):
67
- Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
68
- `num_inference_steps` and `timesteps` must be `None`.
69
-
70
- Returns:
71
- `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
72
- second element is the number of inference steps.
73
- """
74
- if timesteps is not None and sigmas is not None:
75
- raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
76
- if timesteps is not None:
77
- accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
78
- if not accepts_timesteps:
79
- raise ValueError(
80
- f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
81
- f" timestep schedules. Please check whether you are using the correct scheduler."
82
- )
83
- scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
84
- timesteps = scheduler.timesteps
85
- num_inference_steps = len(timesteps)
86
- elif sigmas is not None:
87
- accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
88
- if not accept_sigmas:
89
- raise ValueError(
90
- f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
91
- f" sigmas schedules. Please check whether you are using the correct scheduler."
92
- )
93
- scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
94
- timesteps = scheduler.timesteps
95
- num_inference_steps = len(timesteps)
96
- else:
97
- scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
98
- timesteps = scheduler.timesteps
99
- return timesteps, num_inference_steps
100
-
101
-
102
- def export_to_trimesh(mesh_output):
103
- if isinstance(mesh_output, list):
104
- outputs = []
105
- for mesh in mesh_output:
106
- if mesh is None:
107
- outputs.append(None)
108
- else:
109
- mesh.mesh_f = mesh.mesh_f[:, ::-1]
110
- mesh_output = trimesh.Trimesh(mesh.mesh_v, mesh.mesh_f)
111
- outputs.append(mesh_output)
112
- return outputs
113
- else:
114
- mesh_output.mesh_f = mesh_output.mesh_f[:, ::-1]
115
- mesh_output = trimesh.Trimesh(mesh_output.mesh_v, mesh_output.mesh_f)
116
- return mesh_output
117
-
118
-
119
- def get_obj_from_str(string, reload=False):
120
- module, cls = string.rsplit(".", 1)
121
- if reload:
122
- module_imp = importlib.import_module(module)
123
- importlib.reload(module_imp)
124
- return getattr(importlib.import_module(module, package=None), cls)
125
-
126
-
127
- def instantiate_from_config(config, **kwargs):
128
- if "target" not in config:
129
- raise KeyError("Expected key `target` to instantiate.")
130
- cls = get_obj_from_str(config["target"])
131
- params = config.get("params", dict())
132
- kwargs.update(params)
133
- instance = cls(**kwargs)
134
- return instance
135
-
136
-
137
- class Hunyuan3DDiTPipeline:
138
- @classmethod
139
- def from_single_file(
140
- cls,
141
- ckpt_path,
142
- config_path,
143
- device='cpu',
144
- dtype=torch.float16,
145
- **kwargs,
146
- ):
147
- # load config
148
- with open(config_path, 'r') as f:
149
- config = yaml.safe_load(f)
150
-
151
- # load ckpt
152
- if not os.path.exists(ckpt_path):
153
- raise FileNotFoundError(f"Model file {ckpt_path} not found")
154
- logger.info(f"Loading model from {ckpt_path}")
155
-
156
- if ckpt_path.endswith('.safetensors'):
157
- # parse safetensors
158
- import safetensors.torch
159
- safetensors_ckpt = safetensors.torch.load_file(ckpt_path, device='cpu')
160
- ckpt = {}
161
- for key, value in safetensors_ckpt.items():
162
- model_name = key.split('.')[0]
163
- new_key = key[len(model_name) + 1:]
164
- if model_name not in ckpt:
165
- ckpt[model_name] = {}
166
- ckpt[model_name][new_key] = value
167
- else:
168
- ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=True)
169
-
170
- # load model
171
- from accelerate import init_empty_weights
172
- with init_empty_weights():
173
- model = instantiate_from_config(config['model'])
174
- vae = instantiate_from_config(config['vae'])
175
- conditioner = instantiate_from_config(config['conditioner'])
176
- image_processor = instantiate_from_config(config['image_processor'])
177
- scheduler = instantiate_from_config(config['scheduler'])
178
-
179
- model.load_state_dict(ckpt['model'], assign = True)
180
- vae.load_state_dict(ckpt['vae'], assign = True)
181
- if 'conditioner' in ckpt:
182
- conditioner.load_state_dict(ckpt['conditioner'], assign = True)
183
-
184
- model_kwargs = dict(
185
- vae=vae,
186
- model=model,
187
- scheduler=scheduler,
188
- conditioner=conditioner,
189
- image_processor=image_processor,
190
- device=device,
191
- dtype=dtype,
192
- )
193
- model_kwargs.update(kwargs)
194
-
195
- return cls(
196
- **model_kwargs
197
- )
198
-
199
- @classmethod
200
- def from_pretrained(
201
- cls,
202
- model_path,
203
- device='cuda',
204
- dtype=torch.float16,
205
- use_safetensors=None,
206
- variant=None,
207
- subfolder='hunyuan3d-dit-v2-0',
208
- **kwargs,
209
- ):
210
- original_model_path = model_path
211
- if not os.path.exists(model_path):
212
- # try local path
213
- base_dir = os.environ.get('HY3DGEN_MODELS', '/content/hy3dgen')
214
- model_path = os.path.expanduser(os.path.join(base_dir, model_path, subfolder))
215
- if not os.path.exists(model_path):
216
- try:
217
- import huggingface_hub
218
- # download from huggingface
219
- path = huggingface_hub.snapshot_download(repo_id=original_model_path)
220
- model_path = os.path.join(path, subfolder)
221
- except ImportError:
222
- logger.warning(
223
- "You need to install HuggingFace Hub to load models from the hub."
224
- )
225
- raise RuntimeError(f"Model path {model_path} not found")
226
- if not os.path.exists(model_path):
227
- raise FileNotFoundError(f"Model path {original_model_path} not found")
228
-
229
- extension = 'ckpt' if not use_safetensors else 'safetensors'
230
- variant = '' if variant is None else f'.{variant}'
231
- ckpt_name = f'model{variant}.{extension}'
232
- config_path = os.path.join(model_path, 'config.yaml')
233
- ckpt_path = os.path.join(model_path, ckpt_name)
234
-
235
- return cls.from_single_file(
236
- ckpt_path,
237
- config_path,
238
- device=device,
239
- dtype=dtype,
240
- use_safetensors=use_safetensors,
241
- variant=variant,
242
- **kwargs
243
- )
244
-
245
- def __init__(
246
- self,
247
- vae,
248
- model,
249
- scheduler,
250
- conditioner,
251
- image_processor,
252
- device='cuda',
253
- dtype=torch.float16,
254
- **kwargs
255
- ):
256
- self.vae = vae
257
- self.model = model
258
- self.scheduler = scheduler
259
- self.conditioner = conditioner
260
- self.image_processor = image_processor
261
-
262
- self.to(device, dtype)
263
-
264
- def to(self, device=None, dtype=None):
265
- if device is not None:
266
- self.device = torch.device(device)
267
- self.vae.to(device)
268
- self.model.to(device)
269
- self.conditioner.to(device)
270
- if dtype is not None:
271
- self.dtype = dtype
272
- self.vae.to(dtype=dtype)
273
- self.model.to(dtype=dtype)
274
- self.conditioner.to(dtype=dtype)
275
-
276
- def encode_cond(self, image, mask, do_classifier_free_guidance, dual_guidance):
277
- bsz = image.shape[0]
278
- cond = self.conditioner(image=image, mask=mask)
279
-
280
- if do_classifier_free_guidance:
281
- un_cond = self.conditioner.unconditional_embedding(bsz)
282
-
283
- if dual_guidance:
284
- un_cond_drop_main = copy.deepcopy(un_cond)
285
- un_cond_drop_main['additional'] = cond['additional']
286
-
287
- def cat_recursive(a, b, c):
288
- if isinstance(a, torch.Tensor):
289
- return torch.cat([a, b, c], dim=0).to(self.dtype)
290
- out = {}
291
- for k in a.keys():
292
- out[k] = cat_recursive(a[k], b[k], c[k])
293
- return out
294
-
295
- cond = cat_recursive(cond, un_cond_drop_main, un_cond)
296
- else:
297
- un_cond = self.conditioner.unconditional_embedding(bsz)
298
-
299
- def cat_recursive(a, b):
300
- if isinstance(a, torch.Tensor):
301
- return torch.cat([a, b], dim=0).to(self.dtype)
302
- out = {}
303
- for k in a.keys():
304
- out[k] = cat_recursive(a[k], b[k])
305
- return out
306
-
307
- cond = cat_recursive(cond, un_cond)
308
- return cond
309
-
310
- def prepare_extra_step_kwargs(self, generator, eta):
311
- # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
312
- # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
313
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
314
- # and should be between [0, 1]
315
-
316
- accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
317
- extra_step_kwargs = {}
318
- if accepts_eta:
319
- extra_step_kwargs["eta"] = eta
320
-
321
- # check if the scheduler accepts generator
322
- accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys())
323
- if accepts_generator:
324
- extra_step_kwargs["generator"] = generator
325
- return extra_step_kwargs
326
-
327
- def prepare_latents(self, batch_size, dtype, device, generator, latents=None):
328
- shape = (batch_size, *self.vae.latent_shape)
329
- if isinstance(generator, list) and len(generator) != batch_size:
330
- raise ValueError(
331
- f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
332
- f" size of {batch_size}. Make sure the batch size matches the length of the generators."
333
- )
334
-
335
- if latents is None:
336
- latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
337
- else:
338
- latents = latents.to(device)
339
-
340
- # scale the initial noise by the standard deviation required by the scheduler
341
- latents = latents * getattr(self.scheduler, 'init_noise_sigma', 1.0)
342
- return latents
343
-
344
- def prepare_image(self, image):
345
- if isinstance(image, str) and not os.path.exists(image):
346
- raise FileNotFoundError(f"Couldn't find image at path {image}")
347
-
348
- if not isinstance(image, list):
349
- image = [image]
350
- image_pts = []
351
- mask_pts = []
352
- for img in image:
353
- image_pt, mask_pt = self.image_processor(img, return_mask=True)
354
- image_pts.append(image_pt)
355
- mask_pts.append(mask_pt)
356
-
357
- image_pts = torch.cat(image_pts, dim=0).to(self.device, dtype=self.dtype)
358
- if mask_pts[0] is not None:
359
- mask_pts = torch.cat(mask_pts, dim=0).to(self.device, dtype=self.dtype)
360
- else:
361
- mask_pts = None
362
- return image_pts, mask_pts
363
-
364
- def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
365
- """
366
- See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
367
-
368
- Args:
369
- timesteps (`torch.Tensor`):
370
- generate embedding vectors at these timesteps
371
- embedding_dim (`int`, *optional*, defaults to 512):
372
- dimension of the embeddings to generate
373
- dtype:
374
- data type of the generated embeddings
375
-
376
- Returns:
377
- `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
378
- """
379
- assert len(w.shape) == 1
380
- w = w * 1000.0
381
-
382
- half_dim = embedding_dim // 2
383
- emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
384
- emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
385
- emb = w.to(dtype)[:, None] * emb[None, :]
386
- emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
387
- if embedding_dim % 2 == 1: # zero pad
388
- emb = torch.nn.functional.pad(emb, (0, 1))
389
- assert emb.shape == (w.shape[0], embedding_dim)
390
- return emb
391
-
392
- @torch.no_grad()
393
- def __call__(
394
- self,
395
- image: Union[str, List[str], Image.Image] = None,
396
- num_inference_steps: int = 50,
397
- timesteps: List[int] = None,
398
- sigmas: List[float] = None,
399
- eta: float = 0.0,
400
- guidance_scale: float = 7.5,
401
- dual_guidance_scale: float = 10.5,
402
- dual_guidance: bool = True,
403
- generator=None,
404
- box_v=1.01,
405
- octree_resolution=384,
406
- mc_level=-1 / 512,
407
- num_chunks=8000,
408
- mc_algo='mc',
409
- output_type: Optional[str] = "trimesh",
410
- enable_pbar=True,
411
- **kwargs,
412
- ) -> List[List[trimesh.Trimesh]]:
413
- callback = kwargs.pop("callback", None)
414
- callback_steps = kwargs.pop("callback_steps", None)
415
-
416
- device = self.device
417
- dtype = self.dtype
418
- do_classifier_free_guidance = guidance_scale >= 0 and \
419
- getattr(self.model, 'guidance_cond_proj_dim', None) is None
420
- dual_guidance = dual_guidance_scale >= 0 and dual_guidance
421
-
422
- image, mask = self.prepare_image(image)
423
- cond = self.encode_cond(image=image,
424
- mask=mask,
425
- do_classifier_free_guidance=do_classifier_free_guidance,
426
- dual_guidance=dual_guidance)
427
- batch_size = image.shape[0]
428
-
429
- t_dtype = torch.long
430
- timesteps, num_inference_steps = retrieve_timesteps(
431
- self.scheduler, num_inference_steps, device, timesteps, sigmas)
432
-
433
- latents = self.prepare_latents(batch_size, dtype, device, generator)
434
- extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
435
-
436
- guidance_cond = None
437
- if getattr(self.model, 'guidance_cond_proj_dim', None) is not None:
438
- print('Using lcm guidance scale')
439
- guidance_scale_tensor = torch.tensor(guidance_scale - 1).repeat(batch_size)
440
- guidance_cond = self.get_guidance_scale_embedding(
441
- guidance_scale_tensor, embedding_dim=self.model.guidance_cond_proj_dim
442
- ).to(device=device, dtype=latents.dtype)
443
-
444
- for i, t in enumerate(tqdm(timesteps, disable=not enable_pbar, desc="Diffusion Sampling:", leave=False)):
445
- # expand the latents if we are doing classifier free guidance
446
- if do_classifier_free_guidance:
447
- latent_model_input = torch.cat([latents] * (3 if dual_guidance else 2))
448
- else:
449
- latent_model_input = latents
450
- latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
451
-
452
- # predict the noise residual
453
- timestep_tensor = torch.tensor([t], dtype=t_dtype, device=device)
454
- timestep_tensor = timestep_tensor.expand(latent_model_input.shape[0])
455
- noise_pred = self.model(latent_model_input, timestep_tensor, cond, guidance_cond=guidance_cond)
456
-
457
- # no drop, drop clip, all drop
458
- if do_classifier_free_guidance:
459
- if dual_guidance:
460
- noise_pred_clip, noise_pred_dino, noise_pred_uncond = noise_pred.chunk(3)
461
- noise_pred = (
462
- noise_pred_uncond
463
- + guidance_scale * (noise_pred_clip - noise_pred_dino)
464
- + dual_guidance_scale * (noise_pred_dino - noise_pred_uncond)
465
- )
466
- else:
467
- noise_pred_cond, noise_pred_uncond = noise_pred.chunk(2)
468
- noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond)
469
-
470
- # compute the previous noisy sample x_t -> x_t-1
471
- outputs = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs)
472
- latents = outputs.prev_sample
473
-
474
- if callback is not None and i % callback_steps == 0:
475
- step_idx = i // getattr(self.scheduler, "order", 1)
476
- callback(step_idx, t, outputs)
477
-
478
- return self._export(
479
- latents,
480
- output_type,
481
- box_v, mc_level, num_chunks, octree_resolution, mc_algo,
482
- )
483
-
484
- def _export(self, latents, output_type, box_v, mc_level, num_chunks, octree_resolution, mc_algo):
485
- if not output_type == "latent":
486
- latents = 1. / self.vae.scale_factor * latents
487
- latents = self.vae(latents)
488
- outputs = self.vae.latents2mesh(
489
- latents,
490
- bounds=box_v,
491
- mc_level=mc_level,
492
- num_chunks=num_chunks,
493
- octree_resolution=octree_resolution,
494
- mc_algo=mc_algo,
495
- )
496
- else:
497
- outputs = latents
498
-
499
- if output_type == 'trimesh':
500
- outputs = export_to_trimesh(outputs)
501
-
502
- return outputs
503
-
504
-
505
- class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline):
506
-
507
- @torch.no_grad()
508
- def __call__(
509
- self,
510
- image: Union[str, List[str], Image.Image] = None,
511
- num_inference_steps: int = 50,
512
- timesteps: List[int] = None,
513
- sigmas: List[float] = None,
514
- eta: float = 0.0,
515
- guidance_scale: float = 7.5,
516
- generator=None,
517
- box_v=1.01,
518
- octree_resolution=384,
519
- mc_level=0.0,
520
- mc_algo='mc',
521
- num_chunks=8000,
522
- output_type: Optional[str] = "trimesh",
523
- enable_pbar=True,
524
- **kwargs,
525
- ) -> List[List[trimesh.Trimesh]]:
526
- callback = kwargs.pop("callback", None)
527
- callback_steps = kwargs.pop("callback_steps", None)
528
-
529
- device = self.device
530
- dtype = self.dtype
531
- do_classifier_free_guidance = guidance_scale >= 0 and not (
532
- hasattr(self.model, 'guidance_embed') and
533
- self.model.guidance_embed is True
534
- )
535
-
536
- image, mask = self.prepare_image(image)
537
- cond = self.encode_cond(
538
- image=image,
539
- mask=mask,
540
- do_classifier_free_guidance=do_classifier_free_guidance,
541
- dual_guidance=False,
542
- )
543
- batch_size = image.shape[0]
544
-
545
- # 5. Prepare timesteps
546
- # NOTE: this is slightly different from common usage, we start from 0.
547
- sigmas = np.linspace(0, 1, num_inference_steps) if sigmas is None else sigmas
548
- timesteps, num_inference_steps = retrieve_timesteps(
549
- self.scheduler,
550
- num_inference_steps,
551
- device,
552
- sigmas=sigmas,
553
- )
554
- latents = self.prepare_latents(batch_size, dtype, device, generator)
555
-
556
- guidance = None
557
- if hasattr(self.model, 'guidance_embed') and \
558
- self.model.guidance_embed is True:
559
- guidance = torch.tensor([guidance_scale] * batch_size, device=device, dtype=dtype)
560
-
561
- for i, t in enumerate(tqdm(timesteps, disable=not enable_pbar, desc="Diffusion Sampling:")):
562
- # expand the latents if we are doing classifier free guidance
563
- if do_classifier_free_guidance:
564
- latent_model_input = torch.cat([latents] * 2)
565
- else:
566
- latent_model_input = latents
567
-
568
- # NOTE: we assume model get timesteps ranged from 0 to 1
569
- timestep = t.expand(latent_model_input.shape[0]).to(
570
- latents.dtype) / self.scheduler.config.num_train_timesteps
571
- noise_pred = self.model(latent_model_input, timestep, cond, guidance=guidance)
572
-
573
- if do_classifier_free_guidance:
574
- noise_pred_cond, noise_pred_uncond = noise_pred.chunk(2)
575
- noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond)
576
-
577
- # compute the previous noisy sample x_t -> x_t-1
578
- outputs = self.scheduler.step(noise_pred, t, latents)
579
- latents = outputs.prev_sample
580
-
581
- if callback is not None and i % callback_steps == 0:
582
- step_idx = i // getattr(self.scheduler, "order", 1)
583
- callback(step_idx, t, outputs)
584
-
585
- return self._export(
586
- latents,
587
- output_type,
588
- box_v, mc_level, num_chunks, octree_resolution, mc_algo,
589
- )
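
For reference, a minimal sketch of how the pipeline module deleted above was typically driven. The class and parameter names come from the removed `hy3dgen/shapegen/pipelines.py`; the repository id, image path, and output filename are illustrative placeholders, not values taken from this commit. The removed `from_pretrained` resolved `model_path` against the `HY3DGEN_MODELS` cache directory first and only then fell back to a `huggingface_hub.snapshot_download`.

```python
# Hypothetical usage sketch of the removed flow-matching pipeline.
# 'tencent/Hunyuan3D-2', the image path, and the output name are placeholders.
import torch

from hy3dgen.shapegen.pipelines import Hunyuan3DDiTFlowMatchingPipeline

pipeline = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
    'tencent/Hunyuan3D-2',        # resolved via HY3DGEN_MODELS or a HuggingFace snapshot
    device='cuda',
    dtype=torch.float16,
)

# output_type defaults to "trimesh", so the call returns trimesh.Trimesh objects.
meshes = pipeline(
    image='demo_image.png',
    num_inference_steps=30,
    octree_resolution=256,
    guidance_scale=7.5,
)
meshes[0].export('shape.glb')
```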
 
hy3dgen/shapegen/postprocessors.py DELETED
@@ -1,175 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
- import os
26
- import tempfile
27
- from typing import Union
28
-
29
- import pymeshlab
30
- import trimesh
31
-
32
- from .models.vae import Latent2MeshOutput
33
-
34
-
35
- def load_mesh(path):
36
- if path.endswith(".glb"):
37
- mesh = trimesh.load(path)
38
- else:
39
- mesh = pymeshlab.MeshSet()
40
- mesh.load_new_mesh(path)
41
- return mesh
42
-
43
-
44
- def reduce_face(mesh: pymeshlab.MeshSet, max_facenum: int = 200000):
45
- mesh.apply_filter(
46
- "meshing_decimation_quadric_edge_collapse",
47
- targetfacenum=max_facenum,
48
- qualitythr=1.0,
49
- preserveboundary=True,
50
- boundaryweight=3,
51
- preservenormal=True,
52
- preservetopology=True,
53
- autoclean=True
54
- )
55
- return mesh
56
-
57
-
58
- def remove_floater(mesh: pymeshlab.MeshSet):
59
- mesh.apply_filter("compute_selection_by_small_disconnected_components_per_face",
60
- nbfaceratio=0.005)
61
- mesh.apply_filter("compute_selection_transfer_face_to_vertex", inclusive=False)
62
- mesh.apply_filter("meshing_remove_selected_vertices_and_faces")
63
- return mesh
64
-
65
-
66
- def pymeshlab2trimesh(mesh: pymeshlab.MeshSet):
67
- temp_file = tempfile.NamedTemporaryFile(suffix='.ply', delete=True)
68
- temp_file.close()
69
- temp_file_name = temp_file.name
70
-
71
- mesh.save_current_mesh(temp_file_name)
72
- mesh = trimesh.load(temp_file_name)
73
- if os.path.exists(temp_file_name):
74
- os.remove(temp_file_name)
75
-
76
-     # Check the type of the loaded object
77
- if isinstance(mesh, trimesh.Scene):
78
- combined_mesh = trimesh.Trimesh()
79
-         # If it is a Scene, iterate over all of its geometry and merge it
80
- for geom in mesh.geometry.values():
81
- combined_mesh = trimesh.util.concatenate([combined_mesh, geom])
82
- mesh = combined_mesh
83
- return mesh
84
-
85
-
86
- def trimesh2pymeshlab(mesh: trimesh.Trimesh):
87
- temp_file = tempfile.NamedTemporaryFile(suffix='.ply', delete=True)
88
- temp_file.close()
89
- temp_file_name = temp_file.name
90
-
91
- if isinstance(mesh, trimesh.scene.Scene):
92
- for idx, obj in enumerate(mesh.geometry.values()):
93
- if idx == 0:
94
- temp_mesh = obj
95
- else:
96
- temp_mesh = temp_mesh + obj
97
- mesh = temp_mesh
98
- mesh.export(temp_file_name)
99
- mesh = pymeshlab.MeshSet()
100
- mesh.load_new_mesh(temp_file_name)
101
- if os.path.exists(temp_file_name):
102
- os.remove(temp_file_name)
103
-
104
- return mesh
105
-
106
-
107
- def export_mesh(input, output):
108
- if isinstance(input, pymeshlab.MeshSet):
109
- mesh = output
110
- elif isinstance(input, Latent2MeshOutput):
111
- output = Latent2MeshOutput()
112
- output.mesh_v = output.current_mesh().vertex_matrix()
113
- output.mesh_f = output.current_mesh().face_matrix()
114
- mesh = output
115
- else:
116
- mesh = pymeshlab2trimesh(output)
117
- return mesh
118
-
119
-
120
- def import_mesh(mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str]) -> pymeshlab.MeshSet:
121
- if isinstance(mesh, str):
122
- mesh = load_mesh(mesh)
123
- elif isinstance(mesh, Latent2MeshOutput):
124
- mesh = pymeshlab.MeshSet()
125
- mesh_pymeshlab = pymeshlab.Mesh(vertex_matrix=mesh.mesh_v, face_matrix=mesh.mesh_f)
126
- mesh.add_mesh(mesh_pymeshlab, "converted_mesh")
127
-
128
- if isinstance(mesh, (trimesh.Trimesh, trimesh.scene.Scene)):
129
- mesh = trimesh2pymeshlab(mesh)
130
-
131
- return mesh
132
-
133
-
134
- class FaceReducer:
135
- def __call__(
136
- self,
137
- mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str],
138
- max_facenum: int = 40000
139
- ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh]:
140
- ms = import_mesh(mesh)
141
- ms = reduce_face(ms, max_facenum=max_facenum)
142
- mesh = export_mesh(mesh, ms)
143
- return mesh
144
-
145
-
146
- class FloaterRemover:
147
- def __call__(
148
- self,
149
- mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str],
150
- ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput]:
151
- ms = import_mesh(mesh)
152
- ms = remove_floater(ms)
153
- mesh = export_mesh(mesh, ms)
154
- return mesh
155
-
156
-
157
- class DegenerateFaceRemover:
158
- def __call__(
159
- self,
160
- mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str],
161
- ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput]:
162
- ms = import_mesh(mesh)
163
-
164
- temp_file = tempfile.NamedTemporaryFile(suffix='.ply', delete=True)
165
- temp_file.close()
166
- temp_file_name = temp_file.name
167
-
168
- ms.save_current_mesh(temp_file_name)
169
- ms = pymeshlab.MeshSet()
170
- ms.load_new_mesh(temp_file_name)
171
- if os.path.exists(temp_file_name):
172
- os.remove(temp_file_name)
173
-
174
- mesh = export_mesh(mesh, ms)
175
- return mesh
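
A short sketch of chaining the post-processing helpers removed above on a generated mesh; the input and output paths are placeholders. Each callable accepts a `trimesh.Trimesh`, a `pymeshlab.MeshSet`, a `Latent2MeshOutput`, or a file path, and hands back a cleaned mesh.

```python
# Hypothetical clean-up chain using the removed postprocessors (paths are placeholders).
import trimesh

from hy3dgen.shapegen.postprocessors import DegenerateFaceRemover, FaceReducer, FloaterRemover

mesh = trimesh.load('shape.glb', force='mesh')

mesh = FloaterRemover()(mesh)                   # drop small disconnected components
mesh = DegenerateFaceRemover()(mesh)            # round-trip through pymeshlab to clean degenerate faces
mesh = FaceReducer()(mesh, max_facenum=40000)   # quadric edge-collapse decimation

mesh.export('shape_clean.glb')
```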
 
hy3dgen/shapegen/preprocessors.py DELETED
@@ -1,127 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
6
- # The below software and/or models in this distribution may have been
7
- # modified by THL A29 Limited ("Tencent Modifications").
8
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
9
-
10
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
11
- # except for the third-party components listed below.
12
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
13
- # in the respective licenses of these third-party components.
14
- # Users must comply with all terms and conditions of original licenses of these third-party
15
- # components and must ensure that the usage of the third party components adheres to
16
- # all relevant laws and regulations.
17
-
18
- # For avoidance of doubts, Hunyuan 3D means the large language models and
19
- # their software and algorithms, including trained model weights, parameters (including
20
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
21
- # fine-tuning enabling code and other elements of the foregoing made publicly available
22
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
23
-
24
- import cv2
25
- import numpy as np
26
- import torch
27
- from PIL import Image
28
- from einops import repeat, rearrange
29
-
30
-
31
- def array_to_tensor(np_array):
32
- image_pt = torch.tensor(np_array).float()
33
- image_pt = image_pt / 255 * 2 - 1
34
- image_pt = rearrange(image_pt, "h w c -> c h w")
35
- image_pts = repeat(image_pt, "c h w -> b c h w", b=1)
36
- return image_pts
37
-
38
-
39
- class ImageProcessorV2:
40
- def __init__(self, size=512, border_ratio=None):
41
- self.size = size
42
- self.border_ratio = border_ratio
43
-
44
- @staticmethod
45
- def recenter(image, border_ratio: float = 0.2):
46
- """ recenter an image to leave some empty space at the image border.
47
-
48
- Args:
49
- image (ndarray): input image, float/uint8 [H, W, 3/4]
50
- mask (ndarray): alpha mask, bool [H, W]
51
- border_ratio (float, optional): border ratio, image will be resized to (1 - border_ratio). Defaults to 0.2.
52
-
53
- Returns:
54
- ndarray: output image, float/uint8 [H, W, 3/4]
55
- """
56
-
57
- if image.shape[-1] == 4:
58
- mask = image[..., 3]
59
- else:
60
- mask = np.ones_like(image[..., 0:1]) * 255
61
- image = np.concatenate([image, mask], axis=-1)
62
- mask = mask[..., 0]
63
-
64
- H, W, C = image.shape
65
-
66
- size = max(H, W)
67
- result = np.zeros((size, size, C), dtype=np.uint8)
68
-
69
- coords = np.nonzero(mask)
70
- x_min, x_max = coords[0].min(), coords[0].max()
71
- y_min, y_max = coords[1].min(), coords[1].max()
72
- h = x_max - x_min
73
- w = y_max - y_min
74
- if h == 0 or w == 0:
75
- raise ValueError('input image is empty')
76
- desired_size = int(size * (1 - border_ratio))
77
- scale = desired_size / max(h, w)
78
- h2 = int(h * scale)
79
- w2 = int(w * scale)
80
- x2_min = (size - h2) // 2
81
- x2_max = x2_min + h2
82
-
83
- y2_min = (size - w2) // 2
84
- y2_max = y2_min + w2
85
-
86
- result[x2_min:x2_max, y2_min:y2_max] = cv2.resize(image[x_min:x_max, y_min:y_max], (w2, h2),
87
- interpolation=cv2.INTER_AREA)
88
-
89
- bg = np.ones((result.shape[0], result.shape[1], 3), dtype=np.uint8) * 255
90
- # bg = np.zeros((result.shape[0], result.shape[1], 3), dtype=np.uint8) * 255
91
- mask = result[..., 3:].astype(np.float32) / 255
92
- result = result[..., :3] * mask + bg * (1 - mask)
93
-
94
- mask = mask * 255
95
- result = result.clip(0, 255).astype(np.uint8)
96
- mask = mask.clip(0, 255).astype(np.uint8)
97
- return result, mask
98
-
99
- def __call__(self, image, border_ratio=0.15, to_tensor=True, return_mask=False, **kwargs):
100
- if self.border_ratio is not None:
101
- border_ratio = self.border_ratio
102
- print(f"Using border_ratio from init: {border_ratio}")
103
- if isinstance(image, str):
104
- image = cv2.imread(image, cv2.IMREAD_UNCHANGED)
105
- image, mask = self.recenter(image, border_ratio=border_ratio)
106
- image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
107
- elif isinstance(image, Image.Image):
108
- image = np.asarray(image)
109
- image, mask = self.recenter(image, border_ratio=border_ratio)
110
-
111
- image = cv2.resize(image, (self.size, self.size), interpolation=cv2.INTER_CUBIC)
112
- mask = cv2.resize(mask, (self.size, self.size), interpolation=cv2.INTER_NEAREST)
113
- mask = mask[..., np.newaxis]
114
-
115
- if to_tensor:
116
- image = array_to_tensor(image)
117
- mask = array_to_tensor(mask)
118
- if return_mask:
119
- return image, mask
120
- return image
121
-
122
-
123
- IMAGE_PROCESSORS = {
124
- "v2": ImageProcessorV2,
125
- }
126
-
127
- DEFAULT_IMAGEPROCESSOR = 'v2'
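
The image processor deleted above can also be exercised on its own; a sketch, with the image path as a placeholder. It recenters the subject with the given border ratio, composites it onto a white background, resizes to `size`, and returns tensors scaled to [-1, 1].

```python
# Hypothetical standalone use of the removed ImageProcessorV2 (image path is a placeholder).
from hy3dgen.shapegen.preprocessors import ImageProcessorV2

processor = ImageProcessorV2(size=512)

# With return_mask=True the call yields a (1, 3, 512, 512) image tensor and a (1, 1, 512, 512) mask tensor.
image_pt, mask_pt = processor('demo_image.png', border_ratio=0.15, return_mask=True)
print(image_pt.shape, mask_pt.shape)
```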
 
hy3dgen/shapegen/schedulers.py DELETED
@@ -1,307 +0,0 @@
1
- # Copyright 2024 Stability AI, Katherine Crowson and The HuggingFace Team. All rights reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- import math
16
- from dataclasses import dataclass
17
- from typing import List, Optional, Tuple, Union
18
-
19
- import numpy as np
20
- import torch
21
- from diffusers.configuration_utils import ConfigMixin, register_to_config
22
- from diffusers.schedulers.scheduling_utils import SchedulerMixin
23
- from diffusers.utils import BaseOutput, logging
24
-
25
- logger = logging.get_logger(__name__) # pylint: disable=invalid-name
26
-
27
-
28
- @dataclass
29
- class FlowMatchEulerDiscreteSchedulerOutput(BaseOutput):
30
- """
31
- Output class for the scheduler's `step` function output.
32
-
33
- Args:
34
- prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
35
- Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
36
- denoising loop.
37
- """
38
-
39
- prev_sample: torch.FloatTensor
40
-
41
-
42
- class FlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
43
- """
44
- NOTE: this is very similar to diffusers.FlowMatchEulerDiscreteScheduler. Except our timesteps are reversed
45
-
46
- Euler scheduler.
47
-
48
- This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
49
- methods the library implements for all schedulers such as loading and saving.
50
-
51
- Args:
52
- num_train_timesteps (`int`, defaults to 1000):
53
- The number of diffusion steps to train the model.
54
- timestep_spacing (`str`, defaults to `"linspace"`):
55
- The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
56
- Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
57
- shift (`float`, defaults to 1.0):
58
- The shift value for the timestep schedule.
59
- """
60
-
61
- _compatibles = []
62
- order = 1
63
-
64
- @register_to_config
65
- def __init__(
66
- self,
67
- num_train_timesteps: int = 1000,
68
- shift: float = 1.0,
69
- use_dynamic_shifting=False,
70
- ):
71
- timesteps = np.linspace(1, num_train_timesteps, num_train_timesteps, dtype=np.float32).copy()
72
- timesteps = torch.from_numpy(timesteps).to(dtype=torch.float32)
73
-
74
- sigmas = timesteps / num_train_timesteps
75
- if not use_dynamic_shifting:
76
- # when use_dynamic_shifting is True, we apply the timestep shifting on the fly based on the image resolution
77
- sigmas = shift * sigmas / (1 + (shift - 1) * sigmas)
78
-
79
- self.timesteps = sigmas * num_train_timesteps
80
-
81
- self._step_index = None
82
- self._begin_index = None
83
-
84
- self.sigmas = sigmas.to("cpu") # to avoid too much CPU/GPU communication
85
- self.sigma_min = self.sigmas[-1].item()
86
- self.sigma_max = self.sigmas[0].item()
87
-
88
- @property
89
- def step_index(self):
90
- """
91
- The index counter for current timestep. It will increase 1 after each scheduler step.
92
- """
93
- return self._step_index
94
-
95
- @property
96
- def begin_index(self):
97
- """
98
- The index for the first timestep. It should be set from pipeline with `set_begin_index` method.
99
- """
100
- return self._begin_index
101
-
102
- # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index
103
- def set_begin_index(self, begin_index: int = 0):
104
- """
105
- Sets the begin index for the scheduler. This function should be run from pipeline before the inference.
106
-
107
- Args:
108
- begin_index (`int`):
109
- The begin index for the scheduler.
110
- """
111
- self._begin_index = begin_index
112
-
113
- def scale_noise(
114
- self,
115
- sample: torch.FloatTensor,
116
- timestep: Union[float, torch.FloatTensor],
117
- noise: Optional[torch.FloatTensor] = None,
118
- ) -> torch.FloatTensor:
119
- """
120
- Forward process in flow-matching
121
-
122
- Args:
123
- sample (`torch.FloatTensor`):
124
- The input sample.
125
- timestep (`int`, *optional*):
126
- The current timestep in the diffusion chain.
127
-
128
- Returns:
129
- `torch.FloatTensor`:
130
- A scaled input sample.
131
- """
132
- # Make sure sigmas and timesteps have the same device and dtype as original_samples
133
- sigmas = self.sigmas.to(device=sample.device, dtype=sample.dtype)
134
-
135
- if sample.device.type == "mps" and torch.is_floating_point(timestep):
136
- # mps does not support float64
137
- schedule_timesteps = self.timesteps.to(sample.device, dtype=torch.float32)
138
- timestep = timestep.to(sample.device, dtype=torch.float32)
139
- else:
140
- schedule_timesteps = self.timesteps.to(sample.device)
141
- timestep = timestep.to(sample.device)
142
-
143
- # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
144
- if self.begin_index is None:
145
- step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timestep]
146
- elif self.step_index is not None:
147
- # add_noise is called after first denoising step (for inpainting)
148
- step_indices = [self.step_index] * timestep.shape[0]
149
- else:
150
- # add noise is called before first denoising step to create initial latent(img2img)
151
- step_indices = [self.begin_index] * timestep.shape[0]
152
-
153
- sigma = sigmas[step_indices].flatten()
154
- while len(sigma.shape) < len(sample.shape):
155
- sigma = sigma.unsqueeze(-1)
156
-
157
- sample = sigma * noise + (1.0 - sigma) * sample
158
-
159
- return sample
160
-
161
- def _sigma_to_t(self, sigma):
162
- return sigma * self.config.num_train_timesteps
163
-
164
- def time_shift(self, mu: float, sigma: float, t: torch.Tensor):
165
- return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma)
166
-
167
- def set_timesteps(
168
- self,
169
- num_inference_steps: int = None,
170
- device: Union[str, torch.device] = None,
171
- sigmas: Optional[List[float]] = None,
172
- mu: Optional[float] = None,
173
- ):
174
- """
175
- Sets the discrete timesteps used for the diffusion chain (to be run before inference).
176
-
177
- Args:
178
- num_inference_steps (`int`):
179
- The number of diffusion steps used when generating samples with a pre-trained model.
180
- device (`str` or `torch.device`, *optional*):
181
- The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
182
- """
183
-
184
- if self.config.use_dynamic_shifting and mu is None:
185
-             raise ValueError("you have to pass a value for `mu` when `use_dynamic_shifting` is set to `True`")
186
-
187
- if sigmas is None:
188
- self.num_inference_steps = num_inference_steps
189
- timesteps = np.linspace(
190
- self._sigma_to_t(self.sigma_max), self._sigma_to_t(self.sigma_min), num_inference_steps
191
- )
192
-
193
- sigmas = timesteps / self.config.num_train_timesteps
194
-
195
- if self.config.use_dynamic_shifting:
196
- sigmas = self.time_shift(mu, 1.0, sigmas)
197
- else:
198
- sigmas = self.config.shift * sigmas / (1 + (self.config.shift - 1) * sigmas)
199
-
200
- sigmas = torch.from_numpy(sigmas).to(dtype=torch.float32, device=device)
201
- timesteps = sigmas * self.config.num_train_timesteps
202
-
203
- self.timesteps = timesteps.to(device=device)
204
- self.sigmas = torch.cat([sigmas, torch.ones(1, device=sigmas.device)])
205
-
206
- self._step_index = None
207
- self._begin_index = None
208
-
209
- def index_for_timestep(self, timestep, schedule_timesteps=None):
210
- if schedule_timesteps is None:
211
- schedule_timesteps = self.timesteps
212
-
213
- indices = (schedule_timesteps == timestep).nonzero()
214
-
215
- # The sigma index that is taken for the **very** first `step`
216
- # is always the second index (or the last index if there is only 1)
217
- # This way we can ensure we don't accidentally skip a sigma in
218
- # case we start in the middle of the denoising schedule (e.g. for image-to-image)
219
- pos = 1 if len(indices) > 1 else 0
220
-
221
- return indices[pos].item()
222
-
223
- def _init_step_index(self, timestep):
224
- if self.begin_index is None:
225
- if isinstance(timestep, torch.Tensor):
226
- timestep = timestep.to(self.timesteps.device)
227
- self._step_index = self.index_for_timestep(timestep)
228
- else:
229
- self._step_index = self._begin_index
230
-
231
- def step(
232
- self,
233
- model_output: torch.FloatTensor,
234
- timestep: Union[float, torch.FloatTensor],
235
- sample: torch.FloatTensor,
236
- s_churn: float = 0.0,
237
- s_tmin: float = 0.0,
238
- s_tmax: float = float("inf"),
239
- s_noise: float = 1.0,
240
- generator: Optional[torch.Generator] = None,
241
- return_dict: bool = True,
242
- ) -> Union[FlowMatchEulerDiscreteSchedulerOutput, Tuple]:
243
- """
244
- Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
245
- process from the learned model outputs (most often the predicted noise).
246
-
247
- Args:
248
- model_output (`torch.FloatTensor`):
249
- The direct output from learned diffusion model.
250
- timestep (`float`):
251
- The current discrete timestep in the diffusion chain.
252
- sample (`torch.FloatTensor`):
253
- A current instance of a sample created by the diffusion process.
254
- s_churn (`float`):
255
- s_tmin (`float`):
256
- s_tmax (`float`):
257
- s_noise (`float`, defaults to 1.0):
258
- Scaling factor for noise added to the sample.
259
- generator (`torch.Generator`, *optional*):
260
- A random number generator.
261
- return_dict (`bool`):
262
- Whether or not to return a [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or
263
- tuple.
264
-
265
- Returns:
266
- [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or `tuple`:
267
- If return_dict is `True`, [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] is
268
- returned, otherwise a tuple is returned where the first element is the sample tensor.
269
- """
270
-
271
- if (
272
- isinstance(timestep, int)
273
- or isinstance(timestep, torch.IntTensor)
274
- or isinstance(timestep, torch.LongTensor)
275
- ):
276
- raise ValueError(
277
- (
278
- "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to"
279
- " `EulerDiscreteScheduler.step()` is not supported. Make sure to pass"
280
- " one of the `scheduler.timesteps` as a timestep."
281
- ),
282
- )
283
-
284
- if self.step_index is None:
285
- self._init_step_index(timestep)
286
-
287
- # Upcast to avoid precision issues when computing prev_sample
288
- sample = sample.to(torch.float32)
289
-
290
- sigma = self.sigmas[self.step_index]
291
- sigma_next = self.sigmas[self.step_index + 1]
292
-
293
- prev_sample = sample + (sigma_next - sigma) * model_output
294
-
295
- # Cast sample back to model compatible dtype
296
- prev_sample = prev_sample.to(model_output.dtype)
297
-
298
- # upon completion increase step index by one
299
- self._step_index += 1
300
-
301
- if not return_dict:
302
- return (prev_sample,)
303
-
304
- return FlowMatchEulerDiscreteSchedulerOutput(prev_sample=prev_sample)
305
-
306
- def __len__(self):
307
- return self.config.num_train_timesteps
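
The scheduler removed above implements a plain Euler update over reversed flow-matching timesteps; a minimal sketch follows, with a dummy denoiser standing in for the Hunyuan3D-DiT model and a placeholder latent shape.

```python
# Hypothetical driver loop for the removed FlowMatchEulerDiscreteScheduler.
import torch

from hy3dgen.shapegen.schedulers import FlowMatchEulerDiscreteScheduler

scheduler = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=1.0)
scheduler.set_timesteps(num_inference_steps=10)

latents = torch.randn(1, 3072, 64)  # placeholder latent shape
for t in scheduler.timesteps:
    model_output = torch.zeros_like(latents)  # stand-in for the denoiser prediction
    # step() applies: prev_sample = sample + (sigma_next - sigma) * model_output
    latents = scheduler.step(model_output, t, latents).prev_sample
```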
 
hy3dgen/texgen/__init__.py DELETED
@@ -1,26 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
-
26
- from .pipelines import Hunyuan3DPaintPipeline, Hunyuan3DTexGenConfig
 
hy3dgen/texgen/custom_rasterizer/custom_rasterizer/__init__.py DELETED
@@ -1,32 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the repsective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
- '''
26
- from .hierarchy import BuildHierarchy, BuildHierarchyWithColor
27
- from .io_obj import LoadObj, LoadObjWithTexture
28
- from .render import rasterize, interpolate
29
- '''
30
- from .io_glb import *
31
- from .io_obj import *
32
- from .render import *
 
hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_glb.py DELETED
@@ -1,248 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
- import base64
26
- import io
27
- import os
28
-
29
- import numpy as np
30
- from PIL import Image as PILImage
31
- from pygltflib import GLTF2
32
- from scipy.spatial.transform import Rotation as R
33
-
34
-
35
- # Function to extract buffer data
36
- def get_buffer_data(gltf, buffer_view):
37
- buffer = gltf.buffers[buffer_view.buffer]
38
- buffer_data = gltf.get_data_from_buffer_uri(buffer.uri)
39
- byte_offset = buffer_view.byteOffset if buffer_view.byteOffset else 0
40
- byte_length = buffer_view.byteLength
41
- return buffer_data[byte_offset:byte_offset + byte_length]
42
-
43
-
44
- # Function to extract attribute data
45
- def get_attribute_data(gltf, accessor_index):
46
- accessor = gltf.accessors[accessor_index]
47
- buffer_view = gltf.bufferViews[accessor.bufferView]
48
- buffer_data = get_buffer_data(gltf, buffer_view)
49
-
50
- comptype = {5120: np.int8, 5121: np.uint8, 5122: np.int16, 5123: np.uint16, 5125: np.uint32, 5126: np.float32}
51
- dtype = comptype[accessor.componentType]
52
-
53
- t2n = {'SCALAR': 1, 'VEC2': 2, 'VEC3': 3, 'VEC4': 4, 'MAT2': 4, 'MAT3': 9, 'MAT4': 16}
54
- num_components = t2n[accessor.type]
55
-
56
- # Calculate the correct slice of data
57
- byte_offset = accessor.byteOffset if accessor.byteOffset else 0
58
- byte_stride = buffer_view.byteStride if buffer_view.byteStride else num_components * np.dtype(dtype).itemsize
59
- count = accessor.count
60
-
61
- # Extract the attribute data
62
- attribute_data = np.zeros((count, num_components), dtype=dtype)
63
- for i in range(count):
64
- start = byte_offset + i * byte_stride
65
- end = start + num_components * np.dtype(dtype).itemsize
66
- attribute_data[i] = np.frombuffer(buffer_data[start:end], dtype=dtype)
67
-
68
- return attribute_data
69
-
70
-
71
- # Function to extract image data
72
- def get_image_data(gltf, image, folder):
73
- if image.uri:
74
- if image.uri.startswith('data:'):
75
- # Data URI
76
- header, encoded = image.uri.split(',', 1)
77
- data = base64.b64decode(encoded)
78
- else:
79
- # External file
80
- fn = image.uri
81
- if not os.path.isabs(fn):
82
- fn = folder + '/' + fn
83
- with open(fn, 'rb') as f:
84
- data = f.read()
85
- else:
86
- buffer_view = gltf.bufferViews[image.bufferView]
87
- data = get_buffer_data(gltf, buffer_view)
88
- return data
89
-
90
-
91
- # Function to convert triangle strip to triangles
92
- def convert_triangle_strip_to_triangles(indices):
93
- triangles = []
94
- for i in range(len(indices) - 2):
95
- if i % 2 == 0:
96
- triangles.append([indices[i], indices[i + 1], indices[i + 2]])
97
- else:
98
- triangles.append([indices[i], indices[i + 2], indices[i + 1]])
99
- return np.array(triangles).reshape(-1, 3)
100
-
101
-
102
- # Function to convert triangle fan to triangles
103
- def convert_triangle_fan_to_triangles(indices):
104
- triangles = []
105
- for i in range(1, len(indices) - 1):
106
- triangles.append([indices[0], indices[i], indices[i + 1]])
107
- return np.array(triangles).reshape(-1, 3)
108
-
109
-
110
- # Function to get the transformation matrix from a node
111
- def get_node_transform(node):
112
- if node.matrix:
113
- return np.array(node.matrix).reshape(4, 4).T
114
- else:
115
- T = np.eye(4)
116
- if node.translation:
117
- T[:3, 3] = node.translation
118
- if node.rotation:
119
- R_mat = R.from_quat(node.rotation).as_matrix()
120
- T[:3, :3] = R_mat
121
- if node.scale:
122
- S = np.diag(node.scale + [1])
123
- T = T @ S
124
- return T
125
-
126
-
127
- def get_world_transform(gltf, node_index, parents, world_transforms):
128
- if parents[node_index] == -2:
129
- return world_transforms[node_index]
130
-
131
- node = gltf.nodes[node_index]
132
- if parents[node_index] == -1:
133
- world_transforms[node_index] = get_node_transform(node)
134
- parents[node_index] = -2
135
- return world_transforms[node_index]
136
-
137
- parent_index = parents[node_index]
138
- parent_transform = get_world_transform(gltf, parent_index, parents, world_transforms)
139
- world_transforms[node_index] = parent_transform @ get_node_transform(node)
140
- parents[node_index] = -2
141
- return world_transforms[node_index]
142
-
143
-
144
- def LoadGlb(path):
145
- # Load the GLB file using pygltflib
146
- gltf = GLTF2().load(path)
147
-
148
- primitives = []
149
- images = {}
150
- # Iterate through the meshes in the GLB file
151
-
152
- world_transforms = [np.identity(4) for i in range(len(gltf.nodes))]
153
- parents = [-1 for i in range(len(gltf.nodes))]
154
- for node_index, node in enumerate(gltf.nodes):
155
- for idx in node.children:
156
- parents[idx] = node_index
157
- # for i in range(len(gltf.nodes)):
158
- # get_world_transform(gltf, i, parents, world_transform)
159
-
160
- for node_index, node in enumerate(gltf.nodes):
161
- if node.mesh is not None:
162
- world_transform = get_world_transform(gltf, node_index, parents, world_transforms)
163
- # Iterate through the primitives in the mesh
164
- mesh = gltf.meshes[node.mesh]
165
- for primitive in mesh.primitives:
166
- # Access the attributes of the primitive
167
- attributes = primitive.attributes.__dict__
168
- mode = primitive.mode if primitive.mode is not None else 4 # Default to TRIANGLES
169
- result = {}
170
- if primitive.indices is not None:
171
- indices = get_attribute_data(gltf, primitive.indices)
172
- if mode == 4: # TRIANGLES
173
- face_indices = indices.reshape(-1, 3)
174
- elif mode == 5: # TRIANGLE_STRIP
175
- face_indices = convert_triangle_strip_to_triangles(indices)
176
- elif mode == 6: # TRIANGLE_FAN
177
- face_indices = convert_triangle_fan_to_triangles(indices)
178
- else:
179
- continue
180
- result['F'] = face_indices
181
-
182
- # Extract vertex positions
183
- if 'POSITION' in attributes and attributes['POSITION'] is not None:
184
- positions = get_attribute_data(gltf, attributes['POSITION'])
185
- # Apply the world transformation to the positions
186
- positions_homogeneous = np.hstack([positions, np.ones((positions.shape[0], 1))])
187
- transformed_positions = (world_transform @ positions_homogeneous.T).T[:, :3]
188
- result['V'] = transformed_positions
189
-
190
- # Extract vertex colors
191
- if 'COLOR_0' in attributes and attributes['COLOR_0'] is not None:
192
- colors = get_attribute_data(gltf, attributes['COLOR_0'])
193
- if colors.shape[-1] > 3:
194
- colors = colors[..., :3]
195
- result['VC'] = colors
196
-
197
- # Extract UVs
198
- if 'TEXCOORD_0' in attributes and not attributes['TEXCOORD_0'] is None:
199
- uvs = get_attribute_data(gltf, attributes['TEXCOORD_0'])
200
- result['UV'] = uvs
201
-
202
- if primitive.material is not None:
203
- material = gltf.materials[primitive.material]
204
- if material.pbrMetallicRoughness is not None and material.pbrMetallicRoughness.baseColorTexture is not None:
205
- texture_index = material.pbrMetallicRoughness.baseColorTexture.index
206
- texture = gltf.textures[texture_index]
207
- image_index = texture.source
208
- if not image_index in images:
209
- image = gltf.images[image_index]
210
- image_data = get_image_data(gltf, image, os.path.dirname(path))
211
- pil_image = PILImage.open(io.BytesIO(image_data))
212
- if pil_image.mode != 'RGB':
213
- pil_image = pil_image.convert('RGB')
214
- images[image_index] = pil_image
215
- result['TEX'] = image_index
216
- elif material.emissiveTexture is not None:
217
- texture_index = material.emissiveTexture.index
218
- texture = gltf.textures[texture_index]
219
- image_index = texture.source
220
- if not image_index in images:
221
- image = gltf.images[image_index]
222
- image_data = get_image_data(gltf, image, os.path.dirname(path))
223
- pil_image = PILImage.open(io.BytesIO(image_data))
224
- if pil_image.mode != 'RGB':
225
- pil_image = pil_image.convert('RGB')
226
- images[image_index] = pil_image
227
- result['TEX'] = image_index
228
- else:
229
- if material.pbrMetallicRoughness is not None:
230
- base_color = material.pbrMetallicRoughness.baseColorFactor
231
- else:
232
- base_color = np.array([0.8, 0.8, 0.8], dtype=np.float32)
233
- result['MC'] = base_color
234
-
235
- primitives.append(result)
236
-
237
- return primitives, images
238
-
239
-
240
- def RotatePrimitives(primitives, transform):
241
- for i in range(len(primitives)):
242
- if 'V' in primitives[i]:
243
- primitives[i]['V'] = primitives[i]['V'] @ transform.T
244
-
245
-
246
- if __name__ == '__main__':
247
- path = 'data/test.glb'
248
- LoadGlb(path)
 
hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_obj.py DELETED
@@ -1,76 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
- import cv2
26
- import numpy as np
27
-
28
-
29
- def LoadObj(fn):
30
- lines = [l.strip() for l in open(fn)]
31
- vertices = []
32
- faces = []
33
- for l in lines:
34
- words = [w for w in l.split(' ') if w != '']
35
- if len(words) == 0:
36
- continue
37
- if words[0] == 'v':
38
- v = [float(words[i]) for i in range(1, 4)]
39
- vertices.append(v)
40
- elif words[0] == 'f':
41
- f = [int(words[i]) - 1 for i in range(1, 4)]
42
- faces.append(f)
43
-
44
- return np.array(vertices).astype('float32'), np.array(faces).astype('int32')
45
-
46
-
47
- def LoadObjWithTexture(fn, tex_fn):
48
- lines = [l.strip() for l in open(fn)]
49
- vertices = []
50
- vertex_textures = []
51
- faces = []
52
- face_textures = []
53
- for l in lines:
54
- words = [w for w in l.split(' ') if w != '']
55
- if len(words) == 0:
56
- continue
57
- if words[0] == 'v':
58
- v = [float(words[i]) for i in range(1, len(words))]
59
- vertices.append(v)
60
- elif words[0] == 'vt':
61
- v = [float(words[i]) for i in range(1, len(words))]
62
- vertex_textures.append(v)
63
- elif words[0] == 'f':
64
- f = []
65
- ft = []
66
- for i in range(1, len(words)):
67
- t = words[i].split('/')
68
- f.append(int(t[0]) - 1)
69
- ft.append(int(t[1]) - 1)
70
- for i in range(2, len(f)):
71
- faces.append([f[0], f[i - 1], f[i]])
72
- face_textures.append([ft[0], ft[i - 1], ft[i]])
73
-
74
- tex_image = cv2.cvtColor(cv2.imread(tex_fn), cv2.COLOR_BGR2RGB)
75
- return np.array(vertices).astype('float32'), np.array(vertex_textures).astype('float32'), np.array(faces).astype(
76
- 'int32'), np.array(face_textures).astype('int32'), tex_image
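
For reference, a minimal sketch of how these deleted OBJ loaders were typically called. The file names and the package-level import path are illustrative assumptions, not paths taken from this commit:

    from custom_rasterizer.io_obj import LoadObj, LoadObjWithTexture

    # LoadObj returns float32 vertices of shape (N, 3) and int32 triangle faces of shape (M, 3).
    vertices, faces = LoadObj('mesh.obj')

    # LoadObjWithTexture additionally returns per-vertex UVs, per-face UV index triplets,
    # and the texture decoded into an RGB numpy array (polygon faces are fan-triangulated).
    v, vt, f, ft, tex = LoadObjWithTexture('textured_mesh.obj', 'texture.png')
    print(vertices.shape, faces.shape, tex.shape)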
 
hy3dgen/texgen/custom_rasterizer/custom_rasterizer/render.py DELETED
@@ -1,41 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
- import custom_rasterizer_kernel
26
- import torch
27
-
28
-
29
- def rasterize(pos, tri, resolution, clamp_depth=torch.zeros(0), use_depth_prior=0):
30
- assert (pos.device == tri.device)
31
- findices, barycentric = custom_rasterizer_kernel.rasterize_image(pos[0], tri, clamp_depth, resolution[1],
32
- resolution[0], 1e-6, use_depth_prior)
33
- return findices, barycentric
34
-
35
-
36
- def interpolate(col, findices, barycentric, tri):
37
- f = findices - 1 + (findices == 0)
38
- vcol = col[0, tri.long()[f.long()]]
39
- result = barycentric.view(*barycentric.shape, 1) * vcol
40
- result = torch.sum(result, axis=-2)
41
- return result.view(1, *result.shape)
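
For reference, a short sketch of how rasterize and interpolate from this deleted module were typically chained. It assumes the compiled custom_rasterizer_kernel extension is importable and that the package is installed under the layout shown above; the toy triangle is illustrative only:

    import torch
    from custom_rasterizer.render import rasterize, interpolate

    # One triangle in clip space (x, y, z, w), with a leading batch dimension of 1.
    pos = torch.tensor([[[-0.5, -0.5, 0.5, 1.0],
                         [ 0.5, -0.5, 0.5, 1.0],
                         [ 0.0,  0.5, 0.5, 1.0]]], dtype=torch.float32)
    tri = torch.tensor([[0, 1, 2]], dtype=torch.int32)
    col = torch.ones(1, 3, 3)  # per-vertex RGB attributes

    # findices holds a per-pixel face id (0 means background); barycentric is (H, W, 3).
    findices, barycentric = rasterize(pos, tri, (512, 512))
    image = interpolate(col, findices, barycentric, tri)  # (1, H, W, 3)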
 
hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/__init__.py DELETED
@@ -1,23 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
 
hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/grid_neighbor.cpp DELETED
@@ -1,575 +0,0 @@
1
- #include "rasterizer.h"
2
- #include <fstream>
3
-
4
- inline int pos2key(float* p, int resolution) {
5
- int x = (p[0] * 0.5 + 0.5) * resolution;
6
- int y = (p[1] * 0.5 + 0.5) * resolution;
7
- int z = (p[2] * 0.5 + 0.5) * resolution;
8
- return (x * resolution + y) * resolution + z;
9
- }
10
-
11
- inline void key2pos(int key, int resolution, float* p) {
12
- int x = key / resolution / resolution;
13
- int y = key / resolution % resolution;
14
- int z = key % resolution;
15
- p[0] = ((x + 0.5) / resolution - 0.5) * 2;
16
- p[1] = ((y + 0.5) / resolution - 0.5) * 2;
17
- p[2] = ((z + 0.5) / resolution - 0.5) * 2;
18
- }
19
-
20
- inline void key2cornerpos(int key, int resolution, float* p) {
21
- int x = key / resolution / resolution;
22
- int y = key / resolution % resolution;
23
- int z = key % resolution;
24
- p[0] = ((x + 0.75) / resolution - 0.5) * 2;
25
- p[1] = ((y + 0.25) / resolution - 0.5) * 2;
26
- p[2] = ((z + 0.75) / resolution - 0.5) * 2;
27
- }
28
-
29
- inline float* pos_ptr(int l, int i, int j, torch::Tensor t) {
30
- float* pdata = t.data_ptr<float>();
31
- int height = t.size(1);
32
- int width = t.size(2);
33
- return &pdata[((l * height + i) * width + j) * 4];
34
- }
35
-
36
- struct Grid
37
- {
38
- std::vector<int> seq2oddcorner;
39
- std::vector<int> seq2evencorner;
40
- std::vector<int> seq2grid;
41
- std::vector<int> seq2normal;
42
- std::vector<int> seq2neighbor;
43
- std::unordered_map<int, int> grid2seq;
44
- std::vector<int> downsample_seq;
45
- int num_origin_seq;
46
- int resolution;
47
- int stride;
48
- };
49
-
50
- inline void pos_from_seq(Grid& grid, int seq, float* p) {
51
- auto k = grid.seq2grid[seq];
52
- key2pos(k, grid.resolution, p);
53
- }
54
-
55
- inline int fetch_seq(Grid& grid, int l, int i, int j, torch::Tensor pdata) {
56
- float* p = pos_ptr(l, i, j, pdata);
57
- if (p[3] == 0)
58
- return -1;
59
- auto key = pos2key(p, grid.resolution);
60
- int seq = grid.grid2seq[key];
61
- return seq;
62
- }
63
-
64
- inline int fetch_last_seq(Grid& grid, int i, int j, torch::Tensor pdata) {
65
- int num_layers = pdata.size(0);
66
- int l = 0;
67
- int idx = fetch_seq(grid, l, i, j, pdata);
68
- while (l < num_layers - 1) {
69
- l += 1;
70
- int new_idx = fetch_seq(grid, l, i, j, pdata);
71
- if (new_idx == -1)
72
- break;
73
- idx = new_idx;
74
- }
75
- return idx;
76
- }
77
-
78
- inline int fetch_nearest_seq(Grid& grid, int i, int j, int dim, float d, torch::Tensor pdata) {
79
- float p[3];
80
- float max_dist = 1e10;
81
- int best_idx = -1;
82
- int num_layers = pdata.size(0);
83
- for (int l = 0; l < num_layers; ++l) {
84
- int idx = fetch_seq(grid, l, i, j, pdata);
85
- if (idx == -1)
86
- break;
87
- pos_from_seq(grid, idx, p);
88
- float dist = std::abs(d - p[(dim + 2) % 3]);
89
- if (dist < max_dist) {
90
- max_dist = dist;
91
- best_idx = idx;
92
- }
93
- }
94
- return best_idx;
95
- }
96
-
97
- inline int fetch_nearest_seq_layer(Grid& grid, int i, int j, int dim, float d, torch::Tensor pdata) {
98
- float p[3];
99
- float max_dist = 1e10;
100
- int best_layer = -1;
101
- int num_layers = pdata.size(0);
102
- for (int l = 0; l < num_layers; ++l) {
103
- int idx = fetch_seq(grid, l, i, j, pdata);
104
- if (idx == -1)
105
- break;
106
- pos_from_seq(grid, idx, p);
107
- float dist = std::abs(d - p[(dim + 2) % 3]);
108
- if (dist < max_dist) {
109
- max_dist = dist;
110
- best_layer = l;
111
- }
112
- }
113
- return best_layer;
114
- }
115
-
116
- void FetchNeighbor(Grid& grid, int seq, float* pos, int dim, int boundary_info, std::vector<torch::Tensor>& view_layer_positions,
117
- int* output_indices)
118
- {
119
- auto t = view_layer_positions[dim];
120
- int height = t.size(1);
121
- int width = t.size(2);
122
- int top = 0;
123
- int ci = 0;
124
- int cj = 0;
125
- if (dim == 0) {
126
- ci = (pos[1]/2+0.5)*height;
127
- cj = (pos[0]/2+0.5)*width;
128
- }
129
- else if (dim == 1) {
130
- ci = (pos[1]/2+0.5)*height;
131
- cj = (pos[2]/2+0.5)*width;
132
- }
133
- else {
134
- ci = (-pos[2]/2+0.5)*height;
135
- cj = (pos[0]/2+0.5)*width;
136
- }
137
- int stride = grid.stride;
138
- for (int ni = ci + stride; ni >= ci - stride; ni -= stride) {
139
- for (int nj = cj - stride; nj <= cj + stride; nj += stride) {
140
- int idx = -1;
141
- if (ni == ci && nj == cj)
142
- idx = seq;
143
- else if (!(ni < 0 || ni >= height || nj < 0 || nj >= width)) {
144
- if (boundary_info == -1)
145
- idx = fetch_seq(grid, 0, ni, nj, t);
146
- else if (boundary_info == 1)
147
- idx = fetch_last_seq(grid, ni, nj, t);
148
- else
149
- idx = fetch_nearest_seq(grid, ni, nj, dim, pos[(dim + 2) % 3], t);
150
- }
151
- output_indices[top] = idx;
152
- top += 1;
153
- }
154
- }
155
- }
156
-
157
- void DownsampleGrid(Grid& src, Grid& tar)
158
- {
159
- src.downsample_seq.resize(src.seq2grid.size(), -1);
160
- tar.resolution = src.resolution / 2;
161
- tar.stride = src.stride * 2;
162
- float pos[3];
163
- std::vector<int> seq2normal_count;
164
- for (int i = 0; i < src.seq2grid.size(); ++i) {
165
- key2pos(src.seq2grid[i], src.resolution, pos);
166
- int k = pos2key(pos, tar.resolution);
167
- int s = seq2normal_count.size();
168
- if (!tar.grid2seq.count(k)) {
169
- tar.grid2seq[k] = tar.seq2grid.size();
170
- tar.seq2grid.emplace_back(k);
171
- seq2normal_count.emplace_back(0);
172
- seq2normal_count.emplace_back(0);
173
- seq2normal_count.emplace_back(0);
174
- //tar.seq2normal.emplace_back(src.seq2normal[i]);
175
- } else {
176
- s = tar.grid2seq[k] * 3;
177
- }
178
- seq2normal_count[s + src.seq2normal[i]] += 1;
179
- src.downsample_seq[i] = tar.grid2seq[k];
180
- }
181
- tar.seq2normal.resize(seq2normal_count.size() / 3);
182
- for (int i = 0; i < seq2normal_count.size(); i += 3) {
183
- int t = 0;
184
- for (int j = 1; j < 3; ++j) {
185
- if (seq2normal_count[i + j] > seq2normal_count[i + t])
186
- t = j;
187
- }
188
- tar.seq2normal[i / 3] = t;
189
- }
190
- }
191
-
192
- void NeighborGrid(Grid& grid, std::vector<torch::Tensor> view_layer_positions, int v)
193
- {
194
- grid.seq2evencorner.resize(grid.seq2grid.size(), 0);
195
- grid.seq2oddcorner.resize(grid.seq2grid.size(), 0);
196
- std::unordered_set<int> visited_seq;
197
- for (int vd = 0; vd < 3; ++vd) {
198
- auto t = view_layer_positions[vd];
199
- auto t0 = view_layer_positions[v];
200
- int height = t.size(1);
201
- int width = t.size(2);
202
- int num_layers = t.size(0);
203
- int num_view_layers = t0.size(0);
204
- for (int i = 0; i < height; ++i) {
205
- for (int j = 0; j < width; ++j) {
206
- for (int l = 0; l < num_layers; ++l) {
207
- int seq = fetch_seq(grid, l, i, j, t);
208
- if (seq == -1)
209
- break;
210
- int dim = grid.seq2normal[seq];
211
- if (dim != v)
212
- continue;
213
-
214
- float pos[3];
215
- pos_from_seq(grid, seq, pos);
216
-
217
- int ci = 0;
218
- int cj = 0;
219
- if (dim == 0) {
220
- ci = (pos[1]/2+0.5)*height;
221
- cj = (pos[0]/2+0.5)*width;
222
- }
223
- else if (dim == 1) {
224
- ci = (pos[1]/2+0.5)*height;
225
- cj = (pos[2]/2+0.5)*width;
226
- }
227
- else {
228
- ci = (-pos[2]/2+0.5)*height;
229
- cj = (pos[0]/2+0.5)*width;
230
- }
231
-
232
- if ((ci % (grid.stride * 2) < grid.stride) && (cj % (grid.stride * 2) >= grid.stride))
233
- grid.seq2evencorner[seq] = 1;
234
-
235
- if ((ci % (grid.stride * 2) >= grid.stride) && (cj % (grid.stride * 2) < grid.stride))
236
- grid.seq2oddcorner[seq] = 1;
237
-
238
- bool is_boundary = false;
239
- if (vd == v) {
240
- if (l == 0 || l == num_layers - 1)
241
- is_boundary = true;
242
- else {
243
- int seq_new = fetch_seq(grid, l + 1, i, j, t);
244
- if (seq_new == -1)
245
- is_boundary = true;
246
- }
247
- }
248
- int boundary_info = 0;
249
- if (is_boundary && (l == 0))
250
- boundary_info = -1;
251
- else if (is_boundary)
252
- boundary_info = 1;
253
- if (visited_seq.count(seq))
254
- continue;
255
- visited_seq.insert(seq);
256
-
257
- FetchNeighbor(grid, seq, pos, dim, boundary_info, view_layer_positions, &grid.seq2neighbor[seq * 9]);
258
- }
259
- }
260
- }
261
- }
262
- }
263
-
264
- void PadGrid(Grid& src, Grid& tar, std::vector<torch::Tensor>& view_layer_positions) {
265
- auto& downsample_seq = src.downsample_seq;
266
- auto& seq2evencorner = src.seq2evencorner;
267
- auto& seq2oddcorner = src.seq2oddcorner;
268
- int indices[9];
269
- std::vector<int> mapped_even_corners(tar.seq2grid.size(), 0);
270
- std::vector<int> mapped_odd_corners(tar.seq2grid.size(), 0);
271
- for (int i = 0; i < downsample_seq.size(); ++i) {
272
- if (seq2evencorner[i] > 0) {
273
- mapped_even_corners[downsample_seq[i]] = 1;
274
- }
275
- if (seq2oddcorner[i] > 0) {
276
- mapped_odd_corners[downsample_seq[i]] = 1;
277
- }
278
- }
279
- auto& tar_seq2normal = tar.seq2normal;
280
- auto& tar_seq2grid = tar.seq2grid;
281
- for (int i = 0; i < tar_seq2grid.size(); ++i) {
282
- if (mapped_even_corners[i] == 1 && mapped_odd_corners[i] == 1)
283
- continue;
284
- auto k = tar_seq2grid[i];
285
- float p[3];
286
- key2cornerpos(k, tar.resolution, p);
287
-
288
- int src_key = pos2key(p, src.resolution);
289
- if (!src.grid2seq.count(src_key)) {
290
- int seq = src.seq2grid.size();
291
- src.grid2seq[src_key] = seq;
292
- src.seq2evencorner.emplace_back((mapped_even_corners[i] == 0));
293
- src.seq2oddcorner.emplace_back((mapped_odd_corners[i] == 0));
294
- src.seq2grid.emplace_back(src_key);
295
- src.seq2normal.emplace_back(tar_seq2normal[i]);
296
- FetchNeighbor(src, seq, p, tar_seq2normal[i], 0, view_layer_positions, indices);
297
- for (int j = 0; j < 9; ++j) {
298
- src.seq2neighbor.emplace_back(indices[j]);
299
- }
300
- src.downsample_seq.emplace_back(i);
301
- } else {
302
- int seq = src.grid2seq[src_key];
303
- if (mapped_even_corners[i] == 0)
304
- src.seq2evencorner[seq] = 1;
305
- if (mapped_odd_corners[i] == 0)
306
- src.seq2oddcorner[seq] = 1;
307
- }
308
- }
309
- }
310
-
311
- std::vector<std::vector<torch::Tensor>> build_hierarchy(std::vector<torch::Tensor> view_layer_positions,
312
- std::vector<torch::Tensor> view_layer_normals, int num_level, int resolution)
313
- {
314
- if (view_layer_positions.size() != 3 || num_level < 1) {
315
- printf("Alert! We require 3 layers and at least 1 level! (%d %d)\n", view_layer_positions.size(), num_level);
316
- return {{},{},{},{}};
317
- }
318
-
319
- std::vector<Grid> grids;
320
- grids.resize(num_level);
321
-
322
- std::vector<float> seq2pos;
323
- auto& seq2grid = grids[0].seq2grid;
324
- auto& seq2normal = grids[0].seq2normal;
325
- auto& grid2seq = grids[0].grid2seq;
326
- grids[0].resolution = resolution;
327
- grids[0].stride = 1;
328
-
329
- auto int64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false);
330
- auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false);
331
-
332
- for (int v = 0; v < 3; ++v) {
333
- int num_layers = view_layer_positions[v].size(0);
334
- int height = view_layer_positions[v].size(1);
335
- int width = view_layer_positions[v].size(2);
336
- float* data = view_layer_positions[v].data_ptr<float>();
337
- float* data_normal = view_layer_normals[v].data_ptr<float>();
338
- for (int l = 0; l < num_layers; ++l) {
339
- for (int i = 0; i < height; ++i) {
340
- for (int j = 0; j < width; ++j) {
341
- float* p = &data[(i * width + j) * 4];
342
- float* n = &data_normal[(i * width + j) * 3];
343
- if (p[3] == 0)
344
- continue;
345
- auto k = pos2key(p, resolution);
346
- if (!grid2seq.count(k)) {
347
- int dim = 0;
348
- for (int d = 0; d < 3; ++d) {
349
- if (std::abs(n[d]) > std::abs(n[dim]))
350
- dim = d;
351
- }
352
- dim = (dim + 1) % 3;
353
- grid2seq[k] = seq2grid.size();
354
- seq2grid.emplace_back(k);
355
- seq2pos.push_back(p[0]);
356
- seq2pos.push_back(p[1]);
357
- seq2pos.push_back(p[2]);
358
- seq2normal.emplace_back(dim);
359
- }
360
- }
361
- }
362
- data += (height * width * 4);
363
- data_normal += (height * width * 3);
364
- }
365
- }
366
-
367
- for (int i = 0; i < num_level - 1; ++i) {
368
- DownsampleGrid(grids[i], grids[i + 1]);
369
- }
370
-
371
- for (int l = 0; l < num_level; ++l) {
372
- grids[l].seq2neighbor.resize(grids[l].seq2grid.size() * 9, -1);
373
- grids[l].num_origin_seq = grids[l].seq2grid.size();
374
- for (int d = 0; d < 3; ++d) {
375
- NeighborGrid(grids[l], view_layer_positions, d);
376
- }
377
- }
378
-
379
- for (int i = num_level - 2; i >= 0; --i) {
380
- PadGrid(grids[i], grids[i + 1], view_layer_positions);
381
- }
382
- for (int i = grids[0].num_origin_seq; i < grids[0].seq2grid.size(); ++i) {
383
- int k = grids[0].seq2grid[i];
384
- float p[3];
385
- key2pos(k, grids[0].resolution, p);
386
- seq2pos.push_back(p[0]);
387
- seq2pos.push_back(p[1]);
388
- seq2pos.push_back(p[2]);
389
- }
390
-
391
- std::vector<torch::Tensor> texture_positions(2);
392
- std::vector<torch::Tensor> grid_neighbors(grids.size());
393
- std::vector<torch::Tensor> grid_downsamples(grids.size() - 1);
394
- std::vector<torch::Tensor> grid_evencorners(grids.size());
395
- std::vector<torch::Tensor> grid_oddcorners(grids.size());
396
-
397
-
398
- texture_positions[0] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3), static_cast<int64_t>(3)}, float_options);
399
- texture_positions[1] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3)}, float_options);
400
- float* positions_out_ptr = texture_positions[0].data_ptr<float>();
401
- memcpy(positions_out_ptr, seq2pos.data(), sizeof(float) * seq2pos.size());
402
- positions_out_ptr = texture_positions[1].data_ptr<float>();
403
- for (int i = 0; i < grids[0].seq2grid.size(); ++i) {
404
- positions_out_ptr[i] = (i < grids[0].num_origin_seq);
405
- }
406
-
407
- for (int i = 0; i < grids.size(); ++i) {
408
- grid_neighbors[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2grid.size()), static_cast<int64_t>(9)}, int64_options);
409
- int64_t* nptr = grid_neighbors[i].data_ptr<int64_t>();
410
- for (int j = 0; j < grids[i].seq2neighbor.size(); ++j) {
411
- nptr[j] = grids[i].seq2neighbor[j];
412
- }
413
-
414
- grid_evencorners[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2evencorner.size())}, int64_options);
415
- grid_oddcorners[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2oddcorner.size())}, int64_options);
416
- int64_t* dptr = grid_evencorners[i].data_ptr<int64_t>();
417
- for (int j = 0; j < grids[i].seq2evencorner.size(); ++j) {
418
- dptr[j] = grids[i].seq2evencorner[j];
419
- }
420
- dptr = grid_oddcorners[i].data_ptr<int64_t>();
421
- for (int j = 0; j < grids[i].seq2oddcorner.size(); ++j) {
422
- dptr[j] = grids[i].seq2oddcorner[j];
423
- }
424
- if (i + 1 < grids.size()) {
425
- grid_downsamples[i] = torch::zeros({static_cast<int64_t>(grids[i].downsample_seq.size())}, int64_options);
426
- int64_t* dptr = grid_downsamples[i].data_ptr<int64_t>();
427
- for (int j = 0; j < grids[i].downsample_seq.size(); ++j) {
428
- dptr[j] = grids[i].downsample_seq[j];
429
- }
430
- }
431
-
432
- }
433
- return {texture_positions, grid_neighbors, grid_downsamples, grid_evencorners, grid_oddcorners};
434
- }
435
-
436
- std::vector<std::vector<torch::Tensor>> build_hierarchy_with_feat(
437
- std::vector<torch::Tensor> view_layer_positions,
438
- std::vector<torch::Tensor> view_layer_normals,
439
- std::vector<torch::Tensor> view_layer_feats,
440
- int num_level, int resolution)
441
- {
442
- if (view_layer_positions.size() != 3 || num_level < 1) {
443
- printf("Alert! We require 3 layers and at least 1 level! (%d %d)\n", view_layer_positions.size(), num_level);
444
- return {{},{},{},{}};
445
- }
446
-
447
- std::vector<Grid> grids;
448
- grids.resize(num_level);
449
-
450
- std::vector<float> seq2pos;
451
- std::vector<float> seq2feat;
452
- auto& seq2grid = grids[0].seq2grid;
453
- auto& seq2normal = grids[0].seq2normal;
454
- auto& grid2seq = grids[0].grid2seq;
455
- grids[0].resolution = resolution;
456
- grids[0].stride = 1;
457
-
458
- auto int64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false);
459
- auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false);
460
-
461
- int feat_channel = 3;
462
- for (int v = 0; v < 3; ++v) {
463
- int num_layers = view_layer_positions[v].size(0);
464
- int height = view_layer_positions[v].size(1);
465
- int width = view_layer_positions[v].size(2);
466
- float* data = view_layer_positions[v].data_ptr<float>();
467
- float* data_normal = view_layer_normals[v].data_ptr<float>();
468
- float* data_feat = view_layer_feats[v].data_ptr<float>();
469
- feat_channel = view_layer_feats[v].size(3);
470
- for (int l = 0; l < num_layers; ++l) {
471
- for (int i = 0; i < height; ++i) {
472
- for (int j = 0; j < width; ++j) {
473
- float* p = &data[(i * width + j) * 4];
474
- float* n = &data_normal[(i * width + j) * 3];
475
- float* f = &data_feat[(i * width + j) * feat_channel];
476
- if (p[3] == 0)
477
- continue;
478
- auto k = pos2key(p, resolution);
479
- if (!grid2seq.count(k)) {
480
- int dim = 0;
481
- for (int d = 0; d < 3; ++d) {
482
- if (std::abs(n[d]) > std::abs(n[dim]))
483
- dim = d;
484
- }
485
- dim = (dim + 1) % 3;
486
- grid2seq[k] = seq2grid.size();
487
- seq2grid.emplace_back(k);
488
- seq2pos.push_back(p[0]);
489
- seq2pos.push_back(p[1]);
490
- seq2pos.push_back(p[2]);
491
- for (int c = 0; c < feat_channel; ++c) {
492
- seq2feat.emplace_back(f[c]);
493
- }
494
- seq2normal.emplace_back(dim);
495
- }
496
- }
497
- }
498
- data += (height * width * 4);
499
- data_normal += (height * width * 3);
500
- data_feat += (height * width * feat_channel);
501
- }
502
- }
503
-
504
- for (int i = 0; i < num_level - 1; ++i) {
505
- DownsampleGrid(grids[i], grids[i + 1]);
506
- }
507
-
508
- for (int l = 0; l < num_level; ++l) {
509
- grids[l].seq2neighbor.resize(grids[l].seq2grid.size() * 9, -1);
510
- grids[l].num_origin_seq = grids[l].seq2grid.size();
511
- for (int d = 0; d < 3; ++d) {
512
- NeighborGrid(grids[l], view_layer_positions, d);
513
- }
514
- }
515
-
516
- for (int i = num_level - 2; i >= 0; --i) {
517
- PadGrid(grids[i], grids[i + 1], view_layer_positions);
518
- }
519
- for (int i = grids[0].num_origin_seq; i < grids[0].seq2grid.size(); ++i) {
520
- int k = grids[0].seq2grid[i];
521
- float p[3];
522
- key2pos(k, grids[0].resolution, p);
523
- seq2pos.push_back(p[0]);
524
- seq2pos.push_back(p[1]);
525
- seq2pos.push_back(p[2]);
526
- for (int c = 0; c < feat_channel; ++c) {
527
- seq2feat.emplace_back(0.5);
528
- }
529
- }
530
-
531
- std::vector<torch::Tensor> texture_positions(2);
532
- std::vector<torch::Tensor> texture_feats(1);
533
- std::vector<torch::Tensor> grid_neighbors(grids.size());
534
- std::vector<torch::Tensor> grid_downsamples(grids.size() - 1);
535
- std::vector<torch::Tensor> grid_evencorners(grids.size());
536
- std::vector<torch::Tensor> grid_oddcorners(grids.size());
537
-
538
- texture_positions[0] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3), static_cast<int64_t>(3)}, float_options);
539
- texture_positions[1] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3)}, float_options);
540
- texture_feats[0] = torch::zeros({static_cast<int64_t>(seq2feat.size() / feat_channel), static_cast<int64_t>(feat_channel)}, float_options);
541
- float* positions_out_ptr = texture_positions[0].data_ptr<float>();
542
- memcpy(positions_out_ptr, seq2pos.data(), sizeof(float) * seq2pos.size());
543
- positions_out_ptr = texture_positions[1].data_ptr<float>();
544
- for (int i = 0; i < grids[0].seq2grid.size(); ++i) {
545
- positions_out_ptr[i] = (i < grids[0].num_origin_seq);
546
- }
547
- float* feats_out_ptr = texture_feats[0].data_ptr<float>();
548
- memcpy(feats_out_ptr, seq2feat.data(), sizeof(float) * seq2feat.size());
549
-
550
- for (int i = 0; i < grids.size(); ++i) {
551
- grid_neighbors[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2grid.size()), static_cast<int64_t>(9)}, int64_options);
552
- int64_t* nptr = grid_neighbors[i].data_ptr<int64_t>();
553
- for (int j = 0; j < grids[i].seq2neighbor.size(); ++j) {
554
- nptr[j] = grids[i].seq2neighbor[j];
555
- }
556
- grid_evencorners[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2evencorner.size())}, int64_options);
557
- grid_oddcorners[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2oddcorner.size())}, int64_options);
558
- int64_t* dptr = grid_evencorners[i].data_ptr<int64_t>();
559
- for (int j = 0; j < grids[i].seq2evencorner.size(); ++j) {
560
- dptr[j] = grids[i].seq2evencorner[j];
561
- }
562
- dptr = grid_oddcorners[i].data_ptr<int64_t>();
563
- for (int j = 0; j < grids[i].seq2oddcorner.size(); ++j) {
564
- dptr[j] = grids[i].seq2oddcorner[j];
565
- }
566
- if (i + 1 < grids.size()) {
567
- grid_downsamples[i] = torch::zeros({static_cast<int64_t>(grids[i].downsample_seq.size())}, int64_options);
568
- int64_t* dptr = grid_downsamples[i].data_ptr<int64_t>();
569
- for (int j = 0; j < grids[i].downsample_seq.size(); ++j) {
570
- dptr[j] = grids[i].downsample_seq[j];
571
- }
572
- }
573
- }
574
- return {texture_positions, texture_feats, grid_neighbors, grid_downsamples, grid_evencorners, grid_oddcorners};
575
- }
 
hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp DELETED
@@ -1,139 +0,0 @@
1
- #include "rasterizer.h"
2
-
3
- void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) {
4
- float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0]));
5
- float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0]));
6
- float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1]));
7
- float y_max = std::max(vt0[1], std::max(vt1[1],vt2[1]));
8
-
9
- for (int px = x_min; px < x_max + 1; ++px) {
10
- if (px < 0 || px >= width)
11
- continue;
12
- for (int py = y_min; py < y_max + 1; ++py) {
13
- if (py < 0 || py >= height)
14
- continue;
15
- float vt[2] = {px + 0.5f, py + 0.5f};
16
- float baryCentricCoordinate[3];
17
- calculateBarycentricCoordinate(vt0, vt1, vt2, vt, baryCentricCoordinate);
18
- if (isBarycentricCoordInBounds(baryCentricCoordinate)) {
19
- int pixel = py * width + px;
20
- if (zbuffer == 0) {
21
- zbuffer[pixel] = (INT64)(idx + 1);
22
- continue;
23
- }
24
-
25
- float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2];
26
- float depth_thres = 0;
27
- if (d) {
28
- depth_thres = d[pixel] * 0.49999f + 0.5f + occlusion_truncation;
29
- }
30
-
31
- int z_quantize = depth * (2<<17);
32
- INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1);
33
- if (depth < depth_thres)
34
- continue;
35
- zbuffer[pixel] = std::min(zbuffer[pixel], token);
36
- }
37
- }
38
- }
39
- }
40
-
41
- void barycentricFromImgcoordCPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces,
42
- float* barycentric_map, int pix)
43
- {
44
- INT64 f = zbuffer[pix] % MAXINT;
45
- if (f == (MAXINT-1)) {
46
- findices[pix] = 0;
47
- barycentric_map[pix * 3] = 0;
48
- barycentric_map[pix * 3 + 1] = 0;
49
- barycentric_map[pix * 3 + 2] = 0;
50
- return;
51
- }
52
- findices[pix] = f;
53
- f -= 1;
54
- float barycentric[3] = {0, 0, 0};
55
- if (f >= 0) {
56
- float vt[2] = {float(pix % width) + 0.5f, float(pix / width) + 0.5f};
57
- float* vt0_ptr = V + (F[f * 3] * 4);
58
- float* vt1_ptr = V + (F[f * 3 + 1] * 4);
59
- float* vt2_ptr = V + (F[f * 3 + 2] * 4);
60
-
61
- float vt0[2] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f};
62
- float vt1[2] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f};
63
- float vt2[2] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f};
64
-
65
- calculateBarycentricCoordinate(vt0, vt1, vt2, vt, barycentric);
66
-
67
- barycentric[0] = barycentric[0] / vt0_ptr[3];
68
- barycentric[1] = barycentric[1] / vt1_ptr[3];
69
- barycentric[2] = barycentric[2] / vt2_ptr[3];
70
- float w = 1.0f / (barycentric[0] + barycentric[1] + barycentric[2]);
71
- barycentric[0] *= w;
72
- barycentric[1] *= w;
73
- barycentric[2] *= w;
74
-
75
- }
76
- barycentric_map[pix * 3] = barycentric[0];
77
- barycentric_map[pix * 3 + 1] = barycentric[1];
78
- barycentric_map[pix * 3 + 2] = barycentric[2];
79
- }
80
-
81
- void rasterizeImagecoordsKernelCPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces, int f)
82
- {
83
- float* vt0_ptr = V + (F[f * 3] * 4);
84
- float* vt1_ptr = V + (F[f * 3 + 1] * 4);
85
- float* vt2_ptr = V + (F[f * 3 + 2] * 4);
86
-
87
- float vt0[3] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f, vt0_ptr[2] / vt0_ptr[3] * 0.49999f + 0.5f};
88
- float vt1[3] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f, vt1_ptr[2] / vt1_ptr[3] * 0.49999f + 0.5f};
89
- float vt2[3] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f, vt2_ptr[2] / vt2_ptr[3] * 0.49999f + 0.5f};
90
-
91
- rasterizeTriangleCPU(f, vt0, vt1, vt2, width, height, zbuffer, d, occlusion_trunc);
92
- }
93
-
94
- std::vector<torch::Tensor> rasterize_image_cpu(torch::Tensor V, torch::Tensor F, torch::Tensor D,
95
- int width, int height, float occlusion_truncation, int use_depth_prior)
96
- {
97
- int num_faces = F.size(0);
98
- int num_vertices = V.size(0);
99
- auto options = torch::TensorOptions().dtype(torch::kInt32).requires_grad(false);
100
- auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false);
101
- auto findices = torch::zeros({height, width}, options);
102
- INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1);
103
- auto z_min = torch::ones({height, width}, INT64_options) * (int64_t)maxint;
104
-
105
- if (!use_depth_prior) {
106
- for (int i = 0; i < num_faces; ++i) {
107
- rasterizeImagecoordsKernelCPU(V.data_ptr<float>(), F.data_ptr<int>(), 0,
108
- (INT64*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces, i);
109
- }
110
- } else {
111
- for (int i = 0; i < num_faces; ++i)
112
- rasterizeImagecoordsKernelCPU(V.data_ptr<float>(), F.data_ptr<int>(), D.data_ptr<float>(),
113
- (INT64*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces, i);
114
- }
115
-
116
- auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false);
117
- auto barycentric = torch::zeros({height, width, 3}, float_options);
118
- for (int i = 0; i < width * height; ++i)
119
- barycentricFromImgcoordCPU(V.data_ptr<float>(), F.data_ptr<int>(),
120
- findices.data_ptr<int>(), (INT64*)z_min.data_ptr<int64_t>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>(), i);
121
-
122
- return {findices, barycentric};
123
- }
124
-
125
- std::vector<torch::Tensor> rasterize_image(torch::Tensor V, torch::Tensor F, torch::Tensor D,
126
- int width, int height, float occlusion_truncation, int use_depth_prior)
127
- {
128
- int device_id = V.get_device();
129
- if (device_id == -1)
130
- return rasterize_image_cpu(V, F, D, width, height, occlusion_truncation, use_depth_prior);
131
- else
132
- return rasterize_image_gpu(V, F, D, width, height, occlusion_truncation, use_depth_prior);
133
- }
134
-
135
- PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
136
- m.def("rasterize_image", &rasterize_image, "Custom image rasterization");
137
- m.def("build_hierarchy", &build_hierarchy, "Custom image rasterization");
138
- m.def("build_hierarchy_with_feat", &build_hierarchy_with_feat, "Custom image rasterization");
139
- }
 
hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h DELETED
@@ -1,54 +0,0 @@
1
- #ifndef RASTERIZER_H_
2
- #define RASTERIZER_H_
3
-
4
- #include <torch/extension.h>
5
- #include <vector>
6
- #include <ATen/ATen.h>
7
- #include <ATen/cuda/CUDAContext.h> // For CUDA context
8
- #include <cstdint>
9
- #define INT64 uint64_t
10
- #define MAXINT 2147483647
11
-
12
- __host__ __device__ inline float calculateSignedArea2(float* a, float* b, float* c) {
13
- return ((c[0] - a[0]) * (b[1] - a[1]) - (b[0] - a[0]) * (c[1] - a[1]));
14
- }
15
-
16
- __host__ __device__ inline void calculateBarycentricCoordinate(float* a, float* b, float* c, float* p,
17
- float* barycentric)
18
- {
19
- float beta_tri = calculateSignedArea2(a, p, c);
20
- float gamma_tri = calculateSignedArea2(a, b, p);
21
- float area = calculateSignedArea2(a, b, c);
22
- if (area == 0) {
23
- barycentric[0] = -1.0;
24
- barycentric[1] = -1.0;
25
- barycentric[2] = -1.0;
26
- return;
27
- }
28
- float tri_inv = 1.0 / area;
29
- float beta = beta_tri * tri_inv;
30
- float gamma = gamma_tri * tri_inv;
31
- float alpha = 1.0 - beta - gamma;
32
- barycentric[0] = alpha;
33
- barycentric[1] = beta;
34
- barycentric[2] = gamma;
35
- }
36
-
37
- __host__ __device__ inline bool isBarycentricCoordInBounds(float* barycentricCoord) {
38
- return barycentricCoord[0] >= 0.0 && barycentricCoord[0] <= 1.0 &&
39
- barycentricCoord[1] >= 0.0 && barycentricCoord[1] <= 1.0 &&
40
- barycentricCoord[2] >= 0.0 && barycentricCoord[2] <= 1.0;
41
- }
42
-
43
- std::vector<torch::Tensor> rasterize_image_gpu(torch::Tensor V, torch::Tensor F, torch::Tensor D,
44
- int width, int height, float occlusion_truncation, int use_depth_prior);
45
-
46
- std::vector<std::vector<torch::Tensor>> build_hierarchy(std::vector<torch::Tensor> view_layer_positions, std::vector<torch::Tensor> view_layer_normals, int num_level, int resolution);
47
-
48
- std::vector<std::vector<torch::Tensor>> build_hierarchy_with_feat(
49
- std::vector<torch::Tensor> view_layer_positions,
50
- std::vector<torch::Tensor> view_layer_normals,
51
- std::vector<torch::Tensor> view_layer_feats,
52
- int num_level, int resolution);
53
-
54
- #endif
 
hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu DELETED
@@ -1,127 +0,0 @@
1
- #include "rasterizer.h"
2
-
3
- __device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) {
4
- float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0]));
5
- float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0]));
6
- float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1]));
7
- float y_max = std::max(vt0[1], std::max(vt1[1],vt2[1]));
8
-
9
- for (int px = x_min; px < x_max + 1; ++px) {
10
- if (px < 0 || px >= width)
11
- continue;
12
- for (int py = y_min; py < y_max + 1; ++py) {
13
- if (py < 0 || py >= height)
14
- continue;
15
- float vt[2] = {px + 0.5f, py + 0.5f};
16
- float baryCentricCoordinate[3];
17
- calculateBarycentricCoordinate(vt0, vt1, vt2, vt, baryCentricCoordinate);
18
- if (isBarycentricCoordInBounds(baryCentricCoordinate)) {
19
- int pixel = py * width + px;
20
- if (zbuffer == 0) {
21
- atomicExch(&zbuffer[pixel], (INT64)(idx + 1));
22
- continue;
23
- }
24
- float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2];
25
- float depth_thres = 0;
26
- if (d) {
27
- depth_thres = d[pixel] * 0.49999f + 0.5f + occlusion_truncation;
28
- }
29
-
30
- int z_quantize = depth * (2<<17);
31
- INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1);
32
- if (depth < depth_thres)
33
- continue;
34
- atomicMin(&zbuffer[pixel], token);
35
- }
36
- }
37
- }
38
- }
39
-
40
- __global__ void barycentricFromImgcoordGPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces,
41
- float* barycentric_map)
42
- {
43
- int pix = blockIdx.x * blockDim.x + threadIdx.x;
44
- if (pix >= width * height)
45
- return;
46
- INT64 f = zbuffer[pix] % MAXINT;
47
- if (f == (MAXINT-1)) {
48
- findices[pix] = 0;
49
- barycentric_map[pix * 3] = 0;
50
- barycentric_map[pix * 3 + 1] = 0;
51
- barycentric_map[pix * 3 + 2] = 0;
52
- return;
53
- }
54
- findices[pix] = f;
55
- f -= 1;
56
- float barycentric[3] = {0, 0, 0};
57
- if (f >= 0) {
58
- float vt[2] = {float(pix % width) + 0.5f, float(pix / width) + 0.5f};
59
- float* vt0_ptr = V + (F[f * 3] * 4);
60
- float* vt1_ptr = V + (F[f * 3 + 1] * 4);
61
- float* vt2_ptr = V + (F[f * 3 + 2] * 4);
62
-
63
- float vt0[2] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f};
64
- float vt1[2] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f};
65
- float vt2[2] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f};
66
-
67
- calculateBarycentricCoordinate(vt0, vt1, vt2, vt, barycentric);
68
-
69
- barycentric[0] = barycentric[0] / vt0_ptr[3];
70
- barycentric[1] = barycentric[1] / vt1_ptr[3];
71
- barycentric[2] = barycentric[2] / vt2_ptr[3];
72
- float w = 1.0f / (barycentric[0] + barycentric[1] + barycentric[2]);
73
- barycentric[0] *= w;
74
- barycentric[1] *= w;
75
- barycentric[2] *= w;
76
-
77
- }
78
- barycentric_map[pix * 3] = barycentric[0];
79
- barycentric_map[pix * 3 + 1] = barycentric[1];
80
- barycentric_map[pix * 3 + 2] = barycentric[2];
81
- }
82
-
83
- __global__ void rasterizeImagecoordsKernelGPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces)
84
- {
85
- int f = blockIdx.x * blockDim.x + threadIdx.x;
86
- if (f >= num_faces)
87
- return;
88
-
89
- float* vt0_ptr = V + (F[f * 3] * 4);
90
- float* vt1_ptr = V + (F[f * 3 + 1] * 4);
91
- float* vt2_ptr = V + (F[f * 3 + 2] * 4);
92
-
93
- float vt0[3] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f, vt0_ptr[2] / vt0_ptr[3] * 0.49999f + 0.5f};
94
- float vt1[3] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f, vt1_ptr[2] / vt1_ptr[3] * 0.49999f + 0.5f};
95
- float vt2[3] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f, vt2_ptr[2] / vt2_ptr[3] * 0.49999f + 0.5f};
96
-
97
- rasterizeTriangleGPU(f, vt0, vt1, vt2, width, height, zbuffer, d, occlusion_trunc);
98
- }
99
-
100
- std::vector<torch::Tensor> rasterize_image_gpu(torch::Tensor V, torch::Tensor F, torch::Tensor D,
101
- int width, int height, float occlusion_truncation, int use_depth_prior)
102
- {
103
- int device_id = V.get_device();
104
- cudaSetDevice(device_id);
105
- int num_faces = F.size(0);
106
- int num_vertices = V.size(0);
107
- auto options = torch::TensorOptions().dtype(torch::kInt32).device(torch::kCUDA, device_id).requires_grad(false);
108
- auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA, device_id).requires_grad(false);
109
- auto findices = torch::zeros({height, width}, options);
110
- INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1);
111
- auto z_min = torch::ones({height, width}, INT64_options) * (int64_t)maxint;
112
-
113
- if (!use_depth_prior) {
114
- rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(), 0,
115
- (INT64*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces);
116
- } else {
117
- rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(), D.data_ptr<float>(),
118
- (INT64*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces);
119
- }
120
-
121
- auto float_options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, device_id).requires_grad(false);
122
- auto barycentric = torch::zeros({height, width, 3}, float_options);
123
- barycentricFromImgcoordGPU<<<(width * height + 255)/256, 256>>>(V.data_ptr<float>(), F.data_ptr<int>(),
124
- findices.data_ptr<int>(), (INT64*)z_min.data_ptr<int64_t>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>());
125
-
126
- return {findices, barycentric};
127
- }
 
hy3dgen/texgen/custom_rasterizer/setup.py DELETED
@@ -1,26 +0,0 @@
1
- from setuptools import setup, find_packages
2
- from torch.utils.cpp_extension import BuildExtension, CUDAExtension
3
-
4
- # build custom rasterizer
5
- # build with `python setup.py install`
6
- # nvcc is needed
7
-
8
- custom_rasterizer_module = CUDAExtension('custom_rasterizer_kernel', [
9
- 'lib/custom_rasterizer_kernel/rasterizer.cpp',
10
- 'lib/custom_rasterizer_kernel/grid_neighbor.cpp',
11
- 'lib/custom_rasterizer_kernel/rasterizer_gpu.cu',
12
- ])
13
-
14
- setup(
15
- packages=find_packages(),
16
- version='0.1',
17
- name='custom_rasterizer',
18
- include_package_data=True,
19
- package_dir={'': '.'},
20
- ext_modules=[
21
- custom_rasterizer_module,
22
- ],
23
- cmdclass={
24
- 'build_ext': BuildExtension
25
- }
26
- )
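
For reference, a small Python smoke test one might run after `python setup.py install` to confirm the extension built; the three attribute names come from the PYBIND11_MODULE block in rasterizer.cpp above:

    # The compiled kernel should import and expose the three bindings.
    import custom_rasterizer_kernel

    for name in ('rasterize_image', 'build_hierarchy', 'build_hierarchy_with_feat'):
        print(name, hasattr(custom_rasterizer_kernel, name))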
 
hy3dgen/texgen/differentiable_renderer/__init__.py DELETED
@@ -1,23 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
 
hy3dgen/texgen/differentiable_renderer/build/temp.win-amd64-cpython-310/Release/mesh_processor.cp310-win_amd64.exp DELETED
Binary file (840 Bytes)
 
hy3dgen/texgen/differentiable_renderer/build/temp.win-amd64-cpython-310/Release/mesh_processor.cp310-win_amd64.lib DELETED
Binary file (2.14 kB)
 
hy3dgen/texgen/differentiable_renderer/build/temp.win-amd64-cpython-310/Release/mesh_processor.obj DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1aa1f67f69a3f4389d88b5824de08503705112177eb5d8c7dd5ad09c2847e8b6
3
- size 7617045
 
 
 
 
hy3dgen/texgen/differentiable_renderer/camera_utils.py DELETED
@@ -1,116 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
- import math
26
-
27
- import numpy as np
28
- import torch
29
-
30
-
31
- def transform_pos(mtx, pos, keepdim=False):
32
- t_mtx = torch.from_numpy(mtx).to(
33
- pos.device) if isinstance(
34
- mtx, np.ndarray) else mtx
35
- if pos.shape[-1] == 3:
36
- posw = torch.cat(
37
- [pos, torch.ones([pos.shape[0], 1]).to(pos.device)], axis=1)
38
- else:
39
- posw = pos
40
-
41
- if keepdim:
42
- return torch.matmul(posw, t_mtx.t())[...]
43
- else:
44
- return torch.matmul(posw, t_mtx.t())[None, ...]
45
-
46
-
47
- def get_mv_matrix(elev, azim, camera_distance, center=None):
48
- elev = -elev
49
- azim += 90
50
-
51
- elev_rad = math.radians(elev)
52
- azim_rad = math.radians(azim)
53
-
54
- camera_position = np.array([camera_distance * math.cos(elev_rad) * math.cos(azim_rad),
55
- camera_distance *
56
- math.cos(elev_rad) * math.sin(azim_rad),
57
- camera_distance * math.sin(elev_rad)])
58
-
59
- if center is None:
60
- center = np.array([0, 0, 0])
61
- else:
62
- center = np.array(center)
63
-
64
- lookat = center - camera_position
65
- lookat = lookat / np.linalg.norm(lookat)
66
-
67
- up = np.array([0, 0, 1.0])
68
- right = np.cross(lookat, up)
69
- right = right / np.linalg.norm(right)
70
- up = np.cross(right, lookat)
71
- up = up / np.linalg.norm(up)
72
-
73
- c2w = np.concatenate(
74
- [np.stack([right, up, -lookat], axis=-1), camera_position[:, None]], axis=-1)
75
-
76
- w2c = np.zeros((4, 4))
77
- w2c[:3, :3] = np.transpose(c2w[:3, :3], (1, 0))
78
- w2c[:3, 3:] = -np.matmul(np.transpose(c2w[:3, :3], (1, 0)), c2w[:3, 3:])
79
- w2c[3, 3] = 1.0
80
-
81
- return w2c.astype(np.float32)
82
-
83
-
84
- def get_orthographic_projection_matrix(
85
- left=-1, right=1, bottom=-1, top=1, near=0, far=2):
86
- """
87
- Compute an orthographic projection matrix.
88
-
89
- Args:
90
- left (float): left boundary of the projection volume.
91
- right (float): right boundary of the projection volume.
92
- bottom (float): bottom boundary of the projection volume.
93
- top (float): top boundary of the projection volume.
94
- near (float): distance to the near clipping plane.
95
- far (float): distance to the far clipping plane.
96
-
97
- Returns:
98
- numpy.ndarray: the orthographic projection matrix.
99
- """
100
- ortho_matrix = np.eye(4, dtype=np.float32)
101
- ortho_matrix[0, 0] = 2 / (right - left)
102
- ortho_matrix[1, 1] = 2 / (top - bottom)
103
- ortho_matrix[2, 2] = -2 / (far - near)
104
- ortho_matrix[0, 3] = -(right + left) / (right - left)
105
- ortho_matrix[1, 3] = -(top + bottom) / (top - bottom)
106
- ortho_matrix[2, 3] = -(far + near) / (far - near)
107
- return ortho_matrix
108
-
109
-
110
- def get_perspective_projection_matrix(fovy, aspect_wh, near, far):
111
- fovy_rad = math.radians(fovy)
112
- return np.array([[1.0 / (math.tan(fovy_rad / 2.0) * aspect_wh), 0, 0, 0],
113
- [0, 1.0 / math.tan(fovy_rad / 2.0), 0, 0],
114
- [0, 0, -(far + near) / (far - near), -
115
- 2.0 * far * near / (far - near)],
116
- [0, 0, -1, 0]]).astype(np.float32)
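
For reference, a minimal sketch chaining these deleted helpers into a model-view-projection transform; the viewing angles and the dummy vertices are arbitrary illustration values, and the import path simply mirrors the file location above:

    import torch
    from hy3dgen.texgen.differentiable_renderer.camera_utils import (
        get_mv_matrix, get_orthographic_projection_matrix, transform_pos)

    # World -> camera for a view at 20 deg elevation, 30 deg azimuth, distance 1.5.
    mv = get_mv_matrix(elev=20, azim=30, camera_distance=1.5)
    proj = get_orthographic_projection_matrix(left=-1, right=1, bottom=-1, top=1, near=0.1, far=3.0)
    mvp = proj @ mv  # 4x4 float32, applied to homogeneous column vectors

    verts = torch.rand(100, 3)        # dummy mesh vertices
    clip = transform_pos(mvp, verts)  # (1, 100, 4) clip-space positions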
 
hy3dgen/texgen/differentiable_renderer/compile_mesh_painter.bat DELETED
@@ -1,3 +0,0 @@
1
- FOR /F "tokens=*" %%i IN ('python -m pybind11 --includes') DO SET PYINCLUDES=%%i
2
- echo %PYINCLUDES%
3
- g++ -O3 -Wall -shared -std=c++11 -fPIC %PYINCLUDES% mesh_processor.cpp -o mesh_processor.pyd -lpython3.12
 
 
 
 
hy3dgen/texgen/differentiable_renderer/dist/mesh_processor-0.0.0-py3.10-win-amd64.egg DELETED
Binary file (63.4 kB)
 
hy3dgen/texgen/differentiable_renderer/mesh_processor.cpp DELETED
@@ -1,161 +0,0 @@
1
- #include <vector>
2
- #include <queue>
3
- #include <cmath>
4
- #include <algorithm>
5
- #include <pybind11/pybind11.h>
6
- #include <pybind11/numpy.h>
7
- #include <pybind11/stl.h>
8
-
9
- namespace py = pybind11;
10
- using namespace std;
11
-
12
- std::pair<py::array_t<float>,
13
- py::array_t<uint8_t>> meshVerticeInpaint_smooth(py::array_t<float> texture,
14
- py::array_t<uint8_t> mask,
15
- py::array_t<float> vtx_pos, py::array_t<float> vtx_uv,
16
- py::array_t<int> pos_idx, py::array_t<int> uv_idx) {
17
- auto texture_buf = texture.request();
18
- auto mask_buf = mask.request();
19
- auto vtx_pos_buf = vtx_pos.request();
20
- auto vtx_uv_buf = vtx_uv.request();
21
- auto pos_idx_buf = pos_idx.request();
22
- auto uv_idx_buf = uv_idx.request();
23
-
24
- int texture_height = texture_buf.shape[0];
25
- int texture_width = texture_buf.shape[1];
26
- int texture_channel = texture_buf.shape[2];
27
- float* texture_ptr = static_cast<float*>(texture_buf.ptr);
28
- uint8_t* mask_ptr = static_cast<uint8_t*>(mask_buf.ptr);
29
-
30
- int vtx_num = vtx_pos_buf.shape[0];
31
- float* vtx_pos_ptr = static_cast<float*>(vtx_pos_buf.ptr);
32
- float* vtx_uv_ptr = static_cast<float*>(vtx_uv_buf.ptr);
33
- int* pos_idx_ptr = static_cast<int*>(pos_idx_buf.ptr);
34
- int* uv_idx_ptr = static_cast<int*>(uv_idx_buf.ptr);
35
-
36
- vector<float> vtx_mask(vtx_num, 0.0f);
37
- vector<vector<float>> vtx_color(vtx_num, vector<float>(texture_channel, 0.0f));
38
- vector<int> uncolored_vtxs;
39
-
40
- vector<vector<int>> G(vtx_num);
41
-
42
- for (int i = 0; i < uv_idx_buf.shape[0]; ++i) {
43
- for (int k = 0; k < 3; ++k) {
44
- int vtx_uv_idx = uv_idx_ptr[i * 3 + k];
45
- int vtx_idx = pos_idx_ptr[i * 3 + k];
46
- int uv_v = round(vtx_uv_ptr[vtx_uv_idx * 2] * (texture_width - 1));
47
- int uv_u = round((1.0 - vtx_uv_ptr[vtx_uv_idx * 2 + 1]) * (texture_height - 1));
48
-
49
- if (mask_ptr[uv_u * texture_width + uv_v] > 0) {
50
- vtx_mask[vtx_idx] = 1.0f;
51
- for (int c = 0; c < texture_channel; ++c) {
52
- vtx_color[vtx_idx][c] = texture_ptr[(uv_u * texture_width + uv_v) * texture_channel + c];
53
- }
54
- }else{
55
- uncolored_vtxs.push_back(vtx_idx);
56
- }
57
-
58
- G[pos_idx_ptr[i * 3 + k]].push_back(pos_idx_ptr[i * 3 + (k + 1) % 3]);
59
- }
60
- }
61
-
62
- int smooth_count = 2;
63
- int last_uncolored_vtx_count = 0;
64
- while (smooth_count>0) {
65
- int uncolored_vtx_count = 0;
66
-
67
- for (int vtx_idx : uncolored_vtxs) {
68
-
69
- vector<float> sum_color(texture_channel, 0.0f);
70
- float total_weight = 0.0f;
71
-
72
- array<float, 3> vtx_0 = {vtx_pos_ptr[vtx_idx * 3],
73
- vtx_pos_ptr[vtx_idx * 3 + 1], vtx_pos_ptr[vtx_idx * 3 + 2]};
74
- for (int connected_idx : G[vtx_idx]) {
75
- if (vtx_mask[connected_idx] > 0) {
76
- array<float, 3> vtx1 = {vtx_pos_ptr[connected_idx * 3],
77
- vtx_pos_ptr[connected_idx * 3 + 1], vtx_pos_ptr[connected_idx * 3 + 2]};
78
- float dist_weight = 1.0f / max(sqrt(pow(vtx_0[0] - vtx1[0], 2) + pow(vtx_0[1] - vtx1[1], 2) + \
79
- pow(vtx_0[2] - vtx1[2], 2)), 1E-4);
80
- dist_weight = dist_weight * dist_weight;
81
- for (int c = 0; c < texture_channel; ++c) {
82
- sum_color[c] += vtx_color[connected_idx][c] * dist_weight;
83
- }
84
- total_weight += dist_weight;
85
- }
86
- }
87
-
88
- if (total_weight > 0.0f) {
89
- for (int c = 0; c < texture_channel; ++c) {
90
- vtx_color[vtx_idx][c] = sum_color[c] / total_weight;
91
- }
92
- vtx_mask[vtx_idx] = 1.0f;
93
- } else {
94
- uncolored_vtx_count++;
95
- }
96
-
97
- }
98
-
99
- if(last_uncolored_vtx_count==uncolored_vtx_count){
100
- smooth_count--;
101
- }else{
102
- smooth_count++;
103
- }
104
- last_uncolored_vtx_count = uncolored_vtx_count;
105
- }
106
-
107
- // Create new arrays for the output
108
- py::array_t<float> new_texture(texture_buf.size);
109
- py::array_t<uint8_t> new_mask(mask_buf.size);
110
-
111
- auto new_texture_buf = new_texture.request();
112
- auto new_mask_buf = new_mask.request();
113
-
114
- float* new_texture_ptr = static_cast<float*>(new_texture_buf.ptr);
115
- uint8_t* new_mask_ptr = static_cast<uint8_t*>(new_mask_buf.ptr);
116
- // Copy original texture and mask to new arrays
117
- std::copy(texture_ptr, texture_ptr + texture_buf.size, new_texture_ptr);
118
- std::copy(mask_ptr, mask_ptr + mask_buf.size, new_mask_ptr);
119
-
120
- for (int face_idx = 0; face_idx < uv_idx_buf.shape[0]; ++face_idx) {
121
- for (int k = 0; k < 3; ++k) {
122
- int vtx_uv_idx = uv_idx_ptr[face_idx * 3 + k];
123
- int vtx_idx = pos_idx_ptr[face_idx * 3 + k];
124
-
125
- if (vtx_mask[vtx_idx] == 1.0f) {
126
- int uv_v = round(vtx_uv_ptr[vtx_uv_idx * 2] * (texture_width - 1));
127
- int uv_u = round((1.0 - vtx_uv_ptr[vtx_uv_idx * 2 + 1]) * (texture_height - 1));
128
-
129
- for (int c = 0; c < texture_channel; ++c) {
130
- new_texture_ptr[(uv_u * texture_width + uv_v) * texture_channel + c] = vtx_color[vtx_idx][c];
131
- }
132
- new_mask_ptr[uv_u * texture_width + uv_v] = 255;
133
- }
134
- }
135
- }
136
-
137
- // Reshape the new arrays to match the original texture and mask shapes
138
- new_texture.resize({texture_height, texture_width, 3});
139
- new_mask.resize({texture_height, texture_width});
140
- return std::make_pair(new_texture, new_mask);
141
- }
142
-
143
-
144
- std::pair<py::array_t<float>, py::array_t<uint8_t>> meshVerticeInpaint(py::array_t<float> texture,
145
- py::array_t<uint8_t> mask,
146
- py::array_t<float> vtx_pos, py::array_t<float> vtx_uv,
147
- py::array_t<int> pos_idx, py::array_t<int> uv_idx, const std::string& method = "smooth") {
148
- if (method == "smooth") {
149
- return meshVerticeInpaint_smooth(texture, mask, vtx_pos, vtx_uv, pos_idx, uv_idx);
150
- } else {
151
- throw std::invalid_argument("Invalid method. Use 'smooth' or 'forward'.");
152
- }
153
- }
154
-
155
- PYBIND11_MODULE(mesh_processor, m) {
156
- m.def("meshVerticeInpaint", &meshVerticeInpaint, "A function to process mesh",
157
- py::arg("texture"), py::arg("mask"),
158
- py::arg("vtx_pos"), py::arg("vtx_uv"),
159
- py::arg("pos_idx"), py::arg("uv_idx"),
160
- py::arg("method") = "smooth");
161
- }
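
The smoothing pass above spreads color from painted to unpainted vertices with inverse-squared-distance weights over the one-ring neighbourhood. A small NumPy illustration of that weighting on toy values (not part of the extension):

import numpy as np

v0 = np.array([0.0, 0.0, 0.0])            # uncolored vertex
neighbours = np.array([[0.1, 0.0, 0.0],   # colored one-ring neighbours
                       [0.0, 0.3, 0.0]])
colors = np.array([[1.0, 0.0, 0.0],
                   [0.0, 0.0, 1.0]])

d = np.maximum(np.linalg.norm(neighbours - v0, axis=1), 1e-4)
w = 1.0 / d ** 2                          # closer neighbours dominate
blended = (colors * w[:, None]).sum(axis=0) / w.sum()
print(blended)                            # approximately [0.9, 0.0, 0.1]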
 
 
hy3dgen/texgen/differentiable_renderer/mesh_processor.egg-info/PKG-INFO DELETED
@@ -1,7 +0,0 @@
1
- Metadata-Version: 2.2
2
- Name: mesh_processor
3
- Version: 0.0.0
4
- Requires-Python: >=3.6
5
- Requires-Dist: pybind11>=2.6.0
6
- Dynamic: requires-dist
7
- Dynamic: requires-python
 
 
 
 
 
 
 
 
hy3dgen/texgen/differentiable_renderer/mesh_processor.egg-info/SOURCES.txt DELETED
@@ -1,7 +0,0 @@
1
- mesh_processor.cpp
2
- setup.py
3
- mesh_processor.egg-info/PKG-INFO
4
- mesh_processor.egg-info/SOURCES.txt
5
- mesh_processor.egg-info/dependency_links.txt
6
- mesh_processor.egg-info/requires.txt
7
- mesh_processor.egg-info/top_level.txt
 
 
 
 
 
 
 
 
hy3dgen/texgen/differentiable_renderer/mesh_processor.egg-info/dependency_links.txt DELETED
@@ -1 +0,0 @@
1
-
 
 
hy3dgen/texgen/differentiable_renderer/mesh_processor.egg-info/requires.txt DELETED
@@ -1 +0,0 @@
1
- pybind11>=2.6.0
 
 
hy3dgen/texgen/differentiable_renderer/mesh_processor.egg-info/top_level.txt DELETED
@@ -1 +0,0 @@
1
- mesh_processor
 
 
hy3dgen/texgen/differentiable_renderer/mesh_processor.py DELETED
@@ -1,70 +0,0 @@
1
- import numpy as np
2
-
3
- def meshVerticeInpaint_smooth(texture, mask, vtx_pos, vtx_uv, pos_idx, uv_idx):
4
- texture_height, texture_width, texture_channel = texture.shape
5
- vtx_num = vtx_pos.shape[0]
6
-
7
- vtx_mask = np.zeros(vtx_num, dtype=np.float32)
8
- vtx_color = [np.zeros(texture_channel, dtype=np.float32) for _ in range(vtx_num)]
9
- uncolored_vtxs = []
10
- G = [[] for _ in range(vtx_num)]
11
-
12
- for i in range(uv_idx.shape[0]):
13
- for k in range(3):
14
- vtx_uv_idx = uv_idx[i, k]
15
- vtx_idx = pos_idx[i, k]
16
- uv_v = int(round(vtx_uv[vtx_uv_idx, 0] * (texture_width - 1)))
17
- uv_u = int(round((1.0 - vtx_uv[vtx_uv_idx, 1]) * (texture_height - 1)))
18
- if mask[uv_u, uv_v] > 0:
19
- vtx_mask[vtx_idx] = 1.0
20
- vtx_color[vtx_idx] = texture[uv_u, uv_v]
21
- else:
22
- uncolored_vtxs.append(vtx_idx)
23
- G[pos_idx[i, k]].append(pos_idx[i, (k + 1) % 3])
24
-
25
- smooth_count = 2
26
- last_uncolored_vtx_count = 0
27
- while smooth_count > 0:
28
- uncolored_vtx_count = 0
29
- for vtx_idx in uncolored_vtxs:
30
- sum_color = np.zeros(texture_channel, dtype=np.float32)
31
- total_weight = 0.0
32
- vtx_0 = vtx_pos[vtx_idx]
33
- for connected_idx in G[vtx_idx]:
34
- if vtx_mask[connected_idx] > 0:
35
- vtx1 = vtx_pos[connected_idx]
36
- dist = np.sqrt(np.sum((vtx_0 - vtx1) ** 2))
37
- dist_weight = 1.0 / max(dist, 1e-4)
38
- dist_weight *= dist_weight
39
- sum_color += vtx_color[connected_idx] * dist_weight
40
- total_weight += dist_weight
41
- if total_weight > 0:
42
- vtx_color[vtx_idx] = sum_color / total_weight
43
- vtx_mask[vtx_idx] = 1.0
44
- else:
45
- uncolored_vtx_count += 1
46
-
47
- if last_uncolored_vtx_count == uncolored_vtx_count:
48
- smooth_count -= 1
49
- else:
50
- smooth_count += 1
51
- last_uncolored_vtx_count = uncolored_vtx_count
52
-
53
- new_texture = texture.copy()
54
- new_mask = mask.copy()
55
- for face_idx in range(uv_idx.shape[0]):
56
- for k in range(3):
57
- vtx_uv_idx = uv_idx[face_idx, k]
58
- vtx_idx = pos_idx[face_idx, k]
59
- if vtx_mask[vtx_idx] == 1.0:
60
- uv_v = int(round(vtx_uv[vtx_uv_idx, 0] * (texture_width - 1)))
61
- uv_u = int(round((1.0 - vtx_uv[vtx_uv_idx, 1]) * (texture_height - 1)))
62
- new_texture[uv_u, uv_v] = vtx_color[vtx_idx]
63
- new_mask[uv_u, uv_v] = 255
64
- return new_texture, new_mask
65
-
66
- def meshVerticeInpaint(texture, mask, vtx_pos, vtx_uv, pos_idx, uv_idx, method="smooth"):
67
- if method == "smooth":
68
- return meshVerticeInpaint_smooth(texture, mask, vtx_pos, vtx_uv, pos_idx, uv_idx)
69
- else:
70
- raise ValueError("Invalid method. Use 'smooth' or 'forward'.")
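
Below, a minimal self-contained call of this pure-Python fallback; the toy single-triangle arrays are placeholders for the UV-unwrapped data that mesh_render.uv_inpaint passes in the same layout:

import numpy as np

H = W = 64
texture = np.zeros((H, W, 3), dtype=np.float32)    # float texture in [0, 1]
mask = np.zeros((H, W), dtype=np.uint8)            # 255 where the texture is already painted
vtx_pos = np.random.rand(3, 3).astype(np.float32)  # toy single-triangle mesh
vtx_uv = np.random.rand(3, 2).astype(np.float32)
pos_idx = np.array([[0, 1, 2]], dtype=np.int64)
uv_idx = np.array([[0, 1, 2]], dtype=np.int64)

new_texture, new_mask = meshVerticeInpaint(texture, mask, vtx_pos, vtx_uv,
                                           pos_idx, uv_idx, method="smooth")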
 
 
hy3dgen/texgen/differentiable_renderer/mesh_render.py DELETED
@@ -1,833 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
- import cv2
26
- import numpy as np
27
- import torch
28
- import torch.nn.functional as F
29
- import trimesh
30
- from PIL import Image
31
-
32
- from .camera_utils import (
33
- transform_pos,
34
- get_mv_matrix,
35
- get_orthographic_projection_matrix,
36
- get_perspective_projection_matrix,
37
- )
38
- from .mesh_processor import meshVerticeInpaint
39
- from .mesh_utils import load_mesh, save_mesh
40
-
41
-
42
- def stride_from_shape(shape):
43
- stride = [1]
44
- for x in reversed(shape[1:]):
45
- stride.append(stride[-1] * x)
46
- return list(reversed(stride))
47
-
48
-
49
- def scatter_add_nd_with_count(input, count, indices, values, weights=None):
50
- # input: [..., C], D dimension + C channel
51
- # count: [..., 1], D dimension
52
- # indices: [N, D], long
53
- # values: [N, C]
54
-
55
- D = indices.shape[-1]
56
- C = input.shape[-1]
57
- size = input.shape[:-1]
58
- stride = stride_from_shape(size)
59
-
60
- assert len(size) == D
61
-
62
- input = input.view(-1, C) # [HW, C]
63
- count = count.view(-1, 1)
64
-
65
- flatten_indices = (indices * torch.tensor(stride,
66
- dtype=torch.long, device=indices.device)).sum(-1) # [N]
67
-
68
- if weights is None:
69
- weights = torch.ones_like(values[..., :1])
70
-
71
- input.scatter_add_(0, flatten_indices.unsqueeze(1).repeat(1, C), values)
72
- count.scatter_add_(0, flatten_indices.unsqueeze(1), weights)
73
-
74
- return input.view(*size, C), count.view(*size, 1)
75
-
76
-
77
- def linear_grid_put_2d(H, W, coords, values, return_count=False):
78
- # coords: [N, 2], float in [0, 1]
79
- # values: [N, C]
80
-
81
- C = values.shape[-1]
82
-
83
- indices = coords * torch.tensor(
84
- [H - 1, W - 1], dtype=torch.float32, device=coords.device
85
- )
86
- indices_00 = indices.floor().long() # [N, 2]
87
- indices_00[:, 0].clamp_(0, H - 2)
88
- indices_00[:, 1].clamp_(0, W - 2)
89
- indices_01 = indices_00 + torch.tensor(
90
- [0, 1], dtype=torch.long, device=indices.device
91
- )
92
- indices_10 = indices_00 + torch.tensor(
93
- [1, 0], dtype=torch.long, device=indices.device
94
- )
95
- indices_11 = indices_00 + torch.tensor(
96
- [1, 1], dtype=torch.long, device=indices.device
97
- )
98
-
99
- h = indices[..., 0] - indices_00[..., 0].float()
100
- w = indices[..., 1] - indices_00[..., 1].float()
101
- w_00 = (1 - h) * (1 - w)
102
- w_01 = (1 - h) * w
103
- w_10 = h * (1 - w)
104
- w_11 = h * w
105
-
106
- result = torch.zeros(H, W, C, device=values.device,
107
- dtype=values.dtype) # [H, W, C]
108
- count = torch.zeros(H, W, 1, device=values.device,
109
- dtype=values.dtype) # [H, W, 1]
110
- weights = torch.ones_like(values[..., :1]) # [N, 1]
111
-
112
- result, count = scatter_add_nd_with_count(
113
- result, count, indices_00, values * w_00.unsqueeze(1), weights * w_00.unsqueeze(1))
114
- result, count = scatter_add_nd_with_count(
115
- result, count, indices_01, values * w_01.unsqueeze(1), weights * w_01.unsqueeze(1))
116
- result, count = scatter_add_nd_with_count(
117
- result, count, indices_10, values * w_10.unsqueeze(1), weights * w_10.unsqueeze(1))
118
- result, count = scatter_add_nd_with_count(
119
- result, count, indices_11, values * w_11.unsqueeze(1), weights * w_11.unsqueeze(1))
120
-
121
- if return_count:
122
- return result, count
123
-
124
- mask = (count.squeeze(-1) > 0)
125
- result[mask] = result[mask] / count[mask].repeat(1, C)
126
-
127
- return result
128
-
129
-
130
- class MeshRender():
131
- def __init__(
132
- self,
133
- camera_distance=1.45, camera_type='orth',
134
- default_resolution=1024, texture_size=1024,
135
- use_antialias=True, max_mip_level=None, filter_mode='linear',
136
- bake_mode='linear', raster_mode='cr', device='cuda'):
137
-
138
- self.device = device
139
-
140
- self.set_default_render_resolution(default_resolution)
141
- self.set_default_texture_resolution(texture_size)
142
-
143
- self.camera_distance = camera_distance
144
- self.use_antialias = use_antialias
145
- self.max_mip_level = max_mip_level
146
- self.filter_mode = filter_mode
147
-
148
- self.bake_angle_thres = 75
149
- self.bake_unreliable_kernel_size = int(
150
- (2 / 512) * max(self.default_resolution[0], self.default_resolution[1]))
151
- self.bake_mode = bake_mode
152
-
153
- self.raster_mode = raster_mode
154
- if self.raster_mode == 'cr':
155
- import custom_rasterizer as cr
156
- self.raster = cr
157
- else:
158
- raise ValueError(f'No raster named {self.raster_mode}')
159
-
160
- if camera_type == 'orth':
161
- self.ortho_scale = 1.2
162
- self.camera_proj_mat = get_orthographic_projection_matrix(
163
- left=-self.ortho_scale * 0.5, right=self.ortho_scale * 0.5,
164
- bottom=-self.ortho_scale * 0.5, top=self.ortho_scale * 0.5,
165
- near=0.1, far=100
166
- )
167
- elif camera_type == 'perspective':
168
- self.camera_proj_mat = get_perspective_projection_matrix(
169
- 49.13, self.default_resolution[1] / self.default_resolution[0],
170
- 0.01, 100.0
171
- )
172
- else:
173
- raise ValueError(f'No camera type {camera_type}')
174
-
175
- def raster_rasterize(self, pos, tri, resolution, ranges=None, grad_db=True):
176
-
177
- if self.raster_mode == 'cr':
178
- rast_out_db = None
179
- if pos.dim() == 2:
180
- pos = pos.unsqueeze(0)
181
- findices, barycentric = self.raster.rasterize(pos, tri, resolution)
182
- rast_out = torch.cat((barycentric, findices.unsqueeze(-1)), dim=-1)
183
- rast_out = rast_out.unsqueeze(0)
184
- else:
185
- raise ValueError(f'No raster named {self.raster_mode}')
186
-
187
- return rast_out, rast_out_db
188
-
189
- def raster_interpolate(self, uv, rast_out, uv_idx, rast_db=None, diff_attrs=None):
190
-
191
- if self.raster_mode == 'cr':
192
- textd = None
193
- barycentric = rast_out[0, ..., :-1]
194
- findices = rast_out[0, ..., -1]
195
- if uv.dim() == 2:
196
- uv = uv.unsqueeze(0)
197
- textc = self.raster.interpolate(uv, findices, barycentric, uv_idx)
198
- else:
199
- raise ValueError(f'No raster named {self.raster_mode}')
200
-
201
- return textc, textd
202
-
203
- def raster_texture(self, tex, uv, uv_da=None, mip_level_bias=None, mip=None, filter_mode='auto',
204
- boundary_mode='wrap', max_mip_level=None):
205
-
206
- if self.raster_mode == 'cr':
207
- raise NotImplementedError('Texture is not implemented in cr')
208
- else:
209
- raise ValueError(f'No raster named {self.raster_mode}')
210
-
211
- return color
212
-
213
- def raster_antialias(self, color, rast, pos, tri, topology_hash=None, pos_gradient_boost=1.0):
214
-
215
- if self.raster_mode == 'cr':
216
- # Antialias has not been supported yet
217
- color = color
218
- else:
219
- raise ValueError(f'No raster named {self.raster_mode}')
220
-
221
- return color
222
-
223
- def load_mesh(
224
- self,
225
- mesh,
226
- scale_factor=1.15,
227
- auto_center=True,
228
- ):
229
- vtx_pos, pos_idx, vtx_uv, uv_idx, texture_data = load_mesh(mesh)
230
- self.mesh_copy = mesh
231
- self.set_mesh(vtx_pos, pos_idx,
232
- vtx_uv=vtx_uv, uv_idx=uv_idx,
233
- scale_factor=scale_factor, auto_center=auto_center
234
- )
235
- if texture_data is not None:
236
- self.set_texture(texture_data)
237
-
238
- def save_mesh(self):
239
- texture_data = self.get_texture()
240
- texture_data = Image.fromarray((texture_data * 255).astype(np.uint8))
241
- return save_mesh(self.mesh_copy, texture_data)
242
-
243
- def set_mesh(
244
- self,
245
- vtx_pos, pos_idx,
246
- vtx_uv=None, uv_idx=None,
247
- scale_factor=1.15, auto_center=True
248
- ):
249
-
250
- self.vtx_pos = torch.from_numpy(vtx_pos).to(self.device).float()
251
- self.pos_idx = torch.from_numpy(pos_idx).to(self.device).to(torch.int)
252
- if (vtx_uv is not None) and (uv_idx is not None):
253
- self.vtx_uv = torch.from_numpy(vtx_uv).to(self.device).float()
254
- self.uv_idx = torch.from_numpy(uv_idx).to(self.device).to(torch.int)
255
- else:
256
- self.vtx_uv = None
257
- self.uv_idx = None
258
-
259
- self.vtx_pos[:, [0, 1]] = -self.vtx_pos[:, [0, 1]]
260
- self.vtx_pos[:, [1, 2]] = self.vtx_pos[:, [2, 1]]
261
- if (vtx_uv is not None) and (uv_idx is not None):
262
- self.vtx_uv[:, 1] = 1.0 - self.vtx_uv[:, 1]
263
-
264
- if auto_center:
265
- max_bb = (self.vtx_pos - 0).max(0)[0]
266
- min_bb = (self.vtx_pos - 0).min(0)[0]
267
- center = (max_bb + min_bb) / 2
268
- scale = torch.norm(self.vtx_pos - center, dim=1).max() * 2.0
269
- self.vtx_pos = (self.vtx_pos - center) * \
270
- (scale_factor / float(scale))
271
- self.scale_factor = scale_factor
272
-
273
- def set_texture(self, tex):
274
- if isinstance(tex, np.ndarray):
275
- tex = Image.fromarray((tex * 255).astype(np.uint8))
276
- elif isinstance(tex, torch.Tensor):
277
- tex = tex.cpu().numpy()
278
- tex = Image.fromarray((tex * 255).astype(np.uint8))
279
-
280
- tex = tex.resize(self.texture_size).convert('RGB')
281
- tex = np.array(tex) / 255.0
282
- self.tex = torch.from_numpy(tex).to(self.device)
283
- self.tex = self.tex.float()
284
-
285
- def set_default_render_resolution(self, default_resolution):
286
- if isinstance(default_resolution, int):
287
- default_resolution = (default_resolution, default_resolution)
288
- self.default_resolution = default_resolution
289
-
290
- def set_default_texture_resolution(self, texture_size):
291
- if isinstance(texture_size, int):
292
- texture_size = (texture_size, texture_size)
293
- self.texture_size = texture_size
294
-
295
- def get_mesh(self):
296
- vtx_pos = self.vtx_pos.cpu().numpy()
297
- pos_idx = self.pos_idx.cpu().numpy()
298
- vtx_uv = self.vtx_uv.cpu().numpy()
299
- uv_idx = self.uv_idx.cpu().numpy()
300
-
301
- # Invert the coordinate transform applied in set_mesh
302
- vtx_pos[:, [1, 2]] = vtx_pos[:, [2, 1]]
303
- vtx_pos[:, [0, 1]] = -vtx_pos[:, [0, 1]]
304
-
305
- vtx_uv[:, 1] = 1.0 - vtx_uv[:, 1]
306
- return vtx_pos, pos_idx, vtx_uv, uv_idx
307
-
308
- def get_texture(self):
309
- return self.tex.cpu().numpy()
310
-
311
- def to(self, device):
312
- self.device = device
313
-
314
- for attr_name in dir(self):
315
- attr_value = getattr(self, attr_name)
316
- if isinstance(attr_value, torch.Tensor):
317
- setattr(self, attr_name, attr_value.to(self.device))
318
-
319
- def color_rgb_to_srgb(self, image):
320
- if isinstance(image, Image.Image):
321
- image_rgb = torch.tensor(
322
- np.array(image) /
323
- 255.0).float().to(
324
- self.device)
325
- elif isinstance(image, np.ndarray):
326
- image_rgb = torch.tensor(image).float()
327
- else:
328
- image_rgb = image.to(self.device)
329
-
330
- image_srgb = torch.where(
331
- image_rgb <= 0.0031308,
332
- 12.92 * image_rgb,
333
- 1.055 * torch.pow(image_rgb, 1 / 2.4) - 0.055
334
- )
335
-
336
- if isinstance(image, Image.Image):
337
- image_srgb = Image.fromarray(
338
- (image_srgb.cpu().numpy() *
339
- 255).astype(
340
- np.uint8))
341
- elif isinstance(image, np.ndarray):
342
- image_srgb = image_srgb.cpu().numpy()
343
- else:
344
- image_srgb = image_srgb.to(image.device)
345
-
346
- return image_srgb
347
-
348
- def _render(
349
- self,
350
- glctx,
351
- mvp,
352
- pos,
353
- pos_idx,
354
- uv,
355
- uv_idx,
356
- tex,
357
- resolution,
358
- max_mip_level,
359
- keep_alpha,
360
- filter_mode
361
- ):
362
- pos_clip = transform_pos(mvp, pos)
363
- if isinstance(resolution, (int, float)):
364
- resolution = [resolution, resolution]
365
- rast_out, rast_out_db = self.raster_rasterize(
366
- pos_clip, pos_idx, resolution=resolution)
367
-
368
- tex = tex.contiguous()
369
- if filter_mode == 'linear-mipmap-linear':
370
- texc, texd = self.raster_interpolate(
371
- uv[None, ...], rast_out, uv_idx, rast_db=rast_out_db, diff_attrs='all')
372
- color = self.raster_texture(
373
- tex[None, ...], texc, texd, filter_mode='linear-mipmap-linear', max_mip_level=max_mip_level)
374
- else:
375
- texc, _ = self.raster_interpolate(uv[None, ...], rast_out, uv_idx)
376
- color = self.raster_texture(tex[None, ...], texc, filter_mode=filter_mode)
377
-
378
- visible_mask = torch.clamp(rast_out[..., -1:], 0, 1)
379
- color = color * visible_mask # Mask out background.
380
- if self.use_antialias:
381
- color = self.raster_antialias(color, rast_out, pos_clip, pos_idx)
382
-
383
- if keep_alpha:
384
- color = torch.cat([color, visible_mask], dim=-1)
385
- return color[0, ...]
386
-
387
- def render(
388
- self,
389
- elev,
390
- azim,
391
- camera_distance=None,
392
- center=None,
393
- resolution=None,
394
- tex=None,
395
- keep_alpha=True,
396
- bgcolor=None,
397
- filter_mode=None,
398
- return_type='th'
399
- ):
400
-
401
- proj = self.camera_proj_mat
402
- r_mv = get_mv_matrix(
403
- elev=elev,
404
- azim=azim,
405
- camera_distance=self.camera_distance if camera_distance is None else camera_distance,
406
- center=center)
407
- r_mvp = np.matmul(proj, r_mv).astype(np.float32)
408
- if tex is not None:
409
- if isinstance(tex, Image.Image):
410
- tex = torch.tensor(np.array(tex) / 255.0)
411
- elif isinstance(tex, np.ndarray):
412
- tex = torch.tensor(tex)
413
- if tex.dim() == 2:
414
- tex = tex.unsqueeze(-1)
415
- tex = tex.float().to(self.device)
416
- image = self._render(None, r_mvp, self.vtx_pos, self.pos_idx, self.vtx_uv, self.uv_idx,
417
- self.tex if tex is None else tex,
418
- self.default_resolution if resolution is None else resolution,
419
- self.max_mip_level, True, filter_mode if filter_mode else self.filter_mode)
420
- mask = (image[..., [-1]] == 1).float()
421
- if bgcolor is None:
422
- bgcolor = [0 for _ in range(image.shape[-1] - 1)]
423
- image = image * mask + (1 - mask) * \
424
- torch.tensor(bgcolor + [0]).to(self.device)
425
- if keep_alpha == False:
426
- image = image[..., :-1]
427
- if return_type == 'np':
428
- image = image.cpu().numpy()
429
- elif return_type == 'pl':
430
- image = image.squeeze(-1).cpu().numpy() * 255
431
- image = Image.fromarray(image.astype(np.uint8))
432
- return image
433
-
434
- def render_normal(
435
- self,
436
- elev,
437
- azim,
438
- camera_distance=None,
439
- center=None,
440
- resolution=None,
441
- bg_color=[1, 1, 1],
442
- use_abs_coor=False,
443
- normalize_rgb=True,
444
- return_type='th'
445
- ):
446
-
447
- pos_camera, pos_clip = self.get_pos_from_mvp(elev, azim, camera_distance, center)
448
- if resolution is None:
449
- resolution = self.default_resolution
450
- if isinstance(resolution, (int, float)):
451
- resolution = [resolution, resolution]
452
- rast_out, rast_out_db = self.raster_rasterize(
453
- pos_clip, self.pos_idx, resolution=resolution)
454
-
455
- if use_abs_coor:
456
- mesh_triangles = self.vtx_pos[self.pos_idx[:, :3], :]
457
- else:
458
- pos_camera = pos_camera[:, :3] / pos_camera[:, 3:4]
459
- mesh_triangles = pos_camera[self.pos_idx[:, :3], :]
460
- face_normals = F.normalize(
461
- torch.cross(mesh_triangles[:,
462
- 1,
463
- :] - mesh_triangles[:,
464
- 0,
465
- :],
466
- mesh_triangles[:,
467
- 2,
468
- :] - mesh_triangles[:,
469
- 0,
470
- :],
471
- dim=-1),
472
- dim=-1)
473
-
474
- vertex_normals = trimesh.geometry.mean_vertex_normals(vertex_count=self.vtx_pos.shape[0],
475
- faces=self.pos_idx.cpu(),
476
- face_normals=face_normals.cpu(), )
477
- vertex_normals = torch.from_numpy(
478
- vertex_normals).float().to(self.device).contiguous()
479
-
480
- # Interpolate normal values across the rasterized pixels
481
- normal, _ = self.raster_interpolate(
482
- vertex_normals[None, ...], rast_out, self.pos_idx)
483
-
484
- visible_mask = torch.clamp(rast_out[..., -1:], 0, 1)
485
- normal = normal * visible_mask + \
486
- torch.tensor(bg_color, dtype=torch.float32, device=self.device) * (1 -
487
- visible_mask) # Mask out background.
488
-
489
- if normalize_rgb:
490
- normal = (normal + 1) * 0.5
491
- if self.use_antialias:
492
- normal = self.raster_antialias(normal, rast_out, pos_clip, self.pos_idx)
493
-
494
- image = normal[0, ...]
495
- if return_type == 'np':
496
- image = image.cpu().numpy()
497
- elif return_type == 'pl':
498
- image = image.cpu().numpy() * 255
499
- image = Image.fromarray(image.astype(np.uint8))
500
-
501
- return image
502
-
503
- def convert_normal_map(self, image):
504
- # blue is front, red is left, green is top
505
- if isinstance(image, Image.Image):
506
- image = np.array(image)
507
- mask = (image == [255, 255, 255]).all(axis=-1)
508
-
509
- image = (image / 255.0) * 2.0 - 1.0
510
-
511
- image[..., [1]] = -image[..., [1]]
512
- image[..., [1, 2]] = image[..., [2, 1]]
513
- image[..., [0]] = -image[..., [0]]
514
-
515
- image = (image + 1.0) * 0.5
516
-
517
- image = (image * 255).astype(np.uint8)
518
- image[mask] = [127, 127, 255]
519
-
520
- return Image.fromarray(image)
521
-
522
- def get_pos_from_mvp(self, elev, azim, camera_distance, center):
523
- proj = self.camera_proj_mat
524
- r_mv = get_mv_matrix(
525
- elev=elev,
526
- azim=azim,
527
- camera_distance=self.camera_distance if camera_distance is None else camera_distance,
528
- center=center)
529
-
530
- pos_camera = transform_pos(r_mv, self.vtx_pos, keepdim=True)
531
- pos_clip = transform_pos(proj, pos_camera)
532
-
533
- return pos_camera, pos_clip
534
-
535
- def render_depth(
536
- self,
537
- elev,
538
- azim,
539
- camera_distance=None,
540
- center=None,
541
- resolution=None,
542
- return_type='th'
543
- ):
544
- pos_camera, pos_clip = self.get_pos_from_mvp(elev, azim, camera_distance, center)
545
-
546
- if resolution is None:
547
- resolution = self.default_resolution
548
- if isinstance(resolution, (int, float)):
549
- resolution = [resolution, resolution]
550
- rast_out, rast_out_db = self.raster_rasterize(
551
- pos_clip, self.pos_idx, resolution=resolution)
552
-
553
- pos_camera = pos_camera[:, :3] / pos_camera[:, 3:4]
554
- tex_depth = pos_camera[:, 2].reshape(1, -1, 1).contiguous()
555
-
556
- # Interpolate depth values across the rasterized pixels
557
- depth, _ = self.raster_interpolate(tex_depth, rast_out, self.pos_idx)
558
-
559
- visible_mask = torch.clamp(rast_out[..., -1:], 0, 1)
560
- depth_max, depth_min = depth[visible_mask >
561
- 0].max(), depth[visible_mask > 0].min()
562
- depth = (depth - depth_min) / (depth_max - depth_min)
563
-
564
- depth = depth * visible_mask # Mask out background.
565
- if self.use_antialias:
566
- depth = self.raster_antialias(depth, rast_out, pos_clip, self.pos_idx)
567
-
568
- image = depth[0, ...]
569
- if return_type == 'np':
570
- image = image.cpu().numpy()
571
- elif return_type == 'pl':
572
- image = image.squeeze(-1).cpu().numpy() * 255
573
- image = Image.fromarray(image.astype(np.uint8))
574
- return image
575
-
576
- def render_position(self, elev, azim, camera_distance=None, center=None,
577
- resolution=None, bg_color=[1, 1, 1], return_type='th'):
578
- pos_camera, pos_clip = self.get_pos_from_mvp(elev, azim, camera_distance, center)
579
- if resolution is None:
580
- resolution = self.default_resolution
581
- if isinstance(resolution, (int, float)):
582
- resolution = [resolution, resolution]
583
- rast_out, rast_out_db = self.raster_rasterize(
584
- pos_clip, self.pos_idx, resolution=resolution)
585
-
586
- tex_position = 0.5 - self.vtx_pos[:, :3] / self.scale_factor
587
- tex_position = tex_position.contiguous()
588
-
589
- # Interpolate position values across the rasterized pixels
590
- position, _ = self.raster_interpolate(
591
- tex_position[None, ...], rast_out, self.pos_idx)
592
-
593
- visible_mask = torch.clamp(rast_out[..., -1:], 0, 1)
594
-
595
- position = position * visible_mask + \
596
- torch.tensor(bg_color, dtype=torch.float32, device=self.device) * (1 -
597
- visible_mask) # Mask out background.
598
- if self.use_antialias:
599
- position = self.raster_antialias(position, rast_out, pos_clip, self.pos_idx)
600
-
601
- image = position[0, ...]
602
-
603
- if return_type == 'np':
604
- image = image.cpu().numpy()
605
- elif return_type == 'pl':
606
- image = image.squeeze(-1).cpu().numpy() * 255
607
- image = Image.fromarray(image.astype(np.uint8))
608
- return image
609
-
610
- def render_uvpos(self, return_type='th'):
611
- image = self.uv_feature_map(self.vtx_pos * 0.5 + 0.5)
612
- if return_type == 'np':
613
- image = image.cpu().numpy()
614
- elif return_type == 'pl':
615
- image = image.cpu().numpy() * 255
616
- image = Image.fromarray(image.astype(np.uint8))
617
- return image
618
-
619
- def uv_feature_map(self, vert_feat, bg=None):
620
- vtx_uv = self.vtx_uv * 2 - 1.0
621
- vtx_uv = torch.cat(
622
- [vtx_uv, torch.zeros_like(self.vtx_uv)], dim=1).unsqueeze(0)
623
- vtx_uv[..., -1] = 1
624
- uv_idx = self.uv_idx
625
- rast_out, rast_out_db = self.raster_rasterize(
626
- vtx_uv, uv_idx, resolution=self.texture_size)
627
- feat_map, _ = self.raster_interpolate(vert_feat[None, ...], rast_out, uv_idx)
628
- feat_map = feat_map[0, ...]
629
- if bg is not None:
630
- visible_mask = torch.clamp(rast_out[..., -1:], 0, 1)[0, ...]
631
- feat_map[visible_mask == 0] = bg
632
- return feat_map
633
-
634
- def render_sketch_from_geometry(self, normal_image, depth_image):
635
- normal_image_np = normal_image.cpu().numpy()
636
- depth_image_np = depth_image.cpu().numpy()
637
-
638
- normal_image_np = (normal_image_np * 255).astype(np.uint8)
639
- depth_image_np = (depth_image_np * 255).astype(np.uint8)
640
- normal_image_np = cv2.cvtColor(normal_image_np, cv2.COLOR_RGB2GRAY)
641
-
642
- normal_edges = cv2.Canny(normal_image_np, 80, 150)
643
- depth_edges = cv2.Canny(depth_image_np, 30, 80)
644
-
645
- combined_edges = np.maximum(normal_edges, depth_edges)
646
-
647
- sketch_image = torch.from_numpy(combined_edges).to(
648
- normal_image.device).float() / 255.0
649
- sketch_image = sketch_image.unsqueeze(-1)
650
-
651
- return sketch_image
652
-
653
- def render_sketch_from_depth(self, depth_image):
654
- depth_image_np = depth_image.cpu().numpy()
655
- depth_image_np = (depth_image_np * 255).astype(np.uint8)
656
- depth_edges = cv2.Canny(depth_image_np, 30, 80)
657
- combined_edges = depth_edges
658
- sketch_image = torch.from_numpy(combined_edges).to(
659
- depth_image.device).float() / 255.0
660
- sketch_image = sketch_image.unsqueeze(-1)
661
- return sketch_image
662
-
663
- def back_project(self, image, elev, azim,
664
- camera_distance=None, center=None, method=None):
665
- if isinstance(image, Image.Image):
666
- image = torch.tensor(np.array(image) / 255.0)
667
- elif isinstance(image, np.ndarray):
668
- image = torch.tensor(image)
669
- if image.dim() == 2:
670
- image = image.unsqueeze(-1)
671
- image = image.float().to(self.device)
672
- resolution = image.shape[:2]
673
- channel = image.shape[-1]
674
- texture = torch.zeros(self.texture_size + (channel,)).to(self.device)
675
- cos_map = torch.zeros(self.texture_size + (1,)).to(self.device)
676
-
677
- proj = self.camera_proj_mat
678
- r_mv = get_mv_matrix(
679
- elev=elev,
680
- azim=azim,
681
- camera_distance=self.camera_distance if camera_distance is None else camera_distance,
682
- center=center)
683
- pos_camera = transform_pos(r_mv, self.vtx_pos, keepdim=True)
684
- pos_clip = transform_pos(proj, pos_camera)
685
- pos_camera = pos_camera[:, :3] / pos_camera[:, 3:4]
686
- v0 = pos_camera[self.pos_idx[:, 0], :]
687
- v1 = pos_camera[self.pos_idx[:, 1], :]
688
- v2 = pos_camera[self.pos_idx[:, 2], :]
689
- face_normals = F.normalize(
690
- torch.cross(
691
- v1 - v0,
692
- v2 - v0,
693
- dim=-1),
694
- dim=-1)
695
- vertex_normals = trimesh.geometry.mean_vertex_normals(vertex_count=self.vtx_pos.shape[0],
696
- faces=self.pos_idx.cpu(),
697
- face_normals=face_normals.cpu(), )
698
- vertex_normals = torch.from_numpy(
699
- vertex_normals).float().to(self.device).contiguous()
700
- tex_depth = pos_camera[:, 2].reshape(1, -1, 1).contiguous()
701
- rast_out, rast_out_db = self.raster_rasterize(
702
- pos_clip, self.pos_idx, resolution=resolution)
703
- visible_mask = torch.clamp(rast_out[..., -1:], 0, 1)[0, ...]
704
-
705
- normal, _ = self.raster_interpolate(
706
- vertex_normals[None, ...], rast_out, self.pos_idx)
707
- normal = normal[0, ...]
708
- uv, _ = self.raster_interpolate(self.vtx_uv[None, ...], rast_out, self.uv_idx)
709
- depth, _ = self.raster_interpolate(tex_depth, rast_out, self.pos_idx)
710
- depth = depth[0, ...]
711
-
712
- depth_max, depth_min = depth[visible_mask >
713
- 0].max(), depth[visible_mask > 0].min()
714
- depth_normalized = (depth - depth_min) / (depth_max - depth_min)
715
- depth_image = depth_normalized * visible_mask # Mask out background.
716
-
717
- sketch_image = self.render_sketch_from_depth(depth_image)
718
-
719
- lookat = torch.tensor([[0, 0, -1]], device=self.device)
720
- cos_image = torch.nn.functional.cosine_similarity(
721
- lookat, normal.view(-1, 3))
722
- cos_image = cos_image.view(normal.shape[0], normal.shape[1], 1)
723
-
724
- cos_thres = np.cos(self.bake_angle_thres / 180 * np.pi)
725
- cos_image[cos_image < cos_thres] = 0
726
-
727
- # shrink
728
- kernel_size = self.bake_unreliable_kernel_size * 2 + 1
729
- kernel = torch.ones(
730
- (1, 1, kernel_size, kernel_size), dtype=torch.float32).to(
731
- sketch_image.device)
732
-
733
- visible_mask = visible_mask.permute(2, 0, 1).unsqueeze(0).float()
734
- visible_mask = F.conv2d(
735
- 1.0 - visible_mask,
736
- kernel,
737
- padding=kernel_size // 2)
738
- visible_mask = 1.0 - (visible_mask > 0).float() # binarize
739
- visible_mask = visible_mask.squeeze(0).permute(1, 2, 0)
740
-
741
- sketch_image = sketch_image.permute(2, 0, 1).unsqueeze(0)
742
- sketch_image = F.conv2d(sketch_image, kernel, padding=kernel_size // 2)
743
- sketch_image = (sketch_image > 0).float() # binarize
744
- sketch_image = sketch_image.squeeze(0).permute(1, 2, 0)
745
- visible_mask = visible_mask * (sketch_image < 0.5)
746
-
747
- cos_image[visible_mask == 0] = 0
748
-
749
- method = self.bake_mode if method is None else method
750
-
751
- if method == 'linear':
752
- proj_mask = (visible_mask != 0).view(-1)
753
- uv = uv.squeeze(0).contiguous().view(-1, 2)[proj_mask]
754
- image = image.squeeze(0).contiguous().view(-1, channel)[proj_mask]
755
- cos_image = cos_image.contiguous().view(-1, 1)[proj_mask]
756
- sketch_image = sketch_image.contiguous().view(-1, 1)[proj_mask]
757
-
758
- texture = linear_grid_put_2d(
759
- self.texture_size[1], self.texture_size[0], uv[..., [1, 0]], image)
760
- cos_map = linear_grid_put_2d(
761
- self.texture_size[1], self.texture_size[0], uv[..., [1, 0]], cos_image)
762
- boundary_map = linear_grid_put_2d(
763
- self.texture_size[1], self.texture_size[0], uv[..., [1, 0]], sketch_image)
764
- else:
765
- raise ValueError(f'No bake mode {method}')
766
-
767
- return texture, cos_map, boundary_map
768
-
769
- def bake_texture(self, colors, elevs, azims,
770
- camera_distance=None, center=None, exp=6, weights=None):
771
- for i in range(len(colors)):
772
- if isinstance(colors[i], Image.Image):
773
- colors[i] = torch.tensor(
774
- np.array(
775
- colors[i]) / 255.0,
776
- device=self.device).float()
777
- if weights is None:
778
- weights = [1.0 for _ in range(len(colors))]
779
- textures = []
780
- cos_maps = []
781
- for color, elev, azim, weight in zip(colors, elevs, azims, weights):
782
- texture, cos_map, _ = self.back_project(
783
- color, elev, azim, camera_distance, center)
784
- cos_map = weight * (cos_map ** exp)
785
- textures.append(texture)
786
- cos_maps.append(cos_map)
787
-
788
- texture_merge, trust_map_merge = self.fast_bake_texture(
789
- textures, cos_maps)
790
- return texture_merge, trust_map_merge
791
-
792
- @torch.no_grad()
793
- def fast_bake_texture(self, textures, cos_maps):
794
-
795
- channel = textures[0].shape[-1]
796
- texture_merge = torch.zeros(
797
- self.texture_size + (channel,)).to(self.device)
798
- trust_map_merge = torch.zeros(self.texture_size + (1,)).to(self.device)
799
- for texture, cos_map in zip(textures, cos_maps):
800
- view_sum = (cos_map > 0).sum()
801
- painted_sum = ((cos_map > 0) * (trust_map_merge > 0)).sum()
802
- if painted_sum / view_sum > 0.99:
803
- continue
804
- texture_merge += texture * cos_map
805
- trust_map_merge += cos_map
806
- texture_merge = texture_merge / torch.clamp(trust_map_merge, min=1E-8)
807
-
808
- return texture_merge, trust_map_merge > 1E-8
809
-
810
- def uv_inpaint(self, texture, mask):
811
-
812
- if isinstance(texture, torch.Tensor):
813
- texture_np = texture.cpu().numpy()
814
- elif isinstance(texture, np.ndarray):
815
- texture_np = texture
816
- elif isinstance(texture, Image.Image):
817
- texture_np = np.array(texture) / 255.0
818
-
819
- vtx_pos, pos_idx, vtx_uv, uv_idx = self.get_mesh()
820
-
821
- texture_np, mask = meshVerticeInpaint(
822
- texture_np, mask, vtx_pos, vtx_uv, pos_idx, uv_idx)
823
-
824
- texture_np = cv2.inpaint(
825
- (texture_np *
826
- 255).astype(
827
- np.uint8),
828
- 255 -
829
- mask,
830
- 3,
831
- cv2.INPAINT_NS)
832
-
833
- return texture_np
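
For orientation, a rough end-to-end sketch of how MeshRender is typically driven for texture baking. It is not part of the deleted file; it assumes a CUDA device, the custom_rasterizer extension, and a UV-unwrapped mesh at a placeholder path, and it reuses the rendered normal maps as stand-ins for the diffusion-painted views:

import trimesh

renderer = MeshRender(camera_type='orth', default_resolution=1024,
                      texture_size=1024, device='cuda')
renderer.load_mesh(trimesh.load('demo_mesh.glb', force='mesh'))  # placeholder path

elevs, azims = [0, 0, 0, 0], [0, 90, 180, 270]
normal_maps = [renderer.render_normal(e, a, return_type='pl')
               for e, a in zip(elevs, azims)]       # conditioning images for the paint model
views = normal_maps                                 # stand-in: real usage feeds the painted views here

texture, trust_mask = renderer.bake_texture(views, elevs, azims, exp=6,
                                            weights=[1.0] * len(views))
mask_np = (trust_mask.squeeze(-1).cpu().numpy() * 255).astype('uint8')
texture_np = renderer.uv_inpaint(texture, mask_np)  # fill texels no view could see
renderer.set_texture(texture_np / 255.0)
textured_mesh = renderer.save_mesh()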
 
 
hy3dgen/texgen/differentiable_renderer/mesh_utils.py DELETED
@@ -1,44 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
- import trimesh
26
-
27
-
28
- def load_mesh(mesh):
29
- vtx_pos = mesh.vertices if hasattr(mesh, 'vertices') else None
30
- pos_idx = mesh.faces if hasattr(mesh, 'faces') else None
31
-
32
- vtx_uv = mesh.visual.uv if hasattr(mesh.visual, 'uv') else None
33
- uv_idx = mesh.faces if hasattr(mesh, 'faces') else None
34
-
35
- texture_data = None
36
-
37
- return vtx_pos, pos_idx, vtx_uv, uv_idx, texture_data
38
-
39
-
40
- def save_mesh(mesh, texture_data):
41
- material = trimesh.visual.texture.SimpleMaterial(image=texture_data, diffuse=(255, 255, 255))
42
- texture_visuals = trimesh.visual.TextureVisuals(uv=mesh.visual.uv, image=texture_data, material=material)
43
- mesh.visual = texture_visuals
44
- return mesh
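
A short usage sketch of the two helpers; the file names are placeholders and save_mesh assumes the trimesh object already carries UV coordinates:

import numpy as np
import trimesh
from PIL import Image

mesh = trimesh.load('input_mesh.glb', force='mesh')
vtx_pos, pos_idx, vtx_uv, uv_idx, _ = load_mesh(mesh)

texture = Image.fromarray(np.zeros((1024, 1024, 3), dtype=np.uint8))  # placeholder texture
textured = save_mesh(mesh, texture)
textured.export('output_mesh.glb')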
 
 
hy3dgen/texgen/differentiable_renderer/setup.py DELETED
@@ -1,48 +0,0 @@
1
- from setuptools import setup, Extension
2
- import pybind11
3
- import sys
4
- import platform
5
-
6
- def get_platform_specific_args():
7
- system = platform.system().lower()
8
- cpp_std = 'c++14' # Make configurable if needed
9
-
10
- if sys.platform == 'win32':
11
- compile_args = ['/O2', f'/std:{cpp_std}', '/EHsc', '/MP', '/DWIN32_LEAN_AND_MEAN', '/bigobj']
12
- link_args = []
13
- extra_includes = []
14
- elif system == 'linux':
15
- compile_args = ['-O3', f'-std={cpp_std}', '-fPIC', '-Wall', '-Wextra', '-pthread']
16
- link_args = ['-fPIC', '-pthread']
17
- extra_includes = []
18
- elif sys.platform == 'darwin':
19
- compile_args = ['-O3', f'-std={cpp_std}', '-fPIC', '-Wall', '-Wextra',
20
- '-stdlib=libc++', '-mmacosx-version-min=10.14']
21
- link_args = ['-fPIC', '-stdlib=libc++', '-mmacosx-version-min=10.14', '-dynamiclib']
22
- extra_includes = []
23
- else:
24
- raise RuntimeError(f"Unsupported platform: {system}")
25
-
26
- return compile_args, link_args, extra_includes
27
-
28
- extra_compile_args, extra_link_args, platform_includes = get_platform_specific_args()
29
- include_dirs = [pybind11.get_include(), pybind11.get_include(user=True)]
30
- include_dirs.extend(platform_includes)
31
-
32
- ext_modules = [
33
- Extension(
34
- "mesh_processor",
35
- ["mesh_processor.cpp"],
36
- include_dirs=include_dirs,
37
- language='c++',
38
- extra_compile_args=extra_compile_args,
39
- extra_link_args=extra_link_args,
40
- ),
41
- ]
42
-
43
- setup(
44
- name="mesh_processor",
45
- ext_modules=ext_modules,
46
- install_requires=['pybind11>=2.6.0'],
47
- python_requires='>=3.6',
48
- )
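
The extension is presumably built in place with the standard setuptools invocation, python setup.py build_ext --inplace; the resulting mesh_processor module exposes the same meshVerticeInpaint signature as the pure-Python mesh_processor.py shown earlier.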
 
 
hy3dgen/texgen/hunyuanpaint/__init__.py DELETED
@@ -1,23 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
 
 
hy3dgen/texgen/hunyuanpaint/pipeline.py DELETED
@@ -1,554 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
- from typing import Any, Callable, Dict, List, Optional, Union
26
-
27
- import numpy
28
- import numpy as np
29
- import torch
30
- import torch.distributed
31
- import torch.utils.checkpoint
32
- from PIL import Image
33
- from diffusers import (
34
- AutoencoderKL,
35
- DiffusionPipeline,
36
- ImagePipelineOutput
37
- )
38
- from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback
39
- from diffusers.image_processor import PipelineImageInput
40
- from diffusers.image_processor import VaeImageProcessor
41
- from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
42
- from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipeline, retrieve_timesteps, \
43
- rescale_noise_cfg
44
- from diffusers.schedulers import KarrasDiffusionSchedulers
45
- from diffusers.utils import deprecate
46
- from einops import rearrange
47
- from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
48
-
49
- from .unet.modules import UNet2p5DConditionModel
50
-
51
-
52
- def to_rgb_image(maybe_rgba: Image.Image):
53
- if maybe_rgba.mode == 'RGB':
54
- return maybe_rgba
55
- elif maybe_rgba.mode == 'RGBA':
56
- rgba = maybe_rgba
57
- img = numpy.random.randint(127, 128, size=[rgba.size[1], rgba.size[0], 3], dtype=numpy.uint8)
58
- img = Image.fromarray(img, 'RGB')
59
- img.paste(rgba, mask=rgba.getchannel('A'))
60
- return img
61
- else:
62
- raise ValueError("Unsupported image type.", maybe_rgba.mode)
63
-
64
-
65
- class HunyuanPaintPipeline(StableDiffusionPipeline):
66
-
67
- def __init__(
68
- self,
69
- vae: AutoencoderKL,
70
- text_encoder: CLIPTextModel,
71
- tokenizer: CLIPTokenizer,
72
- unet: UNet2p5DConditionModel,
73
- scheduler: KarrasDiffusionSchedulers,
74
- feature_extractor: CLIPImageProcessor,
75
- safety_checker=None,
76
- use_torch_compile=False,
77
- ):
78
- DiffusionPipeline.__init__(self)
79
-
80
- safety_checker = None
81
- self.register_modules(
82
- vae=torch.compile(vae) if use_torch_compile else vae,
83
- text_encoder=text_encoder,
84
- tokenizer=tokenizer,
85
- unet=unet,
86
- scheduler=scheduler,
87
- safety_checker=safety_checker,
88
- feature_extractor=torch.compile(feature_extractor) if use_torch_compile else feature_extractor,
89
- )
90
- self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
91
- self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
92
-
93
- @torch.no_grad()
94
- def encode_images(self, images):
95
- B = images.shape[0]
96
- images = rearrange(images, 'b n c h w -> (b n) c h w')
97
-
98
- dtype = next(self.vae.parameters()).dtype
99
- images = (images - 0.5) * 2.0
100
- posterior = self.vae.encode(images.to(dtype)).latent_dist
101
- latents = posterior.sample() * self.vae.config.scaling_factor
102
-
103
- latents = rearrange(latents, '(b n) c h w -> b n c h w', b=B)
104
- return latents
105
-
106
- @torch.no_grad()
107
- def __call__(
108
- self,
109
- image: Image.Image = None,
110
- prompt=None,
111
- negative_prompt='watermark, ugly, deformed, noisy, blurry, low contrast',
112
- *args,
113
- num_images_per_prompt: Optional[int] = 1,
114
- guidance_scale=2.0,
115
- output_type: Optional[str] = "pil",
116
- width=512,
117
- height=512,
118
- num_inference_steps=28,
119
- return_dict=True,
120
- **cached_condition,
121
- ):
122
- if image is None:
123
- raise ValueError("Inputting embeddings not supported for this pipeline. Please pass an image.")
124
- assert not isinstance(image, torch.Tensor)
125
-
126
- image = to_rgb_image(image)
127
-
128
- image_vae = torch.tensor(np.array(image) / 255.0)
129
- image_vae = image_vae.unsqueeze(0).permute(0, 3, 1, 2).unsqueeze(0)
130
- image_vae = image_vae.to(device=self.vae.device, dtype=self.vae.dtype)
131
-
132
- batch_size = image_vae.shape[0]
133
- assert batch_size == 1
134
- assert num_images_per_prompt == 1
135
-
136
- ref_latents = self.encode_images(image_vae)
137
-
138
- def convert_pil_list_to_tensor(images):
139
- bg_c = [1., 1., 1.]
140
- images_tensor = []
141
- for batch_imgs in images:
142
- view_imgs = []
143
- for pil_img in batch_imgs:
144
- img = numpy.asarray(pil_img, dtype=numpy.float32) / 255.
145
- if img.shape[2] > 3:
146
- alpha = img[:, :, 3:]
147
- img = img[:, :, :3] * alpha + bg_c * (1 - alpha)
148
- img = torch.from_numpy(img).permute(2, 0, 1).unsqueeze(0).contiguous().half().to("cuda")
149
- view_imgs.append(img)
150
- view_imgs = torch.cat(view_imgs, dim=0)
151
- images_tensor.append(view_imgs.unsqueeze(0))
152
-
153
- images_tensor = torch.cat(images_tensor, dim=0)
154
- return images_tensor
155
-
156
- if "normal_imgs" in cached_condition:
157
-
158
- if isinstance(cached_condition["normal_imgs"], List):
159
- cached_condition["normal_imgs"] = convert_pil_list_to_tensor(cached_condition["normal_imgs"])
160
-
161
- cached_condition['normal_imgs'] = self.encode_images(cached_condition["normal_imgs"])
162
-
163
- if "position_imgs" in cached_condition:
164
-
165
- if isinstance(cached_condition["position_imgs"], List):
166
- cached_condition["position_imgs"] = convert_pil_list_to_tensor(cached_condition["position_imgs"])
167
-
168
- cached_condition["position_imgs"] = self.encode_images(cached_condition["position_imgs"])
169
-
170
- if 'camera_info_gen' in cached_condition:
171
- camera_info = cached_condition['camera_info_gen'] # B,N
172
- if isinstance(camera_info, List):
173
- camera_info = torch.tensor(camera_info)
174
- camera_info = camera_info.to(image_vae.device).to(torch.int64)
175
- cached_condition['camera_info_gen'] = camera_info
176
- if 'camera_info_ref' in cached_condition:
177
- camera_info = cached_condition['camera_info_ref'] # B,N
178
- if isinstance(camera_info, List):
179
- camera_info = torch.tensor(camera_info)
180
- camera_info = camera_info.to(image_vae.device).to(torch.int64)
181
- cached_condition['camera_info_ref'] = camera_info
182
-
183
- cached_condition['ref_latents'] = ref_latents
184
-
185
- if guidance_scale > 1:
186
- negative_ref_latents = torch.zeros_like(cached_condition['ref_latents'])
187
- cached_condition['ref_latents'] = torch.cat([negative_ref_latents, cached_condition['ref_latents']])
188
- cached_condition['ref_scale'] = torch.as_tensor([0.0, 1.0]).to(cached_condition['ref_latents'])
189
- if "normal_imgs" in cached_condition:
190
- cached_condition['normal_imgs'] = torch.cat(
191
- (cached_condition['normal_imgs'], cached_condition['normal_imgs']))
192
-
193
- if "position_imgs" in cached_condition:
194
- cached_condition['position_imgs'] = torch.cat(
195
- (cached_condition['position_imgs'], cached_condition['position_imgs']))
196
-
197
- if 'position_maps' in cached_condition:
198
- cached_condition['position_maps'] = torch.cat(
199
- (cached_condition['position_maps'], cached_condition['position_maps']))
200
-
201
- if 'camera_info_gen' in cached_condition:
202
- cached_condition['camera_info_gen'] = torch.cat(
203
- (cached_condition['camera_info_gen'], cached_condition['camera_info_gen']))
204
- if 'camera_info_ref' in cached_condition:
205
- cached_condition['camera_info_ref'] = torch.cat(
206
- (cached_condition['camera_info_ref'], cached_condition['camera_info_ref']))
207
-
208
- prompt_embeds = self.unet.learned_text_clip_gen.repeat(num_images_per_prompt, 1, 1)
209
- negative_prompt_embeds = torch.zeros_like(prompt_embeds)
210
-
211
- latents: torch.Tensor = self.denoise(
212
- None,
213
- *args,
214
- cross_attention_kwargs=None,
215
- guidance_scale=guidance_scale,
216
- num_images_per_prompt=num_images_per_prompt,
217
- prompt_embeds=prompt_embeds,
218
- negative_prompt_embeds=negative_prompt_embeds,
219
- num_inference_steps=num_inference_steps,
220
- output_type='latent',
221
- width=width,
222
- height=height,
223
- **cached_condition
224
- ).images
225
-
226
- if not output_type == "latent":
227
- image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
228
- else:
229
- image = latents
230
-
231
- image = self.image_processor.postprocess(image, output_type=output_type)
232
- if not return_dict:
233
- return (image,)
234
-
235
- return ImagePipelineOutput(images=image)
236
-
237
- def denoise(
238
- self,
239
- prompt: Union[str, List[str]] = None,
240
- height: Optional[int] = None,
241
- width: Optional[int] = None,
242
- num_inference_steps: int = 50,
243
- timesteps: List[int] = None,
244
- sigmas: List[float] = None,
245
- guidance_scale: float = 7.5,
246
- negative_prompt: Optional[Union[str, List[str]]] = None,
247
- num_images_per_prompt: Optional[int] = 1,
248
- eta: float = 0.0,
249
- generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
250
- latents: Optional[torch.Tensor] = None,
251
- prompt_embeds: Optional[torch.Tensor] = None,
252
- negative_prompt_embeds: Optional[torch.Tensor] = None,
253
- ip_adapter_image: Optional[PipelineImageInput] = None,
254
- ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
255
- output_type: Optional[str] = "pil",
256
- return_dict: bool = True,
257
- cross_attention_kwargs: Optional[Dict[str, Any]] = None,
258
- guidance_rescale: float = 0.0,
259
- clip_skip: Optional[int] = None,
260
- callback_on_step_end: Optional[
261
- Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
262
- ] = None,
263
- callback_on_step_end_tensor_inputs: List[str] = ["latents"],
264
- **kwargs,
265
- ):
266
- r"""
267
- The core denoising routine used by the pipeline's `__call__` for generation.
268
-
269
- Args:
270
- prompt (`str` or `List[str]`, *optional*):
271
- The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
272
- height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
273
- The height in pixels of the generated image.
274
- width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
275
- The width in pixels of the generated image.
276
- num_inference_steps (`int`, *optional*, defaults to 50):
277
- The number of denoising steps. More denoising steps usually lead to a higher quality image at the
278
- expense of slower inference.
279
- timesteps (`List[int]`, *optional*):
280
- Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
281
- in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
282
- passed will be used. Must be in descending order.
283
- sigmas (`List[float]`, *optional*):
284
- Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
285
- their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
286
- will be used.
287
- guidance_scale (`float`, *optional*, defaults to 7.5):
288
- A higher guidance scale value encourages the model to generate images closely linked to the text
289
- `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
290
- negative_prompt (`str` or `List[str]`, *optional*):
291
- The prompt or prompts to guide what to not include in image generation. If not defined, you need to
292
- pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
293
- num_images_per_prompt (`int`, *optional*, defaults to 1):
294
- The number of images to generate per prompt.
295
- eta (`float`, *optional*, defaults to 0.0):
296
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
297
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
298
- generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
299
- A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
300
- generation deterministic.
301
- latents (`torch.Tensor`, *optional*):
302
- Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
303
- generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
304
- tensor is generated by sampling using the supplied random `generator`.
305
- prompt_embeds (`torch.Tensor`, *optional*):
306
- Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
307
- provided, text embeddings are generated from the `prompt` input argument.
308
- negative_prompt_embeds (`torch.Tensor`, *optional*):
309
- Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
310
- not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
311
- ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
312
- ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
313
- Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
314
- IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
315
- contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
316
- provided, embeddings are computed from the `ip_adapter_image` input argument.
317
- output_type (`str`, *optional*, defaults to `"pil"`):
318
- The output format of the generated image. Choose between `PIL.Image` or `np.array`.
319
- return_dict (`bool`, *optional*, defaults to `True`):
320
- Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
321
- plain tuple.
322
- cross_attention_kwargs (`dict`, *optional*):
323
- A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
324
- [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
325
- guidance_rescale (`float`, *optional*, defaults to 0.0):
326
- Guidance rescale factor from [Common Diffusion Noise Schedules and Sample Steps are
327
- Flawed](https://arxiv.org/pdf/2305.08891.pdf). Guidance rescale factor should fix overexposure when
328
- using zero terminal SNR.
329
- clip_skip (`int`, *optional*):
330
- Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
331
- the output of the pre-final layer will be used for computing the prompt embeddings.
332
- callback_on_step_end (`Callable`, `PipelineCallback`, `MultiPipelineCallbacks`, *optional*):
333
- A function or a subclass of `PipelineCallback` or `MultiPipelineCallbacks` that is called at the end of
334
- each denoising step during the inference. with the following arguments: `callback_on_step_end(self:
335
- DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)`. `callback_kwargs` will include a
336
- list of all tensors as specified by `callback_on_step_end_tensor_inputs`.
337
- callback_on_step_end_tensor_inputs (`List`, *optional*):
338
- The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
339
- will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
340
- `._callback_tensor_inputs` attribute of your pipeline class.
341
-
342
- Examples:
343
-
344
- Returns:
345
- [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
346
- If `return_dict` is `True`, [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] is returned,
347
- otherwise a `tuple` is returned where the first element is a list with the generated images and the
348
- second element is a list of `bool`s indicating whether the corresponding generated image contains
349
- "not-safe-for-work" (nsfw) content.
350
- """
351
-
352
- callback = kwargs.pop("callback", None)
353
- callback_steps = kwargs.pop("callback_steps", None)
354
-
355
- if callback is not None:
356
- deprecate(
357
- "callback",
358
- "1.0.0",
359
- "Passing `callback` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
360
- )
361
- if callback_steps is not None:
362
- deprecate(
363
- "callback_steps",
364
- "1.0.0",
365
- "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
366
- )
367
-
368
- if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
369
- callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
370
-
371
- # 0. Default height and width to unet
372
- height = height or self.unet.config.sample_size * self.vae_scale_factor
373
- width = width or self.unet.config.sample_size * self.vae_scale_factor
374
- # to deal with lora scaling and other possible forward hooks
375
-
376
- # 1. Check inputs. Raise error if not correct
377
- self.check_inputs(
378
- prompt,
379
- height,
380
- width,
381
- callback_steps,
382
- negative_prompt,
383
- prompt_embeds,
384
- negative_prompt_embeds,
385
- ip_adapter_image,
386
- ip_adapter_image_embeds,
387
- callback_on_step_end_tensor_inputs,
388
- )
389
-
390
- self._guidance_scale = guidance_scale
391
- self._guidance_rescale = guidance_rescale
392
- self._clip_skip = clip_skip
393
- self._cross_attention_kwargs = cross_attention_kwargs
394
- self._interrupt = False
395
-
396
- # 2. Define call parameters
397
- if prompt is not None and isinstance(prompt, str):
398
- batch_size = 1
399
- elif prompt is not None and isinstance(prompt, list):
400
- batch_size = len(prompt)
401
- else:
402
- batch_size = prompt_embeds.shape[0]
403
-
404
- device = self._execution_device
405
-
406
- # 3. Encode input prompt
407
- lora_scale = (
408
- self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None
409
- )
410
-
411
- prompt_embeds, negative_prompt_embeds = self.encode_prompt(
412
- prompt,
413
- device,
414
- num_images_per_prompt,
415
- self.do_classifier_free_guidance,
416
- negative_prompt,
417
- prompt_embeds=prompt_embeds,
418
- negative_prompt_embeds=negative_prompt_embeds,
419
- lora_scale=lora_scale,
420
- clip_skip=self.clip_skip,
421
- )
422
-
423
- # For classifier free guidance, we need to do two forward passes.
424
- # Here we concatenate the unconditional and text embeddings into a single batch
425
- # to avoid doing two forward passes
426
- if self.do_classifier_free_guidance:
427
- prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
428
-
429
- if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
430
- image_embeds = self.prepare_ip_adapter_image_embeds(
431
- ip_adapter_image,
432
- ip_adapter_image_embeds,
433
- device,
434
- batch_size * num_images_per_prompt,
435
- self.do_classifier_free_guidance,
436
- )
437
-
438
- # 4. Prepare timesteps
439
- timesteps, num_inference_steps = retrieve_timesteps(
440
- self.scheduler, num_inference_steps, device, timesteps, sigmas
441
- )
442
- assert num_images_per_prompt == 1
443
- # 5. Prepare latent variables
444
- num_channels_latents = self.unet.config.in_channels
445
- latents = self.prepare_latents(
446
- batch_size * kwargs['num_in_batch'], # num_images_per_prompt,
447
- num_channels_latents,
448
- height,
449
- width,
450
- prompt_embeds.dtype,
451
- device,
452
- generator,
453
- latents,
454
- )
455
-
456
- # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
457
- extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
458
-
459
- # 6.1 Add image embeds for IP-Adapter
460
- added_cond_kwargs = (
461
- {"image_embeds": image_embeds}
462
- if (ip_adapter_image is not None or ip_adapter_image_embeds is not None)
463
- else None
464
- )
465
-
466
- # 6.2 Optionally get Guidance Scale Embedding
467
- timestep_cond = None
468
- if self.unet.config.time_cond_proj_dim is not None:
469
- guidance_scale_tensor = torch.tensor(self.guidance_scale - 1).repeat(batch_size * num_images_per_prompt)
470
- timestep_cond = self.get_guidance_scale_embedding(
471
- guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim
472
- ).to(device=device, dtype=latents.dtype)
473
-
474
- # 7. Denoising loop
475
- num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
476
- self._num_timesteps = len(timesteps)
477
- with self.progress_bar(total=num_inference_steps) as progress_bar:
478
- for i, t in enumerate(timesteps):
479
- if self.interrupt:
480
- continue
481
-
482
- # expand the latents if we are doing classifier free guidance
483
- latents = rearrange(latents, '(b n) c h w -> b n c h w', n=kwargs['num_in_batch'])
484
- latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
485
- latent_model_input = rearrange(latent_model_input, 'b n c h w -> (b n) c h w')
486
- latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
487
- latent_model_input = rearrange(latent_model_input, '(b n) c h w ->b n c h w', n=kwargs['num_in_batch'])
488
-
489
- # predict the noise residual
490
-
491
- noise_pred = self.unet(
492
- latent_model_input,
493
- t,
494
- encoder_hidden_states=prompt_embeds,
495
- timestep_cond=timestep_cond,
496
- cross_attention_kwargs=self.cross_attention_kwargs,
497
- added_cond_kwargs=added_cond_kwargs,
498
- return_dict=False, **kwargs
499
- )[0]
500
- latents = rearrange(latents, 'b n c h w -> (b n) c h w')
501
- # perform guidance
502
- if self.do_classifier_free_guidance:
503
- noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
504
- noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
505
-
506
- if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
507
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
508
- noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
509
-
510
- # compute the previous noisy sample x_t -> x_t-1
511
- latents = \
512
- self.scheduler.step(noise_pred, t, latents[:, :num_channels_latents, :, :], **extra_step_kwargs,
513
- return_dict=False)[0]
514
-
515
- if callback_on_step_end is not None:
516
- callback_kwargs = {}
517
- for k in callback_on_step_end_tensor_inputs:
518
- callback_kwargs[k] = locals()[k]
519
- callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
520
-
521
- latents = callback_outputs.pop("latents", latents)
522
- prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
523
- negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
524
-
525
- # call the callback, if provided
526
- if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
527
- progress_bar.update()
528
- if callback is not None and i % callback_steps == 0:
529
- step_idx = i // getattr(self.scheduler, "order", 1)
530
- callback(step_idx, t, latents)
531
-
532
- if not output_type == "latent":
533
- image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[
534
- 0
535
- ]
536
- image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
537
- else:
538
- image = latents
539
- has_nsfw_concept = None
540
-
541
- if has_nsfw_concept is None:
542
- do_denormalize = [True] * image.shape[0]
543
- else:
544
- do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept]
545
-
546
- image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)
547
-
548
- # Offload all models
549
- self.maybe_free_model_hooks()
550
-
551
- if not return_dict:
552
- return (image, has_nsfw_concept)
553
-
554
- return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
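
Note on the guidance branch deleted above: when `guidance_scale > 1`, every cached condition (reference latents, normal/position images, camera info) is duplicated so the unconditional and conditional predictions run as a single batch, and `denoise` later recombines the two halves with the standard classifier-free guidance formula. A minimal, self-contained sketch of that combination step (plain PyTorch, independent of the pipeline's own API):

import torch

def cfg_combine(noise_pred: torch.Tensor, guidance_scale: float) -> torch.Tensor:
    # noise_pred stacks the unconditional and conditional predictions along the
    # batch axis, exactly as produced by the duplicated conditions above.
    noise_uncond, noise_text = noise_pred.chunk(2)
    return noise_uncond + guidance_scale * (noise_text - noise_uncond)

# toy check: with guidance_scale == 1.0 the result equals the conditional prediction
pred = torch.randn(2, 4, 8, 8)
assert torch.allclose(cfg_combine(pred, 1.0), pred.chunk(2)[1])
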
hy3dgen/texgen/hunyuanpaint/unet/__init__.py DELETED
@@ -1,23 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
hy3dgen/texgen/hunyuanpaint/unet/modules.py DELETED
@@ -1,440 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
-
26
- import copy
27
- import json
28
- import os
29
- from typing import Any, Dict, Optional
30
-
31
- import torch
32
- import torch.nn as nn
33
- from diffusers.models import UNet2DConditionModel
34
- from diffusers.models.attention_processor import Attention
35
- from diffusers.models.transformers.transformer_2d import BasicTransformerBlock
36
- from einops import rearrange
37
-
38
-
39
- def _chunked_feed_forward(ff: nn.Module, hidden_states: torch.Tensor, chunk_dim: int, chunk_size: int):
40
- # "feed_forward_chunk_size" can be used to save memory
41
- if hidden_states.shape[chunk_dim] % chunk_size != 0:
42
- raise ValueError(
43
- f"`hidden_states` dimension to be chunked: {hidden_states.shape[chunk_dim]} has to be divisible by chunk size: {chunk_size}. Make sure to set an appropriate `chunk_size` when calling `unet.enable_forward_chunking`."
44
- )
45
-
46
- num_chunks = hidden_states.shape[chunk_dim] // chunk_size
47
- ff_output = torch.cat(
48
- [ff(hid_slice) for hid_slice in hidden_states.chunk(num_chunks, dim=chunk_dim)],
49
- dim=chunk_dim,
50
- )
51
- return ff_output
52
-
53
-
54
- class Basic2p5DTransformerBlock(torch.nn.Module):
55
- def __init__(self, transformer: BasicTransformerBlock, layer_name, use_ma=True, use_ra=True) -> None:
56
- super().__init__()
57
- self.transformer = transformer
58
- self.layer_name = layer_name
59
- self.use_ma = use_ma
60
- self.use_ra = use_ra
61
-
62
- # multiview attn
63
- if self.use_ma:
64
- self.attn_multiview = Attention(
65
- query_dim=self.dim,
66
- heads=self.num_attention_heads,
67
- dim_head=self.attention_head_dim,
68
- dropout=self.dropout,
69
- bias=self.attention_bias,
70
- cross_attention_dim=None,
71
- upcast_attention=self.attn1.upcast_attention,
72
- out_bias=True,
73
- )
74
-
75
- # ref attn
76
- if self.use_ra:
77
- self.attn_refview = Attention(
78
- query_dim=self.dim,
79
- heads=self.num_attention_heads,
80
- dim_head=self.attention_head_dim,
81
- dropout=self.dropout,
82
- bias=self.attention_bias,
83
- cross_attention_dim=None,
84
- upcast_attention=self.attn1.upcast_attention,
85
- out_bias=True,
86
- )
87
-
88
- def __getattr__(self, name: str):
89
- try:
90
- return super().__getattr__(name)
91
- except AttributeError:
92
- return getattr(self.transformer, name)
93
-
94
- def forward(
95
- self,
96
- hidden_states: torch.Tensor,
97
- attention_mask: Optional[torch.Tensor] = None,
98
- encoder_hidden_states: Optional[torch.Tensor] = None,
99
- encoder_attention_mask: Optional[torch.Tensor] = None,
100
- timestep: Optional[torch.LongTensor] = None,
101
- cross_attention_kwargs: Dict[str, Any] = None,
102
- class_labels: Optional[torch.LongTensor] = None,
103
- added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
104
- ) -> torch.Tensor:
105
-
106
- # Notice that normalization is always applied before the real computation in the following blocks.
107
- # 0. Self-Attention
108
- batch_size = hidden_states.shape[0]
109
-
110
- cross_attention_kwargs = cross_attention_kwargs.copy() if cross_attention_kwargs is not None else {}
111
- num_in_batch = cross_attention_kwargs.pop('num_in_batch', 1)
112
- mode = cross_attention_kwargs.pop('mode', None)
113
- mva_scale = cross_attention_kwargs.pop('mva_scale', 1.0)
114
- ref_scale = cross_attention_kwargs.pop('ref_scale', 1.0)
115
- condition_embed_dict = cross_attention_kwargs.pop("condition_embed_dict", None)
116
-
117
- if self.norm_type == "ada_norm":
118
- norm_hidden_states = self.norm1(hidden_states, timestep)
119
- elif self.norm_type == "ada_norm_zero":
120
- norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(
121
- hidden_states, timestep, class_labels, hidden_dtype=hidden_states.dtype
122
- )
123
- elif self.norm_type in ["layer_norm", "layer_norm_i2vgen"]:
124
- norm_hidden_states = self.norm1(hidden_states)
125
- elif self.norm_type == "ada_norm_continuous":
126
- norm_hidden_states = self.norm1(hidden_states, added_cond_kwargs["pooled_text_emb"])
127
- elif self.norm_type == "ada_norm_single":
128
- shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = (
129
- self.scale_shift_table[None] + timestep.reshape(batch_size, 6, -1)
130
- ).chunk(6, dim=1)
131
- norm_hidden_states = self.norm1(hidden_states)
132
- norm_hidden_states = norm_hidden_states * (1 + scale_msa) + shift_msa
133
- else:
134
- raise ValueError("Incorrect norm used")
135
-
136
- if self.pos_embed is not None:
137
- norm_hidden_states = self.pos_embed(norm_hidden_states)
138
-
139
- # 1. Prepare GLIGEN inputs
140
- cross_attention_kwargs = cross_attention_kwargs.copy() if cross_attention_kwargs is not None else {}
141
- gligen_kwargs = cross_attention_kwargs.pop("gligen", None)
142
-
143
- attn_output = self.attn1(
144
- norm_hidden_states,
145
- encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
146
- attention_mask=attention_mask,
147
- **cross_attention_kwargs,
148
- )
149
-
150
- if self.norm_type == "ada_norm_zero":
151
- attn_output = gate_msa.unsqueeze(1) * attn_output
152
- elif self.norm_type == "ada_norm_single":
153
- attn_output = gate_msa * attn_output
154
-
155
- hidden_states = attn_output + hidden_states
156
- if hidden_states.ndim == 4:
157
- hidden_states = hidden_states.squeeze(1)
158
-
159
- # 1.2 Reference Attention
160
- if 'w' in mode:
161
- condition_embed_dict[self.layer_name] = rearrange(norm_hidden_states, '(b n) l c -> b (n l) c',
162
- n=num_in_batch) # B, (N L), C
163
-
164
- if 'r' in mode and self.use_ra:
165
- condition_embed = condition_embed_dict[self.layer_name].unsqueeze(1).repeat(1, num_in_batch, 1,
166
- 1) # B N L C
167
- condition_embed = rearrange(condition_embed, 'b n l c -> (b n) l c')
168
-
169
- attn_output = self.attn_refview(
170
- norm_hidden_states,
171
- encoder_hidden_states=condition_embed,
172
- attention_mask=None,
173
- **cross_attention_kwargs
174
- )
175
- ref_scale_timing = ref_scale
176
- if isinstance(ref_scale, torch.Tensor):
177
- ref_scale_timing = ref_scale.unsqueeze(1).repeat(1, num_in_batch).view(-1)
178
- for _ in range(attn_output.ndim - 1):
179
- ref_scale_timing = ref_scale_timing.unsqueeze(-1)
180
- hidden_states = ref_scale_timing * attn_output + hidden_states
181
- if hidden_states.ndim == 4:
182
- hidden_states = hidden_states.squeeze(1)
183
-
184
- # 1.3 Multiview Attention
185
- if num_in_batch > 1 and self.use_ma:
186
- multivew_hidden_states = rearrange(norm_hidden_states, '(b n) l c -> b (n l) c', n=num_in_batch)
187
-
188
- attn_output = self.attn_multiview(
189
- multivew_hidden_states,
190
- encoder_hidden_states=multivew_hidden_states,
191
- **cross_attention_kwargs
192
- )
193
-
194
- attn_output = rearrange(attn_output, 'b (n l) c -> (b n) l c', n=num_in_batch)
195
-
196
- hidden_states = mva_scale * attn_output + hidden_states
197
- if hidden_states.ndim == 4:
198
- hidden_states = hidden_states.squeeze(1)
199
-
200
- # 1.4 GLIGEN Control
201
- if gligen_kwargs is not None:
202
- hidden_states = self.fuser(hidden_states, gligen_kwargs["objs"])
203
-
204
- # 3. Cross-Attention
205
- if self.attn2 is not None:
206
- if self.norm_type == "ada_norm":
207
- norm_hidden_states = self.norm2(hidden_states, timestep)
208
- elif self.norm_type in ["ada_norm_zero", "layer_norm", "layer_norm_i2vgen"]:
209
- norm_hidden_states = self.norm2(hidden_states)
210
- elif self.norm_type == "ada_norm_single":
211
- # For PixArt norm2 isn't applied here:
212
- # https://github.com/PixArt-alpha/PixArt-alpha/blob/0f55e922376d8b797edd44d25d0e7464b260dcab/diffusion/model/nets/PixArtMS.py#L70C1-L76C103
213
- norm_hidden_states = hidden_states
214
- elif self.norm_type == "ada_norm_continuous":
215
- norm_hidden_states = self.norm2(hidden_states, added_cond_kwargs["pooled_text_emb"])
216
- else:
217
- raise ValueError("Incorrect norm")
218
-
219
- if self.pos_embed is not None and self.norm_type != "ada_norm_single":
220
- norm_hidden_states = self.pos_embed(norm_hidden_states)
221
-
222
- attn_output = self.attn2(
223
- norm_hidden_states,
224
- encoder_hidden_states=encoder_hidden_states,
225
- attention_mask=encoder_attention_mask,
226
- **cross_attention_kwargs,
227
- )
228
-
229
- hidden_states = attn_output + hidden_states
230
-
231
- # 4. Feed-forward
232
- # i2vgen doesn't have this norm 🤷‍♂️
233
- if self.norm_type == "ada_norm_continuous":
234
- norm_hidden_states = self.norm3(hidden_states, added_cond_kwargs["pooled_text_emb"])
235
- elif not self.norm_type == "ada_norm_single":
236
- norm_hidden_states = self.norm3(hidden_states)
237
-
238
- if self.norm_type == "ada_norm_zero":
239
- norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None]
240
-
241
- if self.norm_type == "ada_norm_single":
242
- norm_hidden_states = self.norm2(hidden_states)
243
- norm_hidden_states = norm_hidden_states * (1 + scale_mlp) + shift_mlp
244
-
245
- if self._chunk_size is not None:
246
- # "feed_forward_chunk_size" can be used to save memory
247
- ff_output = _chunked_feed_forward(self.ff, norm_hidden_states, self._chunk_dim, self._chunk_size)
248
- else:
249
- ff_output = self.ff(norm_hidden_states)
250
-
251
- if self.norm_type == "ada_norm_zero":
252
- ff_output = gate_mlp.unsqueeze(1) * ff_output
253
- elif self.norm_type == "ada_norm_single":
254
- ff_output = gate_mlp * ff_output
255
-
256
- hidden_states = ff_output + hidden_states
257
- if hidden_states.ndim == 4:
258
- hidden_states = hidden_states.squeeze(1)
259
-
260
- return hidden_states
261
-
262
-
263
- class UNet2p5DConditionModel(torch.nn.Module):
264
- def __init__(self, unet: UNet2DConditionModel) -> None:
265
- super().__init__()
266
- self.unet = unet
267
-
268
- self.use_ma = True
269
- self.use_ra = True
270
- self.use_camera_embedding = True
271
- self.use_dual_stream = True
272
-
273
- if self.use_dual_stream:
274
- self.unet_dual = copy.deepcopy(unet)
275
- self.init_attention(self.unet_dual)
276
- self.init_attention(self.unet, use_ma=self.use_ma, use_ra=self.use_ra)
277
- self.init_condition()
278
- self.init_camera_embedding()
279
-
280
- @staticmethod
281
- def from_pretrained(pretrained_model_name_or_path, **kwargs):
282
- torch_dtype = kwargs.pop('torch_dtype', torch.float32)
283
- config_path = os.path.join(pretrained_model_name_or_path, 'config.json')
284
- unet_ckpt_path = os.path.join(pretrained_model_name_or_path, 'diffusion_pytorch_model.bin')
285
- with open(config_path, 'r', encoding='utf-8') as file:
286
- config = json.load(file)
287
- unet = UNet2DConditionModel(**config)
288
- unet = UNet2p5DConditionModel(unet)
289
- unet_ckpt = torch.load(unet_ckpt_path, map_location='cpu', weights_only=True)
290
- unet.load_state_dict(unet_ckpt, strict=True)
291
- unet = unet.to(torch_dtype)
292
- return unet
293
-
294
- def init_condition(self):
295
- self.unet.conv_in = torch.nn.Conv2d(
296
- 12,
297
- self.unet.conv_in.out_channels,
298
- kernel_size=self.unet.conv_in.kernel_size,
299
- stride=self.unet.conv_in.stride,
300
- padding=self.unet.conv_in.padding,
301
- dilation=self.unet.conv_in.dilation,
302
- groups=self.unet.conv_in.groups,
303
- bias=self.unet.conv_in.bias is not None)
304
-
305
- self.unet.learned_text_clip_gen = nn.Parameter(torch.randn(1, 77, 1024))
306
- self.unet.learned_text_clip_ref = nn.Parameter(torch.randn(1, 77, 1024))
307
-
308
- def init_camera_embedding(self):
309
-
310
- if self.use_camera_embedding:
311
- time_embed_dim = 1280
312
- self.max_num_ref_image = 5
313
- self.max_num_gen_image = 12 * 3 + 4 * 2
314
- self.unet.class_embedding = nn.Embedding(self.max_num_ref_image + self.max_num_gen_image, time_embed_dim)
315
-
316
- def init_attention(self, unet, use_ma=False, use_ra=False):
317
-
318
- for down_block_i, down_block in enumerate(unet.down_blocks):
319
- if hasattr(down_block, "has_cross_attention") and down_block.has_cross_attention:
320
- for attn_i, attn in enumerate(down_block.attentions):
321
- for transformer_i, transformer in enumerate(attn.transformer_blocks):
322
- if isinstance(transformer, BasicTransformerBlock):
323
- attn.transformer_blocks[transformer_i] = Basic2p5DTransformerBlock(transformer,
324
- f'down_{down_block_i}_{attn_i}_{transformer_i}',
325
- use_ma, use_ra)
326
-
327
- if hasattr(unet.mid_block, "has_cross_attention") and unet.mid_block.has_cross_attention:
328
- for attn_i, attn in enumerate(unet.mid_block.attentions):
329
- for transformer_i, transformer in enumerate(attn.transformer_blocks):
330
- if isinstance(transformer, BasicTransformerBlock):
331
- attn.transformer_blocks[transformer_i] = Basic2p5DTransformerBlock(transformer,
332
- f'mid_{attn_i}_{transformer_i}',
333
- use_ma, use_ra)
334
-
335
- for up_block_i, up_block in enumerate(unet.up_blocks):
336
- if hasattr(up_block, "has_cross_attention") and up_block.has_cross_attention:
337
- for attn_i, attn in enumerate(up_block.attentions):
338
- for transformer_i, transformer in enumerate(attn.transformer_blocks):
339
- if isinstance(transformer, BasicTransformerBlock):
340
- attn.transformer_blocks[transformer_i] = Basic2p5DTransformerBlock(transformer,
341
- f'up_{up_block_i}_{attn_i}_{transformer_i}',
342
- use_ma, use_ra)
343
-
344
- def __getattr__(self, name: str):
345
- try:
346
- return super().__getattr__(name)
347
- except AttributeError:
348
- return getattr(self.unet, name)
349
-
350
- def forward(
351
- self, sample, timestep, encoder_hidden_states,
352
- *args, down_intrablock_additional_residuals=None,
353
- down_block_res_samples=None, mid_block_res_sample=None,
354
- **cached_condition,
355
- ):
356
- B, N_gen, _, H, W = sample.shape
357
- assert H == W
358
-
359
- if self.use_camera_embedding:
360
- camera_info_gen = cached_condition['camera_info_gen'] + self.max_num_ref_image
361
- camera_info_gen = rearrange(camera_info_gen, 'b n -> (b n)')
362
- else:
363
- camera_info_gen = None
364
-
365
- sample = [sample]
366
- if 'normal_imgs' in cached_condition:
367
- sample.append(cached_condition["normal_imgs"])
368
- if 'position_imgs' in cached_condition:
369
- sample.append(cached_condition["position_imgs"])
370
- sample = torch.cat(sample, dim=2)
371
-
372
- sample = rearrange(sample, 'b n c h w -> (b n) c h w')
373
-
374
- encoder_hidden_states_gen = encoder_hidden_states.unsqueeze(1).repeat(1, N_gen, 1, 1)
375
- encoder_hidden_states_gen = rearrange(encoder_hidden_states_gen, 'b n l c -> (b n) l c')
376
-
377
- if self.use_ra:
378
- if 'condition_embed_dict' in cached_condition:
379
- condition_embed_dict = cached_condition['condition_embed_dict']
380
- else:
381
- condition_embed_dict = {}
382
- ref_latents = cached_condition['ref_latents']
383
- N_ref = ref_latents.shape[1]
384
- if self.use_camera_embedding:
385
- camera_info_ref = cached_condition['camera_info_ref']
386
- camera_info_ref = rearrange(camera_info_ref, 'b n -> (b n)')
387
- else:
388
- camera_info_ref = None
389
-
390
- ref_latents = rearrange(ref_latents, 'b n c h w -> (b n) c h w')
391
-
392
- encoder_hidden_states_ref = self.unet.learned_text_clip_ref.unsqueeze(1).repeat(B, N_ref, 1, 1)
393
- encoder_hidden_states_ref = rearrange(encoder_hidden_states_ref, 'b n l c -> (b n) l c')
394
-
395
- noisy_ref_latents = ref_latents
396
- timestep_ref = 0
397
-
398
- if self.use_dual_stream:
399
- unet_ref = self.unet_dual
400
- else:
401
- unet_ref = self.unet
402
- unet_ref(
403
- noisy_ref_latents, timestep_ref,
404
- encoder_hidden_states=encoder_hidden_states_ref,
405
- class_labels=camera_info_ref,
406
- # **kwargs
407
- return_dict=False,
408
- cross_attention_kwargs={
409
- 'mode': 'w', 'num_in_batch': N_ref,
410
- 'condition_embed_dict': condition_embed_dict},
411
- )
412
- cached_condition['condition_embed_dict'] = condition_embed_dict
413
- else:
414
- condition_embed_dict = None
415
-
416
- mva_scale = cached_condition.get('mva_scale', 1.0)
417
- ref_scale = cached_condition.get('ref_scale', 1.0)
418
-
419
- return self.unet(
420
- sample, timestep,
421
- encoder_hidden_states_gen, *args,
422
- class_labels=camera_info_gen,
423
- down_intrablock_additional_residuals=[
424
- sample.to(dtype=self.unet.dtype) for sample in down_intrablock_additional_residuals
425
- ] if down_intrablock_additional_residuals is not None else None,
426
- down_block_additional_residuals=[
427
- sample.to(dtype=self.unet.dtype) for sample in down_block_res_samples
428
- ] if down_block_res_samples is not None else None,
429
- mid_block_additional_residual=(
430
- mid_block_res_sample.to(dtype=self.unet.dtype)
431
- if mid_block_res_sample is not None else None
432
- ),
433
- return_dict=False,
434
- cross_attention_kwargs={
435
- 'mode': 'r', 'num_in_batch': N_gen,
436
- 'condition_embed_dict': condition_embed_dict,
437
- 'mva_scale': mva_scale,
438
- 'ref_scale': ref_scale,
439
- },
440
- )
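
For readers following the deleted Basic2p5DTransformerBlock: multiview attention works by folding the view axis out of the batch dimension and into the token axis, so tokens from all N generated views attend to one another, and then restoring the original layout. A toy illustration of that rearrange round trip (shapes are arbitrary; it only needs einops, which the module itself imports):

import torch
from einops import rearrange

B, N, L, C = 2, 6, 16, 8            # batch, views, tokens per view, channels
hidden = torch.randn(B * N, L, C)   # views folded into the batch, as the UNet sees them

# joint multiview layout: one sequence of N*L tokens per batch element
multiview = rearrange(hidden, '(b n) l c -> b (n l) c', n=N)
assert multiview.shape == (B, N * L, C)

# after attention, restore the per-view layout
restored = rearrange(multiview, 'b (n l) c -> (b n) l c', n=N)
assert torch.equal(restored, hidden)
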
hy3dgen/texgen/pipelines.py DELETED
@@ -1,227 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
-
26
- import logging
27
- import os
28
-
29
- import numpy as np
30
- import torch
31
- from PIL import Image
32
-
33
- from .differentiable_renderer.mesh_render import MeshRender
34
- from .utils.dehighlight_utils import Light_Shadow_Remover
35
- from .utils.multiview_utils import Multiview_Diffusion_Net
36
- from .utils.uv_warp_utils import mesh_uv_wrap
37
-
38
- logger = logging.getLogger(__name__)
39
-
40
-
41
- class Hunyuan3DTexGenConfig:
42
-
43
- def __init__(self, light_remover_ckpt_path, multiview_ckpt_path):
44
- self.device = 'cpu'
45
- self.light_remover_ckpt_path = light_remover_ckpt_path
46
- self.multiview_ckpt_path = multiview_ckpt_path
47
-
48
- self.candidate_camera_azims = [0, 90, 180, 270, 0, 180]
49
- self.candidate_camera_elevs = [0, 0, 0, 0, 90, -90]
50
- self.candidate_view_weights = [1, 0.1, 0.5, 0.1, 0.05, 0.05]
51
-
52
- self.render_size = 2048
53
- self.texture_size = 1024
54
- self.bake_exp = 4
55
- self.merge_method = 'fast'
56
-
57
-
58
- class Hunyuan3DPaintPipeline:
59
- @classmethod
60
- def from_pretrained(cls, model_path):
61
- original_model_path = model_path
62
- if not os.path.exists(model_path):
63
- # try local path
64
- base_dir = os.environ.get('HY3DGEN_MODELS', '~/content/hy3dgen')
65
- model_path = os.path.expanduser(os.path.join(base_dir, model_path))
66
-
67
- delight_model_path = os.path.join(model_path, 'hunyuan3d-delight-v2-0')
68
- multiview_model_path = os.path.join(model_path, 'hunyuan3d-paint-v2-0')
69
-
70
- if not os.path.exists(delight_model_path) or not os.path.exists(multiview_model_path):
71
- try:
72
- import huggingface_hub
73
- # download from huggingface
74
- model_path = huggingface_hub.snapshot_download(repo_id=original_model_path)
75
- delight_model_path = os.path.join(model_path, 'hunyuan3d-delight-v2-0')
76
- multiview_model_path = os.path.join(model_path, 'hunyuan3d-paint-v2-0')
77
- return cls(Hunyuan3DTexGenConfig(delight_model_path, multiview_model_path))
78
- except ImportError:
79
- logger.warning(
80
- "You need to install HuggingFace Hub to load models from the hub."
81
- )
82
- raise RuntimeError(f"Model path {model_path} not found")
83
- else:
84
- return cls(Hunyuan3DTexGenConfig(delight_model_path, multiview_model_path))
85
-
86
- raise FileNotFoundError(f"Model path {original_model_path} not found and we could not find it at huggingface")
87
-
88
- def __init__(self, config):
89
- self.config = config
90
- self.models = {}
91
- self.render = MeshRender(
92
- default_resolution=self.config.render_size,
93
- texture_size=self.config.texture_size)
94
-
95
- self.load_models()
96
-
97
- def load_models(self):
98
- # empty CUDA cache
99
- torch.cuda.empty_cache()
100
- # Load model
101
- self.models['delight_model'] = Light_Shadow_Remover(self.config)
102
- self.models['multiview_model'] = Multiview_Diffusion_Net(self.config)
103
-
104
- def render_normal_multiview(self, camera_elevs, camera_azims, use_abs_coor=True):
105
- normal_maps = []
106
- for elev, azim in zip(camera_elevs, camera_azims):
107
- normal_map = self.render.render_normal(
108
- elev, azim, use_abs_coor=use_abs_coor, return_type='pl')
109
- normal_maps.append(normal_map)
110
-
111
- return normal_maps
112
-
113
- def render_position_multiview(self, camera_elevs, camera_azims):
114
- position_maps = []
115
- for elev, azim in zip(camera_elevs, camera_azims):
116
- position_map = self.render.render_position(
117
- elev, azim, return_type='pl')
118
- position_maps.append(position_map)
119
-
120
- return position_maps
121
-
122
- def bake_from_multiview(self, views, camera_elevs,
123
- camera_azims, view_weights, method='graphcut'):
124
- project_textures, project_weighted_cos_maps = [], []
125
- project_boundary_maps = []
126
- for view, camera_elev, camera_azim, weight in zip(
127
- views, camera_elevs, camera_azims, view_weights):
128
- project_texture, project_cos_map, project_boundary_map = self.render.back_project(
129
- view, camera_elev, camera_azim)
130
- project_cos_map = weight * (project_cos_map ** self.config.bake_exp)
131
- project_textures.append(project_texture)
132
- project_weighted_cos_maps.append(project_cos_map)
133
- project_boundary_maps.append(project_boundary_map)
134
-
135
- if method == 'fast':
136
- texture, ori_trust_map = self.render.fast_bake_texture(
137
- project_textures, project_weighted_cos_maps)
138
- else:
139
- raise ValueError(f'no method {method}')
140
- return texture, ori_trust_map > 1E-8
141
-
142
- def texture_inpaint(self, texture, mask):
143
-
144
- texture_np = self.render.uv_inpaint(texture, mask)
145
- texture = torch.tensor(texture_np / 255).float().to(texture.device)
146
-
147
- return texture
148
-
149
- def recenter_image(self, image, border_ratio=0.2):
150
- if image.mode == 'RGB':
151
- return image
152
- elif image.mode == 'L':
153
- image = image.convert('RGB')
154
- return image
155
-
156
- alpha_channel = np.array(image)[:, :, 3]
157
- non_zero_indices = np.argwhere(alpha_channel > 0)
158
- if non_zero_indices.size == 0:
159
- raise ValueError("Image is fully transparent")
160
-
161
- min_row, min_col = non_zero_indices.min(axis=0)
162
- max_row, max_col = non_zero_indices.max(axis=0)
163
-
164
- cropped_image = image.crop((min_col, min_row, max_col + 1, max_row + 1))
165
-
166
- width, height = cropped_image.size
167
- border_width = int(width * border_ratio)
168
- border_height = int(height * border_ratio)
169
-
170
- new_width = width + 2 * border_width
171
- new_height = height + 2 * border_height
172
-
173
- square_size = max(new_width, new_height)
174
-
175
- new_image = Image.new('RGBA', (square_size, square_size), (255, 255, 255, 0))
176
-
177
- paste_x = (square_size - new_width) // 2 + border_width
178
- paste_y = (square_size - new_height) // 2 + border_height
179
-
180
- new_image.paste(cropped_image, (paste_x, paste_y))
181
- return new_image
182
-
183
- @torch.no_grad()
184
- def __call__(self, mesh, image):
185
-
186
- if isinstance(image, str):
187
- image_prompt = Image.open(image)
188
- else:
189
- image_prompt = image
190
-
191
- image_prompt = self.recenter_image(image_prompt)
192
-
193
- image_prompt = self.models['delight_model'](image_prompt)
194
-
195
- mesh = mesh_uv_wrap(mesh)
196
-
197
- self.render.load_mesh(mesh)
198
-
199
- selected_camera_elevs, selected_camera_azims, selected_view_weights = \
200
- self.config.candidate_camera_elevs, self.config.candidate_camera_azims, self.config.candidate_view_weights
201
-
202
- normal_maps = self.render_normal_multiview(
203
- selected_camera_elevs, selected_camera_azims, use_abs_coor=True)
204
- position_maps = self.render_position_multiview(
205
- selected_camera_elevs, selected_camera_azims)
206
-
207
- camera_info = [(((azim // 30) + 9) % 12) // {-20: 1, 0: 1, 20: 1, -90: 3, 90: 3}[
208
- elev] + {-20: 0, 0: 12, 20: 24, -90: 36, 90: 40}[elev] for azim, elev in
209
- zip(selected_camera_azims, selected_camera_elevs)]
210
- multiviews = self.models['multiview_model'](image_prompt, normal_maps + position_maps, camera_info)
211
-
212
- for i in range(len(multiviews)):
213
- multiviews[i] = multiviews[i].resize(
214
- (self.config.render_size, self.config.render_size))
215
-
216
- texture, mask = self.bake_from_multiview(multiviews,
217
- selected_camera_elevs, selected_camera_azims, selected_view_weights,
218
- method=self.config.merge_method)
219
-
220
- mask_np = (mask.squeeze(-1).cpu().numpy() * 255).astype(np.uint8)
221
-
222
- texture = self.texture_inpaint(texture, mask_np)
223
-
224
- self.render.set_texture(texture)
225
- textured_mesh = self.render.save_mesh()
226
-
227
- return textured_mesh
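
End to end, the deleted Hunyuan3DPaintPipeline takes an untextured mesh plus a reference image, removes lighting from the image, renders normal and position maps from the fixed candidate cameras, runs the multiview diffusion model, and bakes the generated views back into a UV texture. A minimal usage sketch under stated assumptions: the repo id, file names and the hy3dgen.texgen re-export are illustrative, and trimesh is assumed for mesh I/O as elsewhere in the project:

import trimesh
from hy3dgen.texgen import Hunyuan3DPaintPipeline  # assumed re-export from texgen/__init__.py

pipeline = Hunyuan3DPaintPipeline.from_pretrained('tencent/Hunyuan3D-2')  # or a local model directory
mesh = trimesh.load('untextured_shape.glb', force='mesh')                 # placeholder path
textured_mesh = pipeline(mesh, image='reference.png')                     # __call__(mesh, image)
textured_mesh.export('textured_shape.glb')
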
hy3dgen/texgen/utils/__init__.py DELETED
@@ -1,23 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
hy3dgen/texgen/utils/alignImg4Tex_utils.py DELETED
@@ -1,132 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
-
26
- import torch
27
- from diffusers import EulerAncestralDiscreteScheduler
28
- from diffusers import StableDiffusionControlNetPipeline, StableDiffusionXLControlNetImg2ImgPipeline, ControlNetModel, \
29
- AutoencoderKL
30
-
31
-
32
- class Img2img_Control_Ip_adapter:
33
- def __init__(self, device):
34
- controlnet = ControlNetModel.from_pretrained('lllyasviel/control_v11f1p_sd15_depth', torch_dtype=torch.float16,
35
- variant="fp16", use_safetensors=True)
36
- pipe = StableDiffusionControlNetPipeline.from_pretrained(
37
- 'runwayml/stable-diffusion-v1-5', controlnet=controlnet, torch_dtype=torch.float16, use_safetensors=True
38
- )
39
- pipe.load_ip_adapter('h94/IP-Adapter', subfolder="models", weight_name="ip-adapter-plus_sd15.safetensors")
40
- pipe.set_ip_adapter_scale(0.7)
41
-
42
- pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
43
- # pipe.enable_model_cpu_offload()
44
- self.pipe = pipe.to(device)
45
-
46
- def __call__(
47
- self,
48
- prompt,
49
- control_image,
50
- ip_adapter_image,
51
- negative_prompt,
52
- height=512,
53
- width=512,
54
- num_inference_steps=20,
55
- guidance_scale=8.0,
56
- controlnet_conditioning_scale=1.0,
57
- output_type="pil",
58
- **kwargs,
59
- ):
60
- results = self.pipe(
61
- prompt=prompt,
62
- negative_prompt=negative_prompt,
63
- image=control_image,
64
- ip_adapter_image=ip_adapter_image,
65
- generator=torch.manual_seed(42),
66
- seed=42,
67
- num_inference_steps=num_inference_steps,
68
- guidance_scale=guidance_scale,
69
- controlnet_conditioning_scale=controlnet_conditioning_scale,
70
- strength=1,
71
- # clip_skip=2,
72
- height=height,
73
- width=width,
74
- output_type=output_type,
75
- **kwargs,
76
- ).images[0]
77
- return results
78
-
79
-
80
- ################################################################
81
-
82
- class HesModel:
83
- def __init__(self, ):
84
- controlnet_depth = ControlNetModel.from_pretrained(
85
- 'diffusers/controlnet-depth-sdxl-1.0',
86
- torch_dtype=torch.float16,
87
- variant="fp16",
88
- use_safetensors=True
89
- )
90
- self.pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
91
- 'stabilityai/stable-diffusion-xl-base-1.0',
92
- torch_dtype=torch.float16,
93
- variant="fp16",
94
- controlnet=controlnet_depth,
95
- use_safetensors=True,
96
- )
97
- self.pipe.vae = AutoencoderKL.from_pretrained(
98
- 'madebyollin/sdxl-vae-fp16-fix',
99
- torch_dtype=torch.float16
100
- )
101
-
102
- self.pipe.load_ip_adapter('h94/IP-Adapter', subfolder="sdxl_models", weight_name="ip-adapter_sdxl.safetensors")
103
- self.pipe.set_ip_adapter_scale(0.7)
104
- self.pipe.to("cuda")
105
-
106
- def __call__(self,
107
- init_image,
108
- control_image,
109
- ip_adapter_image=None,
110
- prompt='3D image',
111
- negative_prompt='2D image',
112
- seed=42,
113
- strength=0.8,
114
- num_inference_steps=40,
115
- guidance_scale=7.5,
116
- controlnet_conditioning_scale=0.5,
117
- **kwargs
118
- ):
119
- image = self.pipe(
120
- prompt=prompt,
121
- image=init_image,
122
- control_image=control_image,
123
- ip_adapter_image=ip_adapter_image,
124
- negative_prompt=negative_prompt,
125
- num_inference_steps=num_inference_steps,
126
- guidance_scale=guidance_scale,
127
- strength=strength,
128
- controlnet_conditioning_scale=controlnet_conditioning_scale,
129
- seed=seed,
130
- **kwargs
131
- ).images[0]
132
- return image
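
Both helpers above wrap stock diffusers pipelines (SD 1.5 with a depth ControlNet plus IP-Adapter, and SDXL img2img with a depth ControlNet plus IP-Adapter). A hedged usage sketch of HesModel for refining a rendered view; the file names are placeholders, and a CUDA device plus a recent diffusers release are assumed (the checkpoints are downloaded on first use):

from PIL import Image

hes = HesModel()   # loads SDXL base, depth ControlNet, fp16 VAE fix and IP-Adapter onto CUDA
refined = hes(
    init_image=Image.open('render_rgb.png').convert('RGB'),       # current view to refine
    control_image=Image.open('render_depth.png').convert('RGB'),  # depth map of the same view
    ip_adapter_image=Image.open('reference.png').convert('RGB'),  # appearance reference
    prompt='3D image',
    negative_prompt='2D image',
    strength=0.8,
    num_inference_steps=40,
)
refined.save('render_refined.png')
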
hy3dgen/texgen/utils/counter_utils.py DELETED
@@ -1,58 +0,0 @@
1
- # Open Source Model Licensed under the Apache License Version 2.0
2
- # and Other Licenses of the Third-Party Components therein:
3
- # The below Model in this distribution may have been modified by THL A29 Limited
4
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
5
-
6
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
7
- # The below software and/or models in this distribution may have been
8
- # modified by THL A29 Limited ("Tencent Modifications").
9
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
10
-
11
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
12
- # except for the third-party components listed below.
13
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
14
- # in the respective licenses of these third-party components.
15
- # Users must comply with all terms and conditions of original licenses of these third-party
16
- # components and must ensure that the usage of the third party components adheres to
17
- # all relevant laws and regulations.
18
-
19
- # For avoidance of doubts, Hunyuan 3D means the large language models and
20
- # their software and algorithms, including trained model weights, parameters (including
21
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
22
- # fine-tuning enabling code and other elements of the foregoing made publicly available
23
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
24
-
25
-
26
- class RunningStats():
27
- def __init__(self) -> None:
28
- self.count = 0
29
- self.sum = 0
30
- self.mean = 0
31
- self.min = None
32
- self.max = None
33
-
34
- def add_value(self, value):
35
- self.count += 1
36
- self.sum += value
37
- self.mean = self.sum / self.count
38
-
39
- if self.min is None or value < self.min:
40
- self.min = value
41
-
42
- if self.max is None or value > self.max:
43
- self.max = value
44
-
45
- def get_count(self):
46
- return self.count
47
-
48
- def get_sum(self):
49
- return self.sum
50
-
51
- def get_mean(self):
52
- return self.mean
53
-
54
- def get_min(self):
55
- return self.min
56
-
57
- def get_max(self):
58
- return self.max
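The removed RunningStats helper is a single-pass accumulator for count, sum, mean, min, and max. A minimal usage sketch (the timing values are made up for illustration):

stats = RunningStats()
for seconds in [0.12, 0.31, 0.22]:   # e.g. per-view render times
    stats.add_value(seconds)

print(stats.get_count())                 # 3
print(round(stats.get_mean(), 4))        # 0.2167
print(stats.get_min(), stats.get_max())  # 0.12 0.31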
hy3dgen/texgen/utils/dehighlight_utils.py DELETED
@@ -1,84 +0,0 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-
-import cv2
-import numpy as np
-import torch
-from PIL import Image
-from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
-
-
-class Light_Shadow_Remover():
-    def __init__(self, config):
-        self.device = config.device
-        self.cfg_image = 1.5
-        self.cfg_text = 1.0
-
-        pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained(
-            config.light_remover_ckpt_path,
-            torch_dtype=torch.float16,
-            safety_checker=None,
-        )
-        pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config)
-        pipeline.set_progress_bar_config(disable=True)
-
-        # self.pipeline = pipeline.to(self.device, torch.float16)
-        self.pipeline = pipeline # Needed to avoid displaying the warning
-    @torch.no_grad()
-    def __call__(self, image):
-
-        image = image.resize((512, 512))
-
-        if image.mode == 'RGBA':
-            image_array = np.array(image)
-            alpha_channel = image_array[:, :, 3]
-            erosion_size = 3
-            kernel = np.ones((erosion_size, erosion_size), np.uint8)
-            alpha_channel = cv2.erode(alpha_channel, kernel, iterations=1)
-            image_array[alpha_channel == 0, :3] = 255
-            image_array[:, :, 3] = alpha_channel
-            image = Image.fromarray(image_array)
-
-            image_tensor = torch.tensor(np.array(image) / 255.0).to(self.device)
-            alpha = image_tensor[:, :, 3:]
-            rgb_target = image_tensor[:, :, :3]
-        else:
-            image_tensor = torch.tensor(np.array(image) / 255.0).to(self.device)
-            alpha = torch.ones_like(image_tensor)[:, :, :1]
-            rgb_target = image_tensor[:, :, :3]
-
-        image = image.convert('RGB')
-
-        image = self.pipeline(
-            prompt="",
-            image=image,
-            generator=torch.manual_seed(42),
-            height=512,
-            width=512,
-            num_inference_steps=50,
-            image_guidance_scale=self.cfg_image,
-            guidance_scale=self.cfg_text,
-        ).images[0]
-
-        return image
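For context, the removed Light_Shadow_Remover runs InstructPix2Pix at 512x512 to strip baked-in lighting and shadows from a texture image. A hypothetical invocation is sketched below (the config object, checkpoint path, and image file are placeholders; only config.device and config.light_remover_ckpt_path are read by the class):

from types import SimpleNamespace
from PIL import Image

# Placeholder config mirroring the two attributes the class accesses.
config = SimpleNamespace(
    device="cuda",
    light_remover_ckpt_path="path/to/light_remover_checkpoint",  # placeholder path
)

remover = Light_Shadow_Remover(config)
albedo = Image.open("texture_with_baked_lighting.png")  # placeholder input, RGB or RGBA
delighted = remover(albedo)                             # returns a 512x512 RGB PIL image
delighted.save("texture_delighted.png")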