-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dif3d.py
230 lines (208 loc) · 8.52 KB
/
Dif3d.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# Import necessary libraries
import logging
import os
import time
from typing import Optional

import torch

# import functions
from tsr.system import TSR
from tsr.gaussianutil import Gaussian
from tsr.utils import save_video
# Define a Timer class to measure execution time
class Timer:
    """Measure and log the wall-clock duration of named tasks.

    When CUDA is available the device is synchronized before every
    timestamp is taken, so the timestamps are read only after queued GPU
    work has completed.

    Attributes:
        items: Maps the name of each running task to its start timestamp.
        time_scale: Factor applied to elapsed seconds before reporting
            (1000.0, i.e. milliseconds).
        time_unit: Unit label appended to the logged duration.
    """

    def __init__(self):
        self.items = {}
        self.time_scale = 1000.0  # ms
        self.time_unit = "ms"

    def start(self, name: str) -> None:
        """Record the start time of task ``name`` and log that it began.

        Starting a name that is already running overwrites its start time.
        """
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        # perf_counter() is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        self.items[name] = time.perf_counter()
        logging.info("%s ...", name)

    def end(self, name: str) -> Optional[float]:
        """Stop timing task ``name``, log its duration and return it.

        Args:
            name: The task name previously passed to ``start``.

        Returns:
            The elapsed time scaled by ``time_scale`` (milliseconds by
            default), or ``None`` if ``name`` was never started or has
            already been ended.
        """
        if name not in self.items:
            return None
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        start_time = self.items.pop(name)
        elapsed = (time.perf_counter() - start_time) * self.time_scale
        logging.info(
            "%s finished in %.2f%s.", name, elapsed, self.time_unit)
        return elapsed
class Runtime:
    """Drive the image-to-3D pipeline built on a pre-trained TSR model.

    The class wires together a ``Gaussian`` pre-model (used here for image
    pre-processing, depth estimation and point-cloud generation) and a
    ``TSR`` model (used for scene-code inference, rendering and mesh
    extraction), timing and logging every stage.

    Expected call order::

        set_variables() -> initilize() -> img_process() -> modelRun()
        -> render() and/or export_mesh()

    Attributes:
        timer (Timer): Measures and logs the duration of each stage.
        premodel (Gaussian): Pre-model used for pre-processing input images.
        model (TSR): The TSR model; ``None`` until ``initilize`` is called.
        device (str): ``"cuda:0"`` when CUDA is available, else ``"cpu"``.
        image_path (str): The path to the input images.
        out_path (str): Output directory, stored with a trailing separator.
        pretrained (str): The name of the pre-trained model to load.
        chunk_size (int): The chunk size to use for rendering.
        mc_resolution (int): The resolution to use for mesh extraction.
        format (str): The file format used when saving the 3D mesh.
        image: Result of ``premodel.pre_process()``; set by ``img_process``.
        depth_image: Result of ``premodel.depth_estimation()``; set by
            ``img_process``.
        scene_codes: Output of the TSR model; set by ``modelRun``.
    """

    # Sub-directories created beneath ``out_path``.
    _OUTPUT_SUBDIRS = ("images", "renderfiles", "3dfiles")

    def __init__(self):
        # Initialize the timer
        self.timer = Timer()
        # Initialize the pre-model
        self.premodel = Gaussian()
        # The TSR model is loaded later by initilize()
        self.model = None
        # Pipeline results, filled in by img_process() and modelRun()
        self.image = None
        self.depth_image = None
        self.scene_codes = None
        # Set up logging
        logging.basicConfig(
            format="%(asctime)s - %(levelname)s - %(message)s",
            level=logging.INFO,
        )

    def set_variables(
        self,
        input_path="input/",
        output_path="output/",
        pretrained_model="stabilityai/TripoSR",
        chunk_size=8192,
        padding=16,
        foreground_ratio=0.85,
        mc_resolution=256,
        model_save_format="obj",
    ):
        """Store the run configuration and prepare output dirs and device.

        Args:
            input_path: The path to the input images.
            output_path: Directory for all generated files. A missing
                trailing separator is added automatically.
            pretrained_model: Name of the pre-trained TSR model.
            chunk_size: Chunk size handed to the TSR renderer.
            padding: Forwarded to the pre-model.
            foreground_ratio: Forwarded to the pre-model.
            mc_resolution: Resolution used for mesh extraction.
            model_save_format: File extension/format of the exported mesh.
        """
        # os.path.join(path, "") appends a separator only when one is
        # missing, so "output" and "output/" address the same directory.
        output_path = os.path.join(output_path, "")

        self.image_path = input_path
        self.out_path = output_path
        self.pretrained = pretrained_model
        self.chunk_size = chunk_size
        self.mc_resolution = mc_resolution
        self.format = model_save_format
        # The pre-model receives the same normalized output path so both
        # objects agree on where the output lives.
        self.premodel.set_variables(
            in_path=input_path,
            out_path=output_path,
            foreground_ratio=foreground_ratio,
            padding=padding,
        )
        # Check and create the necessary output directories
        self.output_address_chk()
        # Check and set the processor either GPU (CUDA) or CPU
        self.processor_check()

    def _out(self, *parts):
        """Return a path beneath the output directory."""
        return os.path.join(self.out_path, *parts)

    def _require_model(self):
        """Raise a clear error if the TSR model has not been loaded."""
        if self.model is None:
            raise RuntimeError(
                "TSR model is not loaded; call initilize() first.")

    def _require_scene_codes(self):
        """Raise a clear error if the model has not been run yet."""
        if self.scene_codes is None:
            raise RuntimeError(
                "No scene codes available; call modelRun() first.")

    def output_address_chk(self):
        """Create the output directory and its result sub-directories."""
        # makedirs also creates missing parents, i.e. out_path itself.
        for subdir in self._OUTPUT_SUBDIRS:
            os.makedirs(self._out(subdir), exist_ok=True)

    def processor_check(self):
        """Select the compute device: CUDA when available, else the CPU."""
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"

    def initilize(self):
        """Load the pre-model and the pre-trained TSR model.

        The TSR model is loaded from ``self.pretrained``, its renderer is
        given ``self.chunk_size`` and the model is moved to ``self.device``.
        """
        self.timer.start("Initializing pre-model(Gaussian model)")
        self.premodel.gassuin_load()
        self.timer.end("Initializing pre-model(Gaussian model)")

        self.timer.start("Initializing TSR (diffsusion 3D) model")
        self.model = TSR.from_pretrained(
            self.pretrained,
            config_name="config.yaml",
            weight_name="model.ckpt",
        )
        self.model.renderer.set_chunk_size(self.chunk_size)
        self.model.to(self.device)
        self.timer.end("Initializing TSR (diffsusion 3D) model")

    def img_process(self):
        """Run the pre-model image stage and keep its results.

        Stores the pre-processed image in ``self.image`` and the depth
        estimate in ``self.depth_image``, then asks the pre-model to
        produce its depth-prediction comparison visualization.
        """
        self.timer.start("Processing images")
        self.image = self.premodel.pre_process()
        self.depth_image = self.premodel.depth_estimation()
        self.premodel.dp_comparison_visual()
        self.timer.end("Processing images")

    def modelRun(self):
        """Generate the point cloud and run the TSR model on the image.

        The resulting scene codes are stored in ``self.scene_codes``.

        Raises:
            RuntimeError: If ``initilize`` has not been called yet.
        """
        # Fail before the pre-model stages run, not after them.
        self._require_model()
        logging.info("please wait this process might take a few minutes...")
        self.timer.start("Running model")
        # Generate a point cloud, then let the pre-model post-process it
        self.premodel.pointcloud()
        self.premodel.post_process()
        # Inference only: no gradients are needed
        with torch.no_grad():
            self.scene_codes = self.model([self.image], device=self.device)
        self.timer.end("Running model")

    def render(self, n_views=30, fps=30):
        """Render views of the first scene and save them as PNGs and MP4.

        Frames are written to ``<out_path>/renderfiles/render_NNN.png`` and
        the video to ``<out_path>/renderfiles/render.mp4``.

        Args:
            n_views: Number of views requested from the model.
            fps: Frame rate of the saved video.

        Raises:
            RuntimeError: If the model is not loaded or has not been run.
        """
        self._require_model()
        self._require_scene_codes()
        self.timer.start("Rendering")
        render_images = self.model.render(
            self.scene_codes, n_views=n_views, return_type="pil")
        # Only the first scene's frames are saved.
        frames = render_images[0]
        for index, frame in enumerate(frames):
            frame.save(self._out("renderfiles", f"render_{index:03d}.png"))
        save_video(frames, self._out("renderfiles", "render.mp4"), fps=fps)
        self.timer.end("Rendering")

    def export_mesh(self):
        """Extract the mesh of the first scene and write it to disk.

        The mesh is saved as ``<out_path>/3dfiles/mesh.<format>``.

        Raises:
            RuntimeError: If the model is not loaded or has not been run.
        """
        self._require_model()
        self._require_scene_codes()
        self.timer.start("Exporting mesh")
        meshes = self.model.extract_mesh(
            self.scene_codes, resolution=self.mc_resolution)
        meshes[0].export(self._out("3dfiles", f"mesh.{self.format}"))
        self.timer.end("Exporting mesh")