"""main.py - command-line front end for the DeSOTA "AI Video With Text Control Synthesis" tool."""
import os
import sys
import shutil
import time
import json
import argparse
import random
import subprocess
# Filesystem layout. Every path is derived from the directory holding this script.
CURRENT_PATH = os.path.dirname(os.path.realpath(__file__))
TMP_PATH = os.path.join(CURRENT_PATH, "tmp")  # scratch directory, wiped before each run
OUT_PATH = os.path.join(TMP_PATH, "out.mp4")  # path reported back after a render
OUT_NAME = "out"
DEF_DATA = os.path.join(CURRENT_PATH, "data")
DEF_MOON = os.path.join(DEF_DATA, "moonwalk.mp4")  # default source clip
INF_PATH = os.path.join(CURRENT_PATH, "inference.py")  # script launched as a subprocess

# Fallback seed used when -s/--seed is not supplied on the command line.
preseed = random.randint(1, 1000000)
DEFAULT_OUT_PATH = "v-edit.mp4"

# Values accepted by -c/--condition.
CONDITION_CHOICES = [
    "depth", "canny", "pose",
    "scribble_hed", "softedge_hed", "scribble_hedsafe", "softedge_hedsafe",
    "depth_midas", "mlsd",
    "openpose", "openpose_face", "openpose_faceonly", "openpose_full", "openpose_hand",
    "scribble_pidinet", "softedge_pidinet", "scribble_pidsafe", "softedge_pidsafe",
    "normal_bae", "lineart_coarse", "lineart_realistic", "lineart_anime",
    "depth_zoe", "depth_leres", "depth_leres++", "shuffle", "mediapipe_face",
]

# Command-line interface. Leaving --prompt at its sentinel default
# ("enter_cli_mode") makes main() start the interactive wizard instead of
# rendering straight away.
parser = argparse.ArgumentParser(description="AI Video With Text Control Synthesis CLI")
parser.add_argument("-p", "--prompt", default="enter_cli_mode", type=str,
                    help="Text description of target video")
parser.add_argument("-vp", "--video_path", default=DEF_MOON, type=str,
                    help="Path to the source video file")
parser.add_argument("-c", "--condition", choices=CONDITION_CHOICES, type=str,
                    help="Condition of structure sequence")
parser.add_argument("-vl", "--video_length", default=15, type=int,
                    help="Length of synthesized video")
parser.add_argument("-fr", "--frame_rate", default=2, type=int,
                    help="FrameRate of SyntVideo, defaults to 2")
parser.add_argument("-vw", "--width", default=256, type=int,
                    help="Output Video Width defaults to 256px")
parser.add_argument("-vh", "--height", default=256, type=int,
                    help="Output Video Height defaults to 256px")
# The -ss/--smoother_steps option is currently disabled.
# -lv is a value-less switch whose default is already "on", so passing it
# changes nothing; it is kept so existing command lines keep working.
parser.add_argument("-lv", "--is_long_video", default=True, action="store_true",
                    help="Produce a long video (also lowers RAM usage). Long-video mode is "
                         "on by default; this switch takes no value and is accepted for "
                         "compatibility")
parser.add_argument("-eo", "--extract_only", default=False, action="store_true",
                    help="Only extract the condition (e.g. pose) and skip inference; "
                         "off by default. This switch takes no value")
parser.add_argument("-vc", "--version", default="v10", type=str,
                    help="Controlnet version can be v10 or v11, def10")
parser.add_argument("-ni", "--num_inference_steps", default=50, type=int,
                    help="Number of denoising steps")
parser.add_argument("-gs", "--guidance_scale", default=12.5, type=float,
                    help="Scale for classifier-free guidance")
parser.add_argument("-s", "--seed", default=preseed, type=int,
                    help="Random seed for generation (leave blank to randomize)")
parser.add_argument("-resnum", "--resnum", default=1, type=int,
                    help="Result number (not read anywhere in this script)")
parser.add_argument("-rp", "--respath", default=str(DEFAULT_OUT_PATH), type=str,
                    help="Output mp4 filename, do not change... "
                         "default `./condition-(edit|only-video).mp4`")
parser.add_argument("-nc", "--noclear", action="store_true",
                    help="Do not clear the terminal when interactive mode starts")

# Parsed at module level because VideoThread() binds ``args`` as the default
# value of its second parameter when it is defined.
args = parser.parse_args()

# When true, main() prints the elapsed render time.
DEBUG = True
# UTILS
def pcol(obj, template, nostart=False, noend=False):
    """Return ``obj`` rendered as text and wrapped in ANSI colour codes.

    Despite the name, nothing is printed here; callers pass the result to
    ``print``/``input``.

    Args:
        obj: Object to render. Lists and dicts are pretty-printed as JSON
            (falling back to ``str`` when they are not JSON-serialisable);
            any other non-string value goes through ``str``.
        template: Style name. One of ``header1``, ``header2``, ``search``,
            ``section``, ``title``, ``body``, ``sucess``, ``fail`` (``end``
            is also accepted and yields the reset code).
        nostart: When true, omit the opening colour code.
        noend: When true, omit the trailing reset code.

    Returns:
        The decorated string, or ``obj`` itself (unchanged and unconverted)
        when ``template`` is not a known style.
    """
    _configs = {
        "header1": "\033[1;105m",
        "header2": "\033[1;95m",
        "search": "\033[104m",
        "section": "\033[94m",
        "title": "\033[7m",
        "body": "\033[97m",
        "sucess": "\033[92m",
        "fail": "\033[91m",
        "end": "\033[0m",
    }
    if template not in _configs:
        return obj

    # Turn the object into text.
    if isinstance(obj, str):
        text = obj
    elif isinstance(obj, (list, dict)):
        try:
            text = json.dumps(obj, indent=2)
        except (TypeError, ValueError):
            # Not JSON-serialisable (e.g. contains a set) or self-referencing.
            text = str(obj)
    else:
        try:
            text = str(obj)
        except Exception:
            # Last resort: a broken __str__ must not take the UI down.
            text = ""

    start = "" if nostart else _configs[template]
    end = "" if noend else _configs["end"]
    return f"{start}{text}{end}"
def VideoThread(query, args=args):
    """Run the inference script once for the given settings.

    Despite the name, this is a plain blocking call: it returns only after
    the subprocess has finished.

    Args:
        query: Prompt text. Kept for interface compatibility; the prompt
            actually forwarded to the inference script is ``args.prompt``.
        args: Namespace carrying the run settings (``prompt``, ``video_path``,
            ``condition``, ``respath``, ``video_length``, ``width``,
            ``height``, ``frame_rate``, ``version``, ``seed``,
            ``is_long_video``, ``extract_only``). Defaults to the namespace
            parsed at module level.

    Returns:
        ``OUT_PATH``.
        NOTE(review): the subprocess is given ``args.respath`` as its
        temp video name, so whether a file really exists at ``OUT_PATH``
        afterwards depends on inference.py - confirm.

    Raises:
        SystemExit: if prompt, video path or condition is missing, or the
            source video does not exist.
        subprocess.CalledProcessError: if the inference script exits with a
            non-zero status.
    """
    # Check that required arguments are provided
    if not args.prompt or not args.video_path or not args.condition:
        sys.exit("Please provide a prompt, video path, and condition")

    video_path = args.video_path
    if not os.path.isfile(video_path):
        sys.exit("Source video not found")

    # Start from an empty scratch directory. Only attempt the removal when
    # the directory exists, so a first run does not report a bogus error.
    if os.path.isdir(TMP_PATH):
        try:
            shutil.rmtree(TMP_PATH)
        except OSError as e:
            print("Error: %s - %s." % (e.filename, e.strerror))

    # Anything that is not a plain non-negative integer is replaced by a
    # fresh random seed.
    seed = str(args.seed)
    if not seed.isdigit():
        seed = str(random.randint(1, 1000000))

    # Build the command for subprocess. Use the interpreter running this
    # script so the child sees the same environment/packages.
    # NOTE(review): args.num_inference_steps and args.guidance_scale are
    # collected by the CLI but not forwarded here - confirm whether
    # inference.py accepts them.
    command = [
        sys.executable or "python",
        INF_PATH,
        "--prompt", str(args.prompt),
        "--condition", str(args.condition),
        "--video_path", str(video_path),
        "--output_path", str(TMP_PATH),  # You may need to adjust this
        "--temp_video_name", str(args.respath),  # You may need to adjust this
        "--video_length", str(args.video_length),
        "--width", str(args.width),
        "--height", str(args.height),
        "--frame_rate", str(args.frame_rate),
        "--version", str(args.version),  # You may need to adjust this
        "--seed", seed,
    ]
    # Long-video mode stays on unless the caller explicitly switched it off
    # (the interactive wizard lets the user answer "0").
    if getattr(args, "is_long_video", True):
        command.append("--is_long_video")
    if args.extract_only:
        command.append("--extract_only")

    # Execute the subprocess
    subprocess.run(command, check=True)
    return OUT_PATH
_RULE = "-------------------------------------------"
_BANNER = "Desota AI Text Video Control CLI "
_BYLINE = "by © DeSOTA, 2023"


class _WizardExit(Exception):
    """Signal that the user asked to leave interactive mode."""


def _clear_screen(args):
    """Clear the terminal unless the user passed --noclear."""
    if not getattr(args, "noclear", False):
        os.system("cls" if sys.platform == "win32" else "clear")


def _print_rule():
    """Reset the terminal style and print the horizontal separator."""
    print(f'{pcol("", "title", nostart=True)}{pcol(_RULE, "search")}')


def _seed_value(text):
    """Parse a seed, rejecting negatives (VideoThread only accepts digits)."""
    seed = int(text)
    if seed < 0:
        raise ValueError("seed must not be negative")
    return seed


def _ask(message, default=None, cast=str, required=False):
    """Ask one question and return the parsed answer.

    An empty answer yields ``default`` (or re-asks when ``required``); an
    answer ``cast`` rejects with ValueError is re-asked. Typing "exit",
    pressing Ctrl+C or closing stdin raises ``_WizardExit``.
    """
    while True:
        try:
            answer = input(message).strip()
        except (KeyboardInterrupt, EOFError):
            # Reset any style left open by the prompt and end the line.
            print(pcol("", "title", nostart=True))
            raise _WizardExit from None
        if answer.lower() == "exit":
            print(pcol("", "title", nostart=True))
            raise _WizardExit
        if not answer:
            if required:
                print(pcol("A value is required.", "fail"))
                continue
            _print_rule()
            return default
        try:
            value = cast(answer)
        except ValueError:
            print(pcol(f"Invalid value: {answer!r}", "fail"))
            continue
        _print_rule()
        return value


def _collect_settings(args):
    """Fill ``args`` in place from the interactive questionnaire."""
    prompt_msg = "".join([
        pcol("What is your text prompt? ('exit' to exit)\n-------------------------------------------\n|", "search"),
        pcol("", "title", noend=True),
    ])
    args.prompt = _ask(prompt_msg, required=True)
    args.video_path = _ask(
        f"Enter the path to the source video file (default is {DEF_MOON}): ",
        default=DEF_MOON,
    )
    args.version = _ask(
        "Enter the Controlnet version (choices: v10 or v11, default is v10): ",
        default="v10", cast=str.lower,
    )
    args.condition = _ask(
        "Enter the condition of structure sequence (choices: v10: depth, canny, pose; v11: scribble_hed, softedge_hed, scribble_hedsafe, softedge_hedsafe, depth_midas, mlsd, openpose, openpose_face, openpose_faceonly, openpose_full, openpose_hand, scribble_pidinet, softedge_pidinet, scribble_pidsafe, softedge_pidsafe, normal_bae, lineart_coarse, lineart_realistic, lineart_anime, depth_zoe, depth_leres, depth_leres++, shuffle, mediapipe_face, canny): ",
        required=True,
    )
    # Only the literal answer "1" turns extract-only on.
    args.extract_only = _ask(
        "Set 1 to extract condition only (for example to extract the pose): ",
        default=False, cast=lambda text: text == "1",
    )
    args.video_length = _ask(
        "Enter the length of synthesized video (default is 15): ", default=15, cast=int)
    args.width = _ask("Enter the width; default is 256: ", default=256, cast=int)
    args.height = _ask("Enter the height; default is 256: ", default=256, cast=int)
    args.frame_rate = _ask(
        "Enter the frame rate of SyntVideo (default is 2): ", default=2, cast=int)
    # Only the literal answer "0" turns long-video mode off.
    args.is_long_video = _ask(
        "Set 1 to produce a long video OR use to lower RAM usage; Set 0 to disable; LongVid is On by default! (default is 1): ",
        default=True, cast=lambda text: text != "0",
    )
    args.num_inference_steps = _ask(
        "Enter the number of denoising steps (default is 50): ", default=50, cast=int)
    args.guidance_scale = _ask(
        "Enter the scale for classifier-free guidance (default is 12.5): ",
        default=12.5, cast=float,
    )
    # Seed: keep what the user typed, otherwise draw a random one. The Y/N
    # answer itself must never end up in args.seed.
    seed = None
    wants_seed = _ask(
        "Do you want to enter a specific seed for randomness? (Y/N): ", default="n")
    if wants_seed.lower() == "y":
        seed = _ask("Enter the random seed for generation: ", cast=_seed_value)
    args.seed = random.randint(1, 1000000) if seed is None else seed


def _print_settings(args, compact=False):
    """Print the chosen settings; ``compact`` merges related values per line."""
    _print_rule()
    print("\nChosen Settings:")
    _print_rule()
    print(f"Prompt: {args.prompt}")
    print(f"Video Path: {args.video_path}")
    print(f"Version Controlnet: {args.version}")
    print(f"Condition: {args.condition}")
    print(f"Condition Only? {args.extract_only}")
    print(f"Output Video Length: {args.video_length}")
    if compact:
        print(f"(Width px: {args.width} | Height px: {args.height}) @ {args.frame_rate}FPS")
    else:
        print(f"Output Video Width px: {args.width}")
        print(f"Output Video Height px: {args.height}")
        print(f"Frame Rate: {args.frame_rate}")
    print(f"Is Long Video: {args.is_long_video}")
    if compact:
        print(f"Inference Steps: {args.num_inference_steps} | Guidance: {args.guidance_scale} | Seed: {args.seed}")
    else:
        print(f"Number of Inference Steps: {args.num_inference_steps}")
        print(f"Guidance Scale: {args.guidance_scale}")
        print(f"Seed: {args.seed}")
    _print_rule()


def _render(args):
    """Run one render through VideoThread and report the outcome."""
    started = time.time()
    result = VideoThread(args.prompt, args)
    if DEBUG:
        print(f" [ DEBUG ] - elapsed time (secs): {time.time()-started}")
    if result:
        print('done... :)')


def main(args):
    """Entry point: render once from CLI arguments or run the interactive wizard.

    When ``args.prompt`` is anything other than the sentinel
    ``"enter_cli_mode"``, a single render is started with the given settings
    and the process exits with status 0. Otherwise the user is asked for each
    setting, shown a summary, asked to confirm, and the render is started;
    this repeats until the user types "exit" or presses Ctrl+C.

    Args:
        args: Parsed command-line namespace; mutated in interactive mode.

    Raises:
        SystemExit: after a non-interactive render, when the user declines
            the confirmation, or from VideoThread's own validation.
        subprocess.CalledProcessError: in non-interactive mode, if the
            inference subprocess fails.
    """
    # Non-interactive: everything came from the command line.
    if args.prompt != "enter_cli_mode":
        if VideoThread(args.prompt, args):
            print('done... :)')
        sys.exit(0)

    _clear_screen(args)
    print(pcol("Welcome to Desota AI Text Video Control CLI ", "header1"), pcol(_BYLINE, "header2"))
    print(pcol("Edit video with text!\n", "body"))

    while True:
        print(pcol("*" * 80, "body"))
        try:
            _collect_settings(args)

            # Show everything back to the user and ask for confirmation.
            _clear_screen(args)
            print(pcol(_BANNER, "header1"), pcol(_BYLINE, "header2"))
            print(pcol("Confirm Settings!\n", "body"))
            _print_settings(args)
            confirm = _ask(
                "Are you sure you want to go with these settings? (Y/N): ", default="")
        except _WizardExit:
            return

        if confirm.lower() != "y":
            print("Settings not confirmed. Exiting.")
            sys.exit()

        _clear_screen(args)
        print(pcol(_BANNER, "header1"), pcol(_BYLINE, "header2"))
        _print_settings(args, compact=True)
        print(pcol("AI RENDER INIT!\n", "body"))
        _print_rule()

        try:
            _render(args)
        except subprocess.CalledProcessError as err:
            # Keep the wizard alive so the user can retry with other settings.
            print(pcol(f"Inference failed with exit code {err.returncode}", "fail"))
if __name__ == "__main__":
    # The command line was already parsed into the module-level ``args`` when
    # the parser was set up, so reuse it instead of parsing sys.argv again.
    main(args)