diff --git a/.gitignore b/.gitignore
index 1c013180..3037ac66 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,4 +20,6 @@ flagged/
.env
ShortGPT.egg-info
dist
-build
\ No newline at end of file
+build
+setup.py
+test.ipynb
\ No newline at end of file
diff --git a/gui/asset_components.py b/gui/asset_components.py
index 78c1d94a..24777192 100644
--- a/gui/asset_components.py
+++ b/gui/asset_components.py
@@ -1,6 +1,7 @@
import gradio as gr
from shortGPT.config.asset_db import AssetDatabase
-
+from shortGPT.config.api_db import get_api_key
+from shortGPT.api_utils.eleven_api import getVoices
AssetDatabase().sync_local_assets()
def getBackgroundVideoChoices():
asset_db = AssetDatabase()
@@ -14,10 +15,15 @@ def getBackgroundMusicChoices():
choices = list(df.loc['background music' == df['type']]['name'])[:20]
return choices
+def getElevenlabsVoices():
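+    # Fetch the list of voice names available for the configured ElevenLabs API key.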
+ api_key = get_api_key("ELEVEN LABS")
+ voices = list(reversed(getVoices(api_key).keys()))
+ return voices
background_video_checkbox = gr.CheckboxGroup(choices=getBackgroundVideoChoices(), interactive=True, label="Choose background video")
background_music_checkbox = gr.CheckboxGroup(choices=getBackgroundMusicChoices(), interactive=True, label="Choose background music")
-
+voiceChoice = gr.Radio(getElevenlabsVoices(), label="Elevenlabs voice", value="Antoni", interactive=True)
+voiceChoiceTranslation = gr.Radio(getElevenlabsVoices(), label="Elevenlabs voice", value="Antoni", interactive=True)
import os, platform, subprocess
def start_file(path):
diff --git a/gui/config_ui.py b/gui/config_ui.py
index c065ed86..ec82f290 100644
--- a/gui/config_ui.py
+++ b/gui/config_ui.py
@@ -2,7 +2,7 @@
import time
from shortGPT.config.api_db import get_api_key, set_api_key
from shortGPT.api_utils.eleven_api import getCharactersFromKey
-from gui.short_automation_ui import voiceChoice, getElevenlabsVoices
+from gui.asset_components import voiceChoice, voiceChoiceTranslation, getElevenlabsVoices
def onShow(button_text):
if button_text == "Show":
return gr.Textbox.update(type="text"), gr.Button.update(value="Hide")
@@ -25,11 +25,13 @@ def saveKeys(openai_key, eleven_key, pexels_key):
return gr.Textbox.update(value=openai_key),\
gr.Textbox.update(value=eleven_key),\
gr.Textbox.update(value=pexels_key),\
+ gr.Radio.update(choices=new_eleven_voices),\
gr.Radio.update(choices=new_eleven_voices)
return gr.Textbox.update(value=openai_key),\
gr.Textbox.update(value=eleven_key),\
gr.Textbox.update(value=pexels_key),\
+ gr.Radio.update(visible=True),\
gr.Radio.update(visible=True)
def getElevenRemaining(key):
@@ -60,7 +62,7 @@ def create_config_ui():
def back_to_normal():
time.sleep(3)
return gr.Button.update(value="save")
- save_button.click(verify_eleven_key, [eleven_labs_textbox, eleven_characters_remaining], [eleven_characters_remaining]).success(saveKeys, [openai_textbox, eleven_labs_textbox, pexels_textbox], [openai_textbox, eleven_labs_textbox, pexels_textbox, voiceChoice])
+ save_button.click(verify_eleven_key, [eleven_labs_textbox, eleven_characters_remaining], [eleven_characters_remaining]).success(saveKeys, [openai_textbox, eleven_labs_textbox, pexels_textbox], [openai_textbox, eleven_labs_textbox, pexels_textbox, voiceChoice, voiceChoiceTranslation])
save_button.click(lambda _ : gr.Button.update(value="Keys Saved !"), [], [save_button])
save_button.click(back_to_normal, [], [save_button])
return config_ui
\ No newline at end of file
diff --git a/gui/content_automation_ui.py b/gui/content_automation_ui.py
index 0cae64a5..8e43aefa 100644
--- a/gui/content_automation_ui.py
+++ b/gui/content_automation_ui.py
@@ -1,6 +1,8 @@
import gradio as gr
+from gui.video_translation_ui import create_video_translation_ui
+
ERROR_TEMPLATE = """
ERROR : {error_message}
Traceback Info : {stack_trace}
@@ -15,8 +17,11 @@ def create_content_automation(shortGPTUI: gr.Blocks):
with gr.Tab("Content Automation") as content_automation_ui:
gr.Markdown("# π Content Automation π")
gr.Markdown("## Choose your desired automation task.")
-        choice = gr.Radio([ '🎬 Automate the creation of shorts', '🎞️ Automate a video with stock assets'], label="Choose an option")
+        choice = gr.Radio([ '🎬 Automate the creation of shorts', '🎞️ Automate a video with stock assets', '📹 Automate video translation'], label="Choose an option")
video_automation_ui = create_video_automation_ui(shortGPTUI)
short_automation_ui = create_short_automation_ui(shortGPTUI)
- choice.change(lambda x: (gr.update(visible= x == choice.choices[1]), gr.update(visible= x == choice.choices[0])), [choice], [video_automation_ui, short_automation_ui])
- return content_automation_ui
\ No newline at end of file
+ video_translation_ui = create_video_translation_ui(shortGPTUI)
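+        # Show only the panel matching the selected automation option.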
+ choice.change(lambda x: (gr.update(visible= x == choice.choices[1]), gr.update(visible= x == choice.choices[0]), gr.update(visible= x == choice.choices[2])), [choice], [video_automation_ui, short_automation_ui, video_translation_ui])
+ return content_automation_ui
\ No newline at end of file
diff --git a/gui/gui.py b/gui/gui.py
index 414ba35a..daa48ac3 100644
--- a/gui/gui.py
+++ b/gui/gui.py
@@ -2,6 +2,7 @@
from gui.config_ui import create_config_ui
from gui.asset_library_ui import create_asset_library_ui
from gui.content_automation_ui import create_content_automation
+from gui.video_translation_ui import create_video_translation_ui
max_choices = 20
ui_asset_dataframe = gr.Dataframe(interactive=False)
LOGO_PATH = "http://localhost:31415/file=public/logo.png"
diff --git a/gui/short_automation_ui.py b/gui/short_automation_ui.py
index 1af0d3be..6c5d74c5 100644
--- a/gui/short_automation_ui.py
+++ b/gui/short_automation_ui.py
@@ -1,10 +1,9 @@
import traceback
import gradio as gr
-from gui.asset_components import background_video_checkbox, background_music_checkbox, start_file
+from gui.asset_components import background_video_checkbox, background_music_checkbox, voiceChoice, start_file
from shortGPT.config.api_db import get_api_key
from shortGPT.engine.reddit_short_engine import RedditShortEngine, Language
from shortGPT.engine.facts_short_engine import FactsShortEngine
-from shortGPT.api_utils.eleven_api import getVoices
import time
language_choices = [lang.value.upper() for lang in Language]
import gradio as gr
@@ -24,13 +23,6 @@
border-radius: 5px; cursor: pointer; text-decoration: none;'>Get Help on Discord
"""
-def getElevenlabsVoices():
- api_key = get_api_key("ELEVEN LABS")
- voices = list(reversed(getVoices(api_key).keys()))
- return voices
-
-voiceChoice = gr.Radio(getElevenlabsVoices(), label="Elevenlabs voice", value="Antoni", interactive=True)
-
def create_short_automation_ui(shortGptUI: gr.Blocks):
def create_short(numShorts,
short_type,
@@ -66,7 +58,7 @@ def logger(prog_str):
progress(progress_counter / (num_steps * numShorts),f"Making short {i+1}/{numShorts} - {prog_str}")
shortEngine.set_logger(logger)
- for step_num, step_info in shortEngine.makeShort():
+ for step_num, step_info in shortEngine.makeContent():
progress(progress_counter / (num_steps * numShorts), f"Making short {i+1}/{numShorts} - {step_info}")
progress_counter += 1
diff --git a/gui/video_automation_ui.py b/gui/video_automation_ui.py
index 46eedb4c..4f570921 100644
--- a/gui/video_automation_ui.py
+++ b/gui/video_automation_ui.py
@@ -43,7 +43,7 @@ def makeVideo(script, language, isVertical, progress):
def logger(prog_str):
progress(progress_counter / (num_steps),f"Creating video - {progress_counter} - {prog_str}")
shortEngine.set_logger(logger)
- for step_num, step_info in shortEngine.makeShort():
+ for step_num, step_info in shortEngine.makeContent():
progress(progress_counter / (num_steps), f"Creating video - {step_info}")
progress_counter += 1
diff --git a/gui/video_translation_ui.py b/gui/video_translation_ui.py
new file mode 100644
index 00000000..28c7b0ef
--- /dev/null
+++ b/gui/video_translation_ui.py
@@ -0,0 +1,128 @@
+import traceback
+import gradio as gr
+from gui.asset_components import voiceChoiceTranslation, start_file
+from shortGPT.engine.content_translation_engine import ContentTranslationEngine, Language
+import time
+language_choices = [lang.value.upper() for lang in Language]
+import gradio as gr
+import os
+import time
+
+ERROR_TEMPLATE = """
+
+
ERROR | {error_message}
+
Traceback Info : {stack_trace}
+
If the problem persists, don't hesitate to
+contact our support. We're here to assist you.
+
Get Help on Discord
+
"""
+
+
+def create_video_translation_ui(shortGptUI: gr.Blocks):
+ def translate_video(
+ videoType,
+ yt_link,
+ video_path,
+ target_language,
+ use_captions: bool,
+ voice: str,
+ progress=gr.Progress()):
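+        # Run the translation engine end-to-end and return the result HTML, a visible folder button, and a hidden/updated error box.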
+ language = Language(target_language.lower())
+ embedHTML = ''
+ progress_counter = 0
+ try:
+ content_translation_engine = ContentTranslationEngine(src_url=yt_link if videoType=="Youtube link" else video_path, target_language=language, use_captions=use_captions, voice_name=voice )
+ num_steps = content_translation_engine.get_total_steps()
+ def logger(prog_str):
+ progress(progress_counter / (num_steps),f"Translating your video - {prog_str}")
+ content_translation_engine.set_logger(logger)
+
+ for step_num, step_info in content_translation_engine.makeContent():
+ progress(progress_counter / (num_steps),f"Translating your video - {step_info}")
+ progress_counter += 1
+
+ video_path = content_translation_engine.get_video_output_path()
+ current_url = shortGptUI.share_url+"/" if shortGptUI.share else shortGptUI.local_url
+ file_url_path = f"{current_url}file={video_path}"
+ file_name = video_path.split("/")[-1].split("\\")[-1]
+            embedHTML += f'''
+                '''
+            return embedHTML + '', gr.Button.update(visible=True), gr.update(visible=False)
+
+ except Exception as e:
+ traceback_str = ''.join(traceback.format_tb(e.__traceback__))
+ error_name = type(e).__name__.capitalize()+ " : " +f"{e.args[0]}"
+ print("Error", traceback_str)
+ return embedHTML + '', gr.Button.update(visible=True), gr.update(value=ERROR_TEMPLATE.format(error_message=error_name, stack_trace=traceback_str), visible=True)
+
+
+
+
+ with gr.Row(visible=False) as video_translation_ui:
+ with gr.Column():
+ videoType = gr.Radio(["Youtube link", "Video file"], label="Input your video", value="Video file", interactive=True)
+ video_path = gr.Video(source="upload", interactive=True, width=533.33, height=300)
+ yt_link = gr.Textbox(label="Youtube link (https://youtube.com/xyz): ", interactive=True, visible=False)
+ videoType.change(lambda x: (gr.update(visible= x == "Video file"), gr.update(visible= x == "Youtube link")), [videoType], [video_path, yt_link] )
+ language = gr.Radio(language_choices, label="Target Language", value="SPANISH", interactive=True)
+ voiceChoiceTranslation.render()
+ useCaptions = gr.Checkbox(label="Caption video", value=False)
+
+ translateButton = gr.Button(label="Create Shorts")
+
+ generation_error = gr.HTML(visible=False)
+ video_folder = gr.Button("π", visible=True)
+ file_name= "videos/2023-07-22_16-17-06 - translatedcontenttofrench.mp4"
+ file_url_path = f"http://127.0.0.1:31415/file={file_name}"
+ output = gr.HTML(f'''
+ ''')
+
+ video_folder.click(lambda _: start_file(os.path.abspath("videos/")))
+ translateButton.click(inspect_create_inputs, inputs=[videoType, video_path, yt_link, ], outputs=[generation_error]).success(translate_video, inputs=[
+ videoType, yt_link, video_path, language, useCaptions, voiceChoiceTranslation
+ ], outputs=[output, video_folder, generation_error])
+ return video_translation_ui
+
+
+
+def inspect_create_inputs(videoType, video_path, yt_link):
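+    # Validate the chosen input before launching: a well-formed YouTube URL, or an existing local video file with a supported extension.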
+ supported_extensions = ['.mp4', '.avi', '.mov'] # Add more supported video extensions if needed
+ print(videoType, video_path, yt_link)
+ if videoType == "Youtube link":
+ if not yt_link.startswith("https://youtube.com/") and not yt_link.startswith("https://www.youtube.com/"):
+ raise gr.Error('Invalid YouTube URL. Please provide a valid URL. Link example: https://www.youtube.com/watch?v=dQw4w9WgXcQ')
+ else:
+ if not video_path or not os.path.exists(video_path):
+ raise gr.Error('You must drag and drop a valid video file.')
+
+ file_ext = os.path.splitext(video_path)[-1].lower()
+ if file_ext not in supported_extensions:
+ raise gr.Error('Invalid video file. Supported video file extensions are: {}'.format(', '.join(supported_extensions)))
+ return gr.update(visible=False)
+
+def update_progress(progress, progress_counter, num_steps, num_shorts, stop_event):
+ start_time = time.time()
+ while not stop_event.is_set():
+ elapsed_time = time.time() - start_time
+ dynamic = int(3649 * elapsed_time / 600)
+ progress(progress_counter / (num_steps * num_shorts), f"Rendering progress - {dynamic}/3649")
+ time.sleep(0.1) # update every 0.1 second
diff --git a/shortGPT/api_utils/eleven_api.py b/shortGPT/api_utils/eleven_api.py
index 98379582..1decd367 100644
--- a/shortGPT/api_utils/eleven_api.py
+++ b/shortGPT/api_utils/eleven_api.py
@@ -56,7 +56,7 @@ def generateVoice(text, character, fileName, stability=0.2, clarity=0.1, api_key
return fileName
else:
message = response.text
- print(f'Error in response, {response.status_code} , message: {message}')
+ raise Exception(f'Error in response, {response.status_code} , message: {message}')
return ""
# print(getCharactersFromKey(''))
diff --git a/shortGPT/audio/audio_duration.py b/shortGPT/audio/audio_duration.py
index 57136f8b..9aa2a839 100644
--- a/shortGPT/audio/audio_duration.py
+++ b/shortGPT/audio/audio_duration.py
@@ -1,7 +1,7 @@
import yt_dlp
import subprocess
import json
-
+from shortGPT.editing_utils.handle_videos import getYoutubeVideoLink
def get_duration_yt_dlp(url):
ydl_opts = {
@@ -45,24 +45,21 @@ def get_duration_ffprobe(signed_url):
def getAssetDuration(url, isVideo=True):
if("youtube.com" in url):
if not isVideo:
- return getYoutubeAudioLink(url)
+ url, _ = getYoutubeAudioLink(url)
else:
- return getYoutubeVideoLink(url)
-
- #Audio/Video is from some cloud storage provider. Link must be public.
- else:
- #Trying two different method to get the duration of the video / audio
- duration, err_ffprobe = get_duration_ffprobe(url)
- if duration is not None:
- return url, duration
+ url, _ = getYoutubeVideoLink(url)
+    #Trying two different methods to get the duration of the video / audio
+ duration, err_ffprobe = get_duration_ffprobe(url)
+ if duration is not None:
+ return url, duration
- duration, err_yt_dlp = get_duration_yt_dlp(url)
- if duration is not None:
- return url, duration
- print(err_yt_dlp)
- print(err_ffprobe)
- print(f"The url/path {url} does not point to a video/ audio. Impossible to extract its duration")
- return url, None
+ duration, err_yt_dlp = get_duration_yt_dlp(url)
+ if duration is not None:
+ return url, duration
+ print(err_yt_dlp)
+ print(err_ffprobe)
+ print(f"The url/path {url} does not point to a video/ audio. Impossible to extract its duration")
+ return url, None
def getYoutubeAudioLink(url):
@@ -83,22 +80,3 @@ def getYoutubeAudioLink(url):
except Exception as e:
print("Failed getting audio link from the following video/url", e.args[0])
return None
-
-def getYoutubeVideoLink(url):
- ydl_opts = {
- "quiet": True,
- "no_warnings": True,
- "no_color": True,
- "no_call_home": True,
- "no_check_certificate": True,
- "format": "bestvideo[height<=1080]"
- }
- try:
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
- dictMeta = ydl.extract_info(
- url,
- download=False)
- return dictMeta['url'], dictMeta['duration']
- except Exception as e:
- print("Failed getting video link from the following video/url", e.args[0])
- return None, None
\ No newline at end of file
diff --git a/shortGPT/audio/audio_utils.py b/shortGPT/audio/audio_utils.py
index 6b823c12..b5449efa 100644
--- a/shortGPT/audio/audio_utils.py
+++ b/shortGPT/audio/audio_utils.py
@@ -28,15 +28,17 @@ def downloadYoutubeAudio(url, outputFile):
print("Failed downloading audio from the following video/url", e.args[0])
return None
-def speedUpAudio(tempAudioPath, outputFile, expected_chars_per_sec=CONST_CHARS_PER_SEC): # Speeding up the audio to make it under 60secs, otherwise the output video is not considered as a short.
+def speedUpAudio(tempAudioPath, outputFile, expected_duration=None): # Speeding up the audio to make it under 60secs, otherwise the output video is not considered as a short.
tempAudioPath, duration = getAssetDuration(tempAudioPath, False)
- if(duration > 57):
- subprocess.run(['ffmpeg', '-i', tempAudioPath, '-af', f'atempo={(duration/57):.5f}', outputFile])
+ if not expected_duration:
+ if(duration > 57):
+ subprocess.run(['ffmpeg', '-i', tempAudioPath, '-af', f'atempo={(duration/57):.5f}', outputFile])
+ else:
+ subprocess.run(['ffmpeg', '-i', tempAudioPath, outputFile])
else:
- subprocess.run(['ffmpeg', '-i', tempAudioPath, outputFile])
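+        # A target duration was provided: retime the audio with ffmpeg's atempo filter so it fits expected_duration.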
+ subprocess.run(['ffmpeg', '-i', tempAudioPath, '-af', f'atempo={(duration/expected_duration):.5f}', outputFile])
if(os.path.exists(outputFile)):
- return outputFile
- return ""
+ return outputFile
def ChunkForAudio(alltext, chunk_size=2500):
alltext_list = alltext.split('.')
diff --git a/shortGPT/audio/eleven_voice_module.py b/shortGPT/audio/eleven_voice_module.py
index fbff65c0..f0c95aed 100644
--- a/shortGPT/audio/eleven_voice_module.py
+++ b/shortGPT/audio/eleven_voice_module.py
@@ -2,12 +2,12 @@
from shortGPT.audio.voice_module import VoiceModule
class ElevenLabsVoiceModule(VoiceModule):
- def __init__(self, api_key, voiceName):
+ def __init__(self, api_key, voiceName, checkElevenCredits):
self.api_key = api_key
self.voiceName = voiceName
self.remaining_credits = None
self.update_usage()
- if self.get_remaining_characters() < 1200:
+ if checkElevenCredits and self.get_remaining_characters() < 1200:
raise Exception(f"Your ElevenLabs API KEY doesn't have enough credits ({self.remaining_credits} character remaining). Minimum required: 1200 characters (equivalent to a 45sec short)")
super().__init__()
diff --git a/shortGPT/editing_framework/core_editing_engine.py b/shortGPT/editing_framework/core_editing_engine.py
index e3824f37..e5b76736 100644
--- a/shortGPT/editing_framework/core_editing_engine.py
+++ b/shortGPT/editing_framework/core_editing_engine.py
@@ -81,11 +81,32 @@ def generate_video(self, schema:Dict[str, Any], output_file, logger=None) -> Non
video.audio = audio
if logger:
my_logger = MoviepyProgressLogger(callBackFunction=logger)
- video.write_videofile(output_file, logger=my_logger)
+ video.write_videofile(output_file, codec='libx264', audio_codec='aac', logger=my_logger)
else:
- video.write_videofile(output_file)
+ video.write_videofile(output_file, codec='libx264', audio_codec='aac')
return output_file
+ def generate_audio(self, schema:Dict[str, Any], output_file, logger=None) -> None:
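+        # Composite every audio asset in the schema (sorted by z-order) into a single audio file at 44.1 kHz.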
+ audio_assets = dict(sorted(schema['audio_assets'].items(), key=lambda item: item[1]['z']))
+ audio_clips = []
+
+ for asset_key in audio_assets:
+ asset = audio_assets[asset_key]
+ asset_type = asset['type']
+ if asset_type == "audio":
+ audio_clip = self.process_audio_asset(asset)
+ else:
+ raise ValueError(f"Invalid asset type: {asset_type}")
+
+ audio_clips.append(audio_clip)
+ audio = CompositeAudioClip(audio_clips)
+ audio.fps = 44100
+ if logger:
+ my_logger = MoviepyProgressLogger(callBackFunction=logger)
+ audio.write_audiofile(output_file, logger=my_logger)
+ else:
+ audio.write_audiofile(output_file)
+ return output_file
# Process common actions
def process_common_actions(self,
clip: Union[VideoFileClip, ImageClip, TextClip, AudioFileClip],
@@ -98,6 +119,10 @@ def process_common_actions(self,
if action['type'] == 'set_time_end':
clip = clip.set_end(action['param'])
continue
+
+ if action['type'] == 'subclip':
+ clip = clip.subclip(**action['param'])
+ continue
return clip
@@ -120,10 +145,6 @@ def process_common_visual_actions(self,
clip = clip.set_position(**action['param'])
continue
- if action['type'] == 'subclip':
- clip = clip.subclip(**action['param'])
- continue
-
if action['type'] == 'green_screen':
params = action['param']
color = params['color'] if params['color'] else [52, 255, 20]
diff --git a/shortGPT/editing_framework/editing_engine.py b/shortGPT/editing_framework/editing_engine.py
index e647b53a..68014909 100644
--- a/shortGPT/editing_framework/editing_engine.py
+++ b/shortGPT/editing_framework/editing_engine.py
@@ -27,6 +27,9 @@ class EditingStep(Enum):
ADD_BACKGROUND_MUSIC = "background_music.json"
ADD_REDDIT_IMAGE = "show_reddit_image.json"
ADD_BACKGROUND_VIDEO = "add_background_video.json"
+ INSERT_AUDIO = "insert_audio.json"
+ EXTRACT_AUDIO = "extract_audio.json"
+ ADD_BACKGROUND_VOICEOVER = "add_background_voiceover.json"
class Flow(Enum):
WHITE_REDDIT_IMAGE_FLOW = "build_reddit_image.json"
@@ -90,6 +93,9 @@ def dumpEditingSchema(self):
def renderVideo(self, outputPath, logger=None):
engine = CoreEditingEngine()
engine.generate_video(self.schema, outputPath, logger=logger)
- def renderImage(self, outputPath):
+ def renderImage(self, outputPath, logger=None):
engine = CoreEditingEngine()
- engine.generate_image(self.schema, outputPath)
\ No newline at end of file
+ engine.generate_image(self.schema, outputPath, logger=logger)
+ def generateAudio(self, outputPath, logger=None):
+ engine = CoreEditingEngine()
+ engine.generate_audio(self.schema, outputPath, logger=logger)
\ No newline at end of file
diff --git a/shortGPT/editing_framework/editing_steps/add_background_voiceover.json b/shortGPT/editing_framework/editing_steps/add_background_voiceover.json
new file mode 100644
index 00000000..1981c788
--- /dev/null
+++ b/shortGPT/editing_framework/editing_steps/add_background_voiceover.json
@@ -0,0 +1,19 @@
+{
+ "background_voiceover": {
+ "inputs": {
+ "parameters": ["url"],
+ "actions": ["volume_percentage"]
+ },
+ "type": "audio",
+ "z": -1,
+ "parameters": {
+ "url": null
+ },
+ "actions": [
+ {
+ "type": "volume_percentage",
+ "param": null
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/shortGPT/editing_framework/editing_steps/background_music.json b/shortGPT/editing_framework/editing_steps/background_music.json
index e73f916b..03f335cf 100644
--- a/shortGPT/editing_framework/editing_steps/background_music.json
+++ b/shortGPT/editing_framework/editing_steps/background_music.json
@@ -7,7 +7,7 @@
"type": "audio",
"z": -1,
"parameters": {
- "url": "editing_test/music.wav"
+ "url": null
},
"actions": [
{
diff --git a/shortGPT/editing_framework/editing_steps/extract_audio.json b/shortGPT/editing_framework/editing_steps/extract_audio.json
new file mode 100644
index 00000000..35a4df12
--- /dev/null
+++ b/shortGPT/editing_framework/editing_steps/extract_audio.json
@@ -0,0 +1,27 @@
+{
+ "extract_audio": {
+ "inputs": {
+ "parameters": ["url"],
+ "actions": ["subclip", "set_time_start", "set_time_end"]
+ },
+ "type": "audio",
+ "z": -2,
+ "parameters": {
+ "url": null
+ },
+ "actions": [
+ {
+ "type": "subclip",
+ "param": null
+ },
+ {
+ "type": "set_time_start",
+ "param": null
+ },
+ {
+ "type": "set_time_end",
+ "param": null
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/shortGPT/editing_framework/editing_steps/insert_audio.json b/shortGPT/editing_framework/editing_steps/insert_audio.json
new file mode 100644
index 00000000..6b9d2baf
--- /dev/null
+++ b/shortGPT/editing_framework/editing_steps/insert_audio.json
@@ -0,0 +1,23 @@
+{
+ "insert_audio": {
+ "inputs": {
+ "parameters": ["url"],
+ "actions": ["set_time_start", "set_time_end"]
+ },
+ "type": "audio",
+ "z": -1,
+ "parameters": {
+ "url": null
+ },
+ "actions": [
+ {
+ "type":"set_time_start",
+ "param":null
+ },
+ {
+ "type": "set_time_end",
+ "param": null
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/shortGPT/editing_utils/captions.py b/shortGPT/editing_utils/captions.py
index ca9ed6dd..e14c445c 100644
--- a/shortGPT/editing_utils/captions.py
+++ b/shortGPT/editing_utils/captions.py
@@ -1,14 +1,27 @@
import re
+def getSpeechBlocks(whispered, silence_time=2):
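+    # Group whisper segments into speech blocks, starting a new block whenever the silence between segments exceeds silence_time seconds.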
+ text_blocks, (st, et, txt) = [], (0,0,"")
+ for i, seg in enumerate(whispered['segments']):
+ if seg['start'] - et > silence_time:
+ if txt: text_blocks.append([[st, et], txt])
+ (st, et, txt) = (seg['start'], seg['end'], seg['text'])
+ else:
+ et, txt = seg['end'], txt + seg['text']
+
+ if txt: text_blocks.append([[st, et], txt]) # For last text block
+
+ return text_blocks
+
+def cleanWord(word):
+ return re.sub(r'[^\w\s\-_"\'\']', '', word)
+
def interpolateTimeFromDict(word_position, d):
for key, value in d.items():
if key[0] <= word_position <= key[1]:
return value
return None
-def cleanWord(word):
- return re.sub(r'[^\w\s]', '', word)
-
def getTimestampMapping(whisper_analysis):
index = 0
locationToTimestamp = {}
@@ -19,29 +32,40 @@ def getTimestampMapping(whisper_analysis):
index = newIndex
return locationToTimestamp
+
def splitWordsBySize(words, maxCaptionSize):
+ halfCaptionSize = maxCaptionSize / 2
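+    # Pack words greedily up to maxCaptionSize characters, but allow a break once a caption reaches half that size so lengths stay balanced.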
captions = []
- i = 0
- while i < len(words):
- caption = words[i]
- while i + 1 < len(words) and len(caption + ' ' + words[i + 1]) <= maxCaptionSize:
- i += 1
- caption += ' ' + words[i]
+ while words:
+ caption = words[0]
+ words = words[1:]
+ while words and len(caption + ' ' + words[0]) <= maxCaptionSize:
+ caption += ' ' + words[0]
+ words = words[1:]
+ if len(caption) >= halfCaptionSize and words:
+ break
captions.append(caption)
- i += 1
return captions
-def getCaptionsWithTime(whisper_analysis, maxCaptionSize=15):
+
+def getCaptionsWithTime(whisper_analysis, maxCaptionSize=15, considerPunctuation=False):
wordLocationToTime = getTimestampMapping(whisper_analysis)
position = 0
start_time = 0
CaptionsPairs = []
- words = whisper_analysis['text'].split()
- split_captions = splitWordsBySize(words, maxCaptionSize)
- for caption in split_captions:
- position += len(caption) + 1
+ text = whisper_analysis['text']
+
+ if considerPunctuation:
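+        # Split on sentence boundaries first so captions do not run across punctuation.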
+ sentences = re.split(r'(?<=[.!?]) +', text)
+ words = [word for sentence in sentences for word in splitWordsBySize(sentence.split(), maxCaptionSize)]
+ else:
+ words = text.split()
+ words = [cleanWord(word) for word in splitWordsBySize(words, maxCaptionSize)]
+
+ for word in words:
+ position += len(word) + 1
end_time = interpolateTimeFromDict(position, wordLocationToTime)
- if(end_time and caption):
- CaptionsPairs.append(((start_time, end_time), cleanWord(caption)))
+ if end_time and word:
+ CaptionsPairs.append(((start_time, end_time), word))
start_time = end_time
- return CaptionsPairs
+ return CaptionsPairs
\ No newline at end of file
diff --git a/shortGPT/editing_utils/handle_videos.py b/shortGPT/editing_utils/handle_videos.py
index 525f73d2..7c562c2d 100644
--- a/shortGPT/editing_utils/handle_videos.py
+++ b/shortGPT/editing_utils/handle_videos.py
@@ -2,53 +2,28 @@
import os
import random
import yt_dlp
-def getYoutubeAudio(url):
- ydl_opts = {
- "quiet": True,
- "no_warnings": True,
- "no_color": True,
- "no_call_home": True,
- "no_check_certificate": True,
- "format": "bestaudio/best"
- }
- try:
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
- dictMeta = ydl.extract_info(
- url,
- download=False)
- return dictMeta['url'], dictMeta['duration']
- except Exception as e:
- print("Failed getting audio link from the following video/url", e.args[0])
- return None
-
-def getYoutubeAudio(url):
- ydl_opts = {
- "quiet": True,
- "no_warnings": True,
- "no_color": True,
- "no_call_home": True,
- "no_check_certificate": True,
- "format": "bestaudio/best"
- }
- try:
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
- dictMeta = ydl.extract_info(
- url,
- download=False)
- return dictMeta['url'], dictMeta['duration']
- except Exception as e:
- print("Failed getting audio link from the following video/url", e.args[0])
- return None
+import subprocess
+import json
def getYoutubeVideoLink(url):
- ydl_opts = {
+ if 'shorts' in url:
+ ydl_opts = {
+ "quiet": True,
+ "no_warnings": True,
+ "no_color": True,
+ "no_call_home": True,
+ "no_check_certificate": True,
+ "format": "bestvideo[height<=1920]"
+ }
+ else:
+ ydl_opts = {
"quiet": True,
"no_warnings": True,
"no_color": True,
"no_call_home": True,
"no_check_certificate": True,
"format": "bestvideo[height<=1080]"
- }
+ }
try:
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
dictMeta = ydl.extract_info(
@@ -84,3 +59,30 @@ def extract_random_clip_from_video(video_url, video_duration, clip_duration , ou
if not os.path.exists(output_file):
raise Exception("Random clip failed to be written")
return output_file
+
+
+def get_aspect_ratio(video_file):
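+    # Probe the file with ffprobe and return its display aspect ratio (width / height).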
+ cmd = 'ffprobe -i "{}" -v quiet -print_format json -show_format -show_streams'.format(video_file)
+# jsonstr = subprocess.getoutput(cmd)
+ jsonstr = subprocess.check_output(cmd, shell=True, encoding='utf-8')
+ r = json.loads(jsonstr)
+ # look for "codec_type": "video". take the 1st one if there are mulitple
+ video_stream_info = [x for x in r['streams'] if x['codec_type']=='video'][0]
+ if 'display_aspect_ratio' in video_stream_info and video_stream_info['display_aspect_ratio']!="0:1":
+ a,b = video_stream_info['display_aspect_ratio'].split(':')
+ dar = int(a)/int(b)
+ else:
+ # some video do not have the info of 'display_aspect_ratio'
+ w,h = video_stream_info['width'], video_stream_info['height']
+ dar = int(w)/int(h)
+ ## not sure if we should use this
+ #cw,ch = video_stream_info['coded_width'], video_stream_info['coded_height']
+ #sar = int(cw)/int(ch)
+ if 'sample_aspect_ratio' in video_stream_info and video_stream_info['sample_aspect_ratio']!="0:1":
+ # some video do not have the info of 'sample_aspect_ratio'
+ a,b = video_stream_info['sample_aspect_ratio'].split(':')
+ sar = int(a)/int(b)
+ else:
+ sar = dar
+ par = dar/sar
+ return dar
\ No newline at end of file
diff --git a/shortGPT/engine/README.md b/shortGPT/engine/README.md
index ebeaddc1..b965ec20 100644
--- a/shortGPT/engine/README.md
+++ b/shortGPT/engine/README.md
@@ -38,7 +38,7 @@ This file contains the `AbstractContentEngine` class, which is an abstract base
- `isShortDone(self)`: Checks if the short video is done rendering by checking the value of the '_db_ready_to_upload' attribute.
-- `makeShort(self)`: Generates the short video by executing the steps defined in the `stepDict`. It yields the current step number and a message indicating the progress.
+- `makeContent(self)`: Generates the short video by executing the steps defined in the `stepDict`. It yields the current step number and a message indicating the progress.
- `get_video_output_path(self)`: Returns the path of the rendered video.
diff --git a/shortGPT/engine/abstract_content_engine.py b/shortGPT/engine/abstract_content_engine.py
index 4e2567cd..54b26fbc 100644
--- a/shortGPT/engine/abstract_content_engine.py
+++ b/shortGPT/engine/abstract_content_engine.py
@@ -9,7 +9,7 @@
CONTENT_DB = ContentDatabase()
class AbstractContentEngine(ABC):
- def __init__(self, short_id: str, content_type:str, language: Language, voiceName: str):
+ def __init__(self, short_id: str, content_type:str, language: Language, voiceName: str, checkElevenCredits=True):
if short_id:
self.dataManager = CONTENT_DB.getContentDataManager(
short_id, content_type
@@ -20,7 +20,7 @@ def __init__(self, short_id: str, content_type:str, language: Language, voiceNam
self.initializeMagickAndFFMPEG()
self.prepareEditingPaths()
self._db_language = language.value
- self.voiceModule = ElevenLabsVoiceModule(get_api_key("ELEVEN LABS"), voiceName if voiceName else "Antoni")
+ self.voiceModule = ElevenLabsVoiceModule(get_api_key("ELEVEN LABS"), voiceName if voiceName else "Antoni", checkElevenCredits=checkElevenCredits)
self.assetStore = AssetDatabase()
self.stepDict = {}
self.logger = lambda _: print(_)
@@ -59,7 +59,7 @@ def verifyParameters(*args, **kargs):
def isShortDone(self):
return self._db_ready_to_upload
- def makeShort(self):
+ def makeContent(self):
while (not self.isShortDone()):
currentStep = self._db_last_completed_step + 1
if currentStep not in self.stepDict:
diff --git a/shortGPT/engine/content_translation_engine.py b/shortGPT/engine/content_translation_engine.py
new file mode 100644
index 00000000..ca51e0d0
--- /dev/null
+++ b/shortGPT/engine/content_translation_engine.py
@@ -0,0 +1,134 @@
+from shortGPT.audio.audio_duration import getAssetDuration
+from shortGPT.engine.abstract_content_engine import AbstractContentEngine
+from shortGPT.config.languages import Language
+from shortGPT.gpt.gpt_translate import translateContent
+from shortGPT.editing_utils.handle_videos import get_aspect_ratio
+from shortGPT.editing_framework.editing_engine import EditingEngine, EditingStep
+from shortGPT.editing_utils.captions import getSpeechBlocks, getCaptionsWithTime
+from shortGPT.audio.audio_utils import audioToText, getAssetDuration, speedUpAudio
+from tqdm import tqdm
+import re
+import shutil
+import os
+import datetime
+language_mapping = {
+ "en": Language.ENGLISH,
+ "es": Language.SPANISH,
+ "fr": Language.FRENCH,
+ "ar": Language.ARABIC,
+ "de": Language.GERMAN,
+ "pl": Language.POLISH,
+ "it": Language.ITALIAN,
+ "pt": Language.PORTUGUESE,
+}
+class ContentTranslationEngine(AbstractContentEngine):
+
+ def __init__(self, src_url: str = "", target_language: Language = Language.ENGLISH, use_captions=False, id="", voice_name=""):
+ super().__init__(id, "content_translation", target_language, voice_name, checkElevenCredits=False)
+ if not id:
+ self._db_should_translate = True
+ if src_url:
+ self._db_src_url = src_url
+ self._db_use_captions = use_captions
+ self._db_target_language = target_language.value
+
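+        # Pipeline steps run in order by AbstractContentEngine.makeContent().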
+ self.stepDict = {
+ 1: self._transcribe_audio,
+ 2: self._translate_content,
+ 3: self._generate_translated_audio,
+ 4: self._edit_and_render_video,
+ 5: self._add_metadata
+ }
+
+ def _transcribe_audio(self):
+ video_audio, _ = getAssetDuration(self._db_src_url, isVideo=False)
+ self.verifyParameters(content_path=video_audio)
+ self.logger(f"1/5 - Transcribing original audio to text...")
+ whispered = audioToText(video_audio, model_size='base')
+ self._db_speech_blocks = getSpeechBlocks(whispered, silence_time=0.8)
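+        # If the detected language already matches the target, reuse the transcript as-is and skip the translation step.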
+ if (language_mapping.get(whispered['language']) == Language(self._db_target_language)):
+ self._db_translated_timed_sentences = self._db_speech_blocks
+ self._db_should_translate = False
+
+ expected_chars = len("".join([text for _, text in self._db_speech_blocks]))
+ chars_remaining = self.voiceModule.get_remaining_characters()
+ if chars_remaining < expected_chars:
+ raise Exception(
+ f"Your Elevenlabs key doesn't have enough characters to totally translate this video | Remaining: {chars_remaining} | Number of characters to translate: {expected_chars}")
+
+ def _translate_content(self):
+ if(self._db_should_translate):
+ self.verifyParameters(_db_speech_blocks=self._db_speech_blocks)
+
+ translated_timed_sentences = []
+ for i, ((t1, t2), text) in tqdm(enumerate(self._db_speech_blocks), desc="Translating content"):
+ self.logger(f"2/5 - Translating text content - {i+1} / {len(self._db_speech_blocks)}")
+ translated_text = translateContent(text, self._db_target_language)
+ translated_timed_sentences.append([[t1, t2], translated_text])
+ self._db_translated_timed_sentences = translated_timed_sentences
+
+ def _generate_translated_audio(self):
+ self.verifyParameters(translated_timed_sentences=self._db_translated_timed_sentences)
+
+ translated_audio_blocks = []
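+        # Synthesize each translated sentence and retime it so it fits the original speech block's duration.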
+ for i, ((t1, t2), translated_text) in tqdm(enumerate(self._db_translated_timed_sentences), desc="Generating translated audio"):
+ self.logger(f"3/5 - Generating translated audio - {i+1} / {len(self._db_translated_timed_sentences)}")
+ translated_voice = self.voiceModule.generate_voice(translated_text, self.dynamicAssetDir+f"translated_{i}_{self._db_target_language}.wav")
+ if not translated_voice:
+                raise Exception('An error happened during audio voice creation')
+            final_audio_path = speedUpAudio(translated_voice, self.dynamicAssetDir+f"translated_{i}_{self._db_target_language}_spedup.wav", expected_duration=t2-t1-0.05)
+ _, translated_duration = getAssetDuration(final_audio_path, isVideo=False)
+ translated_audio_blocks.append([[t1, t1+translated_duration], final_audio_path])
+ self._db_audio_bits = translated_audio_blocks
+
+ def _edit_and_render_video(self):
+ self.verifyParameters(_db_audio_bits=self._db_audio_bits)
+ self.logger(f"4.1 / 5 - Preparing automated editing")
+ target_language = Language(self._db_target_language)
+ input_video, video_length = getAssetDuration(self._db_src_url)
+ video_audio, _ = getAssetDuration(self._db_src_url, isVideo=False)
+ editing_engine = EditingEngine()
+ editing_engine.addEditingStep(EditingStep.ADD_BACKGROUND_VIDEO, {'url': input_video, "set_time_start": 0, "set_time_end": video_length})
+ last_t2 = 0
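+        # Overlay the translated audio at each block's original timing; keep the source audio for gaps longer than 4 seconds (music, ambient sound).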
+ for (t1, t2), audio_path in self._db_audio_bits:
+            t2 -= 0.05
+ editing_engine.addEditingStep(EditingStep.INSERT_AUDIO, {'url': audio_path, 'set_time_start': t1, 'set_time_end': t2})
+ if t1-last_t2 >4:
+ editing_engine.addEditingStep(EditingStep.EXTRACT_AUDIO, {"url": video_audio, "subclip": {"t_start": last_t2, "t_end": t1}, "set_time_start": last_t2, "set_time_end": t1})
+ last_t2 = t2
+
+ if video_length - last_t2 >4:
+ editing_engine.addEditingStep(EditingStep.EXTRACT_AUDIO, {"url": video_audio, "subclip": {"t_start": last_t2, "t_end": video_length}, "set_time_start": last_t2, "set_time_end": video_length})
+
+ if self._db_use_captions:
+ is_landscape = get_aspect_ratio(input_video) > 1
+ if not self._db_timed_translated_captions:
+ if not self._db_translated_voiceover_path:
+ self.logger(f"4.5 / 5 - Generating captions in {target_language.value}")
+ editing_engine.generateAudio(self.dynamicAssetDir+"translated_voiceover.wav")
+ self._db_translated_voiceover_path = self.dynamicAssetDir+"translated_voiceover.wav"
+ whispered_translated = audioToText(self._db_translated_voiceover_path, model_size='base')
+ timed_translated_captions = getCaptionsWithTime(whispered_translated, maxCaptionSize=50 if is_landscape else 15, considerPunctuation=True)
+ self._db_timed_translated_captions = [[[t1,t2], text] for (t1, t2), text in timed_translated_captions if t2 - t1 <= 4]
+ for (t1, t2), text in self._db_timed_translated_captions:
+ caption_key = "LANDSCAPE" if is_landscape else "SHORT"
+ caption_key += "_ARABIC" if target_language == Language.ARABIC else ""
+ caption_type = getattr(EditingStep, f"ADD_CAPTION_{caption_key}")
+ editing_engine.addEditingStep(caption_type, {'text': text, "set_time_start": t1, "set_time_end": t2})
+
+ self._db_video_path = self.dynamicAssetDir+"translated_content.mp4"
+
+ editing_engine.renderVideo(self._db_video_path, logger=self.logger)
+
+ def _add_metadata(self):
+ self.logger(f"5 / 5 - Saving translated video")
+ now = datetime.datetime.now()
+ date_str = now.strftime("%Y-%m-%d_%H-%M-%S")
+ newFileName = f"videos/{date_str} - " + \
+ re.sub(r"[^a-zA-Z0-9 '\n\.]", '', f"translated_content_to_{self._db_target_language}")
+
+ shutil.move(self._db_video_path, newFileName+".mp4")
+ self._db_video_path = newFileName+".mp4"
+ self._db_ready_to_upload = True
diff --git a/shortGPT/gpt/gpt_translate.py b/shortGPT/gpt/gpt_translate.py
index fe4c1a50..aa514df4 100644
--- a/shortGPT/gpt/gpt_translate.py
+++ b/shortGPT/gpt/gpt_translate.py
@@ -2,6 +2,8 @@
def translateContent(content, language):
chat, system = gpt_utils.load_yaml_prompt('shortGPT/prompt_templates/translate_content.yaml')
+ if language == "arabic":
+ language =="arabic, and make the translated text two third of the length of the original."
system = system.replace("<>", language)
chat = chat.replace("<>", content)
result = gpt_utils.gpt3Turbo_completion(chat_prompt=chat, system=system, temp=1)
diff --git a/shortGPT/prompt_templates/translate_content.yaml b/shortGPT/prompt_templates/translate_content.yaml
index 36f825e4..bc683218 100644
--- a/shortGPT/prompt_templates/translate_content.yaml
+++ b/shortGPT/prompt_templates/translate_content.yaml
@@ -1,29 +1,26 @@
system_prompt: >
- You're an expert content translator from English to <>.
+ You're an expert content translator to <>.
+ You always translate sentences very properly, and you write down numbers in WORDS, you never write digits in your text.
- The user will give you Content in perfect English, and you will translate it to <>.
-
- 1. Familiarize yourself with the target audience: They are between 13 to 45 yo people.
- Understand the cultural nuances, social context, and regional dialects that prevail in the target language.
- This will help you tailor the translation accordingly.
-
- 2. Focus on equivalence, not literal translation:
- Instead of translating word-for-word, aim for conveying the intended meaning,
- tone, and style of the original text. Consider the context and adapt the translation accordingly.
-
- 3. Pay attention to idioms, metaphors,
- and cultural references: Identify any idiomatic expressions or culturally specific references in the source text.
- Translate them by finding equivalent expressions or concepts in the target language,
- ensuring the meaning remains intact within the target audience's cultural framework.
-
- 4. Research current terminology and trends:
- Stay up-to-date with the latest terminology,
- slang, and colloquialisms used by the target audience.
- This will help you incorporate modern language and make the translation sound natural and relatable.
-
- 5. Avoid overusing formal or technical language:
- While certain texts require a formal tone,it's essential to strike a balance.
- Consider the context and intended audience to determine the appropriate level of formality.
+ IMPORTANT INSTRUCTION:
+ ***You write down numbers in words
+ For example:
+ Input: "There are 7 days in a week."
+ Translation: "Existem sete dias em uma semana."
+ Example 2:
+ Input: "She bought 4 apples at the market."
+ Translation: "Existem sete dias em uma semana."
+
+ Example 3:
+ Input:"The temperature is -2 degrees Celsius."
+ Translation: "A temperatura estΓ‘ dois graus Celsius negativos."
+
+
+ Example 4:
+ Input: "He is 30 years old."
+ Translation: "Ele tem trinta anos de idade."
+ **
+
chat_prompt: >
<>
\ No newline at end of file