Skip to content

Commit

Permalink
merge the video with the subtitles #67
Browse files Browse the repository at this point in the history
  • Loading branch information
abdeladim-s committed Aug 29, 2023
1 parent 57bef37 commit d75036d
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 8 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
* [x] [facebook/m2m100_1.2B](https://huggingface.co/facebook/m2m100_1.2B)
* [x] [facebook/mbart-large-50-many-to-many-mmt](https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt)
* Auto-sync using [smacke/ffsubsync](https://github.com/smacke/ffsubsync)
* Merge subtitles into the video
* Command Line Interface
* For simple or batch processing
* Python package
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "subsai"
version = "1.1.3"
version = "1.2.0"
authors = [
{name = "abdeladim-s"},
]
Expand Down
73 changes: 69 additions & 4 deletions src/subsai/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@
import os
import pathlib
import tempfile
from typing import Union
from typing import Union, Dict

import ffmpeg
import pysubs2
from dl_translate import TranslationModel
from pysubs2 import SSAFile
Expand Down Expand Up @@ -239,11 +240,75 @@ def auto_sync(subs: SSAFile,
os.unlink(srtin_file.name)
srtout_file.close()
os.unlink(srtout_file.name)
@staticmethod
def merge_subs_with_video(subs: Dict[str, SSAFile],
                          media_file: str,
                          output_filename: str = None,
                          **kwargs
                          ) -> str:
    """
    Uses ffmpeg to merge subtitles into a video media file.
    You can merge multiple subs at the same time by providing a dict with
    (lang, `SSAFile` object) key,value pairs.

    Example:
    ```python
        file = '../../assets/video/test1.webm'
        subs_ai = SubsAI()
        model = subs_ai.create_model('openai/whisper', {'model_type': 'tiny'})
        en_subs = subs_ai.transcribe(file, model)
        ar_subs = pysubs2.load('../../assets/video/test0-ar.srt')
        Tools.merge_subs_with_video({'English': en_subs, "Arabic": ar_subs}, file)
    ```

    :param subs: dict with (lang,`SSAFile` object) key,value pairs; each key is used as the
                 title of the corresponding subtitle stream
    :param media_file: path of the video media_file
    :param output_filename: Output file name (without the extension as it will be inferred from the media file).
                            Defaults to `<media file stem>-subs-merged`. The output is written next to the media file.
    :param kwargs: accepted for forward compatibility; currently unused
    :return: Absolute path of the output file
    :raises AssertionError: if the media file has no video stream
    """
    # `select_streams="v"` restricts the probe to video streams, so a file without any
    # video stream yields an empty list; check for that explicitly rather than letting
    # `[0]` fail with an unhelpful IndexError. Raised explicitly (not via `assert`) so
    # that the check also survives `python -O`.
    video_streams = ffmpeg.probe(media_file, select_streams="v")['streams']
    if not video_streams or video_streams[0]['codec_type'] != 'video':
        raise AssertionError(f'File {media_file} is not a video')

    in_file = pathlib.Path(media_file)
    if output_filename is not None:
        out_file = in_file.parent / f"{output_filename}{in_file.suffix}"
    else:
        out_file = in_file.parent / f"{in_file.stem}-subs-merged{in_file.suffix}"
    output_file = str(out_file.resolve())

    # The intermediate .srt files live in a temporary directory that is removed as a
    # whole on exit, so nothing is left behind even when ffmpeg fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        metadata_subs = {}
        ffmpeg_subs_inputs = []
        for i, (lang, lang_subs) in enumerate(subs.items()):
            # The .srt extension lets the subtitle format be inferred from the file name.
            srtin = str(pathlib.Path(tmp_dir) / f"{i}.srt")
            lang_subs.save(srtin)
            ffmpeg_subs_inputs.append(ffmpeg.input(srtin)['s'])
            # Tag the i-th subtitle stream of the output with its language title.
            metadata_subs[f'metadata:s:s:{i}'] = "title=" + lang

        input_ffmpeg = ffmpeg.input(str(in_file.resolve()))
        output_ffmpeg = ffmpeg.output(
            input_ffmpeg['v'], input_ffmpeg['a'], *ffmpeg_subs_inputs, output_file,
            # Copy the existing video/audio streams instead of re-encoding them.
            vcodec='copy', acodec='copy',
            **metadata_subs
        )
        output_ffmpeg = ffmpeg.overwrite_output(output_ffmpeg)
        ffmpeg.run(output_ffmpeg)

    return output_file

if __name__ == '__main__':
    # Manual smoke test: transcribe a sample video, save the subtitles and merge
    # two subtitle tracks into a copy of the video.
    file = '../../assets/video/test1.webm'
    subs_ai = SubsAI()
    # Only the 'tiny' model is used below, so it is the only one created.
    model = subs_ai.create_model('openai/whisper', {'model_type': 'tiny'})
    subs = subs_ai.transcribe(file, model)
    subs.save('test1.srt')
    subs.save('../../assets/video/test1.srt')
    subs2 = pysubs2.load('../../assets/video/test0-ar.srt')
    # The method is named `merge_subs_with_video`; there is no `merge_subs_with_video2`.
    Tools.merge_subs_with_video({'English': subs, "Arabic": subs2}, file)
14 changes: 13 additions & 1 deletion src/subsai/webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ def webui() -> None:

event = st_player(_media_file_base64(file_path), **options, height=500, key="player")

with st.expander('Export'):
with st.expander('Export subtitles file'):
media_file = Path(file_path)
export_format = st.radio(
"Format",
Expand All @@ -508,6 +508,18 @@ def webui() -> None:
with open(exported_file, 'r') as f:
st.download_button('Download', f, file_name=export_filename + export_format)

with st.expander('Merge subtitles with video'):
media_file = Path(file_path)
subs_lang = st.text_input('Subtitles language', value='English', key='merged_video_subs_lang')
exported_video_filename = st.text_input('Filename', value=f"{media_file.stem}-subs-merged", key='merged_video_out_file')
submitted = st.button("Merge", key='merged_video_export_btn')
if submitted:
subs = st.session_state['transcribed_subs']
exported_file_path = tools.merge_subs_with_video({subs_lang: subs}, str(media_file.resolve()), exported_video_filename)
st.success(f'Exported file to {exported_file_path}', icon="✅")
with open(exported_file_path, 'rb') as f:
st.download_button('Download', f, file_name=f"{exported_video_filename}{media_file.suffix}")

st.markdown(footer, unsafe_allow_html=True)


Expand Down
9 changes: 7 additions & 2 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
Test file the main module
"""

import pathlib
from unittest import TestCase

import pysubs2
Expand Down Expand Up @@ -51,7 +51,7 @@ def test_transcribe(self):

class TestTools(TestCase):
tools = Tools()
file = '../assets/video/test1.mp4'
file = '../assets/video/test1.webm'
subs_file = '../assets/video/test1.srt'
subs = pysubs2.load(subs_file)

Expand All @@ -65,3 +65,8 @@ def test_auto_sync(self):
synced_subs = self.tools.auto_sync(self.subs, self.file)
self.assertIsInstance(synced_subs, SSAFile)


def test_merge_subs_with_video(self):
    """Merging subtitles must create `subs-merged<suffix>` next to the input video."""
    # `Tools` exposes `merge_subs_with_video`; the previous `merge_subs_with_video2`
    # does not exist and made this test fail with AttributeError.
    self.tools.merge_subs_with_video({'English': self.subs}, self.file, 'subs-merged')
    in_file = pathlib.Path(self.file)
    self.assertTrue((in_file.parent / f"subs-merged{in_file.suffix}").exists())

0 comments on commit d75036d

Please sign in to comment.