Spaces:

marquesafonso
/

multilang-asr-captioner

Running

App Files Files Community

marquesafonso committed on Dec 18, 2023

Commit

44d365d

1 Parent(s): 1b8b58c

add max_words_per_line. reduce args. fix paths.

Browse files

Files changed (3) hide show

main.py +25 -16
utils/download_video.py +4 -2
utils/transcriber.py +31 -7

main.py CHANGED Viewed

@@ -12,36 +12,45 @@ logging.basicConfig(filename='main.log',
                 datefmt='%m/%d/%Y %I:%M:%S %p')
 def main(video_url,
-        srt_path,
-        invideo_dir,
         invideo_filename,
-        outvideo_path,
         fontsize,
-        bg_color):
     with tqdm(total=100, desc="Overall Progress") as pbar:
         if video_url != None:
-            stream_title = download_video(video_url, invideo_dir, filename='video.mp4')
             pbar.update(33.33)
-            if not os.path.exists(srt_path):
-                transcriber(stream_title, srt_path)
             pbar.update(33.33)
-            subtitler(stream_title, srt_path, outvideo_path,fontsize, bg_color)
             pbar.update(33.34)
             return
-        if not os.path.exists(srt_path):
-            transcriber(os.path.join(invideo_dir,invideo_filename), srt_path)
         pbar.update(66.66)
-        subtitler(os.path.join(invideo_dir,invideo_filename), srt_path, outvideo_path, fontsize,bg_color)
         pbar.update(33.34)
 if __name__ == '__main__':
     parser = ArgumentParser()
-    parser.add_argument('--invideo_dir', required=True, type=str, help='path to the input video dir')
-    parser.add_argument('--invideo_filename', required=True, type=str, help='filename and extension of ')
-    parser.add_argument('--outvideo_path', required=True, help='path to the output video')
     parser.add_argument('--video_url', required=False, default=None, type=str, help='A video file to be subtitled (Optional)')
-    parser.add_argument('--srt_path', required=False, default="data/audio.srt", type=str, help='path to the srt file (default: data/audio.srt)')
     parser.add_argument('--fontsize', required=False, default=32, type=int, help='Font size for captions (int)')
     parser.add_argument('--bg_color', required=False, default="#070a13b3", type=str, help='Hex color value for caption background colour.')
     args = parser.parse_args()
     # Example usage
-    main(args.video_url, args.srt_path, args.invideo_dir, args.invideo_filename, args.outvideo_path, args.fontsize, args.bg_color)

                 datefmt='%m/%d/%Y %I:%M:%S %p')
 def main(video_url,
         invideo_filename,
         fontsize,
+        bg_color,
+        max_words_per_line
+        ):
+    INVIDEO_DIR = os.path.join('data/',invideo_filename)
+    if not os.path.exists(INVIDEO_DIR):
+        os.makedirs(INVIDEO_DIR)
+    SRT_PATH = os.path.join(INVIDEO_DIR, f"{invideo_filename}.srt")
+    OUTVIDEO_PATH = os.path.join(INVIDEO_DIR, f"result_{invideo_filename}.mp4")
     with tqdm(total=100, desc="Overall Progress") as pbar:
         if video_url != None:
+            stream_title = download_video(video_url, INVIDEO_DIR, invideo_filename)
             pbar.update(33.33)
+            if not os.path.exists(SRT_PATH):
+                transcriber(stream_title, SRT_PATH, max_words_per_line)
             pbar.update(33.33)
+            subtitler(stream_title, SRT_PATH, OUTVIDEO_PATH,fontsize, bg_color)
             pbar.update(33.34)
             return
+        INVIDEO_PATH = os.path.join(INVIDEO_DIR, f"{invideo_filename}.mp4")
+        if not os.path.exists(SRT_PATH):
+            transcriber(INVIDEO_PATH, SRT_PATH, max_words_per_line)
         pbar.update(66.66)
+        subtitler(INVIDEO_PATH, SRT_PATH, OUTVIDEO_PATH, fontsize,bg_color)
         pbar.update(33.34)
 if __name__ == '__main__':
     parser = ArgumentParser()
+    parser.add_argument('--invideo_filename', required=True, type=str, help='filename')
     parser.add_argument('--video_url', required=False, default=None, type=str, help='A video file to be subtitled (Optional)')
     parser.add_argument('--fontsize', required=False, default=32, type=int, help='Font size for captions (int)')
     parser.add_argument('--bg_color', required=False, default="#070a13b3", type=str, help='Hex color value for caption background colour.')
+    parser.add_argument("--max_words_per_line", type=int, default=None, help="(requires --word_timestamps True, no effect with --max_line_width) the maximum number of words in a segment")
     args = parser.parse_args()
     # Example usage
+    main(args.video_url,
+        args.invideo_filename,
+        args.fontsize,
+        args.bg_color,
+        args.max_words_per_line,
+        )

utils/download_video.py CHANGED Viewed

@@ -1,12 +1,14 @@
 from pytube import YouTube
 def download_video(input_file, output_path, filename):
     try:
         yt = YouTube(input_file)
         video_stream = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
         if video_stream:
-            video_stream.download(output_path=output_path,filename=filename)
-            video_title = f"{output_path}/{filename}"
             return video_title
         else:
             return "No suitable stream found for this video."

 from pytube import YouTube
+import os
 def download_video(input_file, output_path, filename):
+    full_filename = f"{filename}.mp4"
     try:
         yt = YouTube(input_file)
         video_stream = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
         if video_stream:
+            video_stream.download(output_path=output_path,filename=full_filename)
+            video_title = os.path.join(output_path, full_filename)
             return video_title
         else:
             return "No suitable stream found for this video."

utils/transcriber.py CHANGED Viewed

@@ -8,14 +8,35 @@ logging.basicConfig(filename='main.log',
                 datefmt='%m/%d/%Y %I:%M:%S %p')
 logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
-def write_srt(segments, srt_path):
-    """Write segments to an SRT file."""
     with open(srt_path, "w", encoding='utf-8') as file:
-        for i, segment in enumerate(segments):
-            file.write(f"{i+1}\n{segment.start} --> {segment.end}\n{segment.text}\n\n")
-def transcriber(input_path:str, srt_path:str):
-    str_path = os.path.abspath(srt_path)
     model_size = "large-v3"
     # Run on GPU with FP16
@@ -32,8 +53,11 @@ def transcriber(input_path:str, srt_path:str):
         beam_size=5,
         vad_filter=True,
         vad_parameters=dict(min_silence_duration_ms=500),
     )
     logging.info("Detected language '%s' with probability %f" % (info.language, info.language_probability))
     logging.info("Writing file...")
-    write_srt(segments=segments, srt_path=srt_path)

                 datefmt='%m/%d/%Y %I:%M:%S %p')
 logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
def _format_srt_timestamp(seconds):
    """Render a time offset given in seconds as the SRT ``HH:MM:SS,mmm`` form."""
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def write_srt(segments, srt_path, max_words_per_line):
    """Write segments to an SRT file with a maximum number of words per cue.

    Args:
        segments: Iterable of objects exposing ``words``, a sequence of items
            with ``word``, ``start`` and ``end`` attributes. ``start``/``end``
            are assumed to be offsets in seconds (as faster-whisper reports
            them) -- confirm if another producer is used.
        srt_path: Destination file; overwritten if it already exists.
        max_words_per_line: Upper bound on words per cue. ``None`` or a
            non-positive value keeps each segment as a single cue.

    Cues are numbered consecutively across all segments. A segment whose
    ``words`` is ``None`` falls back to one cue built from the segment's own
    ``start``, ``end`` and ``text``; a segment with an empty word list
    produces no cue.
    """
    with open(srt_path, "w", encoding='utf-8') as file:
        cue_number = 1
        for segment in segments:
            words = segment.words
            if words is None:
                # No word-level timing available: emit the segment as one cue.
                cues = [(segment.start, segment.end, segment.text.strip())]
            else:
                words = list(words)
                has_limit = max_words_per_line is not None and max_words_per_line > 0
                chunk_size = max_words_per_line if has_limit else len(words)
                cues = []
                # max(..., 1) keeps range() valid when the word list is empty.
                for offset in range(0, len(words), max(chunk_size, 1)):
                    chunk = words[offset:offset + chunk_size]
                    text = ' '.join(item.word.strip() for item in chunk)
                    cues.append((chunk[0].start, chunk[-1].end, text))

            for start, end, text in cues:
                # SRT requires HH:MM:SS,mmm timecodes, not raw float seconds.
                file.write(
                    f"{cue_number}\n"
                    f"{_format_srt_timestamp(start)} --> {_format_srt_timestamp(end)}\n"
                    f"{text}\n\n"
                )
                cue_number += 1
+def transcriber(input_path:str,
+                srt_path:str,
+                max_words_per_line:int):
     model_size = "large-v3"
     # Run on GPU with FP16
         beam_size=5,
         vad_filter=True,
         vad_parameters=dict(min_silence_duration_ms=500),
+        word_timestamps=True
     )
     logging.info("Detected language '%s' with probability %f" % (info.language, info.language_probability))
     logging.info("Writing file...")
+    write_srt(segments=segments, srt_path=srt_path, max_words_per_line=max_words_per_line)