artificialguybr committed on
Commit
a1bbd1f
·
verified ·
1 Parent(s): b6ac968

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -23
app.py CHANGED
@@ -16,6 +16,7 @@ import scipy
16
  from googletrans import Translator
17
  import re
18
  import subprocess
 
19
 
20
  ZipFile("ffmpeg.zip").extractall()
21
  st = os.stat('ffmpeg')
@@ -25,9 +26,6 @@ with open('google_lang_codes.json', 'r') as f:
25
  google_lang_codes = json.load(f)
26
 
27
  translator = Translator()
28
-
29
- #tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-3.3B")
30
- #model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-3.3B")
31
  whisper_model = WhisperModel("large-v2", device="cuda", compute_type="float16")
32
 
33
  print("cwd", os.getcwd())
@@ -42,37 +40,39 @@ def process_video(Video, target_language):
42
  run(["ffmpeg", "-version"])
43
  audio_file = f"{common_uuid}.wav"
44
  run(["ffmpeg", "-i", Video, audio_file])
45
-
46
  # Transcription with Whisper.
47
- print("Iniciando transcrição com Whisper")
48
- segments, _ = whisper_model.transcribe(audio_file, beam_size=5)
49
- segments = list(segments)
50
- transcript_file = f"{current_path}/{common_uuid}.srt"
 
 
 
 
 
51
 
52
  # Create a list to hold the translated lines.
53
  translated_lines = []
54
 
55
  with open(transcript_file, "w+", encoding="utf-8") as f:
56
  counter = 1
57
- for segment in segments:
58
- start_hours = int(segment.start // 3600)
59
- start_minutes = int((segment.start % 3600) // 60)
60
- start_seconds = int(segment.start % 60)
61
- start_milliseconds = int((segment.start - int(segment.start)) * 1000)
62
-
63
- end_hours = int(segment.end // 3600)
64
- end_minutes = int((segment.end % 3600) // 60)
65
- end_seconds = int(segment.end % 60)
66
- end_milliseconds = int((segment.end - int(segment.end)) * 1000)
67
-
68
- formatted_start = f"{start_hours:02d}:{start_minutes:02d}:{start_seconds:02d},{start_milliseconds:03d}"
69
- formatted_end = f"{end_hours:02d}:{end_minutes:02d}:{end_seconds:02d},{end_milliseconds:03d}"
70
-
71
  f.write(f"{counter}\n")
72
  f.write(f"{formatted_start} --> {formatted_end}\n")
73
- f.write(f"{segment.text}\n\n")
74
  counter += 1
75
 
 
76
 
77
  # Move the file pointer to the beginning of the file.
78
  f.seek(0)
 
16
  from googletrans import Translator
17
  import re
18
  import subprocess
19
+ import datetime
20
 
21
  ZipFile("ffmpeg.zip").extractall()
22
  st = os.stat('ffmpeg')
 
26
  google_lang_codes = json.load(f)
27
 
28
  translator = Translator()
 
 
 
29
  whisper_model = WhisperModel("large-v2", device="cuda", compute_type="float16")
30
 
31
  print("cwd", os.getcwd())
 
40
  run(["ffmpeg", "-version"])
41
  audio_file = f"{common_uuid}.wav"
42
  run(["ffmpeg", "-i", Video, audio_file])
43
+ transcript_file = f"{common_uuid}.srt"
44
  # Transcription with Whisper.
45
+ print("Starting transcription with Whisper with word-level timestamps and VAD filter")
46
+ segments, _ = whisper_model.transcribe(audio_file, word_timestamps=True, vad_filter=True, vad_parameters=dict(min_silence_duration_ms=500))
47
+ # Process each segment and word for detailed timestamping
48
+ transcript_with_timestamps = []
49
+ for segment in segments:
50
+ for word in segment.words:
51
+ start_time = f"{word.start:.2f}"
52
+ end_time = f"{word.end:.2f}"
53
+ transcript_with_timestamps.append(f"[{start_time}s -> {end_time}s] {word.word}")
54
 
55
  # Create a list to hold the translated lines.
56
  translated_lines = []
57
 
58
  with open(transcript_file, "w+", encoding="utf-8") as f:
59
  counter = 1
60
+ for line in transcript_with_timestamps:
61
+ # Extract timestamp and word from the line
62
+ timestamp, word = re.match(r"\[(.*?)s -> (.*?)s\] (.*)", line).groups()
63
+ start_time, end_time = timestamp.split(' -> ')
64
+
65
+ # Convert timestamps to SRT format
66
+ formatted_start = str(datetime.timedelta(seconds=float(start_time)))
67
+ formatted_end = str(datetime.timedelta(seconds=float(end_time)))
68
+
69
+ # Write to SRT file
 
 
 
 
70
  f.write(f"{counter}\n")
71
  f.write(f"{formatted_start} --> {formatted_end}\n")
72
+ f.write(f"{word}\n\n")
73
  counter += 1
74
 
75
+
76
 
77
  # Move the file pointer to the beginning of the file.
78
  f.seek(0)