caspr

Paused

App Files Files Community

artificialguybr committed on Jan 28, 2024

Commit

f00d512

verified ·

1 Parent(s): 01c4960

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -27

app.py CHANGED Viewed

@@ -42,39 +42,34 @@ def process_video(Video, target_language):
     run(["ffmpeg", "-i", Video, audio_file])
     transcript_file = f"{common_uuid}.srt"
     # Transcription with Whisper.
-    print("Starting transcription with Whisper with word-level timestamps and VAD filter")
-    segments, _ = whisper_model.transcribe(audio_file, word_timestamps=True, vad_filter=True, vad_parameters=dict(min_silence_duration_ms=500))
-    # Process each segment and word for detailed timestamping
-    transcript_with_timestamps = []
-    for segment in segments:
-        for word in segment.words:
-            start_time = f"{word.start:.2f}"
-            end_time = f"{word.end:.2f}"
-            transcript_with_timestamps.append(f"[{start_time}s -> {end_time}s] {word.word}")
     # Create a list to hold the translated lines.
     translated_lines = []
     with open(transcript_file, "w+", encoding="utf-8") as f:
         counter = 1
-        for line in transcript_with_timestamps:
-            # Use a regular expression to extract timestamp and word from the line
-            match = re.match(r"\[(.*?)s -> (.*?)s\] (.*)", line)
-            if match:
-                start_time, end_time, word = match.groups()
-                # Convert timestamps to SRT format
-                formatted_start = str(datetime.timedelta(seconds=float(start_time)))
-                formatted_end = str(datetime.timedelta(seconds=float(end_time)))
-                # Write to SRT file
-                f.write(f"{counter}\n")
-                f.write(f"{formatted_start} --> {formatted_end}\n")
-                f.write(f"{word}\n\n")
-                counter += 1
-            else:
-                # Handle the case where the line does not match the pattern
-                print(f"Line does not match expected format: {line}")
         # Move the file pointer to the beginning of the file.
         f.seek(0)

     run(["ffmpeg", "-i", Video, audio_file])
     transcript_file = f"{common_uuid}.srt"
     # Transcription with Whisper.
+    print("Iniciando transcrição com Whisper")
+    segments, _ = whisper_model.transcribe(audio_file, beam_size=5)
+    segments = list(segments)
+    transcript_file = f"{current_path}/{common_uuid}.srt"
     # Create a list to hold the translated lines.
     translated_lines = []
     with open(transcript_file, "w+", encoding="utf-8") as f:
         counter = 1
+        for segment in segments:
+            start_hours = int(segment.start // 3600)
+            start_minutes = int((segment.start % 3600) // 60)
+            start_seconds = int(segment.start % 60)
+            start_milliseconds = int((segment.start - int(segment.start)) * 1000)
+            end_hours = int(segment.end // 3600)
+            end_minutes = int((segment.end % 3600) // 60)
+            end_seconds = int(segment.end % 60)
+            end_milliseconds = int((segment.end - int(segment.end)) * 1000)
+            formatted_start = f"{start_hours:02d}:{start_minutes:02d}:{start_seconds:02d},{start_milliseconds:03d}"
+            formatted_end = f"{end_hours:02d}:{end_minutes:02d}:{end_seconds:02d},{end_milliseconds:03d}"
+            f.write(f"{counter}\n")
+            f.write(f"{formatted_start} --> {formatted_end}\n")
+            f.write(f"{segment.text}\n\n")
+            counter += 1
         # Move the file pointer to the beginning of the file.
         f.seek(0)