artificialguybr committed on
Commit
f00d512
·
verified ·
1 Parent(s): 01c4960

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -27
app.py CHANGED
@@ -42,39 +42,34 @@ def process_video(Video, target_language):
42
  run(["ffmpeg", "-i", Video, audio_file])
43
  transcript_file = f"{common_uuid}.srt"
44
  # Transcription with Whisper.
45
- print("Starting transcription with Whisper with word-level timestamps and VAD filter")
46
- segments, _ = whisper_model.transcribe(audio_file, word_timestamps=True, vad_filter=True, vad_parameters=dict(min_silence_duration_ms=500))
47
- # Process each segment and word for detailed timestamping
48
- transcript_with_timestamps = []
49
- for segment in segments:
50
- for word in segment.words:
51
- start_time = f"{word.start:.2f}"
52
- end_time = f"{word.end:.2f}"
53
- transcript_with_timestamps.append(f"[{start_time}s -> {end_time}s] {word.word}")
54
 
55
  # Create a list to hold the translated lines.
56
  translated_lines = []
57
 
58
  with open(transcript_file, "w+", encoding="utf-8") as f:
59
  counter = 1
60
- for line in transcript_with_timestamps:
61
- # Use a regular expression to extract timestamp and word from the line
62
- match = re.match(r"\[(.*?)s -> (.*?)s\] (.*)", line)
63
- if match:
64
- start_time, end_time, word = match.groups()
65
-
66
- # Convert timestamps to SRT format
67
- formatted_start = str(datetime.timedelta(seconds=float(start_time)))
68
- formatted_end = str(datetime.timedelta(seconds=float(end_time)))
69
-
70
- # Write to SRT file
71
- f.write(f"{counter}\n")
72
- f.write(f"{formatted_start} --> {formatted_end}\n")
73
- f.write(f"{word}\n\n")
74
- counter += 1
75
- else:
76
- # Handle the case where the line does not match the pattern
77
- print(f"Line does not match expected format: {line}")
78
  # Move the file pointer to the beginning of the file.
79
  f.seek(0)
80
 
 
42
  run(["ffmpeg", "-i", Video, audio_file])
43
  transcript_file = f"{common_uuid}.srt"
44
  # Transcription with Whisper.
45
+ print("Iniciando transcrição com Whisper")
46
+ segments, _ = whisper_model.transcribe(audio_file, beam_size=5)
47
+ segments = list(segments)
48
+ transcript_file = f"{current_path}/{common_uuid}.srt"
 
 
 
 
 
49
 
50
  # Create a list to hold the translated lines.
51
  translated_lines = []
52
 
53
  with open(transcript_file, "w+", encoding="utf-8") as f:
54
  counter = 1
55
+ for segment in segments:
56
+ start_hours = int(segment.start // 3600)
57
+ start_minutes = int((segment.start % 3600) // 60)
58
+ start_seconds = int(segment.start % 60)
59
+ start_milliseconds = int((segment.start - int(segment.start)) * 1000)
60
+
61
+ end_hours = int(segment.end // 3600)
62
+ end_minutes = int((segment.end % 3600) // 60)
63
+ end_seconds = int(segment.end % 60)
64
+ end_milliseconds = int((segment.end - int(segment.end)) * 1000)
65
+
66
+ formatted_start = f"{start_hours:02d}:{start_minutes:02d}:{start_seconds:02d},{start_milliseconds:03d}"
67
+ formatted_end = f"{end_hours:02d}:{end_minutes:02d}:{end_seconds:02d},{end_milliseconds:03d}"
68
+
69
+ f.write(f"{counter}\n")
70
+ f.write(f"{formatted_start} --> {formatted_end}\n")
71
+ f.write(f"{segment.text}\n\n")
72
+ counter += 1
73
  # Move the file pointer to the beginning of the file.
74
  f.seek(0)
75