Lenylvt committed (verified)
Commit c18dcee · 1 Parent(s): 8c56203

Update app.py

Files changed (1)
  1. app.py +19 -31
app.py CHANGED
@@ -1,12 +1,8 @@
 import gradio as gr
 from faster_whisper import WhisperModel
 import logging
-import os
-from moviepy.editor import VideoFileClip
-import ffmpeg # Make sure to install ffmpeg-python
 from transformers import MarianMTModel, MarianTokenizer
 import pandas as pd
-import pysrt
 import requests
 
 # Configure logging for debugging purposes
@@ -22,29 +18,18 @@ df['ISO 639-1'] = df['ISO 639-1'].str.strip()
 # Prepare language options for the dropdown
 language_options = [(row['ISO 639-1'], f"{row['ISO 639-1']}") for index, row in df.iterrows()]
 
-def format_timestamp(seconds):
-    """Convert seconds to HH:MM:SS.mmm format."""
-    hours = int(seconds // 3600)
-    minutes = int((seconds % 3600) // 60)
-    seconds_remainder = seconds % 60
-    return f"{hours:02d}:{minutes:02d}:{seconds_remainder:06.3f}"
-
-def extract_audio(video_path):
-    """Extract audio from video to a temporary audio file."""
-    output_audio_path = '/tmp/audio.wav'
-    ffmpeg.input(video_path).output(output_audio_path, acodec='pcm_s16le', ac=1, ar='16k').run(quiet=True)
-    return output_audio_path
-
-def transcribe_and_optionally_translate(video_file, source_language, target_language, model_size, allow_modification):
-    audio_file = extract_audio(video_file)
-
+def transcribe_and_optionally_translate(audio_file, source_language, target_language, model_size, change_transcript):
     # Transcription
-    device = "cpu" # GPU : cuda CPU : cpu
-    compute_type = "int8" # GPU : float16 or int8 - CPU : int8
+    device = "cpu" # Use "cuda" for GPU
+    compute_type = "int8" # Use "float16" or "int8" for GPU, "int8" for CPU
     model = WhisperModel(model_size, device=device, compute_type=compute_type)
     segments, _ = model.transcribe(audio_file)
     transcription = " ".join([segment.text for segment in segments])
 
+    if change_transcript:
+        # Assume user will modify the transcript manually before translation
+        return transcription, True
+
     # Translation
     if source_language != target_language:
         model_name = f"Helsinki-NLP/opus-mt-{source_language}-{target_language}"
@@ -53,7 +38,7 @@ def transcribe_and_optionally_translate(video_file, source_language, target_language, model_size, allow_modification):
         translated = model.generate(**tokenizer(transcription, return_tensors="pt", padding=True, truncation=True, max_length=512))
         transcription = tokenizer.decode(translated[0], skip_special_tokens=True)
 
-    return transcription, allow_modification
+    return transcription, False
 
 def add_hard_subtitle_to_video(input_video, transcript):
     """Add hard subtitles to video."""
@@ -66,14 +51,16 @@ def add_hard_subtitle_to_video(input_video, transcript):
 
     return output_video_path
 
-# Gradio Interface
-def process_video(video, source_language, target_language, model_size='base', allow_modification=False, modified_transcript=None):
-    transcript, can_modify = transcribe_and_optionally_translate(video, source_language, target_language, model_size, allow_modification)
+def process_video(video, source_language, target_language, model_size='base', change_transcript=False, modified_transcript=None):
+    audio_file = video # Directly use the video file as the audio input
+
+    transcript, can_modify = transcribe_and_optionally_translate(audio_file, source_language, target_language, model_size, change_transcript)
 
     if can_modify and modified_transcript:
-        transcript = modified_transcript # Use the modified transcript if provided
+        # Use the modified transcript for translation if allowed and provided
+        transcript = modified_transcript
+        # Perform translation here if necessary (similar to the previous step)
 
-    # Add hard subtitles to the video
     output_video = add_hard_subtitle_to_video(video, transcript)
     return output_video
 
@@ -81,17 +68,18 @@ def process_video(video, source_language, target_language, model_size='base', allow_modification=False, modified_transcript=None):
 app = gr.Interface(
     fn=process_video,
     inputs=[
-        gr.Video(label="Upload Video"),
+        gr.Video(label="Upload Video", type="filepath"),
         gr.Dropdown(choices=language_options, label="Source Language"),
         gr.Dropdown(choices=language_options, label="Target Language"),
        gr.Dropdown(choices=["base", "small", "medium", "large", "large-v2", "large-v3"], label="Model Size"),
-        gr.Checkbox(label="Allow Transcript Modification?", value=False),
+        gr.Checkbox(label="Change Transcript before Translation?", value=False),
         gr.TextArea(label="Modified Transcript (if allowed)")
     ],
-    outputs=gr.Video(label="Processed Video with Hard Subtitles"),
+    outputs=gr.Text(label="Transcript"),
     title="Video Transcription and Translation Tool",
     description="Transcribe or translate your video content. Optionally, edit the transcription before adding hard subtitles."
 )
 
 if __name__ == "__main__":
     app.launch()
+
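
Note on the main simplification: the moviepy/ffmpeg extract_audio step is gone because faster-whisper decodes audio from the input container itself (via PyAV), so the uploaded video path can be passed straight to model.transcribe. A minimal sketch of the new transcription path, assuming a local file example.mp4 and the same model settings as the commit:

from faster_whisper import WhisperModel

# faster-whisper decodes audio from the media container via PyAV,
# so a video file path works as transcription input without a
# separate extract-audio step.
model = WhisperModel("base", device="cpu", compute_type="int8")
segments, info = model.transcribe("example.mp4")  # hypothetical input file
transcription = " ".join(segment.text for segment in segments)
print(transcription)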
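The translation branch is unchanged: it builds a Helsinki-NLP OPUS-MT checkpoint name from the two ISO 639-1 codes. A self-contained sketch of that step; en-fr is an illustrative pair, and not every source/target combination has a published opus-mt model:

from transformers import MarianMTModel, MarianTokenizer

source_language, target_language = "en", "fr"  # example ISO 639-1 codes
model_name = f"Helsinki-NLP/opus-mt-{source_language}-{target_language}"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# Tokenize, translate, and decode a single string, mirroring the app's
# generate/decode calls; long transcripts are truncated at 512 tokens.
batch = tokenizer("Hello, world!", return_tensors="pt", padding=True,
                  truncation=True, max_length=512)
translated = model.generate(**batch)
print(tokenizer.decode(translated[0], skip_special_tokens=True))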
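add_hard_subtitle_to_video is kept, but its body lies outside the diff context, and the ffmpeg-python import it presumably relied on was removed. A plausible stand-in, not the committed implementation, that burns a prepared .srt file into the video via the ffmpeg CLI's subtitles filter (all paths illustrative):

import subprocess

def add_hard_subtitle_to_video(input_video, srt_path, output_video_path="/tmp/output.mp4"):
    """Hypothetical helper: burn subtitles into the video stream, copy audio as-is."""
    subprocess.run(
        ["ffmpeg", "-y", "-i", input_video,
         "-vf", f"subtitles={srt_path}",
         "-c:a", "copy", output_video_path],
        check=True,
    )
    return output_video_path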