younes21000 committed on
Commit
be55105
·
verified ·
1 Parent(s): 58f4eed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -61
app.py CHANGED
@@ -4,13 +4,20 @@ import librosa
4
  from transformers import pipeline
5
  from concurrent.futures import ThreadPoolExecutor
6
  import tempfile
 
 
 
 
7
 
8
# Speech-to-text: the large Whisper checkpoint behind a transformers ASR pipeline.
asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large",
)

# Text translation: the multilingual M2M100 (418M) checkpoint.
translator = pipeline(
    "translation",
    model="facebook/m2m100_418M",
)
13
 
 
 
 
14
  # Supported languages with their codes
15
  languages = {
16
  "Persian (fa)": "fa",
@@ -27,84 +34,148 @@ def transcribe_audio(chunk):
27
  """Transcribe a single audio chunk."""
28
  return asr(chunk)["text"]
29
 
30
def generate_subtitles(video_file, language_name):
    """Transcribe the speech in a video and translate it to the chosen language.

    Args:
        video_file: Either a path string, or an object whose ``name``
            attribute holds the path.
        language_name: A key of the module-level ``languages`` mapping.

    Returns:
        A two-line string ("Original: ..." then "Translated: ...") on success,
        or an "Error occurred: ..." string on failure. Exceptions are reported
        as text rather than propagated.
    """
    video = None
    try:
        # Resolve the target language code from the selected display name.
        target_language = languages[language_name]

        # Accept both a plain path and a file-like object exposing ``.name``.
        video_path = video_file if isinstance(video_file, str) else video_file.name
        print(f"Processing video from path: {video_path}")

        video = mp.VideoFileClip(video_path)
        audio = video.audio
        if audio is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise ValueError("the video has no audio track to transcribe")

        # The audio is written to a temporary WAV so librosa can load it;
        # only the write and the load need the file to exist.
        with tempfile.NamedTemporaryFile(delete=True, suffix='.wav') as tmp_audio_file:
            audio.write_audiofile(tmp_audio_file.name, codec='pcm_s16le')

            print("Starting speech-to-text transcription")

            # The waveform is resampled to 16 kHz on load.
            waveform, sr = librosa.load(tmp_audio_file.name, sr=16000)

        # Split the waveform into 15-second pieces; range() never yields an
        # empty trailing slice, so no length filter is needed.
        chunk_duration = 15  # seconds
        chunk_size = sr * chunk_duration
        chunks = [
            waveform[start:start + chunk_size]
            for start in range(0, len(waveform), chunk_size)
        ]

        # executor.map preserves input order, so the pieces join back in sequence.
        with ThreadPoolExecutor() as executor:
            transcriptions = list(executor.map(transcribe_audio, chunks))

        full_transcription = " ".join(transcriptions)

        print("Starting translation")

        # The source language is fixed to English.
        translated_subtitles = translator(
            full_transcription,
            src_lang="en",
            tgt_lang=target_language,
        )[0]["translation_text"]

        return f"Original: {full_transcription}\nTranslated: {translated_subtitles}"

    except Exception as e:
        print(f"Error occurred: {e}")
        return f"Error occurred: {e}"

    finally:
        # Release the ffmpeg reader held by the clip, on success and on failure.
        if video is not None:
            video.close()
85
 
86
- # Define Gradio interface
87
def subtitle_video(video_file, language_name):
    """Gradio callback: delegate to generate_subtitles and report failures as text.

    Args:
        video_file: Passed through unchanged to ``generate_subtitles``.
        language_name: Passed through unchanged to ``generate_subtitles``.

    Returns:
        Whatever ``generate_subtitles`` returns, or an
        "Error in processing video: ..." string if it raises.
    """
    try:
        outcome = generate_subtitles(video_file, language_name)
    except Exception as e:
        print(f"Error in processing video: {e}")
        return f"Error in processing video: {e}"
    return outcome
94
-
95
# Gradio app layout: a video upload plus a target-language picker, text output.
video_upload = gr.Video(label="Upload Video")
language_picker = gr.Dropdown(
    label="Choose Target Language",
    choices=list(languages.keys()),  # display names shown in the dropdown
    value="Persian (fa)",  # default selection
)

interface = gr.Interface(
    fn=subtitle_video,
    inputs=[video_upload, language_picker],
    outputs="text",
    title="Automatic Video Subtitler & Translator",
)

interface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  from transformers import pipeline
5
  from concurrent.futures import ThreadPoolExecutor
6
  import tempfile
7
+ import docx # To create Word documents
8
+ from moviepy.video.tools.subtitles import SubtitlesClip
9
+ from moviepy.editor import TextClip
10
+ import os
11
 
12
# Speech-to-text: the 'tiny' Whisper checkpoint, chosen for faster transcription.
asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-tiny",
)

# Text translation: the multilingual M2M100 (418M) checkpoint.
translator = pipeline(
    "translation",
    model="facebook/m2m100_418M",
)

# Module-level store shared by the UI callbacks for the generated subtitles
# and their translation.
subtitle_storage = dict()
20
+
21
  # Supported languages with their codes
22
  languages = {
23
  "Persian (fa)": "fa",
 
34
  """Transcribe a single audio chunk."""
35
  return asr(chunk)["text"]
36
 
37
def add_subtitle(video):
    """Transcribe the uploaded video's speech and store it as the original subtitle.

    On success this writes ``subtitle_storage["original"]`` (the transcription)
    and ``subtitle_storage["video_path"]`` (the source path, read later when
    the subtitled video is rendered), and drops any previously stored
    ``"translated"`` entry because it belongs to an earlier transcription.

    Args:
        video: A path string, or an object whose ``name`` attribute holds the
            path. ``None``, an empty string, or an object without a usable
            ``name`` is treated as "no video".

    Returns:
        A status string: a preview of the first 100 characters on success,
        "No video provided!" when there is no input, or an
        "Error in adding subtitle: ..." message on failure.
    """
    # Accept both a plain path string and a file-like object exposing ``.name``.
    video_path = video if isinstance(video, str) else getattr(video, "name", None)
    if not video_path:
        return "No video provided!"

    clip = None
    try:
        clip = mp.VideoFileClip(video_path)
        audio = clip.audio
        if audio is None:
            return "Error in adding subtitle: the video has no audio track"

        # The audio is written to a temporary WAV so librosa can load it
        # (resampled to 16 kHz); the file is only needed for write + load.
        with tempfile.NamedTemporaryFile(delete=True, suffix='.wav') as tmp_audio_file:
            audio.write_audiofile(tmp_audio_file.name, codec='pcm_s16le')
            waveform, sr = librosa.load(tmp_audio_file.name, sr=16000)

        # Transcribe in 15-second chunks; range() never yields an empty slice.
        chunk_duration = 15  # seconds
        chunk_size = sr * chunk_duration
        chunks = [
            waveform[start:start + chunk_size]
            for start in range(0, len(waveform), chunk_size)
        ]

        # executor.map preserves input order, so the text joins back in sequence.
        with ThreadPoolExecutor() as executor:
            transcriptions = list(executor.map(transcribe_audio, chunks))

        full_transcription = " ".join(transcriptions)
        subtitle_storage["original"] = full_transcription
        subtitle_storage["video_path"] = video_path
        subtitle_storage.pop("translated", None)  # stale for the new video

        return f"Subtitle added: {full_transcription[:100]}..."  # first 100 characters

    except Exception as e:
        return f"Error in adding subtitle: {e}"

    finally:
        # Release the ffmpeg reader held by the clip, on success and on failure.
        if clip is not None:
            clip.close()
67
 
68
def translate_subtitle(video, language_name="Persian (fa)"):
    """Translate the stored original subtitle and store the result.

    Reads ``subtitle_storage["original"]`` and, on success, writes the
    translation to ``subtitle_storage["translated"]``.

    Args:
        video: Unused; kept so the function can be wired to a UI event that
            passes the video input.
        language_name: A key of the module-level ``languages`` mapping that
            selects the target language. Defaults to "Persian (fa)".

    Returns:
        A status string: success, "No subtitle to translate!" when nothing is
        stored, or an "Error in translating subtitle: ..." message.
    """
    try:
        original_subtitle = subtitle_storage.get("original")
        if not original_subtitle:
            return "No subtitle to translate!"

        # Report an unknown language by name rather than as a bare KeyError.
        if language_name not in languages:
            return f"Error in translating subtitle: unsupported language {language_name!r}"

        translated_subtitle = translator(
            original_subtitle,
            src_lang="en",  # the stored subtitle is assumed to be English
            tgt_lang=languages[language_name],
        )[0]["translation_text"]

        subtitle_storage["translated"] = translated_subtitle

        return "Subtitle translated successfully!"

    except Exception as e:
        return f"Error in translating subtitle: {e}"
 
 
87
 
88
def download_word():
    """Write the stored translated subtitle to a Word document.

    Reads ``subtitle_storage["translated"]`` and saves a document with a
    heading and one paragraph to ``translated_subtitles.docx``.

    Returns:
        A status string naming the saved file, "No translated subtitle to
        save!" when nothing is stored, or an error message on failure.
    """
    try:
        text = subtitle_storage.get("translated")
        if text:
            document = docx.Document()
            document.add_heading('Translated Subtitles', 0)
            document.add_paragraph(text)

            target = "translated_subtitles.docx"
            document.save(target)
            return f"Translated subtitles saved as Word document: {target}"

        return "No translated subtitle to save!"

    except Exception as e:
        return f"Error in saving subtitles as Word: {e}"
106
+
107
def _build_subtitle_cues(text, chars_per_cue=50, seconds_per_cue=5):
    """Split text into fixed-size pieces, each in a consecutive time slot.

    Returns a list of ``((start, end), piece)`` entries, the shape
    ``SubtitlesClip`` consumes. Slot times come from the cue index, not from
    the character offset.
    """
    return [
        (
            (index * seconds_per_cue, (index + 1) * seconds_per_cue),
            text[offset:offset + chars_per_cue],
        )
        for index, offset in enumerate(range(0, len(text), chars_per_cue))
    ]


def download_video():
    """Overlay the translated subtitle on the stored video and write it to disk.

    Reads ``"original"``, ``"translated"`` and ``"video_path"`` from
    ``subtitle_storage`` and writes the result to ``subtitled_video.mp4``.
    Subtitle timing is simulated: the text is cut into 50-character pieces
    shown for 5 seconds each, in order.

    Returns:
        A status string naming the output file, "No subtitles to overlay on
        video!" when either subtitle is missing, or an
        "Error in generating subtitled video: ..." message.
    """
    video = None
    try:
        original_subtitle = subtitle_storage.get("original")
        translated_subtitle = subtitle_storage.get("translated")

        if not original_subtitle or not translated_subtitle:
            return "No subtitles to overlay on video!"

        video_path = subtitle_storage.get("video_path")
        if not video_path:
            # Without a stored path there is nothing to open.
            return "Error in generating subtitled video: no source video path is stored"

        video = mp.VideoFileClip(video_path)

        def make_text_clip(txt):
            """Render one subtitle piece as a text clip."""
            return TextClip(txt, font='Arial', fontsize=24, color='white')

        subs = _build_subtitle_cues(translated_subtitle)
        subtitles = SubtitlesClip(subs, make_text_clip)

        # Cap the composite at the video's length so simulated cues that run
        # past the end do not extend the output.
        subtitled_video = mp.CompositeVideoClip(
            [video, subtitles.set_position(('center', 'bottom'))]
        ).set_duration(video.duration)

        output_video_path = "subtitled_video.mp4"
        subtitled_video.write_videofile(output_video_path)

        return f"Subtitled video is ready for download: {output_video_path}"

    except Exception as e:
        return f"Error in generating subtitled video: {e}"

    finally:
        # Release the ffmpeg reader held by the source clip.
        if video is not None:
            video.close()
138
+
139
# Gradio UI: one row per workflow step, each with a trigger button and a
# status textbox that shows the callback's returned message.
def _button_with_status(button_text, status_label):
    """Create a Button followed by a status Textbox in the current layout context."""
    return gr.Button(button_text), gr.Textbox(label=status_label)


with gr.Blocks() as demo:
    # Page title
    gr.Markdown("<h1 style='text-align: center;'>Video Subtitle Translator</h1>")

    # Step 1: video upload.
    # NOTE(review): this button runs add_subtitle, the same callback as the
    # "Add Subtitle" button below — confirm that is intended.
    with gr.Row():
        video_input = gr.Video(label="Upload Video")
        upload_button, upload_status = _button_with_status("Upload Video", "Upload Status")
    upload_button.click(add_subtitle, inputs=video_input, outputs=upload_status)

    # Step 2: transcribe the video into subtitles.
    with gr.Row():
        add_subtitle_button, subtitle_status = _button_with_status("Add Subtitle", "Subtitle Status")
    add_subtitle_button.click(add_subtitle, inputs=video_input, outputs=subtitle_status)

    # Step 3: translate the stored subtitles.
    with gr.Row():
        translate_button, translate_status = _button_with_status("Translate Subtitle", "Translation Status")
    translate_button.click(translate_subtitle, inputs=video_input, outputs=translate_status)

    # Step 4: save the translation as a Word document.
    with gr.Row():
        download_button, download_status = _button_with_status("Download as Word", "Download Status")
    download_button.click(download_word, inputs=None, outputs=download_status)

    # Step 5: render the subtitled video.
    with gr.Row():
        download_video_button, download_video_status = _button_with_status(
            "Download Subtitled Video", "Download Video Status"
        )
    download_video_button.click(download_video, inputs=None, outputs=download_video_status)

# Start the Gradio app
demo.launch()