ChiBenevisamPas committed on
Commit
cc29966
·
verified ·
1 Parent(s): 7052865

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +197 -0
app.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import moviepy.editor as mp
3
+ import librosa
4
+ from transformers import pipeline
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ import tempfile
7
+ import docx # To create Word documents
8
+ from moviepy.video.tools.subtitles import SubtitlesClip
9
+ from moviepy.editor import TextClip
10
+
11
# Target languages offered in the UI: display name -> language code handed to
# the translation pipeline as ``tgt_lang``.
languages = {
    "Persian": "fa",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
    "Chinese": "zh",
    "Arabic": "ar",
    "Hindi": "hi",
    "Russian": "ru",
}

# Module-level scratch space shared by the handlers below. Keys written by
# them: "original" (transcript), "video_path", "translated".
subtitle_storage = {}

# Speech-to-text: the small Whisper "tiny" checkpoint, chosen for speed.
asr = pipeline(task="automatic-speech-recognition", model="openai/whisper-tiny")

# Multi-language translation with the M2M100 (418M) checkpoint.
translator = pipeline(task="translation", model="facebook/m2m100_418M")
31
+
32
def transcribe_audio(chunk):
    """Return the text the module-level ``asr`` pipeline recognises in *chunk*."""
    recognition = asr(chunk)
    return recognition["text"]
35
+
36
def add_subtitle(video):
    """Transcribe the speech of an uploaded video and cache the transcript.

    The audio track is written to a temporary WAV file, loaded at 16 kHz,
    cut into 15-second chunks and transcribed chunk by chunk with
    ``transcribe_audio``. The joined transcript and the video path are stored
    in ``subtitle_storage`` for the translate / download handlers.

    Args:
        video: Path of the uploaded video as a string. Any other value
            (including ``None`` and the empty string) counts as "no video".

    Returns:
        A status message: a preview of the transcript (first 100 characters)
        on success, otherwise a description of what went wrong.
    """
    import os  # local import: only needed to build the temporary file path

    video_path = video if isinstance(video, str) else None
    if not video_path:
        return "No video provided!"

    try:
        clip = mp.VideoFileClip(video_path)
        try:
            audio = clip.audio
            if audio is None:
                # Without this check the failure is an opaque AttributeError.
                return "Error in adding subtitle: the video has no audio track."

            # Write into a private temporary directory rather than into an
            # already-open NamedTemporaryFile: re-opening such a file by name
            # is not possible on every platform.
            with tempfile.TemporaryDirectory() as tmp_dir:
                audio_path = os.path.join(tmp_dir, "audio.wav")
                audio.write_audiofile(audio_path, codec='pcm_s16le')
                waveform, sr = librosa.load(audio_path, sr=16000)
        finally:
            # Release the file readers held by the clip even when extraction fails.
            clip.close()

        # Transcribe in chunks (parallel)
        chunk_duration = 15  # seconds
        chunk_size = sr * chunk_duration
        chunks = [
            waveform[start:start + chunk_size]
            for start in range(0, len(waveform), chunk_size)
        ]

        with ThreadPoolExecutor() as executor:
            transcriptions = list(executor.map(transcribe_audio, chunks))

        # Strip each piece so chunks are joined by exactly one space.
        full_transcription = " ".join(text.strip() for text in transcriptions)
        subtitle_storage["original"] = full_transcription
        subtitle_storage["video_path"] = video_path
        # A translation cached for a previously processed video no longer
        # matches this transcript; drop it so it cannot be downloaded by mistake.
        subtitle_storage.pop("translated", None)

        return f"Subtitle added: {full_transcription[:100]}..."  # Display first 100 characters

    except Exception as e:
        return f"Error in adding subtitle: {e}"
67
+
68
def _split_for_translation(text, max_chars=400):
    """Split *text* into pieces of at most *max_chars* characters.

    Pieces end on sentence boundaries (``.``, ``!``, ``?`` followed by
    whitespace) where possible; a single sentence longer than *max_chars* is
    wrapped at whitespace instead.
    """
    import re
    import textwrap

    pieces = []
    current = ""
    for sentence in re.split(r"(?<=[.!?])\s+", text.strip()):
        if not sentence:
            continue
        candidate = f"{current} {sentence}" if current else sentence
        if len(candidate) <= max_chars:
            current = candidate
            continue
        if current:
            pieces.append(current)
        if len(sentence) <= max_chars:
            current = sentence
        else:
            wrapped = textwrap.wrap(sentence, width=max_chars)
            pieces.extend(wrapped[:-1])
            current = wrapped[-1]
    if current:
        pieces.append(current)
    return pieces


def translate_subtitle(video, language):
    """Translate the cached transcript into *language* and cache the result.

    Args:
        video: Unused; accepted because the UI passes the video input along
            with the language.
        language: Display name of the target language; must be a key of the
            module-level ``languages`` mapping.

    Returns:
        A status message describing success or the reason for failure. On
        success the translation is stored in ``subtitle_storage["translated"]``.
    """
    try:
        original_subtitle = subtitle_storage.get("original")
        if not original_subtitle:
            return "No subtitle to translate!"

        target_code = languages.get(language)
        if target_code is None:
            return f"Error in translating subtitle: unsupported language {language!r}"

        pieces = _split_for_translation(original_subtitle)
        if not pieces:
            return "No subtitle to translate!"

        # Translate piece by piece: a single call on a long transcript risks
        # being cut off at the model's generation length limit.
        translated_pieces = [
            translator(
                piece,
                src_lang="en",  # Source language (assuming the subtitle is in English)
                tgt_lang=target_code,
            )[0]["translation_text"]
            for piece in pieces
        ]

        subtitle_storage["translated"] = " ".join(translated_pieces)

        return f"Subtitle translated to {language} successfully!"

    except Exception as e:
        return f"Error in translating subtitle: {e}"
88
+
89
def download_word():
    """Write the cached translation to a Word document and return its path.

    The translation is wrapped into lines of at most 50 characters, breaking
    at whitespace where possible. Each line becomes one paragraph prefixed
    with a nominal start time, assuming 5 seconds per line.

    Returns:
        Path of the saved ``translated_subtitles.docx`` file.

    Raises:
        gr.Error: If no translation is cached or the document cannot be
            written. The result of this handler feeds a file output, so a
            failure is raised for Gradio to display instead of being returned
            as text (a returned message would be taken for a file path).
    """
    import textwrap  # local import: only needed for line wrapping

    translated_subtitle = subtitle_storage.get("translated")
    if not translated_subtitle:
        raise gr.Error("No translated subtitle to save!")

    try:
        # Prepare the document
        doc = docx.Document()
        doc.add_heading('Translated Subtitles', 0)

        # One paragraph per subtitle line: "<start>s - <text>"
        for index, subtitle_text in enumerate(textwrap.wrap(translated_subtitle, width=50)):
            start_time = index * 5  # Each subtitle lasts for 5 seconds
            doc.add_paragraph(f"{start_time}s - {subtitle_text}")

        file_path = "translated_subtitles.docx"
        doc.save(file_path)
    except Exception as e:
        raise gr.Error(f"Error in saving subtitles as Word: {e}") from e

    # Return the file path to allow Gradio to serve it as a downloadable file
    return file_path
116
+
117
def download_video():
    """Render the cached translation onto the cached video as subtitles.

    The translation is wrapped into lines of at most 50 characters, breaking
    at whitespace where possible. Each line is shown for 5 seconds; when the
    text would otherwise run past the end of the video, all lines are
    shortened evenly so the last one ends with the video. The result is
    written to ``subtitled_video.mp4``.

    Returns:
        A status message: the output path on success, otherwise a description
        of what went wrong.
    """
    import textwrap  # local import: only needed for line wrapping

    try:
        original_subtitle = subtitle_storage.get("original")
        translated_subtitle = subtitle_storage.get("translated")

        if not original_subtitle or not translated_subtitle:
            return "No subtitles to overlay on video!"

        lines = textwrap.wrap(translated_subtitle, width=50)
        if not lines:
            return "No subtitles to overlay on video!"

        video_path = subtitle_storage.get("video_path")
        video = mp.VideoFileClip(video_path)
        try:
            # NOTE(review): 'Arial' must be available to the text renderer and
            # cover the target script -- confirm for non-Latin languages.
            def make_text_clip(txt):
                return TextClip(txt, font='Arial', fontsize=24, color='white')

            # 5 seconds per line, compressed when the video is too short.
            subtitle_length = min(5, video.duration / len(lines))

            # SubtitlesClip expects ((start, end), text) entries.
            subs = []
            for index, subtitle_text in enumerate(lines):
                start_time = index * subtitle_length
                end_time = min(start_time + subtitle_length, video.duration)
                subs.append(((start_time, end_time), subtitle_text))

            # Create subtitle clips
            subtitles = SubtitlesClip(subs, make_text_clip)

            # Overlay subtitles on video
            subtitled_video = mp.CompositeVideoClip(
                [video, subtitles.set_position(('center', 'bottom'))]
            )

            output_video_path = "subtitled_video.mp4"
            subtitled_video.write_videofile(output_video_path)
        finally:
            # Release the file readers held by the source clip.
            video.close()

        return f"Subtitled video is ready for download: {output_video_path}"

    except Exception as e:
        return f"Error in generating subtitled video: {e}"
153
+
154
# Gradio UI Interface
with gr.Blocks() as demo:
    # Title
    gr.Markdown("<h1 style='text-align: center;'>Video Subtitle Translator</h1>")

    # Video Upload
    with gr.Row():
        video_input = gr.Video(label="Upload Video")
        upload_button = gr.Button("Upload Video")
        upload_status = gr.Textbox(label="Upload Status")

    # NOTE(review): this button runs the same handler as "Add Subtitle" below,
    # so the full transcription also runs here -- confirm this is intended.
    upload_button.click(add_subtitle, inputs=video_input, outputs=upload_status)

    # Add Subtitle
    with gr.Row():
        add_subtitle_button = gr.Button("Add Subtitle")
        subtitle_status = gr.Textbox(label="Subtitle Status")

    add_subtitle_button.click(add_subtitle, inputs=video_input, outputs=subtitle_status)

    # Translate Subtitle
    with gr.Row():
        language_dropdown = gr.Dropdown(choices=list(languages.keys()), label="Choose Target Language", value="Persian")
        translate_button = gr.Button("Translate Subtitle")
        translate_status = gr.Textbox(label="Translation Status")

    translate_button.click(translate_subtitle, inputs=[video_input, language_dropdown], outputs=translate_status)

    # Download as Word
    with gr.Row():
        download_button = gr.Button("Download as Word")
        download_status = gr.File(label="Download Translated Word File")  # File output for Word download

    download_button.click(download_word, inputs=None, outputs=download_status)

    # Download Subtitled Video
    with gr.Row():
        download_video_button = gr.Button("Download Subtitled Video")
        download_video_status = gr.Textbox(label="Download Video Status")

    download_video_button.click(download_video, inputs=None, outputs=download_video_status)

# Launch the Gradio app only when this file is run as a script, so importing
# the module (e.g. to reuse the handlers) does not start a server.
if __name__ == "__main__":
    demo.launch()