Spaces:

ChiBenevisamPas
/

Video-Subtitle-Translate

Runtime error

App Files Community

ChiBenevisamPas committed on Oct 11, 2024

Commit

cc29966

verified ·

1 Parent(s): 7052865

Create app.py

Browse files

Files changed (1) hide show

app.py +197 -0

app.py ADDED Viewed

	@@ -0,0 +1,197 @@

+import gradio as gr
+import moviepy.editor as mp
+import librosa
+from transformers import pipeline
+from concurrent.futures import ThreadPoolExecutor
+import tempfile
+import docx  # To create Word documents
+from moviepy.video.tools.subtitles import SubtitlesClip
+from moviepy.editor import TextClip
+# Load Whisper model for speech-to-text (using smaller 'tiny' model for faster performance)
+asr = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
+# MarianMT or M2M100 for translation (multi-language)
+translator = pipeline("translation", model="facebook/m2m100_418M")
+# Store generated subtitles and translations
+subtitle_storage = {}
+# Supported languages with their codes
+languages = {
+    "Persian": "fa",
+    "French": "fr",
+    "Spanish": "es",
+    "German": "de",
+    "Chinese": "zh",
+    "Arabic": "ar",
+    "Hindi": "hi",
+    "Russian": "ru"
+}
+def transcribe_audio(chunk):
+    """Transcribe a single audio chunk."""
+    return asr(chunk)["text"]
+def add_subtitle(video):
+    try:
+        # The video is passed as a file path string, so we use it directly
+        video_path = video if isinstance(video, str) else None
+        if not video_path:
+            return "No video provided!"
+        video = mp.VideoFileClip(video_path)
+        audio = video.audio
+        # Use a temporary file for audio extraction
+        with tempfile.NamedTemporaryFile(delete=True, suffix='.wav') as tmp_audio_file:
+            audio.write_audiofile(tmp_audio_file.name, codec='pcm_s16le')
+            waveform, sr = librosa.load(tmp_audio_file.name, sr=16000)
+            # Transcribe in chunks (parallel)
+            chunk_duration = 15  # seconds
+            chunk_size = sr * chunk_duration
+            chunks = [waveform[i:i + chunk_size] for i in range(0, len(waveform), chunk_size) if len(waveform[i:i + chunk_size]) > 0]
+            with ThreadPoolExecutor() as executor:
+                transcriptions = list(executor.map(transcribe_audio, chunks))
+            full_transcription = " ".join(transcriptions)
+            subtitle_storage["original"] = full_transcription  # Store the original subtitle
+            subtitle_storage["video_path"] = video_path  # Store the video path
+        return f"Subtitle added: {full_transcription[:100]}..."  # Display first 100 characters
+    except Exception as e:
+        return f"Error in adding subtitle: {e}"
+def translate_subtitle(video, language):
+    try:
+        # Translate the stored subtitle
+        original_subtitle = subtitle_storage.get("original")
+        if not original_subtitle:
+            return "No subtitle to translate!"
+        # Translate using the selected language
+        translated_subtitle = translator(
+            original_subtitle,
+            src_lang="en",  # Source language (assuming the subtitle is in English)
+            tgt_lang=languages[language]  # Get the language code from the dropdown selection
+        )[0]["translation_text"]
+        subtitle_storage["translated"] = translated_subtitle  # Store the translated subtitle
+        return f"Subtitle translated to {language} successfully!"
+    except Exception as e:
+        return f"Error in translating subtitle: {e}"
+def download_word():
+    try:
+        # Save translated subtitles to a Word document
+        translated_subtitle = subtitle_storage.get("translated")
+        if not translated_subtitle:
+            return "No translated subtitle to save!"
+        # Prepare the document
+        doc = docx.Document()
+        doc.add_heading('Translated Subtitles', 0)
+        # Create timestamps and subtitles
+        for i in range(0, len(translated_subtitle), 50):
+            start_time = (i // 50) * 5  # Each subtitle lasts for 5 seconds
+            subtitle_text = translated_subtitle[i:i + 50]  # Get the next 50 characters
+            # Add a formatted string with timestamp and subtitle to the document
+            doc.add_paragraph(f"{start_time}s - {subtitle_text}")
+        file_path = "translated_subtitles.docx"
+        doc.save(file_path)
+        # Return the file for download
+        return file_path  # Return the file path to allow Gradio to serve it as a downloadable file
+    except Exception as e:
+        return f"Error in saving subtitles as Word: {e}"
+def download_video():
+    try:
+        # Add subtitles to the video
+        original_subtitle = subtitle_storage.get("original")
+        translated_subtitle = subtitle_storage.get("translated")
+        if not original_subtitle or not translated_subtitle:
+            return "No subtitles to overlay on video!"
+        video_path = subtitle_storage.get("video_path")
+        video = mp.VideoFileClip(video_path)
+        # Function to generate subtitle text
+        generator = lambda txt: TextClip(txt, font='Arial', fontsize=24, color='white')
+        # Generate subtitles (assuming each subtitle appears for 5 seconds)
+        subs = []
+        subtitle_length = 5  # seconds each subtitle will be displayed
+        for i in range(0, len(translated_subtitle), 50):
+            start_time = (i // 50) * subtitle_length
+            subtitle_text = translated_subtitle[i:i + 50]  # Get the next 50 characters
+            subs.append((start_time, subtitle_text))  # Create a tuple for start time and text
+        # Create subtitle clips
+        subtitles = SubtitlesClip(subs, generator)
+        # Overlay subtitles on video
+        subtitled_video = mp.CompositeVideoClip([video, subtitles.set_position(('center', 'bottom'))])
+        output_video_path = "subtitled_video.mp4"
+        subtitled_video.write_videofile(output_video_path)
+        return f"Subtitled video is ready for download: {output_video_path}"
+    except Exception as e:
+        return f"Error in generating subtitled video: {e}"
+# Gradio UI Interface
+with gr.Blocks() as demo:
+    # Title
+    gr.Markdown("<h1 style='text-align: center;'>Video Subtitle Translator</h1>")
+    # Video Upload
+    with gr.Row():
+        video_input = gr.Video(label="Upload Video")
+        upload_button = gr.Button("Upload Video")
+        upload_status = gr.Textbox(label="Upload Status")
+    upload_button.click(add_subtitle, inputs=video_input, outputs=upload_status)
+    # Add Subtitle
+    with gr.Row():
+        add_subtitle_button = gr.Button("Add Subtitle")
+        subtitle_status = gr.Textbox(label="Subtitle Status")
+    add_subtitle_button.click(add_subtitle, inputs=video_input, outputs=subtitle_status)
+    # Translate Subtitle
+    with gr.Row():
+        language_dropdown = gr.Dropdown(choices=list(languages.keys()), label="Choose Target Language", value="Persian")
+        translate_button = gr.Button("Translate Subtitle")
+        translate_status = gr.Textbox(label="Translation Status")
+    translate_button.click(translate_subtitle, inputs=[video_input, language_dropdown], outputs=translate_status)
+    # Download as Word
+    with gr.Row():
+        download_button = gr.Button("Download as Word")
+        download_status = gr.File(label="Download Translated Word File")  # File output for Word download
+    download_button.click(download_word, inputs=None, outputs=download_status)
+    # Download Subtitled Video
+    with gr.Row():
+        download_video_button = gr.Button("Download Subtitled Video")
+        download_video_status = gr.Textbox(label="Download Video Status")
+    download_video_button.click(download_video, inputs=None, outputs=download_video_status)
+# Launch the Gradio app111110000000000000000
+demo.launch()