Spaces:

younes21000
/

DAI_Project

Sleeping

App Files Files Community

younes21000 committed on Oct 11, 2024

Commit

be55105

verified ·

1 Parent(s): 58f4eed

Update app.py

Browse files

Files changed (1) hide show

app.py +132 -61

app.py CHANGED Viewed

@@ -4,13 +4,20 @@ import librosa
 from transformers import pipeline
 from concurrent.futures import ThreadPoolExecutor
 import tempfile
-# Load Whisper model for speech-to-text
-asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")
 # MarianMT or M2M100 for translation (multi-language)
 translator = pipeline("translation", model="facebook/m2m100_418M")
 # Supported languages with their codes
 languages = {
     "Persian (fa)": "fa",
@@ -27,84 +34,148 @@ def transcribe_audio(chunk):
     """Transcribe a single audio chunk."""
     return asr(chunk)["text"]
-def generate_subtitles(video_file, language_name):
     try:
-        # Extract the target language code from the selected language name
-        target_language = languages[language_name]
-        # Check if video_file is a file object or a file path string
-        if isinstance(video_file, str):
-            video_path = video_file  # It's a file path
-        else:
-            video_path = video_file.name  # It's a file object
-        print(f"Processing video from path: {video_path}")
-        # Load the video and extract audio directly
         video = mp.VideoFileClip(video_path)
         audio = video.audio
-        # Use a temporary file to hold the audio data
         with tempfile.NamedTemporaryFile(delete=True, suffix='.wav') as tmp_audio_file:
-            audio.write_audiofile(tmp_audio_file.name, codec='pcm_s16le')  # Specify codec as pcm_s16le
-            print("Starting speech-to-text transcription")
-            # Load the audio file as a waveform using librosa
-            waveform, sr = librosa.load(tmp_audio_file.name, sr=16000)  # sr=16000 for Whisper
-            # Process audio in chunks
             chunk_duration = 15  # seconds
-            chunk_size = sr * chunk_duration  # number of samples per chunk
             chunks = [waveform[i:i + chunk_size] for i in range(0, len(waveform), chunk_size) if len(waveform[i:i + chunk_size]) > 0]
-            # Use ThreadPoolExecutor for parallel processing
             with ThreadPoolExecutor() as executor:
                 transcriptions = list(executor.map(transcribe_audio, chunks))
-        # Combine all transcriptions into a single string
-        full_transcription = " ".join(transcriptions)
-        print("Starting translation")
-        # Translate transcription to the target language using M2M100
-        translated_subtitles = translator(
-            full_transcription,
-            src_lang="en",  # Source language is English
-            tgt_lang=target_language  # Target language from user selection
         )[0]["translation_text"]
-        # Return subtitles
-        subtitles = f"Original: {full_transcription}\nTranslated: {translated_subtitles}"
-        return subtitles
     except Exception as e:
-        # Catch and log the error
-        print(f"Error occurred: {e}")
-        return f"Error occurred: {e}"
-# Define Gradio interface
-def subtitle_video(video_file, language_name):
     try:
-        # Handle both file-like objects and file paths
-        return generate_subtitles(video_file, language_name)
     except Exception as e:
-        print(f"Error in processing video: {e}")
-        return f"Error in processing video: {e}"
-# Gradio app layout
-interface = gr.Interface(
-    fn=subtitle_video,
-    inputs=[
-        gr.Video(label="Upload Video"),
-        gr.Dropdown(  # Dropdown for language selection
-            label="Choose Target Language",
-            choices=list(languages.keys()),  # Display language names in the dropdown
-            value="Persian (fa)"  # Default language
-        )
-    ],
-    outputs="text",
-    title="Automatic Video Subtitler & Translator"
-)
-interface.launch()

 from transformers import pipeline
 from concurrent.futures import ThreadPoolExecutor
 import tempfile
+import docx  # To create Word documents
+from moviepy.video.tools.subtitles import SubtitlesClip
+from moviepy.editor import TextClip
+import os
+# Load Whisper model for speech-to-text (using smaller 'tiny' model for faster performance)
+asr = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")  # Use 'whisper-tiny' for faster transcription
 # MarianMT or M2M100 for translation (multi-language)
 translator = pipeline("translation", model="facebook/m2m100_418M")
+# Store generated subtitles and translations
+subtitle_storage = {}
 # Supported languages with their codes
 languages = {
     "Persian (fa)": "fa",
     """Transcribe a single audio chunk."""
     return asr(chunk)["text"]
def add_subtitle(video):
    """Transcribe the audio track of an uploaded video and store the text.

    The transcription is saved in ``subtitle_storage["original"]`` and the
    resolved video path in ``subtitle_storage["video_path"]`` so that later
    steps (translation, subtitle overlay) can reuse them.

    Args:
        video: Either a file path string (what Gradio's ``Video`` component
            passes to callbacks) or an object exposing the path as ``.name``.
            ``None`` or an empty value means no video was supplied.

    Returns:
        A human-readable status string: a preview of the transcription on
        success, ``"No video provided!"`` when there is nothing to process,
        or an ``"Error in adding subtitle: ..."`` message on failure.
    """
    if not video:
        return "No video provided!"
    # Accept both a plain path and a file-like object with a ``name``.
    video_path = video if isinstance(video, str) else getattr(video, "name", None)
    if not video_path:
        return "No video provided!"

    clip = None
    try:
        clip = mp.VideoFileClip(video_path)
        audio = clip.audio
        if audio is None:
            raise ValueError("video has no audio track")

        # Use a temporary file for audio extraction; the waveform is fully
        # loaded into memory before the file is removed.
        with tempfile.NamedTemporaryFile(delete=True, suffix='.wav') as tmp_audio_file:
            audio.write_audiofile(tmp_audio_file.name, codec='pcm_s16le')
            waveform, sr = librosa.load(tmp_audio_file.name, sr=16000)

        # Transcribe in fixed-length chunks (parallel).
        chunk_duration = 15  # seconds
        chunk_size = sr * chunk_duration
        chunks = [
            waveform[start:start + chunk_size]
            for start in range(0, len(waveform), chunk_size)
        ]
        with ThreadPoolExecutor() as executor:
            transcriptions = list(executor.map(transcribe_audio, chunks))

        full_transcription = " ".join(transcriptions)
        subtitle_storage["original"] = full_transcription
        subtitle_storage["video_path"] = video_path
        # A translation left over from a previous video no longer matches.
        subtitle_storage.pop("translated", None)
        return f"Subtitle added: {full_transcription[:100]}..."  # Display first 100 characters
    except Exception as e:
        # UI boundary: report the failure as a status message.
        return f"Error in adding subtitle: {e}"
    finally:
        if clip is not None:
            clip.close()
def translate_subtitle(video, language_name="Persian (fa)"):
    """Translate the stored transcription and store the result.

    Reads ``subtitle_storage["original"]``, translates it with the
    module-level ``translator`` pipeline and writes the result to
    ``subtitle_storage["translated"]``.

    Args:
        video: Unused; accepted because the UI wires the video component as
            this callback's input.
        language_name: Key into the module-level ``languages`` mapping that
            selects the target language. Defaults to ``"Persian (fa)"``,
            the previously hard-coded target.

    Returns:
        A human-readable status string describing success or the reason
        nothing was translated.
    """
    try:
        original_subtitle = subtitle_storage.get("original")
        if not original_subtitle:
            return "No subtitle to translate!"
        if language_name not in languages:
            return f"Unsupported target language: {language_name}"

        translated_subtitle = translator(
            original_subtitle,
            src_lang="en",  # Source language (assuming the subtitle is in English)
            tgt_lang=languages[language_name],
        )[0]["translation_text"]
        subtitle_storage["translated"] = translated_subtitle
        return "Subtitle translated successfully!"
    except Exception as e:
        # UI boundary: report the failure as a status message.
        return f"Error in translating subtitle: {e}"
def download_word():
    """Write the stored translation to a Word document.

    Reads ``subtitle_storage["translated"]`` and saves it, under a
    'Translated Subtitles' heading, to ``translated_subtitles.docx``.

    Returns:
        A status string: the saved file name on success, a notice when no
        translation is stored, or an error message if saving fails.
    """
    output_name = "translated_subtitles.docx"
    try:
        text = subtitle_storage.get("translated")
        if not text:
            return "No translated subtitle to save!"

        document = docx.Document()
        document.add_heading('Translated Subtitles', 0)
        document.add_paragraph(text)
        document.save(output_name)
    except Exception as exc:
        return f"Error in saving subtitles as Word: {exc}"
    return f"Translated subtitles saved as Word document: {output_name}"
def _subtitle_cues(text, chars_per_cue=50, seconds_per_cue=5):
    """Split *text* into fixed-width cues shown back to back.

    Returns a list of ``((start, end), chunk)`` entries, the list format
    ``SubtitlesClip`` accepts. Timing is simulated: cue number ``k`` is
    shown from ``k * seconds_per_cue`` to ``(k + 1) * seconds_per_cue``.
    """
    return [
        (
            (index * seconds_per_cue, (index + 1) * seconds_per_cue),
            text[start:start + chars_per_cue],
        )
        for index, start in enumerate(range(0, len(text), chars_per_cue))
    ]


def download_video():
    """Render the stored translation onto the stored video.

    Reads the transcription, translation and video path from
    ``subtitle_storage``, overlays the translation as bottom-centered
    subtitles and writes the result to ``subtitled_video.mp4``.

    Returns:
        A status string: the output file name on success, a notice when
        subtitles or the video path are missing, or an error message if
        rendering fails.
    """
    video = None
    subtitled_video = None
    try:
        original_subtitle = subtitle_storage.get("original")
        translated_subtitle = subtitle_storage.get("translated")
        if not original_subtitle or not translated_subtitle:
            return "No subtitles to overlay on video!"

        video_path = subtitle_storage.get("video_path")
        if not video_path:
            # Nothing recorded which video the subtitles belong to.
            return "No video available to overlay subtitles on!"

        video = mp.VideoFileClip(video_path)

        def make_text_clip(txt):
            """Build the on-screen clip for one subtitle cue."""
            return TextClip(txt, font='Arial', fontsize=24, color='white')

        # Simulated subtitle time intervals for simplicity.
        subtitles = SubtitlesClip(_subtitle_cues(translated_subtitle), make_text_clip)

        # Clamp to the video's length so trailing cues do not extend the output.
        subtitled_video = mp.CompositeVideoClip(
            [video, subtitles.set_position(('center', 'bottom'))]
        ).set_duration(video.duration)

        output_video_path = "subtitled_video.mp4"
        subtitled_video.write_videofile(output_video_path)
        return f"Subtitled video is ready for download: {output_video_path}"
    except Exception as e:
        # UI boundary: report the failure as a status message.
        return f"Error in generating subtitled video: {e}"
    finally:
        for clip in (subtitled_video, video):
            if clip is not None:
                clip.close()
# Gradio UI Interface
def _button_with_status(button_label, status_label):
    """Create one row holding an action button and its status textbox."""
    with gr.Row():
        button = gr.Button(button_label)
        status = gr.Textbox(label=status_label)
    return button, status


with gr.Blocks() as demo:
    # Page title
    gr.Markdown("<h1 style='text-align: center;'>Video Subtitle Translator</h1>")

    # Video upload row: the video component feeds the callbacks below.
    with gr.Row():
        video_input = gr.Video(label="Upload Video")
        upload_button = gr.Button("Upload Video")
        upload_status = gr.Textbox(label="Upload Status")
    upload_button.click(fn=add_subtitle, inputs=video_input, outputs=upload_status)

    # Transcription
    add_subtitle_button, subtitle_status = _button_with_status(
        "Add Subtitle", "Subtitle Status"
    )
    add_subtitle_button.click(fn=add_subtitle, inputs=video_input, outputs=subtitle_status)

    # Translation
    translate_button, translate_status = _button_with_status(
        "Translate Subtitle", "Translation Status"
    )
    translate_button.click(fn=translate_subtitle, inputs=video_input, outputs=translate_status)

    # Word export
    download_button, download_status = _button_with_status(
        "Download as Word", "Download Status"
    )
    download_button.click(fn=download_word, inputs=None, outputs=download_status)

    # Subtitled video export
    download_video_button, download_video_status = _button_with_status(
        "Download Subtitled Video", "Download Video Status"
    )
    download_video_button.click(fn=download_video, inputs=None, outputs=download_video_status)

# Launch the Gradio app
demo.launch()