"""Streamlit app that dubs an uploaded video into Tamil.

Pipeline: extract the audio track, transcribe it with Whisper, translate each
transcribed segment to Tamil, synthesize Tamil speech with gTTS, and render a
new video carrying the synthesized audio and (optionally) burned-in subtitles.
"""

import json
import os
import tempfile
from datetime import timedelta

import azure.cognitiveservices.speech as speechsdk
import ffmpeg
import numpy as np
import streamlit as st
import whisper
from gtts import gTTS
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
from moviepy.editor import (
    AudioFileClip,
    CompositeAudioClip,
    CompositeVideoClip,
    TextClip,
    VideoFileClip,
)
from translate import Translator

# Tamil-specific voice configurations
TAMIL_VOICES = {
    'Female 1': {'name': 'ta-IN-PallaviNeural', 'style': 'normal'},
    'Female 2': {'name': 'ta-IN-PallaviNeural', 'style': 'formal'},
    'Male 1': {'name': 'ta-IN-ValluvarNeural', 'style': 'normal'},
    'Male 2': {'name': 'ta-IN-ValluvarNeural', 'style': 'formal'},
}


def _format_srt_timestamp(seconds):
    """Return *seconds* formatted as an SRT timestamp (``HH:MM:SS,mmm``)."""
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def _timed_segment(text, segment):
    """Build the segment dict used downstream from *text* and a source segment."""
    start, end = segment["start"], segment["end"]
    return {"text": text, "start": start, "end": end, "duration": end - start}


class TamilTextProcessor:
    """Text clean-up helpers applied to Tamil text before speech synthesis."""

    # Tamil digit -> ASCII digit table, built once instead of on every call.
    _TAMIL_DIGITS = str.maketrans("௦௧௨௩௪௫௬௭௮௯", "0123456789")

    @staticmethod
    def normalize_tamil_text(text):
        """Return *text* with Tamil numerals replaced by ASCII digits."""
        return text.translate(TamilTextProcessor._TAMIL_DIGITS)

    @staticmethod
    def process_for_tts(text):
        """Return *text* without code points >= 65535 and with whitespace collapsed."""
        # Drop characters outside the range the original filter allowed.
        text = ''.join(char for char in text if ord(char) < 65535)
        # Collapse runs of whitespace and trim both ends.
        return ' '.join(text.split())


class TamilDubber:
    """Holds the Whisper model and the temp files created while dubbing.

    Use as a context manager so that every temp file is removed on exit.
    """

    def __init__(self):
        """Load the Whisper "base" model; report and re-raise on failure."""
        try:
            self.whisper_model = whisper.load_model("base")
        except Exception as e:
            st.error(f"Error loading Whisper model: {e}")
            raise
        self.temp_files = []

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()

    def cleanup(self):
        """Delete every tracked temp file (best effort) and forget them."""
        for temp_file in self.temp_files:
            try:
                os.remove(temp_file)
            except OSError:
                # Already gone or still locked: cleanup must never mask the
                # real outcome of the dubbing run.
                pass
        self.temp_files.clear()

    def create_temp_file(self, suffix):
        """Create an empty temp file ending in *suffix* and return its path.

        The file is tracked and removed by :meth:`cleanup`.
        """
        # mkstemp creates the file atomically; the deprecated mktemp only
        # returned a name that another process could claim first.
        fd, temp_file = tempfile.mkstemp(suffix=suffix)
        os.close(fd)
        self.temp_files.append(temp_file)
        return temp_file

    def extract_audio(self, video_path):
        """Extract the audio track of *video_path* and transcribe it.

        Returns:
            A ``(segments, duration)`` tuple: the ``"segments"`` entry of the
            Whisper transcription result and the video duration.

        Raises:
            ValueError: If the video has no audio track.
            Exception: Any extraction/transcription error, after reporting it.
        """
        try:
            video = VideoFileClip(video_path)
            try:
                if video.audio is None:
                    raise ValueError("The video has no audio track to transcribe")
                audio_path = self.create_temp_file(".wav")
                video.audio.write_audiofile(audio_path)
                duration = video.duration
            finally:
                # Release the reader held by the clip.
                video.close()

            # Transcribe using Whisper
            result = self.whisper_model.transcribe(audio_path)
            return result["segments"], duration
        except Exception as e:
            st.error(f"Error in audio extraction: {e}")
            raise

    def translate_segments(self, segments):
        """Translate each segment's text to Tamil, keeping its timing.

        A segment whose translation fails keeps its original text and a
        warning is shown, so one bad segment does not abort the whole run.

        Returns:
            A list of dicts with ``text``, ``start``, ``end`` and ``duration``.
        """
        translator = Translator(to_lang='ta')
        translated_segments = []

        for segment in segments:
            try:
                text = translator.translate(segment["text"])
                text = TamilTextProcessor.normalize_tamil_text(text)
                text = TamilTextProcessor.process_for_tts(text)
            except Exception as e:
                st.warning(f"Translation warning for segment: {str(e)}")
                # Keep original text if translation fails
                text = segment["text"]
            translated_segments.append(_timed_segment(text, segment))

        return translated_segments

    def generate_audio(self, text, voice_style="normal"):
        """Synthesize *text* as Tamil speech with gTTS and return the MP3 path.

        ``voice_style`` is accepted but not used by this implementation: the
        gTTS call below takes only the text, language and speed.
        """
        try:
            temp_path = self.create_temp_file(".mp3")
            tts = gTTS(text=text, lang='ta', slow=False)
            tts.save(temp_path)
            return temp_path
        except Exception as e:
            st.error(f"Error in audio generation: {e}")
            raise

    def create_subtitles(self, segments, output_path):
        """Write *segments* to *output_path* as a UTF-8 SRT subtitle file."""
        try:
            with open(output_path, 'w', encoding='utf-8') as f:
                for idx, segment in enumerate(segments, 1):
                    start_time = _format_srt_timestamp(segment["start"])
                    end_time = _format_srt_timestamp(segment["end"])
                    f.write(f"{idx}\n{start_time} --> {end_time}\n{segment['text']}\n\n")
        except Exception as e:
            st.error(f"Error creating subtitles: {e}")
            raise


def _build_subtitle_clips(segments, fontsize, color):
    """Return one positioned, timed TextClip per segment with non-blank text."""
    # NOTE(review): no font is passed to TextClip; confirm the default font
    # used on the deployment host contains Tamil glyphs.
    clips = []
    for segment in segments:
        if not segment["text"].strip():
            continue
        clip = TextClip(
            txt=segment["text"],
            fontsize=fontsize,
            color=color,
            stroke_color='black',
            stroke_width=1,
        )
        clip = clip.set_position(('center', 'bottom'))
        clip = clip.set_start(segment["start"])
        clip = clip.set_duration(segment["duration"])
        clips.append(clip)
    return clips


def _render_dubbed_video(video_path, output_path, dub_tracks, subtitle_clips):
    """Write *video_path* to *output_path* with dubbed audio and subtitles.

    Args:
        video_path: Source video file.
        output_path: Destination MP4 file.
        dub_tracks: ``(audio_path, start_seconds)`` pairs of synthesized
            speech. When empty, the source audio track is left untouched.
        subtitle_clips: Text clips to overlay; may be empty.
    """
    opened = []
    try:
        video = VideoFileClip(video_path)
        opened.append(video)

        final_video = CompositeVideoClip([video] + subtitle_clips) if subtitle_clips else video

        if dub_tracks:
            placed = []
            for audio_path, start in dub_tracks:
                source = AudioFileClip(audio_path)
                opened.append(source)
                placed.append(source.set_start(start))
            # Clamp the mix to the video length so late speech cannot extend
            # the audio track past the last frame.
            dubbed_audio = CompositeAudioClip(placed).set_duration(video.duration)
            final_video = final_video.set_audio(dubbed_audio)

        final_video.write_videofile(
            output_path,
            codec='libx264',
            audio_codec='aac',
            fps=video.fps,
        )
    finally:
        # Close the readers so the temp files can be deleted afterwards.
        for clip in opened:
            clip.close()


def main():
    """Render the Streamlit UI and run the dubbing pipeline on demand."""
    st.title("Tamil Movie Dubbing System")
    st.sidebar.header("டப்பிங் அமைப்புகள்")  # "Dubbing settings" in Tamil

    # File uploader
    video_file = st.file_uploader("Upload Video File", type=['mp4', 'mov', 'avi'])
    if not video_file:
        return

    # Settings
    voice_type = st.selectbox("Select Voice", list(TAMIL_VOICES.keys()))

    with st.expander("Advanced Settings"):
        generate_subtitles = st.checkbox("Generate Tamil Subtitles", value=True)
        subtitle_size = st.slider("Subtitle Size", 16, 32, 24)
        subtitle_color = st.color_picker("Subtitle Color", "#FFFFFF")

    if not st.button("Start Tamil Dubbing"):
        return

    try:
        with st.spinner("Processing video..."), TamilDubber() as dubber:
            # Save uploaded video
            temp_video_path = dubber.create_temp_file(".mp4")
            with open(temp_video_path, "wb") as f:
                f.write(video_file.read())

            # Progress tracking
            progress_bar = st.progress(0)
            status_text = st.empty()

            # Extract audio and transcribe
            status_text.text("Extracting audio and transcribing...")
            segments, _video_duration = dubber.extract_audio(temp_video_path)
            progress_bar.progress(0.25)

            # Translate segments
            status_text.text("Translating to Tamil...")
            translated_segments = dubber.translate_segments(segments)
            progress_bar.progress(0.50)

            # Generate Tamil audio (blank segments have nothing to speak)
            status_text.text("Generating Tamil audio...")
            voice_style = TAMIL_VOICES[voice_type]['style']
            speakable = [s for s in translated_segments if s["text"].strip()]
            dub_tracks = []
            for idx, segment in enumerate(speakable):
                audio_path = dubber.generate_audio(segment["text"], voice_style)
                dub_tracks.append((audio_path, segment["start"]))
                progress_bar.progress(0.50 + (0.25 * (idx + 1) / len(speakable)))

            # Generate subtitles if requested
            subtitle_bytes = None
            subtitle_clips = []
            if generate_subtitles:
                subtitle_path = dubber.create_temp_file(".srt")
                dubber.create_subtitles(translated_segments, subtitle_path)
                with open(subtitle_path, "rb") as f:
                    subtitle_bytes = f.read()
                subtitle_clips = _build_subtitle_clips(
                    translated_segments, subtitle_size, subtitle_color
                )

            # Create final video
            status_text.text("Creating final video...")
            output_path = dubber.create_temp_file(".mp4")
            _render_dubbed_video(temp_video_path, output_path, dub_tracks, subtitle_clips)
            progress_bar.progress(1.0)

            # Read the result into memory: the temp file is deleted as soon
            # as the TamilDubber context exits.
            with open(output_path, "rb") as f:
                video_bytes = f.read()

            # Display result
            st.success("டப்பிங் வெற்றிகரமாக முடிந்தது!")  # "Dubbing completed successfully"
            st.video(video_bytes)

            # Download buttons
            st.download_button(
                "Download Dubbed Video",
                video_bytes,
                file_name="tamil_dubbed_video.mp4",
                mime="video/mp4",
            )
            if subtitle_bytes is not None:
                st.download_button(
                    "Download Tamil Subtitles",
                    subtitle_bytes,
                    file_name="tamil_subtitles.srt",
                    mime="text/plain",
                )

    except Exception as e:
        st.error(f"An error occurred: {str(e)}")


if __name__ == "__main__":
    main()