|
import os
import shutil
import tempfile
from datetime import timedelta
from pathlib import Path

import numpy as np
import streamlit as st
import whisper
from gtts import gTTS
from moviepy.editor import (
    AudioFileClip,
    CompositeAudioClip,
    CompositeVideoClip,
    TextClip,
    VideoFileClip,
    concatenate_audioclips,
)
from translate import Translator
|
|
|
|
|
# Page-level settings; this is the first Streamlit call issued by the file.
st.set_page_config(
    layout="wide",
    page_icon="🎬",
    page_title="Tamil Movie Dubber",
)

# Custom styling injected as raw HTML: full-width red buttons and a matching
# progress-bar colour.
# NOTE(review): `.st-bo` looks like a generated class name — confirm it still
# matches the progress bar element in the Streamlit version in use.
_CUSTOM_CSS = """
<style>
.stButton>button {
    width: 100%;
    border-radius: 5px;
    height: 3em;
    background-color: #FF4B4B;
    color: white;
}
.stProgress .st-bo {
    background-color: #FF4B4B;
}
</style>
"""

st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
|
|
|
|
|
# Voice choices offered in the UI, keyed by display label. Each entry carries
# a voice identifier ('name') and a speaking 'style'. Within this file only
# the labels are read (to fill the voice select box); the descriptor values
# are not consumed by the audio generation code.
TAMIL_VOICES = {
    label: {'name': voice_name, 'style': speaking_style}
    for label, voice_name, speaking_style in (
        ('Female 1', 'ta-IN-PallaviNeural', 'normal'),
        ('Female 2', 'ta-IN-PallaviNeural', 'formal'),
        ('Male 1', 'ta-IN-ValluvarNeural', 'normal'),
        ('Male 2', 'ta-IN-ValluvarNeural', 'formal'),
    )
}
|
|
|
class TamilTextProcessor:
    """Stateless text clean-up helpers applied to Tamil text before synthesis."""

    # Tamil digit glyphs (U+0BE6..U+0BEF) mapped one-to-one onto ASCII digits.
    _DIGIT_TABLE = str.maketrans('௦௧௨௩௪௫௬௭௮௯', '0123456789')

    @staticmethod
    def normalize_tamil_text(text):
        """Return *text* with every Tamil digit glyph replaced by its ASCII digit.

        All other characters are left untouched.
        """
        return text.translate(TamilTextProcessor._DIGIT_TABLE)

    @staticmethod
    def process_for_tts(text):
        """Return *text* prepared for the speech engine.

        Characters whose code point is 65535 or higher (e.g. emoji) are
        dropped, then every run of whitespace is collapsed to a single space
        and leading/trailing whitespace is removed.
        """
        kept = ''.join(filter(lambda ch: ord(ch) < 65535, text))
        words = kept.split()
        return ' '.join(words)
|
|
|
@st.cache_resource
def load_whisper_model():
    """Return the Whisper "base" model.

    Wrapped in ``st.cache_resource`` so Streamlit can hand back the already
    loaded model instead of loading it again on every call.
    """
    model = whisper.load_model("base")
    return model
|
|
|
class VideoProcessor:
    """Helper bundling temp-file handling with the individual dubbing steps.

    Each instance owns a freshly created temporary directory (removed again by
    ``cleanup``) and holds the Whisper model returned by ``load_whisper_model``.
    """

    def __init__(self):
        self.temp_dir = Path(tempfile.mkdtemp())
        self.whisper_model = load_whisper_model()

    def create_temp_path(self, suffix):
        """Return a new random file path (as ``str``) inside the temp directory.

        Only the path is generated; no file is created.
        """
        return str(self.temp_dir / f"temp_{os.urandom(4).hex()}{suffix}")

    def cleanup(self):
        """Delete the temporary directory; a failure is shown as a UI warning."""
        try:
            shutil.rmtree(self.temp_dir)
        except OSError as e:
            # Best effort: a leftover temp directory must not fail the job.
            st.warning(f"Cleanup warning: {e}")

    def transcribe_video(self, video_path):
        """Extract the audio track of *video_path* and transcribe it with Whisper.

        Args:
            video_path: Path of the video file to transcribe.

        Returns:
            A ``(segments, duration)`` tuple: the ``"segments"`` entry of the
            Whisper result and the duration reported by moviepy for the video.

        Raises:
            Exception: ``"Transcription error: ..."`` wrapping any failure,
                including a video that has no audio track. The original
                exception is attached as ``__cause__``.
        """
        try:
            with VideoFileClip(video_path) as video:
                if video.audio is None:
                    # Without this check the failure is an opaque
                    # "'NoneType' object has no attribute ..." message.
                    raise ValueError("video has no audio track")

                duration = video.duration
                audio_path = self.create_temp_path(".wav")
                video.audio.write_audiofile(audio_path, fps=16000, verbose=False, logger=None)

            # The extracted WAV is all Whisper needs, so the video reader is
            # released before the (long-running) transcription starts.
            result = self.whisper_model.transcribe(audio_path)
            return result["segments"], duration

        except Exception as e:
            raise Exception(f"Transcription error: {str(e)}") from e

    def translate_segments(self, segments):
        """Translate each segment's text to Tamil.

        Args:
            segments: Iterable of mappings with ``"text"``, ``"start"`` and
                ``"end"`` keys.

        Returns:
            A list of dicts with keys ``"text"``, ``"start"``, ``"end"`` and
            ``"duration"`` (``end - start``). When translating a segment
            fails, a warning is shown and the segment keeps its original text.
        """
        translator = Translator(to_lang='ta')
        translated_segments = []

        for segment in segments:
            try:
                text = translator.translate(segment["text"])
                text = TamilTextProcessor.normalize_tamil_text(text)
                text = TamilTextProcessor.process_for_tts(text)
            except Exception as e:
                # Deliberate per-segment fallback: one bad segment should not
                # abort the whole job.
                st.warning(f"Translation warning for segment: {str(e)}")
                text = segment["text"]

            translated_segments.append({
                "text": text,
                "start": segment["start"],
                "end": segment["end"],
                "duration": segment["end"] - segment["start"],
            })

        return translated_segments

    def generate_tamil_audio(self, text):
        """Synthesize *text* as Tamil speech with gTTS and return the MP3 path.

        Raises:
            Exception: ``"Audio generation error: ..."`` wrapping any failure,
                with the original exception attached as ``__cause__``.
        """
        try:
            audio_path = self.create_temp_path(".mp3")
            tts = gTTS(text=text, lang='ta', slow=False)
            tts.save(audio_path)
            return audio_path
        except Exception as e:
            raise Exception(f"Audio generation error: {str(e)}") from e

    def create_subtitle_clip(self, txt, fontsize, color, size):
        """Build a caption-style ``TextClip`` on a half-transparent black background.

        Args:
            txt: Subtitle text.
            fontsize: Font size passed through to ``TextClip``.
            color: Text colour passed through to ``TextClip``.
            size: ``(width, height)`` passed through to ``TextClip``.
        """
        return TextClip(
            txt=txt,
            fontsize=fontsize,
            color=color,
            bg_color='rgba(0,0,0,0.5)',
            size=size,
            method='caption',
        )
|
|
|
def _close_quietly(clip):
    """Close a moviepy clip, ignoring errors so teardown never masks the real failure."""
    if clip is None:
        return
    try:
        clip.close()
    except Exception:
        pass


def process_video(video_data, voice_type, generate_subtitles=True, subtitle_size=24, subtitle_color='white'):
    """Dub a video into Tamil and return the path of the rendered MP4.

    Steps: write the upload to disk, transcribe it, translate every segment,
    synthesize one audio clip per segment, optionally overlay subtitles, then
    render the video with the new audio. Progress is reported through
    Streamlit widgets.

    Args:
        video_data: Raw bytes of the source video.
        voice_type: Voice label chosen in the UI. Accepted for interface
            compatibility but currently unused: audio is produced by
            ``VideoProcessor.generate_tamil_audio``, which takes only text.
        generate_subtitles: Whether to overlay the translated text.
        subtitle_size: Font size for subtitles.
        subtitle_color: Text colour for subtitles.

    Returns:
        Path (``str``) of the rendered file. It is created outside the
        processor's temporary directory so that it still exists after
        cleanup; the caller is responsible for deleting it.

    Raises:
        Exception: ``"Video processing error: ..."`` wrapping any failure,
            with the original exception attached as ``__cause__``.
    """
    processor = VideoProcessor()
    video = None
    final_video = None
    audio_sources = []  # un-shifted AudioFileClips, kept so they can be closed
    output_path = None
    rendered = False

    try:
        input_path = processor.create_temp_path(".mp4")
        with open(input_path, "wb") as f:
            f.write(video_data)

        video = VideoFileClip(input_path)

        progress_text = st.empty()
        progress_bar = st.progress(0)

        progress_text.text("Transcribing video...")
        segments, _ = processor.transcribe_video(input_path)
        progress_bar.progress(0.25)

        progress_text.text("Translating to Tamil...")
        translated_segments = processor.translate_segments(segments)
        progress_bar.progress(0.50)

        progress_text.text("Generating Tamil audio...")
        subtitle_clips = []
        audio_clips = []
        total = len(translated_segments)

        for i, segment in enumerate(translated_segments, start=1):
            text = segment["text"]

            # A segment without speakable text would make the TTS call fail
            # and abort the whole job, so it is skipped instead.
            if text.strip():
                audio_path = processor.generate_tamil_audio(text)
                audio_clip = AudioFileClip(audio_path)
                audio_sources.append(audio_clip)
                audio_clips.append(audio_clip.set_start(segment["start"]))

                if generate_subtitles:
                    subtitle_clip = processor.create_subtitle_clip(
                        text,
                        subtitle_size,
                        subtitle_color,
                        (video.w, None),
                    )
                    subtitle_clip = (subtitle_clip
                                     .set_position(('center', 'bottom'))
                                     .set_start(segment["start"])
                                     .set_duration(segment["duration"]))
                    subtitle_clips.append(subtitle_clip)

            progress_bar.progress(0.50 + (0.4 * i / total))

        progress_text.text("Creating final video...")

        if subtitle_clips:
            final_video = CompositeVideoClip([video, *subtitle_clips])
        else:
            final_video = video

        if audio_clips:
            # Audio clips must be mixed with CompositeAudioClip; a
            # CompositeVideoClip cannot hold audio-only clips. The mix is
            # pinned to the video's length so speech that runs past the last
            # frame does not extend the output.
            final_audio = CompositeAudioClip(audio_clips).set_duration(video.duration)
            final_video = final_video.set_audio(final_audio)
        # With no dubbed audio at all the video keeps its original sound track.

        # The result must outlive processor.cleanup(), which removes the
        # processor's whole temp directory, so it gets its own temp file.
        fd, output_path = tempfile.mkstemp(suffix=".mp4")
        os.close(fd)

        final_video.write_videofile(
            output_path,
            codec='libx264',
            audio_codec='aac',
            temp_audiofile=processor.create_temp_path(".m4a"),
            remove_temp=True,
            verbose=False,
            logger=None,
        )

        progress_bar.progress(1.0)
        progress_text.text("Processing complete!")

        rendered = True
        return output_path

    except Exception as e:
        raise Exception(f"Video processing error: {str(e)}") from e

    finally:
        # Release file handles before deleting the files they point at.
        for clip in (final_video, video, *audio_sources):
            _close_quietly(clip)

        if output_path is not None and not rendered:
            # Do not leave a partial output file behind after a failure.
            try:
                os.remove(output_path)
            except OSError:
                pass

        processor.cleanup()
|
|
|
def main():
    """Render the app page: upload, options, run the dubbing job, show the result.

    Returns early (after a prompt) while no video has been uploaded. Any
    failure during processing or display is reported with ``st.error``.
    """
    st.title("Tamil Movie Dubbing System")
    st.markdown("""
    👋 Welcome! This tool helps you:
    - 🎥 Convert English videos to Tamil
    - 🗣️ Generate Tamil voiceovers
    - 📝 Add Tamil subtitles
    """)

    video_file = st.file_uploader("Upload Video File", type=['mp4', 'mov', 'avi'])

    if not video_file:
        st.warning("Please upload a video to begin.")
        return

    col1, col2 = st.columns(2)

    with col1:
        voice_type = st.selectbox("Select Voice", list(TAMIL_VOICES.keys()))

    with col2:
        generate_subtitles = st.checkbox("Generate Subtitles", value=True)

    # Defaults used when subtitles are switched off.
    subtitle_size = 24
    subtitle_color = 'white'

    if generate_subtitles:
        col3, col4 = st.columns(2)
        with col3:
            subtitle_size = st.slider("Subtitle Size", 16, 32, 24)
        with col4:
            subtitle_color = st.color_picker("Subtitle Color", "#FFFFFF")

    if st.button("Start Dubbing"):
        try:
            with st.spinner("Processing video..."):
                output_path = process_video(
                    # getvalue() returns the whole upload regardless of the
                    # stream's current read position.
                    video_file.getvalue(),
                    voice_type,
                    generate_subtitles,
                    subtitle_size,
                    subtitle_color,
                )

            # Load the result once and share the bytes between the player and
            # the download button.
            video_bytes = Path(output_path).read_bytes()

            # Best effort: the rendered file is no longer needed on disk.
            try:
                os.remove(output_path)
            except OSError:
                pass

            st.success("டப்பிங் வெற்றிகரமாக முடிந்தது!")

            st.video(video_bytes)

            st.download_button(
                "⬇️ Download Dubbed Video",
                video_bytes,
                file_name="tamil_dubbed_video.mp4",
                mime="video/mp4",
            )

        except Exception as e:
            # Top-level UI boundary: show the failure instead of a traceback.
            st.error(f"Processing failed: {str(e)}")
            st.error("Please try uploading a different video or check if the format is supported.")
|
|
|
# Script entry point: build the page when the file is executed as the main module.
if __name__ == "__main__":
    main()