|
import streamlit as st |
|
from moviepy.editor import VideoFileClip, AudioFileClip, TextClip, CompositeVideoClip |
|
import whisper |
|
from translate import Translator |
|
from gtts import gTTS |
|
import tempfile |
|
import os |
|
import numpy as np |
|
from datetime import timedelta |
|
import json |
|
from indic_transliteration import sanscript |
|
from indic_transliteration.sanscript import transliterate |
|
import azure.cognitiveservices.speech as speechsdk |
|
import ffmpeg |
|
|
|
|
|
# Page-level configuration: browser tab title and icon, full-width layout.
st.set_page_config(page_title="translate", page_icon="🎬", layout="wide")

# Inject custom CSS: full-width red buttons and a matching progress-bar colour.
# unsafe_allow_html is needed so the <style> element is emitted as HTML.
st.markdown(
    """
<style>
.stButton>button {
    width: 100%;
    border-radius: 5px;
    height: 3em;
    background-color: #FF4B4B;
    color: white;
}
.stProgress .st-bo {
    background-color: #FF4B4B;
}
</style>
""",
    unsafe_allow_html=True,
)
|
|
|
|
|
# Voice presets shown in the sidebar, keyed by display label. Each preset
# holds a voice identifier ("name") and a speaking "style" label.
TAMIL_VOICES = {
    "Female 1": {"name": "ta-IN-PallaviNeural", "style": "normal"},
    "Female 2": {"name": "ta-IN-PallaviNeural", "style": "formal"},
    "Male 1": {"name": "ta-IN-ValluvarNeural", "style": "normal"},
    "Male 2": {"name": "ta-IN-ValluvarNeural", "style": "formal"},
}
|
|
|
class TamilTextProcessor:
    """Stateless text clean-up helpers applied to Tamil text before TTS."""

    @staticmethod
    def normalize_tamil_text(text):
        """Return *text* with Tamil numeral characters replaced by ASCII digits.

        The ten Tamil digits (௦ through ௯) map to 0 through 9; every other
        character is passed through unchanged.
        """
        digit_table = str.maketrans("௦௧௨௩௪௫௬௭௮௯", "0123456789")
        return text.translate(digit_table)

    @staticmethod
    def process_for_tts(text):
        """Return *text* restricted to low code points with tidy whitespace.

        Characters whose code point is 65535 or higher (for example emoji)
        are dropped, then every run of whitespace is collapsed to a single
        space and leading/trailing whitespace is removed.
        """
        kept_chars = [ch for ch in text if ord(ch) < 65535]
        words = "".join(kept_chars).split()
        return " ".join(words)
|
|
|
@st.cache_resource
def load_whisper_model():
    """Load and return the Whisper "base" model.

    Wrapped in ``st.cache_resource`` so the loaded model is cached by
    Streamlit rather than being reloaded on each call.
    """
    model = whisper.load_model("base")
    return model
|
|
|
class TamilDubber:
    """Helper for the dubbing pipeline: transcribe, translate, synthesise, subtitle.

    All intermediate files are placed in a private temporary directory that
    is created in ``__init__``. Call :meth:`cleanup` when done to remove it.
    """

    def __init__(self):
        self.whisper_model = load_whisper_model()
        self.temp_dir = tempfile.mkdtemp()

    def create_temp_file(self, suffix):
        """Return a unique path inside the temp directory ending in *suffix*.

        Only the path is generated; the file itself is not created here.
        """
        return os.path.join(self.temp_dir, f"temp_{os.urandom(8).hex()}{suffix}")

    def cleanup(self):
        """Remove the temp directory and everything in it.

        Failures are reported as a Streamlit warning instead of being raised,
        so cleanup never masks the result of the run itself.
        """
        import shutil

        try:
            shutil.rmtree(self.temp_dir)
        except Exception as e:
            st.warning(f"Cleanup warning: {e}")

    def extract_audio(self, video_path):
        """Extract the audio track of *video_path* and transcribe it with Whisper.

        Returns:
            A ``(segments, duration)`` tuple: the ``"segments"`` entry of the
            Whisper transcription result and the video's duration.

        Raises:
            ValueError: if the video has no audio track.
            Exception: any other failure is shown via ``st.error`` and re-raised.
        """
        try:
            video = VideoFileClip(video_path)
            try:
                if video.audio is None:
                    raise ValueError("The video has no audio track to transcribe.")
                audio_path = self.create_temp_file(".wav")
                video.audio.write_audiofile(audio_path, fps=16000)
                # Read the duration before closing the clip.
                duration = video.duration
            finally:
                # Release the reader so the file handle is not leaked.
                video.close()

            result = self.whisper_model.transcribe(audio_path)
            return result["segments"], duration
        except Exception as e:
            st.error(f"Error in audio extraction: {e}")
            raise

    def translate_segments(self, segments):
        """Translate each segment's text to Tamil, keeping its timing.

        A segment whose translation fails keeps its original text and a
        Streamlit warning is shown, so one bad segment does not abort the run.

        Returns:
            A list of dicts with ``text``, ``start``, ``end`` and ``duration``.
        """
        translator = Translator(to_lang='ta')
        translated_segments = []

        for segment in segments:
            text = segment["text"]
            try:
                translated = translator.translate(text)
                translated = TamilTextProcessor.normalize_tamil_text(translated)
                text = TamilTextProcessor.process_for_tts(translated)
            except Exception as e:
                # Best effort: fall back to the untranslated text.
                st.warning(f"Translation warning for segment: {str(e)}")

            translated_segments.append({
                "text": text,
                "start": segment["start"],
                "end": segment["end"],
                "duration": segment["end"] - segment["start"],
            })

        return translated_segments

    def generate_audio(self, text, voice_style="normal"):
        """Synthesise Tamil speech for *text* with gTTS and return the MP3 path.

        ``voice_style`` is accepted for interface compatibility but is not
        used by this implementation.

        Raises:
            Exception: any failure is shown via ``st.error`` and re-raised.
        """
        try:
            temp_path = self.create_temp_file(".mp3")
            tts = gTTS(text=text, lang='ta', slow=False)
            tts.save(temp_path)
            return temp_path
        except Exception as e:
            st.error(f"Error in audio generation: {e}")
            raise

    @staticmethod
    def _format_srt_timestamp(seconds):
        """Format a time in seconds as an SRT timestamp, ``HH:MM:SS,mmm``."""
        total_ms = max(0, int(round(float(seconds) * 1000)))
        hours, remainder = divmod(total_ms, 3600000)
        minutes, remainder = divmod(remainder, 60000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

    def create_subtitles(self, segments, output_path):
        """Write *segments* to *output_path* as a UTF-8 SRT subtitle file.

        Each segment needs ``start`` and ``end`` (in seconds) and ``text``.
        Timestamps keep millisecond precision so cues that start and end
        within the same second do not collapse to zero length.

        Raises:
            Exception: any failure is shown via ``st.error`` and re-raised.
        """
        try:
            with open(output_path, 'w', encoding='utf-8') as f:
                for idx, segment in enumerate(segments, 1):
                    start_time = self._format_srt_timestamp(segment["start"])
                    end_time = self._format_srt_timestamp(segment["end"])
                    f.write(f"{idx}\n")
                    f.write(f"{start_time} --> {end_time}\n")
                    f.write(f"{segment['text']}\n\n")
        except Exception as e:
            st.error(f"Error creating subtitles: {e}")
            raise
|
|
|
def _hex_to_ass_colour(hex_colour):
    """Convert a ``#RRGGBB`` colour into the ``&H00BBGGRR`` form used by ASS styles."""
    digits = hex_colour.lstrip("#")
    if len(digits) != 6:
        # Unexpected input: fall back to opaque white.
        return "&H00FFFFFF"
    red, green, blue = digits[0:2], digits[2:4], digits[4:6]
    return f"&H00{blue}{green}{red}".upper()


def main():
    """Streamlit entry point: upload a video, dub it into Tamil, offer the result.

    Steps: save the upload to a temp file, transcribe it, translate the
    segments, synthesise one Tamil audio clip per segment, place those clips
    at their segment start times as the video's new audio track, optionally
    burn in subtitles, then display the result and offer it for download.
    Temporary files are removed when the run finishes, whether or not it
    succeeded.
    """
    # Local import: the file's top-level moviepy import does not include this name.
    from moviepy.editor import CompositeAudioClip

    st.title("Tamil Movie Dubbing System")
    st.markdown("""
    👋 Welcome to the Tamil Movie Dubbing System! This tool helps you:
    - 🎥 Convert English videos to Tamil
    - 🗣️ Generate Tamil voiceovers
    - 📝 Add Tamil subtitles
    """)

    st.sidebar.header("டப்பிங் அமைப்புகள்")

    st.info("Please upload a video file (MP4, MOV, or AVI format)")
    video_file = st.file_uploader("Upload Video File", type=['mp4', 'mov', 'avi'])

    if not video_file:
        st.warning("Please upload a video to begin the dubbing process.")
        return

    with st.sidebar:
        st.subheader("Voice Settings")
        # NOTE: the selected voice is collected but not passed to audio generation.
        voice_type = st.selectbox("Select Voice", list(TAMIL_VOICES.keys()))

        st.subheader("Subtitle Settings")
        generate_subtitles = st.checkbox("Generate Tamil Subtitles", value=True)
        if generate_subtitles:
            subtitle_size = st.slider("Subtitle Size", 16, 32, 24)
            subtitle_color = st.color_picker("Subtitle Color", "#FFFFFF")

    if st.button("Start Tamil Dubbing"):
        try:
            dubber = TamilDubber()

            progress_bar = st.progress(0)
            status_text = st.empty()

            video = None
            audio_clips = []  # every opened AudioFileClip, closed in `finally`

            try:
                # Persist the upload so the video tools can read it from disk.
                temp_video_path = dubber.create_temp_file(".mp4")
                with open(temp_video_path, "wb") as f:
                    f.write(video_file.read())

                status_text.text("📥 Extracting audio and transcribing...")
                segments, video_duration = dubber.extract_audio(temp_video_path)
                progress_bar.progress(0.25)

                status_text.text("🔄 Translating to Tamil...")
                translated_segments = dubber.translate_segments(segments)
                progress_bar.progress(0.50)

                status_text.text("🔊 Generating Tamil audio...")
                video = VideoFileClip(temp_video_path)

                positioned_clips = []
                segment_count = len(translated_segments)
                for idx, segment in enumerate(translated_segments):
                    # Skip empty text: there is nothing to speak for it.
                    if segment["text"].strip():
                        audio_path = dubber.generate_audio(segment["text"])
                        clip = AudioFileClip(audio_path)
                        audio_clips.append(clip)
                        positioned_clips.append(clip.set_start(segment["start"]))
                    progress_bar.progress(0.50 + (0.25 * (idx + 1) / segment_count))

                status_text.text("🎬 Creating final video...")
                output_path = dubber.create_temp_file(".mp4")

                # Replace the original audio with the Tamil clips, each placed
                # at its segment's start time and capped at the video length.
                if positioned_clips:
                    dubbed_audio = CompositeAudioClip(positioned_clips).set_duration(video_duration)
                    dubbed_video = video.set_audio(dubbed_audio)
                else:
                    # No speech was produced; keep the video as it is.
                    dubbed_video = video

                if generate_subtitles:
                    srt_path = dubber.create_temp_file(".srt")
                    dubber.create_subtitles(translated_segments, srt_path)

                    # Write the dubbed video first, then burn subtitles into it.
                    dubbed_path = dubber.create_temp_file(".mp4")
                    dubbed_video.write_videofile(dubbed_path)

                    # ASS styles expect &HAABBGGRR, not the picker's #RRGGBB.
                    style = f"FontSize={subtitle_size},PrimaryColour={_hex_to_ass_colour(subtitle_color)}"
                    stream = ffmpeg.input(dubbed_path)
                    stream = ffmpeg.output(stream, output_path,
                                           vf=f"subtitles={srt_path}:force_style='{style}'",
                                           acodec='aac')
                    ffmpeg.run(stream, overwrite_output=True)
                else:
                    dubbed_video.write_videofile(output_path)

                progress_bar.progress(1.0)
                status_text.text("✅ Dubbing completed!")

                st.success("டப்பிங் வெற்றிகரமாக முடிந்தது!")

                # Load the result into memory so it remains available after
                # the temp directory is deleted below.
                with open(output_path, "rb") as f:
                    video_bytes = f.read()

                st.video(video_bytes)
                st.download_button(
                    "⬇️ Download Dubbed Video",
                    video_bytes,
                    file_name="tamil_dubbed_video.mp4",
                    mime="video/mp4"
                )

            finally:
                # Close media readers before deleting the files they hold open.
                for clip in audio_clips:
                    clip.close()
                if video is not None:
                    video.close()
                dubber.cleanup()

        except Exception as e:
            st.error(f"An error occurred: {str(e)}")
            st.error("Please try again with a different video or check if the video format is supported.")


if __name__ == "__main__":
    main()