"""Gradio front end that turns typed text into translated speech.

The app wraps the SeamlessM4T v2 model behind a "J.A.R.V.I.S"-themed
interface: the user types text, picks source/target languages, and gets
synthesized speech in the target language back as an audio clip.
"""

import gradio as gr
import torch
import torchaudio
import numpy as np
from transformers import AutoProcessor, SeamlessM4Tv2Model
from datetime import datetime


class SeamlessTranslator:
    """Text-to-speech translator backed by a SeamlessM4T v2 checkpoint."""

    def __init__(self, model_name: str = "facebook/seamless-m4t-v2-large"):
        """Load the processor and model for *model_name*.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                processor and the model.
        """
        self.processor = AutoProcessor.from_pretrained(model_name)
        self.model = SeamlessM4Tv2Model.from_pretrained(model_name)
        # Output sampling rate as declared by the model configuration.
        self.sample_rate = self.model.config.sampling_rate
        # Display name -> language code handed to the processor/model.
        self.language_codes = {
            "English": "eng",
            "Spanish": "spa",
            "French": "fra",
            "German": "deu",
            "Italian": "ita",
            "Portuguese": "por",
            "Russian": "rus",
            "Chinese": "cmn",
            "Japanese": "jpn",
        }

    def translate(
        self, text: str, src_lang: str, tgt_lang: str
    ) -> tuple[int, np.ndarray]:
        """Translate *text* and synthesize it as speech in *tgt_lang*.

        Args:
            text: Text to translate; must contain non-whitespace characters.
            src_lang: Display name of the source language (a key of
                ``language_codes``).
            tgt_lang: Display name of the target language (a key of
                ``language_codes``).

        Returns:
            ``(sample_rate, waveform)`` where ``waveform`` is the squeezed
            NumPy array taken from the first element of the model output.

        Raises:
            gr.Error: If the text is empty, a language is not supported, or
                the processor/model raises during translation.
        """
        # Validate outside the try block so these user-facing messages are
        # not re-wrapped as a generic "Translation failed" error.
        if not text or not text.strip():
            raise gr.Error("Please enter text to translate.")
        try:
            src_code = self.language_codes[src_lang]
            tgt_code = self.language_codes[tgt_lang]
        except KeyError as exc:
            raise gr.Error(f"Unsupported language: {exc.args[0]}") from exc

        try:
            inputs = self.processor(
                text=text, src_lang=src_code, return_tensors="pt"
            )
            generated = self.model.generate(**inputs, tgt_lang=tgt_code)
            audio_array = generated[0].cpu().numpy().squeeze()
        except Exception as e:
            # Top-level UI boundary: surface any failure to the user while
            # keeping the original exception chained for debugging.
            raise gr.Error(f"Translation failed: {str(e)}") from e
        return self.sample_rate, audio_array


# Custom CSS for Jarvis theme
css = """
#jarvis-interface {
    background-color: black !important;
    background-image: radial-gradient(circle at center, #00303030 0%, #00000080 100%);
    min-height: 100vh;
    font-family: 'Courier New', monospace;
}
#status-ring {
    width: 300px;
    height: 300px;
    border: 4px solid #00ffff;
    border-radius: 50%;
    margin: 20px auto;
    position: relative;
    animation: pulse 2s infinite;
    display: flex;
    align-items: center;
    justify-content: center;
}
@keyframes pulse {
    0% { box-shadow: 0 0 0 0 rgba(0, 255, 255, 0.4); }
    70% { box-shadow: 0 0 0 20px rgba(0, 255, 255, 0); }
    100% { box-shadow: 0 0 0 0 rgba(0, 255, 255, 0); }
}
#inner-ring {
    width: 200px;
    height: 200px;
    border: 2px solid #00ffff;
    border-radius: 50%;
    display: flex;
    align-items: center;
    justify-content: center;
}
#core {
    width: 100px;
    height: 100px;
    border: 3px solid #00ffff;
    border-radius: 50%;
    background-color: black;
    display: flex;
    align-items: center;
    justify-content: center;
    color: #00ffff;
    text-align: center;
    padding: 10px;
}
.jarvis-textbox {
    background-color: black !important;
    border: 2px solid #00ffff !important;
    color: #00ffff !important;
    font-family: 'Courier New', monospace !important;
}
.jarvis-button {
    background-color: transparent !important;
    border: 2px solid #00ffff !important;
    color: #00ffff !important;
    font-family: 'Courier New', monospace !important;
}
.jarvis-button:hover {
    background-color: rgba(0, 255, 255, 0.1) !important;
}
.status-box {
    background-color: black !important;
    border: 2px solid #00ffff !important;
    color: #00ffff !important;
    padding: 10px !important;
    border-radius: 5px !important;
    margin: 5px !important;
    text-align: center !important;
}
"""

# Markup for the animated status ring. The element ids match the
# #status-ring / #inner-ring / #core rules in ``css``; without them those
# rules style nothing.
_STATUS_RING_HTML = (
    '<div id="status-ring">'
    '<div id="inner-ring">'
    '<div id="core"><div>JARVIS<br>ACTIVE</div></div>'
    "</div>"
    "</div>"
)


def _status_box(title: str, state: str) -> str:
    """Return the markup for one status tile styled by ``.status-box``."""
    return f'<div class="status-box">{title}<br>{state}</div>'


def create_interface():
    """Build the Gradio Blocks app.

    Instantiates a ``SeamlessTranslator`` (which loads the model), lays out
    the themed UI, and wires the translate button to the translator.

    Returns:
        The configured ``gr.Blocks`` instance, not yet launched.
    """
    translator = SeamlessTranslator()
    language_names = list(translator.language_codes.keys())

    def translate_text(
        text: str, src_lang: str, tgt_lang: str, progress=gr.Progress()
    ):
        """Button callback: translate *text* and return audio for gr.Audio."""
        progress(0, desc="Initializing...")
        progress(0.3, desc="Processing text...")
        sample_rate, audio = translator.translate(text, src_lang, tgt_lang)
        progress(0.7, desc="Generating audio...")
        progress(1.0, desc="Complete!")
        # gr.Audio(type="numpy") expects a (sample_rate, data) tuple; a bare
        # array carries no sampling rate and cannot be rendered.
        return sample_rate, audio

    with gr.Blocks(css=css, title="J.A.R.V.I.S Translator") as demo:
        gr.Markdown(
            """
            # J.A.R.V.I.S TRANSLATION SYSTEM
            ### Powered by SeamlessM4T
            """
        )

        # Jarvis interface container
        with gr.Column(elem_id="jarvis-interface"):
            # Status Ring
            gr.HTML(_STATUS_RING_HTML)

            # Input controls
            with gr.Row():
                text_input = gr.Textbox(
                    label="Command Input",
                    placeholder="Enter text to translate...",
                    elem_classes=["jarvis-textbox"],
                    lines=3,
                )

            with gr.Row():
                src_lang = gr.Dropdown(
                    choices=language_names,
                    value="English",
                    label="Source Language",
                    elem_classes=["jarvis-textbox"],
                )
                tgt_lang = gr.Dropdown(
                    choices=language_names,
                    value="Spanish",
                    label="Target Language",
                    elem_classes=["jarvis-textbox"],
                )

            translate_btn = gr.Button(
                "▶ EXECUTE TRANSLATION",
                elem_classes=["jarvis-button"],
            )

            # Output audio
            audio_output = gr.Audio(
                label="Translated Speech",
                type="numpy",
            )

            # Status boxes
            with gr.Row():
                with gr.Column():
                    gr.Markdown(_status_box("SYSTEM STATUS", "ACTIVE"))
                with gr.Column():
                    gr.Markdown(_status_box("AUDIO SYSTEM", "READY"))
                with gr.Column():
                    gr.Markdown(_status_box("TRANSLATION", "ONLINE"))

        # Event handler
        translate_btn.click(
            fn=translate_text,
            inputs=[text_input, src_lang, tgt_lang],
            outputs=audio_output,
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.queue()
    demo.launch()