Anupam251272 committed on
Commit
56df39f
·
verified ·
1 Parent(s): f09fd3c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -0
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import gradio as gr
4
+ import numpy as np
5
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
6
+ from googletrans import Translator
7
+ from gtts import gTTS
8
+ import librosa
9
+ import tempfile
10
+ import soundfile as sf
11
+
12
class RealTimeTranslator:
    """Speech-to-speech translation pipeline: Whisper -> Google Translate -> gTTS."""

    def __init__(self):
        """Load the Whisper checkpoint, select a device and create the translator."""
        # Whisper processor/model pair used for speech recognition.
        self.processor = WhisperProcessor.from_pretrained("openai/whisper-small")
        self.model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")

        # Run the model on the GPU when one is available.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = self.model.to(self.device)

        # Client used by translate_text().
        self.translator = Translator()

        # Language code -> display name for the languages offered to the user.
        self.languages = {
            'en': 'English',
            'fr': 'French',
            'hi': 'Hindi',
            'es': 'Spanish',
            'de': 'German',
            'ja': 'Japanese',
        }

    def speech_to_text(self, audio_path, source_lang):
        """Transcribe the audio file at *audio_path* with Whisper.

        Args:
            audio_path: Path of an audio file readable by ``librosa.load``.
            source_lang: Language code of the speech (e.g. ``"en"``). When
                falsy, no language hint is passed to the model.

        Returns:
            The decoded transcription of the first (only) batch item.
        """
        # The audio is resampled to 16 kHz, the rate passed to the processor.
        audio, _ = librosa.load(audio_path, sr=16000)
        input_features = self.processor(
            audio, sampling_rate=16000, return_tensors="pt"
        ).input_features
        input_features = input_features.to(self.device)

        # Tell Whisper which language is being spoken instead of letting it
        # guess; the caller already knows it from the "Source Language" input.
        generate_kwargs = {}
        if source_lang:
            generate_kwargs = {"language": source_lang, "task": "transcribe"}
        predicted_ids = self.model.generate(input_features, **generate_kwargs)

        transcription = self.processor.batch_decode(predicted_ids, skip_special_tokens=True)
        return transcription[0]

    def translate_text(self, text, source_lang, target_lang):
        """Translate *text* from *source_lang* to *target_lang*.

        Returns:
            The ``text`` attribute of the result of ``Translator.translate``.
        """
        translation = self.translator.translate(text, src=source_lang, dest=target_lang)
        return translation.text

    def text_to_speech(self, text, target_lang):
        """Synthesize *text* in *target_lang* with gTTS.

        Returns:
            Path of a temporary MP3 file. The caller owns the file and is
            responsible for deleting it.

        Raises:
            Exception: Whatever gTTS raises; the temporary file is removed
                before the exception propagates.
        """
        # gTTS produces MP3 data, so reserve a path with a matching suffix.
        # The handle is closed before gTTS writes to the path so the file is
        # not opened twice at the same time.
        with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as fp:
            output_path = fp.name
        try:
            gTTS(text=text, lang=target_lang).save(output_path)
        except Exception:
            # Do not leave an orphaned temp file behind when synthesis fails.
            try:
                os.unlink(output_path)
            except OSError:
                pass
            raise
        return output_path

    def process_audio(self, audio, source_lang, target_lang):
        """Complete pipeline: Speech → Text → Translation → Speech.

        Args:
            audio: ``(sample_rate, samples)`` pair, or ``None`` when nothing
                was recorded.
            source_lang: Language code of the input speech.
            target_lang: Language code to translate into.

        Returns:
            A 3-tuple ``(output_audio, original_text, translated_text)`` where
            ``output_audio`` is ``(sample_rate, samples)`` on success and
            ``None`` otherwise. Failures are reported through the two text
            fields instead of being raised.
        """
        if audio is None:
            return None, "No audio input received", "Please provide audio input"

        # Every temp file created below is registered here so that it is
        # removed on both the success and the failure path.
        temp_paths = []
        try:
            sample_rate, samples = audio

            # Save input audio temporarily so it can be loaded by path.
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as fp:
                audio_path = fp.name
                temp_paths.append(audio_path)
            sf.write(audio_path, samples, sample_rate)

            # Speech to text
            text = self.speech_to_text(audio_path, source_lang)
            if not text.strip():
                # Nothing to translate or speak; report it plainly rather
                # than surfacing an error from the translation/TTS services.
                return None, text, "No speech detected in the audio input"

            # Translate text
            translated_text = self.translate_text(text, source_lang, target_lang)

            # Text to speech
            output_audio_path = self.text_to_speech(translated_text, target_lang)
            temp_paths.append(output_audio_path)

            # Load the generated audio
            output_audio, sr = librosa.load(output_audio_path)

            return (sr, output_audio), text, translated_text

        except Exception as e:
            # Top-level boundary for the UI callback: show the error to the
            # user in both text outputs.
            return None, f"Error: {str(e)}", f"Error: {str(e)}"

        finally:
            # Clean up temporary files (best effort).
            for path in temp_paths:
                try:
                    os.unlink(path)
                except OSError:
                    pass
92
+
93
def create_gradio_interface():
    """Build the Gradio UI around a freshly constructed ``RealTimeTranslator``.

    Returns:
        The ``gr.Interface`` object; launching it is left to the caller.
    """
    pipeline = RealTimeTranslator()

    def language_dropdown(label, default):
        # Each dropdown gets its own list of the supported language codes.
        return gr.Dropdown(choices=list(pipeline.languages), value=default, label=label)

    # Inputs: microphone recording plus the two language selectors.
    input_components = [
        gr.Audio(sources=["microphone"], type="numpy", label="Input Audio"),
        language_dropdown("Source Language", "en"),
        language_dropdown("Target Language", "fr"),
    ]

    # Outputs mirror the 3-tuple returned by ``process_audio``.
    output_components = [
        gr.Audio(label="Translated Audio"),
        gr.Textbox(label="Original Text"),
        gr.Textbox(label="Translated Text"),
    ]

    # Example rows only preselect language pairs; no audio is supplied.
    example_rows = [
        [None, "en", "fr"],
        [None, "hi", "en"],
        [None, "es", "ja"],
    ]

    return gr.Interface(
        fn=pipeline.process_audio,
        inputs=input_components,
        outputs=output_components,
        title="Real-time Language Translator",
        description="Speak in your language and get instant translation in the target language",
        examples=example_rows,
    )
118
+
119
if __name__ == "__main__":
    # Script entry point: build the interface, then serve it with a public
    # share link and debug mode enabled.
    demo = create_gradio_interface()
    demo.launch(debug=True, share=True)