Anupam251272 committed on
Commit
7dd66b8
·
verified ·
1 Parent(s): 6ee873f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -28
app.py CHANGED
@@ -3,23 +3,25 @@ import torch
3
  import gradio as gr
4
  import numpy as np
5
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
6
- from deep_translator import GoogleTranslator
7
  from gtts import gTTS
8
  import librosa
9
  import tempfile
10
  import soundfile as sf
11
 
12
-
13
  class RealTimeTranslator:
14
  def __init__(self):
15
- # Initialize Whisper model for speech recognition
16
- self.processor = WhisperProcessor.from_pretrained("openai/whisper-small")
17
- self.model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
18
 
19
  # Use GPU if available
20
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
21
  self.model = self.model.to(self.device)
22
 
 
 
 
23
  # Supported languages
24
  self.languages = {
25
  'en': 'English',
@@ -32,32 +34,38 @@ class RealTimeTranslator:
32
 
33
  def speech_to_text(self, audio_path, source_lang):
34
  """Convert speech to text using Whisper"""
35
- # Load and preprocess audio
36
- audio, _ = librosa.load(audio_path, sr=16000)
37
- input_features = self.processor(audio, sampling_rate=16000, return_tensors="pt").input_features
38
- input_features = input_features.to(self.device)
 
39
 
40
- # Generate token ids
41
- predicted_ids = self.model.generate(input_features)
42
 
43
- # Decode token ids to text
44
- transcription = self.processor.batch_decode(predicted_ids, skip_special_tokens=True)
45
- return transcription[0]
 
 
46
 
47
  def translate_text(self, text, source_lang, target_lang):
48
- """Translate text using deep-translator"""
49
  try:
50
- translated_text = GoogleTranslator(source=source_lang, target=target_lang).translate(text)
51
- return translated_text
52
  except Exception as e:
53
- return f"Translation error: {str(e)}"
54
 
55
  def text_to_speech(self, text, target_lang):
56
  """Convert text to speech using gTTS"""
57
- with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as fp:
58
- tts = gTTS(text=text, lang=target_lang)
59
- tts.save(fp.name)
60
- return fp.name
 
 
 
61
 
62
  def process_audio(self, audio, source_lang, target_lang):
63
  """Complete pipeline: Speech → Text → Translation → Speech"""
@@ -72,12 +80,18 @@ class RealTimeTranslator:
72
 
73
  # Speech to text
74
  text = self.speech_to_text(audio_path, source_lang)
 
 
75
 
76
  # Translate text
77
  translated_text = self.translate_text(text, source_lang, target_lang)
 
 
78
 
79
  # Text to speech
80
  output_audio_path = self.text_to_speech(translated_text, target_lang)
 
 
81
 
82
  # Load the generated audio
83
  output_audio, sr = librosa.load(output_audio_path)
@@ -91,15 +105,14 @@ class RealTimeTranslator:
91
  except Exception as e:
92
  return None, f"Error: {str(e)}", f"Error: {str(e)}"
93
 
94
-
95
  def create_gradio_interface():
96
  translator = RealTimeTranslator()
97
 
98
- # Create the Gradio interface with updated Audio component syntax
99
  demo = gr.Interface(
100
  fn=translator.process_audio,
101
  inputs=[
102
- gr.Audio(sources=["microphone"], type="numpy", label="Input Audio"), # Updated syntax
103
  gr.Dropdown(choices=list(translator.languages.keys()), value="en", label="Source Language"),
104
  gr.Dropdown(choices=list(translator.languages.keys()), value="fr", label="Target Language")
105
  ],
@@ -109,7 +122,7 @@ def create_gradio_interface():
109
  gr.Textbox(label="Translated Text")
110
  ],
111
  title="Real-time Language Translator",
112
- description="Speak in your language and get instant translation in the target language",
113
  examples=[
114
  [None, "en", "fr"],
115
  [None, "hi", "en"],
@@ -118,7 +131,6 @@ def create_gradio_interface():
118
  )
119
  return demo
120
 
121
-
122
  if __name__ == "__main__":
123
  demo = create_gradio_interface()
124
- demo.launch(share=True, debug=True)
 
3
  import gradio as gr
4
  import numpy as np
5
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
6
+ from googletrans import Translator
7
  from gtts import gTTS
8
  import librosa
9
  import tempfile
10
  import soundfile as sf
11
 
 
12
  class RealTimeTranslator:
13
  def __init__(self):
14
+ # Initialize Whisper model for speech recognition (using tiny model for lower resource usage)
15
+ self.processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
16
+ self.model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
17
 
18
  # Use GPU if available
19
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
20
  self.model = self.model.to(self.device)
21
 
22
+ # Initialize translator
23
+ self.translator = Translator()
24
+
25
  # Supported languages
26
  self.languages = {
27
  'en': 'English',
 
34
 
35
  def speech_to_text(self, audio_path, source_lang):
36
  """Convert speech to text using Whisper"""
37
+ try:
38
+ # Load and preprocess audio
39
+ audio, _ = librosa.load(audio_path, sr=16000)
40
+ input_features = self.processor(audio, sampling_rate=16000, return_tensors="pt").input_features
41
+ input_features = input_features.to(self.device)
42
 
43
+ # Generate token ids
44
+ predicted_ids = self.model.generate(input_features)
45
 
46
+ # Decode token ids to text
47
+ transcription = self.processor.batch_decode(predicted_ids, skip_special_tokens=True)
48
+ return transcription[0]
49
+ except Exception as e:
50
+ return f"Error in speech-to-text: {str(e)}"
51
 
52
  def translate_text(self, text, source_lang, target_lang):
53
+ """Translate text using Google Translate"""
54
  try:
55
+ translation = self.translator.translate(text, src=source_lang, dest=target_lang)
56
+ return translation.text
57
  except Exception as e:
58
+ return f"Error in translation: {str(e)}"
59
 
60
  def text_to_speech(self, text, target_lang):
61
  """Convert text to speech using gTTS"""
62
+ try:
63
+ with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as fp:
64
+ tts = gTTS(text=text, lang=target_lang)
65
+ tts.save(fp.name)
66
+ return fp.name
67
+ except Exception as e:
68
+ return f"Error in text-to-speech: {str(e)}"
69
 
70
  def process_audio(self, audio, source_lang, target_lang):
71
  """Complete pipeline: Speech → Text → Translation → Speech"""
 
80
 
81
  # Speech to text
82
  text = self.speech_to_text(audio_path, source_lang)
83
+ if "Error" in text:
84
+ return None, text, ""
85
 
86
  # Translate text
87
  translated_text = self.translate_text(text, source_lang, target_lang)
88
+ if "Error" in translated_text:
89
+ return None, text, translated_text
90
 
91
  # Text to speech
92
  output_audio_path = self.text_to_speech(translated_text, target_lang)
93
+ if "Error" in output_audio_path:
94
+ return None, text, translated_text
95
 
96
  # Load the generated audio
97
  output_audio, sr = librosa.load(output_audio_path)
 
105
  except Exception as e:
106
  return None, f"Error: {str(e)}", f"Error: {str(e)}"
107
 
 
108
  def create_gradio_interface():
109
  translator = RealTimeTranslator()
110
 
111
+ # Create the Gradio interface
112
  demo = gr.Interface(
113
  fn=translator.process_audio,
114
  inputs=[
115
+ gr.Audio(sources=["microphone"], type="numpy", label="Input Audio"),
116
  gr.Dropdown(choices=list(translator.languages.keys()), value="en", label="Source Language"),
117
  gr.Dropdown(choices=list(translator.languages.keys()), value="fr", label="Target Language")
118
  ],
 
122
  gr.Textbox(label="Translated Text")
123
  ],
124
  title="Real-time Language Translator",
125
+ description="Speak in your language and get instant translation in the target language. Please ensure your device is set to speakerphone mode for best results.",
126
  examples=[
127
  [None, "en", "fr"],
128
  [None, "hi", "en"],
 
131
  )
132
  return demo
133
 
 
134
  if __name__ == "__main__":
135
  demo = create_gradio_interface()
136
+ demo.launch(share=True, debug=True)