vsrinivas committed
Commit 0f1372a · verified · 1 Parent(s): 2438345

Update app.py

Files changed (1): app.py (+13 -8)
app.py CHANGED
@@ -14,15 +14,17 @@ import gradio as gr
 import concurrent.futures
 import assemblyai as aai
 
-aai.settings.api_key = "d5b107f34d534b4ebdfbd869f8408f92"
-transcriber = aai.Transcriber()
+# aai.settings.api_key = "d5b107f34d534b4ebdfbd869f8408f92"
+# transcriber = aai.Transcriber()
 
 AI71_API_KEY = os.getenv('AI71_API_KEY')
 XI_API_KEY = os.getenv('ELEVEN_LABS_API_KEY')
 client = ElevenLabs(api_key=XI_API_KEY)
 
-model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
+translator = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
 tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")
+
+transcriber = gr.load("models/openai/whisper-large-v3-turbo")
 # transcriber = whisper.load_model("turbo")
 
 language_codes = {"English":"en", "Hindi":"hi", "Portuguese":"pt", "Chinese":"zh", "Spanish":"es",
@@ -162,10 +164,13 @@ def summarize(meeting_texts=meeting_texts):
 # Placeholder function for speech to text conversion
 def speech_to_text(video):
     print('Started transcribing')
-    # audio = AudioSegment.from_file(video)
-    # audio.export('temp.mp3', format="mp3")
-    # transcript= transcriber.transcribe('temp.mp3')['text']
-    transcript = transcriber.transcribe(video).text
+    audio = AudioSegment.from_file(video, format="mp4")
+    audio.export('temp.wav', format="wav")
+
+    # transcript = transcriber.transcribe(video).text
+    # transcript = transcriber.transcribe(video).text
+    transcript = transcriber("temp.wav").split("'")[1].strip()
+
     print('transcript:', transcript)
     return transcript
 
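
speech_to_text now extracts the video's audio track with pydub, exports it to temp.wav, and calls the Whisper endpoint loaded with gr.load instead of AssemblyAI. The transcript is then recovered with .split("'")[1], which assumes the endpoint's return value wraps the text in single quotes; it raises IndexError when no quote is present and truncates at any apostrophe inside the speech itself. A slightly more defensive parse, purely illustrative (parse_transcript is a hypothetical helper, not part of this commit):

def parse_transcript(raw):
    # The endpoint appears to return the transcript wrapped in single quotes
    # (hence the .split("'")[1] in the diff). This hypothetical helper keeps
    # that behavior when a quoted span is present and otherwise falls back
    # to the raw string instead of raising IndexError.
    parts = str(raw).split("'")
    return parts[1].strip() if len(parts) >= 3 else str(raw).strip()
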
 
@@ -173,7 +178,7 @@ def speech_to_text(video):
 def translate_text(text, source_language,target_language):
     tokenizer.src_lang = source_language
     encoded_ln = tokenizer(text, return_tensors="pt")
-    generated_tokens = model.generate(**encoded_ln, forced_bos_token_id=tokenizer.get_lang_id(target_language))
+    generated_tokens = translator.generate(**encoded_ln, forced_bos_token_id=tokenizer.get_lang_id(target_language))
     translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
     print('translated_text:', translated_text)
     return translated_text
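
The M2M100 handle is renamed from model to translator, presumably to keep it distinct from the new transcriber; translate_text is otherwise unchanged apart from the generate call site. A usage sketch against the language_codes table defined earlier in the file:

# Usage sketch: the codes come from the language_codes dict in app.py.
src = language_codes["English"]   # "en"
tgt = language_codes["Hindi"]     # "hi"
print(translate_text("The meeting will resume after lunch.", src, tgt))
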
 