Update app.py
Browse files
app.py
CHANGED
@@ -14,15 +14,17 @@ import gradio as gr
|
|
14 |
import concurrent.futures
import assemblyai as aai

# SECURITY: an AssemblyAI API key was committed here in plain text. The key
# remains exposed in VCS history and must be revoked/rotated; load its
# replacement from the environment instead of hardcoding it.
aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
transcriber = aai.Transcriber()

# Service credentials come from the environment, never from source.
AI71_API_KEY = os.getenv('AI71_API_KEY')
XI_API_KEY = os.getenv('ELEVEN_LABS_API_KEY')
client = ElevenLabs(api_key=XI_API_KEY)

# M2M100 tokenizer used by translate_text().
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")
# transcriber = whisper.load_model("turbo")
|
27 |
|
28 |
language_codes = {"English":"en", "Hindi":"hi", "Portuguese":"pt", "Chinese":"zh", "Spanish":"es",
|
@@ -162,10 +164,13 @@ def summarize(meeting_texts=meeting_texts):
|
|
162 |
# Placeholder function for speech to text conversion
def speech_to_text(video):
    """Transcribe *video* with the module-level AssemblyAI transcriber.

    Returns the plain transcript text.
    """
    print('Started transcribing')
    text = transcriber.transcribe(video).text
    print('transcript:', text)
    return text
|
171 |
|
@@ -173,7 +178,7 @@ def speech_to_text(video):
|
|
173 |
def translate_text(text, source_language,target_language):
|
174 |
tokenizer.src_lang = source_language
|
175 |
encoded_ln = tokenizer(text, return_tensors="pt")
|
176 |
-
generated_tokens =
|
177 |
translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
178 |
print('translated_text:', translated_text)
|
179 |
return translated_text
|
|
|
14 |
import concurrent.futures
import assemblyai as aai

# SECURITY NOTE(review): a hardcoded AssemblyAI API key previously lived here
# as commented-out code. It is still visible in VCS history — revoke/rotate
# that key. If AssemblyAI is re-enabled, load the key from the environment:
#     aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")

# Service credentials come from the environment, never from source.
AI71_API_KEY = os.getenv('AI71_API_KEY')
XI_API_KEY = os.getenv('ELEVEN_LABS_API_KEY')
client = ElevenLabs(api_key=XI_API_KEY)

# M2M100 model + tokenizer pair used by translate_text().
translator = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")

# Whisper large-v3-turbo loaded through Gradio's model-hub wrapper;
# local alternative: transcriber = whisper.load_model("turbo")
transcriber = gr.load("models/openai/whisper-large-v3-turbo")
|
29 |
|
30 |
language_codes = {"English":"en", "Hindi":"hi", "Portuguese":"pt", "Chinese":"zh", "Spanish":"es",
|
|
|
164 |
# Placeholder function for speech to text conversion
def speech_to_text(video):
    """Extract the audio track of *video* (an mp4 path) and return its transcript.

    The audio is demuxed with pydub, exported to a unique temporary WAV file,
    and fed to the module-level ``transcriber``.
    """
    import os
    import tempfile

    print('Started transcribing')
    audio = AudioSegment.from_file(video, format="mp4")

    # A unique temp file replaces the previous fixed 'temp.wav' name, which
    # would be clobbered when transcriptions run concurrently (this app fans
    # work out via concurrent.futures) — and it is removed afterwards instead
    # of being leaked in the working directory.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    try:
        audio.export(wav_path, format="wav")
        # NOTE(review): `transcriber(...)` appears to return a string whose
        # payload sits between the first pair of single quotes; this parsing
        # breaks if the transcript itself contains an apostrophe — confirm
        # the upstream output format.
        transcript = transcriber(wav_path).split("'")[1].strip()
    finally:
        os.remove(wav_path)  # always clean up the temp file

    print('transcript:', transcript)
    return transcript
|
176 |
|
|
|
178 |
def translate_text(text, source_language,target_language):
    """Translate *text* between languages with the module-level M2M100 pair.

    ``source_language`` / ``target_language`` are M2M100 language codes
    (e.g. "en", "hi"); the source code is set on the tokenizer and the
    target is forced as the first generated token.
    """
    tokenizer.src_lang = source_language
    model_inputs = tokenizer(text, return_tensors="pt")
    target_id = tokenizer.get_lang_id(target_language)
    output_ids = translator.generate(**model_inputs, forced_bos_token_id=target_id)
    result = tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
    print('translated_text:', result)
    return result
|