File size: 1,364 Bytes
66dff0f
887d074
 
087b372
 
 
 
 
 
 
 
f531a85
887d074
 
 
 
f531a85
887d074
b11f488
087b372
887d074
 
1571261
887d074
24f7854
29b5120
087b372
0fb7c05
29b5120
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import whisper
import numpy as np
from pydub import AudioSegment
from deep_translator import GoogleTranslator, detection
import os

# Supported-language table from deep_translator, requested as a dict.
# The code below treats keys as language names and values as language codes.
available_languages = GoogleTranslator().get_supported_languages(as_dict=True)

# Title-cased language name -> language code (forward lookup).
formatted_languages = {
    lang_name.title(): lang_code
    for lang_name, lang_code in available_languages.items()
}

# Language code -> title-cased language name (reverse lookup).
formatted_codes = {
    lang_code: lang_name.title()
    for lang_name, lang_code in available_languages.items()
}

# API key passed to deep_translator's language detection; None when the
# environment variable is not set.
lang_detect_key = os.environ.get("detect_language_api_key")

def audio_to_numpy(audio_file_input):
    """Decode an audio file into a mono, 16 kHz, float32 waveform.

    Args:
        audio_file_input: Whatever ``AudioSegment.from_file`` accepts
            (typically a file path or file-like object).

    Returns:
        A ``np.float32`` array of samples scaled into ``[-1.0, 1.0]``.
    """
    audio = AudioSegment.from_file(audio_file_input)
    audio = audio.set_channels(1).set_frame_rate(16000)
    samples = np.array(audio.get_array_of_samples(), dtype=np.float32)

    # Scale by 2**(bits - 1) instead of the integer type's max value: dividing
    # by the max (e.g. 32767) maps the most negative sample to just below -1.0,
    # whereas 32768 keeps the whole signed range inside [-1.0, 1.0].
    full_scale = np.float32(1 << (8 * audio.sample_width - 1))
    return samples / full_scale

def src_audio_to_eng_translator(audio_file_input, model_size="turbo", target_lang="English"):
    """Transcribe an audio file with Whisper and translate the transcript.

    Args:
        audio_file_input: Audio source forwarded to ``audio_to_numpy``.
        model_size: Whisper model name passed to ``whisper.load_model``.
        target_lang: Target language, either a language name (matched
            case-insensitively against ``formatted_languages``) or a language
            code present in ``formatted_codes``. Anything unrecognised falls
            back to English (``'en'``).

    Returns:
        A tuple ``(input_text, translated_text, src_lang)`` where ``src_lang``
        is the title-cased source language name, or
        ``'Source language not detected'`` when it cannot be resolved.
    """
    audio_data = audio_to_numpy(audio_file_input)

    # NOTE(review): the model is loaded on every call; consider reusing it if
    # this function is called repeatedly and concurrent use is ruled out.
    model = whisper.load_model(model_size)
    result = model.transcribe(audio_data)
    input_text = result["text"]

    not_detected = 'Source language not detected'

    # Silent or unintelligible audio yields an empty transcript. The detection
    # API rejects empty text, so skip both network calls in that case.
    if not input_text.strip():
        return input_text, "", not_detected

    if lang_detect_key:
        src_lang_code = detection.single_detection(input_text, api_key=lang_detect_key)
    else:
        # Without an API key the detection call raises; fall back to the
        # language Whisper reported. Its codes presumably do not all match the
        # translator's table, in which case the lookup below reports
        # "not detected".
        src_lang_code = result.get("language")
    src_lang = formatted_codes.get(src_lang_code, not_detected)

    # Resolve the target: title-cased name first, then a raw language code,
    # and finally the original English default.
    requested = target_lang.strip() if isinstance(target_lang, str) else target_lang
    if isinstance(requested, str) and requested.title() in formatted_languages:
        target_lang_code = formatted_languages[requested.title()]
    elif requested in formatted_codes:
        target_lang_code = requested
    else:
        target_lang_code = 'en'

    translated_text = GoogleTranslator(source='auto', target=target_lang_code).translate(input_text)
    return input_text, translated_text, src_lang