import gradio as gr
import voice_recognition as vr

# Selectable source and target languages (the same set is offered for both)
source_languages = ["English", "French", "Arabic", "MA-Arabic", "Tagalog", "Cebuano"]
target_languages = ["English", "French", "Arabic", "MA-Arabic", "Tagalog", "Cebuano"]

# Model and task options
models = ["Whisper", "Google"]
tasks = ["Transcript", "Transcript and Translate"]

# Dictionary mapping language labels to language codes.
# Note: most entries are ISO 639-1 codes; "MA-Arabic" (Moroccan Arabic) is
# mapped to generic Arabic, and Cebuano is mapped to the Filipino locale tag
# "fil-PH", presumably as a fallback since Cebuano itself is not widely
# supported by speech recognizers.
language_mapping = {
    "English": "en",
    "French": "fr",
    "Arabic": "ar",
    "MA-Arabic": "ar",
    "Tagalog": "tl",
    "Cebuano": "fil-PH",
}

# Look up the language code for a display label, defaulting to English ("en")
# for unknown labels
def get_iso_code(language_label):
    return language_mapping.get(language_label, "en")
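# For example, get_iso_code("French") returns "fr", while an unmapped label
# such as "Spanish" (hypothetical, not in the dropdowns below) falls back to "en".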


# Handle the form inputs: pick whichever audio source was provided and run
# the selected recognition task
def process_inputs(source_lang, target_lang, model, task, audio_mic, audio_upload):
    audio = audio_mic or audio_upload
    if audio is None:
        return "No audio provided. Record or upload a file first."
    print(f"Submit clicked; audio path: {audio}")
    response = vr.process_audio_recognition(
        model=model,
        audio_path=audio,
        source_lang=get_iso_code(source_lang),
        target_lang=get_iso_code(target_lang),
        translate=(task == "Transcript and Translate"),
    )
    return response
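# A quick smoke test for the function above, bypassing the UI. This is only a
# sketch: it assumes voice_recognition.process_audio_recognition accepts the
# keyword arguments used above and returns a string, and "sample.wav" is a
# hypothetical file path.
#
#   result = process_inputs("English", "French", "Whisper",
#                           "Transcript and Translate", None, "sample.wav")
#   print(result)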


# Gradio interface setup
with gr.Blocks(title="Voice Transcription") as demo:
    with gr.Row():
        # Dropdowns for source and target languages; they return the display
        # label, which process_inputs converts to a language code via get_iso_code
        source_lang_dropdown = gr.Dropdown(label="Source Language", choices=source_languages, value="English")
        target_lang_dropdown = gr.Dropdown(label="Target Language", choices=target_languages, value="French")
    with gr.Row():
        # Dropdown for models and tasks
        model_dropdown = gr.Dropdown(label="Model", choices=models, value=models[0], info="The model that will be used")
        task_dropdown = gr.Dropdown(label="Task", choices=tasks, value=tasks[0], info="Transcript and/or Translate")

    # Audio input (mic or upload)
    with gr.Tab("Record"):
        audio_input_mic = gr.Audio(sources=["microphone"], type="filepath")
    with gr.Tab("Upload"):
        audio_input_upload = gr.Audio(sources=["upload"], type="filepath")

    # Button to submit
    submit_button = gr.Button("Submit")

    # Output label to show the selected inputs
    output = gr.Textbox(label="Result")

    # Link button click to function
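    # Gradio passes the component values to fn positionally, in the order they
    # appear in `inputs`, so this list must match the parameter order of
    # process_inputs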
    submit_button.click(fn=process_inputs,
                        inputs=[source_lang_dropdown, target_lang_dropdown, model_dropdown, task_dropdown, audio_input_mic, audio_input_upload],
                        outputs=output)

if __name__ == '__main__':
    demo.launch(favicon_path="./favicon.ico")
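    # Optional variation using standard Gradio launch parameters:
    # demo.launch(server_name="0.0.0.0", share=True) listens on all network
    # interfaces and creates a temporary public share link.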