File size: 5,837 Bytes
594893f
 
 
 
 
5e48419
 
594893f
 
5e48419
 
 
 
 
 
594893f
5e48419
 
 
a96b890
 
a12adfc
 
 
5e48419
594893f
 
5e48419
 
 
 
 
 
 
 
 
 
594893f
5e48419
 
 
a5fcb05
5e48419
 
 
594893f
 
5e48419
 
 
 
 
 
 
 
 
 
 
 
 
a12adfc
5e48419
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
594893f
5e48419
 
 
 
 
 
594893f
5e48419
 
 
 
 
 
594893f
5e48419
 
 
594893f
5e48419
 
 
 
594893f
5e48419
 
 
 
 
 
594893f
5e48419
594893f
5e48419
594893f
 
983b6c1
5e48419
983b6c1
 
 
5e48419
 
 
 
 
 
 
 
 
 
 
 
 
594893f
983b6c1
594893f
983b6c1
 
 
2ce6def
594893f
 
3b71270
 
594893f
 
 
5e48419
 
 
 
 
a12adfc
5e48419
 
a12adfc
5e48419
 
 
 
 
 
 
 
594893f
a12adfc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
import gradio as gr
import re
import os
import requests
import time
import soundfile as sf
import io


def audio_to_bytes(audio):
    """Re-encode an audio source as WAV inside an in-memory buffer.

    Args:
        audio: Whatever ``soundfile.read`` accepts. The Gradio audio inputs
            in this file are configured with ``type="filepath"``.

    Returns:
        An ``io.BytesIO`` holding the WAV-encoded audio, rewound to offset 0.
    """
    samples, sample_rate = sf.read(audio)
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, samples, sample_rate, format='WAV')
    # Rewind so the consumer reads the payload from its first byte.
    wav_buffer.seek(0)
    return wav_buffer

def langswitch_API_call(audio, language, timeout=(10, 300)):
    """Send an audio clip to the LANGSWITCH HTTP endpoint and return its JSON reply.

    The audio is re-encoded to WAV in memory and uploaded as the multipart
    field ``file`` to ``{api_url}/online/http``, with ``language`` passed as
    a query parameter. The base URL is read from the ``api_url`` environment
    variable.

    Args:
        audio: Audio source accepted by ``audio_to_bytes``.
        language: Language code sent as the ``language`` query parameter.
        timeout: Forwarded to ``requests.post``; either a single number of
            seconds or a ``(connect, read)`` tuple. The default is a generous
            guess -- tune it to the backend's real latency.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: If the ``api_url`` environment variable is unset or empty.
        Exception: If the API answers with a status code other than 200.
    """
    api_url = os.getenv("api_url")
    if not api_url:
        # Fail fast with a clear message instead of posting to "None/online/http".
        raise RuntimeError("The 'api_url' environment variable is not set")

    audio_bytes = audio_to_bytes(audio)
    files = {'file': ('audio_chunk.wav', audio_bytes, 'audio/wav')}
    # `params` lets requests URL-encode the language value, and `timeout`
    # prevents a stalled backend from blocking this worker indefinitely.
    response = requests.post(
        f"{api_url}/online/http",
        params={"language": language},
        files=files,
        timeout=timeout,
    )
    if response.status_code != 200:
        print(response)
        raise Exception(f"API error (HTTP {response.status_code})")
    return response.json()

def transcribe_base(audio, language):
    """Transcribe an audio clip and describe the speaker classification.

    Calls ``langswitch_API_call`` and reads the ``transcription``,
    ``is_new_speaker`` and ``classified_speaker`` fields from its reply.

    Args:
        audio: Audio source forwarded to ``langswitch_API_call``; ``None``
            when the user submitted without recording or uploading anything.
        language: Language code forwarded to ``langswitch_API_call``.

    Returns:
        A ``(transcription, speaker_message)`` tuple, where ``speaker_message``
        states whether the speaker ID is newly assigned or already known.

    Raises:
        gr.Error: If no audio was provided.
        KeyError: If the API reply lacks one of the expected fields.
    """
    if audio is None:
        # Show a readable message in the UI rather than an opaque failure
        # from the audio decoder.
        raise gr.Error("No audio provided. Please record or upload an audio clip first.")

    response = langswitch_API_call(audio, language)
    print(response)
    transcription = response["transcription"]
    is_new_speaker = response["is_new_speaker"]
    speaker = response["classified_speaker"]
    if is_new_speaker:
        speaker_class_string = f'New speaker detected. Assigned new ID {speaker}'
    else:
        speaker_class_string = f'Speaker found in database, ID {speaker}'
    return transcription, speaker_class_string

def transcribe_mic(audio_microphone, language):
    """Handle a transcription request for microphone audio.

    Logs the request origin and delegates to ``transcribe_base``.

    Args:
        audio_microphone: Recorded audio, forwarded unchanged.
        language: Language code, forwarded unchanged.

    Returns:
        Whatever ``transcribe_base`` returns for these arguments.
    """
    print("Transcription microphone")
    result = transcribe_base(audio_microphone, language)
    return result

def transcribe_file(audio_upload, language):
    """Handle a transcription request for an uploaded audio file.

    Logs the request origin and delegates to ``transcribe_base``.

    Args:
        audio_upload: Uploaded audio, forwarded unchanged.
        language: Language code, forwarded unchanged.

    Returns:
        Whatever ``transcribe_base`` returns for these arguments.
    """
    print("Transcription local file")
    result = transcribe_base(audio_upload, language)
    return result


# Custom stylesheet passed to gr.Blocks(css=...): it styles the #header banner,
# the #orai-info footer (background image, logos, text colours), the primary /
# selected buttons and the record button, and hides Gradio's default <footer>.
# Two rule sets are kept commented out inside the CSS itself.
# NOTE(review): the selector `#header h1,h3` applies to `#header h1` AND to every
# `h3` on the page, not only those inside #header -- confirm whether
# `#header h1, #header h3` was intended.
css_content = """
/*
.gradio-container{
    padding: 0 !important;
}
.html-container{
    padding: 0 !important;
}
*/
#orai-info{
    padding: 50px;
    text-align: center;
    font-size: 1rem;
    background: url('https://elia.eus/static/elhuyar/img/landing_page/ig.webp') rgba(0,0,0,0.8);
    background-repeat: no-repeat;
    background-position: center center;
    background-size: cover;
    background-blend-mode: multiply;
}
#orai-info-text p{
    color: white !important;
}
/*
#orai-info img{
    margin: auto;
    display: block;
    margin-bottom: 1rem;
}*/
.bold{
    font-weight: bold;
    color: inherit !important;
}
footer{
    display:none !important
}

.logos{
    display: flex;
    justify-content: center;
}
.sermas-logo{
    display: flex;
    align-items: center;
    margin-right: 3rem;
}
.sermas-logo span{
    color: white !important;
    font-size: 2.5rem;
    font-family: Verdana, Geneva, sans-serif !important;
    font-weight: bold;
}

.text-elhuyar{
    color: #0045e7;
}

#header{
    padding: 50px;
    padding-top: 30px;
    background-color: #5b65a7;
}
#header h1,h3{
    color: white;
}

button.primary{
    background-color: #5b65a7;
}
button.primary:hover{
    background-color: #3c4687;
}

button.selected{
    color: #5b65a7 !important;
}
button.selected::after{
    background-color: #5b65a7;
}

.record-button::before{
    background: #5b65a7;
}

"""


# Page banner rendered above the tabs.
_HEADER_HTML = """
<div id="header">
    <h1>LANGSWITCH</h1>
    <h3>Multilingual Automatic Speech Recognition in noisy environments</h3>
</div>
"""

# Project / credits section rendered below the tabs.
_FOOTER_HTML = """
<div id="orai-info">
    <div class="logos">
        <div class="sermas-logo">
            <img src="https://sermasproject.eu/wp-content/uploads/2023/04/sermas-logo.png" width=100/>
            <span>SERMAS</span>
        </div>
        <img src="https://www.orai.eus/themes/custom/orai_for_drupal9/orai_bw.svg" width=175/>
    </div>
    <div id="orai-info-text">
        <p>The <span class="bold">LANGSWITCH</span> sub-project is part of the Open Call 1 of the <span class="bold">SERMAS</span> project. The goal of the <span class="bold">SERMAS</span> project is to provide socially-acceptable extended reality models and systems.</p>
        <p>The technology powering LANGSWITCH was developed by <span class="bold">Orai NLP Teknologiak</span></p>
        <p><span class="bold">Orai NLP Teknologiak</span> specializes in research, development, and innovation in artificial intelligence, with a focus on fostering a more competitive industrial and business landscape, enhancing public administration efficiency, and promoting a more inclusive society.</p>
    </div>
</div>
<p>"""

# (label, value) pairs offered by the language dropdown of both tabs.
_LANGUAGE_CHOICES = (
    ("English", "en"),
    ("Spanish", "es"),
    ("French", "fr"),
    ("Italian", "it"),
    ("Basque", "eu"),
)


def _transcription_interface(fn, source):
    """Build the audio + language -> (transcription, speaker) form for one tab.

    Must be called inside the target ``gr.Tab`` context so the components
    are created there.
    """
    audio_input = gr.Audio(sources=source, type="filepath")
    # Each dropdown receives its own list copy of the shared choices.
    language_input = gr.Dropdown(choices=list(_LANGUAGE_CHOICES), value="en")
    transcription_output = gr.Textbox(label="Transcription", autoscroll=False)
    speaker_output = gr.Textbox(label="Speaker Identification", autoscroll=False)
    return gr.Interface(
        fn=fn,
        inputs=[audio_input, language_input],
        outputs=[transcription_output, speaker_output],
        allow_flagging="never",
    )


# fill_width=True is currently not passed to gr.Blocks.
with gr.Blocks(css=css_content) as demo:
    gr.HTML(_HEADER_HTML)

    with gr.Tab("Transcribe microphone"):
        iface = _transcription_interface(transcribe_mic, "microphone")

    with gr.Tab("Transcribe local file"):
        iface = _transcription_interface(transcribe_file, "upload")

    gr.HTML(_FOOTER_HTML)

# Keep at most one pending request in the queue.
demo.queue(max_size=1)

# Login credentials are read from the "username" / "password" environment variables.
_auth_credentials = (os.getenv("username"), os.getenv("password"))
demo.launch(
    share=False,
    max_threads=3,
    auth=_auth_credentials,
    auth_message="Please provide a username and a password.",
)