Spaces:

orai-nlp
/

Sermas

Sleeping

File size: 5,837 Bytes

import gradio as gr
import re
import os
import requests
import time
import soundfile as sf
import io


def audio_to_bytes(audio):
    """Re-encode an audio source as WAV into an in-memory buffer.

    Args:
        audio: Anything ``soundfile.read`` accepts; the UI passes a file path.

    Returns:
        An ``io.BytesIO`` holding the WAV-encoded audio, positioned at the
        start so it can be read or uploaded straight away.
    """
    samples, sample_rate = sf.read(audio)
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, samples, sample_rate, format='WAV')
    # Writing leaves the cursor at the end; rewind so readers see the data.
    wav_buffer.seek(0)
    return wav_buffer

def langswitch_API_call(audio, language, timeout=300):
    """Upload an audio clip to the LANGSWITCH HTTP endpoint and return its JSON reply.

    The service base URL is read from the ``api_url`` environment variable.
    The clip is re-encoded to WAV and posted as a multipart upload to
    ``{api_url}/online/http`` with ``language`` as a query parameter.

    Args:
        audio: Audio source accepted by ``audio_to_bytes`` (the UI passes a
            file path).
        language: Language code sent as the ``language`` query parameter.
        timeout: Value forwarded to ``requests.post(timeout=...)``, in
            seconds. Without it a stalled backend would block the worker
            indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: If the ``api_url`` environment variable is unset or empty.
        Exception: If the service answers with a status code other than 200.
    """
    # Fail fast on misconfiguration, before doing any audio work; otherwise
    # the request would be sent to the literal URL "None/online/http".
    api_url = os.getenv("api_url")
    if not api_url:
        raise RuntimeError("The 'api_url' environment variable is not set")

    audio_bytes = audio_to_bytes(audio)
    files = {'file': ('audio_chunk.wav', audio_bytes, 'audio/wav')}
    # Let requests build (and URL-encode) the query string.
    response = requests.post(
        f"{api_url}/online/http",
        params={"language": language},
        files=files,
        timeout=timeout,
    )
    if response.status_code != 200:
        print(response)
        raise Exception("API error")
    return response.json()

def transcribe_base(audio, language):
    """Transcribe an audio clip and describe the speaker the service identified.

    Sends the clip through ``langswitch_API_call``, logs the raw reply, and
    reads the ``transcription``, ``is_new_speaker`` and ``classified_speaker``
    fields from it.

    Args:
        audio: Audio source forwarded unchanged to ``langswitch_API_call``.
        language: Language code forwarded unchanged to ``langswitch_API_call``.

    Returns:
        A ``(transcription, speaker_message)`` tuple, where the message says
        whether the speaker is new or already known and gives the speaker ID.
    """
    result = langswitch_API_call(audio, language)
    print(result)

    text = result["transcription"]
    speaker_is_new = result["is_new_speaker"]
    speaker_id = result["classified_speaker"]

    speaker_message = (
        f'New speaker detected. Assigned new ID {speaker_id}'
        if speaker_is_new
        else f'Speaker found in database, ID {speaker_id}'
    )
    return transcription_pair(text, speaker_message) if False else (text, speaker_message)

def transcribe_mic(audio_microphone, language):
    """Transcribe audio captured from the microphone.

    Logs which input path was used, then delegates to ``transcribe_base``.

    Args:
        audio_microphone: The recorded audio, passed on to ``transcribe_base``.
        language: Language code, passed on to ``transcribe_base``.

    Returns:
        Whatever ``transcribe_base`` returns for this clip.
    """
    print("Transcription microphone")
    outcome = transcribe_base(audio_microphone, language)
    return outcome

def transcribe_file(audio_upload, language):
    """Transcribe audio supplied as an uploaded file.

    Logs which input path was used, then delegates to ``transcribe_base``.

    Args:
        audio_upload: The uploaded audio, passed on to ``transcribe_base``.
        language: Language code, passed on to ``transcribe_base``.

    Returns:
        Whatever ``transcribe_base`` returns for this clip.
    """
    print("Transcription local file")
    outcome = transcribe_base(audio_upload, language)
    return outcome


# Custom stylesheet handed to gr.Blocks(css=...). It styles the page header
# (#header), the project/credits panel (#orai-info and the logo row), recolors
# the primary/selected/record buttons, and hides the page footer.
# NOTE(review): the `#header h1,h3` selector is a selector list, so its second
# part matches every h3 on the page, not only those inside #header — confirm
# that is intended.
css_content = """
/*
.gradio-container{
    padding: 0 !important;
}
.html-container{
    padding: 0 !important;
}
*/
#orai-info{
    padding: 50px;
    text-align: center;
    font-size: 1rem;
    background: url('https://elia.eus/static/elhuyar/img/landing_page/ig.webp') rgba(0,0,0,0.8);
    background-repeat: no-repeat;
    background-position: center center;
    background-size: cover;
    background-blend-mode: multiply;
}
#orai-info-text p{
    color: white !important;
}
/*
#orai-info img{
    margin: auto;
    display: block;
    margin-bottom: 1rem;
}*/
.bold{
    font-weight: bold;
    color: inherit !important;
}
footer{
    display:none !important
}

.logos{
    display: flex;
    justify-content: center;
}
.sermas-logo{
    display: flex;
    align-items: center;
    margin-right: 3rem;
}
.sermas-logo span{
    color: white !important;
    font-size: 2.5rem;
    font-family: Verdana, Geneva, sans-serif !important;
    font-weight: bold;
}

.text-elhuyar{
    color: #0045e7;
}

#header{
    padding: 50px;
    padding-top: 30px;
    background-color: #5b65a7;
}
#header h1,h3{
    color: white;
}

button.primary{
    background-color: #5b65a7;
}
button.primary:hover{
    background-color: #3c4687;
}

button.selected{
    color: #5b65a7 !important;
}
button.selected::after{
    background-color: #5b65a7;
}

.record-button::before{
    background: #5b65a7;
}

"""


# Languages offered in both tabs, as (label shown in the dropdown, value
# passed to the transcription callback as ``language``).
LANGUAGE_CHOICES = [
    ("English", "en"),
    ("Spanish", "es"),
    ("French", "fr"),
    ("Italian", "it"),
    ("Basque", "eu"),
]


def _build_transcription_tab(tab_label, fn, audio_source):
    """Add one tab containing a transcription interface to the current Blocks.

    Both tabs share the same layout (audio input + language dropdown, two
    text outputs); only the tab title, the callback and the audio source
    differ. Must be called inside an active ``gr.Blocks`` context.

    Args:
        tab_label: Title shown on the tab.
        fn: Callback taking ``(audio, language)`` and returning
            ``(transcription, speaker_message)``.
        audio_source: Value for ``gr.Audio(sources=...)``.

    Returns:
        The ``gr.Interface`` created inside the tab.
    """
    with gr.Tab(tab_label):
        return gr.Interface(
            fn=fn,
            inputs=[
                gr.Audio(sources=audio_source, type="filepath"),
                # Copy the list so each dropdown owns its own choices.
                gr.Dropdown(choices=list(LANGUAGE_CHOICES), value="en"),
            ],
            outputs=[
                gr.Textbox(label="Transcription", autoscroll=False),
                gr.Textbox(label="Speaker Identification", autoscroll=False),
            ],
            allow_flagging="never",
        )


# Page layout: header banner, the two transcription tabs, then the
# project/credits panel.
demo = gr.Blocks(css=css_content)
with demo:
    gr.HTML("""
<div id="header">
    <h1>LANGSWITCH</h1>
    <h3>Multilingual Automatic Speech Recognition in noisy environments</h3>
</div>
""")

    _build_transcription_tab("Transcribe microphone", transcribe_mic, "microphone")
    _build_transcription_tab("Transcribe local file", transcribe_file, "upload")

    gr.HTML("""
<div id="orai-info">
    <div class="logos">
        <div class="sermas-logo">
            <img src="https://sermasproject.eu/wp-content/uploads/2023/04/sermas-logo.png" width="100" />
            <span>SERMAS</span>
        </div>
        <img src="https://www.orai.eus/themes/custom/orai_for_drupal9/orai_bw.svg" width="175" />
    </div>
    <div id="orai-info-text">
        <p>The <span class="bold">LANGSWITCH</span> sub-project is part of the Open Call 1 of the <span class="bold">SERMAS</span> project. The goal of the <span class="bold">SERMAS</span> project is to provide socially-acceptable extended reality models and systems.</p>
        <p>The technology powering LANGSWITCH was developed by <span class="bold">Orai NLP Teknologiak</span></p>
        <p><span class="bold">Orai NLP Teknologiak</span> specializes in research, development, and innovation in artificial intelligence, with a focus on fostering a more competitive industrial and business landscape, enhancing public administration efficiency, and promoting a more inclusive society.</p>
    </div>
</div>
""")

# Keep at most one request waiting in the queue.
demo.queue(max_size=1)
# Login credentials come from the "username" / "password" environment variables.
demo.launch(share=False, max_threads=3, auth=(os.getenv("username"), os.getenv("password")), auth_message="Please provide a username and a password.")