andercorral committed on
Commit 5e48419 · 1 Parent(s): 2ce6def

Updated app

Files changed (5)
  1. app.py +157 -37
  2. assets/code.webp +0 -0
  3. assets/orai_bw.svg +418 -0
  4. assets/sermas-logo.png +0 -0
  5. requirements.txt +1 -1
app.py CHANGED
@@ -1,62 +1,165 @@
 import gradio as gr
 import re
-import subprocess
-import math
-import shutil
-import soundfile as sf
-import tempfile
 import os
 import requests
 import time
+import soundfile as sf
+import io
 
 
-def _return_yt_html_embed(yt_url):
-    video_id = yt_url.split("?v=")[-1]
-    HTML_str = (
-        f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
-        " </center>"
-    )
-    return HTML_str
+def audio_to_bytes(audio):
+    data, sr = sf.read(audio)
+    audio_bytes = io.BytesIO()
+    sf.write(audio_bytes, data, sr, format='WAV')
+    audio_bytes.seek(0)
+    return audio_bytes
 
+def langswitch_API_call(audio, language):
+    audio_bytes = audio_to_bytes(audio)
+    files = {'file': (f'audio_chunk.wav', audio_bytes, 'audio/wav')}
+    data = {'language': language}
+    response = requests.post(os.getenv("api_url"), files=files, data=data)
+    return response.json()
 
 def transcribe_base(audio, language):
-    start_time = time.time()
-
-    d, sr = sf.read(audio)
-    data = {'audio': d.tolist(),
-            'sampling_rate': sr,
-            'language': language}
-
-    response = requests.post(os.getenv("api_url"), json=data).json()
-    transcription = response["text"]
-    speaker_class_string = response["speaker_class_string"]
-
-    end_time = time.time()
-    print("-"*50)
-    print(len(data["audio"])/float(sr))
-    print(end_time-start_time)
-    print("-"*50)
-
-    return transcription, speaker_class_string
+    response = langswitch_API_call(audio, language)
+    print(response)
+    transcription = response["transcription"]
+    is_new_speaker = response["is_new_speaker"]
+    speaker = response["classified_speaker"]
+    if is_new_speaker:
+        speaker_class_string = f'New speaker detected. Assigned new ID {speaker}'
+    else:
+        speaker_class_string = f'Speaker found in database, ID {speaker}'
+    return transcription, speaker_class_string
 
-
-def transcribe(audio_microphone, audio_upload, language):
-    print("Transcription request")
-    print(audio_microphone, audio_upload, language)
-    audio = audio_microphone if audio_microphone is not None else audio_upload
-    return transcribe_base(audio, language)
+def transcribe_mic(audio_microphone, language):
+    print("Transcription microphone")
+    return transcribe_base(audio_microphone, language)
+
+def transcribe_file(audio_upload, language):
+    print("Transcription local file")
+    return transcribe_base(audio_upload, language)
 
 
-demo = gr.Blocks()
+css_content = """
+/*
+.gradio-container{
+    padding: 0 !important;
+}
+.html-container{
+    padding: 0 !important;
+}
+*/
+#orai-info{
+    padding: 50px;
+    text-align: center;
+    font-size: 1rem;
+    background: url('gradio_api/file=assets/code.webp') rgba(0,0,0,0.8);
+    background-repeat: no-repeat;
+    background-position: center center;
+    background-size: cover;
+    background-blend-mode: multiply;
+}
+#orai-info-text p{
+    color: white !important;
+}
+/*
+#orai-info img{
+    margin: auto;
+    display: block;
+    margin-bottom: 1rem;
+}*/
+.bold{
+    font-weight: bold;
+    color: inherit !important;
+}
+footer{
+    display:none !important
+}
+
+.logos{
+    display: flex;
+    justify-content: center;
+}
+.sermas-logo{
+    display: flex;
+    align-items: center;
+    margin-right: 3rem;
+}
+.sermas-logo span{
+    color: white !important;
+    font-size: 2.5rem;
+    font-family: Verdana, Geneva, sans-serif !important;
+    font-weight: bold;
+}
+
+.text-elhuyar{
+    color: #0045e7;
+}
+
+#header{
+    padding: 50px;
+    padding-top: 30px;
+    background-color: #5b65a7;
+}
+#header h1,h3{
+    color: white;
+}
+
+button.primary{
+    background-color: #5b65a7;
+}
+button.primary:hover{
+    background-color: #3c4687;
+}
+
+button.selected{
+    color: #5b65a7 !important;
+}
+button.selected::after{
+    background-color: #5b65a7;
+}
+
+.record-button::before{
+    background: #5b65a7;
+}
+
+"""
+
+
+demo = gr.Blocks(css=css_content) #, fill_width=True)
 with demo:
-    gr.Markdown("# Speech recognition using Whisper models")
-    gr.Markdown("Orai NLP Technologies")
+    gr.HTML("""
+        <div id="header">
+            <h1>LANGSWITCH</h1>
+            <h3>Multilingual Automatic Speech Recognition in noisy environments</h3>
+        </div>
+    """)
 
-    with gr.Tab("Trancribe Audio"):
+    with gr.Tab("Transcribe microphone"):
         iface = gr.Interface(
-            fn=transcribe,
+            fn=transcribe_mic,
            inputs=[
                 gr.Audio(sources="microphone", type="filepath"),
+                gr.Dropdown(choices=[("Basque", "eu"),
+                                     ("Spanish", "es"),
+                                     ("English", "en")],
+                            #("French", "fr"),
+                            #("Italian", "it"),
+                            value="en")
+            ],
+            outputs=[
+                gr.Textbox(label="Transcription", autoscroll=False),
+                gr.Textbox(label="Speaker Identification", autoscroll=False)
+            ],
+            allow_flagging="never",
+        )
+
+    with gr.Tab("Transcribe local file"):
+        iface = gr.Interface(
+            fn=transcribe_file,
+            inputs=[
                 gr.Audio(sources="upload", type="filepath"),
                 gr.Dropdown(choices=[("Basque", "eu"),
                             ("Spanish", "es"),
@@ -71,5 +174,22 @@ with demo:
             ],
             allow_flagging="never",
         )
+
+    gr.HTML("""
+        <div id="orai-info">
+            <div class="logos">
+                <div class="sermas-logo">
+                    <img src="gradio_api/file=assets/sermas-logo.png" width=100/>
+                    <span>SERMAS</span>
+                </div>
+                <img src="gradio_api/file=assets/orai_bw.svg" width=175/>
+            </div>
+            <div id="orai-info-text">
+                <p>The <span class="bold">LANGSWITCH</span> sub-project is part of the Open Call 1 of the <span class="bold">SERMAS</span> project. The goal of the <span class="bold">SERMAS</span> project is to provide socially-acceptable extended reality models and systems.</p>
+                <p>The technology powering LANGSWITCH was developed by <span class="bold">Orai NLP Teknologiak</span></p>
+                <p><span class="bold">Orai NLP Teknologiak</span> specializes in research, development, and innovation in artificial intelligence, with a focus on fostering a more competitive industrial and business landscape, enhancing public administration efficiency, and promoting a more inclusive society.</p>
+            </div>
+        </div>
+        <p>""")
     demo.queue(max_size=1)
-    demo.launch(share=False, max_threads=3, auth=(os.getenv("username"), os.getenv("password")), auth_message="Please provide a username and a password.")
+    demo.launch(share=False, max_threads=3, allowed_paths=[f"{os.getcwd()}/assets/"], auth=(os.getenv("username"), os.getenv("password")), auth_message="Please provide a username and a password.")
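
Note on the API change visible in this diff: the old app posted raw samples as JSON (an "audio" list plus "sampling_rate"), while the new langswitch_API_call sends the audio as a multipart WAV upload with the language as a plain form field, and transcribe_base now reads the keys transcription, is_new_speaker and classified_speaker from the response. A minimal standalone sketch of the new request shape, assuming a hypothetical endpoint URL and a local test.wav file (neither is part of this commit):

import io
import os

import requests
import soundfile as sf

# Hypothetical endpoint for illustration; app.py reads the real URL from the api_url env var.
API_URL = os.getenv("api_url", "https://example.invalid/transcribe")

def call_langswitch(path, language="en"):
    # Re-encode the local audio file to an in-memory WAV, mirroring audio_to_bytes().
    data, sr = sf.read(path)
    buf = io.BytesIO()
    sf.write(buf, data, sr, format="WAV")
    buf.seek(0)
    # Multipart upload: audio bytes go in files, the language as a form field.
    files = {"file": ("audio_chunk.wav", buf, "audio/wav")}
    response = requests.post(API_URL, files=files, data={"language": language})
    response.raise_for_status()
    return response.json()

if __name__ == "__main__":
    result = call_langswitch("test.wav", "eu")  # test.wav is a placeholder file
    print(result["transcription"])
    print(result["classified_speaker"])
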
assets/code.webp ADDED
assets/orai_bw.svg ADDED
assets/sermas-logo.png ADDED
requirements.txt CHANGED
@@ -1,3 +1,3 @@
 soundfile==0.12.1
 requests==2.31.0
-PyYAML==6.0.1
+gradio==5.9.1