Spaces:

orai-nlp
/

Sermas

Running

App Files Files

Sermas / app.py

imorcillo

Recovered original language tags

0410b62 verified 15 days ago

raw

history blame

5.88 kB

	import gradio as gr
	import re
	import os
	import requests
	import time
	import soundfile as sf
	import io


	def audio_to_bytes(audio):
	    """Re-encode an audio input as WAV and return it as an in-memory stream.

	    The input is decoded with ``sf.read`` and written back in WAV format
	    into a ``BytesIO`` buffer. The buffer is rewound to position 0 before
	    being returned so it can be read from the start.
	    """
	    samples, sample_rate = sf.read(audio)
	    wav_buffer = io.BytesIO()
	    sf.write(wav_buffer, samples, sample_rate, format='WAV')
	    # Rewind so the consumer reads the WAV payload from its first byte.
	    wav_buffer.seek(0)
	    return wav_buffer

	def langswitch_API_call(audio, language, timeout=(10, 300)):
	    """Upload an audio input to the transcription API and return its reply.

	    Args:
	        audio: Audio input forwarded to ``audio_to_bytes``, which re-encodes
	            it as WAV before upload.
	        language: Value sent as the ``language`` query parameter.
	        timeout: Timeout handed to ``requests.post``, in seconds; either a
	            single number or a ``(connect, read)`` tuple. Without one a
	            stalled API would block the calling worker indefinitely.

	    Returns:
	        The decoded JSON body of the API response.

	    Raises:
	        RuntimeError: If the ``api_url`` environment variable is not set.
	        Exception: If the API answers with a status code other than 200.
	    """
	    api_url = os.getenv("api_url")
	    if not api_url:
	        # Fail early with a clear message instead of posting to "None/...".
	        raise RuntimeError("The 'api_url' environment variable is not set")

	    audio_bytes = audio_to_bytes(audio)
	    files = {'file': ('audio_chunk.wav', audio_bytes, 'audio/wav')}
	    response = requests.post(
	        f"{api_url}/online/http",
	        # Let requests build and URL-encode the query string.
	        params={"language": language},
	        files=files,
	        timeout=timeout,
	    )
	    if response.status_code != 200:
	        print(response)
	        raise Exception("API error")
	    return response.json()

	def transcribe_base(audio, language):
	    """Transcribe audio through the API and describe the classified speaker.

	    Returns a ``(transcription, speaker message)`` tuple built from the
	    ``transcription``, ``is_new_speaker`` and ``classified_speaker`` fields
	    of the API response.
	    """
	    result = langswitch_API_call(audio, language)
	    print(result)  # log the raw API payload
	    text = result["transcription"]
	    new_speaker = result["is_new_speaker"]
	    speaker_id = result["classified_speaker"]
	    speaker_msg = (
	        f'New speaker detected. Assigned new ID {speaker_id}'
	        if new_speaker
	        else f'Speaker found in database, ID {speaker_id}'
	    )
	    return text, speaker_msg

	def transcribe_mic(audio_microphone, language):
	    """Log a microphone request and delegate it to ``transcribe_base``."""
	    print("Transcription microphone")
	    result = transcribe_base(audio_microphone, language)
	    return result

	def transcribe_file(audio_upload, language):
	    """Log an uploaded-file request and delegate it to ``transcribe_base``."""
	    print("Transcription local file")
	    result = transcribe_base(audio_upload, language)
	    return result


	# Custom stylesheet handed to gr.Blocks(css=...): header banner, info
	# footer with logos, and the accent colour for buttons and tabs.
	# The header rule uses "#header h1, #header h3" so that only headings
	# inside the header are turned white (a bare ",h3" would match every h3
	# on the page).
	css_content = """
	/*
	.gradio-container{
	padding: 0 !important;
	}
	.html-container{
	padding: 0 !important;
	}
	*/
	#orai-info{
	padding: 50px;
	text-align: center;
	font-size: 1rem;
	background: url('https://elia.eus/static/elhuyar/img/landing_page/ig.webp') rgba(0,0,0,0.8);
	background-repeat: no-repeat;
	background-position: center center;
	background-size: cover;
	background-blend-mode: multiply;
	}
	#orai-info-text p{
	color: white !important;
	}
	/*
	#orai-info img{
	margin: auto;
	display: block;
	margin-bottom: 1rem;
	}*/
	.bold{
	font-weight: bold;
	color: inherit !important;
	}
	footer{
	display:none !important
	}

	.logos{
	display: flex;
	justify-content: center;
	}
	.sermas-logo{
	display: flex;
	align-items: center;
	margin-right: 3rem;
	}
	.sermas-logo span{
	color: white !important;
	font-size: 2.5rem;
	font-family: Verdana, Geneva, sans-serif !important;
	font-weight: bold;
	}

	.text-elhuyar{
	color: #0045e7;
	}

	#header{
	padding: 50px;
	padding-top: 30px;
	background-color: #5b65a7;
	}
	#header h1, #header h3{
	color: white;
	}

	button.primary{
	background-color: #5b65a7;
	}
	button.primary:hover{
	background-color: #3c4687;
	}

	button.selected{
	color: #5b65a7 !important;
	}
	button.selected::after{
	background-color: #5b65a7;
	}

	.record-button::before{
	background: #5b65a7;
	}

	"""


	# Language choices as (display label, language code) pairs, shared by both tabs.
	LANGUAGE_CHOICES = (
	    ("English", "en"),
	    ("Spanish", "es"),
	    ("French", "fr"),
	    ("Italian", "it"),
	    ("Basque", "eu"),
	)


	def _build_transcription_tab(title, fn, audio_source):
	    """Add a tab with an audio input, a language dropdown and a transcription box.

	    Must be called inside an active ``gr.Blocks`` context. ``audio_source``
	    is passed to ``gr.Audio(sources=...)`` and ``fn`` is the handler wired
	    to the tab's interface.
	    """
	    with gr.Tab(title):
	        # NOTE(review): the transcribe_* handlers return a
	        # (transcription, speaker message) pair while only one output
	        # component is declared here — confirm how Gradio treats the
	        # extra value, or re-enable the second Textbox below.
	        gr.Interface(
	            fn=fn,
	            inputs=[
	                gr.Audio(sources=audio_source, type="filepath"),
	                # Each dropdown gets its own copy of the shared choices.
	                gr.Dropdown(choices=list(LANGUAGE_CHOICES), value="en"),
	            ],
	            outputs=[
	                gr.Textbox(label="Transcription", autoscroll=False),
	                #gr.Textbox(label="Speaker Identification", autoscroll=False)
	            ],
	            allow_flagging="never",
	        )


	# Page layout: header banner, one tab per audio source, project info footer.
	demo = gr.Blocks(css=css_content) #, fill_width=True)
	with demo:
	    gr.HTML("""
	<div id="header">
	<h1>LANGSWITCH</h1>
	<h3>Multilingual Automatic Speech Recognition in noisy environments</h3>
	</div>
	""")

	    _build_transcription_tab("Transcribe microphone", transcribe_mic, "microphone")
	    _build_transcription_tab("Transcribe local file", transcribe_file, "upload")

	    # The trailing unclosed <p> of the original markup has been dropped.
	    gr.HTML("""
	<div id="orai-info">
	<div class="logos">
	<div class="sermas-logo">
	<img src="https://sermasproject.eu/wp-content/uploads/2023/04/sermas-logo.png" width=100/>
	<span>SERMAS</span>
	</div>
	<img src="https://www.orai.eus/themes/custom/orai_for_drupal9/orai_bw.svg" width=175/>
	</div>
	<div id="orai-info-text">
	<p>The <span class="bold">LANGSWITCH</span> sub-project is part of the Open Call 1 of the <span class="bold">SERMAS</span> project. The goal of the <span class="bold">SERMAS</span> project is to provide socially-acceptable extended reality models and systems.</p>
	<p>The technology powering LANGSWITCH was developed by <span class="bold">Orai NLP Teknologiak</span></p>
	<p><span class="bold">Orai NLP Teknologiak</span> specializes in research, development, and innovation in artificial intelligence, with a focus on fostering a more competitive industrial and business landscape, enhancing public administration efficiency, and promoting a more inclusive society.</p>
	</div>
	</div>
	""")
	demo.queue(max_size=1)
	# Alternative launch with username/password authentication read from the environment:
	#demo.launch(share=False, max_threads=3, auth=(os.getenv("username"), os.getenv("password")), auth_message="Please provide a username and a password.")
	demo.launch(share=False, max_threads=3)