# mrisdi's picture
# feat: ASL
# 428f20d
import gradio as gr
import numpy as np
import speech_recognition as sr
from ultralytics import YOLO
# Instantiate the YOLO model from the 'best.pt' file once at import time so
# that every proses_image() call reuses the same model object.
model = YOLO('best.pt')
def proses_image(image):
    """Run the YOLO model on an image and return the annotated picture.

    Args:
        image: The image to analyse. Gradio's ``gr.Image()`` supplies an RGB
            ``numpy.ndarray`` of shape (height, width, 3), or ``None`` when
            the input has been cleared. Any other source type (for example a
            file path) is handed to the model unchanged.

    Returns:
        An RGB ``numpy.ndarray`` with the detections drawn on it, or ``None``
        when no image was supplied.
    """
    # A cleared input arrives as None; return nothing instead of letting the
    # model fall back to whatever it does for a missing source.
    if image is None:
        return None

    # Ultralytics interprets raw arrays as BGR (OpenCV convention) while
    # Gradio delivers RGB, so swap the channel order before inference.
    if isinstance(image, np.ndarray) and image.ndim == 3 and image.shape[2] == 3:
        image = np.ascontiguousarray(image[..., ::-1])

    results = model(image)

    # Results.plot() returns a BGR array; convert back to RGB for display.
    annotated_bgr = results[0].plot()
    return np.ascontiguousarray(annotated_bgr[..., ::-1])
def process_audio(audio):
    """Transcribe an audio recording to text with Google speech recognition.

    Args:
        audio: Path to the recorded audio file (anything ``sr.AudioFile``
            accepts), or ``None`` when there is no recording.

    Returns:
        The recognized text; an empty string when ``audio`` is ``None``; or a
        user-facing message when the speech could not be understood or the
        recognition service request failed.
    """
    # The UI passes None when no recording exists (e.g. after clearing);
    # sr.AudioFile(None) would raise, so answer with empty text instead.
    if audio is None:
        return ""

    recognizer = sr.Recognizer()
    # Read the whole file into memory, then close it before the network call.
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # The service responded but could not make out any speech.
        return "Audio tidak dapat dikenali."
    except sr.RequestError as e:
        # The service could not be reached or rejected the request.
        return f"Error dengan layanan pengenalan suara: {e}"
def update_visibility(input):
    """Return row updates that show only the section for the selected mode.

    Args:
        input: The selected mode label, "Gambar" (image) or "Audio".

    Returns:
        A pair ``(image_row, audio_row)`` of ``gr.Row`` updates with exactly
        one of them visible, or ``None`` when the label is neither mode.
    """
    show_image = input == "Gambar"
    show_audio = input == "Audio"

    # Unknown labels fall through without producing any update.
    if not (show_image or show_audio):
        return None

    return gr.Row(visible=show_image), gr.Row(visible=show_audio)
# Build the UI: a title, a mode selector, and one row per mode (image
# detection / speech recognition). The rows start with only the image row
# visible and are toggled by update_visibility().
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("# Multimodal America Sign Language")
    with gr.Row():
        # Mode selector; "Gambar" (Indonesian for "image") is the default.
        # NOTE(review): this module-level name shadows the builtin input().
        input = gr.Radio(["Gambar", "Audio"], value="Gambar", label="Pilih mode:")
    # Image mode: visible initially, matching the selector's default value.
    with gr.Row(visible=True) as gambar:
        gr.Interface(
            fn=proses_image,
            inputs=gr.Image(),
            outputs=gr.Image(),
            live=True
        )
    # Audio mode: hidden until "Audio" is chosen in the selector.
    with gr.Row(visible=False) as audio:
        gr.Interface(
            fn=process_audio,
            # The microphone recording is handed to process_audio as a file path.
            inputs=gr.Audio(sources="microphone", type="filepath", streaming=True),
            outputs=gr.Textbox(),
            live=True
        )
    # Whenever the selected mode changes, update the visibility of both rows.
    input.change(update_visibility, inputs=[input], outputs=[gambar, audio])
# Start serving the app.
demo.launch()