File size: 1,550 Bytes
e78e1d7 f626de0 e78e1d7 f626de0 428f20d f626de0 e78e1d7 f626de0 850343e e78e1d7 850343e e78e1d7 850343e e78e1d7 428f20d e78e1d7 f626de0 850343e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import gradio as gr
from ultralytics import YOLO
import speech_recognition as sr
# Load the YOLO model from 'best.pt' once at import time so that every call to
# proses_image reuses the same instance instead of reloading it per request.
model = YOLO("best.pt")
def proses_image(image):
    """Run the YOLO model on an image and return the annotated picture.

    Args:
        image: The image to analyse, or None when the input is empty. The
            Gradio Image input in this file uses its default settings, which
            are expected to deliver an RGB NumPy array of shape (H, W, 3).

    Returns:
        The annotated image as an RGB NumPy array, or None when no image was
        supplied (which clears the output component).
    """
    # Local import keeps this block self-contained; numpy is already a
    # dependency of ultralytics.
    import numpy as np

    # A live interface may call this with None (e.g. after the image is
    # cleared). Passing None on would make Ultralytics fall back to its
    # bundled sample images instead of showing an empty result.
    if image is None:
        return None

    # Ultralytics interprets NumPy input as BGR (OpenCV convention), while
    # Gradio supplies RGB, so swap the channel order before inference.
    if isinstance(image, np.ndarray) and image.ndim == 3 and image.shape[2] == 3:
        image = np.ascontiguousarray(image[..., ::-1])

    results = model(image)

    # Results.plot() returns a BGR array; convert back to RGB for display.
    annotated_bgr = results[0].plot()
    return np.ascontiguousarray(annotated_bgr[..., ::-1])
def process_audio(audio):
    """Transcribe a recorded audio file with Google's speech recognition.

    Args:
        audio: Path to the audio file to transcribe (the Gradio Audio input in
            this file is configured with type="filepath"), or None / an empty
            string when there is no recording.

    Returns:
        The recognized text, an empty string when no audio was supplied, or a
        user-facing error message when recognition fails.
    """
    # A live/streaming interface may call this without a recording (e.g. when
    # the input is cleared); sr.AudioFile cannot open a missing path.
    if not audio:
        return ""

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        # Read the whole file into memory before the file handle is closed.
        audio_data = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # The service answered but could not make out any speech.
        return "Audio tidak dapat dikenali."
    except sr.RequestError as e:
        # The recognition service could not be reached or rejected the request.
        return f"Error dengan layanan pengenalan suara: {e}"
def update_visibility(input):
    """Return row updates that show only the section for the selected mode.

    Args:
        input: The selected mode, "Gambar" (image) or "Audio".

    Returns:
        A pair of gr.Row updates ordered (image row, audio row) with exactly
        one of them visible, or None when the mode is not recognised.
    """
    # Unknown modes fall through without producing any update.
    if input not in ("Gambar", "Audio"):
        return None

    image_selected = input == "Gambar"
    return gr.Row(visible=image_selected), gr.Row(visible=not image_selected)
# Build the page: a title, a mode selector, and one row per mode. Only the row
# matching the selected mode is visible at any time.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("# Multimodal American Sign Language")

    with gr.Row():
        # Named mode_selector rather than `input` so the builtin input() is
        # not shadowed at module level.
        mode_selector = gr.Radio(
            ["Gambar", "Audio"], value="Gambar", label="Pilih mode:"
        )

    # The image row starts visible to match the selector's initial value.
    with gr.Row(visible=True) as image_row:
        gr.Interface(
            fn=proses_image,
            inputs=gr.Image(),
            outputs=gr.Image(),
            live=True,
        )

    with gr.Row(visible=False) as audio_row:
        gr.Interface(
            fn=process_audio,
            inputs=gr.Audio(sources="microphone", type="filepath", streaming=True),
            outputs=gr.Textbox(),
            live=True,
        )

    # The outputs order must match the (image row, audio row) pair returned by
    # update_visibility.
    mode_selector.change(
        update_visibility,
        inputs=[mode_selector],
        outputs=[image_row, audio_row],
    )

demo.launch()
|