import gradio as gr
from ultralytics import YOLO
import speech_recognition as sr

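# Load the YOLO weights; 'best.pt' is assumed to be a model trained to detect ASL hand signs.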
model = YOLO('best.pt')


def proses_image(image):
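    # Run YOLO inference on the input image and return the annotated result.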
    results = model(image)
    return results[0].plot()


def process_audio(audio):
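    # Transcribe the recorded audio file with recognize_google(), which calls Google's free Web Speech API.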
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)
    try:
        text = recognizer.recognize_google(audio_data)
        return text
    except sr.UnknownValueError:
        return "Audio could not be recognized."
    except sr.RequestError as e:
        return f"Error from the speech recognition service: {e}"


def update_visibility(input):
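    # Return visibility updates for the two rows based on the selected mode.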
    if input == "Image":
        return gr.Row(visible=True), gr.Row(visible=False)
    elif input == "Audio":
        return gr.Row(visible=False), gr.Row(visible=True)


with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("# Multimodal American Sign Language")
    with gr.Row():
        input = gr.Radio(["Image", "Audio"], value="Image", label="Choose a mode:")
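    # Image mode: detect signs in an uploaded or captured image and show the annotated output.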
    with gr.Row(visible=True) as gambar:
        gr.Interface(
            fn=proses_image,
            inputs=gr.Image(),
            outputs=gr.Image(),
            live=True
        )
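    # Audio mode: stream microphone audio and display the transcription as text.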
    with gr.Row(visible=False) as audio:
        gr.Interface(
            fn=process_audio,
            inputs=gr.Audio(sources=["microphone"], type="filepath", streaming=True),
            outputs=gr.Textbox(),
            live=True
        )

    # Swap which row is visible whenever the mode selector changes.
    input.change(update_visibility, inputs=[input], outputs=[gambar, audio])

demo.launch()