"""Multimodal American Sign Language demo.

Gradio app with two input modes: an image mode that runs a YOLO
sign-language detector on the uploaded frame, and an audio mode that
transcribes microphone speech via Google's speech-recognition service.
UI strings are in Indonesian.
"""
import gradio as gr
from ultralytics import YOLO
import speech_recognition as sr

# Load the trained YOLO detection weights once at startup so every
# image request reuses the same model instance.
model = YOLO('best.pt')


def proses_image(image):
    """Run YOLO detection on *image* and return the annotated frame."""
    results = model(image)
    # results[0].plot() renders the detections onto the image as an array.
    return results[0].plot()


def process_audio(audio):
    """Transcribe the audio file at path *audio* to text.

    Returns the recognized text, or an Indonesian error message
    (matching the UI language) when recognition fails or the
    recognition service is unreachable.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Audio tidak dapat dikenali."
    except sr.RequestError as e:
        return f"Error dengan layanan pengenalan suara: {e}"


def update_visibility(mode):
    """Show the row matching the selected *mode* ("Gambar" or "Audio").

    Returns (image_row_update, audio_row_update) for the two rows.
    Renamed the parameter from ``input`` to avoid shadowing the builtin.
    """
    if mode == "Gambar":
        return gr.Row(visible=True), gr.Row(visible=False)
    # The radio has only two options, so anything else means "Audio".
    # (The original returned None implicitly for unknown values.)
    return gr.Row(visible=False), gr.Row(visible=True)


with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("# Multimodal America Sign Language")
    with gr.Row():
        # Renamed from `input` (shadowed the builtin) to `mode`.
        mode = gr.Radio(["Gambar", "Audio"], value="Gambar", label="Pilih mode:")
    # Image mode: visible by default, matching the radio's initial value.
    with gr.Row(visible=True) as gambar:
        gr.Interface(
            fn=proses_image,
            inputs=gr.Image(),
            outputs=gr.Image(),
            live=True,
        )
    # Audio mode: hidden until selected; streams microphone input.
    with gr.Row(visible=False) as audio:
        gr.Interface(
            fn=process_audio,
            inputs=gr.Audio(sources="microphone", type="filepath", streaming=True),
            outputs=gr.Textbox(),
            live=True,
        )
    # Toggle which row is shown whenever the radio selection changes.
    mode.change(update_visibility, inputs=[mode], outputs=[gambar, audio])

if __name__ == "__main__":
    # Guarded so importing this module does not start the server.
    demo.launch()