msVision_3

Runtime error

File size: 1,387 Bytes

import gradio as gr
from transformers import pipeline
import soundfile as sf
import io

# Load the image-classification pipeline once at import time so every
# request reuses the same model instance.
model = pipeline(
    task="image-classification",
    model="google/vit-base-patch16-224",
)

# Map each recognised category name to the path of the sound file that
# should be played for it.
sound_files = dict(
    dog="path/to/dog_bark.wav",
    cat="path/to/cat_meow.wav",
    # ... add a sound file path for each further category here
)

def _sound_path_for_label(label, sound_map):
    """Return the sound path registered for *label*, or None if none matches.

    An exact key match wins. Otherwise the label is lower-cased, split into
    words (commas are treated as separators) and the first key of
    *sound_map* that equals one of those words is used. This lets a key
    such as "cat" match classifier labels like "tabby, tabby cat" or
    "Egyptian cat", which an exact lookup would never hit. Matching is on
    whole words, so "cat" does not match "catamaran".
    """
    exact = sound_map.get(label)
    if exact is not None:
        return exact

    label_words = set(label.lower().replace(",", " ").split())
    for keyword, path in sound_map.items():
        if keyword.lower() in label_words:
            return path
    return None


def classify_image(uploaded_image):
    """Classify an image and return its label plus the matching sound.

    Args:
        uploaded_image: The image handed over by the Gradio input
            component, or None when the user submitted without an image.

    Returns:
        A ``(label, audio)`` tuple. ``label`` is the label of the first
        prediction returned by the module-level ``model`` pipeline.
        ``audio`` is the raw bytes of the sound file configured for that
        label in ``sound_files``, or None when no sound is configured or
        the file cannot be read. ``(None, None)`` is returned when there is
        no image or the model yields no predictions.
    """
    # Submitting the form without an image passes None; there is nothing to
    # classify, so clear both outputs instead of crashing in the pipeline.
    if uploaded_image is None:
        return None, None

    predictions = model(uploaded_image)
    if not predictions:
        return None, None

    # The first entry is taken as the most likely prediction.
    top_prediction = predictions[0]['label']

    sound_path = _sound_path_for_label(top_prediction, sound_files)
    if sound_path is None:
        # No sound configured for this label: return the label alone.
        return top_prediction, None

    try:
        with open(sound_path, 'rb') as file:
            audio_data = file.read()
    except OSError as err:
        # A missing or unreadable sound file must not hide the
        # classification result; report it and fall back to no audio.
        import logging

        logging.getLogger(__name__).warning(
            "Could not read sound file %r for label %r: %s",
            sound_path, top_prediction, err,
        )
        return top_prediction, None

    return top_prediction, audio_data

# Build the Gradio interface: one PIL image input, and a label plus a WAV
# audio player as outputs.
_image_input = gr.Image(type="pil")
_result_outputs = [gr.Label(), gr.Audio(format="wav")]

iface = gr.Interface(
    title="이미지 분류 및 사운드 재생",
    description="이미지를 업로드하면, 사물을 인식하고 해당하는 사운드를 재생합니다.",
    fn=classify_image,
    inputs=_image_input,
    outputs=_result_outputs,
)

# Start serving the interface.
iface.launch()