"""Gradio demo: classify an uploaded image and return a matching sound clip."""

import io
import os

import gradio as gr
import soundfile as sf
from transformers import pipeline

# Load the image-classification pipeline once, at import time, so every
# request reuses the same model instance.
model = pipeline(
    task="image-classification",
    model="google/vit-base-patch16-224",
)

# Maps a category name to the path of the sound file to play for it.
sound_files = {
    "dog": "path/to/dog_bark.wav",
    "cat": "path/to/cat_meow.wav",
    # ... add a sound file path for each further category
}

def classify_image(uploaded_image):
    """Classify an uploaded image and look up a sound clip for the top label.

    Args:
        uploaded_image: The image handed over by the Gradio input component
            (configured with ``type="pil"`` in this file), or ``None`` when
            nothing was uploaded.

    Returns:
        A ``(label, audio)`` tuple. ``label`` is the ``'label'`` field of the
        first prediction, or ``None`` when there is no image or no
        prediction. ``audio`` is the path of the matching sound file, or
        ``None`` when no entry of ``sound_files`` matches the label or the
        file does not exist on disk.
    """
    # Submitting the form without an image passes None; skip the model call
    # instead of letting the pipeline raise.
    if uploaded_image is None:
        return None, None

    predictions = model(uploaded_image)
    if not predictions:
        return None, None
    # Take the first prediction as the most probable one.
    top_prediction = predictions[0]['label']

    # An exact key match wins, so existing entries keep working as before.
    sound_path = sound_files.get(top_prediction)
    if sound_path is None:
        # NOTE(review): ImageNet-style checkpoints appear to emit labels such
        # as "tabby, tabby cat" rather than a bare "cat" - confirm against
        # the model card. Fall back to a case-insensitive whole-word match so
        # short category keys can still be found. This is a heuristic.
        label_words = set(top_prediction.lower().replace(",", " ").split())
        sound_path = next(
            (
                path
                for key, path in sound_files.items()
                if key.lower() in label_words
            ),
            None,
        )

    # Return the path rather than the file's raw bytes: a file path is an
    # accepted output for gr.Audio and avoids loading the clip into memory.
    # NOTE(review): raw bytes seem to be accepted only by newer Gradio
    # releases - confirm for the pinned version.
    # A path that does not exist (e.g. a placeholder) means "no audio".
    if sound_path is not None and os.path.isfile(sound_path):
        return top_prediction, sound_path
    return top_prediction, None

# Build the Gradio interface: one image input, a label output and an audio
# output, all driven by classify_image.
iface = gr.Interface(
    fn=classify_image,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Label(), gr.Audio(format="wav")],
    # User-facing Korean text, restored from mis-decoded UTF-8 so the page
    # no longer shows garbled characters.
    # Title: "Image classification and sound playback".
    title="이미지 분류 및 사운드 재생",
    # Description: "Upload an image and the app recognises the object and
    # plays the matching sound."
    description="이미지를 업로드하면, 사물을 인식하고 해당하는 사운드를 재생합니다.",
)

# Start the interface.
iface.launch()