msVision_3 / app.py
import gradio as gr
from transformers import pipeline
from PIL import Image
from pydub import AudioSegment
from pydub.playback import play

# Load the image-classification pipeline
model = pipeline("image-classification", model="google/vit-base-patch16-224")
# Map predicted categories to sound file paths
sound_files = {
    "dog": "dog_bark.mp3",
    "cat": "cat_meow.mp3",
    # ... add a sound file path for each category
}
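
# Note: google/vit-base-patch16-224 is trained on ImageNet-1k, so its predicted labels are
# fine-grained class names such as "golden retriever" or "tabby, tabby cat" rather than the
# coarse keys above, and an exact dictionary lookup will rarely match. A minimal sketch of a
# coarse mapping, assuming substring matching is acceptable (it only catches labels that
# literally contain "dog" or "cat"); to_sound_key is a hypothetical helper, not used below:
def to_sound_key(label):
    # Return the first sound_files key contained in the predicted label, or None.
    label = label.lower()
    for key in sound_files:
        if key in label:
            return key
    return None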

def classify_image(uploaded_image):
    predictions = model(uploaded_image)
    # Take the label of the highest-probability prediction
    top_prediction = predictions[0]['label']
    # Play the sound file that corresponds to the prediction, if one is defined
    if top_prediction in sound_files:
        sound_path = sound_files[top_prediction]
        sound = AudioSegment.from_file(sound_path)
        play(sound)
    return {prediction['label']: prediction['score'] for prediction in predictions}
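
# Note: pydub.playback.play outputs audio on the machine running this script, so on a hosted
# Space the sound plays server-side and the browser user hears nothing. A minimal sketch of a
# browser-side alternative, assuming the Interface below gained an extra
# gr.Audio(type="filepath") output (hypothetical variant, not wired in):
#
#   def classify_image_with_audio(uploaded_image):
#       predictions = model(uploaded_image)
#       scores = {p['label']: p['score'] for p in predictions}
#       sound_path = sound_files.get(predictions[0]['label'])  # may be None
#       return scores, sound_path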

# Create the Gradio interface
iface = gr.Interface(fn=classify_image,
                     inputs=gr.Image(type="pil"),
                     outputs=gr.Label(num_top_classes=3),
                     title="Image Classification and Sound Playback",
                     description="Upload an image; the app recognizes the object and plays the matching sound.")

# Launch the interface
iface.launch()