# msVision_3 / app.py
import os
import tempfile

import gradio as gr
import requests
from transformers import pipeline

# Load the image-classification pipeline
image_model = pipeline("image-classification", model="google/vit-base-patch16-224")
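# Note: google/vit-base-patch16-224 is a Vision Transformer fine-tuned on
# ImageNet-1k, so the predicted labels come from the 1,000 ImageNet classes.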

# Hugging Face API token for the Inference API; the original hard-coded an
# invalid Authorization value, so reading a token from the environment is
# assumed here.
HF_API_TOKEN = os.getenv("HF_API_TOKEN", "")

def get_audiogen(prompt):
    # Call the audio-generation model through the Hugging Face Inference API
    response = requests.post(
        "https://api-inference.huggingface.co/models/fffiloni/audiogen",
        headers={"Authorization": f"Bearer {HF_API_TOKEN}"},
        json={"inputs": prompt, "parameters": {"length": 10}, "options": {"use_cache": False}},
    )
    response.raise_for_status()
    # Process the result here, e.g. return the URL of the generated audio or
    # the audio data itself. The Inference API usually returns raw audio bytes
    # for audio models (errors come back as JSON), so raw bytes are assumed.
    return response.content
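
# Note (assumption): the Inference API can respond with HTTP 503 while the
# model is still loading; passing "wait_for_model": True in the "options"
# payload above is one way to make the request wait instead of failing.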

def classify_and_generate_audio(uploaded_image):
    # Classify the image and keep the highest-probability label
    predictions = image_model(uploaded_image)
    top_prediction = predictions[0]["label"]

    # Generate audio that matches the predicted label
    audio_bytes = get_audiogen(top_prediction)

    # Return the audio in a form Gradio can play: write the raw bytes to a
    # temporary file and return its path (format assumed to be playable as-is).
    audio_path = os.path.join(tempfile.gettempdir(), "generated_audio.wav")
    with open(audio_path, "wb") as f:
        f.write(audio_bytes)
    return top_prediction, audio_path

# Build the Gradio interface
iface = gr.Interface(
    fn=classify_and_generate_audio,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Label(), gr.Audio()],
    title="Image Classification and Audio Generation",
    description="Upload an image; the app analyzes it, describes what it shows, and generates matching audio.",
)

# Launch the interface
iface.launch()
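
# Minimal usage sketch (assumes a local run with a Hugging Face access token
# exported as HF_API_TOKEN; the token value below is hypothetical):
#
#   export HF_API_TOKEN=hf_xxxxxxxxxxxx
#   python app.py
#
# Gradio serves the interface at http://127.0.0.1:7860 by default.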