File size: 1,645 Bytes
2bd9468
 
8770d52
158913f
2bd9468
9902a40
8770d52
2bd9468
158913f
ef8b4e2
 
8770d52
 
ef8b4e2
8770d52
 
ef8b4e2
8770d52
 
 
158913f
8770d52
87c119f
8770d52
 
 
 
87c119f
8770d52
 
 
c034f68
8770d52
2bd9468
87c119f
23708c8
8770d52
23708c8
8770d52
 
 
23708c8
2bd9468
3382a71
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import gradio as gr
from transformers import pipeline
import requests
import os  # Needed to read the API token from the environment.

# Image-classification pipeline backed by the ViT base checkpoint.
# Built once at import time and reused for every request.
image_model = pipeline(
    task="image-classification",
    model="google/vit-base-patch16-224",
)

# Hugging Face API token taken from the environment (None when the variable
# is not set).
hugging_face_auth_token = os.environ.get("HUGGING_FACE_AUTH_TOKEN")

def get_audiogen(prompt, timeout=120):
    """Request generated audio for *prompt* from the hosted audiogen endpoint.

    Args:
        prompt: Text describing the sound to generate.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout by default, so without one a stalled
            connection would block the caller indefinitely.

    Returns:
        The decoded JSON body of the response, unmodified.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
        ValueError: If the response body is not valid JSON.
    """
    # Only send credentials when a token is actually configured; otherwise the
    # header would carry the literal string "Bearer None".
    headers = {}
    if hugging_face_auth_token:
        headers["Authorization"] = f"Bearer {hugging_face_auth_token}"

    response = requests.post(
        "https://api-inference.huggingface.co/models/fffiloni/audiogen",
        headers=headers,
        json={
            "inputs": prompt,
            "parameters": {"length": 10},
            "options": {"use_cache": False},
        },
        timeout=timeout,
    )
    # Surface HTTP failures here instead of handing an error payload back to
    # the caller as if it were a generation result.
    response.raise_for_status()

    # NOTE(review): the payload is returned as-is. Post-processing of the
    # result (e.g. into something an audio player can consume) is still to be
    # implemented -- TODO confirm the response schema of this endpoint.
    return response.json()

def classify_and_generate_audio(uploaded_image):
    """Classify an image and request audio for its top label.

    Args:
        uploaded_image: Image passed straight to ``image_model``.

    Returns:
        A ``(label, audio_result)`` tuple: the label of the first prediction
        and whatever ``get_audiogen`` returned for that label.
    """
    # The first prediction is treated as the best match (presumably the
    # classifier returns results sorted by score, highest first).
    best_guess = image_model(uploaded_image)[0]
    label = best_guess["label"]

    # NOTE(review): the audio result is forwarded unchanged; converting it
    # into a format Gradio can play is still to be done.
    return label, get_audiogen(label)

# Build the Gradio interface: one PIL image in, a label and an audio player
# out, both produced by classify_and_generate_audio.
iface = gr.Interface(
    fn=classify_and_generate_audio,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Label(), gr.Audio()],
    # Title: "Image classification and audio generation".
    title="이미지 분류 및 오디오 생성",
    # Description: "Upload an image and it will be analysed to describe what
    # it is, and matching audio will be generated."
    description="이미지를 업로드하면, 이미지를 분석하여 무엇인지 설명하고, 해당하는 오디오를 생성합니다.",
)

# Start the interface.
iface.launch()