File size: 1,184 Bytes
2bd9468
 
e57e37e
2bd9468
9902a40
8770d52
2bd9468
3377e03
e57e37e
 
87c119f
3377e03
8770d52
 
e57e37e
3377e03
 
 
2bd9468
e57e37e
23708c8
3377e03
23708c8
8770d52
e57e37e
3377e03
 
23708c8
2bd9468
3382a71
1defd08
e57e37e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import gradio as gr
from transformers import pipeline
# Assumes the gradio_client library will be used (it is not imported in the visible code)

# Load the image-recognition pipeline once, at import time, so every request
# reuses the same model instance.
image_model = pipeline(
    task="image-classification",
    model="google/vit-base-patch16-224",
)

def generate_voice(prompt):
    """Return the location of a voice clip for ``prompt``.

    This is a placeholder: the voice-generation call (the original note
    mentions the Tango API) is not wired up, so ``prompt`` is currently
    unused and a fixed example URL is returned for every input.
    """
    # Example audio file URL standing in for a real generated clip.
    placeholder_url = "https://example.com/generated_voice.mp3"
    return placeholder_url

def classify_and_generate_voice(uploaded_image):
    """Classify an uploaded image and produce a voice clip for its top label.

    Args:
        uploaded_image: The image handed over by the Gradio image input, or
            ``None`` when the request was submitted without an image.

    Returns:
        A ``(label, voice)`` tuple: the label of the first prediction and the
        value returned by ``generate_voice`` for that label. When no image
        was supplied, ``(None, None)`` is returned so both output components
        are simply cleared instead of the classifier raising on ``None``.
    """
    # Nothing to classify: bail out before touching the model.
    if uploaded_image is None:
        return None, None

    # Image classification. The first entry is used as the top prediction
    # (presumably the pipeline returns candidates ordered by score).
    predictions = image_model(uploaded_image)
    top_prediction = predictions[0]["label"]

    # Voice generation for the recognized label.
    voice_result = generate_voice(top_prediction)
    return top_prediction, voice_result

# Build the Gradio interface and register the example image.
# The title and description are user-facing Korean text; they are stored as
# proper UTF-8 Korean rather than the mis-decoded (mojibake) byte sequences.
iface = gr.Interface(
    fn=classify_and_generate_voice,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Label(), gr.Audio()],
    examples=[["dog.jpg"]],  # example image path, added as a list (one row per example)
    # "Image classification and voice generation"
    title="이미지 분류 및 음성 생성",
    # "Upload an image and the app recognizes the object and generates the matching voice."
    description="이미지를 업로드하면, 사물을 인식하고 해당하는 음성을 생성합니다.",
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a public share link — confirm
    # that exposing the app publicly is intended.
    iface.launch(share=True)