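# msVision_3 / app.py
# Header residue from the web file viewer, kept as comments so the module parses:
#   author avatar: "seawolf2357's picture"
#   commit: e57e37e (verified) "Update app.py"
#   viewer links: raw / history / blame
#   file size: 1.18 kB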
import gradio as gr
from transformers import pipeline
# Assumes the gradio_client library will be used.
# Load the image-recognition pipeline once, at import time.
image_model = pipeline(
    task="image-classification",
    model="google/vit-base-patch16-224",
)
def generate_voice(prompt: str) -> str:
    """Return the URL of a voice clip for ``prompt``.

    Placeholder implementation: the intent noted by the original author is
    to generate speech through the Tango API, but no request is made here.
    ``prompt`` is currently unused and the same example URL is returned for
    every input.

    Args:
        prompt: Text to be voiced (for example, a predicted image label).

    Returns:
        A fixed example audio-file URL.
    """
    # TODO: replace with a real Tango API call (assumed integration).
    return "https://example.com/generated_voice.mp3"  # example voice-file URL
def classify_and_generate_voice(uploaded_image):
    """Classify an uploaded image and produce a voice clip for its label.

    Args:
        uploaded_image: The image to classify, passed straight to
            ``image_model``. May be ``None`` when the form is submitted
            without an image.

    Returns:
        A ``(label, voice)`` tuple: the ``'label'`` field of the first
        prediction and the value ``generate_voice`` returns for that label.
        ``(None, None)`` when no image was supplied.
    """
    # Nothing to classify: return empty values for both outputs instead of
    # letting the classification pipeline fail on a missing image.
    if uploaded_image is None:
        return None, None

    # Image classification.
    predictions = image_model(uploaded_image)
    # Presumably the first entry is the highest-scoring class — confirm
    # against the pipeline's documented output ordering.
    top_prediction = predictions[0]["label"]

    # Voice generation for the predicted label.
    voice_result = generate_voice(top_prediction)
    return top_prediction, voice_result
# Build the Gradio interface and register an example image.
iface = gr.Interface(
    fn=classify_and_generate_voice,
    inputs=gr.Image(type="pil"),
    # Two outputs, matching the callback's (label, voice) return order.
    outputs=[gr.Label(), gr.Audio()],
    # Example image path, supplied as a list of rows.
    # NOTE(review): confirm dog.jpg is shipped alongside this file.
    examples=[["dog.jpg"]],
    # The title/description literals had been corrupted into mojibake
    # (UTF-8 Korean decoded with the wrong code page); the intended text is
    # restored below.
    # Title: "Image classification and voice generation".
    title="이미지 분류 및 음성 생성",
    # Description: "Upload an image and the app recognizes the object and
    # generates the corresponding voice."
    description="이미지를 업로드하면, 사물을 인식하고 해당하는 음성을 생성합니다.",
)
# Run the interface only when this file is executed as a script.
if __name__ == "__main__":
    # share=True is passed through to Gradio's launch(); see the Gradio
    # documentation for how sharing behaves in the hosting environment.
    iface.launch(share=True)