Spaces:
Runtime error
Runtime error
File size: 2,277 Bytes
2bd9468 a6d7b81 2bd9468 9902a40 8770d52 2bd9468 ebcd803 3377e03 a6d7b81 87c119f 3377e03 8770d52 a6d7b81 3377e03 ebcd803 a6d7b81 ebcd803 a6d7b81 ebcd803 ad7babb a6d7b81 23708c8 3377e03 23708c8 8770d52 872e164 11179ad bc25881 23708c8 2bd9468 ad7babb 3382a71 a6d7b81 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import gradio as gr
from transformers import pipeline
from gradio_client import Client  # Assumption: the gradio_client library is available.
# Load the image-classification pipeline once, at import time, so every
# request reuses the same model object.
image_model = pipeline(
    task="image-classification",
    model="google/vit-base-patch16-224",
)
def generate_music(prompt):
    """Request a music clip for *prompt* from the hosted AudioLDM2 Space.

    A new ``Client`` is opened on every call and the remote function at
    ``fn_index=1`` is invoked with fixed generation settings.

    Args:
        prompt: Text describing the music to generate.

    Returns:
        The value returned by ``Client.predict``, passed through unchanged.
    """
    # Positional inputs forwarded to the remote function. The per-slot
    # meanings are translated from the original inline notes and are not
    # verified against the Space's API here.
    # NOTE(review): the prompt fills the first two slots -- confirm the
    # second slot is not a negative-prompt field on the remote side.
    remote_inputs = (
        prompt,
        prompt,  # noted originally as the prompt used for generation
        5,  # length of the music, in seconds
        0,  # guidance scale
        5,  # seed value
        1,  # number of waveforms to generate
    )
    space = Client("https://haoheliu-audioldm2-text2audio-text2music.hf.space/")
    # The result is returned as-is; no post-processing is done here.
    return space.predict(*remote_inputs, fn_index=1)
def generate_voice(prompt):
    """Request an audio clip for *prompt* from the hosted Tango Space.

    A new ``Client`` is opened on every call and the ``/predict`` endpoint
    is invoked with fixed generation settings.

    Args:
        prompt: Text prompt sent to the Space. The caller in this file
            passes the top image-classification label.

    Returns:
        The value returned by ``Client.predict``, passed through unchanged
        (no URL or audio data is extracted from it here).
    """
    steps = 100
    guidance_scale = 1

    tango = Client("https://declare-lab-tango.hf.space/")
    return tango.predict(prompt, steps, guidance_scale, api_name="/predict")
def classify_and_generate_voice(uploaded_image):
    """Classify an image, then generate voice and music from its top label.

    Args:
        uploaded_image: Image handed directly to the module-level
            ``image_model`` pipeline.

    Returns:
        A 3-tuple ``(label, voice_result, music_result)``. The last two are
        whatever ``generate_voice`` and ``generate_music`` return.
        Gradio expects one output component per returned value.
    """
    # Entry 0 of the pipeline output is used as the best match (the original
    # notes describe it as the highest-probability classification).
    best_label = image_model(uploaded_image)[0]['label']
    music_prompt = "Generate music for: " + best_label

    # Voice is requested first, then music, matching the original call order.
    voice_clip = generate_voice(best_label)
    music_clip = generate_music(music_prompt)

    return best_label, voice_clip, music_clip
# Build the Gradio interface.
iface = gr.Interface(
    fn=classify_and_generate_voice,
    inputs=gr.Image(type="pil"),
    # One output component per value returned by classify_and_generate_voice:
    # the predicted label, the voice result, and the music result. Declaring
    # only two components makes Gradio reject the three-value return.
    # NOTE(review): both generated results are shown with gr.Audio -- confirm
    # that the music Space returns an audio file rather than another media type.
    outputs=[gr.Label(), gr.Audio(), gr.Audio()],
    title="msVision_3",
    # The Korean text was mis-encoded and split across two lines, which left
    # the string literal unterminated; it is restored here on a single line.
    # NOTE(review): reconstructed from the garbled bytes -- confirm wording.
    description="이미지를 업로드하면, 사물을 인식하고 해당하는 음성을 생성합니다.(recognizes the object and generate voice)",
    # Sample inputs; the paths are relative, so the files are presumably
    # shipped alongside this script.
    examples=["dog.jpg", "cat.png", "cafe.jpg"],
)

# Start serving the interface.
iface.launch()
|