api-proxy-demo / app.py
Sunghyun Jun
Update generate_speech demo
431ceed
import requests
import gradio as gr
import os
import time
# Initialize and validate the required environment variables.
def initialize_environment():
    """Read and validate the environment variables this app requires.

    Returns:
        A ``(private_space_url, hf_token)`` tuple taken from the
        ``PRIVATE_SPACE_URL`` and ``HF_TOKEN`` environment variables.

    Raises:
        EnvironmentError: If either variable is unset or empty.
            ``PRIVATE_SPACE_URL`` is checked first.
    """
    space_url = os.environ.get("PRIVATE_SPACE_URL")
    token = os.environ.get("HF_TOKEN")

    # Guard clauses: an unset or empty value is a configuration error.
    if not space_url:
        raise EnvironmentError("PRIVATE_SPACE_URL ν™˜κ²½ λ³€μˆ˜κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
    if not token:
        raise EnvironmentError("HF_TOKEN ν™˜κ²½ λ³€μˆ˜κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")

    return space_url, token
# Resolve the required settings once at import time; a missing variable
# aborts startup with EnvironmentError.
PRIVATE_SPACE_URL, HF_TOKEN = initialize_environment()
# Send a GET request to the base URL.
def test_base_url():
    """Send an authenticated GET to the base URL and log the status code.

    This is a connectivity probe only: nothing is returned, and request
    failures are logged instead of raised.
    """
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    print(f"[DEBUG] Sending GET request to base URL: {PRIVATE_SPACE_URL}")
    # Keep the bearer token itself out of the logs.
    print("[DEBUG] HF Token: <redacted>")
    try:
        # requests waits indefinitely unless a timeout is given.
        response = requests.get(PRIVATE_SPACE_URL, headers=headers, timeout=10)
        print(f"[DEBUG] Base URL Response Status Code: {response.status_code}")
    except requests.exceptions.RequestException as e:
        print(f"[ERROR] Base URL μš”μ²­ 쀑 였λ₯˜ λ°œμƒ: {e}")
# Look up actor IDs.
def fetch_actor_ids():
    """Fetch the actor list and map each English actor name to its ID.

    Returns:
        A dict of ``actor["name"]["en"]`` to ``actor["actor_id"]`` built from
        the ``result`` list of the ``/api/actor`` response. An empty dict is
        returned when the request fails or the response cannot be parsed.
    """
    url = f"{PRIVATE_SPACE_URL}/api/actor"
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    try:
        # requests waits indefinitely unless a timeout is given.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        # Extract the name and ID of every actor in the list.
        return {
            actor["name"]["en"]: actor["actor_id"]
            for actor in data.get("result", [])
        }
    except (
        requests.exceptions.RequestException,
        ValueError,  # body is not valid JSON
        KeyError,  # an actor entry lacks "name"/"en"/"actor_id"
        TypeError,  # "result" or an entry has an unexpected type
        AttributeError,  # top-level JSON value is not an object
    ) as e:
        # This runs at import time to fill the dropdown, so a malformed
        # response must degrade to "no actors" rather than abort startup.
        print(f"[ERROR] Actor ID 쑰회 쀑 였λ₯˜ λ°œμƒ: {e}")
        return {}
# Rewrite upstream URLs to go through the private Space.
def replace_speak_url(url):
    """Point an upstream URL at the private Space instead.

    Every occurrence of ``https://create-test.icepeak.ai`` in *url* is
    replaced with ``PRIVATE_SPACE_URL``.

    Returns:
        The rewritten URL, or ``None`` when *url* is empty or ``None``.
    """
    upstream_base = "https://create-test.icepeak.ai"
    return url.replace(upstream_base, PRIVATE_SPACE_URL) if url else None
# μŒμ„± 생성 ν•¨μˆ˜
def generate_speech(text, actor_name, lang="en", speed_x=1.0, volume=100):
    """Synthesize *text* with the named actor and save the audio to disk.

    Steps: resolve the actor ID, POST the request to ``/api/speak``, poll the
    returned speak URL until the audio is ready, download it, and write it to
    a uniquely named temporary ``.wav`` file.

    Args:
        text: Text to synthesize.
        actor_name: Actor name; must be a key of ``fetch_actor_ids()``.
        lang: Value sent as ``lang`` in the request payload.
        speed_x: Value sent as ``speed_x`` in the request payload.
        volume: Value sent as ``volume`` in the request payload.

    Returns:
        A ``(message, audio_file_path)`` tuple. ``audio_file_path`` is
        ``None`` whenever any step fails; ``message`` then describes the
        failure.
    """
    import tempfile

    actor_ids = fetch_actor_ids()
    if not actor_ids:
        return "Actor λͺ©λ‘μ„ κ°€μ Έμ˜€λŠ” 데 μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€.", None
    actor_id = actor_ids.get(actor_name)
    if not actor_id:
        return "μ„ νƒν•œ Actorλ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.", None

    url = f"{PRIVATE_SPACE_URL}/api/speak"
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }
    payload = {
        "actor_id": actor_id,
        "lang": lang,
        "text": text,
        "speed_x": speed_x,
        "volume": volume,
        "tts_mode": "actor",
    }
    print(f"[DEBUG] Sending speech generation request to: {url}")
    print(f"[DEBUG] Payload: {payload}")
    try:
        # requests waits indefinitely unless a timeout is given.
        response = requests.post(url, json=payload, headers=headers, timeout=30)
        print(f"[DEBUG] Response Status Code: {response.status_code}")
        print(f"[DEBUG] Response Content: {response.text}")
        response.raise_for_status()
        data = response.json()
        print(f"[DEBUG] Response Data: {data}")

        # Tolerate a non-object body or a null "result" instead of raising.
        result = data.get("result") if isinstance(data, dict) else None
        speak_url = replace_speak_url((result or {}).get("speak_url"))
        if not speak_url:
            return "였λ₯˜: μœ νš¨ν•œ speak_url을 λ°˜ν™˜ν•˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.", None

        # Poll until audio generation completes.
        audio_url = poll_audio_url(speak_url)
        if not audio_url:
            return "였λ₯˜: μ˜€λ””μ˜€ λ‹€μš΄λ‘œλ“œ URL을 κ°€μ Έμ˜¬ 수 μ—†μŠ΅λ‹ˆλ‹€.", None

        # Download the audio file.
        audio_content = download_audio(audio_url)
        if not audio_content:
            return "였λ₯˜: μŒμ„± νŒŒμΌμ„ λ‹€μš΄λ‘œλ“œν•  수 μ—†μŠ΅λ‹ˆλ‹€.", None

        # Use a unique file per call: a fixed file name would let concurrent
        # requests overwrite each other's audio. delete=False keeps the file
        # on disk so the returned path stays valid for the caller.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_file:
            audio_file.write(audio_content)
            audio_file_path = audio_file.name

        file_size = os.path.getsize(audio_file_path)
        print(f"[DEBUG] Saved audio file: {audio_file_path}, Size: {file_size} bytes")
        return "μŒμ„±μ΄ μ„±κ³΅μ μœΌλ‘œ μƒμ„±λ˜μ—ˆμŠ΅λ‹ˆλ‹€.", audio_file_path
    except (requests.exceptions.RequestException, ValueError, OSError) as e:
        # ValueError covers a non-JSON response body; OSError covers a failed
        # write of the audio file.
        print(f"[ERROR] μŒμ„± 생성 쀑 였λ₯˜ λ°œμƒ: {e}")
        return f"였λ₯˜: {str(e)}", None
# Poll for the generated audio.
def poll_audio_url(speak_url, timeout=30, interval=2):
    """Poll *speak_url* until the audio is ready and return its URL.

    Args:
        speak_url: Status URL to poll.
        timeout: Overall polling budget in seconds.
        interval: Seconds to wait between attempts.

    Returns:
        The audio URL, passed through ``replace_speak_url``, from the first
        response whose ``result.status`` is ``"done"``. ``None`` is returned
        if that response carries no URL or the budget runs out first.
    """
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    # A monotonic clock is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        try:
            print(f"[DEBUG] Polling speak URL: {speak_url}")
            # Cap each request at the remaining budget (at least 1 s) so a
            # single stalled request cannot outlive the polling timeout.
            response = requests.get(
                speak_url, headers=headers, timeout=max(remaining, 1)
            )
            response.raise_for_status()
            data = response.json()
            # Tolerate a non-object body or a null "result"/"audio".
            result = (data.get("result") if isinstance(data, dict) else None) or {}
            if result.get("status") == "done":
                audio_info = result.get("audio") or {}
                audio_url = replace_speak_url(audio_info.get("url"))
                print(f"[DEBUG] Audio URL: {audio_url}")
                return audio_url
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a response body that is not valid JSON.
            print(f"[DEBUG] Polling attempt failed: {e}")
        # Never sleep past the deadline.
        time.sleep(max(0, min(interval, deadline - time.monotonic())))
    print("[ERROR] Polling timed out.")
    return None
# Download the audio.
def download_audio(audio_url):
    """Download the audio at *audio_url* and return its raw bytes.

    Returns:
        The response body as ``bytes``, or ``None`` if the request fails or
        the server answers with an error status.
    """
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    try:
        print(f"[DEBUG] Downloading audio from: {audio_url}")
        # The whole body is read right away, so streaming bought nothing.
        # The timeout keeps a stalled server from hanging the request.
        response = requests.get(audio_url, headers=headers, timeout=30)
        response.raise_for_status()
        return response.content
    except requests.exceptions.RequestException as e:
        print(f"[ERROR] Audio λ‹€μš΄λ‘œλ“œ 쀑 였λ₯˜ λ°œμƒ: {e}")
        return None
# Gradio interface callback.
def interface_function(text, actor_name, lang, speed_x, volume):
    """Gradio callback: run speech generation and return its outputs.

    Returns:
        A ``(result_message, audio_file_path)`` tuple from
        ``generate_speech``, with any falsy path normalized to ``None``.
    """
    message, audio_path = generate_speech(text, actor_name, lang, speed_x, volume)
    return message, (audio_path or None)
# Fetch the actors once at startup; their names populate the dropdown.
actors = fetch_actor_ids()
actor_names = list(actors)
if not actors:
    print("[WARNING] Actor λͺ©λ‘μ„ κ°€μ Έμ˜¬ 수 μ—†μ–΄ κΈ°λ³Έ μ˜΅μ…˜μ΄ μ„€μ •λ©λ‹ˆλ‹€.")

# Probe the base URL and log the result.
test_base_url()
# Build the Gradio interface. The input components are listed in the same
# order as the parameters of interface_function.
_input_components = [
    gr.Textbox(label="ν…μŠ€νŠΈ μž…λ ₯", placeholder="여기에 μŒμ„±μ„ ν•©μ„±ν•  ν…μŠ€νŠΈλ₯Ό μž…λ ₯ν•˜μ„Έμš”."),
    gr.Dropdown(choices=actor_names, label="Actor 선택", interactive=True),
    gr.Dropdown(choices=["en", "ko"], value="en", label="μ–Έμ–΄"),
    gr.Slider(minimum=0.5, maximum=2.0, step=0.1, value=1.0, label="속도"),
    gr.Slider(minimum=50, maximum=200, step=10, value=100, label="λ³Όλ₯¨"),
]

# Output components receive the (message, audio path) pair from the callback.
_output_components = [
    gr.Textbox(label="κ²°κ³Ό λ©”μ‹œμ§€"),
    gr.Audio(label="μƒμ„±λœ μŒμ„±"),
]

interface = gr.Interface(
    fn=interface_function,
    inputs=_input_components,
    outputs=_output_components,
    title="μŒμ„± 생성 데λͺ¨",
    description="쿼리 νŒŒλΌλ―Έν„°κ°€ 적용된 Actor 쑰회 ν›„ μŒμ„±μ„ μƒμ„±ν•©λ‹ˆλ‹€.",
)

# Launch the server only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()