# Source: json / app.py
# Author avatar: sheikhed's picture
# Commit message: Update app.py
# Commit: 355b39c (verified)
# Page links: raw | history | blame
# File size: 10.8 kB
import os
import requests
import json
import time
import subprocess
import gradio as gr
import uuid
from dotenv import load_dotenv
# Load environment variables (python-dotenv) before reading any settings.
load_dotenv()

# API keys: A_KEY is sent as the "xi-api-key" header to ElevenLabs,
# B_KEY as the "x-api-key" header to the lipsync API.
A_KEY = os.environ.get("A_KEY")
B_KEY = os.environ.get("B_KEY")

# Endpoints: API_URL is the lipsync job endpoint, UPLOAD_URL the file host.
API_URL = os.environ.get("API_URL")
UPLOAD_URL = os.environ.get("UPLOAD_URL")
# Scratch directory for per-session audio and video files.
TEMP_DIR = "temp"
# exist_ok=True avoids the check-then-create race when several processes
# start at the same time.
os.makedirs(TEMP_DIR, exist_ok=True)
def get_voices():
    """Fetch the voices available to the configured ElevenLabs account.

    Returns:
        A list of ``(name, voice_id)`` tuples. An empty list is returned when
        the request fails, times out, responds with a non-200 status, or the
        payload is not the expected JSON object.
    """
    url = "https://api.elevenlabs.io/v1/voices"
    headers = {
        "Accept": "application/json",
        "xi-api-key": A_KEY,
    }
    try:
        # Without a timeout a stalled connection would block UI construction.
        response = requests.get(url, headers=headers, timeout=30)
        if response.status_code != 200:
            return []
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or a body that is not valid JSON.
        return []

    if not isinstance(payload, dict):
        return []
    return [
        (voice["name"], voice["voice_id"])
        for voice in payload.get("voices") or []
        if isinstance(voice, dict) and "name" in voice and "voice_id" in voice
    ]
def get_video_models():
    """List the video files available in the ``models`` directory.

    Returns:
        File names (not full paths) ending in ``.mp4``, ``.avi`` or ``.mov``
        (case-insensitive), sorted so the dropdown order is stable across
        runs. An empty list is returned when the directory is missing or
        cannot be read.
    """
    try:
        entries = os.listdir("models")
    except OSError:
        return []
    return sorted(
        name for name in entries
        if name.lower().endswith((".mp4", ".avi", ".mov"))
    )
def text_to_speech(voice_id, text, session_id):
    """Synthesize *text* with ElevenLabs and save it as an MP3 in TEMP_DIR.

    Args:
        voice_id: ElevenLabs voice identifier inserted into the request URL.
        text: Text to be spoken.
        session_id: Unique token used to name the output file.

    Returns:
        Path of the written ``temp_voice_<session_id>.mp3`` file, or ``None``
        when the request fails, times out, responds with a non-200 status, or
        the file cannot be written.
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": A_KEY,
    }
    data = {
        "text": text,
        "model_id": "eleven_turbo_v2_5",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5,
        },
    }
    try:
        # A timeout keeps a stalled API call from hanging the request forever.
        response = requests.post(url, json=data, headers=headers, timeout=120)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None

    audio_file_path = os.path.join(TEMP_DIR, f'temp_voice_{session_id}.mp3')
    try:
        with open(audio_file_path, 'wb') as audio_file:
            audio_file.write(response.content)
    except OSError:
        return None
    return audio_file_path
def process_uploaded_audio(audio_path, session_id):
    """Validate an uploaded audio file and place an MP3 copy in TEMP_DIR.

    Args:
        audio_path: Path of the uploaded file; may be empty or ``None``.
        session_id: Unique token used to name the output file.

    Returns:
        Path of ``temp_voice_<session_id>.mp3`` in TEMP_DIR, or ``None`` when
        no path was given, the extension is not one of
        ``.mp3/.wav/.m4a/.aac``, or the copy / ffmpeg conversion fails.
    """
    # Local import so this function does not depend on the file's import block.
    import shutil

    if not audio_path:
        return None

    ext = os.path.splitext(audio_path)[1].lower()
    if ext not in ('.mp3', '.wav', '.m4a', '.aac'):
        return None

    output_path = os.path.join(TEMP_DIR, f'temp_voice_{session_id}.mp3')
    try:
        if ext == '.mp3':
            # Already MP3: stream-copy instead of reading the whole file
            # into memory.
            shutil.copyfile(audio_path, output_path)
        else:
            cmd = [
                'ffmpeg', '-i', audio_path,
                '-codec:a', 'libmp3lame', '-qscale:a', '2',
                '-y', output_path
            ]
            subprocess.run(cmd, check=True)
    except (OSError, subprocess.CalledProcessError):
        # Missing source file, missing ffmpeg binary, or a failed conversion:
        # report failure the same way as the validation checks above.
        return None
    return output_path
def upload_file(file_path):
    """Upload a local file to UPLOAD_URL as a multipart form post.

    Args:
        file_path: Path of the file to upload.

    Returns:
        The stripped response body on HTTP 200 (the caller uses it as the
        file's URL), otherwise ``None``.

    Raises:
        OSError: If *file_path* cannot be opened.
        requests.RequestException: On network failure or timeout.
    """
    with open(file_path, 'rb') as file:
        files = {'fileToUpload': (os.path.basename(file_path), file)}
        data = {'reqtype': 'fileupload'}
        # Generous timeout: uploads can be large, but must not hang forever.
        response = requests.post(UPLOAD_URL, files=files, data=data, timeout=300)
    if response.status_code == 200:
        return response.text.strip()
    return None
def lipsync_api_call(video_url, audio_url):
    """Submit a lipsync job to API_URL.

    Args:
        video_url: URL of the source video.
        audio_url: URL of the audio track to sync to.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    headers = {
        "Content-Type": "application/json",
        "x-api-key": B_KEY,
    }
    data = {
        "audioUrl": audio_url,
        "videoUrl": video_url,
        "maxCredits": 1000,
        "model": "sync-1.6.0",
        "synergize": True,
        "pads": [0, 5, 0, 0],
        "synergizerStrength": 1,
    }
    # json= serializes the payload the same way text_to_speech does; the
    # timeout prevents a stalled request from blocking the worker.
    response = requests.post(API_URL, headers=headers, json=data, timeout=60)
    return response.json()
def check_job_status(job_id, max_attempts=30, poll_interval=10):
    """Poll the lipsync API until the job completes, fails, or polling ends.

    Args:
        job_id: Identifier appended to API_URL to address the job.
        max_attempts: Maximum number of status requests.
        poll_interval: Seconds to wait between status requests.

    Returns:
        The job's ``videoUrl`` once its status is ``"COMPLETED"``; ``None``
        if the status is ``"FAILED"``, the completed payload has no
        ``videoUrl``, or the job is still unfinished after *max_attempts*.
    """
    headers = {"x-api-key": B_KEY}
    for attempt in range(max_attempts):
        try:
            response = requests.get(
                f"{API_URL}/{job_id}", headers=headers, timeout=30
            )
            data = response.json()
        except (requests.RequestException, ValueError):
            # A transient network error or non-JSON body should not abort the
            # whole polling loop; treat it as "no status yet".
            data = None

        # Error payloads may lack "status", so never index it directly.
        status = data.get("status") if isinstance(data, dict) else None
        if status == "COMPLETED":
            return data.get("videoUrl")
        if status == "FAILED":
            return None

        # No point sleeping after the final attempt.
        if attempt < max_attempts - 1:
            time.sleep(poll_interval)
    return None
def get_media_duration(file_path):
    """Return the duration of a media file in seconds, as reported by ffprobe.

    Args:
        file_path: Path of the media file to inspect.

    Returns:
        The container duration as a float.

    Raises:
        ValueError: If ffprobe prints no parseable duration (unreadable or
            non-media file); the message includes ffprobe's stderr.
        FileNotFoundError: If the ``ffprobe`` executable is not installed.
    """
    cmd = [
        'ffprobe', '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        file_path,
    ]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    try:
        return float(result.stdout.strip())
    except ValueError:
        # Keep the exception type callers already see, but explain the cause
        # instead of surfacing "could not convert string to float".
        detail = result.stderr.decode(errors="replace").strip()
        raise ValueError(
            f"Could not determine duration of {file_path!r}: "
            f"{detail or 'ffprobe returned no duration'}"
        ) from None
def combine_audio_video(video_path, audio_path, output_path):
    """Mux *audio_path* onto *video_path* with ffmpeg, matching audio length.

    The output always lasts as long as the audio: a longer video is cut at
    the audio duration, a shorter (or equal-length) video is looped enough
    times to cover it. The video stream is copied and the audio is encoded
    as AAC.

    Args:
        video_path: Source video file.
        audio_path: Audio file that replaces the video's sound track.
        output_path: Destination file; overwritten if it exists.

    Raises:
        ValueError: If a duration cannot be read or the video duration is
            not positive.
        subprocess.CalledProcessError: If ffmpeg exits with an error.
    """
    video_duration = get_media_duration(video_path)
    audio_duration = get_media_duration(audio_path)
    if video_duration <= 0:
        # Guards the floor division below against ZeroDivisionError.
        raise ValueError(f"Video {video_path!r} has no usable duration")

    input_options = []
    output_options = []
    if video_duration <= audio_duration:
        # Repeat the video often enough to cover the whole audio track.
        loop_count = int(audio_duration // video_duration) + 1
        input_options = ['-stream_loop', str(loop_count)]
        output_options = ['-shortest']

    cmd = [
        'ffmpeg', *input_options, '-i', video_path, '-i', audio_path,
        '-t', str(audio_duration),
        '-map', '0:v', '-map', '1:a',
        '-c:v', 'copy', '-c:a', 'aac',
        *output_options, '-y', output_path
    ]
    subprocess.run(cmd, check=True)
def _cleanup_session_files(session_id, keep=None):
    """Best-effort removal of this session's files in TEMP_DIR, except *keep*."""
    keep_name = os.path.basename(keep) if keep else None
    try:
        candidates = os.listdir(TEMP_DIR)
    except OSError:
        return
    for temp_file in candidates:
        if session_id not in temp_file or temp_file == keep_name:
            continue
        try:
            os.remove(os.path.join(TEMP_DIR, temp_file))
        except OSError:
            # Cleanup must never mask the real result or error.
            pass


def process_video(voice, model, text, audio_file, progress=gr.Progress()):
    """Produce a talking video from a model clip plus speech audio.

    The audio comes from *audio_file* when given, otherwise from *text* via
    text-to-speech. The clip and audio are uploaded and sent to the lipsync
    API; if any step of that fails, the audio is simply muxed onto the clip
    with ffmpeg as a fallback.

    Args:
        voice: Voice id passed to text_to_speech (unused for uploaded audio).
        model: File name of the clip inside the ``models`` directory.
        text: Text to synthesize; ignored when *audio_file* is given.
        audio_file: Uploaded audio, either a path string or an object with a
            ``name`` attribute holding the path; ``None`` to use *text*.
        progress: Gradio progress tracker.

    Returns:
        ``(output_path, status_message)``; ``output_path`` is ``None`` on
        failure. The returned file is left in TEMP_DIR so the caller can
        still read it; only intermediate session files are removed.
    """
    session_id = str(uuid.uuid4())
    result_path = None  # set only when a finished video is being returned
    try:
        # Handle audio input (either text-to-speech or uploaded file)
        if audio_file is not None:
            progress(0.1, desc="Processing uploaded audio...")
            # gr.Audio(type="filepath") supplies a plain str, which has no
            # .name attribute; accept both forms.
            if isinstance(audio_file, str):
                uploaded_path = audio_file
            else:
                uploaded_path = getattr(audio_file, "name", None)
            try:
                audio_path = process_uploaded_audio(uploaded_path, session_id)
            except (OSError, subprocess.CalledProcessError):
                audio_path = None
            if not audio_path:
                return None, "Failed to process uploaded audio file."
        elif text:
            progress(0.1, desc="Generating speech...")
            audio_path = text_to_speech(voice, text, session_id)
            if not audio_path:
                return None, "Failed to generate speech audio."
        else:
            return None, "Please either enter text or upload an audio file."

        if not model:
            return None, "Please select a video model."

        progress(0.2, desc="Processing video...")
        video_path = os.path.join("models", model)
        output_path = os.path.join(TEMP_DIR, f"output_{session_id}.mp4")

        try:
            progress(0.3, desc="Uploading files...")
            video_url = upload_file(video_path)
            audio_url = upload_file(audio_path)
            if not video_url or not audio_url:
                raise RuntimeError("Failed to upload files")

            progress(0.4, desc="Initiating lipsync...")
            job_data = lipsync_api_call(video_url, audio_url)
            if "error" in job_data or "message" in job_data:
                raise RuntimeError(
                    job_data.get("error", job_data.get("message", "Unknown error"))
                )
            job_id = job_data["id"]

            progress(0.5, desc="Processing lipsync...")
            result_url = check_job_status(job_id)
            if not result_url:
                raise RuntimeError("Lipsync processing failed or timed out")

            progress(0.9, desc="Downloading result...")
            response = requests.get(result_url, timeout=300)
            # Do not save an HTTP error page as if it were the video.
            response.raise_for_status()
            with open(output_path, "wb") as f:
                f.write(response.content)

            progress(1.0, desc="Complete!")
            result_path = output_path
            return output_path, "Lipsync completed successfully!"
        except Exception as e:
            # Deliberately broad: any lipsync failure triggers the fallback.
            progress(0.8, desc="Falling back to simple combination...")
            try:
                combine_audio_video(video_path, audio_path, output_path)
                progress(1.0, desc="Complete!")
                result_path = output_path
                return output_path, f"Used fallback method. Original error: {str(e)}"
            except Exception as fallback_error:
                return None, f"All methods failed. Error: {str(fallback_error)}"
    finally:
        # Cleanup temp files, but never the video that is being returned.
        _cleanup_session_files(session_id, keep=result_path)
def create_interface():
    """Build the Gradio Blocks UI for the lipsync generator.

    The page offers a choice between text-to-speech and uploaded audio, a
    video model selector, and a button that runs process_video and shows the
    resulting video together with a status message.

    Returns:
        The configured ``gr.Blocks`` application (not yet launched).
    """
    voices = get_voices()
    models = get_video_models()
    # name -> voice_id; reversed so the first entry wins for duplicate names.
    voice_ids = {name: voice_id for name, voice_id in reversed(voices)}

    with gr.Blocks() as app:
        gr.Markdown("# JSON Train")
        with gr.Row():
            with gr.Column():
                input_type = gr.Radio(
                    choices=["Text to Speech", "Upload Audio"],
                    label="Input Type",
                    value="Text to Speech"
                )
                with gr.Group() as tts_group:
                    voice_dropdown = gr.Dropdown(
                        choices=[v[0] for v in voices],
                        label="Select Voice",
                        value=voices[0][0] if voices else None
                    )
                    text_input = gr.Textbox(label="Enter text", lines=3)
                # Hidden initially so the layout matches the default radio
                # choice ("Text to Speech").
                with gr.Group(visible=False) as audio_group:
                    audio_input = gr.Audio(
                        label="Upload Audio",
                        source="upload",
                        type="filepath"
                    )
                model_dropdown = gr.Dropdown(
                    choices=models,
                    label="Select Video Model",
                    value=models[0] if models else None
                )
                generate_btn = gr.Button("Generate Video")
            with gr.Column():
                video_output = gr.Video(label="Generated Video")
                status_output = gr.Textbox(label="Status", interactive=False)

        def toggle_input_groups(choice):
            """Show the input group matching the radio choice, hide the other."""
            use_tts = choice == "Text to Speech"
            # gr.update is the component-agnostic form of Group.update.
            return gr.update(visible=use_tts), gr.update(visible=not use_tts)

        input_type.change(
            toggle_input_groups,
            inputs=[input_type],
            outputs=[tts_group, audio_group]
        )

        def on_generate(input_choice, voice_name, model_name, text, audio_file,
                        progress=gr.Progress()):
            """Validate the form and hand the selected input to process_video."""
            voice_id = voice_ids.get(voice_name)
            if input_choice == "Text to Speech":
                if not text:
                    return None, "Please enter some text."
                if voice_id is None:
                    return None, "Please select a voice."
                return process_video(voice_id, model_name, text, None, progress)
            if not audio_file:
                return None, "Please upload an audio file."
            return process_video(voice_id, model_name, None, audio_file, progress)

        generate_btn.click(
            fn=on_generate,
            inputs=[input_type, voice_dropdown, model_dropdown, text_input, audio_input],
            outputs=[video_output, status_output]
        )
    return app
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    app = create_interface()
    app.launch()