Spaces:

sheikhed
/

json

Sleeping

App Files Files Community

json / app.py

sheikhed

Update app.py

355b39c verified 9 months ago

raw

history blame

10.8 kB

	import os
	import requests
	import json
	import time
	import subprocess
	import gradio as gr
	import uuid
	from dotenv import load_dotenv

	# Load environment variables (python-dotenv) before reading any settings.
	load_dotenv()

	# API keys: A_KEY is sent as "xi-api-key" to the ElevenLabs endpoints,
	# B_KEY as "x-api-key" to the lipsync endpoint at API_URL.
	A_KEY = os.getenv("A_KEY")
	B_KEY = os.getenv("B_KEY")

	# Service URLs: API_URL is the lipsync job endpoint, UPLOAD_URL receives
	# the media files that the lipsync service later fetches by URL.
	API_URL = os.getenv("API_URL")
	UPLOAD_URL = os.getenv("UPLOAD_URL")

	# Scratch directory for per-session audio/video files. exist_ok=True avoids
	# the check-then-create race when several processes start at the same time.
	TEMP_DIR = "temp"
	os.makedirs(TEMP_DIR, exist_ok=True)

	def get_voices(timeout=10):
	    """Fetch the voices available to the configured ElevenLabs API key.

	    Args:
	        timeout: Seconds to wait for the HTTP request before giving up.

	    Returns:
	        A list of ``(name, voice_id)`` tuples. An empty list is returned
	        when the request fails, times out, or answers with a non-200 status.
	    """
	    url = "https://api.elevenlabs.io/v1/voices"
	    headers = {
	        "Accept": "application/json",
	        "xi-api-key": A_KEY,
	    }

	    try:
	        response = requests.get(url, headers=headers, timeout=timeout)
	    except requests.RequestException:
	        # Called while the UI is being built: a network problem should give
	        # an empty voice list, not prevent the app from starting.
	        return []

	    if response.status_code != 200:
	        return []
	    return [(voice['name'], voice['voice_id']) for voice in response.json().get('voices', [])]

	def get_video_models():
	    """List the video files available in the ``models`` directory.

	    Returns:
	        File names (not full paths) ending in .mp4, .avi or .mov, matched
	        case-insensitively and sorted so the dropdown order is stable.
	        An empty list is returned when the directory does not exist.
	    """
	    try:
	        entries = os.listdir("models")
	    except FileNotFoundError:
	        # No models directory yet: behave like an empty one instead of
	        # crashing while the interface is being built.
	        return []
	    return sorted(
	        name for name in entries
	        if name.lower().endswith((".mp4", ".avi", ".mov"))
	    )

	def text_to_speech(voice_id, text, session_id, timeout=60):
	    """Synthesize ``text`` with ElevenLabs and store it as a session MP3.

	    Args:
	        voice_id: ElevenLabs voice identifier placed in the request URL.
	        text: Text to be spoken.
	        session_id: Unique token embedded in the output file name.
	        timeout: Seconds to wait for the HTTP request before giving up.

	    Returns:
	        Path of the written ``temp_voice_<session_id>.mp3`` inside TEMP_DIR,
	        or ``None`` when the request fails or returns a non-200 status.
	    """
	    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"

	    headers = {
	        "Accept": "audio/mpeg",
	        "Content-Type": "application/json",
	        "xi-api-key": A_KEY,
	    }

	    data = {
	        "text": text,
	        "model_id": "eleven_turbo_v2_5",
	        "voice_settings": {
	            "stability": 0.5,
	            "similarity_boost": 0.5,
	        },
	    }

	    try:
	        response = requests.post(url, json=data, headers=headers, timeout=timeout)
	    except requests.RequestException:
	        # The caller treats None as "speech generation failed"; report
	        # transport errors through the same channel as HTTP errors.
	        return None
	    if response.status_code != 200:
	        return None

	    audio_file_path = os.path.join(TEMP_DIR, f'temp_voice_{session_id}.mp3')
	    with open(audio_file_path, 'wb') as audio_file:
	        audio_file.write(response.content)
	    return audio_file_path

	def process_uploaded_audio(audio_path, session_id):
	    """Validate an uploaded audio file and stage it as a session MP3.

	    Args:
	        audio_path: Path of the uploaded file; may be empty or ``None``.
	        session_id: Unique token embedded in the output file name.

	    Returns:
	        Path of ``temp_voice_<session_id>.mp3`` inside TEMP_DIR, or ``None``
	        when no path was given, the extension is not one of
	        .mp3/.wav/.m4a/.aac, or the copy/ffmpeg conversion fails.
	    """
	    import shutil  # local import: only this function needs it

	    if not audio_path:
	        return None

	    # Only the extension is checked; the content itself is not inspected.
	    ext = os.path.splitext(audio_path)[1].lower()
	    if ext not in ('.mp3', '.wav', '.m4a', '.aac'):
	        return None

	    output_path = os.path.join(TEMP_DIR, f'temp_voice_{session_id}.mp3')

	    try:
	        if ext == '.mp3':
	            # Already MP3: copy it into the temp directory without
	            # loading the whole file into memory.
	            shutil.copyfile(audio_path, output_path)
	        else:
	            # Transcode everything else to MP3 with ffmpeg.
	            cmd = [
	                'ffmpeg', '-i', audio_path,
	                '-codec:a', 'libmp3lame', '-qscale:a', '2',
	                '-y', output_path,
	            ]
	            subprocess.run(cmd, check=True)
	    except (OSError, subprocess.CalledProcessError):
	        # The caller reports a None result as a processing failure, so a
	        # missing ffmpeg binary or a bad input file must not raise here.
	        return None
	    return output_path

	def upload_file(file_path, timeout=120):
	    """Upload a local file to UPLOAD_URL and return the hosted URL.

	    Args:
	        file_path: Path of the file to send.
	        timeout: Seconds to wait on the connection before giving up, so a
	            stalled upload cannot hang the request forever.

	    Returns:
	        The stripped response body (expected to be the file's URL) on a
	        200 response, otherwise ``None``.

	    Raises:
	        OSError: If the file cannot be opened.
	        requests.RequestException: On transport errors or timeout.
	    """
	    with open(file_path, 'rb') as file:
	        files = {'fileToUpload': (os.path.basename(file_path), file)}
	        data = {'reqtype': 'fileupload'}
	        response = requests.post(UPLOAD_URL, files=files, data=data, timeout=timeout)

	    if response.status_code == 200:
	        return response.text.strip()
	    return None

	def lipsync_api_call(video_url, audio_url, timeout=30):
	    """Submit a lipsync job to API_URL and return the decoded JSON reply.

	    Args:
	        video_url: Publicly reachable URL of the source video.
	        audio_url: Publicly reachable URL of the audio track.
	        timeout: Seconds to wait for the HTTP request before giving up.

	    Returns:
	        The response body parsed as JSON. The caller reads ``id`` from it
	        and treats the presence of ``error`` or ``message`` as a failure.

	    Raises:
	        requests.RequestException: On transport errors or timeout.
	        ValueError: If the response body is not valid JSON.
	    """
	    headers = {
	        "Content-Type": "application/json",
	        "x-api-key": B_KEY,
	    }

	    data = {
	        "audioUrl": audio_url,
	        "videoUrl": video_url,
	        "maxCredits": 1000,
	        "model": "sync-1.6.0",
	        "synergize": True,
	        "pads": [0, 5, 0, 0],
	        "synergizerStrength": 1,
	    }

	    # json= lets requests serialize the payload, matching text_to_speech.
	    response = requests.post(API_URL, headers=headers, json=data, timeout=timeout)
	    return response.json()

	def check_job_status(job_id, max_attempts=30, poll_interval=10, timeout=30):
	    """Poll the lipsync job until it finishes, fails, or attempts run out.

	    Args:
	        job_id: Identifier returned when the job was submitted.
	        max_attempts: Maximum number of status requests to make.
	        poll_interval: Seconds to sleep between two status requests.
	        timeout: Seconds to wait for each HTTP request before giving up.

	    Returns:
	        The ``videoUrl`` of the finished job when its status is
	        ``COMPLETED``; ``None`` when the status is ``FAILED`` or the job
	        is still unfinished after ``max_attempts`` polls.

	    Raises:
	        requests.RequestException: On transport errors or timeout.
	        KeyError: If the reply lacks ``status`` (or ``videoUrl`` once
	            completed); the caller falls back on any exception.
	    """
	    headers = {"x-api-key": B_KEY}

	    for attempt in range(max_attempts):
	        response = requests.get(f"{API_URL}/{job_id}", headers=headers, timeout=timeout)
	        data = response.json()

	        if data["status"] == "COMPLETED":
	            return data["videoUrl"]
	        elif data["status"] == "FAILED":
	            return None

	        # No point in waiting after the final poll; just report the timeout.
	        if attempt < max_attempts - 1:
	            time.sleep(poll_interval)
	    return None

	def get_media_duration(file_path):
	    """Return the duration of a media file in seconds, measured by ffprobe.

	    Args:
	        file_path: Path of the audio or video file to inspect.

	    Returns:
	        The container duration as a float.

	    Raises:
	        ValueError: If ffprobe produced no parsable duration (for example
	            the file is missing or unreadable); the message carries
	            ffprobe's stderr output to make the cause visible.
	        OSError: If the ffprobe executable cannot be started.
	    """
	    cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
	    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	    raw = result.stdout.strip()
	    try:
	        return float(raw)
	    except ValueError:
	        # float(b'') alone gives an unhelpful message; surface what ffprobe
	        # actually complained about instead.
	        detail = result.stderr.decode(errors="replace").strip() or f"unexpected output {raw!r}"
	        raise ValueError(f"Could not determine duration of {file_path!r}: {detail}") from None

	def combine_audio_video(video_path, audio_path, output_path):
	    """Mux the audio track onto the video with ffmpeg, without lipsync.

	    The output is cut to the audio's length. If the video is not longer
	    than the audio, the video input is looped so it covers the audio.
	    The video stream is copied as-is and the audio is encoded as AAC.

	    Raises:
	        subprocess.CalledProcessError: If ffmpeg exits with an error.
	    """
	    video_len = get_media_duration(video_path)
	    audio_len = get_media_duration(audio_path)

	    # Extra arguments that only apply when the video has to be repeated.
	    loop_args = []
	    tail_args = []
	    if not video_len > audio_len:
	        repeats = int(audio_len // video_len) + 1
	        loop_args = ['-stream_loop', str(repeats)]
	        tail_args = ['-shortest']

	    cmd = [
	        'ffmpeg', *loop_args, '-i', video_path, '-i', audio_path,
	        '-t', str(audio_len),
	        '-map', '0:v', '-map', '1:a',
	        '-c:v', 'copy', '-c:a', 'aac',
	        *tail_args, '-y', output_path,
	    ]

	    subprocess.run(cmd, check=True)

	def process_video(voice, model, text, audio_file, progress=gr.Progress()):
	    """Produce a talking video from a model clip plus speech audio.

	    The audio comes from ``audio_file`` when given, otherwise from
	    text-to-speech on ``text``. Both media files are uploaded and sent to
	    the lipsync service; if any step of that fails, the audio is simply
	    muxed onto the video with ffmpeg as a fallback.

	    Args:
	        voice: ElevenLabs voice id used for text-to-speech.
	        model: File name of the video inside the ``models`` directory.
	        text: Text to speak; ignored when ``audio_file`` is given.
	        audio_file: Uploaded audio, either a path string or an object
	            exposing the path as ``.name``; ``None`` to use ``text``.
	        progress: Gradio progress tracker.

	    Returns:
	        ``(output_path, status_message)``; ``output_path`` is ``None``
	        when nothing could be produced.
	    """
	    if not model:
	        # os.path.join("models", None) would raise a TypeError below.
	        return None, "Please select a video model."

	    session_id = str(uuid.uuid4())
	    # File handed back to the caller; the cleanup below must not delete it,
	    # otherwise Gradio receives a path that no longer exists.
	    result_path = None

	    try:
	        # Handle audio input (either text-to-speech or uploaded file)
	        if audio_file is not None:
	            progress(0.1, desc="Processing uploaded audio...")
	            # gr.Audio(type="filepath") delivers a plain path string, while
	            # file-like uploads expose the path as ``.name``; accept both.
	            uploaded_path = getattr(audio_file, "name", audio_file)
	            audio_path = process_uploaded_audio(uploaded_path, session_id)
	            if not audio_path:
	                return None, "Failed to process uploaded audio file."
	        elif text:
	            progress(0.1, desc="Generating speech...")
	            audio_path = text_to_speech(voice, text, session_id)
	            if not audio_path:
	                return None, "Failed to generate speech audio."
	        else:
	            return None, "Please either enter text or upload an audio file."

	        progress(0.2, desc="Processing video...")
	        video_path = os.path.join("models", model)
	        output_path = os.path.join(TEMP_DIR, f"output_{session_id}.mp4")

	        try:
	            progress(0.3, desc="Uploading files...")
	            video_url = upload_file(video_path)
	            audio_url = upload_file(audio_path)

	            if not video_url or not audio_url:
	                raise Exception("Failed to upload files")

	            progress(0.4, desc="Initiating lipsync...")
	            job_data = lipsync_api_call(video_url, audio_url)

	            if "error" in job_data or "message" in job_data:
	                raise Exception(job_data.get("error", job_data.get("message", "Unknown error")))

	            job_id = job_data["id"]

	            progress(0.5, desc="Processing lipsync...")
	            result_url = check_job_status(job_id)

	            if not result_url:
	                raise Exception("Lipsync processing failed or timed out")

	            progress(0.9, desc="Downloading result...")
	            response = requests.get(result_url, timeout=120)
	            # Do not save an HTTP error page as if it were the video.
	            response.raise_for_status()
	            with open(output_path, "wb") as f:
	                f.write(response.content)
	            progress(1.0, desc="Complete!")
	            result_path = output_path
	            return output_path, "Lipsync completed successfully!"

	        except Exception as e:
	            # Any lipsync failure: fall back to plain audio/video muxing.
	            progress(0.8, desc="Falling back to simple combination...")
	            try:
	                combine_audio_video(video_path, audio_path, output_path)
	                progress(1.0, desc="Complete!")
	                result_path = output_path
	                return output_path, f"Used fallback method. Original error: {str(e)}"
	            except Exception as fallback_error:
	                return None, f"All methods failed. Error: {str(fallback_error)}"
	    finally:
	        # Cleanup this session's temp files, keeping only the returned video.
	        for temp_file in os.listdir(TEMP_DIR):
	            if session_id not in temp_file:
	                continue
	            temp_path = os.path.join(TEMP_DIR, temp_file)
	            if temp_path == result_path:
	                continue
	            try:
	                os.remove(temp_path)
	            except OSError:
	                # Best-effort cleanup: a leftover temp file is harmless.
	                pass

	def create_interface():
	    """Build the Gradio Blocks UI and wire up its event handlers.

	    The voice list and the video model list are fetched once, when the
	    interface is built. The user chooses between typing text (spoken with
	    the selected voice) and uploading audio; only the input group that
	    matches the current choice is shown.

	    Returns:
	        The configured ``gr.Blocks`` application (not yet launched).
	    """
	    voices = get_voices()
	    models = get_video_models()

	    with gr.Blocks() as app:
	        gr.Markdown("# JSON Train")

	        with gr.Row():
	            with gr.Column():
	                input_type = gr.Radio(
	                    choices=["Text to Speech", "Upload Audio"],
	                    label="Input Type",
	                    value="Text to Speech"
	                )

	                with gr.Group() as tts_group:
	                    voice_dropdown = gr.Dropdown(
	                        choices=[v[0] for v in voices],
	                        label="Select Voice",
	                        value=voices[0][0] if voices else None
	                    )
	                    text_input = gr.Textbox(label="Enter text", lines=3)

	                # Hidden at start so the layout matches the radio's default
	                # ("Text to Speech"); toggle_input_groups reveals it later.
	                with gr.Group(visible=False) as audio_group:
	                    audio_input = gr.Audio(
	                        label="Upload Audio",
	                        source="upload",
	                        type="filepath"
	                    )

	                model_dropdown = gr.Dropdown(
	                    choices=models,
	                    label="Select Video Model",
	                    value=models[0] if models else None
	                )
	                generate_btn = gr.Button("Generate Video")

	            with gr.Column():
	                video_output = gr.Video(label="Generated Video")
	                status_output = gr.Textbox(label="Status", interactive=False)

	        def toggle_input_groups(choice):
	            """Return visibility updates for (tts_group, audio_group)."""
	            if choice == "Text to Speech":
	                return gr.Group.update(visible=True), gr.Group.update(visible=False)
	            else:
	                return gr.Group.update(visible=False), gr.Group.update(visible=True)

	        input_type.change(
	            toggle_input_groups,
	            inputs=[input_type],
	            outputs=[tts_group, audio_group]
	        )

	        def on_generate(input_choice, voice_name, model_name, text, audio_file):
	            """Validate the active input and run the video pipeline."""
	            # The dropdown shows voice names; map the selection back to its id.
	            voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
	            if input_choice == "Text to Speech":
	                if not text:
	                    return None, "Please enter some text."
	                return process_video(voice_id, model_name, text, None)
	            else:
	                if not audio_file:
	                    return None, "Please upload an audio file."
	                return process_video(voice_id, model_name, None, audio_file)

	        generate_btn.click(
	            fn=on_generate,
	            inputs=[input_type, voice_dropdown, model_dropdown, text_input, audio_input],
	            outputs=[video_output, status_output]
	        )

	    return app

	# Script entry point: build the Gradio interface and start serving it.
	if __name__ == "__main__":
	    app = create_interface()
	    app.launch()