# ---------------- Import Required Libraries ---------------------
import json
from flask import Flask, request, send_file, url_for
from threading import Lock
import util
import chat_util
import os
import requests
import time
import sys
from flask import render_template

# import the package wav2lip_inference/wav2lip.py
library_path = util.ROOT_DIR + "/code/wav2lip_inference"
print(library_path)
sys.path.insert(1, library_path)
from Wav2Lip import Processor

# ---------------- Load API Keys From the .env File ---------------------
from dotenv import load_dotenv
load_dotenv(util.ROOT_DIR + "/.env")

# ---------------- Initialize application ---------------------
util.initialize()
util.start_log_task("Initializing Flask app...")
app = Flask(__name__)
util.end_log_task()

# create Mr. Al Farsi/Green as a global variable
patient_agent = chat_util.generate_blank_patient()
CHUNK_SIZE = 1024
BASE_URL = 'http://localhost:5000'


@app.route('/', methods=['POST'])
def index():
    return "Homepage!"


# ---------------- Audio capture and transcription ---------------------
from subprocess import run, PIPE
from threading import Thread
import wave
import faster_whisper
import torch

# import base64
# import pyaudio

# Load the Whisper model once at startup; use the GPU when available.
model = faster_whisper.WhisperModel(model_size_or_path="tiny.en",
                                    device='cuda' if torch.cuda.is_available() else 'cpu')


@app.route('/client_test', methods=['GET'])
def client_test():
    return render_template('client.html')


@app.route('/receive_audio', methods=['POST'])
def receive_audio():
    # dirname = "temp"
    # filename = "temp.webm"  # request.files['audio_file'].filename
    save_path = "temp/temp.webm"
    wav_save_path = 'temp/temp.wav'
    request.files['audio_file'].save(save_path)
    # Transcribe in the background so the upload request returns immediately.
    Thread(target=transcribe_text).start()
    return "Received audio file"


@app.route('/transcribe_text', methods=['POST'])
def transcribe_text():
    save_path = "temp/temp.webm"
    wav_save_path = "temp/temp.wav"
    print("converting to wave audio")
    # convert webm to wav
    run(['ffmpeg', '-y', '-i', save_path, wav_save_path], stdout=PIPE, stderr=PIPE)
    print('preparing for transcription')
    # audio, frames = pyaudio.PyAudio(), []
    # # Transcribe recording using whisper
    # with wave.open(wav_save_path, 'wb') as wf:
    #     wf.setparams((1, audio.get_sample_size(pyaudio.paInt16), 16000, 0, 'NONE', 'NONE'))
    #     wf.writeframes(b''.join(frames))
    print('transcribing')
    user_text = " ".join(seg.text for seg in model.transcribe(wav_save_path, language="en")[0])
    print(f'>>>{user_text}\n<<< ', end="", flush=True)
    return user_text


# ---------------- Generation endpoints ---------------------

@app.route('/generate_patient', methods=['POST'])
def request_patient_generation():
    # to do: sessions / authorization to have multiple active agents
    global patient_agent
    patient_agent = chat_util.generate_patient(language_model_name='gpt-4-turbo-preview')
    return (f"Generated patient agent ({patient_agent.name}) using {patient_agent.model.model_name} "
            f"and the following system message: {patient_agent.system_message}")


@app.route('/generate_patient_text', methods=['POST'])
def generate_patient_text(message_from_user=None):
    # message_from_user = request.args.get('message_from_user', type=str)
    if not message_from_user:
        message_from_user = request.json['message_from_user']
    util.rprint(f"[bold]Conversation started [/bold] \n ─────────────────────────────────────────── ")
    util.rprint(f" [blue][bold]▶ CLINICIAN [/bold] {message_from_user} [/blue]\n")
    patient_agent.receive(name="Clinician", message=message_from_user)
    message_from_patient = patient_agent.send()
    util.rprint(f" [cyan3][bold]▶ PATIENT [/bold] {message_from_patient} [/cyan3]\n")
    return json.dumps({'message_from_patient': message_from_patient})
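# ---------------- Example client calls (illustrative sketch) ---------------------
# A minimal sketch of how another process might exercise the endpoints above.
# The host/port and the sample WebM path are assumptions for illustration and
# are not defined elsewhere in this app.
#
#   import requests
#   with open("sample.webm", "rb") as f:
#       requests.post("http://localhost:5000/receive_audio", files={"audio_file": f})
#   reply = requests.post("http://localhost:5000/generate_patient_text",
#                         json={"message_from_user": "How are you feeling today?"})
#   print(reply.json()["message_from_patient"])
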
@app.route('/generate_patient_audio', methods=['POST'])
def generate_patient_audio(message_from_user=None):
    request_id = util.generate_hash()
    if not message_from_user:
        message_from_user = request.json['message_from_user']
    patient_text_response = json.loads(generate_patient_text(message_from_user))['message_from_patient']

    # Convert the patient's text reply to speech with Eleven Labs.
    url = "https://api.elevenlabs.io/v1/text-to-speech/jwnLlmJUpWazVNZOyzKE"
    querystring = {"optimize_streaming_latency": "4", "output_format": "mp3_44100_32"}
    payload = {"text": patient_text_response}
    headers = {
        "xi-api-key": os.environ["ELEVENLABS_API_KEY"],
        "Content-Type": "application/json"
    }
    util.start_log_task("Sending audio_files request to Eleven Labs...")
    response = requests.request("POST", url, json=payload, headers=headers, params=querystring)
    util.end_log_task()

    local_filename = request_id + '.mp3'
    util.log_task(f"Received {local_filename} from Eleven Labs.")
    filename = util.ROOT_DIR + '/audio_files/' + local_filename
    with open(filename, 'wb') as f:
        for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
            if chunk:
                f.write(chunk)

    if response.status_code == 200:
        return json.dumps({'status': 'success',
                           'request_id': request_id,
                           'audio_url': BASE_URL + '/get_audio?filename=' + local_filename,
                           'audio_path': filename,
                           'message_from_patient': patient_text_response})
    # Surface Eleven Labs errors instead of implicitly returning None.
    return json.dumps({'status': 'error', 'detail': response.text})


@app.route('/generate_remote_video', methods=['POST'])
def generate_remote_video(audio_path=None):
    if not audio_path:
        audio_path = request.json['audio_path']

    # Request a lip-synced video from Sync Labs.
    # NOTE: the audio/video URLs below are hardcoded test assets; audio_path is not yet used.
    url = "https://api.synclabs.so/lipsync"
    querystring = {"optimize_streaming_latency": "4", "output_format": "mp3_44100_32"}
    payload = {
        "audioUrl": "https://cdn.syntheticpatients.org/audio/output_2024-04-30-T-01-47-46___72537b5cb2024fc3.mp3",
        "videoUrl": "https://cdn.syntheticpatients.org/video/alfarsi_speaking_shortly_5s_720p.mp4",
        "synergize": True,
        "maxCredits": None,
        "webhookUrl": None,
        "model": "wav2lip++"
    }
    headers = {
        "accept": "application/json",
        "x-api-key": os.environ["SYNCLABS_API_KEY"],
        "Content-Type": "application/json"
    }
    util.start_log_task("Sending video request to Sync Labs...")
    response = requests.request("POST", url, json=payload, headers=headers, params=querystring)
    util.end_log_task()
    print(response.text)
    video_sync_labs_id = json.loads(response.text)["id"]

    # Poll Sync Labs until the render is complete.
    video_generating = True
    video_url = None
    while video_generating:
        url = f"https://api.synclabs.so/lipsync/{video_sync_labs_id}"
        headers = {
            "accept": "application/json",
            "x-api-key": os.environ["SYNCLABS_API_KEY"]
        }
        response = requests.request("GET", url, headers=headers)
        status = json.loads(response.text)["status"]
        if status == "COMPLETED":
            video_url = json.loads(response.text)["videoUrl"]
            util.lp("Video generation completed. Available at: " + video_url)
            video_generating = False
        else:
            util.lp("Video generation in progress. Status: " + status)
            time.sleep(5)
    return video_url
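# ---------------- Example audio request (illustrative sketch) ---------------------
# A sketch of how a client could request spoken audio and download the clip via
# the returned audio_url; the host/port and the output filename are assumptions.
#
#   import requests
#   r = requests.post("http://localhost:5000/generate_patient_audio",
#                     json={"message_from_user": "Can you tell me about your pain?"})
#   clip = requests.get(r.json()["audio_url"])
#   with open("patient_reply.mp3", "wb") as f:
#       f.write(clip.content)
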
Status: " + status) time.sleep(5) return video_url @app.route('/generate_local_video', methods=['POST']) def generate_local_video(request_id=None): if not request_id: request_id = request.json['request_id'] audio_path = util.ROOT_DIR + '/audio_files/' + request_id + '.mp3' video_path = util.ROOT_DIR + '/video/trimmed.mp4' output_path = util.ROOT_DIR + '/video_output/' + request_id + '.mp4' output_url = BASE_URL + '/get_video?filename=' + request_id + '.mp4' util.log_mini_task("audio_files path: " + audio_path) util.log_mini_task("video path: " + video_path) processor.run(video_path, audio_path, output_path, resize_factor=1) return json.dumps({'status': 'success', 'request_id': request_id, 'video_url': output_url, 'video_path': output_path}) # ---------------------- Get endpoints ------------------------- @app.route('/get_audio', methods=['GET']) def get_audio_file(filename=None): if filename is None: filename = request.args.get('filename') return send_file(util.ROOT_DIR + '/audio_files/' + filename, as_attachment=True) @app.route('/get_video', methods=['GET']) def get_video_file(filename=None): if filename is None: filename = request.args.get('filename') return send_file(util.ROOT_DIR + '/video_output/' + filename, as_attachment=True) # ---------------- End-to-end endpoints ------------------------ @app.route('/get_video_from_text', methods=['POST']) def get_video_from_text(message_from_user=None): if not message_from_user: message_from_user = request.json['message_from_user'] audio_response_text = generate_patient_audio(message_from_user) audio_response = json.loads(audio_response_text) util.log_mini_task("audio_files response: " + audio_response_text) # fake_json = '''{"status": "success", "request_id": "79b0e694-399f-4cbd-b0d8-e9719a7697b8", "audio_url": "http://localhost:5000/get_audio?filename=79b0e694-399f-4cbd-b0d8-e9719a7697b8.mp3", "audio_path": "/Users/alexandergoodell/code/synthetic-patients-private/audio_files/79b0e694-399f-4cbd-b0d8-e9719a7697b8.mp3", "message_from_patient": "My favorite color is green. It reminds me of the lush green fields where I used to play softball with my daughters."}''' # audio_response = json.loads(fake_json) request_id = audio_response['request_id'] video_response = json.loads(generate_local_video(request_id)) return json.dumps({'status': 'success', 'request_id': request_id, 'video_url': video_response['video_url'], 'audio_url': audio_response['audio_url'], 'message_from_patient': audio_response['message_from_patient']}) @app.route('/client', methods=['GET']) def client(message_from_user=None): client_html = ''' Video Chat
# available at https://new-fond-dog.ngrok-free.app/synthetic_patient_demo
@app.route('/synthetic_patient_demo', methods=['GET'])
def demo(message_from_user=None):
    client_html = '''Video Chat
    '''
    return client_html
# ---------------- Start-up ---------------------
processor = Processor()          # local Wav2Lip processor used by /generate_local_video
request_patient_generation()     # create the default patient agent at start-up

if __name__ == '__main__':
    app.run(host="0.0.0.0", debug=False, threaded=False)
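# ---------------- Running the server (illustrative sketch) ---------------------
# A sketch of how this module is typically launched and smoke-tested from a
# shell; the module filename is an assumption, and the port is Flask's default
# (matching BASE_URL above).
#
#   $ python <this_module>.py
#   $ curl -X POST http://localhost:5000/generate_patient
#   $ curl -X POST http://localhost:5000/get_video_from_text \
#          -H "Content-Type: application/json" \
#          -d '{"message_from_user": "What brings you in today?"}'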