"""Interactive command-line client for a remote text-to-speech server.

The client reads text from the terminal, posts it together with the selected
voice/scene options to the server, stores the returned media under
``--out_file`` and plays it with ``paplay``.
"""
import argparse
import contextlib
import os
import subprocess

import numpy as np
import requests

# SSH AGENT
# eval $(ssh-agent -s)
# ssh-add ~/.ssh/id_ed25519_github2024
#
# git remote set-url origin git@github.com:audeering/shift

# https://stackoverflow.com/questions/57158779/how-to-stop-audio-with-playsound-module
# import multiprocessing
# from playsound import playsound
# p = multiprocessing.Process(target=playsound, args=("file.mp3",))
# p.start()
# input("press ENTER to stop playback")
# p.terminate()
# from playsound import playsound
# playsound('/path/to/a/sound/file/you/want/to/play.mp3')

# Default address of the synthesis server; override via send_to_server(url=...).
SERVER_URL = "http://192.168.88.209:5000"


def command_line_args():
    """Build the argument parser for the client.

    Returns:
        argparse.ArgumentParser: the configured (not yet parsed) parser.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    # NOTE: store_false means the value is True by default and passing
    # --affective switches it to False.
    parser.add_argument(
        '--affective',
        help="Select Emotional or non-emotional variant of Available voices: https://audeering.github.io/shift/",
        action='store_false',
    )
    parser.add_argument(
        '--device',
        help="Device ID",
        type=str,
        default='cpu',
    )
    parser.add_argument(
        '--text',
        help="Text to be synthesized.",
        default='sample.txt',
        type=str,
    )
    parser.add_argument(
        '--native',
        help=""" --native: (without argument) a flag to do voice cloning using the speech from --video, --native my_voice.wav: Voice cloning from user provided audio""",
        # nargs='?',
        # const=None,
        # default=False  # default has to be none
    )
    parser.add_argument(
        '--voice',
        help="TTS voice - Available voices: https://audeering.github.io/shift/",
        default="en_US/m-ailabs_low#judy_bieber",  # 'en_US/cmu-arctic_low#lnh',
        type=str,
    )
    parser.add_argument(
        '--image',
        help="If provided is set as background for output video, see --text",
        type=str,
    )
    parser.add_argument(
        '--video',
        help="Video file for video translation. Voice cloned from the video",
        type=str,
    )
    parser.add_argument(
        '--out_file',
        help="Output file name.",
        type=str,
        default='b6',
    )
    parser.add_argument(
        '--scene',
        help='Sound scene description.',
        type=str,
        default='calm background sounds of a castle',
    )
    return parser


def _open_optional(stack, path, label):
    """Open *path* for binary reading and register it on *stack* for closing.

    Returns None when *path* is None or the file does not exist; a missing
    file is reported instead of being skipped silently.
    """
    if path is None:
        return None
    print(f'\nLOADING {label}\n')
    try:
        return stack.enter_context(open(path, 'rb'))
    except FileNotFoundError:
        print(f'File not found, skipping: {path}')
        return None


def send_to_server(args, url=SERVER_URL):
    """POST the synthesis request described by *args* to the server.

    Args:
        args: parsed arguments providing ``affective``, ``voice``, ``native``,
            ``text``, ``image``, ``video`` and ``scene``.
        url: server address; defaults to ``SERVER_URL``.

    Returns:
        requests.Response: the server reply (also returned on non-200 status,
        after the failure has been printed).

    Raises:
        requests.RequestException: if the request cannot be carried out.
    """
    payload = {
        'affective': args.affective,
        'voice': args.voice,
        'native': args.native,
        'text': args.text,
        'image': args.image,
        'video': args.video,
        'scene': args.scene,
        # 'out_file': args.out_file  # let serve save as temp
    }

    # In data= we can write args
    # In files= sent actual files if provided
    #
    # args.text is transmitted as part of the payload. It is NOT opened as a
    # file here: cli() stores the typed sentence in args.text, so treating it
    # as a path raised FileNotFoundError on every request.
    with contextlib.ExitStack() as stack:
        # Every handle opened below is closed when the stack exits.
        image_file = _open_optional(stack, args.image, 'IMAGE')
        # NOTE: the video and native files are opened but not uploaded yet;
        # only the image is attached to the request.
        _open_optional(stack, args.video, 'vid')
        _open_optional(stack, args.native, 'natv')

        # --------------------- send this extra
        print('Sending...\n')
        # NONEs do not arrive to servers dict
        response = requests.post(
            url, data=payload, files=[(args.image, image_file)]
        )

    # Check the response from the server
    if response.status_code == 200:
        print("\nRequest was successful!")
    else:
        print("Failed to send the request")
        print("Status Code:", response.status_code)
        print("Response:", response.text)
    return response


def cli():
    """Run the interactive loop: prompt for text, synthesize, save and play.

    args.out_file is not sent to the server - the server writes a temporary
    file whose content is copied by the client. The loop ends on Ctrl-C or
    end-of-input.
    """
    parser = command_line_args()
    args = parser.parse_args()

    while True:
        try:
            args.text = input("Type your text: ")
        except (EOFError, KeyboardInterrupt):
            # Leave the prompt cleanly instead of dumping a traceback.
            print()
            break

        try:
            response = send_to_server(args)
        except requests.RequestException as err:
            print("Could not reach the server:", err)
            continue

        if response.status_code != 200:
            # send_to_server already printed the failure details; do not
            # store an error body as if it were media.
            continue

        suffix_header = response.headers.get('suffix-file-type')
        if suffix_header is None:
            print("Response has no 'suffix-file-type' header; nothing saved.")
            continue

        # The extension is whatever follows the last dot of the header value.
        out_file = args.out_file + '.' + suffix_header.split('.')[-1]
        with open(out_file, 'wb') as f:
            f.write(response.content)
        print('REsponse AT client []\n----------------------------', response.headers)

        try:
            subprocess.run(["paplay", out_file])
        except FileNotFoundError:
            print(f"'paplay' is not available; output kept in {out_file}")


if __name__ == '__main__':
    cli()

# assume also video and text for video we have to write some classes for video for audiocraft
# then call tts.py on this video with nonempty labels - thus calls audiocraft