# NOTE: file-viewer residue (size banner, blame hashes, line-number gutter) was stripped from the top of this file.
# -*- coding: utf-8 -*-
import numpy as np
import argparse
import os
import re
import requests
from pathlib import Path
# Create the output directory up front (no error if it already exists);
# results are later written under ./out/.
Path('out/').mkdir(parents=True, exist_ok=True)
# Developer notes - pushing over SSH:
#   eval $(ssh-agent -s)
#   ssh-add ~/.ssh/id_ed25519_github2024
#
#   git remote set-url origin git@github.com:audeering/shift
# ==
def alpha_num(f):
    """Reduce *f* to ASCII letters, digits and single spaces.

    Args:
        f: Input string (e.g. a path or voice identifier).

    Returns:
        ``f`` with every character outside ``[A-Za-z0-9 ]`` removed and each
        run of spaces collapsed to a single space.
    """
    # Strip first, then collapse: removing punctuation that sits between two
    # spaces (e.g. "a - b") would otherwise leave a double space behind.
    f = re.sub(r'[^A-Za-z0-9 ]+', '', f)  # drop non-alphanumeric characters
    f = re.sub(' +', ' ', f)  # collapse runs of spaces into one
    return f
def command_line_args():
    """Build the argument parser for the TTS client.

    Returns:
        argparse.ArgumentParser: parser exposing ``--affective``,
        ``--device``, ``--text``, ``--native``, ``--voice``, ``--image``,
        ``--video``, ``--out_file`` and ``--speed``. The caller is expected
        to invoke ``parse_args()`` on it.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    # NOTE: ``store_false`` means the value defaults to True and passing
    # ``--affective`` on the command line switches it to False.
    parser.add_argument(
        '--affective',
        help="Select Emotional or non-emotional variant of Available voices: https://audeering.github.io/shift/",
        action='store_false',
    )
    parser.add_argument(
        '--device',
        help="Device ID",
        type=str,
        default='cpu',
    )
    parser.add_argument(
        '--text',
        help="Text to be synthesized.",
        default='sample.txt',
        type=str,
    )
    # No ``nargs`` is set, so ``--native`` always needs a value; the help text
    # therefore no longer advertises a bare ``--native`` flag.
    parser.add_argument(
        '--native',
        help="Voice cloning from user provided audio, e.g. --native my_voice.wav (a file path is required)",
    )
    parser.add_argument(
        '--voice',
        help="TTS voice - Available voices: https://audeering.github.io/shift/",
        default="en_US/m-ailabs_low#judy_bieber",  # alternative: 'en_US/cmu-arctic_low#lnh'
        type=str,
    )
    parser.add_argument(
        '--image',
        help="If provided is set as background for output video, see --text",
        type=str,
    )
    parser.add_argument(
        '--video',
        help="Video file for video translation. Voice cloned from the video",
        type=str,
    )
    parser.add_argument(
        '--out_file',
        help="Output file name.",
        type=str,
        default=None,
    )
    # ``type=float`` keeps the parsed value a float whether it comes from the
    # default or from the command line, and rejects non-numeric input early.
    parser.add_argument(
        '--speed',
        help='speed of TTS (only used in Non English voices).',
        type=float,
        default=1.24,
    )
    return parser
def send_to_server(args, url="http://192.168.88.209:5000", timeout=None):
    """POST the synthesis request and its input files to the TTS server.

    Args:
        args: Parsed CLI namespace; the attributes ``affective``, ``voice``,
            ``native``, ``text``, ``image``, ``video`` and ``speed`` are read.
        url: Server endpoint. Defaults to the previously hard-coded address.
        timeout: Forwarded to ``requests.post``. ``None`` (the default) waits
            indefinitely, which matches the earlier behaviour.

    Returns:
        requests.Response: the server reply; its status is not checked here.

    Raises:
        OSError: if ``args.text`` cannot be opened (e.g. FileNotFoundError).
    """
    # Plain option values travel as form fields (``data=``).
    payload = {
        'affective': args.affective,
        'voice': args.voice,
        'native': args.native,
        'text': args.text,
        'image': args.image,
        'video': args.video,
        'speed': args.speed,
        # 'out_file' is deliberately not sent: the server saves to a temp file.
    }
    # Optional uploads, paired with the progress banner printed for each.
    optional_inputs = (
        (args.image, '\nLOADING IMAGE\n'),
        (args.video, '\nLOADING vid\n'),
        (args.native, '\nLOADING natv\n'),
    )
    opened = []  # every handle we open, so that all of them get closed
    try:
        # The text file is mandatory: a missing file raises here.
        text_file = open(args.text, 'rb')
        opened.append(text_file)
        # Actual file contents travel as multipart uploads (``files=``).
        files = [(args.text, text_file)]
        for path, banner in optional_inputs:
            handle = None
            if path is not None:
                print(banner)
                try:
                    handle = open(path, 'rb')
                except FileNotFoundError:
                    # Best effort, as before: skip the upload, but say so.
                    print(f'WARNING: {path} not found; it will not be uploaded.')
                else:
                    opened.append(handle)
            # Entries whose handle is None do not arrive in the server's dict.
            files.append((path, handle))
        response = requests.post(url, data=payload, files=files, timeout=timeout)
    finally:
        for handle in opened:
            handle.close()
    return response
def cli():
    """Parse the command line, query the server and save its reply in ./out/.

    When ``--out_file`` is not given, the output name is assembled from the
    sanitised ``--text`` and ``--voice`` values plus either the sanitised
    ``--video`` value or a short random tag.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        KeyError: if the reply lacks the ``suffix-file-type`` header.
    """
    parser = command_line_args()
    args = parser.parse_args()
    if args.out_file is None:
        # Without a video, fall back to the first 6 characters of a random
        # number so that repeated runs do not reuse the same name.
        vid = alpha_num(args.video) if args.video else f'{np.random.rand()*1e7}'[:6]
        args.out_file = alpha_num(args.text) + '_' + alpha_num(args.voice) + '_' + vid
    response = send_to_server(args)
    # Fail loudly on an HTTP error instead of writing an error page to disk
    # (or tripping over the missing header below).
    response.raise_for_status()
    # args.out_file is not sent to the server: the server writes a temp file
    # and reports its type in a header; the client copies the bytes locally.
    suffix = response.headers['suffix-file-type'].split('.')[-1]
    out_path = './out/' + args.out_file + '.' + suffix
    with open(out_path, 'wb') as f:
        f.write(response.content)
# Run the client only when this file is executed as a script.
if __name__ == '__main__':
    cli()
# Assume video and text are also given: for video we have to write some classes for audiocraft,
# then call tts.py on this video with non-empty labels, which in turn calls audiocraft.