File size: 5,153 Bytes

# -*- coding: utf-8 -*-
import numpy as np
import argparse
import os
import re
import requests
from pathlib import Path
# Import-time side effect: make sure the `out/` directory (relative to the
# current working directory) exists; cli() writes its results to './out/'.
Path('out/').mkdir(parents=True, exist_ok=True)

# SSH AGENT
#   eval $(ssh-agent -s)
#   ssh-add ~/.ssh/id_ed25519_github2024
#
#   git remote set-url origin git@github.com:audeering/shift
# ==


def alpha_num(f):
    """Return *f* reduced to ASCII letters, digits and spaces.

    Runs of consecutive spaces are first collapsed to a single space, then
    every character that is not ``A-Z``, ``a-z``, ``0-9`` or a space is
    removed. Because the collapse happens before the removal, deleting a
    character that sat between two spaces can leave a double space behind
    (``'a - b'`` becomes ``'a  b'``).
    """
    single_spaced = re.sub(' +', ' ', f)
    return re.sub(r'[^A-Za-z0-9 ]+', '', single_spaced)


def command_line_args() -> argparse.ArgumentParser:
    """Build the argument parser for the TTS client.

    Returns:
        The configured ``argparse.ArgumentParser``; the caller is expected to
        invoke ``parse_args()`` on it.

    Notes:
        * ``--affective`` uses ``store_false``: ``args.affective`` is ``True``
          by default and becomes ``False`` when the flag is passed.
        * ``--soundscape`` given without a value falls back to ``'wind fjord'``.
        * ``--speed`` is parsed as ``float`` so that the supplied value has the
          same type as the default (1.44) and non-numeric input is rejected by
          argparse instead of being forwarded as an arbitrary string.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        '--affective',
        help="Select Emotional or non-emotional variant of Available voices: https://audeering.github.io/shift/",
        # store_false => default True, passing the flag switches it to False.
        action='store_false',
    )
    parser.add_argument(
        '--device',
        help="Device ID",
        type=str,
        default='cpu',
    )
    parser.add_argument(
        '--text',
        help="Text to be synthesized.",
        default='sample.txt',
        type=str,
    )
    parser.add_argument(
        '--soundscape',
        help='soundscape - MUST BE IN BRACKETS: \"forest\"',
        default=None,
        nargs='?',
        type=str,
        const='wind fjord',
    )
    parser.add_argument(
        '--native',
        help="""
        --native: (without argument) a flag to do voice cloning using the speech from --video,
        --native my_voice.wav:  Voice cloning from user provided audio""",
        # NOTE(review): no nargs is set, so argparse currently requires a value
        # after --native even though the help text mentions a bare flag.
        # nargs='?',
        # const=None,
        # default=False   # default has to be none
        )
    parser.add_argument(
        '--voice',
        help="TTS voice - Available voices: https://audeering.github.io/shift/",
        default="en_US/m-ailabs_low#judy_bieber", #'en_US/cmu-arctic_low#lnh',
        type=str,
    )
    parser.add_argument(
        '--image',
        help="If provided is set as background for output video, see --text",
        type=str,
    )
    parser.add_argument(
        '--video',
        help="Video file for video translation. Voice cloned from the video",
        type=str,
    )
    parser.add_argument(
        '--out_file',
        help="Output file name.",
        type=str,
        default=None
    )
    parser.add_argument(
        '--speed',
        help='speec of TTS (only used in Non English voices).',
        # float (not str) so user-supplied values match the float default.
        type=float,
        default=1.44,
    )
    return parser

def _open_optional(stack, path, label):
    """Open *path* for binary reading on *stack*; return None if absent or missing.

    The handle is registered on the given ``ExitStack`` so it is closed when
    the stack unwinds. A missing file is reported and skipped (best effort),
    matching the client's tolerance for optional attachments.
    """
    if path is None:
        return None
    print(f'\nLOADING {label}\n')
    try:
        return stack.enter_context(open(path, 'rb'))
    except FileNotFoundError:
        # Keep going without the attachment, but tell the user why it is absent.
        print(f'File not found, not uploaded: {path}')
        return None


def send_to_server(args, url="http://192.168.88.209:5000"):
    """POST the parsed CLI arguments and the referenced files to the TTS server.

    Args:
        args: Parsed arguments providing ``affective``, ``voice``,
            ``soundscape``, ``native``, ``text``, ``image``, ``video`` and
            ``speed``. ``args.text`` must be the path of a readable file;
            ``image``, ``video`` and ``native`` are optional paths.
        url: Server endpoint. Defaults to the address previously hard-coded
            in this function.

    Returns:
        The ``requests`` response object. Its HTTP status is NOT checked here;
        that is left to the caller.

    Raises:
        OSError: If ``args.text`` cannot be opened (e.g. ``FileNotFoundError``).
    """
    # Local import keeps this block self-contained without touching file imports.
    from contextlib import ExitStack

    # Plain form fields (strings / scalars).
    payload = {
        'affective': args.affective,
        'voice': args.voice,
        'soundscape': args.soundscape if args.soundscape != '' else None,
        'native': args.native,
        'text': args.text,
        'image': args.image,
        'video': args.video,
        'speed': args.speed,
        # 'out_file' is intentionally not sent: the server writes a temp file
        # and the client chooses the final name.
    }

    # ExitStack guarantees every opened handle is closed once the upload is
    # done, even if the request raises.
    with ExitStack() as stack:
        text_file = stack.enter_context(open(args.text, 'rb'))
        image_file = _open_optional(stack, args.image, 'IMAGE')
        video_file = _open_optional(stack, args.video, 'vid')
        native_file = _open_optional(stack, args.native, 'natv')

        # Each file is uploaded under its own path as the multipart field
        # name. Per the original author's note, entries whose file object is
        # None do not arrive in the server's files dict.
        response = requests.post(
            url,
            data=payload,
            files=[
                (args.text, text_file),
                (args.image, image_file),
                (args.video, video_file),
                (args.native, native_file),
            ],
        )

    return response


def cli():
    """Parse the command line, query the server and save its reply under ./out/.

    When ``--out_file`` is not given, a name is derived from the sanitised
    text path, the sanitised voice and either the sanitised video path or a
    short random number string. The file extension is taken from the
    ``suffix-file-type`` response header (the part after its last dot).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the response lacks the ``suffix-file-type`` header.
    """
    args = command_line_args().parse_args()

    if args.out_file is None:
        vid = alpha_num(args.video) if args.video else f'{np.random.rand()*1e7}'[:6]
        args.out_file = alpha_num(args.text) + '_' + alpha_num(args.voice) + '_' + vid

    response = send_to_server(args)
    # Fail loudly on 4xx/5xx instead of tripping over a missing header below
    # or saving an error page as the result.
    response.raise_for_status()

    suffix_header = response.headers.get('suffix-file-type')
    if suffix_header is None:
        raise RuntimeError(
            "Server response has no 'suffix-file-type' header; "
            "cannot determine the output file extension."
        )
    extension = suffix_header.split('.')[-1]

    # args.out_file is not sent to the server: the server writes a temp file
    # whose bytes come back in the response body and are stored here.
    Path('out/').mkdir(parents=True, exist_ok=True)
    with open('./out/' + args.out_file + '.' + extension, 'wb') as f:
        f.write(response.content)
    # print('REsponse AT client []\n----------------------------', response.headers)


# Run the client only when this file is executed as a script, not on import.
if __name__ == '__main__':
    cli()

# assume also video and text for video we have to write some classes for video for audiocraft
# then call tts.py on this video with nonempty labels - thus calls audiocraft