File size: 2,768 Bytes
e1412bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
'''
import os
import torch
import se_extractor
from api import ToneColorConverter

ckpt_converter = 'checkpoints/converter'
device = 'cuda:0'
output_dir = 'outputs'

tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')

os.makedirs(output_dir, exist_ok=True)

from openai import OpenAI

client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

response = client.audio.speech.create(
    model="tts-1",
    voice="nova",
    input="This audio will be used to extract the base speaker tone color embedding. " + \
        "Typically a very short audio should be sufficient, but increasing the audio " + \
        "length will also improve the output audio quality."
)

response.stream_to_file(f"{output_dir}/openai_source_output.mp3")

base_speaker = f"{output_dir}/openai_source_output.mp3"
source_se, audio_name = se_extractor.get_se(base_speaker, tone_color_converter)

reference_speaker = 'resources/example_reference.mp3'
target_se, audio_name = se_extractor.get_se(reference_speaker, tone_color_converter)

text = [
    "MyShell is a decentralized and comprehensive platform for discovering, creating, and staking AI-native apps.",
]
src_path = f'{output_dir}/tmp.wav'

for i, t in enumerate(text):

    response = client.audio.speech.create(
        model="tts-1",
        voice="alloy",
        input=t,
    )

    response.stream_to_file(src_path)

    save_path = f'{output_dir}/output_crosslingual_{i}.wav'

    tone_color_converter.convert(
        audio_src_path=src_path, 
        src_se=source_se, 
        tgt_se=target_se, 
        output_path=save_path,
        message='')



model = models.openai("gpt-3.5-turbo",system_prompt='You are an expert in identifying the emotion of a sentence')
result = model.generate_choice("Harry's mind was racing with thoughts of the recent events at Hogwarts", ["friendly", "cheerful", "excited", "sad", "angry", "terrified", "shouting", "whispering"])
print(result)
from openai import OpenAI
import os
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


response = client.audio.speech.create(
    model="tts-1",
    voice="fable",
    input="This audio will be used to extract the base speaker tone color embedding. " + \
        "Typically a very short audio should be sufficient, but increasing the audio " + \
        "length will also improve the output audio quality."
)

response.stream_to_file(f"openai_source_output.mp3")
'''
import boto3
s3_client = boto3.client('s3',aws_access_key_id='AKIAW7WTE5RKJY2WJ55F', aws_secret_access_key='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')
response = s3_client.upload_file('/root/src/videly/openai_source_output.mp3', 'demovidelyusergenerations', 'test.mp3')