testspace / src /mytest.py
StormblessedKal's picture
docker for runpod
e1412bc
raw
history blame
2.77 kB
# The triple-quoted string below is NOT executed: it is a bare string literal
# used to park earlier experiment code (tone color conversion through
# ToneColorConverter / se_extractor, OpenAI text-to-speech requests, and an
# emotion-classification prompt). Only the statements after the closing
# quotes run.
# NOTE(review): if this literal is the first statement of the module it also
# becomes the module's __doc__ -- confirm that is acceptable.
# NOTE(review): the parked snippet calls `models.openai(...)` but no import of
# `models` is visible inside it; re-enabling it as-is would presumably raise
# NameError -- verify before reviving.
'''
import os
import torch
import se_extractor
from api import ToneColorConverter
ckpt_converter = 'checkpoints/converter'
device = 'cuda:0'
output_dir = 'outputs'
tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
os.makedirs(output_dir, exist_ok=True)
from openai import OpenAI
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
response = client.audio.speech.create(
model="tts-1",
voice="nova",
input="This audio will be used to extract the base speaker tone color embedding. " + \
"Typically a very short audio should be sufficient, but increasing the audio " + \
"length will also improve the output audio quality."
)
response.stream_to_file(f"{output_dir}/openai_source_output.mp3")
base_speaker = f"{output_dir}/openai_source_output.mp3"
source_se, audio_name = se_extractor.get_se(base_speaker, tone_color_converter)
reference_speaker = 'resources/example_reference.mp3'
target_se, audio_name = se_extractor.get_se(reference_speaker, tone_color_converter)
text = [
"MyShell is a decentralized and comprehensive platform for discovering, creating, and staking AI-native apps.",
]
src_path = f'{output_dir}/tmp.wav'
for i, t in enumerate(text):
response = client.audio.speech.create(
model="tts-1",
voice="alloy",
input=t,
)
response.stream_to_file(src_path)
save_path = f'{output_dir}/output_crosslingual_{i}.wav'
tone_color_converter.convert(
audio_src_path=src_path,
src_se=source_se,
tgt_se=target_se,
output_path=save_path,
message='')
model = models.openai("gpt-3.5-turbo",system_prompt='You are an expert in identifying the emotion of a sentence')
result = model.generate_choice("Harry's mind was racing with thoughts of the recent events at Hogwarts", ["friendly", "cheerful", "excited", "sad", "angry", "terrified", "shouting", "whispering"])
print(result)
from openai import OpenAI
import os
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
response = client.audio.speech.create(
model="tts-1",
voice="fable",
input="This audio will be used to extract the base speaker tone color embedding. " + \
"Typically a very short audio should be sufficient, but increasing the audio " + \
"length will also improve the output audio quality."
)
response.stream_to_file(f"openai_source_output.mp3")
'''
import boto3
# Upload the generated speech sample to S3.
#
# Credentials are deliberately NOT passed as literals. With no explicit keys,
# boto3 resolves them through its default provider chain (for example the
# AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables, the shared
# ~/.aws/credentials file, or an attached IAM role), which keeps secrets out
# of source control.
#
# SECURITY: an access key pair used to be hard-coded on this line. Because it
# was committed, it must be treated as compromised and revoked/rotated in IAM.
s3_client = boto3.client('s3')

# Arguments are (local filename, bucket, object key). Per the boto3 docs
# upload_file returns None and raises on failure; the `response` name is kept
# only so the module-level names stay the same as before.
response = s3_client.upload_file('/root/src/videly/openai_source_output.mp3', 'demovidelyusergenerations', 'test.mp3')