# import gradio as gr
# import requests
# import os
# def function1(prompt):
#     response = requests.post("https://tommy24-testing3.hf.space/run/predict", json={
#         "data": [
#             prompt,
#         ]}).json()
#     message = response["data"][0]
#     url = 'https://api.elevenlabs.io/v1/text-to-speech/pNInz6obpgDQGcFmaJgB'
#     headers = {
#         'accept': 'audio/mpeg',
#         'xi-api-key': os.environ.get("test2"),
#         'Content-Type': 'application/json'
#     }
#     data = {
#         "text": message,
#         "voice_settings": {
#             "stability": 0,
#             "similarity_boost": 0
#         }
#     }
#     response = requests.post(url, headers=headers, json=data)
#     if response.status_code == 200:
#         file_path = 'test.mp3'
#         if os.path.isfile(file_path):
#             os.remove(file_path)
#         with open(file_path, 'wb') as f:
#             f.write(response.content)
#     return "test.mp3"
# iface = gr.Interface(fn=function1, inputs="text", outputs=[gr.Audio(label="Audio",type="numpy")])
# iface.launch()
# import gradio as gr
# import requests
# import urllib.request
# from pydub import AudioSegment
# import numpy as np
# import os
# def function1(prompt):
#     response = requests.post("https://tommy24-testing3.hf.space/run/predict", json={
#         "data": [
#             prompt,
#         ]}).json()
#     data = response["data"][0]
#     response = requests.post("https://matthijs-speecht5-tts-demo.hf.space/run/predict", json={
#         "data": [
#             data,
#             "KSP (male)",
#         ]
#     }).json()
#     data = response["data"][0]["name"]
#     data = "https://matthijs-speecht5-tts-demo.hf.space/file="+data
#     file_name, headers = urllib.request.urlretrieve(data, "speech.mp3")
#     # code = random.randint(1,1000)
#     # generated_file = f"output{code}"
#     filename = "output.mp3"
#     if os.path.exists(filename):
#         os.remove(filename)
#     else:
#         pass
#     command = f"ffmpeg -i {file_name} -vn -ar 44100 -ac 2 -b:a 192k output.mp3"
#     os.system(command)
#     return "output.mp3"
# iface = gr.Interface(fn=function1, inputs="text", outputs=[gr.Audio(label="Audio",type="numpy")])
# iface.launch()
import base64
import io
import os
import subprocess
import sys
import urllib.request
import wave

import azure.cognitiveservices.speech as speechsdk
import gradio as gr
import numpy as np
import requests
from pydub import AudioSegment
# Azure Speech settings are read from the environment variables
# "test3" (subscription key), "test4" (region) and "test5" (voice name).
speech_key = os.getenv("test3")
service_region = os.getenv("test4")
speech_config = speechsdk.SpeechConfig(
    region=service_region,
    subscription=speech_key,
)
# Note: the voice setting will not overwrite the voice element in input SSML.
speech_config.speech_synthesis_voice_name = os.getenv("test5")
def function1(prompt):
    """Generate a spoken reply to *prompt* and return the path of the MP3 file.

    The prompt is posted to the text-generation endpoint, the first element
    of the returned ``data`` list is synthesized with the module-level
    ``speech_config``, and the resulting audio is transcoded to MP3 by ffmpeg.

    Args:
        prompt: Text to send to the text-generation endpoint.

    Returns:
        The string ``"output.mp3"`` (path relative to the working directory).

    Raises:
        requests.RequestException: If the text-generation request fails,
            times out, or returns an HTTP error status.
        RuntimeError: If the reply has an unexpected shape, speech synthesis
            does not complete, or ffmpeg cannot produce the MP3.
    """
    wav_path = "audio.wav"
    filename = "output.mp3"

    # A timeout keeps a hung upstream service from blocking the request forever.
    response = requests.post(
        "https://tommy24-testing3.hf.space/run/predict",
        json={"data": [prompt]},
        timeout=120,
    )
    response.raise_for_status()
    try:
        message = response.json()["data"][0]
    except (ValueError, KeyError, IndexError, TypeError) as err:
        raise RuntimeError(
            "Unexpected response from the text-generation service"
        ) from err

    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
    result = speech_synthesizer.speak_text_async(message).get()
    if result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
        # Fail loudly instead of returning a stale or missing output.mp3.
        raise RuntimeError(f"Speech synthesis did not complete: {result.reason}")

    audio_data = result.audio_data
    if audio_data[:4] == b"RIFF":
        # The payload is already a WAV container; wrapping it in a second
        # header would turn the first header into audible noise.
        # NOTE(review): depends on the configured synthesis output format.
        with open(wav_path, "wb") as wav_file:
            wav_file.write(audio_data)
    else:
        # Headerless PCM: describe it as 16 kHz, 16-bit, mono.
        # NOTE(review): assumes the raw format matches these values - confirm.
        with wave.open(wav_path, "wb") as wave_file:
            wave_file.setnchannels(1)
            wave_file.setsampwidth(2)
            wave_file.setframerate(16000)
            wave_file.writeframes(audio_data)

    # Use ffmpeg to convert the wave file to an mp3 file. "-y" overwrites any
    # previous output, and the argument list avoids going through a shell.
    command = [
        "ffmpeg",
        "-i", wav_path,
        "-y",
        "-codec:a", "libmp3lame",
        "-qscale:a", "2",
        filename,
    ]
    try:
        subprocess.run(command, check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        raise RuntimeError("ffmpeg failed to convert the audio to MP3") from err
    return filename
# Wire function1 into a Gradio UI (text input, audio output) and start serving.
audio_output = gr.Audio(label="Audio", type="numpy")
iface = gr.Interface(fn=function1, inputs="text", outputs=[audio_output])
iface.launch()