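# Speech to ChatGPT to Speech: a Gradio Space that records a voice message,
# transcribes it with a remote Whisper Space, sends the transcript to OpenAI,
# and reads the reply aloud with the Neon Coqui TTS plugin.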
import tempfile
import gradio as gr
from neon_tts_plugin_coqui import CoquiTTS
LANGUAGES = list(CoquiTTS.langs.keys())
default_lang = "en"
#import whisper
#whisper_model = whisper.load_model("small")
whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")
#chatgpt = gr.Blocks.load(name="spaces/fffiloni/whisper-to-chatGPT")
chatgpt = gr.Blocks.load(name="spaces/seawolf2357/chatgptclone")
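# NOTE: this chatgpt handle is loaded but currently unused; the call to it
# inside chat_hf is commented out in favor of openai_create below.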
import os
import json
import openai
#session_token = os.environ.get('SessionToken')
api_key = os.environ.get('api_key')
# if you have an OpenAI API key as a string, enable the line below
openai.api_key = api_key
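# The key comes from the Space's "api_key" secret; if it is unset,
# openai.api_key stays None and requests will fail with an auth error.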
title = "Speech to ChatGPT to Speech"
#info = "more info at [Neon Coqui TTS Plugin](https://github.com/NeonGeckoCom/neon-tts-plugin-coqui), [Coqui TTS](https://github.com/coqui-ai/TTS)"
#badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=neongeckocom.neon-tts-plugin-coqui"
coquiTTS = CoquiTTS()
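# Shared TTS engine from neon-tts-plugin-coqui; chat_hf passes it the target
# language chosen in the UI (see the LANGUAGES list built above).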
# ChatGPT
def chat_hf(audio, custom_token, language):
    try:
        whisper_text = translate(audio)
        if whisper_text == "ERROR: You have to either use the microphone or upload an audio file":
            gpt_response = "MISSING AUDIO: Record your voice by clicking the microphone button, do not forget to stop recording before sending your message ;)"
        else:
            #gpt_response = chatgpt(whisper_text, [], fn_index=0)
            #print(gpt_response)
            #gpt_response = gpt_response[0]
            gpt_response = openai_create(whisper_text)
    except Exception:
        whisper_text = translate(audio)
        gpt_response = "Sorry, I'm quite busy right now, but please try again later :)"

    print(gpt_response)

    # to voice
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coquiTTS.get_tts(gpt_response, fp, speaker={"language": language})

    return whisper_text, gpt_response, fp.name
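# NOTE: delete=False keeps the wav on disk after the context manager closes
# the file handle, so Gradio can still serve fp.name as the audio output.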
# whisper
#def translate(audio):
#    print("""
#    -
#    Sending audio to Whisper ...
#    -
#    """)
#
#    audio = whisper.load_audio(audio)
#    audio = whisper.pad_or_trim(audio)
#
#    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
#
#    _, probs = whisper_model.detect_language(mel)
#
#    transcript_options = whisper.DecodingOptions(task="transcribe", fp16=False)
#
#    transcription = whisper.decode(whisper_model, mel, transcript_options)
#
#    print("language spoken: " + transcription.language)
#    print("transcript: " + transcription.text)
#    print("-------------------------------------------")
#
#    return transcription.text
def translate(audio):
    print("""
    -
    Sending audio to Whisper ...
    -
    """)
    text_result = whisper(audio, None, "transcribe", fn_index=0)
    print(text_result)
    return text_result
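# NOTE: fn_index=0 targets the first API endpoint of the whisper-large-v2
# Space; the three positional arguments follow that Space's input signature
# (two audio slots and a task), with the second slot passed as None here.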
def openai_create(prompt):
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0.9,
        max_tokens=150,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0.6,
        stop=[" Human:", " AI:"]
    )
    return response.choices[0].text
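# NOTE: despite the Space's name, this uses the legacy Completions endpoint
# with text-davinci-003 rather than the ChatGPT chat API; the stop sequences
# keep the model from generating both sides of the dialogue itself.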
with gr.Blocks() as blocks:
    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
                + title
                + "</h1>")
    #gr.Markdown(description)
    radio = gr.Radio(label="Language", choices=LANGUAGES, value=default_lang)
    with gr.Row(equal_height=True):  # equal_height=False
        with gr.Column():  # variant="panel"
            audio_file = gr.Audio(source="microphone", type="filepath")
            custom_token = gr.Textbox(label='If it fails, use your own session token', placeholder="your own session token")
            with gr.Row():  # mobile_collapse=False
                submit = gr.Button("Submit", variant="primary")
        with gr.Column():
            text1 = gr.Textbox(label="Speech to Text")
            text2 = gr.Textbox(label="ChatGPT Response")
            audio = gr.Audio(label="Output", interactive=False)
    #gr.Markdown(info)
    #gr.Markdown("<center>"
    #            + f'<img src={badge} alt="visitors badge"/>'
    #            + "</center>")

    # actions
    submit.click(
        chat_hf,
        [audio_file, custom_token, radio],
        [text1, text2, audio],
    )
    radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, text2)

blocks.launch(debug=True)
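# debug=True surfaces tracebacks in the Space logs; add share=True to get a
# temporary public link when running outside of Hugging Face Spaces.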