# NOTE: lines removed here were page-scrape residue from the hosting page
# ("Spaces: Runtime error", commit hashes, file size, a column ruler) — not code.
import gradio as gr
from faster_whisper import WhisperModel
import edge_tts
import tempfile
import asyncio
import openai
import os
# Respect a key already exported in the environment instead of clobbering it:
# the original unconditionally overwrote OPENAI_API_KEY with an empty string.
os.environ.setdefault('OPENAI_API_KEY', '')
# System-style prompt shared by every persona: frames the task (correcting
# transcribed speech attempts in Spanish/Japanese/English) and the answer rules.
base_message = '''
You are a language assistant. You help users to learn new languages. Users speak into their device, their voice is
converted to text, and you receive what they say. Your job is to correct them, in case they use the incorrect phrase,
idiom, do not form sentences properly and other issues. Remember, what you are receiving is the transcription of an audio file,
not the original text, so bear no mind to individual letter typos. Focus on the sentence structure, on the words they use and
HOW they use them.
RULES:
- You receive the user's incoming ATTEMPT AT TRYING TO SPEAK ONE OF THREE LANGUAGES: SPANISH, JAPANESE OR ENGLISH.
- If their attempt is correct, inform them of such in a manner similar to the examples
- If their attempt is incorrect, inform them of such in a manner similar to the examples
- Always answer with an English explanation of why their attempt is incorrect.
- Keep your answers to a 2 sentence-length maximum
Examples: \n
'''
# Few-shot examples for the "flirty friend" persona (encouraging, playful tone).
flirty_friend_examples = '''
"I has go to store yesterday." -> "You're getting there! But it’s ‘I went to the store yesterday.’ Keep it up!"
"She don’t like the movie." -> "Almost perfect! It's ‘She doesn’t like the movie.’ But I like your effort!"
"We are going to beach tomorrow." -> "So close! It’s ‘We are going to the beach tomorrow.’ Can't wait to hear more!"
"He didn’t saw me." -> "You're almost there! It should be ‘He didn’t see me.’ You're doing great, though!"
"Yo estas bien." -> "Wow... somebody's been practicing! But it's `Yo estoy bien!` Almost there!"
'''
# Few-shot examples for the "formal teacher" persona (polite, instructional tone).
formal_teacher_examples = '''
"I has go to store yesterday." -> "Thank you for your attempt. The correct form is ‘I went to the store yesterday.’ Please note the past tense usage."
"She don’t like the movie." -> "That was close. The correct sentence is ‘She doesn’t like the movie.’ Keep practicing your conjugations."
"We are going to beach tomorrow." -> "Your sentence is almost correct. It should be ‘We are going to the beach tomorrow.’ Don’t forget the definite article."
"He didn’t saw me." -> "This was a good try. However, it should be ‘He didn’t see me.’ Focus on your verb tenses."
"Yo estas bien." -> "That is good! But it's `Yo estoy bien!` Focus on your pronouns!"
'''
# Few-shot examples for the "sarcastic bro" persona (teasing, casual tone).
sarcastic_bro_examples = '''
"I has go to store yesterday." -> "Wow, close, but no. It’s ‘I went to the store yesterday.’ Just saying."
"She don’t like the movie." -> "Dude... what? It’s ‘She doesn’t like the movie.’ English is tricky, huh?"
"We are going to beach tomorrow." -> "Almost there, bro. It's ‘We are going to the beach tomorrow.’ Keep at it!"
"He didn’t saw me." -> "Not quite. The right way is ‘He didn’t see me.’ Somebody's been slackin!"
"Yo estas bien." -> "As if, it's `Yo estoy bien!` But I bet your doing your hardest... yeah right."
'''
def create_message(role: str, user_query: str) -> str:
    """Assemble the full LLM prompt for one persona and one user utterance.

    Args:
        role: Persona key — 'flirty_friend', 'formal_teacher', or
            'sarcastic_bro'.
        user_query: The (already "User: "-prefixed) transcribed utterance.

    Returns:
        base instructions + the persona's few-shot examples + the user query,
        ready to send to the chat model.

    Raises:
        ValueError: If ``role`` is not a known persona.  (The original
        if/elif chain had no else branch and raised UnboundLocalError
        on unknown roles instead.)
    """
    examples_by_role = {
        'flirty_friend': flirty_friend_examples,
        'formal_teacher': formal_teacher_examples,
        'sarcastic_bro': sarcastic_bro_examples,
    }
    try:
        examples = examples_by_role[role]
    except KeyError:
        raise ValueError(f"Unknown buddy personality: {role!r}") from None
    return f'''{base_message}
{examples} \n
User query: {user_query} -> '''
def gpt_answer(prompt):
    """Send ``prompt`` to the OpenAI chat API and return the reply text.

    Args:
        prompt: The complete prompt string built by ``create_message``.

    Returns:
        The stripped text content of the first completion choice.
    """
    # NOTE(review): the original also built a `messages` list with a
    # "system" role that was never passed to the API; the prompt is (and
    # was) sent as a single user message.  The dead local is removed.
    completion = openai.chat.completions.create(
        model="gpt-4",
        messages=[
            {
                "role": "user",
                "content": prompt,
            },
        ],
    )
    return completion.choices[0].message.content.strip()
# Speech-to-text model: faster-whisper "tiny" with float32 compute
# (smallest/fastest checkpoint; accuracy trade-off accepted for a demo).
model = WhisperModel("tiny", compute_type="float32")
# Text-to-speech function
async def text_to_speech(text, voice):
    """Synthesize ``text`` with edge-tts into a temporary .mp3 file.

    Args:
        text: The string to speak.
        voice: An edge-tts voice short name (e.g. "en-US-BrianNeural").

    Returns:
        A ``(path, warning)`` pair; ``warning`` is always ``None``.
    """
    speaker = edge_tts.Communicate(text, voice)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as audio_file:
        output_path = audio_file.name
        await speaker.save(output_path)
    return output_path, None
def generate_response(
    language_level, buddy_personality,
    language_choice, user_query_audio,
    chatbot_history
):
    """Run one conversation turn: transcribe, correct via LLM, synthesize reply.

    Args:
        language_level: Dropdown value ('Beginner'/...); currently unused but
            kept for interface compatibility with the Gradio wiring.
        buddy_personality: Dropdown value, e.g. 'Flirty Friend'.
        language_choice: 'English', 'Spanish', or 'Japanese'.
        user_query_audio: Filepath to the recorded user audio.
        chatbot_history: Mutable list of (user, bot) message pairs; appended to.

    Returns:
        (None, chatbot_history, bot_audio_path) — None resets the mic input,
        the history updates the chatbot widget, and the path feeds the
        autoplaying audio output.
    """
    language_codes = {'English': 'en',
                      'Spanish': 'es',
                      'Japanese': 'ja'}
    segments, _info = model.transcribe(
        audio=user_query_audio,
        language=language_codes[language_choice],
    )
    # Join *all* transcription segments: the original kept only segment [0],
    # truncating longer utterances and raising IndexError on silent audio.
    user_query_transcribed = ' '.join(s.text.strip() for s in segments).strip()
    user_message = 'User: ' + user_query_transcribed

    # Normalize the UI label ('Flirty Friend') to the persona key expected by
    # create_message ('flirty_friend'); the original passed the label through
    # unchanged, so no persona branch ever matched.
    role = buddy_personality.lower().replace(' ', '_')
    prompt = create_message(role, user_message)
    bot_message = 'Bot: ' + gpt_answer(prompt)
    chatbot_history.append((user_message, bot_message))

    # Pick a TTS voice matching the practice language; default to English.
    voices = {
        'Spanish': 'es-MX-JorgeNeural',
        'Japanese': 'ja-JP-KeitaNeural',
    }
    voice_short_name = voices.get(language_choice, 'en-US-BrianNeural')
    bot_message_audio, _warning = asyncio.run(
        text_to_speech(text=bot_message, voice=voice_short_name)
    )
    # None resets the user's mic input widget.
    return None, chatbot_history, bot_message_audio
# Gradio UI: dropdowns for language/level/persona, a chatbot transcript,
# a microphone input, and an autoplaying audio output for the bot's reply.
with gr.Blocks() as demo:
    header_section = gr.Markdown(
        """
        # AI Language Buddy!
        Click the **converse** button to practice your language skills!
        """)
    language = gr.Dropdown(
        choices=['English', 'Spanish', 'Japanese'],
        label='Language Choice',
        value='English'
    )
    language_level = gr.Dropdown(
        choices=['Beginner', 'Intermediate', 'Advanced'],
        label='Language Level',
        value='Beginner'
    )
    personality = gr.Dropdown(
        choices=['Formal Teacher', 'Flirty Friend', 'Sarcastic Bro'],
        label='Language Buddy Personality',
        value='Flirty Friend'
    )
    chatbot = gr.Chatbot()
    user_input = gr.Audio(
        sources='microphone',
        show_download_button=True,
        type='filepath'
    )
    ai_response = gr.Audio(
        autoplay=True
    )
    converse_button = gr.Button("Send Message")
    clear_button = gr.Button("Clear Convo History")
    # Wire the converse button: inputs mirror generate_response's signature;
    # outputs reset the mic, refresh the transcript, and play the reply.
    converse_button.click(
        fn=generate_response,
        inputs=[
            language_level,
            personality,
            language,
            user_input,
            chatbot
        ],
        outputs=[user_input,
                 chatbot,
                 ai_response]
    )
# Fixed: the original final line carried a stray " |" (scrape residue),
# which was a syntax error.
demo.launch()