# NOTE: lines removed here were page-scrape residue from the hosting page
# ("Spaces: Runtime error", commit hashes, file size, a column ruler) — not code.
import gradio as gr
from faster_whisper import WhisperModel
import edge_tts
import tempfile
import asyncio
import openai
import os
# Respect a key already exported in the environment instead of clobbering it:
# the original unconditionally overwrote OPENAI_API_KEY with an empty string.
os.environ.setdefault('OPENAI_API_KEY', '')
# System-style prompt shared by every persona: frames the task (correcting
# transcribed speech attempts in Spanish/Japanese/English) and the answer rules.
base_message = '''
You are a language assistant. You help users to learn new languages. Users speak into their device, their voice is
converted to text, and you receive what they say. Your job is to correct them, in case they use the incorrect phrase,
idiom, do not form sentences properly and other issues. Remember, what you are receiving is the transcription of an audio file,
not the original text, so bear no mind to individual letter typos. Focus on the sentence structure, on the words they use and
HOW they use them.
RULES:
- You receive the user's incoming ATTEMPT AT TRYING TO SPEAK ONE OF THREE LANGUAGES: SPANISH, JAPANESE OR ENGLISH.
- If their attempt is correct, inform them of such in a manner similar to the examples
- If their attempt is incorrect, inform them of such in a manner similar to the examples
- Always answer with an English explanation of why their attempt is incorrect.
- Keep your answers to a 2 sentence-length maximum
Examples: \n
'''
# Few-shot examples for the "flirty friend" persona (encouraging, playful tone).
flirty_friend_examples = '''
"I has go to store yesterday." -> "You're getting there! But it’s ‘I went to the store yesterday.’ Keep it up!"
"She don’t like the movie." -> "Almost perfect! It's ‘She doesn’t like the movie.’ But I like your effort!"
"We are going to beach tomorrow." -> "So close! It’s ‘We are going to the beach tomorrow.’ Can't wait to hear more!"
"He didn’t saw me." -> "You're almost there! It should be ‘He didn’t see me.’ You're doing great, though!"
"Yo estas bien." -> "Wow... somebody's been practicing! But it's `Yo estoy bien!` Almost there!"
'''
# Few-shot examples for the "formal teacher" persona (polite, instructional tone).
formal_teacher_examples = '''
"I has go to store yesterday." -> "Thank you for your attempt. The correct form is ‘I went to the store yesterday.’ Please note the past tense usage."
"She don’t like the movie." -> "That was close. The correct sentence is ‘She doesn’t like the movie.’ Keep practicing your conjugations."
"We are going to beach tomorrow." -> "Your sentence is almost correct. It should be ‘We are going to the beach tomorrow.’ Don’t forget the definite article."
"He didn’t saw me." -> "This was a good try. However, it should be ‘He didn’t see me.’ Focus on your verb tenses."
"Yo estas bien." -> "That is good! But it's `Yo estoy bien!` Focus on your pronouns!"
'''
# Few-shot examples for the "sarcastic bro" persona (teasing, casual tone).
sarcastic_bro_examples = '''
"I has go to store yesterday." -> "Wow, close, but no. It’s ‘I went to the store yesterday.’ Just saying."
"She don’t like the movie." -> "Dude... what? It’s ‘She doesn’t like the movie.’ English is tricky, huh?"
"We are going to beach tomorrow." -> "Almost there, bro. It's ‘We are going to the beach tomorrow.’ Keep at it!"
"He didn’t saw me." -> "Not quite. The right way is ‘He didn’t see me.’ Somebody's been slackin!"
"Yo estas bien." -> "As if, it's `Yo estoy bien!` But I bet your doing your hardest... yeah right."
'''
def create_message(role: str, user_query: str) -> str:
    """Assemble the full LLM prompt for one persona and one user utterance.

    Args:
        role: Persona key — 'flirty_friend', 'formal_teacher', or
            'sarcastic_bro'.
        user_query: The (already "User: "-prefixed) transcribed utterance.

    Returns:
        base instructions + the persona's few-shot examples + the user query,
        ready to send to the chat model.

    Raises:
        ValueError: If ``role`` is not a known persona.  (The original
        if/elif chain had no else branch and raised UnboundLocalError
        on unknown roles instead.)
    """
    examples_by_role = {
        'flirty_friend': flirty_friend_examples,
        'formal_teacher': formal_teacher_examples,
        'sarcastic_bro': sarcastic_bro_examples,
    }
    try:
        examples = examples_by_role[role]
    except KeyError:
        raise ValueError(f"Unknown buddy personality: {role!r}") from None
    return f'''{base_message}
{examples} \n
User query: {user_query} -> '''
def gpt_answer(prompt):
    """Send ``prompt`` to the OpenAI chat API and return the reply text.

    Args:
        prompt: The complete prompt string built by ``create_message``.

    Returns:
        The stripped text content of the first completion choice.
    """
    # NOTE(review): the original also built a `messages` list with a
    # "system" role that was never passed to the API; the prompt is (and
    # was) sent as a single user message.  The dead local is removed.
    completion = openai.chat.completions.create(
        model="gpt-4",
        messages=[
            {
                "role": "user",
                "content": prompt,
            },
        ],
    )
    return completion.choices[0].message.content.strip()
# Speech-to-text model: faster-whisper "tiny" with float32 compute
# (smallest/fastest checkpoint; accuracy trade-off accepted for a demo).
model = WhisperModel("tiny", compute_type="float32")
# Text-to-speech function
async def text_to_speech(text, voice):
    """Synthesize ``text`` with edge-tts into a temporary .mp3 file.

    Args:
        text: The string to speak.
        voice: An edge-tts voice short name (e.g. "en-US-BrianNeural").

    Returns:
        A ``(path, warning)`` pair; ``warning`` is always ``None``.
    """
    speaker = edge_tts.Communicate(text, voice)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as audio_file:
        output_path = audio_file.name
        await speaker.save(output_path)
    return output_path, None
def generate_response(
    language_level, buddy_personality,
    language_choice, user_query_audio,
    chatbot_history
):
    """Run one conversation turn: transcribe, correct via LLM, synthesize reply.

    Args:
        language_level: Dropdown value ('Beginner'/...); currently unused but
            kept for interface compatibility with the Gradio wiring.
        buddy_personality: Dropdown value, e.g. 'Flirty Friend'.
        language_choice: 'English', 'Spanish', or 'Japanese'.
        user_query_audio: Filepath to the recorded user audio.
        chatbot_history: Mutable list of (user, bot) message pairs; appended to.

    Returns:
        (None, chatbot_history, bot_audio_path) — None resets the mic input,
        the history updates the chatbot widget, and the path feeds the
        autoplaying audio output.
    """
    language_codes = {'English': 'en',
                      'Spanish': 'es',
                      'Japanese': 'ja'}
    segments, _info = model.transcribe(
        audio=user_query_audio,
        language=language_codes[language_choice],
    )
    # Join *all* transcription segments: the original kept only segment [0],
    # truncating longer utterances and raising IndexError on silent audio.
    user_query_transcribed = ' '.join(s.text.strip() for s in segments).strip()
    user_message = 'User: ' + user_query_transcribed

    # Normalize the UI label ('Flirty Friend') to the persona key expected by
    # create_message ('flirty_friend'); the original passed the label through
    # unchanged, so no persona branch ever matched.
    role = buddy_personality.lower().replace(' ', '_')
    prompt = create_message(role, user_message)
    bot_message = 'Bot: ' + gpt_answer(prompt)
    chatbot_history.append((user_message, bot_message))

    # Pick a TTS voice matching the practice language; default to English.
    voices = {
        'Spanish': 'es-MX-JorgeNeural',
        'Japanese': 'ja-JP-KeitaNeural',
    }
    voice_short_name = voices.get(language_choice, 'en-US-BrianNeural')
    bot_message_audio, _warning = asyncio.run(
        text_to_speech(text=bot_message, voice=voice_short_name)
    )
    # None resets the user's mic input widget.
    return None, chatbot_history, bot_message_audio
# Gradio UI: dropdowns for language/level/persona, a chatbot transcript,
# a microphone input, and an autoplaying audio output for the bot's reply.
with gr.Blocks() as demo:
    header_section = gr.Markdown(
        """
        # AI Language Buddy!
        Click the **converse** button to practice your language skills!
        """)
    language = gr.Dropdown(
        choices=['English', 'Spanish', 'Japanese'],
        label='Language Choice',
        value='English'
    )
    language_level = gr.Dropdown(
        choices=['Beginner', 'Intermediate', 'Advanced'],
        label='Language Level',
        value='Beginner'
    )
    personality = gr.Dropdown(
        choices=['Formal Teacher', 'Flirty Friend', 'Sarcastic Bro'],
        label='Language Buddy Personality',
        value='Flirty Friend'
    )
    chatbot = gr.Chatbot()
    user_input = gr.Audio(
        sources='microphone',
        show_download_button=True,
        type='filepath'
    )
    ai_response = gr.Audio(
        autoplay=True
    )
    converse_button = gr.Button("Send Message")
    clear_button = gr.Button("Clear Convo History")
    # Wire the converse button: inputs mirror generate_response's signature;
    # outputs reset the mic, refresh the transcript, and play the reply.
    converse_button.click(
        fn=generate_response,
        inputs=[
            language_level,
            personality,
            language,
            user_input,
            chatbot
        ],
        outputs=[user_input,
                 chatbot,
                 ai_response]
    )
# Fixed: the original final line carried a stray " |" (scrape residue),
# which was a syntax error.
demo.launch()