File size: 5,887 Bytes
fec4cfa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import os
import random
import textwrap

import gradio as gr
import pyttsx3
import speech_recognition as sr
from huggingface_hub import InferenceClient
# Initialize the speech recognition and TTS engine
# Both objects are created once at import time; recognize_speech() uses
# `recognizer` and speak_text() uses `tts_engine`.
recognizer = sr.Recognizer()
tts_engine = pyttsx3.init()
# Define the model to be used
# The inference client is bound to this model id; chat_inf() streams
# completions through it.
model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
client = InferenceClient(model)
# Embedded system prompt
# format_prompt_mixtral() inserts this text into every prompt, right after the
# info.md content and before the conversation turns.
system_prompt_text = (
"You are a smart and helpful co-worker of Thailand based multi-national company PTT, "
"and PTTEP. You help with any kind of request and provide a detailed answer to the question. "
"But if you are asked about something unethical or dangerous, you must refuse and provide a safe and respectful way to handle that."
)
# Maximum length, in characters, of each piece info.md is split into.
chunk_size = 2500  # Adjust this size as needed

# Load the reference document; the encoding is given explicitly so the result
# does not depend on the platform's default.
with open("info.md", mode="r", encoding="utf-8") as file:
    info_md_content = file.read()

# Split the document into pieces of at most `chunk_size` characters.
# NOTE: textwrap.wrap() with its default settings replaces newlines and other
# whitespace characters with spaces, so the chunks do not keep info.md's
# original line structure.
info_md_chunks = textwrap.wrap(info_md_content, width=chunk_size)
def get_all_chunks(chunks):
    """Return all chunks merged into one string, separated by blank lines.

    Args:
        chunks: Iterable of strings.

    Returns:
        The strings joined with two newline characters between neighbours;
        an empty string when there are no chunks.
    """
    paragraph_break = "\n\n"
    return paragraph_break.join(chunks)
def format_prompt_mixtral(message, history, info_md_chunks):
    """Assemble the complete instruct-style prompt for one request.

    The prompt consists of, in order: the ``<s>`` marker, the joined info.md
    chunks, the system prompt, every past exchange rendered as
    ``[INST] user [/INST] bot</s> ``, and finally the new message wrapped in
    ``[INST] ... [/INST]``.

    Args:
        message: The new user message.
        history: Sequence of ``(user_prompt, bot_response)`` pairs; may be
            empty or ``None``.
        info_md_chunks: Chunks of reference text placed at the start.

    Returns:
        The prompt as a single string.
    """
    pieces = [
        "<s>",
        f"{get_all_chunks(info_md_chunks)}\n\n",  # Reference text comes first
        f"{system_prompt_text}\n\n",  # Then the system prompt
    ]
    # A falsy history (None or empty) contributes no turns.
    for user_prompt, bot_response in history or ():
        pieces.append(f"[INST] {user_prompt} [/INST]")
        pieces.append(f" {bot_response}</s> ")
    pieces.append(f"[INST] {message} [/INST]")
    return "".join(pieces)
def chat_inf(prompt, history, seed, temp, tokens, top_p, rep_p):
    """Stream the model's reply to ``prompt`` as chat-history snapshots.

    Args:
        prompt: The new user message.
        history: List of ``(user, bot)`` pairs from earlier turns, or ``None``.
            When a list is given, the finished exchange is appended to it.
        seed: Sampling seed forwarded to the inference client.
        temp: Sampling temperature.
        tokens: Maximum number of new tokens to generate.
        top_p: Nucleus-sampling probability mass.
        rep_p: Repetition penalty.

    Yields:
        While tokens arrive: a one-item list ``[(prompt, partial_reply)]``.
        Once the stream ends: ``history`` with ``(prompt, full_reply)``
        appended.
    """
    # clear_fn() resets the chatbot value to None, so tolerate a None history
    # instead of failing on .append() after the whole reply was generated.
    if history is None:
        history = []
    generate_kwargs = dict(
        temperature=temp,
        max_new_tokens=tokens,
        top_p=top_p,
        repetition_penalty=rep_p,
        do_sample=True,
        seed=seed,
    )
    formatted_prompt = format_prompt_mixtral(prompt, history, info_md_chunks)
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    output = ""
    for response in stream:
        token = response.token
        # Special tokens (e.g. the end-of-sequence marker) are control
        # symbols, not reply text; keeping them would show them to the user
        # and feed them back into later prompts through the history.
        if getattr(token, "special", False):
            continue
        output += token.text
        yield [(prompt, output)]
    history.append((prompt, output))
    yield history
def clear_fn():
    """Return a pair of ``None`` values used to reset two UI components."""
    reset_values = (None, None)
    return reset_values
# Initial value of the "Seed" slider; drawn once when the module is loaded.
rand_val = random.randint(1, 1111111111111111)
def check_rand(inp, val):
    """Return a "Seed" slider holding either a fresh random seed or ``val``.

    Args:
        inp: When truthy, a new random seed is drawn.
        val: Seed to keep when ``inp`` is falsy; converted with ``int()``.

    Returns:
        A ``gr.Slider`` configured with the chosen seed value.
    """
    seed_value = random.randint(1, 1111111111111111) if inp else int(val)
    return gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, value=seed_value)
def recognize_speech(audio):
    """Transcribe a recorded audio file to text.

    Args:
        audio: Path of the audio file to transcribe. ``None`` or an empty
            value means nothing was recorded.

    Returns:
        The recognized text on success. On failure, a human-readable message
        that starts with ``"Sorry"`` (no or unintelligible audio) or
        ``"Error:"`` (the recognition service could not be reached).
    """
    # Without a recording there is nothing to open; report it the same way as
    # other recognition failures instead of crashing inside sr.AudioFile.
    if not audio:
        return "Sorry, no audio was provided."
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)  # Read the whole file
    try:
        # Recognize the speech using Google's API
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Sorry, I could not understand the audio."
    except sr.RequestError:
        return "Error: Could not request results from the speech recognition service."
def speak_text(text, output_path='output.mp3'):
    """Synthesize ``text`` to an audio file with pyttsx3.

    Args:
        text: The text to speak.
        output_path: File the audio is written to. Defaults to
            ``'output.mp3'``, the location used before this parameter existed.

    Returns:
        ``output_path``, so callers can pass the file on without repeating
        the file name.
    """
    tts_engine.save_to_file(text, output_path)  # Queue the synthesis job
    tts_engine.runAndWait()  # Block until the queued job has been processed
    return output_path
# Build the UI: chat window on top, audio input with its buttons next to the
# generation settings below, and a player for the spoken reply.
with gr.Blocks() as app:
    gr.HTML("""<center><h1 style='font-size:xx-large;'>PTT Chatbot</h1><br><h3>running on Huggingface Inference</h3><br><h7>EXPERIMENTAL</center>""")
    with gr.Row():
        chat = gr.Chatbot(height=500)
    with gr.Group():
        with gr.Row():
            with gr.Column(scale=3):
                inp = gr.Audio(type="filepath")  # Audio input
                with gr.Row():
                    with gr.Column(scale=2):
                        btn = gr.Button("Chat")
                    with gr.Column(scale=1):
                        with gr.Group():
                            stop_btn = gr.Button("Stop")
                            clear_btn = gr.Button("Clear")
            with gr.Column(scale=1):
                with gr.Group():
                    rand = gr.Checkbox(label="Random Seed", value=True)
                    seed = gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, step=1, value=rand_val)
                    tokens = gr.Slider(label="Max new tokens", value=3840, minimum=0, maximum=8000, step=64, interactive=True, visible=True, info="The maximum number of tokens")
                    temp = gr.Slider(label="Temperature", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
                    top_p = gr.Slider(label="Top-P", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
                    rep_p = gr.Slider(label="Repetition Penalty", step=0.1, minimum=0.1, maximum=2.0, value=1.0)
    hid1 = gr.Number(value=1, visible=False)
    output_audio = gr.Audio(label="Output Audio", type="filepath", interactive=False)  # Plays the spoken reply

    def handle_chat(audio_input, chat_history, seed, temp, tokens, top_p, rep_p):
        """Run one voice turn: transcribe, query the model, speak the reply.

        Args:
            audio_input: Path of the recorded audio, or ``None`` when nothing
                was recorded.
            chat_history: Current chatbot value, a list of ``(user, bot)``
                pairs or ``None``.
            seed, temp, tokens, top_p, rep_p: Generation settings passed on
                to ``chat_inf``.

        Returns:
            A 3-tuple matching the outputs ``[chat, inp, output_audio]``: the
            updated chat history, ``None`` to clear the audio input, and the
            path of the spoken reply (``None`` when nothing was synthesized).
        """
        # Work on a copy: chat_inf() appends the finished exchange to the
        # list it is given, which would otherwise duplicate the turn below.
        chat_history = list(chat_history or [])

        if not audio_input:
            return chat_history + [(None, "Sorry, no audio was provided.")], None, None

        user_message = recognize_speech(audio_input)
        # recognize_speech() reports failures in-band with messages starting
        # with "Sorry," or "Error:". Matching the prefix catches both kinds
        # and is narrower than searching for "Sorry" anywhere in the text.
        if user_message.startswith(("Sorry,", "Error:")):
            # Show the problem as a bot message; it must not be written to
            # the audio input, which expects a file path.
            return chat_history + [(None, user_message)], None, None

        # Drain the whole stream. Every snapshot is a list of (user, bot)
        # pairs whose last entry carries the reply generated so far, so the
        # final snapshot holds the complete reply.
        response = ""
        for snapshot in chat_inf(user_message, list(chat_history), seed, temp, tokens, top_p, rep_p):
            response = snapshot[-1][1]

        audio_path = None
        if response.strip():
            # Fall back to the default file name in case speak_text()
            # returns nothing.
            audio_path = speak_text(response) or 'output.mp3'
        return chat_history + [(user_message, response)], None, audio_path

    # Refresh the seed first so the "Random Seed" checkbox takes effect, then
    # run the chat turn with the updated slider value.
    go = btn.click(check_rand, [rand, seed], seed).then(
        handle_chat,
        [inp, chat, seed, temp, tokens, top_p, rep_p],
        [chat, inp, output_audio],
    )
    stop_btn.click(None, None, None, cancels=[go])
    clear_btn.click(clear_fn, None, [inp, chat])
# Start the app with request queuing and a public share link.
# SECURITY: the login used to be fixed in the source code. It can now be
# overridden with the APP_AUTH_USERNAME / APP_AUTH_PASSWORD environment
# variables; the old values remain only as defaults so existing deployments
# keep working, and should be replaced since they are visible to anyone who
# can read this file.
app.queue(default_concurrency_limit=10).launch(
    share=True,
    auth=(
        os.environ.get("APP_AUTH_USERNAME", "admin"),
        os.environ.get("APP_AUTH_PASSWORD", "0112358"),
    ),
)
|