John Langley committed
Commit ba649da · 1 Parent(s): 73c42bf

streaming voice

Files changed (2):
  1. app.py +2 -2
  2. utilsinference.py +178 -0
app.py CHANGED
@@ -7,7 +7,7 @@ from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 from faster_whisper import WhisperModel
 
-from utilsasync import get_sentence, tts_interface
+from utilsinference import get_sentence, tts_interface
 
 os.environ["CUDACXX"] = "/usr/local/cuda/bin/nvcc"
 os.system('python -m unidic download')
@@ -105,7 +105,7 @@ async def create_demo():
     # Define chatbot component
     chatbot = gr.Chatbot(
         value=[(None, "Hi, I'm an AI training assistant. Let's get going, how should we start?")],  # Initial greeting from the chatbot
-        elem_id="chatbot",
+        elem_id="Conversation",
         bubble_full_width=False,
     )
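With the import swapped from utilsasync to utilsinference, app.py now gets its streaming helpers from the new module below. As a rough sketch of how the two imports could be wired together (not code from this commit: the handler name respond, the llm handle, and the hard-coded voice string are assumptions for illustration), a Gradio event handler might stream one spoken sentence at a time like this:

    # Hypothetical glue code, not part of the commit.
    def respond(history):
        # get_sentence yields (sentence, updated_history) pairs as the LLM streams;
        # each sentence is voiced immediately instead of waiting for the full reply.
        for sentence, updated_history in get_sentence(history, llm):
            audio_path = tts_interface(sentence, "en-US-JennyNeural - en-US (Female)")
            yield updated_history, audio_path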
utilsinference.py ADDED
@@ -0,0 +1,178 @@
+from huggingface_hub import InferenceClient
+import gradio as gr
+import nltk
+import edge_tts
+import tempfile
+import asyncio
+
+# Download the 'punkt' sentence tokenizer for NLTK
+nltk.download("punkt")
+
+client = InferenceClient(
+    "mistralai/Mistral-7B-Instruct-v0.3"
+)
+
+
+def format_prompt(message, history):
+    system_message = """
+    You are an empathetic, insightful, and supportive training coach who helps people deal with challenges and celebrate achievements.
+    You help people feel better by asking questions to reflect on and evoke feelings of positivity, gratitude, joy, and love.
+    You show radical candor and tough love.
+    Respond in a casual and friendly tone.
+    Sprinkle in filler words, contractions, idioms, and other casual speech that we use in conversation.
+    Emulate the user's speaking style and be concise in your response.
+    """
+    prompt = (
+        "<s>[INST]" + system_message + "[/INST]"
+    )
+    for user_prompt, bot_response in history:
+        if user_prompt is not None:
+            prompt += f"[INST] {user_prompt} [/INST]"
+
+        prompt += f" {bot_response}</s> "
+
+    if message == "":
+        message = "Hello"
+    prompt += f"[INST] {message} [/INST]"
+    return prompt
+
+
+def generate_llm_output(
+    prompt,
+    history,
+    llm,  # accepted for compatibility with a local-model code path; unused here
+    temperature=0.8,
+    max_tokens=256,
+    top_p=0.95,
+    stop_words=["<s>", "[/INST]", "</s>"]  # currently unused
+):
+    temperature = float(temperature)
+    if temperature < 1e-2:
+        temperature = 1e-2
+    top_p = float(top_p)
+
+    generate_kwargs = dict(
+        temperature=temperature,
+        max_new_tokens=max_tokens,
+        top_p=top_p,
+        repetition_penalty=1.0,
+        do_sample=True,
+        seed=42,
+    )
+
+    formatted_prompt = format_prompt(prompt, history)
+    try:
+        print("LLM Input:", formatted_prompt)
+        # Stream tokens from the hosted model via the Hugging Face Inference API
+        stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+        output = ""
+        for response in stream:
+            output += response.token.text
+            yield output
+
+    except Exception as e:
+        print("Unhandled Exception:", str(e))
+        gr.Warning("Unfortunately Mistral is unable to process")
+        yield "I do not know what happened, but I could not understand you."
+
+
+# Synchronous TTS wrapper so Gradio callbacks can use the async edge-tts API
+def tts_interface(text, voice):
+    audio = asyncio.run(text_to_speech(text, voice))
+    return audio
+
+
+# Text-to-speech: synthesize `text` with edge-tts and return the path to an MP3 file
+async def text_to_speech(text, voice):
+    rate = 10
+    pitch = 10
+    rate_str = f"{rate:+d}%"
+    pitch_str = f"{pitch:+d}Hz"
+
+    # Voice options look like "en-US-JennyNeural - en-US (Female)"; keep the short name
+    voice_short_name = voice.split(" - ")[0]
+    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
+
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
+        tmp_path = tmp_file.name
+        await communicate.save(tmp_path)
+    return tmp_path
+
+
+def get_sentence(history, llm):
+    history = [["", None]] if history is None else history
+    history[-1][1] = ""
+    sentence_list = []
+    sentence_hash_list = []
+
+    text_to_generate = ""
+    stored_sentence = None
+    stored_sentence_hash = None
+
+    for character in generate_llm_output(history[-1][0], history[:-1], llm):
+        history[-1][1] = character.replace("<|assistant|>", "")
+        # The output arrives word by word; re-split the partial reply into sentences
+        text_to_generate = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|assistant|>", " ").replace("<|ass>", "").replace("[/ASST]", "").replace("[/ASSI]", "").replace("[/ASS]", "").strip())
+        if len(text_to_generate) > 1:
+
+            dif = len(text_to_generate) - len(sentence_list)
+
+            if dif == 1 and len(sentence_list) != 0:
+                continue
+
+            if dif == 2 and len(sentence_list) != 0 and stored_sentence is not None:
+                continue
+
+            # All this complexity comes from appending a first short sentence to the
+            # next one so language auto-detection works properly
+            if stored_sentence is not None and stored_sentence_hash is None and dif > 1:
+                # The stored sentence was consumed; look at the next sentence to generate
+                sentence = text_to_generate[len(sentence_list) + 1]
+            elif stored_sentence is not None and len(text_to_generate) > 2 and stored_sentence_hash is not None:
+                print("Appending stored")
+                sentence = stored_sentence + text_to_generate[len(sentence_list) + 1]
+                stored_sentence_hash = None
+            else:
+                sentence = text_to_generate[len(sentence_list)]
+
+            # A sentence that is too short is held back and appended to the next one,
+            # again for proper language detection
+            if len(sentence) <= 15 and stored_sentence_hash is None and stored_sentence is None:
+                if sentence and sentence[-1] in [".", "!", "?"]:
+                    if stored_sentence_hash != hash(sentence):
+                        stored_sentence = sentence
+                        stored_sentence_hash = hash(sentence)
+                        print("Storing:", stored_sentence)
+                        continue
+
+            sentence_hash = hash(sentence)
+            if stored_sentence_hash is not None and sentence_hash == stored_sentence_hash:
+                continue
+
+            if sentence_hash not in sentence_hash_list:
+                sentence_hash_list.append(sentence_hash)
+                sentence_list.append(sentence)
+                print("New Sentence: ", sentence)
+                yield (sentence, history)
+
+    # Yield whatever remains as the final sentence once the stream ends
+    try:
+        last_sentence = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|ass>", "").replace("[/ASST]", "").replace("[/ASSI]", "").replace("[/ASS]", "").strip())[-1]
+        sentence_hash = hash(last_sentence)
+        if sentence_hash not in sentence_hash_list:
+            if stored_sentence is not None and stored_sentence_hash is not None:
+                last_sentence = stored_sentence + last_sentence
+                stored_sentence = stored_sentence_hash = None
+                print("Last Sentence with stored:", last_sentence)
+
+            sentence_hash_list.append(sentence_hash)
+            sentence_list.append(last_sentence)
+            print("Last Sentence: ", last_sentence)
+
+            yield (last_sentence, history)
+    except Exception:
+        print("ERROR on last sentence; history is:", history)
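For reference, here is a quick, hypothetical check of the instruct template that format_prompt builds; the one-turn history and messages are made up for illustration, but the bracketing follows the code above:

    from utilsinference import format_prompt

    history = [("Hi coach", "Hey! Great to see you.")]
    print(format_prompt("I hit my training goal today", history))
    # Prints the system message wrapped in <s>[INST] ... [/INST], followed by
    # [INST] Hi coach [/INST] Hey! Great to see you.</s> [INST] I hit my training goal today [/INST]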