John Langley committed
Commit cd00a26 · 1 Parent(s): 339e247

Working version of a streaming solution

Files changed (2)
  1. app.py +8 -4
  2. utils.py +0 -114
app.py CHANGED
@@ -6,7 +6,7 @@ from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 from faster_whisper import WhisperModel
 
-from utils import get_sentence, tts_interface
+from utilsasync import get_sentence, tts_interface
 
 # The device to load the model onto.
 #
@@ -56,8 +56,12 @@ def respond(chat_history, voice):
     if not voice:
         return None, gr.Warning("Please select a voice.")
 
-    history, response = get_sentence(chat_history, mistral_llm)
-    return history, response
+    for sentence, chatbot_history in get_sentence(chat_history, mistral_llm):
+        print("Inserting sentence to queue")
+        print(sentence)
+
+    #history, response = get_sentence(chat_history, mistral_llm)
+    return chatbot_history, sentence
 
 
 #Gradio Interface
@@ -102,7 +106,7 @@ async def create_demo():
         ).then(fn=respond, inputs=[chatbot, voice], outputs=[chatbot, ai_response]).then(fn=tts_interface, inputs=[ai_response, voice], outputs=[audio_playback])
 
         submit_button.click(fn=add_text, inputs=[chatbot, user_msg], outputs=[chatbot, user_msg], queue=False
-        ).then(fn=respond, inputs=[chatbot, voice], outputs=[chatbot, ai_response]) #.then(fn=tts_interface, inputs=[ai_response, voice], outputs=[audio_playback])
+        ).then(fn=respond, inputs=[chatbot, voice], outputs=[chatbot, ai_response]).then(fn=tts_interface, inputs=[ai_response, voice], outputs=[audio_playback])
 
 
     return demo
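
Note on the new import: utilsasync is not among the files changed in this commit, so the shape of its get_sentence is an assumption here. For the new loop in respond to work, get_sentence has to be a generator that yields (sentence, history) pairs as tokens stream out of the model. A minimal sketch of such a generator, reusing the format_prompt helper from the deleted utils.py (shown below) and llama_cpp's stream=True interface:

import nltk

def get_sentence(history, llm):
    # Sketch only: assumes utilsasync keeps the old format_prompt helper and
    # streams tokens via llama_cpp, yielding each sentence as it completes.
    history = [["", None]] if history is None else history
    history[-1][1] = ""
    buffer = ""
    prompt = format_prompt(history[-1][0], history[:-1])  # helper from the deleted utils.py
    for r in llm(prompt, max_tokens=256, stream=True):
        buffer += r["choices"][0]["text"]
        sentences = nltk.sent_tokenize(buffer)
        while len(sentences) > 1:               # a sentence is complete once the next one starts
            sentence = sentences.pop(0)
            history[-1][1] += sentence + " "
            yield sentence, history
        buffer = sentences[0] if sentences else ""
    if buffer.strip():                          # flush whatever remains at end of stream
        history[-1][1] += buffer
        yield buffer, history

As committed, respond drains this generator before returning, so only the last (sentence, history) pair reaches the Gradio outputs; since Gradio event handlers may themselves be generators, yielding from inside the loop in respond would stream each sentence to the UI as it arrives.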
utils.py DELETED
@@ -1,114 +0,0 @@
-import gradio as gr
-import nltk
-import edge_tts
-import tempfile
-import asyncio
-
-# Download the 'punkt' tokenizer for the NLTK library
-nltk.download("punkt")
-
-def format_prompt(message, history):
-    system_message = f"""
-    You are an empathetic, insightful, and supportive training coach who helps people deal with challenges and celebrate achievements.
-    You help people feel better by asking questions to reflect on and evoke feelings of positivity, gratitude, joy, and love.
-    You show radical candor and tough love.
-    Respond in a casual and friendly tone.
-    Sprinkle in filler words, contractions, idioms, and other casual speech that we use in conversation.
-    Emulate the user’s speaking style and be concise in your response.
-    """
-    prompt = (
-        "<s>[INST]" + system_message + "[/INST]"
-    )
-    for user_prompt, bot_response in history:
-        if user_prompt is not None:
-            prompt += f"[INST] {user_prompt} [/INST]"
-
-        prompt += f" {bot_response}</s> "
-
-    if message=="":
-        message="Hello"
-    prompt += f"[INST] {message} [/INST]"
-    return prompt
-
-
-def generate_llm_output(
-    prompt,
-    history,
-    llm,
-    temperature=0.8,
-    max_tokens=256,
-    top_p=0.95,
-    stop_words=["<s>","[/INST]", "</s>"]
-):
-    temperature = float(temperature)
-    if temperature < 1e-2:
-        temperature = 1e-2
-    top_p = float(top_p)
-
-    generate_kwargs = dict(
-        temperature=temperature,
-        max_tokens=max_tokens,
-        top_p=top_p,
-        stop=stop_words
-    )
-    formatted_prompt = format_prompt(prompt, history)
-    try:
-        print("LLM Input:", formatted_prompt)
-        # Local GGUF
-        output = ""
-        stream = llm(
-            formatted_prompt,
-            **generate_kwargs,
-            stream=True,
-        )
-        for r in stream:
-            print(r["choices"][0]["text"])
-            character = r["choices"][0]["text"]
-            if character in stop_words:
-                # end of context
-                return
-
-            output += r["choices"][0]["text"]
-
-
-    except Exception as e:
-        print("Unhandled Exception: ", str(e))
-        gr.Warning("Unfortunately Mistral is unable to process")
-        output = "I do not know what happened but I could not understand you ."
-    return output
-
-
-# tts interface function
-def tts_interface(text, voice):
-    audio = asyncio.run(text_to_speech(text, voice))
-    return audio
-
-
-# Text-to-speech function
-async def text_to_speech(text, voice):
-    rate = 10
-    pitch = 10
-    rate_str = f"{rate:+d}%"
-    pitch_str = f"{pitch:+d}Hz"
-
-    voice_short_name = voice.split(" - ")[0]
-    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
-
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
-        tmp_path = tmp_file.name
-        await communicate.save(tmp_path)
-    return tmp_path
-
-
-
-def get_sentence(history, llm):
-    history = [["", None]] if history is None else history
-    history[-1][1] = ""
-
-    text_to_generate = ""
-    text_to_generate = generate_llm_output(history[-1][0], history[:-1], llm)
-
-    history.append([None, text_to_generate])
-    return (history, text_to_generate)
-
-
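
For reference, the TTS path removed here can be exercised on its own. A minimal sketch using the same edge_tts calls as the deleted text_to_speech, with a hypothetical voice string in the "ShortName - Label" format that tts_interface split on:

import asyncio
import edge_tts

async def main():
    # Hypothetical voice string; the deleted code split on " - " to get the short name.
    voice = "en-US-AriaNeural - Female"
    communicate = edge_tts.Communicate("Hello there!", voice.split(" - ")[0])
    await communicate.save("hello.mp3")  # write the synthesized speech to an mp3 file

asyncio.run(main())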