streaming voice
John Langley committed · commit 8167b16 · 1 parent: ba649da
Files changed: app.py (+8 -6), utilsinference.py (+16 -23)
app.py CHANGED
@@ -7,7 +7,7 @@ from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 from faster_whisper import WhisperModel
 
-from utilsinference import get_sentence, tts_interface
+from utilsinference import get_sentence, tts_interface, generate_llm_output
 
 os.environ["CUDACXX"] = "/usr/local/cuda/bin/nvcc"
 os.system('python -m unidic download')
@@ -61,14 +61,16 @@ def respond(chat_history, voice):
     if not voice:
         return None, gr.Warning("Please select a voice.")
 
-
-
-
+    sentence = generate_llm_output(chat_history[-1][0], chat_history[:-1], mistral_llm)
+    audiopb = tts_interface(sentence, voice)
+
+    print("Inserting sentence to queue")
+    print(sentence)
+
 
-    audiopb = tts_interface(sentence, voice)
 
     #history, response = get_sentence(chat_history, mistral_llm)
-
+    yield chat_history, sentence, audiopb
 
 
 #Gradio Interface
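With this change, respond becomes a generator: each yield pushes an updated (chat history, sentence, audio) triple to the Gradio outputs bound to the event, which is how Gradio streams intermediate results to the UI. Below is a minimal sketch of that handler pattern; fake_llm_stream is a hypothetical stand-in for the Space's Mistral/XTTS pipeline, and nothing beyond the yield-per-update shape is taken from the commit itself:

    import gradio as gr

    def fake_llm_stream(prompt):
        # Stand-in for generate_llm_output: yields a growing partial reply.
        reply = ""
        for word in ["streaming", "voice", "demo"]:
            reply += word + " "
            yield reply

    def respond(chat_history):
        # Generator event handler: every yield re-renders the bound outputs,
        # so the chatbot and textbox update as new text arrives.
        for sentence in fake_llm_stream(chat_history[-1][0]):
            chat_history[-1][1] = sentence
            yield chat_history, sentence

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        latest = gr.Textbox(label="Current sentence")
        msg = gr.Textbox(label="Message")
        # Append the user turn first, then stream the assistant turn.
        msg.submit(
            lambda m, h: ("", h + [[m, None]]), [msg, chatbot], [msg, chatbot]
        ).then(respond, chatbot, [chatbot, latest])

    demo.launch()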
utilsinference.py CHANGED
@@ -49,32 +49,25 @@ def generate_llm_output(
     temperature = float(temperature)
     if temperature < 1e-2:
         temperature = 1e-2
-
+    top_p = float(top_p)
 
-
-
-
-
-
-
-
+    generate_kwargs = dict(
+        temperature=temperature,
+        max_new_tokens=max_tokens,
+        top_p=top_p,
+        repetition_penalty=1.0,
+        do_sample=True,
+        seed=42,
+    )
 
     formatted_prompt = format_prompt(prompt, history)
-
-
-
-
-
-
-
-        yield output
-        return output
-
-    except Exception as e:
-        print("Unhandled Exception: ", str(e))
-        gr.Warning("Unfortunately Mistral is unable to process")
-        output = "I do not know what happened but I could not understand you ."
+
+    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    output = ""
+
+    for response in stream:
+        output += response.token.text
+        yield output
     return output
 
 
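The new body matches the streaming pattern of huggingface_hub's InferenceClient.text_generation: with stream=True and details=True the call returns an iterator of per-token events, and event.token.text is the newly generated piece, which the loop accumulates and re-yields as a growing string. A self-contained sketch of that pattern follows; it assumes the diff's client is an InferenceClient, and the model id and prompt are illustrative only:

    from huggingface_hub import InferenceClient

    # Assumption: the diff's `client` is an InferenceClient pointed at a
    # Mistral endpoint; the model id here is illustrative, not from the commit.
    client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")

    def stream_completion(prompt, max_tokens=256, temperature=0.7, top_p=0.95):
        generate_kwargs = dict(
            temperature=temperature,
            max_new_tokens=max_tokens,
            top_p=top_p,
            repetition_penalty=1.0,
            do_sample=True,
            seed=42,
        )
        # stream=True + details=True yields one event per generated token.
        stream = client.text_generation(
            prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
        )
        output = ""
        for event in stream:
            output += event.token.text
            yield output  # emit the growing partial string after each token

    for partial in stream_completion("Tell me a short story."):
        print(partial)

Two Python details are worth noting. Because generate_llm_output now contains yield, it is a generator function, so the trailing return output simply ends iteration rather than returning a string. For the same reason, the sentence = generate_llm_output(...) call in app.py binds a generator object rather than text, which tts_interface and the yield in respond are presumably written to consume.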