LLaVa-56B-Chat / app.py
Satyam-Singh's picture
Update app.py
75bf577 verified
raw
history blame
2.71 kB
import gradio as gr
import pyttsx3 # Text-to-speech
import speech_recognition as sr # Speech-to-text
from llama_cpp import Llama
# Repository id of the GGUF model; also shown as the UI description.
model = "bartowski/Llama-3.2-1B-Instruct-GGUF"

# Load the Q8_0 quantized weights file from the `model` repository.
llm = Llama.from_pretrained(
    repo_id=model,
    filename="Llama-3.2-1B-Instruct-Q8_0.gguf",
    # Context size and threading.
    n_ctx=2000,
    n_threads=4,
    n_threads_batch=4,
    # Memory handling of the weights file.
    use_mmap=True,
    use_mlock=True,
    # Keep llama.cpp's diagnostic output enabled.
    verbose=True,
)

# Initialize TTS engine (shared by every call to text_to_speech).
tts_engine = pyttsx3.init()
# Speech-to-text function
def speech_to_text():
    """Record one utterance from the microphone and return its transcription.

    Audio is captured through ``sr.Microphone`` and transcribed with
    ``Recognizer.recognize_google``.

    Returns:
        The recognized text on success. On failure a human-readable message
        is returned instead of raising: one for unintelligible speech
        (``sr.UnknownValueError``) and one for a failed recognition request
        (``sr.RequestError``).
    """
    listener = sr.Recognizer()
    with sr.Microphone() as mic:
        print("Listening...")
        captured = listener.listen(mic)
        try:
            transcript = listener.recognize_google(captured)
        except sr.UnknownValueError:
            return "Sorry, I did not understand that."
        except sr.RequestError as err:
            return f"Could not request results; {err}"
        else:
            print(f"You said: {transcript}")
            return transcript
# Text-to-speech function
def text_to_speech(response_text):
    """Speak ``response_text`` aloud using the module-level ``tts_engine``.

    The text is queued with ``say`` and then ``runAndWait`` is called to
    process the queue before returning.
    """
    engine = tts_engine
    engine.say(response_text)
    engine.runAndWait()
# Main AI response function
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the model's reply to ``message`` and speak it once complete.

    A chat transcript is assembled from the system prompt, the earlier turns
    in ``history`` and the new user ``message``; the completion is then
    streamed from the module-level ``llm``.

    Args:
        message: Text of the new user turn.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
            Empty or ``None`` means there is no prior conversation; falsy
            entries inside a pair are skipped.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to ``create_chat_completion`` as ``max_tokens``.
        temperature: Forwarded as the sampling temperature.
        top_p: Forwarded as the nucleus-sampling threshold.

    Yields:
        The reply accumulated so far, once for every streamed chunk that
        carries content.
    """
    messages = [{"role": "system", "content": system_message}]

    # Tolerate a missing history so the first turn does not crash.
    for turn in history or []:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    completion = llm.create_chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )

    # Use a dedicated loop name so the ``message`` parameter is not rebound.
    for chunk in completion:
        delta = chunk["choices"][0]["delta"]
        piece = delta.get("content")
        # Some chunks carry no content (or an explicit None), e.g. the
        # role-only first chunk; concatenating None would raise TypeError.
        if piece is None:
            continue
        response += piece
        yield response

    # Speak the AI response, but only if the model actually produced text.
    if response:
        text_to_speech(response)
# Gradio UI with added microphone component
def _respond_to_audio(audio_path, system_message, max_tokens, temperature, top_p):
    """Transcribe a recorded clip and stream the model's reply to it.

    ``gr.Interface`` calls its function with one argument per input
    component (five here), whereas ``respond`` expects a text message plus a
    chat history before the remaining settings. This adapter bridges the two:
    it turns the recording into text and calls ``respond`` with an empty
    history.

    Args:
        audio_path: Path of the recorded audio file, or a falsy value when
            nothing has been recorded.
        system_message: Forwarded to ``respond``.
        max_tokens: Forwarded to ``respond``.
        temperature: Forwarded to ``respond``.
        top_p: Forwarded to ``respond``.

    Yields:
        The progressively growing reply from ``respond``, or a single
        human-readable message when the clip is missing or cannot be
        transcribed.
    """
    if not audio_path:
        # Nothing recorded (or the clip was cleared): show an empty response.
        yield ""
        return

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio = recognizer.record(source)

    # Same user-facing messages as speech_to_text for the same failures.
    try:
        question = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        yield "Sorry, I did not understand that."
        return
    except sr.RequestError as e:
        yield f"Could not request results; {e}"
        return

    yield from respond(question, [], system_message, max_tokens, temperature, top_p)


# Gradio UI with added microphone component
demo = gr.Interface(
    fn=_respond_to_audio,
    inputs=[
        # A complete recording is delivered as a file path so the whole
        # utterance can be transcribed at once (no chunk streaming).
        gr.Microphone(type="filepath", label="Speak your question"),
        gr.Textbox(
            value="You are a helpful assistant.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=8192, value=2048, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    outputs=gr.Textbox(label="Response"),
    live=True,
    description=model,
)

if __name__ == "__main__":
    demo.launch()