import gradio as gr
from llama_cpp import Llama
import whisper
from gtts import gTTS
import tempfile
import os
from huggingface_hub import hf_hub_download

# ----- Initialization -----
# Download the GGUF weights from the Hugging Face Hub (cached locally after
# the first run).
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-GGUF",
    filename="llama-2-7b.Q2_K.gguf",
)

# Initialize the llama-cpp model.
llm = Llama(
    model_path=model_path,
    n_threads=2,      # CPU cores
    n_batch=512,      # Should be between 1 and n_ctx; size to the VRAM in your GPU.
    n_gpu_layers=43,  # Adjust based on your model and GPU VRAM pool; use 0 for CPU-only.
    n_ctx=4096,       # Context window
)
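
# Optional sanity check (a sketch; uncomment while debugging): a one-off,
# deterministic completion confirms the weights loaded and inference works.
#
#   _probe = llm("Q: What is 2 + 2? A:", max_tokens=8, temperature=0.0)
#   print(_probe["choices"][0]["text"])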

# Load the Whisper model for speech-to-text transcription.
whisper_model = whisper.load_model("base")


# ----- Helper Functions -----
def transcribe_audio(audio_file):
    """
    Transcribes the provided audio file using Whisper.
    """
    if audio_file is None:
        return ""
    result = whisper_model.transcribe(audio_file)
    return result["text"]
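
# Usage sketch ("sample.wav" is a hypothetical local recording). Whisper's
# transcribe() takes a filepath and returns a dict; besides "text" it also
# carries "segments" and the detected "language".
#
#   print(transcribe_audio("sample.wav"))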


def generate_response(prompt, max_tokens=150, temperature=0.7):
    """
    Uses llama-cpp-python to generate a response for the given prompt.
    """
    # Call the model; the output is a dict with a "choices" list. echo=False
    # returns only the completion (echo=True would prepend the prompt itself),
    # and stopping on "User:" keeps the model from writing the user's next turn.
    output = llm(prompt, max_tokens=max_tokens, temperature=temperature,
                 echo=False, stop=["User:"])
    response = output["choices"][0]["text"]
    return response.strip()
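
# Sketch of the prompt shape the chat loop below feeds in; the "User:" stop
# token limits the completion to a single assistant turn:
#
#   prompt = "User: Hello, who are you?\nAssistant: "
#   print(generate_response(prompt, max_tokens=50, temperature=0.7))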


def text_to_speech(text):
    """
    Converts text to speech using gTTS and returns the filepath to the saved audio.
    """
    tts = gTTS(text=text, lang="en")
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tmp_file.close()  # Close the handle so gTTS can write to the path (needed on Windows).
    tts.save(tmp_file.name)
    return tmp_file.name
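
# Note: delete=False means each call leaves an .mp3 on disk. A minimal
# cleanup sketch, once the caller is done with the file:
#
#   path = text_to_speech("Hello there!")
#   # ... play or serve the file ...
#   os.remove(path)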


def voice_chat(audio, text, history, max_tokens, temperature):
    """
    Handles a single turn of the conversation:
    - If an audio file is provided and no text message, transcribes it.
    - Builds a conversation prompt from the chat history.
    - Generates a response from the model.
    - Converts the response to speech.
    Returns the updated chat history, the response text, the response audio
    filepath, and the updated state.
    """
    # Use the transcribed audio if the text box is empty.
    if audio is not None and (text is None or text.strip() == ""):
        user_input = transcribe_audio(audio)
    else:
        user_input = text if text else ""
    # Build the conversation prompt (history is a list of (user, assistant) tuples).
    prompt = ""
    if history:
        for (user_turn, bot_turn) in history:
            prompt += f"User: {user_turn}\nAssistant: {bot_turn}\n"
    prompt += f"User: {user_input}\nAssistant: "
    # Generate the response.
    response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
    # Convert the response to speech audio.
    audio_response = text_to_speech(response_text)
    # Append this turn to the conversation history.
    new_history = history.copy() if history else []
    new_history.append((user_input, response_text))
    # Four outputs: update the Chatbot display, show the assistant text,
    # play the audio, and update the state.
    return new_history, response_text, audio_response, new_history
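
# Single-turn sketch of the whole pipeline outside the UI (all argument
# values hypothetical):
#
#   history, reply, audio_path, history = voice_chat(
#       audio=None, text="Tell me a joke", history=[],
#       max_tokens=150, temperature=0.7,
#   )
#   print(reply, audio_path)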


# ----- Gradio Interface -----
with gr.Blocks() as demo:
    gr.Markdown("# Voice Chatbot with LLAMA-CPP")
    with gr.Row():
        with gr.Column(scale=5):
            # User inputs: audio and/or text.
            audio_input = gr.Audio(type="filepath", label="Speak to Chatbot")
            text_input = gr.Textbox(placeholder="Or type your message", label="Your Message")
            send_btn = gr.Button("Send")
            max_tokens_slider = gr.Slider(50, 300, value=150, step=10, label="Max Tokens")
            temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
        with gr.Column(scale=7):
            # Outputs: chat history, assistant text response, and audio playback.
            chat_history = gr.Chatbot(label="Chat History")
            response_textbox = gr.Textbox(label="Assistant Response")
            audio_output = gr.Audio(label="Response Audio", type="filepath")
    # Gradio State to hold the conversation history across turns.
    state = gr.State([])
    # On clicking "Send", run a single conversation turn (voice_chat is passed
    # directly; the pass-through wrapper was redundant).
    send_btn.click(
        fn=voice_chat,
        inputs=[audio_input, text_input, state, max_tokens_slider, temperature_slider],
        outputs=[chat_history, response_textbox, audio_output, state],
    )

# Launch the app.
demo.launch()
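
# On a Hugging Face Space the app is served automatically. Running locally,
# Gradio's built-in tunnel can expose a temporary public URL if needed:
#
#   demo.launch(share=True)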