devkushal75 committed
Commit 4d5fc75 · verified · 1 Parent(s): fab8000

Update app.py

Files changed (1):
  1. app.py +26 -21
app.py CHANGED
@@ -6,35 +6,31 @@ import tempfile
 import os
 from huggingface_hub import hf_hub_download
 
-
 # ----- Initialization -----
 model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
-model_basename = "llama-2-13b-chat.Q5_K_M.gguf" # the model is in gguf format
+model_basename = "llama-2-13b-chat.Q5_K_M.gguf" # the model is in GGUF format
 
 model_path = hf_hub_download(
     repo_id=model_name_or_path,
     filename=model_basename
 )
 
-# Initialize the LLAMA model. Update the model_path to point to your model file.
+# Initialize the LLAMA model.
 llm = Llama(
     model_path=model_path,
-    n_threads=2, # CPU cores
-    n_batch=512, # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
-    n_gpu_layers=43, # Change this value based on your model and your GPU VRAM pool.
-    n_ctx=4096, # Context window
+    n_threads=2,
+    n_batch=512,
+    n_gpu_layers=43,
+    n_ctx=4096,
 )
 
-
 # Load the Whisper model for speech-to-text transcription.
 whisper_model = whisper.load_model("base")
 
 # ----- Helper Functions -----
 
 def transcribe_audio(audio_file):
-    """
-    Transcribes the provided audio file using Whisper.
-    """
+    """Transcribes the provided audio file using Whisper."""
    if audio_file is None:
        return ""
    result = whisper_model.transcribe(audio_file)
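
Note on this hunk: the commit drops the inline comments from the Llama(...) call without changing any values. As a reference, here is a minimal sketch with the removed annotations folded back in, describing what each llama-cpp-python loader option controls (values mirror the diff, not a recommendation):

from llama_cpp import Llama

# Sketch only: same values as the diff, annotated per the comments the
# commit removed (llama-cpp-python loader options).
llm = Llama(
    model_path=model_path,   # GGUF file downloaded via hf_hub_download above
    n_threads=2,             # CPU cores used for generation
    n_batch=512,             # should be between 1 and n_ctx; constrained by GPU VRAM
    n_gpu_layers=43,         # layers offloaded to the GPU; depends on model and VRAM pool
    n_ctx=4096,              # context window in tokens
)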
@@ -43,43 +39,52 @@ def transcribe_audio(audio_file):
 def generate_response(prompt, max_tokens=150, temperature=0.7):
     """
     Uses LLAMA-CPP to generate a response for the given prompt.
+    Note: the echo argument was removed so the prompt is not repeated.
     """
-    # Call the LLAMA model. The output is a dict with a "choices" list.
-    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, echo=False)
+    output = llm(prompt, max_tokens=max_tokens, temperature=temperature) # echo removed
     response = output["choices"][0]["text"]
     return response.strip()
 
 def text_to_speech(text):
-    """
-    Converts text to speech using gTTS and returns the filepath to the saved audio.
-    """
+    """Converts text to speech using gTTS and returns the filepath to the saved audio."""
     tts = gTTS(text=text, lang="en")
     tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
     tts.save(tmp_file.name)
     return tmp_file.name
 
 def voice_chat(audio, text, history, max_tokens, temperature):
+    """
+    Handles a single turn of the conversation:
+    - If an audio file is provided and no text message, transcribe it.
+    - Builds a prompt with only the current user input.
+    - Generates a response from LLAMA.
+    - Converts the assistant's response to speech.
+    Returns:
+    - A new history containing only the current turn.
+    - The assistant's response text.
+    - The assistant's response audio filepath.
+    - The updated state (new history).
+    """
     # Use the transcribed audio if text is empty.
     if audio is not None and (text is None or text.strip() == ""):
         user_input = transcribe_audio(audio)
     else:
         user_input = text if text else ""
 
-    # Build the prompt using only the current message (ignoring previous history)
+    # Build prompt without prior history.
     prompt = f"User: {user_input}\nAssistant: "
-
+
     # Generate response using LLAMA-CPP.
     response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
     # Convert only the assistant's response to speech.
     audio_response = text_to_speech(response_text)
 
-    # Instead of accumulating history, just return the current exchange.
+    # Create new history with only the current exchange.
     new_history = [(user_input, response_text)]
 
-    # Return outputs: updated chatbot display, assistant text, audio file, and updated state.
+    # Return the outputs.
     return new_history, response_text, audio_response, new_history
 
-
 # ----- Gradio Interface -----
 
 with gr.Blocks() as demo:
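
On the echo change: in llama-cpp-python, echo defaults to False for Llama.__call__, so dropping the argument (the old call passed echo=False) leaves behavior unchanged; the prompt is excluded from the returned text either way. A sketch of the completion dict that generate_response indexes into, assuming llama-cpp-python's OpenAI-style output:

# llm(...) returns an OpenAI-style completion dict; generate_response()
# reads choices[0]["text"]. Field values here are illustrative.
output = llm("User: hi\nAssistant: ", max_tokens=16, temperature=0.7)
# output roughly:
# {
#   "id": "cmpl-...", "object": "text_completion", "model": "...",
#   "choices": [{"text": " Hello!", "index": 0, "finish_reason": "stop"}],
#   "usage": {"prompt_tokens": 8, "completion_tokens": 3, "total_tokens": 11},
# }
print(output["choices"][0]["text"].strip())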
 
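Because voice_chat builds the prompt from the current message alone and returns a fresh one-item history, every exchange is single-turn by design. A quick usage sketch of that contract (input values are illustrative):

# The history passed in is ignored; the returned chat history and state
# are the same one-item list holding only the current exchange.
history, reply, mp3_path, state = voice_chat(
    audio=None,
    text="Hello there",
    history=[("earlier", "turn")],
    max_tokens=64,
    temperature=0.7,
)
assert history == [("Hello there", reply)] and state is history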
 
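The diff ends at the opening of the Gradio block. The wiring below is a hypothetical sketch (component names and layout are assumptions, not part of this commit) of how voice_chat's four outputs could map onto Blocks components:

# Hypothetical wiring, not from the commit. voice_chat returns
# (chat history, response text, audio filepath, state), in that order.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    audio_in = gr.Audio(sources=["microphone"], type="filepath")  # Gradio 4.x argument names
    text_in = gr.Textbox(label="Or type a message")
    reply_box = gr.Textbox(label="Assistant reply")
    reply_audio = gr.Audio(type="filepath", label="Spoken reply")
    state = gr.State([])
    max_tok = gr.Slider(16, 512, value=150, step=1, label="max_tokens")
    temp = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="temperature")
    gr.Button("Send").click(
        voice_chat,
        inputs=[audio_in, text_in, state, max_tok, temp],
        outputs=[chatbot, reply_box, reply_audio, state],
    )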