Spaces:

devkushal75
/

TEST_HL

Sleeping

App Files Files

devkushal75 committed 22 days ago

Commit

882e54f

verified ·

1 Parent(s): 4d5fc75

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -24

app.py CHANGED Viewed

@@ -6,31 +6,35 @@ import tempfile
 import os
 from huggingface_hub import hf_hub_download
 # ----- Initialization -----
 model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
-model_basename = "llama-2-13b-chat.Q5_K_M.gguf"  # the model is in GGUF format
 model_path = hf_hub_download(
     repo_id=model_name_or_path,
     filename=model_basename
 )
-# Initialize the LLAMA model.
 llm = Llama(
     model_path=model_path,
-    n_threads=2,
-    n_batch=512,
-    n_gpu_layers=43,
-    n_ctx=4096,
 )
 # Load the Whisper model for speech-to-text transcription.
 whisper_model = whisper.load_model("base")
 # ----- Helper Functions -----
 def transcribe_audio(audio_file):
-    """Transcribes the provided audio file using Whisper."""
     if audio_file is None:
         return ""
     result = whisper_model.transcribe(audio_file)
@@ -39,14 +43,16 @@ def transcribe_audio(audio_file):
 def generate_response(prompt, max_tokens=150, temperature=0.7):
     """
     Uses LLAMA-CPP to generate a response for the given prompt.
-    Note: Removed echo=True to prevent repeating the prompt.
     """
-    output = llm(prompt, max_tokens=max_tokens, temperature=temperature)  # echo removed
     response = output["choices"][0]["text"]
     return response.strip()
 def text_to_speech(text):
-    """Converts text to speech using gTTS and returns the filepath to the saved audio."""
     tts = gTTS(text=text, lang="en")
     tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
     tts.save(tmp_file.name)
@@ -56,14 +62,10 @@ def voice_chat(audio, text, history, max_tokens, temperature):
     """
     Handles a single turn of the conversation:
       - If an audio file is provided and no text message, transcribe it.
-      - Builds a prompt with only the current user input.
       - Generates a response from LLAMA.
-      - Converts the assistant's response to speech.
-    Returns:
-      - A new history containing only the current turn.
-      - The assistant's response text.
-      - The assistant's response audio filepath.
-      - The updated state (new history).
     """
     # Use the transcribed audio if text is empty.
     if audio is not None and (text is None or text.strip() == ""):
@@ -71,18 +73,23 @@ def voice_chat(audio, text, history, max_tokens, temperature):
     else:
         user_input = text if text else ""
-    # Build prompt without prior history.
-    prompt = f"User: {user_input}\nAssistant: "
     # Generate response using LLAMA-CPP.
     response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
-    # Convert only the assistant's response to speech.
     audio_response = text_to_speech(response_text)
-    # Create new history with only the current exchange.
-    new_history = [(user_input, response_text)]
-    # Return the outputs.
     return new_history, response_text, audio_response, new_history
 # ----- Gradio Interface -----
@@ -118,4 +125,4 @@ with gr.Blocks() as demo:
     )
 # Launch the app.
-demo.launch()

 import os
 from huggingface_hub import hf_hub_download
 # ----- Initialization -----
 model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
+model_basename = "llama-2-13b-chat.Q5_K_M.gguf" # the model is in gguf format
 model_path = hf_hub_download(
     repo_id=model_name_or_path,
     filename=model_basename
 )
+# Initialize the LLAMA model. Update the model_path to point to your model file.
 llm = Llama(
     model_path=model_path,
+    n_threads=2, # CPU cores
+    n_batch=512, # Should be between 1 and n_ctx; consider the amount of VRAM in your GPU.
+    n_gpu_layers=43, # Change this value based on your model and your GPU VRAM pool.
+    n_ctx=4096, # Context window
 )
 # Load the Whisper model for speech-to-text transcription.
 whisper_model = whisper.load_model("base")
 # ----- Helper Functions -----
 def transcribe_audio(audio_file):
+    """
+    Transcribes the provided audio file using Whisper.
+    """
     if audio_file is None:
         return ""
     result = whisper_model.transcribe(audio_file)
 def generate_response(prompt, max_tokens=150, temperature=0.7):
     """
     Uses LLAMA-CPP to generate a response for the given prompt.
     """
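+    # Call the LLAMA model. The output is a dict with a "choices" list.
+    # NOTE(review): echo=True makes the returned text repeat the prompt, so the prompt ends up in the spoken reply and in the stored history - confirm this is intended.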
+    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, echo=True)
     response = output["choices"][0]["text"]
     return response.strip()
 def text_to_speech(text):
+    """
+    Converts text to speech using gTTS and returns the filepath to the saved audio.
+    """
     tts = gTTS(text=text, lang="en")
     tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
     tts.save(tmp_file.name)
     """
     Handles a single turn of the conversation:
       - If an audio file is provided and no text message, transcribe it.
+      - Builds a conversation prompt from the chat history.
       - Generates a response from LLAMA.
+      - Converts the response to speech.
+    Returns updated chat history, the response text, the response audio filepath, and updated state.
     """
     # Use the transcribed audio if text is empty.
     if audio is not None and (text is None or text.strip() == ""):
     else:
         user_input = text if text else ""
+    # Build the conversation prompt (history is a list of tuples: (user, assistant))
+    prompt = ""
+    if history:
+        for (user_turn, bot_turn) in history:
+            prompt += f"User: {user_turn}\nAssistant: {bot_turn}\n"
+    prompt += f"User: {user_input}\nAssistant: "
     # Generate response using LLAMA-CPP.
     response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
+    # Convert the response to speech audio.
     audio_response = text_to_speech(response_text)
+    # Append this turn to the conversation history.
+    new_history = history.copy() if history else []
+    new_history.append((user_input, response_text))
+    # Return four outputs: update the Chatbot display, show the assistant text, play audio, and update state.
     return new_history, response_text, audio_response, new_history
 # ----- Gradio Interface -----
     )
 # Launch the app.
+demo.launch()