devkushal75 committed on
Commit 02819d3 · verified
1 Parent(s): 882e54f

Update app.py

Files changed (1)
  1. app.py +24 -31
app.py CHANGED
@@ -6,35 +6,31 @@ import tempfile
 import os
 from huggingface_hub import hf_hub_download
 
-
 # ----- Initialization -----
 model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
-model_basename = "llama-2-13b-chat.Q5_K_M.gguf"  # the model is in gguf format
+model_basename = "llama-2-13b-chat.Q5_K_M.gguf"  # the model is in GGUF format
 
 model_path = hf_hub_download(
     repo_id=model_name_or_path,
     filename=model_basename
 )
 
-# Initialize the LLAMA model. Update the model_path to point to your model file.
+# Initialize the LLAMA model.
 llm = Llama(
     model_path=model_path,
-    n_threads=2,      # CPU cores
-    n_batch=512,      # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
-    n_gpu_layers=43,  # Change this value based on your model and your GPU VRAM pool.
-    n_ctx=4096,       # Context window
+    n_threads=2,  # CPU cores
+    n_batch=512,
+    n_gpu_layers=43,
+    n_ctx=4096,
 )
 
-
 # Load the Whisper model for speech-to-text transcription.
 whisper_model = whisper.load_model("base")
 
 # ----- Helper Functions -----
 
 def transcribe_audio(audio_file):
-    """
-    Transcribes the provided audio file using Whisper.
-    """
+    """Transcribes the provided audio file using Whisper."""
     if audio_file is None:
         return ""
     result = whisper_model.transcribe(audio_file)
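Note on the hard-coded loader settings above: n_threads=2 and n_gpu_layers=43 assume a specific host (a 2-core CPU and enough VRAM for 43 offloaded layers). A minimal sketch of deriving the thread count at startup instead; the values and helper logic here are illustrative, not part of this commit:

import os
from llama_cpp import Llama

# Illustrative: size the thread pool to the host instead of hard-coding it.
n_threads = max(1, (os.cpu_count() or 2) - 1)

llm = Llama(
    model_path=model_path,  # the GGUF path returned by hf_hub_download above
    n_threads=n_threads,
    n_batch=512,
    n_gpu_layers=0,  # assumption: CPU-only host; raise when GPU VRAM is available
    n_ctx=4096,
)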
@@ -43,16 +39,14 @@ def transcribe_audio(audio_file):
 def generate_response(prompt, max_tokens=150, temperature=0.7):
     """
     Uses LLAMA-CPP to generate a response for the given prompt.
+    Note: Removed echo=True to avoid repeating the prompt.
     """
-    # Call the LLAMA model. The output is a dict with a "choices" list.
-    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, echo=True)
+    output = llm(prompt, max_tokens=max_tokens, temperature=temperature)  # echo removed
     response = output["choices"][0]["text"]
     return response.strip()
 
 def text_to_speech(text):
-    """
-    Converts text to speech using gTTS and returns the filepath to the saved audio.
-    """
+    """Converts text to speech using gTTS and returns the filepath to the saved audio."""
     tts = gTTS(text=text, lang="en")
     tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
     tts.save(tmp_file.name)
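Context for the echo change in this hunk: llama-cpp-python returns an OpenAI-style completion dict, and with echo=True the prompt text is prepended to choices[0]["text"], so the old code would read the prompt back into the reply. A sketch of the shape the code indexes into (field values are illustrative):

output = llm("User: Hi\nAssistant: ", max_tokens=16, temperature=0.7)
# Roughly: {"id": "...", "object": "text_completion",
#           "choices": [{"text": " Hello! How can I help?", "index": 0,
#                        "logprobs": None, "finish_reason": "stop"}], ...}
print(output["choices"][0]["text"].strip())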
@@ -62,10 +56,14 @@ def voice_chat(audio, text, history, max_tokens, temperature):
     """
     Handles a single turn of the conversation:
     - If an audio file is provided and no text message, transcribe it.
-    - Builds a conversation prompt from the chat history.
+    - Builds a prompt using only the current user input.
     - Generates a response from LLAMA.
-    - Converts the response to speech.
-    - Returns updated chat history, the response text, the response audio filepath, and updated state.
+    - Converts the assistant's response to speech.
+    Returns:
+    - A new history containing only the current turn.
+    - The assistant's response text.
+    - The assistant's response audio filepath.
+    - Updated state (new history).
     """
     # Use the transcribed audio if text is empty.
     if audio is not None and (text is None or text.strip() == ""):
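The four return values listed in the new docstring line up with four Gradio outputs: the Chatbot display, the response text, the response audio, and the State holding the history. The interface block sits outside this diff, so the component names in this sketch are assumed:

# Hypothetical wiring; the real component names live in the
# `with gr.Blocks() as demo:` section below this diff.
submit_btn.click(
    fn=voice_chat,
    inputs=[audio_in, text_in, state, max_tokens_slider, temperature_slider],
    outputs=[chatbot, response_box, audio_out, state],
)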
@@ -73,23 +71,18 @@ def voice_chat(audio, text, history, max_tokens, temperature):
     else:
         user_input = text if text else ""
 
-    # Build the conversation prompt (history is a list of tuples: (user, assistant))
-    prompt = ""
-    if history:
-        for (user_turn, bot_turn) in history:
-            prompt += f"User: {user_turn}\nAssistant: {bot_turn}\n"
-    prompt += f"User: {user_input}\nAssistant: "
+    # Build prompt using only the current user input.
+    prompt = f"User: {user_input}\nAssistant: "
 
     # Generate response using LLAMA-CPP.
    response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
-    # Convert the response to speech audio.
+
+    # Convert only the assistant's response to speech.
     audio_response = text_to_speech(response_text)
 
-    # Append this turn to the conversation history.
-    new_history = history.copy() if history else []
-    new_history.append((user_input, response_text))
+    # Reset conversation history to only include the current turn.
+    new_history = [(user_input, response_text)]
 
-    # Return four outputs: update the Chatbot display, show the assistant text, play audio, and update state.
     return new_history, response_text, audio_response, new_history
 
 # ----- Gradio Interface -----
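Net effect of this hunk: the model no longer sees earlier turns, since the prompt is rebuilt from the current input alone and the returned history is reset on every call. An illustrative call with hypothetical inputs:

history, reply, audio_path, state = voice_chat(
    audio=None,
    text="What is GGUF?",
    history=[("earlier question", "earlier answer")],  # discarded by the new code
    max_tokens=150,
    temperature=0.7,
)
assert history == [("What is GGUF?", reply)]  # only the current turn survives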
@@ -125,4 +118,4 @@ with gr.Blocks() as demo:
     )
 
 # Launch the app.
-demo.launch()
+demo.launch()