Spaces:
Running
on
Zero
Running
on
Zero
keitokei1994
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -3,9 +3,9 @@ import gradio as gr
|
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
from llama_cpp_cuda_tensorcores import Llama
|
5 |
|
6 |
-
REPO_ID = "
|
7 |
-
MODEL_NAME = "
|
8 |
-
MAX_CONTEXT_LENGTH =
|
9 |
CUDA = True
|
10 |
SYSTEM_PROMPT = "You are a helpful, smart, kind, and efficient AI assistant. You always fulfill the user's requests to the best of your ability."
|
11 |
TOKEN_STOP = ["<|eot_id|>"]
|
@@ -130,7 +130,7 @@ def clear_chat(chat_history_state, chat_message):
|
|
130 |
|
131 |
def gui(llm_chat):
|
132 |
with gr.Blocks(theme="NoCrypt/miku", css=css) as app:
|
133 |
-
gr.Markdown("#
|
134 |
gr.Markdown(
|
135 |
f"""
|
136 |
### This demo utilizes the repository ID {REPO_ID} with the model {MODEL_NAME}, powered by the LLaMA.cpp backend.
|
|
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
from llama_cpp_cuda_tensorcores import Llama
|
5 |
|
6 |
+
REPO_ID = "keitokei1994/shisa-v1-qwen2-7b-GGUF"
|
7 |
+
MODEL_NAME = "shisa-v1-qwen2-7b.Q8_0.gguf"
|
8 |
+
MAX_CONTEXT_LENGTH = 32768
|
9 |
CUDA = True
|
10 |
SYSTEM_PROMPT = "You are a helpful, smart, kind, and efficient AI assistant. You always fulfill the user's requests to the best of your ability."
|
11 |
TOKEN_STOP = ["<|eot_id|>"]
|
|
|
130 |
|
131 |
def gui(llm_chat):
|
132 |
with gr.Blocks(theme="NoCrypt/miku", css=css) as app:
|
133 |
+
gr.Markdown("# shisa-v1-qwen2-7b.Q8_0.gguf")
|
134 |
gr.Markdown(
|
135 |
f"""
|
136 |
### This demo utilizes the repository ID {REPO_ID} with the model {MODEL_NAME}, powered by the LLaMA.cpp backend.
|