Chat-Nous-Hermes-2-SOLAR-10.7B-GGUF

Sleeping

crystalkalem committed on Jun 5, 2024

Commit

3725f8e

verified ·

1 Parent(s): 0ecd162

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,14 +11,14 @@ chat_template = os.getenv('CHAT_TEMPLATE')
 # Interface variables
 model_name = model_id.split('/')[1].split('-GGUF')[0]
 title = f"{model_name}"
-description = f"Chat with <a href=\"https://huggingface.co/{model_id}\">{model_name}</a> in GGUF format ({quant})! Responce Time takes between 50 and 150 seconds, its not great."
 # Initialize the LLM
 llm = Llama(model_path="model.gguf",
-            n_ctx=32000,
-            n_threads=4,
             temp = 0.75,
-            n_vocab=16000,
             n_gpu_layers=-1,
             chat_format=chat_template)

 # Interface variables
 model_name = model_id.split('/')[1].split('-GGUF')[0]
 title = f"{model_name}"
+description = f"Chat with <a href=\"https://huggingface.co/{model_id}\">{model_name}</a> in GGUF format ({quant})! Context length = 4096, new token limit = 1024. Responce Time takes between 50 and 150 seconds, its not great."
 # Initialize the LLM
 llm = Llama(model_path="model.gguf",
+            n_ctx=4096,
+            n_threads=2,
             temp = 0.75,
+            n_vocab=1024,
             n_gpu_layers=-1,
             chat_format=chat_template)