crystalkalem
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -11,14 +11,14 @@ chat_template = os.getenv('CHAT_TEMPLATE')
|
|
11 |
# Interface variables
|
12 |
model_name = model_id.split('/')[1].split('-GGUF')[0]
|
13 |
title = f"{model_name}"
|
14 |
-
description = f"Chat with <a href=\"https://huggingface.co/{model_id}\">{model_name}</a> in GGUF format ({quant})! Responce Time takes between 50 and 150 seconds, its not great."
|
15 |
|
16 |
# Initialize the LLM
|
17 |
llm = Llama(model_path="model.gguf",
|
18 |
-
n_ctx=
|
19 |
-
n_threads=
|
20 |
temp = 0.75,
|
21 |
-
n_vocab=
|
22 |
n_gpu_layers=-1,
|
23 |
chat_format=chat_template)
|
24 |
|
|
|
11 |
# Interface variables
|
12 |
model_name = model_id.split('/')[1].split('-GGUF')[0]
|
13 |
title = f"{model_name}"
|
14 |
+
description = f"Chat with <a href=\"https://huggingface.co/{model_id}\">{model_name}</a> in GGUF format ({quant})! Context length = 4096, new token limit = 1024. Responce Time takes between 50 and 150 seconds, its not great."
|
15 |
|
16 |
# Initialize the LLM
|
17 |
llm = Llama(model_path="model.gguf",
|
18 |
+
n_ctx=4096,
|
19 |
+
n_threads=2,
|
20 |
temp = 0.75,
|
21 |
+
n_vocab=1024,
|
22 |
n_gpu_layers=-1,
|
23 |
chat_format=chat_template)
|
24 |
|