Update app.py
--- a/app.py
+++ b/app.py
@@ -72,14 +72,14 @@ terminators = [
 # indicates the end of a sequence
 
 #model = llama_cpp.Llama(model_path = tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf AutoModelForCausalLM.from_pretrained(llm_model, model_file = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", model_type="llama", gpu_layers=0, config = generation_config)
-model = Llama
+model = Llama(
 model_path="./tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
 chat_format="llama-2",
 n_gpu_layers = 0,
 temperature=0.75,
 max_tokens=500,
 top_p=0.95,
-
+eos_tokens=terminators
 # callback_manager=callback_manager,
 # verbose=True, # Verbose is required to pass to the callback manager
 )
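For context, a minimal sketch of how this configuration is commonly split in llama-cpp-python: model-loading options (model_path, chat_format, n_gpu_layers) go to the Llama constructor, while sampling options (temperature, top_p, max_tokens) and stop sequences are passed per call to create_chat_completion via its stop argument. This is an illustration only, not the app's actual code; the terminators list is assumed here to hold stop strings such as "</s>", and the example prompt is made up.

# Sketch under the assumptions above; not the committed app.py code.
from llama_cpp import Llama

terminators = ["</s>"]  # assumed stop strings; the real list is defined earlier in app.py

model = Llama(
    model_path="./tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
    chat_format="llama-2",   # use the llama-2 chat template
    n_gpu_layers=0,          # CPU-only inference
    # verbose=True,          # required if a callback manager is attached
)

response = model.create_chat_completion(
    messages=[{"role": "user", "content": "Hello!"}],  # hypothetical prompt
    temperature=0.75,
    top_p=0.95,
    max_tokens=500,
    stop=terminators,        # stop generation at these sequences
)
print(response["choices"][0]["message"]["content"])

Passing the stop sequences per request keeps the loaded model reusable with different sampling settings, which is why the constructor in this sketch carries only the loading options.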