Update app.py
app.py CHANGED
@@ -54,20 +54,20 @@ llm_model = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
 tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
 #initiate model and tokenizer
 
-generation_config = AutoConfig.from_pretrained(
-"TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
-max_new_tokens= 300,
+#generation_config = AutoConfig.from_pretrained(
+# "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
+# max_new_tokens= 300,
 # do_sample=True,
 # stream = streamer,
-top_p=0.95,
-temperature=0.4,
-stream = True
+# top_p=0.95,
+# temperature=0.4,
+# stream = True
 # eos_token_id=terminators
 )
 # send additional parameters to model for generation
 
 #model = llama_cpp.Llama(model_path = tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf AutoModelForCausalLM.from_pretrained(llm_model, model_file = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", model_type="llama", gpu_layers=0, config = generation_config)
-model = Llama(
+model = Llama.from_pretrained(
 model_path="./tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
 chat_format="llama-2",
 n_gpu_layers = 0,
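
For context, a minimal sketch of the two loading paths involved here in llama-cpp-python (assumed API, not part of this commit): `Llama(model_path=...)` opens a GGUF file already on disk, while the `Llama.from_pretrained` classmethod is documented to fetch the file from the Hugging Face Hub via `repo_id`/`filename` (it requires `huggingface_hub`) rather than accepting a `model_path`. Sampling settings such as `max_tokens`, `temperature`, `top_p`, and `stream` are passed per generation call instead of through a transformers `AutoConfig`, which is consistent with the config block being commented out above.

# Sketch only; verify names against the installed llama_cpp version.
from llama_cpp import Llama

# Path A: load a local GGUF file, CPU only.
llm = Llama(
    model_path="./tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
    chat_format="llama-2",
    n_gpu_layers=0,
)

# Path B: download the GGUF from the Hugging Face Hub.
# from_pretrained takes repo_id/filename, not model_path.
llm = Llama.from_pretrained(
    repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
    filename="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
    chat_format="llama-2",
    n_gpu_layers=0,
)

# Generation parameters go on the call itself.
out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello!"}],
    max_tokens=300,
    temperature=0.4,
    top_p=0.95,
)
print(out["choices"][0]["message"]["content"])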