Update app.py
app.py CHANGED
@@ -50,7 +50,7 @@ If you don't know the answer, just say "I do not know." Don't make up an answer.
 llm_model = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
 # TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF and tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf not working, TinyLlama/TinyLlama-1.1B-Chat-v0.6, andrijdavid/TinyLlama-1.1B-Chat-v1.0-GGUF
 
-tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+#tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
 #initiate model and tokenizer
 
 #generation_config = AutoConfig.from_pretrained(
@@ -64,21 +64,21 @@ tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
 # eos_token_id=terminators
 #)
 # send additional parameters to model for generation
-terminators = [
-    tokenizer.eos_token_id, # End-of-Sequence token that indicates where the model should consider the text sequence complete
-    tokenizer.convert_tokens_to_ids("<|eot_id|>") # Converts a token string into a single integer id / sequence of ids using the vocabulary
-]
+#terminators = [
+#    tokenizer.eos_token_id, # End-of-Sequence token that indicates where the model should consider the text sequence complete
+#    tokenizer.convert_tokens_to_ids("<|eot_id|>") # Converts a token string into a single integer id / sequence of ids using the vocabulary
+#]
 # indicates the end of a sequence
 
 #model = llama_cpp.Llama(model_path = tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf AutoModelForCausalLM.from_pretrained(llm_model, model_file = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", model_type="llama", gpu_layers=0, config = generation_config)
 model = Llama(
     model_path="./tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
-
+    # chat_format="llama-2",
     n_gpu_layers = 0,
     temperature=0.75,
     max_tokens=500,
-    top_p=0.95
-
+    top_p=0.95 #,
+    # eos_tokens=terminators
     # callback_manager=callback_manager,
     # verbose=True, # Verbose is required to pass to the callback manager
 )
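For a sense of how the object created by this `Llama(...)` call is typically driven afterwards, here is a minimal sketch using llama-cpp-python's `create_chat_completion`, assuming the sampling settings are passed per generation call and end-of-sequence handling is done with a `stop` string rather than the commented-out `terminators` list; the user question and stop value are illustrative and not taken from app.py.

from llama_cpp import Llama

# Load the quantized GGUF checkpoint on CPU (no layers offloaded to a GPU).
llm = Llama(
    model_path="./tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
    chat_format="llama-2",  # template used to serialize the chat messages into a prompt
    n_gpu_layers=0,
)

# Sampling parameters go to the generation call, so the same loaded model
# can be reused with different settings without being re-instantiated.
response = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "If you don't know the answer, just say 'I do not know.'"},
        {"role": "user", "content": "What is TinyLlama?"},  # illustrative question
    ],
    temperature=0.75,
    top_p=0.95,
    max_tokens=500,
    stop=["</s>"],  # llama.cpp takes stop strings here; tokenizer eos ids are not required
)

print(response["choices"][0]["message"]["content"])

Keeping temperature, top_p, and max_tokens at the call site also sidesteps the question of whether the constructor honors them, which is what the commented-out arguments in the diff appear to be probing.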