Update app.py
app.py
CHANGED
@@ -49,7 +49,6 @@ If you don't know the answer, just say "I do not know." Don't make up an answer.
 # Provides context of how to answer the question
 
 llm_model = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
-# TheBloke/Llama-2-7B-Chat-GGML , TinyLlama/TinyLlama-1.1B-Chat-v1.0 , microsoft/Phi-3-mini-4k-instruct, health360/Healix-1.1B-V1-Chat-dDPO
 # TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF and tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf not working, TinyLlama/TinyLlama-1.1B-Chat-v0.6, andrijdavid/TinyLlama-1.1B-Chat-v1.0-GGUF"
 
 tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
@@ -138,19 +137,17 @@ def talk(prompt, history):
     # the chat template structure should be based on the text generation model's format
     print("check6")
 
-    terminators = [
-        tokenizer.eos_token_id,  # End-of-Sequence token that indicates where the model should consider the text sequence to be complete
-        tokenizer.convert_tokens_to_ids("<|eot_id|>")  # converts a token string into a single integer id (or a sequence of ids) using the vocabulary
-    ]
     # indicates the end of a sequence
     import pprint
     stream = model.create_chat_completion(messages=[{"role": "system", "content": SYS_PROMPT}, {"role": "user", "content": formatted_prompt}], max_tokens=1000, stop=["</s>"], stream=True)
     # print(output['choices'][0]['message']['content'])
+    print(f"{stream}")
     pprint.pprint(stream)
     text = []
     for output in stream:
         # text += output['choices'][0]
         text.append(output['choices'][0])
+        print(f"{text}")
         yield "".join(text)
     print(text)
     print("check3H")