Update app.py
app.py CHANGED
@@ -8,13 +8,13 @@ import time
 #import torch
 import pandas as pd
 
-from transformers import AutoTokenizer, AutoModelForCausalLM
+#from transformers import AutoTokenizer, AutoModelForCausalLM
 #from transformers import AutoModelForCausalLM, AutoModel
 from transformers import TextIteratorStreamer
 from threading import Thread
 #from transformers import LlamaForCausalLM, LlamaTokenizer
 #git lfs install
-
+from ctransformers import AutoModelForCausalLM, AutoConfig, Config, AutoTokenizer
 
 #from huggingface_hub import InferenceClient
 from huggingface_hub import Repository, upload_file
@@ -29,12 +29,31 @@ historylog = [{
 "Output": ''
 }]
 
-
+i_temperature = 0.30
+i_max_new_tokens=1100
+i_repetitionpenalty = 1.2
+i_contextlength=12048
+logfile = 'TinyLlama.1B.txt'
+
+print("loading model...")
+modelfile="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
+
+
+conf = AutoConfig(Config(temperature=i_temperature,
+                         repetition_penalty=i_repetitionpenalty,
+                         batch_size=64,
+                         max_new_tokens=i_max_new_tokens,
+                         context_length=i_contextlength))
+llm_model = AutoModelForCausalLM.from_pretrained(modelfile,
+                                                 model_type="llama",
+                                                 config=conf)
+
+
 # TheBloke/Llama-2-7B-Chat-GGML , TinyLlama/TinyLlama-1.1B-Chat-v1.0 , microsoft/Phi-3-mini-4k-instruct, health360/Healix-1.1B-V1-Chat-dDPO
 # TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF and tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf not working
 
 model = AutoModelForCausalLM.from_pretrained(llm_model)
-tokenizer = AutoTokenizer.from_pretrained(llm_model)
+#tokenizer = AutoTokenizer.from_pretrained(llm_model)
 #initiate model and tokenizer
 
 data = load_dataset("Namitg02/Test", split='train', streaming=False)
@@ -64,10 +83,10 @@ print("check2")
 # memory = ConversationBufferMemory(return_messages=True)
 
 
-terminators = [
-    tokenizer.eos_token_id, # End-of-Sequence Token that indicates where the model should consider the text sequence to be complete
-    tokenizer.convert_tokens_to_ids("<|eot_id|>") # Converts a token strings in a single/ sequence of integer id using the vocabulary
-]
+#terminators = [
+#    tokenizer.eos_token_id, # End-of-Sequence Token that indicates where the model should consider the text sequence to be complete
+#    tokenizer.convert_tokens_to_ids("<|eot_id|>") # Converts a token strings in a single/ sequence of integer id using the vocabulary
+#]
 # indicates the end of a sequence
 
 
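A note on the loading path this commit introduces: the `transformers` import is commented out in favor of `ctransformers`, and the model is built from a nested `Config`/`AutoConfig` pair, which is the documented ctransformers pattern. One apparent leftover is that `llm_model` is now an already-loaded model object, yet the unchanged line `model = AutoModelForCausalLM.from_pretrained(llm_model)` still passes it where the old `transformers` code expected a model id. A minimal sketch of how the load presumably should read; the `model_file` value is an illustration (the GGUF repo hosts several quantizations), not part of the commit:

# Hedged sketch of the ctransformers loading path this commit switches to;
# model_file is an assumption, picked from the repo's quantization list.
from ctransformers import AutoModelForCausalLM, AutoConfig, Config

conf = AutoConfig(Config(temperature=0.30,
                         repetition_penalty=1.2,
                         batch_size=64,
                         max_new_tokens=1100,
                         context_length=12048))
llm_model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
    model_file="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",  # assumption, see above
    model_type="llama",
    config=conf,
)
# llm_model is already the usable model here; wrapping it in a second
# from_pretrained() call, as the unchanged line in the diff does, would fail.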
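Commenting out the terminators block is consistent with the switch: the `tokenizer` call is commented out too, and ctransformers models usually take stop strings at generation time instead of terminator token ids. A hedged usage sketch, reusing `llm_model` from the sketch above; the prompt and stop string are illustrative, not from the commit:

# Illustrative only: a stop string stands in for the terminator token ids
# the transformers version collected; "</s>" is TinyLlama-Chat's EOS marker.
prompt = "<|user|>\nWhat is diabetes?</s>\n<|assistant|>\n"
for piece in llm_model(prompt, stop=["</s>"], stream=True):
    print(piece, end="", flush=True)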