Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ from transformers import AutoTokenizer, LlamaForCausalLM, BitsAndBytesConfig
|
|
3 |
from transformers.generation.stopping_criteria import StoppingCriteria, StoppingCriteriaList
|
4 |
from peft import PeftModel
|
5 |
import gradio as gr
|
|
|
6 |
|
7 |
# Add this new class for custom stopping criteria
|
8 |
class SentenceEndingCriteria(StoppingCriteria):
|
@@ -15,24 +16,25 @@ class SentenceEndingCriteria(StoppingCriteria):
|
|
15 |
return last_token in self.end_tokens
|
16 |
|
17 |
def load_model():
|
18 |
-
|
19 |
-
|
|
|
20 |
tokenizer = AutoTokenizer.from_pretrained(
|
21 |
model_path,
|
22 |
use_fast=False,
|
23 |
padding_side="left",
|
24 |
model_max_length=4096,
|
25 |
-
token=
|
26 |
)
|
27 |
|
28 |
tokenizer.pad_token = tokenizer.eos_token
|
29 |
|
30 |
-
# Load merged model with quantization
|
31 |
model = LlamaForCausalLM.from_pretrained(
|
32 |
model_path,
|
33 |
device_map="auto",
|
34 |
torch_dtype=torch.float16,
|
35 |
-
quantization_config=BitsAndBytesConfig(load_in_8bit=True)
|
|
|
36 |
)
|
37 |
|
38 |
return model, tokenizer
|
|
|
3 |
from transformers.generation.stopping_criteria import StoppingCriteria, StoppingCriteriaList
|
4 |
from peft import PeftModel
|
5 |
import gradio as gr
|
6 |
+
import os
|
7 |
|
8 |
# Add this new class for custom stopping criteria
|
9 |
class SentenceEndingCriteria(StoppingCriteria):
|
|
|
16 |
return last_token in self.end_tokens
|
17 |
|
18 |
def load_model():
    """Load the merged chat model and its tokenizer from the Hugging Face Hub.

    The Hub access token is read from the ``HUGGINGFACE_TOKEN`` environment
    variable (set it as a Space secret). It may be ``None`` for public repos;
    it is required for gated/private ones.

    Returns:
        tuple: ``(model, tokenizer)`` — an 8-bit quantized ``LlamaForCausalLM``
        with automatic device placement, and its slow tokenizer configured for
        left-padded, batched generation.
    """
    model_path = "Cioni223/mymodel"
    # Never hard-code tokens in source; pull from the environment so the
    # deployment config supplies it.
    token = os.environ.get("HUGGINGFACE_TOKEN")  # Ensure you set this environment variable

    tokenizer = AutoTokenizer.from_pretrained(
        model_path,
        use_fast=False,        # slow tokenizer: keeps original SentencePiece behavior
        padding_side="left",   # decoder-only models need left padding for batching
        model_max_length=4096,
        token=token,
    )

    # Llama tokenizers ship without a pad token; reuse EOS so padding works.
    tokenizer.pad_token = tokenizer.eos_token

    # Load merged model with 8-bit quantization to reduce GPU memory use.
    model = LlamaForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        torch_dtype=torch.float16,
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        # Fix: `use_auth_token` is deprecated (removed in recent transformers);
        # use `token=` consistently with the tokenizer call above.
        token=token,
    )

    return model, tokenizer
|