Cioni223 committed on
Commit 10159e5 · verified · 1 Parent(s): e2221cc

Update app.py

Files changed (1):
  app.py +8 -12
app.py CHANGED
@@ -3,37 +3,33 @@ from transformers import AutoTokenizer, LlamaForCausalLM, BitsAndBytesConfig
 from transformers.generation.stopping_criteria import StoppingCriteria, StoppingCriteriaList
 from peft import PeftModel
 import gradio as gr
-
+import os
 
 # Add this new class for custom stopping criteria
 class SentenceEndingCriteria(StoppingCriteria):
-    def __init__(self, tokenizer, end_tokens):
-        self.tokenizer = tokenizer
-        self.end_tokens = end_tokens
-
-    def __call__(self, input_ids, scores, **kwargs):
-        last_token = input_ids[0][-1]
         return last_token in self.end_tokens
 
 def load_model():
-    # Modify the model path to use the Hugging Face model ID
-    model_path = "Cioni223/mymodel"  # Replace with your actual model path on HF
+    model_path = "Cioni223/mymodel"
+    token = os.environ.get("HUGGINGFACE_TOKEN")  # Ensure you set this environment variable
+
     tokenizer = AutoTokenizer.from_pretrained(
         model_path,
         use_fast=False,
         padding_side="left",
         model_max_length=4096,
-        token=True  # Add this if your model is private
+        token=token
     )
 
     tokenizer.pad_token = tokenizer.eos_token
 
-    # Load merged model with quantization
+
     model = LlamaForCausalLM.from_pretrained(
         model_path,
         device_map="auto",
         torch_dtype=torch.float16,
-        quantization_config=BitsAndBytesConfig(load_in_8bit=True)
+        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
+        use_auth_token=token
     )
 
     return model, tokenizer
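Note that, as committed, SentenceEndingCriteria keeps only the bare return statement, which is not valid Python on its own. A minimal working sketch of the class, reconstructed from the lines removed above:

class SentenceEndingCriteria(StoppingCriteria):
    def __init__(self, tokenizer, end_tokens):
        self.tokenizer = tokenizer
        self.end_tokens = end_tokens

    def __call__(self, input_ids, scores, **kwargs):
        # Stop as soon as the most recently generated token is a sentence-ending token
        last_token = input_ids[0][-1]
        return last_token in self.end_tokens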
 
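For context, a short usage sketch of the updated load_model together with the stopping criteria. It assumes HUGGINGFACE_TOKEN is already exported in the environment, and the prompt and sentence-ending token ids below are illustrative choices, not part of this commit:

import torch

model, tokenizer = load_model()

# Illustrative sentence-ending ids; the exact tokens depend on the Llama tokenizer's vocabulary
end_tokens = [tokenizer.convert_tokens_to_ids(t) for t in (".", "!", "?")]
stopping = StoppingCriteriaList([SentenceEndingCriteria(tokenizer, end_tokens)])

inputs = tokenizer("Hello, how can I help you today?", return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=128, stopping_criteria=stopping)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

One detail worth noting: recent transformers releases deprecate use_auth_token= in favour of token=, so passing token=token to both from_pretrained calls would keep the tokenizer and model loaders consistent.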