Cioni223 committed on
Commit
50e3c67
·
verified ·
1 Parent(s): 55d0069

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -3,6 +3,7 @@ from transformers import AutoTokenizer, LlamaForCausalLM, BitsAndBytesConfig
3
  from transformers.generation.stopping_criteria import StoppingCriteria, StoppingCriteriaList
4
  from peft import PeftModel
5
  import gradio as gr
 
6
 
7
  # Add this new class for custom stopping criteria
8
  class SentenceEndingCriteria(StoppingCriteria):
@@ -15,24 +16,25 @@ class SentenceEndingCriteria(StoppingCriteria):
15
  return last_token in self.end_tokens
16
 
17
  def load_model():
18
- # Modify the model path to use the Hugging Face model ID
19
- model_path = "Cioni223/mymodel" # Replace with your actual model path on HF
 
20
  tokenizer = AutoTokenizer.from_pretrained(
21
  model_path,
22
  use_fast=False,
23
  padding_side="left",
24
  model_max_length=4096,
25
- token=True # Add this if your model is private
26
  )
27
 
28
  tokenizer.pad_token = tokenizer.eos_token
29
 
30
- # Load merged model with quantization
31
  model = LlamaForCausalLM.from_pretrained(
32
  model_path,
33
  device_map="auto",
34
  torch_dtype=torch.float16,
35
- quantization_config=BitsAndBytesConfig(load_in_8bit=True)
 
36
  )
37
 
38
  return model, tokenizer
 
3
  from transformers.generation.stopping_criteria import StoppingCriteria, StoppingCriteriaList
4
  from peft import PeftModel
5
  import gradio as gr
6
+ import os
7
 
8
  # Add this new class for custom stopping criteria
9
  class SentenceEndingCriteria(StoppingCriteria):
 
16
  return last_token in self.end_tokens
17
 
18
def load_model():
    """Load the quantized Llama model and its tokenizer from the Hugging Face Hub.

    Reads the access token from the ``HUGGINGFACE_TOKEN`` environment variable
    (needed when the model repo is private) and loads the model in 8-bit
    quantization spread across available devices.

    Returns:
        tuple: ``(model, tokenizer)`` — the loaded ``LlamaForCausalLM`` and its
        ``AutoTokenizer``.
    """
    model_path = "Cioni223/mymodel"
    # Ensure you set this environment variable; None is fine for public repos.
    token = os.environ.get("HUGGINGFACE_TOKEN")

    tokenizer = AutoTokenizer.from_pretrained(
        model_path,
        use_fast=False,
        padding_side="left",
        model_max_length=4096,
        token=token,
    )

    # The model has no dedicated pad token; reuse EOS for left-padded batching.
    tokenizer.pad_token = tokenizer.eos_token

    model = LlamaForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        torch_dtype=torch.float16,
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        # Use `token=` (not the deprecated `use_auth_token=`) for consistency
        # with the tokenizer call above; `use_auth_token` is removed in
        # transformers v5.
        token=token,
    )

    return model, tokenizer