samlam111 committed
Commit 3e4bfb6 · 1 Parent(s): a2e195e

Not using unsloth again

Files changed (1): app.py +6 -13
app.py CHANGED
@@ -1,8 +1,8 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-from unsloth import FastLanguageModel
-from unsloth.chat_templates import get_chat_template
 from transformers import TextStreamer
+from peft import AutoPeftModelForCausalLM
+from transformers import AutoTokenizer
 
 
 """
@@ -12,19 +12,12 @@ For more information on `huggingface_hub` Inference API support, please check th
 model_name_or_path = "samlama111/lora_model"
 
 # client = InferenceClient(model_name_or_path)
-model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name = model_name_or_path,
-    max_seq_length = 8192,
+model = AutoPeftModelForCausalLM.from_pretrained(
+    model_name_or_path, # YOUR MODEL YOU USED FOR TRAINING
     load_in_4bit = True,
-    # token = "hf_...", # No need since our model is public
-)
-
-tokenizer = get_chat_template(
-    tokenizer,
-    chat_template = "llama-3.1",
-    mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"}, # ShareGPT style
+    device_map = "auto",
 )
-FastLanguageModel.for_inference(model) # Enable native 2x faster inference
+tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
 
 def respond(
     message,
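
For reference, below is a minimal, self-contained sketch of the loading-and-generation path after this change. It is a sketch under assumptions, not part of the commit: it presumes peft, transformers, and bitsandbytes are installed, that a GPU is available for the 4-bit load, and that the tokenizer saved with samlama111/lora_model carries a chat template; the messages example and generation settings are illustrative only.

# Hedged sketch of inference with the new loading path (not part of this commit).
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, TextStreamer

model_name_or_path = "samlama111/lora_model"

# AutoPeftModelForCausalLM reads the adapter config from the Hub, downloads
# the base model the adapter was trained on, and attaches the LoRA weights,
# so unsloth is no longer needed at inference time.
model = AutoPeftModelForCausalLM.from_pretrained(
    model_name_or_path,
    load_in_4bit = True,   # 4-bit quantized load; requires bitsandbytes
    device_map = "auto",   # place weights on the available device(s)
)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# Illustrative generation call, assuming the saved tokenizer already includes
# a chat template (the commit drops unsloth's get_chat_template call).
messages = [{"role": "user", "content": "Hello!"}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt = True, return_tensors = "pt"
).to(model.device)
streamer = TextStreamer(tokenizer, skip_prompt = True)
model.generate(input_ids, streamer = streamer, max_new_tokens = 128)

Compared with the FastLanguageModel path, this trades unsloth's fused inference kernels for a plain PEFT load; the max_seq_length argument also goes away, since the sequence limit now falls back to the base model's configuration.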