MyNameIsSimon committed on
Commit
4470c09
·
1 Parent(s): dacd0e3

fixed model loading

Browse files
Files changed (2) hide show
  1. app.py +3 -2
  2. requirements.txt +2 -1
app.py CHANGED
@@ -4,7 +4,8 @@ os.environ["CUDA_VISIBLE_DEVICES"] = ""
4
  import gradio as gr
5
 
6
  #from unsloth import FastLanguageModel
7
- from transformers import TextIteratorStreamer, AutoModelForCausalLM, AutoTokenizer
 
8
  from threading import Thread
9
 
10
  """
@@ -37,7 +38,7 @@ class MyModel:
37
  # )
38
  # FastLanguageModel.for_inference(client) # Enable native 2x faster inference
39
  tokenizer = AutoTokenizer.from_pretrained(model)
40
- client = AutoModelForCausalLM.from_pretrained(model)
41
 
42
  self.client = client
43
  self.tokenizer = tokenizer
 
4
  import gradio as gr
5
 
6
  #from unsloth import FastLanguageModel
7
+ from peft import AutoPeftModelForCausalLM
8
+ from transformers import TextIteratorStreamer, AutoTokenizer
9
  from threading import Thread
10
 
11
  """
 
38
  # )
39
  # FastLanguageModel.for_inference(client) # Enable native 2x faster inference
40
  tokenizer = AutoTokenizer.from_pretrained(model)
41
+ client = AutoPeftModelForCausalLM.from_pretrained(model, load_in_4bit=True)
42
 
43
  self.client = client
44
  self.tokenizer = tokenizer
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  huggingface_hub==0.25.2
2
  transformers>=4.45.1
3
- torch
 
 
1
  huggingface_hub==0.25.2
2
  transformers>=4.45.1
3
+ torch
4
+ peft