Commit
·
4470c09
1 Parent(s):
dacd0e3
fixed model loading
Browse files
- app.py +3 -2
- requirements.txt +2 -1
app.py
CHANGED
@@ -4,7 +4,8 @@ os.environ["CUDA_VISIBLE_DEVICES"] = ""
|
|
4 |
import gradio as gr
|
5 |
|
6 |
#from unsloth import FastLanguageModel
|
7 |
-
from
|
|
|
8 |
from threading import Thread
|
9 |
|
10 |
"""
|
@@ -37,7 +38,7 @@ class MyModel:
|
|
37 |
# )
|
38 |
# FastLanguageModel.for_inference(client) # Enable native 2x faster inference
|
39 |
tokenizer = AutoTokenizer.from_pretrained(model)
|
40 |
-
client =
|
41 |
|
42 |
self.client = client
|
43 |
self.tokenizer = tokenizer
|
|
|
4 |
import gradio as gr
|
5 |
|
6 |
#from unsloth import FastLanguageModel
|
7 |
+
from peft import AutoPeftModelForCausalLM
|
8 |
+
from transformers import TextIteratorStreamer, AutoTokenizer
|
9 |
from threading import Thread
|
10 |
|
11 |
"""
|
|
|
38 |
# )
|
39 |
# FastLanguageModel.for_inference(client) # Enable native 2x faster inference
|
40 |
tokenizer = AutoTokenizer.from_pretrained(model)
|
41 |
+
client = AutoPeftModelForCausalLM.from_pretrained(model, load_in_4bit=True)
|
42 |
|
43 |
self.client = client
|
44 |
self.tokenizer = tokenizer
|
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
huggingface_hub==0.25.2
|
2 |
transformers>=4.45.1
|
3 |
-
torch
|
|
|
|
1 |
huggingface_hub==0.25.2
|
2 |
transformers>=4.45.1
|
3 |
+
torch
|
4 |
+
peft
|