Commit 80fb1ef · ElPlaguister committed
1 Parent(s): 00e9c68

Feat KoAlpaca Tensor Parallelism

Files changed:
- koalpaca.py (+5 -3)
- requirements.txt (+1 -0)
koalpaca.py CHANGED
@@ -2,13 +2,12 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GenerationConfig
 from peft import PeftModel, PeftConfig
 from model import Model
-
+import tensor_parallel as tp
 
 class KoAlpaca(Model):
     def __init__(self):
         peft_model_id = "4n3mone/Komuchat-koalpaca-polyglot-12.8B"
         config = PeftConfig.from_pretrained(peft_model_id)
-        # self.accelerator = Accelerator()
         self.bnb_config = BitsAndBytesConfig(
             load_in_4bit=True,
             bnb_4bit_use_double_quant=True,
@@ -17,6 +16,8 @@ class KoAlpaca(Model):
         )
         #self.model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, quantization_config=self.bnb_config, device_map={"":0})
         self.model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, quantization_config=self.bnb_config, device_map='auto')
+        print(self.model.device)
+        self.model = tp.tensor_parallel(self.model, [*next(self.model.parameters()).device])
         self.model = PeftModel.from_pretrained(self.model, peft_model_id)
         self.tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
         self.gen_config = GenerationConfig.from_pretrained('./models/koalpaca', 'gen_config.json')
@@ -34,4 +35,5 @@ class KoAlpaca(Model):
             generation_config=self.gen_config
         )
         outputs = self.tokenizer.decode(output_ids[0]).split("### 답변: ")[-1]
-        return outputs
+        return outputs
+
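A note on the added tensor-parallel line: next(self.model.parameters()).device returns a single torch.device, which is not iterable, so unpacking it with [*...] raises a TypeError at runtime. The documented entry point of the tensor_parallel package takes the model plus an explicit list of devices. Below is a minimal sketch of that call, not the commit's exact code; the base-model name, fp16 dtype, and device lists are assumptions for illustration.

# Sketch of the documented tensor_parallel API (assumed setup, not from this
# commit); see https://github.com/BlackSamorez/tensor_parallel for the API.
import torch
import tensor_parallel as tp
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/polyglot-ko-12.8b",  # placeholder; the commit loads config.base_model_name_or_path
    torch_dtype=torch.float16,
)

# torch.device is not iterable, so [*next(model.parameters()).device] fails;
# wrapping the single device in a list works:
device = next(model.parameters()).device
model = tp.tensor_parallel(model, [device])

# To actually shard across several GPUs (illustrative two-GPU layout):
# model = tp.tensor_parallel(model, ["cuda:0", "cuda:1"])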
requirements.txt CHANGED
@@ -3,4 +3,5 @@ gradio==4.1.1
 numpy==1.26.1
 pandas==2.1.2
 torch==2.0.1
+tensor_parallel==2.0.0
 git+https://github.com/huggingface/peft.git
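For completeness, a minimal end-to-end sketch of driving a tensor-parallelized model at inference time, mirroring the generate/decode flow in koalpaca.py. The model name, device layout, and prompt template are assumptions; the "### 질문: / ### 답변: " ("question"/"answer") format follows the KoAlpaca convention implied by the split in the diff.

# End-to-end sketch under assumed names; mirrors the diff's generate/decode flow.
import torch
import tensor_parallel as tp
from transformers import AutoModelForCausalLM, AutoTokenizer

base = "EleutherAI/polyglot-ko-12.8b"  # placeholder base model
tokenizer = AutoTokenizer.from_pretrained(base)
model = AutoModelForCausalLM.from_pretrained(base, torch_dtype=torch.float16)
model = tp.tensor_parallel(model, ["cuda:0", "cuda:1"])  # assumed two-GPU layout

prompt = "### 질문: 안녕하세요?\n\n### 답변: "  # assumed KoAlpaca-style prompt
inputs = tokenizer(prompt, return_tensors="pt").to("cuda:0")
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=64)

# Keep only the answer portion, exactly as koalpaca.py does:
answer = tokenizer.decode(output_ids[0]).split("### 답변: ")[-1]
print(answer)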