Spaces:
Runtime error
Runtime error
Commit
·
3d0f60e
1
Parent(s):
83f63dc
Update generate.py
Browse files
- generate.py +7 -6
generate.py
CHANGED
@@ -13,17 +13,18 @@ from utils import StreamPeftGenerationMixin,StreamLlamaForCausalLM
|
|
13 |
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
|
14 |
|
15 |
parser = argparse.ArgumentParser()
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
20 |
args = parser.parse_args()
|
21 |
print(args)
|
22 |
tokenizer = LlamaTokenizer.from_pretrained(args.model_path)
|
23 |
|
24 |
LOAD_8BIT = True
|
25 |
-
|
26 |
-
LORA_WEIGHTS = args.lora_path
|
27 |
|
28 |
|
29 |
# fix the path for local checkpoint
|
|
|
13 |
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
|
14 |
|
15 |
parser = argparse.ArgumentParser()
|
16 |
+
TOT_CUDA="0,1" # Upgrade bitsandbytes to a newer version to enable balanced loading across multiple GPUs, for example: pip install bitsandbytes==0.39.0
|
17 |
+
BASE_MODEL="ziqingyang/chinese-llama-2-13b"
|
18 |
+
LORA_PATH="teachyourselfcoding/llama-2-13b-22sep"
|
19 |
+
USE_LOCAL=1 # 1: use a local model, 0: use a Hugging Face model
|
20 |
+
TYPE_WRITER=1 # whether to stream the output
|
21 |
+
|
22 |
args = parser.parse_args()
|
23 |
print(args)
|
24 |
tokenizer = LlamaTokenizer.from_pretrained(args.model_path)
|
25 |
|
26 |
LOAD_8BIT = True
|
27 |
+
|
|
|
28 |
|
29 |
|
30 |
# fix the path for local checkpoint
|