dh-mc committed on
Commit
32463b2
·
1 Parent(s): f16c8b2

no padding

Browse files
llm_toolkit/llm_utils.py CHANGED
@@ -146,7 +146,7 @@ def eval_model(
146
  inputs = tokenizer(
147
  batch_prompts,
148
  return_tensors="pt",
149
- padding=True, # Ensure all inputs in the batch have the same length
150
  ).to(device)
151
 
152
  outputs = model.generate(
@@ -155,7 +155,7 @@ def eval_model(
155
  repetition_penalty=repetition_penalty,
156
  use_cache=False,
157
  )
158
- outputs = outputs[:, inputs["input_ids"].shape[1] :]
159
  decoded_output = tokenizer.batch_decode(
160
  outputs, skip_special_tokens=True
161
  ) # Skip special tokens for clean output
 
146
  inputs = tokenizer(
147
  batch_prompts,
148
  return_tensors="pt",
149
+ padding=batch_size > 1, # Ensure all inputs in the batch have the same length
150
  ).to(device)
151
 
152
  outputs = model.generate(
 
155
  repetition_penalty=repetition_penalty,
156
  use_cache=False,
157
  )
158
+ outputs = outputs[:, inputs["input_ids"].shape[1]:]
159
  decoded_output = tokenizer.batch_decode(
160
  outputs, skip_special_tokens=True
161
  ) # Skip special tokens for clean output
scripts/eval-epochs.sh CHANGED
@@ -7,7 +7,7 @@ pwd
7
 
8
  export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
9
  export RESIZE_TOKEN_EMBEDDINGS=true
10
- export USING_LLAMA_FACTORY=true
11
  export USING_P1_PROMPT_TEMPLATE=false
12
 
13
  export ORG_NAME=$1
 
7
 
8
  export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
9
  export RESIZE_TOKEN_EMBEDDINGS=true
10
+ # export USING_LLAMA_FACTORY=true
11
  export USING_P1_PROMPT_TEMPLATE=false
12
 
13
  export ORG_NAME=$1
scripts/eval-mgtv-qwen2.5_4bit.sh CHANGED
@@ -13,17 +13,18 @@ cat /etc/os-release
13
  lscpu
14
  grep MemTotal /proc/meminfo
15
 
 
16
  export LOAD_IN_4BIT=true
17
  $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-72B-Instruct
18
 
19
  export START_NUM_SHOTS=5
20
  $BASEDIR/scripts/eval-shots_4bit.sh Qwen Qwen2.5-72B-Instruct
21
 
22
-
23
  export START_NUM_SHOTS=40
24
  $BASEDIR/scripts/eval-shots_4bit.sh shenzhi-wang Llama3.1-70B-Chinese-Chat
25
 
26
  export LOAD_IN_4BIT=false
 
27
  $BASEDIR/scripts/eval-shots.sh internlm internlm2_5-7b-chat
28
 
29
  export START_NUM_SHOTS=50
 
13
  lscpu
14
  grep MemTotal /proc/meminfo
15
 
16
+ export USING_LLAMA_FACTORY=false
17
  export LOAD_IN_4BIT=true
18
  $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-72B-Instruct
19
 
20
  export START_NUM_SHOTS=5
21
  $BASEDIR/scripts/eval-shots_4bit.sh Qwen Qwen2.5-72B-Instruct
22
 
 
23
  export START_NUM_SHOTS=40
24
  $BASEDIR/scripts/eval-shots_4bit.sh shenzhi-wang Llama3.1-70B-Chinese-Chat
25
 
26
  export LOAD_IN_4BIT=false
27
+ export USING_LLAMA_FACTORY=true
28
  $BASEDIR/scripts/eval-shots.sh internlm internlm2_5-7b-chat
29
 
30
  export START_NUM_SHOTS=50
scripts/eval-shots.sh CHANGED
@@ -7,7 +7,7 @@ pwd
7
 
8
  export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
9
  export RESIZE_TOKEN_EMBEDDINGS=true
10
- export USING_LLAMA_FACTORY=true
11
  export USING_P1_PROMPT_TEMPLATE=false
12
  export LOAD_IN_4BIT=false
13
 
 
7
 
8
  export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
9
  export RESIZE_TOKEN_EMBEDDINGS=true
10
+ # export USING_LLAMA_FACTORY=true
11
  export USING_P1_PROMPT_TEMPLATE=false
12
  export LOAD_IN_4BIT=false
13