nm-research committed
Commit 909b7f4
Parent: 80d4029

Update README.md

Files changed (1)
  1. README.md +2 -2
README.md CHANGED
@@ -47,7 +47,7 @@ This model can be deployed efficiently using the [vLLM](https://docs.vllm.ai/en/
 from vllm import LLM, SamplingParams
 from transformers import AutoTokenizer
 
-model_id = "neuralmagic/Qwen2.5-72B-Instruct-quantized.w8a8"
+model_id = "neuralmagic-ent/Qwen2.5-72B-Instruct-quantized.w8a8"
 number_gpus = 1
 max_model_len = 8192
 
@@ -74,7 +74,7 @@ The model was evaluated on the [OpenLLM](https://huggingface.co/spaces/open-llm-
 ```
 lm_eval \
   --model vllm \
-  --model_args pretrained="neuralmagic/Qwen2.5-72B-Instruct-quantized.w8a8",dtype=auto,gpu_memory_utilization=0.9,add_bos_token=True,max_model_len=4096,enable_chunk_prefill=True,tensor_parallel_size=1 \
+  --model_args pretrained="neuralmagic-ent/Qwen2.5-72B-Instruct-quantized.w8a8",dtype=auto,gpu_memory_utilization=0.9,add_bos_token=True,max_model_len=4096,enable_chunk_prefill=True,tensor_parallel_size=1 \
   --tasks openllm \
   --batch_size auto
 ```
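For context, a minimal sketch of how the renamed `model_id` slots into the vLLM deployment snippet this diff touches. Only `model_id`, `number_gpus`, and `max_model_len` come from the diff; the chat prompt, sampling parameters, and generate/print calls below are illustrative assumptions following vLLM's standard offline-inference flow, not part of the commit.

```python
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

# Values taken from the diff context above.
model_id = "neuralmagic-ent/Qwen2.5-72B-Instruct-quantized.w8a8"
number_gpus = 1
max_model_len = 8192

# Assumption: build the prompt with the model's own chat template.
tokenizer = AutoTokenizer.from_pretrained(model_id)
messages = [{"role": "user", "content": "Give me a short introduction to large language models."}]
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)

# Assumption: sampling settings are illustrative, not from the original README.
sampling_params = SamplingParams(temperature=0.6, top_p=0.9, max_tokens=256)

# Load the INT8 (w8a8) quantized model in vLLM and generate.
llm = LLM(model=model_id, tensor_parallel_size=number_gpus, max_model_len=max_model_len)
outputs = llm.generate(prompt, sampling_params)
print(outputs[0].outputs[0].text)
```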