# Model-serving configuration: one key=value pair per line.
# NOTE(review): the keys look like a DJL Serving serving.properties file;
# confirm which loader consumes this file.

# Inference engine used to load and run the model.
engine=DeepSpeed

# Handler script that serves requests.
# NOTE(review): presumably resolved relative to the model directory - confirm.
option.entryPoint=inference.py

# Tensor-parallel degree; 1 presumably means no sharding across devices.
option.tensor_parallel_degree=1

# Token limit passed to the engine.
# NOTE(review): confirm whether this counts prompt + generated tokens.
option.max_tokens=4096

# Task the handler is configured for.
option.task=text-generation

# Data type used for the model (bf16).
option.dtype=bf16

# NOTE(review): "base" is not explained here; verify the value is one the
# entry point / engine actually recognises.
option.model_type=base

# Model loading timeout.
# NOTE(review): presumably seconds (3600 = 1 hour) - confirm.
option.model_loading_timeout=3600