"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""

# Re-export the public entry points from their defining submodules so that
# callers can import them directly from the top-level package.
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.engine.llm_engine import LLMEngine
from vllm.engine.ray_utils import initialize_cluster
from vllm.entrypoints.llm import LLM
from vllm.outputs import CompletionOutput, RequestOutput
from vllm.sampling_params import SamplingParams

# Version string of this package.
__version__ = "0.2.7"

# Names exported by a star-import of this package; every entry corresponds
# to one of the names imported above.
__all__ = [
    "LLM",
    "SamplingParams",
    "RequestOutput",
    "CompletionOutput",
    "LLMEngine",
    "EngineArgs",
    "AsyncLLMEngine",
    "AsyncEngineArgs",
    "initialize_cluster",
]