from typing import (
    Optional,
    List,
    Union,
    Dict,
    Iterator,
    Any,
)

from llama_cpp import Llama
from openai.types.chat import (
    ChatCompletionMessage,
    ChatCompletionMessageParam,
    ChatCompletion,
    ChatCompletionChunk,
)
from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice
from openai.types.chat.chat_completion_chunk import ChoiceDelta
from openai.types.completion_usage import CompletionUsage

from api.adapter import get_prompt_adapter
from api.utils.compat import model_parse


class LlamaCppEngine:
    def __init__(
        self,
        model: Llama,
        model_name: str,
        prompt_name: Optional[str] = None,
    ):
        """
        Initializes a LlamaCppEngine instance.

        Args:
            model (Llama): The Llama model to be used by the engine.
            model_name (str): The name of the model.
            prompt_name (Optional[str], optional): The name of the prompt. Defaults to None.
        """
        self.model = model
        self.model_name = model_name.lower()
        self.prompt_name = prompt_name.lower() if prompt_name is not None else None
        self.prompt_adapter = get_prompt_adapter(self.model_name, prompt_name=self.prompt_name)

    def apply_chat_template(
        self,
        messages: List[ChatCompletionMessageParam],
        functions: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
    ) -> str:
        """
        Applies a chat template to the given list of messages.

        Args:
            messages (List[ChatCompletionMessageParam]): The list of chat completion messages.
            functions (Optional[Union[Dict[str, Any], List[Dict[str, Any]]]], optional): Function
                definitions to merge into the messages. Defaults to None.
            tools (Optional[List[Dict[str, Any]]], optional): Tool definitions to merge into the
                messages. Defaults to None.

        Returns:
            str: The prompt string produced by rendering the messages with the chat template.
        """
        if self.prompt_adapter.function_call_available:
            messages = self.prompt_adapter.postprocess_messages(messages, functions, tools)
        return self.prompt_adapter.apply_chat_template(messages)
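
    # Call-shape sketch (the exact rendered string depends on which prompt
    # adapter ``get_prompt_adapter`` resolves for the model name):
    #
    #     engine.apply_chat_template([{"role": "user", "content": "Hello!"}])
    #     # -> e.g. "<s>[INST] Hello! [/INST]" for a Llama-2-style template
    #     #    (illustrative only)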

    def create_completion(self, prompt, **kwargs) -> Union[Iterator, Dict[str, Any]]:
        """
        Creates a completion using the specified prompt and additional keyword arguments.

        Args:
            prompt (str): The prompt for the completion.
            **kwargs: Additional keyword arguments passed to the model's create_completion method.

        Returns:
            Union[Iterator, Dict[str, Any]]: The completion dict generated by the model, or an
                iterator of chunks when ``stream=True`` is passed.
        """
        return self.model.create_completion(prompt, **kwargs)
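
    # Sketch of commonly forwarded keyword arguments (all standard parameters
    # of ``llama_cpp.Llama.create_completion``):
    #
    #     engine.create_completion(prompt, max_tokens=256, temperature=0.7,
    #                              stop=engine.stop, stream=False)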

    def _create_chat_completion(self, prompt, **kwargs) -> ChatCompletion:
        """
        Creates a chat completion using the specified prompt and additional keyword arguments.

        Args:
            prompt (str): The prompt for the chat completion.
            **kwargs: Additional keyword arguments passed to the create_completion method.

        Returns:
            ChatCompletion: The chat completion generated by the model.
        """
        completion = self.create_completion(prompt, **kwargs)
        message = ChatCompletionMessage(
            role="assistant",
            content=completion["choices"][0]["text"].strip(),
        )
        choice = Choice(
            index=0,
            message=message,
            # Propagate the model's actual finish reason (e.g. "length" when
            # truncated) instead of always reporting "stop".
            finish_reason=completion["choices"][0]["finish_reason"] or "stop",
            logprobs=None,
        )
        usage = model_parse(CompletionUsage, completion["usage"])
        return ChatCompletion(
            id="chat" + completion["id"],
            choices=[choice],
            created=completion["created"],
            model=completion["model"],
            object="chat.completion",
            usage=usage,
        )

    def _create_chat_completion_stream(self, prompt, **kwargs) -> Iterator:
        """
        Generates a stream of chat completion chunks based on the given prompt.

        Args:
            prompt (str): The prompt for generating chat completion chunks.
            **kwargs: Additional keyword arguments for creating completions.

        Yields:
            ChatCompletionChunk: A chunk of the chat completion generated from the prompt.
        """
        completion = self.create_completion(prompt, **kwargs)
        for i, output in enumerate(completion):
            _id, _created, _model = output["id"], output["created"], output["model"]
            if i == 0:
                # Emit an initial chunk that carries only the assistant role,
                # mirroring the OpenAI streaming format.
                choice = ChunkChoice(
                    index=0,
                    delta=ChoiceDelta(role="assistant", content=""),
                    finish_reason=None,
                    logprobs=None,
                )
                yield ChatCompletionChunk(
                    id=f"chat{_id}",
                    choices=[choice],
                    created=_created,
                    model=_model,
                    object="chat.completion.chunk",
                )

            if output["choices"][0]["finish_reason"] is None:
                delta = ChoiceDelta(content=output["choices"][0]["text"])
            else:
                # The final chunk has an empty delta and carries the finish reason.
                delta = ChoiceDelta()

            choice = ChunkChoice(
                index=0,
                delta=delta,
                finish_reason=output["choices"][0]["finish_reason"],
                logprobs=None,
            )
            yield ChatCompletionChunk(
                id=f"chat{_id}",
                choices=[choice],
                created=_created,
                model=_model,
                object="chat.completion.chunk",
            )

    def create_chat_completion(self, prompt, **kwargs) -> Union[Iterator, ChatCompletion]:
        """
        Creates a chat completion, dispatching to the streaming implementation
        when ``stream=True`` is passed in the keyword arguments.
        """
        return (
            self._create_chat_completion_stream(prompt, **kwargs)
            if kwargs.get("stream", False)
            else self._create_chat_completion(prompt, **kwargs)
        )

    @property
    def stop(self):
        """
        Gets the stop words of the prompt adapter.

        Returns:
            The adapter's stop words, or None if the adapter does not define any.
        """
        return self.prompt_adapter.stop if hasattr(self.prompt_adapter, "stop") else None
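

if __name__ == "__main__":
    # Minimal usage sketch, not part of the engine itself. The GGUF path and
    # the model name below are placeholder assumptions; ``get_prompt_adapter``
    # must recognize the model name (or an explicit ``prompt_name``) for the
    # chat template to resolve.
    llm = Llama(model_path="/path/to/model.gguf", n_ctx=2048)  # hypothetical path
    engine = LlamaCppEngine(llm, model_name="qwen")  # hypothetical model name

    prompt = engine.apply_chat_template(
        [{"role": "user", "content": "Say hello in one sentence."}]
    )

    # Non-streaming: returns a ChatCompletion object.
    result = engine.create_chat_completion(prompt, max_tokens=64, temperature=0.7)
    print(result.choices[0].message.content)

    # Streaming: returns an iterator of ChatCompletionChunk objects.
    for chunk in engine.create_chat_completion(prompt, max_tokens=64, stream=True):
        delta = chunk.choices[0].delta
        if delta.content:
            print(delta.content, end="", flush=True)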