from typing import (
    Optional,
    List,
    Union,
    Dict,
    Iterator,
    Any,
)

from llama_cpp import Llama
from openai.types.chat import (
    ChatCompletionMessage,
    ChatCompletion,
    ChatCompletionChunk,
)
from openai.types.chat import ChatCompletionMessageParam
from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice
from openai.types.chat.chat_completion_chunk import ChoiceDelta
from openai.types.completion_usage import CompletionUsage

from api.adapter import get_prompt_adapter
from api.utils.compat import model_parse


class LlamaCppEngine:
    def __init__(
        self,
        model: Llama,
        model_name: str,
        prompt_name: Optional[str] = None,
    ):
        """
        Initializes a LlamaCppEngine instance.

        Args:
            model (Llama): The Llama model to be used by the engine.
            model_name (str): The name of the model.
            prompt_name (Optional[str], optional): The name of the prompt. Defaults to None.
        """
        self.model = model
        self.model_name = model_name.lower()
        self.prompt_name = prompt_name.lower() if prompt_name is not None else None
        self.prompt_adapter = get_prompt_adapter(self.model_name, prompt_name=self.prompt_name)

    def apply_chat_template(
        self,
        messages: List[ChatCompletionMessageParam],
        functions: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
    ) -> str:
        """
        Applies a chat template to the given list of messages.

        Args:
            messages (List[ChatCompletionMessageParam]): The list of chat completion messages.
            functions (Optional[Union[Dict[str, Any], List[Dict[str, Any]]]], optional): The functions to be applied to the messages. Defaults to None.
            tools (Optional[List[Dict[str, Any]]], optional): The tools to be used for postprocessing the messages. Defaults to None.

        Returns:
            str: The chat template applied to the messages.
        """
        if self.prompt_adapter.function_call_available:
            messages = self.prompt_adapter.postprocess_messages(messages, functions, tools)
        return self.prompt_adapter.apply_chat_template(messages)
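
    # A minimal usage sketch (illustrative only: the rendered prompt string is
    # entirely adapter-specific, and the messages below are hypothetical):
    #
    #   prompt = engine.apply_chat_template([
    #       {"role": "system", "content": "You are a helpful assistant."},
    #       {"role": "user", "content": "Hello!"},
    #   ])
    #   # `prompt` is a single string ready to pass to create_completion().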

    def create_completion(self, prompt: str, **kwargs) -> Union[Iterator, Dict[str, Any]]:
        """
        Creates a completion using the specified prompt and additional keyword arguments.

        Args:
            prompt (str): The prompt for the completion.
            **kwargs: Additional keyword arguments to be passed to the model's create_completion method.

        Returns:
            Union[Iterator, Dict[str, Any]]: The completion generated by the model.
        """
        return self.model.create_completion(prompt, **kwargs)
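
    # For reference, a non-streaming llama-cpp-python completion dict looks
    # roughly like the following (the exact field set may vary across versions):
    #
    #   {
    #       "id": "cmpl-...",
    #       "object": "text_completion",
    #       "created": 1700000000,
    #       "model": "llama-2-7b-chat",
    #       "choices": [{"text": "...", "index": 0, "logprobs": None, "finish_reason": "stop"}],
    #       "usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
    #   }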

    def _create_chat_completion(self, prompt: str, **kwargs) -> ChatCompletion:
        """
        Creates a chat completion using the specified prompt and additional keyword arguments.

        Args:
            prompt (str): The prompt for the chat completion.
            **kwargs: Additional keyword arguments to be passed to the create_completion method.

        Returns:
            ChatCompletion: The chat completion generated by the model.
        """
        completion = self.create_completion(prompt, **kwargs)
        message = ChatCompletionMessage(
            role="assistant",
            content=completion["choices"][0]["text"].strip(),
        )
        choice = Choice(
            index=0,
            message=message,
            # Propagate the model's finish reason so "length" truncation is not
            # silently reported as a normal "stop".
            finish_reason=completion["choices"][0]["finish_reason"] or "stop",
            logprobs=None,
        )
        usage = model_parse(CompletionUsage, completion["usage"])
        return ChatCompletion(
            id="chat" + completion["id"],
            choices=[choice],
            created=completion["created"],
            model=completion["model"],
            object="chat.completion",
            usage=usage,
        )

    def _create_chat_completion_stream(self, prompt: str, **kwargs) -> Iterator[ChatCompletionChunk]:
        """
        Generates a stream of chat completion chunks based on the given prompt.

        Args:
            prompt (str): The prompt for generating chat completion chunks.
            **kwargs: Additional keyword arguments for creating completions.

        Yields:
            ChatCompletionChunk: A chunk of chat completion generated from the prompt.
        """
        completion = self.create_completion(prompt, **kwargs)
        for i, output in enumerate(completion):
            _id, _created, _model = output["id"], output["created"], output["model"]
            if i == 0:
                choice = ChunkChoice(
                    index=0,
                    delta=ChoiceDelta(role="assistant", content=""),
                    finish_reason=None,
                    logprobs=None,
                )
                yield ChatCompletionChunk(
                    id=f"chat{_id}",
                    choices=[choice],
                    created=_created,
                    model=_model,
                    object="chat.completion.chunk",
                )

            if output["choices"][0]["finish_reason"] is None:
                delta = ChoiceDelta(content=output["choices"][0]["text"])
            else:
                delta = ChoiceDelta()

            choice = ChunkChoice(
                index=0,
                delta=delta,
                finish_reason=output["choices"][0]["finish_reason"],
                logprobs=None,
            )
            yield ChatCompletionChunk(
                id=f"chat{_id}",
                choices=[choice],
                created=_created,
                model=_model,
                object="chat.completion.chunk",
            )

    def create_chat_completion(self, prompt: str, **kwargs) -> Union[Iterator, ChatCompletion]:
        """
        Creates a chat completion, dispatching on the ``stream`` keyword argument.

        Args:
            prompt (str): The prompt for the chat completion.
            **kwargs: Additional keyword arguments to be passed to the create_completion method.

        Returns:
            Union[Iterator, ChatCompletion]: An iterator of ChatCompletionChunk objects
                when ``stream=True``, otherwise a single ChatCompletion.
        """
        return (
            self._create_chat_completion_stream(prompt, **kwargs)
            if kwargs.get("stream", False)
            else self._create_chat_completion(prompt, **kwargs)
        )

    @property
    def stop(self):
        """
        Gets the stop property of the prompt adapter.

        Returns:
            The stop property of the prompt adapter, or None if it does not exist.
        """
        return self.prompt_adapter.stop if hasattr(self.prompt_adapter, "stop") else None
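

# A minimal end-to-end sketch of how this engine might be wired up. The model
# path, model name, and generation parameters below are hypothetical; any GGUF
# model loadable by llama-cpp-python would do.
if __name__ == "__main__":
    llm = Llama(model_path="models/llama-2-7b-chat.Q4_K_M.gguf", n_ctx=2048)  # hypothetical path
    engine = LlamaCppEngine(llm, model_name="llama-2-7b-chat")

    prompt = engine.apply_chat_template(
        [{"role": "user", "content": "Write a haiku about the sea."}]
    )

    # Non-streaming: returns a ChatCompletion object.
    completion = engine.create_chat_completion(prompt, max_tokens=128, temperature=0.7)
    print(completion.choices[0].message.content)

    # Streaming: returns an iterator of ChatCompletionChunk objects.
    for chunk in engine.create_chat_completion(prompt, max_tokens=128, stream=True):
        delta = chunk.choices[0].delta
        if delta.content:
            print(delta.content, end="", flush=True)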