from typing import (
    Optional,
    List,
    Union,
    Dict,
    Iterator,
    Any,
)

from llama_cpp import Llama
from openai.types.chat import (
    ChatCompletionMessage,
    ChatCompletionMessageParam,
    ChatCompletion,
    ChatCompletionChunk,
)
from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice
from openai.types.chat.chat_completion_chunk import ChoiceDelta
from openai.types.completion_usage import CompletionUsage

from api.adapter import get_prompt_adapter
from api.utils.compat import model_parse


class LlamaCppEngine:
    def __init__(
        self,
        model: Llama,
        model_name: str,
        prompt_name: Optional[str] = None,
    ):
        """
        Initializes a LlamaCppEngine instance.

        Args:
            model (Llama): The Llama model to be used by the engine.
            model_name (str): The name of the model.
            prompt_name (Optional[str], optional): The name of the prompt. Defaults to None.
        """
        self.model = model
        self.model_name = model_name.lower()
        self.prompt_name = prompt_name.lower() if prompt_name is not None else None
        self.prompt_adapter = get_prompt_adapter(self.model_name, prompt_name=self.prompt_name)

    def apply_chat_template(
        self,
        messages: List[ChatCompletionMessageParam],
        functions: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
    ) -> str:
        """
        Applies a chat template to the given list of messages.

        Args:
            messages (List[ChatCompletionMessageParam]): The list of chat completion messages.
            functions (Optional[Union[Dict[str, Any], List[Dict[str, Any]]]], optional): Function
                definitions to merge into the messages. Defaults to None.
            tools (Optional[List[Dict[str, Any]]], optional): Tool definitions to merge into the
                messages. Defaults to None.

        Returns:
            str: The prompt string produced by rendering the messages with the chat template.
        """
        if self.prompt_adapter.function_call_available:
            messages = self.prompt_adapter.postprocess_messages(messages, functions, tools)
        return self.prompt_adapter.apply_chat_template(messages)
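
    # Call-shape sketch (the exact rendered string depends on which prompt
    # adapter ``get_prompt_adapter`` resolves for the model name):
    #
    #     engine.apply_chat_template([{"role": "user", "content": "Hello!"}])
    #     # -> e.g. "<s>[INST] Hello! [/INST]" for a Llama-2-style template
    #     #    (illustrative only)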

    def create_completion(self, prompt, **kwargs) -> Union[Iterator, Dict[str, Any]]:
        """
        Creates a completion using the specified prompt and additional keyword arguments.

        Args:
            prompt (str): The prompt for the completion.
            **kwargs: Additional keyword arguments passed to the model's create_completion method.

        Returns:
            Union[Iterator, Dict[str, Any]]: The completion dict generated by the model, or an
                iterator of chunks when ``stream=True`` is passed.
        """
        return self.model.create_completion(prompt, **kwargs)
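
    # Sketch of commonly forwarded keyword arguments (all standard parameters
    # of ``llama_cpp.Llama.create_completion``):
    #
    #     engine.create_completion(prompt, max_tokens=256, temperature=0.7,
    #                              stop=engine.stop, stream=False)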

    def _create_chat_completion(self, prompt, **kwargs) -> ChatCompletion:
        """
        Creates a chat completion using the specified prompt and additional keyword arguments.

        Args:
            prompt (str): The prompt for the chat completion.
            **kwargs: Additional keyword arguments passed to the create_completion method.

        Returns:
            ChatCompletion: The chat completion generated by the model.
        """
        completion = self.create_completion(prompt, **kwargs)
        message = ChatCompletionMessage(
            role="assistant",
            content=completion["choices"][0]["text"].strip(),
        )
        choice = Choice(
            index=0,
            message=message,
            # Propagate the model's actual finish reason (e.g. "length" when
            # truncated) instead of always reporting "stop".
            finish_reason=completion["choices"][0]["finish_reason"] or "stop",
            logprobs=None,
        )
        usage = model_parse(CompletionUsage, completion["usage"])
        return ChatCompletion(
            id="chat" + completion["id"],
            choices=[choice],
            created=completion["created"],
            model=completion["model"],
            object="chat.completion",
            usage=usage,
        )

    def _create_chat_completion_stream(self, prompt, **kwargs) -> Iterator:
        """
        Generates a stream of chat completion chunks based on the given prompt.

        Args:
            prompt (str): The prompt for generating chat completion chunks.
            **kwargs: Additional keyword arguments for creating completions.

        Yields:
            ChatCompletionChunk: A chunk of the chat completion generated from the prompt.
        """
        completion = self.create_completion(prompt, **kwargs)
        for i, output in enumerate(completion):
            _id, _created, _model = output["id"], output["created"], output["model"]
            if i == 0:
                # Emit an initial chunk that carries only the assistant role,
                # mirroring the OpenAI streaming format.
                choice = ChunkChoice(
                    index=0,
                    delta=ChoiceDelta(role="assistant", content=""),
                    finish_reason=None,
                    logprobs=None,
                )
                yield ChatCompletionChunk(
                    id=f"chat{_id}",
                    choices=[choice],
                    created=_created,
                    model=_model,
                    object="chat.completion.chunk",
                )

            if output["choices"][0]["finish_reason"] is None:
                delta = ChoiceDelta(content=output["choices"][0]["text"])
            else:
                # The final chunk has an empty delta and carries the finish reason.
                delta = ChoiceDelta()

            choice = ChunkChoice(
                index=0,
                delta=delta,
                finish_reason=output["choices"][0]["finish_reason"],
                logprobs=None,
            )
            yield ChatCompletionChunk(
                id=f"chat{_id}",
                choices=[choice],
                created=_created,
                model=_model,
                object="chat.completion.chunk",
            )

    def create_chat_completion(self, prompt, **kwargs) -> Union[Iterator, ChatCompletion]:
        """
        Creates a chat completion, dispatching to the streaming implementation
        when ``stream=True`` is passed in the keyword arguments.
        """
        return (
            self._create_chat_completion_stream(prompt, **kwargs)
            if kwargs.get("stream", False)
            else self._create_chat_completion(prompt, **kwargs)
        )

    @property
    def stop(self):
        """
        Gets the stop words of the prompt adapter.

        Returns:
            The adapter's stop words, or None if the adapter does not define any.
        """
        return self.prompt_adapter.stop if hasattr(self.prompt_adapter, "stop") else None
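

if __name__ == "__main__":
    # Minimal usage sketch, not part of the engine itself. The GGUF path and
    # the model name below are placeholder assumptions; ``get_prompt_adapter``
    # must recognize the model name (or an explicit ``prompt_name``) for the
    # chat template to resolve.
    llm = Llama(model_path="/path/to/model.gguf", n_ctx=2048)  # hypothetical path
    engine = LlamaCppEngine(llm, model_name="qwen")  # hypothetical model name

    prompt = engine.apply_chat_template(
        [{"role": "user", "content": "Say hello in one sentence."}]
    )

    # Non-streaming: returns a ChatCompletion object.
    result = engine.create_chat_completion(prompt, max_tokens=64, temperature=0.7)
    print(result.choices[0].message.content)

    # Streaming: returns an iterator of ChatCompletionChunk objects.
    for chunk in engine.create_chat_completion(prompt, max_tokens=64, stream=True):
        delta = chunk.choices[0].delta
        if delta.content:
            print(delta.content, end="", flush=True)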