File size: 5,973 Bytes
34b78ab
1f71841
8b1c859
 
 
 
 
 
1f71841
 
 
 
 
 
 
 
 
8b1c859
 
 
34b78ab
 
 
 
8b1c859
 
 
 
 
 
 
 
34b78ab
8b1c859
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34b78ab
8b1c859
 
34b78ab
8b1c859
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34b78ab
8b1c859
34b78ab
 
 
8b1c859
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34b78ab
 
8b1c859
34b78ab
 
 
 
 
8b1c859
 
34b78ab
8b1c859
 
 
 
 
 
 
 
 
34b78ab
 
 
 
 
8b1c859
 
 
34b78ab
8b1c859
 
 
34b78ab
 
 
 
 
8b1c859
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34b78ab
 
 
8b1c859
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import logging
import os
import time

import tiktoken
import openai


# --- API key & logging setup -------------------------------------------------
# The key is resolved in priority order:
#   1. local file 'data/openaikey.txt' (whitespace-stripped contents)
#   2. the OPENAI_KEY environment variable (only if the file did not provide one)
OPENAI_KEY = None
key_file = 'data/openaikey.txt'
if os.path.exists(key_file):
    with open(key_file) as key_fh:
        OPENAI_KEY = key_fh.read().strip()

if OPENAI_KEY is None:
    OPENAI_KEY = os.getenv('OPENAI_KEY')

# Register the resolved key (may be None if neither source is present).
openai.api_key = OPENAI_KEY


logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def num_tokens_from_messages(messages, model):
    """
    Return the number of tokens used by a list of chat messages.

    Mirrors the reference implementation from the OpenAI cookbook:
    https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb

    :param messages: list of dicts with 'role'/'content' (and optionally 'name') keys
    :param model: model name used to select the tokenizer and per-message overhead
    :raises NotImplementedError: for models whose message format is unknown
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model name: fall back to the encoding used by current chat models.
        logger.warning("Model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")
    if model in {
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-16k-0613",
        "gpt-4-0314",
        "gpt-4-32k-0314",
        "gpt-4-0613",
        "gpt-4-32k-0613",
    }:
        tokens_per_message = 3
        tokens_per_name = 1
    elif model == "gpt-3.5-turbo-0301":
        tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
        tokens_per_name = -1  # if there's a name, the role is omitted
    elif "gpt-3.5-turbo" in model:
        logger.warning("gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
        return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613")
    elif "gpt-4" in model:
        logger.warning("gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
        return num_tokens_from_messages(messages, model="gpt-4-0613")
    else:
        raise NotImplementedError(
            f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
        )
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            # disallowed_special=() lets special-token text in content be counted as plain text.
            num_tokens += len(encoding.encode(value, disallowed_special=()))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
    return num_tokens


class ChatGptInteractor:
    """Thin wrapper around the OpenAI ChatCompletion API with retry and token counting."""

    def __init__(self, model_name='gpt-3.5-turbo', max_tokens=None, temperature=None, stream=False):
        """
        :param model_name: OpenAI chat model to use
        :param max_tokens: optional completion-length cap forwarded to the API
        :param temperature: optional sampling temperature forwarded to the API
        :param stream: if True, chat_completion() returns a generator of text chunks
        """
        self.model_name = model_name
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.stream = stream
        self.tokenizer = tiktoken.encoding_for_model(self.model_name)

    def chat_completion_simple(
            self,
            *,
            user_text,
            system_text=None,
    ):
        """Run a completion for a single user message, with an optional system prompt."""
        return self.chat_completion(
            self._construct_messages_simple(user_text, system_text),
        )

    def count_tokens_simple(self, *, user_text, system_text=None):
        """Count prompt tokens for a single user message, with an optional system prompt."""
        return self.count_tokens(self._construct_messages_simple(user_text, system_text))

    @staticmethod
    def _construct_messages_simple(user_text, system_text=None):
        """Build the API message list: optional system message followed by the user message."""
        messages = []
        if system_text is not None:
            messages.append({
                "role": "system",
                "content": system_text
            })
        messages.append({
            "role": "user",
            "content": user_text
        })
        return messages

    def chat_completion(self, messages):
        """
        Send *messages* to the API.

        Returns the response text, or — when self.stream is True — a generator
        yielding incremental text chunks.
        """
        logger.info(f'Sending request to {self.model_name} stream={self.stream} ...')
        t1 = time.time()
        completion = self._request(messages)

        if self.stream:
            # Streaming responses carry no usage stats; hand back chunks as they arrive.
            return self._generator(completion)

        t2 = time.time()
        usage = completion['usage']
        logger.info(
            f'Received response: {usage["prompt_tokens"]} in + {usage["completion_tokens"]} out'
            f' = {usage["total_tokens"]} total tokens. Time: {t2 - t1:3.1f} seconds'
        )
        return completion.choices[0].message['content']

    @staticmethod
    def get_stream_text(stream_part):
        """Extract the text delta from one streamed chunk ('' when the chunk has none)."""
        return stream_part['choices'][0]['delta'].get('content', '')

    @staticmethod
    def _generator(completion):
        """Yield the text of each streamed chunk in order."""
        for part in completion:
            yield ChatGptInteractor.get_stream_text(part)

    def count_tokens(self, messages):
        """Count prompt tokens for *messages* using this instance's model."""
        return num_tokens_from_messages(messages, self.model_name)

    def _request(self, messages):
        """Call the API, retrying transient timeout/availability errors up to 5 times."""
        last_err = None
        for attempt in range(5):
            try:
                return openai.ChatCompletion.create(
                    messages=messages,
                    model=self.model_name,
                    max_tokens=self.max_tokens,
                    temperature=self.temperature,
                    stream=self.stream,
                    request_timeout=100.0,
                )
            except (openai.error.Timeout, openai.error.ServiceUnavailableError) as err:
                # Transient failure: remember it, log, and retry immediately.
                last_err = err
                logger.warning('OpenAI request failed (attempt %d/5): %s', attempt + 1, err)
        raise RuntimeError('Failed to connect to OpenAI (timeout error)') from last_err


if __name__ == '__main__':
    interactor = ChatGptInteractor()

    # Show how the tokenizer splits a few tricky strings (whitespace, newlines, sub-words).
    sample_texts = [
        "Hello World!",
        "Hello",
        " World!",
        "World!",
        "World",
        "!",
        " ",
        "  ",
        "   ",
        "    ",
        "\n",
        "\n\t",
    ]
    for txt in sample_texts:
        print(f'`{txt}` | {interactor.tokenizer.encode(txt)}')

    st = ('You are a helpful assistant and an experienced programmer, '
          'answering questions exactly in two rhymed sentences')
    ut = 'Explain the principle of recursion in programming'
    print('Count tokens:', interactor.count_tokens_simple(user_text=ut, system_text=st))

    # Non-streaming: the full answer comes back as one string.
    print(interactor.chat_completion_simple(user_text=ut, system_text=st))
    print('---')

    # Streaming: the same request yields text chunks incrementally.
    interactor = ChatGptInteractor(stream=True)
    for chunk in interactor.chat_completion_simple(user_text=ut, system_text=st):
        print(chunk, end='')
    print('\n---')