import logging
import os
import time
import tiktoken
import openai
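# NOTE: this module targets the legacy openai<1.0 Python SDK
# (openai.ChatCompletion, openai.error); openai>=1.0 removed these names
# in favour of a client-object API.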
OPENAI_KEY = None
key_file = 'data/openaikey.txt'
# Prefer a key stored on disk; fall back to the OPENAI_KEY environment variable.
if os.path.exists(key_file):
    with open(key_file) as f:
        OPENAI_KEY = f.read().strip()
if OPENAI_KEY is None:
    OPENAI_KEY = os.getenv('OPENAI_KEY')
openai.api_key = OPENAI_KEY
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def num_tokens_from_messages(messages, model):
    """
    Return the number of tokens used by a list of messages.
    https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        logger.warning("Model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")
    if model in {
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-16k-0613",
        "gpt-4-0314",
        "gpt-4-32k-0314",
        "gpt-4-0613",
        "gpt-4-32k-0613",
    }:
        tokens_per_message = 3
        tokens_per_name = 1
    elif model == "gpt-3.5-turbo-0301":
        tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
        tokens_per_name = -1  # if there's a name, the role is omitted
    elif "gpt-3.5-turbo" in model:
        # gpt-3.5-turbo may update over time; count tokens assuming gpt-3.5-turbo-0613
        return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613")
    elif "gpt-4" in model:
        # gpt-4 may update over time; count tokens assuming gpt-4-0613
        return num_tokens_from_messages(messages, model="gpt-4-0613")
    else:
        raise NotImplementedError(
            f"num_tokens_from_messages() is not implemented for model {model}. "
            f"See https://github.com/openai/openai-python/blob/main/chatml.md "
            f"for information on how messages are converted to tokens."
        )
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            num_tokens += len(encoding.encode(value, disallowed_special=()))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
    return num_tokens
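
# Illustrative usage (a sketch; exact counts depend on the installed
# tiktoken version):
#   msgs = [{"role": "system", "content": "Be terse."},
#           {"role": "user", "content": "Hello!"}]
#   num_tokens_from_messages(msgs, "gpt-3.5-turbo-0613")
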
class ChatGptInteractor:
    """
    Thin convenience wrapper around the ChatCompletion API:
    message construction, token counting, optional streaming,
    and retries on transient errors.
    """

    def __init__(self, model_name='gpt-3.5-turbo', max_tokens=None, temperature=None, stream=False):
        self.model_name = model_name
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.stream = stream
        self.tokenizer = tiktoken.encoding_for_model(self.model_name)

    def chat_completion_simple(
            self,
            *,
            user_text,
            system_text=None,
    ):
        return self.chat_completion(
            self._construct_messages_simple(user_text, system_text),
        )

    def count_tokens_simple(self, *, user_text, system_text=None):
        return self.count_tokens(self._construct_messages_simple(user_text, system_text))

    @staticmethod
    def _construct_messages_simple(user_text, system_text=None):
        messages = []
        if system_text is not None:
            messages.append({
                "role": "system",
                "content": system_text,
            })
        messages.append({
            "role": "user",
            "content": user_text,
        })
        return messages

    def chat_completion(self, messages):
        logger.info(f'Sending request to {self.model_name} stream={self.stream} ...')
        t1 = time.time()
        completion = self._request(messages)
        if self.stream:
            # Usage stats are not reported in streaming mode; hand back a generator.
            return self._generator(completion)
        t2 = time.time()
        usage = completion['usage']
        logger.info(
            f'Received response: {usage["prompt_tokens"]} in + {usage["completion_tokens"]} out'
            f' = {usage["total_tokens"]} total tokens. Time: {t2 - t1:3.1f} seconds'
        )
        return completion.choices[0].message['content']

    @staticmethod
    def get_stream_text(stream_part):
        return stream_part['choices'][0]['delta'].get('content', '')

    @staticmethod
    def _generator(completion):
        for part in completion:
            yield ChatGptInteractor.get_stream_text(part)

    def count_tokens(self, messages):
        return num_tokens_from_messages(messages, self.model_name)

    def _request(self, messages):
        # Retry a few times on transient failures before giving up.
        for _ in range(5):
            try:
                return openai.ChatCompletion.create(
                    messages=messages,
                    model=self.model_name,
                    max_tokens=self.max_tokens,
                    temperature=self.temperature,
                    stream=self.stream,
                    request_timeout=100.0,
                )
            except (openai.error.Timeout, openai.error.ServiceUnavailableError):
                continue
        raise RuntimeError('Failed to connect to OpenAI (timed out or service unavailable)')
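
# The retry loop in _request() re-sends immediately on failure. A possible
# alternative (a sketch, not part of the original design) is exponential
# backoff, which also covers rate limiting:
#   for attempt in range(5):
#       try:
#           return openai.ChatCompletion.create(...)
#       except (openai.error.Timeout, openai.error.RateLimitError):
#           time.sleep(2 ** attempt)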

if __name__ == '__main__':
    cgi = ChatGptInteractor()

    # Peek at how the tokenizer splits short strings and whitespace runs.
    for txt in [
        "Hello World!",
        "Hello",
        " World!",
        "World!",
        "World",
        "!",
        " ",
        "  ",
        "   ",
        "    ",
        "\n",
        "\n\t",
    ]:
        print(f'`{txt}` | {cgi.tokenizer.encode(txt)}')

    st = 'You are a helpful assistant and an experienced programmer, ' \
         'answering questions exactly in two rhymed sentences'
    ut = 'Explain the principle of recursion in programming'

    print('Count tokens:', cgi.count_tokens_simple(user_text=ut, system_text=st))
    print(cgi.chat_completion_simple(user_text=ut, system_text=st))
    print('---')

    cgi = ChatGptInteractor(stream=True)
    for part in cgi.chat_completion_simple(user_text=ut, system_text=st):
        print(part, end='')
    print('\n---')