import math
import tiktoken


async def calculate_image_tokens(width, height, detail):
    # Low-detail images cost a flat 85 tokens regardless of size.
    if detail == "low":
        return 85
    else:
        # First scale the image so its longest side is at most 2048 px.
        max_dimension = max(width, height)
        if max_dimension > 2048:
            scale_factor = 2048 / max_dimension
            new_width = int(width * scale_factor)
            new_height = int(height * scale_factor)
        else:
            new_width = width
            new_height = height
        width, height = new_width, new_height
        # Then scale so the shortest side is at most 768 px.
        min_dimension = min(width, height)
        if min_dimension > 768:
            scale_factor = 768 / min_dimension
            new_width = int(width * scale_factor)
            new_height = int(height * scale_factor)
        else:
            new_width = width
            new_height = height
        width, height = new_width, new_height
        # Count 512 x 512 tiles; each tile costs 170 tokens plus a base of 85.
        num_masks_w = math.ceil(width / 512)
        num_masks_h = math.ceil(height / 512)
        total_masks = num_masks_w * num_masks_h
        tokens_per_mask = 170
        total_tokens = total_masks * tokens_per_mask + 85
        return total_tokens
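

# Illustrative sketch (assumption, not part of the original module): a worked example
# of the tile-based cost above. A 1024x1024 image at "high" detail is downscaled so
# its shortest side is 768 px, giving 768x768, which spans 2 x 2 tiles of 512 px,
# so the cost is 4 * 170 + 85 = 765 tokens:
#
#     import asyncio
#     assert asyncio.run(calculate_image_tokens(1024, 1024, "high")) == 765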


async def num_tokens_from_messages(messages, model=''):
    # Fall back to the cl100k_base encoding when the model name is unknown.
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")
    # gpt-3.5-turbo-0301 has a slightly larger per-message overhead.
    if model == "gpt-3.5-turbo-0301":
        tokens_per_message = 4
    else:
        tokens_per_message = 3
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            if isinstance(value, list):
                # Multimodal content: count text parts; image parts are skipped here.
                for item in value:
                    if item.get("type") == "text":
                        num_tokens += len(encoding.encode(item.get("text")))
                    if item.get("type") == "image_url":
                        pass
            else:
                num_tokens += len(encoding.encode(value))
    # Every reply is primed with <|start|>assistant<|message|>, hence the extra 3.
    num_tokens += 3
    return num_tokens
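

# Illustrative usage sketch (assumption, not part of the original module): counting
# tokens for a simple chat payload. Note that "image_url" parts are skipped above,
# so image costs would have to be added separately via calculate_image_tokens.
#
#     import asyncio
#     messages = [{"role": "user", "content": "Hello there"}]
#     n = asyncio.run(num_tokens_from_messages(messages, model="gpt-4"))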


async def num_tokens_from_content(content, model=''):
    # Default to '' so a missing model name falls through to cl100k_base
    # (encoding_for_model(None) would raise instead of KeyError).
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")
    encoded_content = encoding.encode(content)
    len_encoded_content = len(encoded_content)
    return len_encoded_content


async def split_tokens_from_content(content, max_tokens, model=''):
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")
    encoded_content = encoding.encode(content)
    len_encoded_content = len(encoded_content)
    if len_encoded_content >= max_tokens:
        # Truncate to the token budget and report a "length" finish reason.
        content = encoding.decode(encoded_content[:max_tokens])
        return content, max_tokens, "length"
    else:
        return content, len_encoded_content, "stop"
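

# Illustrative usage sketch (assumption, not part of the original module): truncating
# text to a token budget and counting tokens; the finish reason mirrors the
# OpenAI-style "length" / "stop" values returned above.
if __name__ == "__main__":
    import asyncio

    async def _demo():
        text = "one two three four five six seven eight nine ten"
        truncated, used, finish_reason = await split_tokens_from_content(
            text, max_tokens=5, model="gpt-3.5-turbo"
        )
        print(used, finish_reason, repr(truncated))
        print(await num_tokens_from_content(text, model="gpt-3.5-turbo"))

    asyncio.run(_demo())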