# Web-viewer residue kept as a comment: file size 796 Bytes, id 7f50b6e.
import requests
from bs4 import BeautifulSoup
import tiktoken
# Shared tiktoken encoder for the 'cl100k_base' encoding; created once at
# import time and used by tiktoken_len() below.
tokenizer = tiktoken.get_encoding('cl100k_base')
def process_input(text, timeout=30):
    """Count the tokens of the message parts embedded in a web page.

    Downloads the page at ``text``, re-serializes it through BeautifulSoup,
    splits the result on the ``parts":["`` marker and concatenates the
    leading portion (up to the first ``"]``) of every other chunk taken
    from the first half of the split. The concatenated string is measured
    with ``tiktoken_len``.

    Args:
        text: URL of the page to download (a URL, despite the name).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value returned by ``tiktoken_len`` for the collected text. When
        the marker does not occur, the collected text is the empty string.

    Raises:
        requests.exceptions.RequestException: If the download fails or
            times out.
    """
    # NOTE(review): verify=False disables TLS certificate verification, so
    # the response could come from an impersonating host. Kept to preserve
    # existing behaviour -- confirm whether it is still required.
    response = requests.get(text, verify=False, timeout=timeout)

    # The split runs on BeautifulSoup's serialization rather than on the raw
    # response body, as the original code did.
    html = str(BeautifulSoup(response.text, "html.parser"))
    chunks = html.split('parts":["')

    # chunks[0] is whatever precedes the first marker and is never counted.
    # Only odd positions within the first half of the split are used.
    # Presumably the payload repeats each message -- TODO confirm.
    # A stride slice selects by position, so identical chunks are no longer
    # confused with each other the way a value lookup via list.index() was.
    half = len(chunks) // 2
    selected = chunks[1:half:2]

    # Each chunk runs on past the end of its part; keep only the text before
    # the first '"]'.
    collected = ''.join(chunk.split('"]')[0] for chunk in selected)
    return tiktoken_len(collected)
def tiktoken_len(text):
    """Return the number of tokens the module-level ``tokenizer`` yields for ``text``.

    ``disallowed_special=()`` is passed through to ``tokenizer.encode`` so
    that no special-token string in ``text`` is treated as disallowed.
    """
    return len(tokenizer.encode(text, disallowed_special=()))
# Script entry: fetch the shared conversation page, count the tokens that
# process_input() extracts from it, and print the total.
answer = process_input('https://chatgpt.com/share/6737b9b5-56fc-8002-a212-35339f5b1d5a')
print(answer)