File size: 796 Bytes
7f50b6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import requests
from bs4 import BeautifulSoup
import tiktoken

# Module-level tiktoken encoding ("cl100k_base"), created once at import time
# and read by tiktoken_len() to turn text into token ids.
tokenizer = tiktoken.get_encoding('cl100k_base')

def process_input(text: str, *, timeout: float = 30.0, verify: bool = True) -> int:
    """Fetch a page and count the tokens in the message text embedded in it.

    The page is downloaded, parsed with BeautifulSoup, turned back into a
    string and split on the literal marker ``parts":["``.  From the resulting
    chunks the text up to the first ``"]`` is taken, the selected pieces are
    concatenated, and the token count of that string (via ``tiktoken_len``)
    is returned.

    Args:
        text: URL of the page to fetch (the parameter name is kept for
            backward compatibility even though it holds a URL).
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests.get`` may block indefinitely.
        verify: Whether to verify the server's TLS certificate.  The
            previous implementation hard-coded ``verify=False``; pass
            ``verify=False`` explicitly to restore that behaviour.

    Returns:
        Number of tokens in the extracted text; ``0`` when the marker does
        not yield any selected chunk.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx response (previously an error page was silently
            processed as if it were the expected content).
    """
    response = requests.get(text, timeout=timeout, verify=verify)
    response.raise_for_status()

    page = str(BeautifulSoup(response.text, "html.parser"))
    chunks = page.split('parts":["')

    # Selection rule kept from the original: look only at the first half of
    # the chunks and take every second one starting at index 1.
    # NOTE(review): presumably the payload appears twice in the page and the
    # marker alternates between wanted and unwanted entries -- confirm
    # against the current page format.
    # A stride slice is used instead of ``chunks.index(item) % 2``: ``index``
    # returns the first occurrence, so duplicated chunks were selected or
    # skipped by the wrong position (and every lookup was a linear scan).
    selected = chunks[1:len(chunks) // 2:2]

    # Each chunk starts right after the marker; keep the text up to the
    # closing ``"]``.
    message_text = ''.join(chunk.split('"]')[0] for chunk in selected)

    return tiktoken_len(message_text)

def tiktoken_len(text):
    """Return the number of tokens the module-level ``tokenizer`` yields for *text*.

    ``disallowed_special=()`` switches off tiktoken's special-token check, so
    text that happens to contain a special-token string is encoded as
    ordinary text instead of raising ``ValueError``.
    """
    return len(tokenizer.encode(text, disallowed_special=()))

# Run the demo only when the file is executed as a script, so that importing
# this module does not trigger a network request and a print as a side effect.
if __name__ == "__main__":
    answer = process_input('https://chatgpt.com/share/6737b9b5-56fc-8002-a212-35339f5b1d5a')

    print(answer)