File size: 1,252 Bytes
4e0c974
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import aiohttp
import asyncio,pprint
import google.generativeai as palm
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import GooglePalm
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import PromptTemplate
import os
# Fallback API key (intentionally empty) — prefer supplying the key via the
# PALM_API environment variable rather than hard-coding a secret in source.
PALM_API = ''
# Environment variable takes precedence; falls back to the constant above.
API_KEY=os.environ.get("PALM_API",PALM_API)
# Configure the google.generativeai client globally for count_tokens() below.
palm.configure(api_key=API_KEY)


def count_tokens(text):
    """Return the PaLM token count for *text* using the count_message_tokens API."""
    response = palm.count_message_tokens(prompt=text)
    return response['token_count']
# PaLM LLM client with every safety category relaxed to threshold 4
# ("block none"), so long-document processing is not interrupted by
# false-positive content filtering.
# NOTE(review): the original passed safety_settings via a pointless
# **{...} dict unpacking; it is now a plain keyword argument — identical
# behavior, clearer call site.
llm = GooglePalm(
    google_api_key=API_KEY,
    safety_settings=[
        {"category": "HARM_CATEGORY_DEROGATORY", "threshold": 4},
        {"category": "HARM_CATEGORY_TOXICITY", "threshold": 4},
        {"category": "HARM_CATEGORY_VIOLENCE", "threshold": 4},
        {"category": "HARM_CATEGORY_SEXUAL", "threshold": 4},
        {"category": "HARM_CATEGORY_MEDICAL", "threshold": 4},
        {"category": "HARM_CATEGORY_DANGEROUS", "threshold": 4},
    ],
)
# Split on paragraph, line, then sentence boundaries: ~40k characters per
# chunk with 500 characters of overlap between adjacent chunks.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", "."],
    chunk_size=40_000,
    chunk_overlap=500,
)

# Explicit encoding: without it, open() uses a platform-dependent default
# (e.g. cp1252 on Windows) and non-ASCII text can fail to decode.
with open('./sample.txt', 'r', encoding='utf-8') as file:
    essay = file.read()

# Report the PaLM token count of each chunk so oversized chunks are visible.
docs = text_splitter.create_documents([essay])
for doc in docs:
    print(count_tokens(doc.page_content))