|
import aiohttp |
|
import asyncio,pprint |
|
import google.generativeai as palm |
|
from langchain.chains.question_answering import load_qa_chain |
|
from langchain.llms import GooglePalm |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain import PromptTemplate |
|
import os |
|
# Fallback key, used only when the PALM_API environment variable is unset.
PALM_API = ''

# The environment variable takes precedence over the in-file fallback.
API_KEY = os.getenv("PALM_API", PALM_API)

# Register the key with the google.generativeai client module.
palm.configure(api_key=API_KEY)
|
|
|
|
|
def count_tokens(text):
    """Return the token count PaLM reports for ``text``.

    ``text`` is passed as the ``prompt`` argument of
    ``palm.count_message_tokens``; the ``'token_count'`` entry of the
    response is returned.
    """
    response = palm.count_message_tokens(prompt=text)
    return response['token_count']
|
# Harm categories that receive an explicit safety setting, in the order
# they are sent to the model.
_HARM_CATEGORIES = (
    "HARM_CATEGORY_DEROGATORY",
    "HARM_CATEGORY_TOXICITY",
    "HARM_CATEGORY_VIOLENCE",
    "HARM_CATEGORY_SEXUAL",
    "HARM_CATEGORY_MEDICAL",
    "HARM_CATEGORY_DANGEROUS",
)

# LangChain PaLM wrapper configured with the shared API key. Every category
# above gets the same threshold value.
# NOTE(review): threshold 4 presumably maps to the least restrictive
# blocking level in the PaLM API -- confirm against the API reference.
llm = GooglePalm(
    google_api_key=API_KEY,
    safety_settings=[
        {"category": category, "threshold": 4}
        for category in _HARM_CATEGORIES
    ],
)
|
# Splitter that tries the separators in the listed order and yields chunks
# of at most 40,000 characters, with 500 characters of overlap between them.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", "."],
    chunk_size=40000,
    chunk_overlap=500,
)
|
# Read the whole sample document into memory. The encoding is pinned to
# UTF-8 so the decoded text does not depend on the platform's locale
# default (e.g. cp1252 on Windows).
# NOTE(review): assumes sample.txt is UTF-8 encoded -- confirm.
with open('./sample.txt', 'r', encoding='utf-8') as file:
    essay = file.read()
|
|
|
# Split the essay into documents using the configured splitter.
docs = text_splitter.create_documents(texts=[essay])

# Print the token count of each chunk, one per line, in chunk order.
for doc in docs:
    chunk_tokens = count_tokens(doc.page_content)
    print(chunk_tokens)
|
|
|
|