Mbonea's picture
summarization improved
4e0c974
raw
history blame
1.25 kB
import aiohttp
import asyncio,pprint
import google.generativeai as palm
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import GooglePalm
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import PromptTemplate
import os
# Fallback key used when the PALM_API environment variable is not set.
PALM_API = ''

# Prefer the key from the environment; otherwise use the fallback above.
API_KEY = os.getenv("PALM_API", PALM_API)

# Hand the resolved key to the google.generativeai module (imported as `palm`).
palm.configure(api_key=API_KEY)
def count_tokens(text):
    """Return the token count that the PaLM API reports for *text*.

    *text* is forwarded as the ``prompt`` argument of
    ``palm.count_message_tokens`` and the ``'token_count'`` entry of the
    response is returned.
    """
    response = palm.count_message_tokens(prompt=text)
    return response['token_count']
# LangChain PaLM wrapper, authenticated with the same API key as `palm`.
# Every listed harm category gets threshold 4.
# NOTE(review): 4 presumably maps to the API's "block none" level -- confirm
# against the PaLM safety-settings reference.
llm = GooglePalm(
    google_api_key=API_KEY,
    safety_settings=[
        {"category": harm_category, "threshold": 4}
        for harm_category in (
            "HARM_CATEGORY_DEROGATORY",
            "HARM_CATEGORY_TOXICITY",
            "HARM_CATEGORY_VIOLENCE",
            "HARM_CATEGORY_SEXUAL",
            "HARM_CATEGORY_MEDICAL",
            "HARM_CATEGORY_DANGEROUS",
        )
    ],
)
# Splitter used to break the input text into chunks. Separators are listed
# from coarsest (blank line) to finest (sentence-ending period).
# NOTE(review): chunk_size / chunk_overlap are presumably measured in
# characters (the splitter's default length function) -- confirm.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", "."],
    chunk_size=40_000,
    chunk_overlap=500,
)
# Load the sample document. The encoding is pinned to UTF-8 so the result does
# not depend on the platform's locale default (e.g. cp1252 on Windows).
with open('./sample.txt', 'r', encoding='utf-8') as file:
    essay = file.read()

# Split the whole text into LangChain documents using `text_splitter`.
docs = text_splitter.create_documents([essay])

# Print the token count of each chunk, one number per line.
for doc in docs:
    print(count_tokens(doc.page_content))