# tranny/App/Chat/utils/Summarize.py
import asyncio
import os

import aiohttp
import google.generativeai as palm
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Disabled Poe fallback (token redacted; load it from the environment instead):
# from poe_api_wrapper import PoeApi
# client = PoeApi("<POE_TOKEN>")
# Poe chat state (used only by the disabled Poe code path further down).
bot = "Assistant"
CHAT_CODE = ""

# Keys are read from the environment; the fallbacks stay empty so that real
# secrets never live in source control.
PALM_API = ""
API_KEY = os.environ.get("PALM_API", PALM_API)
palm.configure(api_key=API_KEY)
GroqAPIKey = os.environ.get("Groq_API", "")
# Split long inputs into ~1500-character chunks with 100 characters of overlap,
# preferring paragraph, line, and sentence boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", "."],
    chunk_size=1500,
    chunk_overlap=100,
    length_function=len,
)
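# Illustrative sanity check of the splitter settings above (the sample input is
# hypothetical, not data from the app):
# docs = text_splitter.create_documents(["First paragraph.\n\nSecond paragraph." * 100])
# assert all(len(d.page_content) <= 1500 for d in docs)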
map_prompt = """
Write a verbose summary of the following, in the style of a masters student:
"{text}"
VERBOSE SUMMARY:
"""
combine_prompt = """
Write a summary of the following text, delimited by triple backticks.
Return your response as one detailed, verbose paragraph that covers the whole text. Make it as insightful to the reader as possible, and write in the style of a masters student.
```{text}```
SUMMARY:
"""
def count_tokens(text):
    # Token count as reported by the PaLM API (handy for checking chunk budgets).
    return palm.count_message_tokens(prompt=text)["token_count"]
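# Hedged example of using count_tokens to budget a chunk before sending it; the
# 4096-token limit and the chunk_text variable are illustrative assumptions,
# not documented quotas or names from this app:
# if count_tokens(chunk_text) > 4096:
#     raise ValueError("chunk too large for a single request")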
async def PalmTextModel(text, candidates=1):
    # Single-turn chat completion against Groq's OpenAI-compatible endpoint.
    # `candidates` is only meaningful for the disabled Gemini path kept below.
    # url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?key={API_KEY}"
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {GroqAPIKey}",
    }
    payload = {
        "messages": [{"role": "user", "content": text}],
        "model": "mixtral-8x7b-32768",
    }
# payload = {
# "contents": [
# {
# "role": "user",
# "parts": [
# {
# "text": text
# }
# ]
# }
# ],
# "generationConfig": {
# "temperature": 0.9,
# "topK": 1,
# "topP": 1,
# "maxOutputTokens": 2048,
# "stopSequences": []
# },
# "safetySettings": [
# {
# "category": "HARM_CATEGORY_HARASSMENT",
# "threshold": "BLOCK_ONLY_HIGH"
# },
# {
# "category": "HARM_CATEGORY_HATE_SPEECH",
# "threshold": "BLOCK_ONLY_HIGH"
# },
# {
# "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
# "threshold": "BLOCK_ONLY_HIGH"
# },
# {
# "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
# "threshold": "BLOCK_ONLY_HIGH"
# }
# ]
# }
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload, headers=headers) as response:
            if response.status == 200:
                result = await response.json()
                # Multi-candidate handling for the disabled Gemini endpoint:
                # if candidates > 1:
                #     return [
                #         candidate["content"]["parts"][0]["text"]
                #         for candidate in result["candidates"]
                #     ]
                return result["choices"][0]["message"]["content"]
            else:
                # Log the failure and return None so callers can retry or skip the chunk.
                print(f"Error: {response.status}\n{await response.text()}")
                return None
# Alternative PalmTextModel backed by Poe (disabled; needs the PoeApi client above):
# async def PalmTextModel(message):
# global CHAT_CODE
# if CHAT_CODE == "":
# for chunk in client.send_message(bot, message):
# pass
# CHAT_CODE = chunk["chatCode"]
# else:
# for chunk in client.send_message(bot, message, chatCode=CHAT_CODE):
# pass
# return chunk["text"]
async def Summarizer(essay):
    docs = text_splitter.create_documents([essay])

    # Short input: a single chunk can be summarized directly with the combine prompt.
    if len(docs) == 1:
        return await PalmTextModel(combine_prompt.format(text=docs[0].page_content))
    # Map step: summarize every chunk with the map prompt. Tasks are gathered in
    # batches of 20 so a long essay does not flood the API with concurrent calls.
    tasks = [PalmTextModel(map_prompt.format(text=doc.page_content)) for doc in docs]
    chunked_tasks = [tasks[i : i + 20] for i in range(0, len(tasks), 20)]
    responses = []
    for chunk in chunked_tasks:
        responses.extend(await asyncio.gather(*chunk))

    # Reduce step: merge the chunk summaries and condense them into one answer.
    main = " ".join(r for r in responses if r)  # skip chunks that failed (None)
    return await PalmTextModel(combine_prompt.format(text=main))
# asyncio.run(Summarizer("This is a test of the summarizer"))
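# The smoke test above, wrapped in a main guard so it only runs when this file
# is executed directly (a sketch; the sample string stands in for a real essay):
if __name__ == "__main__":
    print(asyncio.run(Summarizer("This is a test of the summarizer")))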