# tranny/App/Chat/utils/Summarize.py
import asyncio
import os

import aiohttp
import google.generativeai as palm
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Disabled Poe fallback (token redacted; load it from the environment instead):
# from poe_api_wrapper import PoeApi
# client = PoeApi("<POE_TOKEN>")
# Poe chat state (used only by the disabled Poe code path further down).
bot = "Assistant"
CHAT_CODE = ""

# Keys are read from the environment; the fallbacks stay empty so that real
# secrets never live in source control.
PALM_API = ""
API_KEY = os.environ.get("PALM_API", PALM_API)
palm.configure(api_key=API_KEY)
GroqAPIKey = os.environ.get("Groq_API", "")
# Split long inputs into ~1500-character chunks with 100 characters of overlap,
# preferring paragraph, line, and sentence boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", "."],
    chunk_size=1500,
    chunk_overlap=100,
    length_function=len,
)
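# Illustrative sanity check of the splitter settings above (the sample input is
# hypothetical, not data from the app):
# docs = text_splitter.create_documents(["First paragraph.\n\nSecond paragraph." * 100])
# assert all(len(d.page_content) <= 1500 for d in docs)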
map_prompt = """
Write a verbose summary of the following, in the style of a masters student:
"{text}"
VERBOSE SUMMARY:
"""
combine_prompt = """
Write a summary of the following text, delimited by triple backticks.
Return your response as one detailed, verbose paragraph that covers the whole text. Make it as insightful to the reader as possible, and write in the style of a masters student.
```{text}```
SUMMARY:
"""
def count_tokens(text):
    # Token count as reported by the PaLM API (handy for checking chunk budgets).
    return palm.count_message_tokens(prompt=text)["token_count"]
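# Hedged example of using count_tokens to budget a chunk before sending it; the
# 4096-token limit and the chunk_text variable are illustrative assumptions,
# not documented quotas or names from this app:
# if count_tokens(chunk_text) > 4096:
#     raise ValueError("chunk too large for a single request")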
async def PalmTextModel(text, candidates=1):
    # Single-turn chat completion against Groq's OpenAI-compatible endpoint.
    # `candidates` is only meaningful for the disabled Gemini path kept below.
    # url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?key={API_KEY}"
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {GroqAPIKey}",
    }
    payload = {
        "messages": [{"role": "user", "content": text}],
        "model": "mixtral-8x7b-32768",
    }
# payload = {
# "contents": [
# {
# "role": "user",
# "parts": [
# {
# "text": text
# }
# ]
# }
# ],
# "generationConfig": {
# "temperature": 0.9,
# "topK": 1,
# "topP": 1,
# "maxOutputTokens": 2048,
# "stopSequences": []
# },
# "safetySettings": [
# {
# "category": "HARM_CATEGORY_HARASSMENT",
# "threshold": "BLOCK_ONLY_HIGH"
# },
# {
# "category": "HARM_CATEGORY_HATE_SPEECH",
# "threshold": "BLOCK_ONLY_HIGH"
# },
# {
# "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
# "threshold": "BLOCK_ONLY_HIGH"
# },
# {
# "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
# "threshold": "BLOCK_ONLY_HIGH"
# }
# ]
# }
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload, headers=headers) as response:
            if response.status == 200:
                result = await response.json()
                # Multi-candidate handling for the disabled Gemini endpoint:
                # if candidates > 1:
                #     return [
                #         candidate["content"]["parts"][0]["text"]
                #         for candidate in result["candidates"]
                #     ]
                return result["choices"][0]["message"]["content"]
            else:
                # Log the failure and return None so callers can retry or skip the chunk.
                print(f"Error: {response.status}\n{await response.text()}")
                return None
# Alternative PalmTextModel backed by Poe (disabled; needs the PoeApi client above):
# async def PalmTextModel(message):
# global CHAT_CODE
# if CHAT_CODE == "":
# for chunk in client.send_message(bot, message):
# pass
# CHAT_CODE = chunk["chatCode"]
# else:
# for chunk in client.send_message(bot, message, chatCode=CHAT_CODE):
# pass
# return chunk["text"]
async def Summarizer(essay):
    docs = text_splitter.create_documents([essay])

    # Short input: a single chunk can be summarized directly with the combine prompt.
    if len(docs) == 1:
        return await PalmTextModel(combine_prompt.format(text=docs[0].page_content))
    # Map step: summarize every chunk with the map prompt. Tasks are gathered in
    # batches of 20 so a long essay does not flood the API with concurrent calls.
    tasks = [PalmTextModel(map_prompt.format(text=doc.page_content)) for doc in docs]
    chunked_tasks = [tasks[i : i + 20] for i in range(0, len(tasks), 20)]
    responses = []
    for chunk in chunked_tasks:
        responses.extend(await asyncio.gather(*chunk))

    # Reduce step: merge the chunk summaries and condense them into one answer.
    main = " ".join(r for r in responses if r)  # skip chunks that failed (None)
    return await PalmTextModel(combine_prompt.format(text=main))
# asyncio.run(Summarizer("This is a test of the summarizer"))
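# The smoke test above, wrapped in a main guard so it only runs when this file
# is executed directly (a sketch; the sample string stands in for a real essay):
if __name__ == "__main__":
    print(asyncio.run(Summarizer("This is a test of the summarizer")))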