from langchain_core.prompts import ChatPromptTemplate

from src.utils.api_key_manager import APIKeyManager, with_api_manager
from src.query_processing.late_chunking.late_chunker import LateChunker


class Reasoner:
    """Answer and summarize user queries with LLMs obtained via APIKeyManager.

    The ``with_api_manager`` decorator injects the keyword-only ``llm``
    argument into each decorated method, handling key rotation externally.
    """

    def __init__(self):
        self.manager = APIKeyManager()
        # Used only for token counting in summarize(); the completion LLM
        # is injected per call by the with_api_manager decorator.
        self.model = self.manager.get_llm()

    @with_api_manager(streaming=True)
    async def reason(self, query, context=None, *, llm):
        """Stream a markdown-formatted answer to ``query``.

        Args:
            query: The user question to answer.
            context: Optional supporting documents. When given, the prompt
                instructs the model to answer from them without mentioning
                them in the response.
            llm: Streaming LLM client injected by ``with_api_manager``.

        Yields:
            Incremental chunks of the model's answer (``chunk.content``).
        """
        if context is None:
            template = \
                """You are an expert at reasoning.
Your task is to reason about the given user query and provide an answer.

Rules:
1. Your response should only be the answer in valid markdown format.
2. You must use proper reasoning and logic to answer the query for your internal use but do not show your reasoning process in the response.

Query: {query}"""
            prompt = ChatPromptTemplate.from_template(template)
            messages = prompt.format_messages(query=query)
        else:
            template = \
                """You are an expert at reasoning.
Given the user query and the relevant context, your task is to reason and provide an answer.

Rules:
1. Your response should only be the answer in valid markdown format.
2. You must use proper reasoning and logic to answer the query for your internal use but do not show your reasoning process in the response.
3. You must not mention the context/documents provided to you in the response. Make it sound like you are the one who is answering the query.

Context:
[{context}]

Query: {query}"""
            prompt = ChatPromptTemplate.from_template(template)
            messages = prompt.format_messages(context=context, query=query)

        # NOTE: the original wrapped this loop in
        # ``try: ... except Exception as e: raise e`` — a no-op that only
        # lengthens tracebacks. Errors now propagate naturally.
        async for chunk in llm.astream(messages):
            yield chunk.content

    @with_api_manager()
    async def summarize(
        self,
        query,
        content,
        model_name="minishlab/potion-base-8M",
        max_chunk_length=1000,
        max_tokens_allowed=None,
        overlap=200,
        *,
        llm,
    ):
        """Summarize ``content`` with respect to ``query``.

        If ``max_tokens_allowed`` is set and the content's token count
        exceeds it, the content is first reduced with query-aware late
        chunking before summarization.

        Args:
            query: The query the summary should help answer.
            content: The document text to summarize.
            model_name: Embedding model name passed to ``LateChunker``.
            max_chunk_length: Maximum chunk length used during late chunking.
            max_tokens_allowed: Token budget; triggers chunking when exceeded.
                ``None`` disables the chunking step entirely.
            overlap: Overlap between adjacent chunks during late chunking.
            llm: LLM client injected by ``with_api_manager``.

        Returns:
            The stripped summary text produced by the LLM.
        """
        if max_tokens_allowed:
            late_chunker = LateChunker(model_name=model_name)
            content_tokens = self.model.get_num_tokens(content)
            if content_tokens > max_tokens_allowed:
                print("Content is too long, applying late chunking...")
                content = await late_chunker.chunker(
                    text=content,
                    query=query,
                    max_chunk_length=max_chunk_length,
                    max_tokens=max_tokens_allowed,
                    overlap=overlap,
                )

        template = \
            """You are an expert at summarizing long documents.
Your task is to create a concise but detailed summary of documents that ultimately lead to detailed and precise answers to the queries.

Rules:
1. The summary should be concise but detailed, precise and accurate.
2. Focus on extracting key information, facts, and data that are directly relevant to the query.
3. Include specific details, numbers, and quotes when they are important.
4. Ensure that your summary preserves the original meaning and context of the information.

Your response should ONLY be the detailed summary of documents in plain text without any formatting.

Query: {query}

Document: {content}"""
        prompt = ChatPromptTemplate.from_template(template)
        messages = prompt.format_messages(content=content, query=query)
        response = await llm.ainvoke(messages)
        return response.content.strip()


if __name__ == "__main__":
    import asyncio

    from src.crawl.crawler import Crawler

    reasoner = Reasoner()
    crawler = Crawler()
    session_id = crawler.create_session()
    contents = asyncio.run(
        crawler.crawl_with_retry(
            "https://www.parliament.nz/en/pb/sc/make-a-submission/document/54SCJUST_SCF_227E6D0B-E632-42EB-CFFE-08DCFEB826C6/principles-of-the-treaty-of-waitangi-bill",
            session_id=session_id,
            rotate_proxy=False,
            return_html=True,
        )
    )
    print(contents)