# Scraped page header (not source code): Spaces: Runtime error / Runtime error
"""
Tool for summarizing legal documents.
"""
from langchain.chains import (
    LLMChain,
    MapReduceDocumentsChain,
    ReduceDocumentsChain,
    StuffDocumentsChain,
)
from langchain.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain.tools import BaseTool

from AI_core.config import LLM, TEXT_SPLITTER


class SummarizationTool(BaseTool):
    """Tool to summarize legal documents using a map-reduce approach.

    Every chunk of the document is summarized on its own (map step); the
    per-chunk summaries are then merged into a single overview (reduce step).
    """

    name: str = "document_summarization_tool"
    description: str = (
        "Summarizes legal documents. "
        "Input should be a file path to a PDF or text document."
    )

    def _run(self, file_path: str) -> str:
        """
        Run the document summarization process.

        Args:
            file_path: Path to the document to summarize. Paths ending in
                ``.pdf`` (any letter case) are loaded with ``PyPDFLoader``;
                anything else is read as UTF-8 text.

        Returns:
            str: Summarized content, or a short notice when the document
            yields no text to summarize.

        Raises:
            OSError: If the file cannot be opened (text branch).
        """
        # Load and split the document into chunks.
        if file_path.lower().endswith(".pdf"):
            pages = PyPDFLoader(file_path).load()
            docs = TEXT_SPLITTER.split_documents(pages)
        else:
            # Explicit encoding keeps behavior identical across platforms;
            # undecodable bytes are replaced rather than aborting the run.
            with open(file_path, "r", encoding="utf-8", errors="replace") as f:
                text = f.read()
            # split_documents() expects Document objects, not raw strings, so
            # use create_documents(), which wraps each chunk in a Document.
            # NOTE(review): assumes TEXT_SPLITTER is a LangChain TextSplitter
            # (it already exposes split_documents) -- confirm in AI_core.config.
            docs = TEXT_SPLITTER.create_documents(
                [text], metadatas=[{"source": file_path}]
            )

        # Nothing to summarize: avoid calling the LLM with an empty prompt.
        if not docs:
            return "No text content could be extracted from the document."

        # Map step - summarize each chunk
        map_template = """
        You are a legal expert summarizing complex legal documents.
        Summarize the following text in a concise and accurate manner, preserving key legal points:
        {text}
        """
        map_prompt = PromptTemplate(template=map_template, input_variables=["text"])
        map_chain = LLMChain(llm=LLM, prompt=map_prompt, output_key="summary")

        # Reduce step - combine summaries
        reduce_template = """
        You are a legal expert creating a comprehensive summary from multiple text segments.
        Combine these summaries into a cohesive overview of the entire document, organized by key legal themes and points:
        {summaries}
        """
        reduce_prompt = PromptTemplate(
            template=reduce_template, input_variables=["summaries"]
        )
        reduce_llm_chain = LLMChain(llm=LLM, prompt=reduce_prompt)

        # MapReduceDocumentsChain requires a combine-documents chain for its
        # reduce step, not a bare LLMChain. StuffDocumentsChain feeds the
        # per-chunk summaries into the {summaries} prompt variable, and
        # ReduceDocumentsChain drives that combination.
        combine_chain = StuffDocumentsChain(
            llm_chain=reduce_llm_chain,
            document_variable_name="summaries",
        )
        reduce_documents_chain = ReduceDocumentsChain(
            combine_documents_chain=combine_chain,
        )

        # Create MapReduce chain
        map_reduce_chain = MapReduceDocumentsChain(
            llm_chain=map_chain,
            reduce_documents_chain=reduce_documents_chain,
            # Name of the map prompt variable that receives each chunk.
            document_variable_name="text",
        )
        return map_reduce_chain.run(docs)