legalLM / AI_core /tools /summarization_tool.py
Muhammad2003's picture
Upload 45 files
1f891e5 verified
"""
Tool for summarizing legal documents.
"""
from langchain.chains import (
    LLMChain,
    MapReduceDocumentsChain,
    ReduceDocumentsChain,
    StuffDocumentsChain,
)
from langchain.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.tools import BaseTool

from AI_core.config import LLM, TEXT_SPLITTER


class SummarizationTool(BaseTool):
    """Tool to summarize legal documents using a map-reduce approach.

    Each chunk produced by ``TEXT_SPLITTER`` is summarized on its own (map),
    then the per-chunk summaries are merged into one overview (reduce).
    Both steps use the shared ``LLM`` from ``AI_core.config``.
    """

    name: str = "document_summarization_tool"
    description: str = "Summarizes legal documents. Input should be a file path to a PDF or text document."

    def _load_documents(self, file_path: str) -> list:
        """Load ``file_path`` into a list of LangChain ``Document`` objects.

        Args:
            file_path: Path to a PDF (any letter case of the ``.pdf``
                extension) or to a plain-text file.

        Returns:
            list: ``Document`` objects ready for ``split_documents``.
        """
        # Case-insensitive match so "CONTRACT.PDF" is not read as plain text.
        if file_path.lower().endswith(".pdf"):
            return PyPDFLoader(file_path).load()

        # Explicit encoding: the platform default is locale dependent.
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
        # split_documents() reads .page_content/.metadata, so the raw text
        # must be wrapped in a Document rather than passed as a bare str.
        return [Document(page_content=text, metadata={"source": file_path})]

    def _run(self, file_path: str) -> str:
        """
        Run the document summarization process.

        Args:
            file_path: Path to the document to summarize

        Returns:
            str: Summarized content, or a short notice when the document
                yields no text to summarize.
        """
        # Load document
        documents = self._load_documents(file_path)

        # Split documents
        docs = TEXT_SPLITTER.split_documents(documents)
        if not docs:
            # Nothing to map over; avoid asking the LLM to summarize nothing.
            return "The document contains no extractable text to summarize."

        # Map step - summarize each chunk
        map_template = (
            "\n"
            "You are a legal expert summarizing complex legal documents.\n"
            "Summarize the following text in a concise and accurate manner, preserving key legal points:\n"
            "{text}\n"
        )
        map_prompt = PromptTemplate(template=map_template, input_variables=["text"])
        map_chain = LLMChain(llm=LLM, prompt=map_prompt, output_key="summary")

        # Reduce step - combine summaries
        reduce_template = (
            "\n"
            "You are a legal expert creating a comprehensive summary from multiple text segments.\n"
            "Combine these summaries into a cohesive overview of the entire document, organized by key legal themes and points:\n"
            "{summaries}\n"
        )
        reduce_prompt = PromptTemplate(template=reduce_template, input_variables=["summaries"])
        reduce_chain = LLMChain(llm=LLM, prompt=reduce_prompt)

        # MapReduceDocumentsChain needs a combine-documents chain for its
        # reduce step, not a bare LLMChain: stuff the chunk summaries into
        # the {summaries} slot, then wrap that in a ReduceDocumentsChain.
        stuff_chain = StuffDocumentsChain(
            llm_chain=reduce_chain,
            document_variable_name="summaries",
        )
        reduce_documents_chain = ReduceDocumentsChain(
            combine_documents_chain=stuff_chain,
        )

        # Create MapReduce chain
        map_reduce_chain = MapReduceDocumentsChain(
            llm_chain=map_chain,
            reduce_documents_chain=reduce_documents_chain,
            document_variable_name="text",
        )
        return map_reduce_chain.run(docs)