File size: 2,397 Bytes
1f891e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
Tool for summarizing legal documents.
"""
from langchain.tools import BaseTool
from langchain.chains import LLMChain, MapReduceDocumentsChain
from langchain.chains import ReduceDocumentsChain, StuffDocumentsChain
from langchain.prompts import PromptTemplate
from langchain.document_loaders import PyPDFLoader

from AI_core.config import LLM, TEXT_SPLITTER

class SummarizationTool(BaseTool):
    """Tool to summarize legal documents using a map-reduce approach.

    Each chunk of the document is summarized on its own (map step) and the
    per-chunk summaries are then merged into one overview (reduce step).
    The LLM and the text splitter are the shared ``LLM`` and
    ``TEXT_SPLITTER`` objects imported from ``AI_core.config``.
    """
    name: str = "document_summarization_tool"
    description: str = "Summarizes legal documents. Input should be a file path to a PDF or text document."

    def _run(self, file_path: str) -> str:
        """
        Run the document summarization process.

        Args:
            file_path: Path to the document to summarize. Paths ending in
                ``.pdf`` (case-insensitive) are loaded with ``PyPDFLoader``;
                anything else is read as UTF-8 text.

        Returns:
            str: Summarized content

        Raises:
            OSError: If a non-PDF file cannot be opened.
            UnicodeDecodeError: If a non-PDF file is not valid UTF-8.
        """
        # Load the document and split it into chunks. Both branches must end
        # up with Document objects: the map-reduce chain reads
        # ``page_content`` from each item, so raw strings are not accepted.
        if file_path.lower().endswith('.pdf'):
            documents = PyPDFLoader(file_path).load()
            docs = TEXT_SPLITTER.split_documents(documents)
        else:
            # Explicit encoding so the result does not depend on the
            # platform's default locale.
            with open(file_path, 'r', encoding='utf-8') as f:
                text = f.read()
            # create_documents splits the raw text and wraps every chunk in
            # a Document (split_documents would fail on plain strings).
            docs = TEXT_SPLITTER.create_documents(
                [text], metadatas=[{"source": file_path}]
            )

        # Map step - summarize each chunk
        map_template = """
        You are a legal expert summarizing complex legal documents.
        Summarize the following text in a concise and accurate manner, preserving key legal points:
        
        {text}
        """
        map_prompt = PromptTemplate(template=map_template, input_variables=["text"])
        map_chain = LLMChain(llm=LLM, prompt=map_prompt, output_key="summary")

        # Reduce step - combine summaries
        reduce_template = """
        You are a legal expert creating a comprehensive summary from multiple text segments.
        Combine these summaries into a cohesive overview of the entire document, organized by key legal themes and points:
        
        {summaries}
        """
        reduce_prompt = PromptTemplate(template=reduce_template, input_variables=["summaries"])
        reduce_llm_chain = LLMChain(llm=LLM, prompt=reduce_prompt)

        # MapReduceDocumentsChain needs a combine-documents chain for its
        # reduce step, not a bare LLMChain. StuffDocumentsChain is what
        # injects the per-chunk summaries into the ``{summaries}`` variable.
        combine_chain = StuffDocumentsChain(
            llm_chain=reduce_llm_chain,
            document_variable_name="summaries",
        )
        reduce_chain = ReduceDocumentsChain(combine_documents_chain=combine_chain)

        # Create MapReduce chain
        map_reduce_chain = MapReduceDocumentsChain(
            llm_chain=map_chain,
            reduce_documents_chain=reduce_chain,
            # Name of the map prompt variable that receives each chunk.
            document_variable_name="text",
        )

        return map_reduce_chain.run(docs)