from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import streamlit as st


@st.cache_resource
def load_model():
    # Cache the tokenizer and model across Streamlit reruns so they are only loaded once.
    tokenizer = AutoTokenizer.from_pretrained("nsi319/legal-led-base-16384")
    model = AutoModelForSeq2SeqLM.from_pretrained("nsi319/legal-led-base-16384")
    return tokenizer, model


class BillSummarizer:
    def __init__(self):
        """
        Initialize a BillSummarizer, which uses the Hugging Face transformers library to summarize bills.
        """
        try:
            self.tokenizer, self.model = load_model()
        except Exception as e:
            # Record the failure and leave the attributes as None; summarize() will
            # catch the resulting error and return a fallback message.
            print(f"Error initializing summarizer pipeline: {e}")
            self.tokenizer, self.model = None, None
    def summarize(self, bill_text):
        """
        Summarize a bill's text using the Legal-LED summarization model.

        Parameters:
            bill_text (str): The text of the bill to be summarized.

        Returns:
            str: The summarized text.
        """
        try:
            # Tokenize, padding/truncating the bill text to the model's input window.
            # (pad_to_max_length is deprecated; padding="max_length" already covers it.)
            input_tokenized = self.tokenizer.encode(bill_text, return_tensors='pt',
                                                    padding="max_length",
                                                    max_length=6144,
                                                    truncation=True)

            summary_ids = self.model.generate(input_tokenized,
                                              num_beams=4,
                                              no_repeat_ngram_size=3,
                                              length_penalty=2.0,
                                              min_length=350,
                                              max_length=500)

            # generate() returns one sequence per input; decode the first (and only) one.
            summary = self.tokenizer.decode(summary_ids[0],
                                            skip_special_tokens=True,
                                            clean_up_tokenization_spaces=False)

            return summary
        except Exception as e:
            print(f"Error summarizing text: {e}")
            return "Sorry, I couldn't summarize this bill. Please try again."