lossLopes committed
Commit 3d08ef2 · 1 Parent(s): 6596a58

Create app_summaries_longdoc.py

Files changed (1)
  1. app_summaries_longdoc.py +58 -0
app_summaries_longdoc.py ADDED
@@ -0,0 +1,58 @@
+ import nltk
+ import streamlit as st
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+ # Sentence tokenizer data needed by nltk.tokenize.sent_tokenize
+ nltk.download("punkt", quiet=True)
+
+ def fragment_text(text, tokenizer):
+     # Split the text into chunks of whole sentences that fit the model's input limit
+     sentences = nltk.tokenize.sent_tokenize(text)
+     max_len = tokenizer.max_len_single_sentence
+
+     chunks = []
+     chunk = ""
+
+     for sentence in sentences:
+         # Compare token counts (not character counts) against the model limit
+         combined_length = len(tokenizer.tokenize(sentence)) + len(tokenizer.tokenize(chunk))
+
+         if combined_length <= max_len:
+             chunk += sentence + " "
+         else:
+             chunks.append(chunk.strip())
+             chunk = sentence + " "
+
+     if chunk != "":
+         chunks.append(chunk.strip())
+
+     return chunks
+
+ def summarize_text(text):
+     # Summarize each chunk separately and join the partial summaries
+     chunks = fragment_text(text, tokenizer)
+
+     summaries = []
+     for chunk in chunks:
+         inputs = tokenizer(chunk, truncation=True, return_tensors="pt")
+         output = model.generate(**inputs)
+         summary = tokenizer.decode(output[0], skip_special_tokens=True)
+         summaries.append(summary)
+
+     final_summary = " ".join(summaries)
+     return final_summary
+
+ # Load the summarization model
+ checkpoint = "tclopess/bart_samsum"
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+ model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
+
+ text_input = st.text_area("Cole ou digite o texto a ser resumido:")
+
+ button = st.button("Resumo")
+
+ # Call the summarization function when the button is clicked
+ if button:
+     summary = summarize_text(text_input)
+     st.write("Resumo:")
+     st.write(summary)
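
For a quick sanity check of the checkpoint outside the Streamlit interface, a minimal sketch along these lines can be used; it is not part of the committed file, it assumes torch and transformers are installed, and the short dialogue is made up so it fits in a single chunk:

    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

    checkpoint = "tclopess/bart_samsum"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    # One short made-up dialogue, small enough that no fragmentation is needed
    dialogue = "Ana: Are we still meeting on Friday? Bruno: Yes, at noon at the usual cafe."
    inputs = tokenizer(dialogue, truncation=True, return_tensors="pt")
    summary_ids = model.generate(**inputs)
    print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))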