lossLopes committed on
Commit
cc8e1ef
·
1 Parent(s): 38e748f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ nltk.download('punkt')
3
+
4
+
5
+ def fragment_text(text, tokenizer):
6
+
7
+ sentences = nltk.tokenize.sent_tokenize(text)
8
+ max_len = tokenizer.max_len_single_sentence
9
+
10
+ chunks = []
11
+ chunk = ""
12
+ count = -1
13
+
14
+ for sentence in sentences:
15
+ count += 1
16
+ combined_length = len(tokenizer.tokenize(sentence)) + len(chunk)
17
+
18
+ if combined_length <= max_len:
19
+ chunk += sentence + " "
20
+ else:
21
+ chunks.append(chunk.strip())
22
+ chunk = sentence + " "
23
+
24
+ if chunk != "":
25
+ chunks.append(chunk.strip())
26
+
27
+ return chunks
28
+
29
def summarize_text(text):
    """Summarize *text* by splitting it into token-limited chunks and
    summarizing each chunk with the module-level ``model``/``tokenizer``.

    Args:
        text: Text to summarize.

    Returns:
        str: The per-chunk summaries joined with single spaces.
    """
    chunks = fragment_text(text, tokenizer)

    summaries = []
    for chunk in chunks:
        # Renamed from `input` to avoid shadowing the builtin.
        inputs = tokenizer(chunk, return_tensors='pt')
        output = model.generate(**inputs)
        # `generate` returns a batch of size 1; decode its single sequence
        # explicitly instead of the fragile `*output` star-unpack.
        summary = tokenizer.decode(output[0], skip_special_tokens=True)
        summaries.append(summary)

    return " ".join(summaries)
42
+
43
+ # Importa o modelo de sumarização
44
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
45
+
46
+ # Define o modelo de sumarização
47
+ checkpoint = "tclopess/bart_samsum"
48
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
49
+ model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
50
+
51
+ # Adiciona um campo de input
52
+ text_input = st.text_area("Cola ou digite o texto a ser resumido:")
53
+
54
+ # Adiciona um botão
55
+ button = st.button("Resumo")
56
+
57
+ # Chama a função de sumarização se o botão for clicado
58
+ if button:
59
+ summary = summarize_text(text_input)
60
+ st.write("Resumo:")
61
+ st.write(summary)