lossLopes committed
Commit 884e1bf · 1 Parent(s): 65e4068

Update app.py

Files changed (1)
  1. app.py +51 -48
app.py CHANGED
@@ -1,57 +1,60 @@
- #import gradio as gr
- #gr.load("models/tclopess/bart_samsum").launch()
  import gradio as gr
  import nltk
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

- # Load the model and the tokenizer
+ nltk.download('punkt')
+
+ def fragment_text(text, tokenizer):
+     sentences = nltk.tokenize.sent_tokenize(text)
+     max_len = tokenizer.max_len_single_sentence
+
+     chunks = []
+     chunk = ""
+     count = -1
+
+     for sentence in sentences:
+         count += 1
+         combined_length = len(tokenizer.tokenize(sentence)) + len(chunk)
+
+         if combined_length <= max_len:
+             chunk += sentence + " "
+         else:
+             chunks.append(chunk.strip())
+             chunk = sentence + " "
+
+     if chunk != "":
+         chunks.append(chunk.strip())
+
+     return chunks
+
+
+ def summarize_text(text, tokenizer, model):
+     chunks = fragment_text(text, tokenizer)
+
+     summaries = []
+     for chunk in chunks:
+         input = tokenizer(chunk, return_tensors='pt')
+         output = model.generate(**input)
+         summary = tokenizer.decode(*output, skip_special_tokens=True)
+         summaries.append(summary)
+
+     final_summary = " ".join(summaries)
+     return final_summary
+
+ # Load pre-trained model and tokenizer
  checkpoint = "tclopess/bart_samsum"
  tokenizer = AutoTokenizer.from_pretrained(checkpoint)
  model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

- # Function to split the text into chunks
- def fragment_text(text, tokenizer):
-     sentences = nltk.tokenize.sent_tokenize(text)
-     max_len = tokenizer.max_len_single_sentence
-
-     chunks = []
-     chunk = ""
-     count = -1
-
-     for sentence in sentences:
-         count += 1
-         combined_length = len(tokenizer.tokenize(sentence)) + len(chunk)
-
-         if combined_length <= max_len:
-             chunk += sentence + " "
-         else:
-             chunks.append(chunk.strip())
-             chunk = sentence + " "
-
-     if chunk != "":
-         chunks.append(chunk.strip())
-
-     return chunks
-
- # Function to generate summaries
- def generate_summaries(text):
-     chunks = fragment_text(text, tokenizer)
-     summaries = []
-     for chunk in chunks:
-         input = tokenizer(chunk, return_tensors='pt')
-         output = model.generate(**input)
-         summary = tokenizer.decode(*output, skip_special_tokens=True)
-         summaries.append(summary)
-     return summaries
-
- # Function to display the final summary
- def display_summary(summaries):
-     summary = " ".join(summaries)
-     gr.text("Resumo final:", summary)
-
- # Create a text input field
- input_text = gr.inputs.Textbox(label="Insira ou cole o texto aqui:")
-
- # Launch the Gradio app
- gr.Interface(generate_summaries, input_text, display_summary).launch()
+ # Define Gradio Interface
+ iface = gr.Interface(
+     fn=summarize_text,
+     inputs=gr.Textbox(),
+     outputs=gr.Textbox(),
+     live=True,
+     interpretation="default"
+ )
+
+ # Launch the Gradio Interface
+ iface.launch()
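Note: as committed, summarize_text takes three parameters (text, tokenizer, model), but the Interface declares a single Textbox input, so Gradio will call the function with only the text argument and raise a TypeError on submit. A minimal sketch of one way to close the gap, wrapping the call so only the text flows through the UI (the summarize wrapper name is illustrative, not part of the commit):

    def summarize(text):
        # Close over the tokenizer and model loaded above.
        return summarize_text(text, tokenizer, model)

    iface = gr.Interface(fn=summarize, inputs=gr.Textbox(), outputs=gr.Textbox())
    iface.launch()

Two of the Interface keyword arguments are also worth checking against the pinned Gradio version: live=True re-runs the model on every input change, which is expensive for a seq2seq checkpoint, and the interpretation argument existed in Gradio 3.x but was dropped in later major releases.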
 
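fragment_text greedily packs whole sentences into a chunk until the tokenizer's single-sequence budget is reached. There is a unit mismatch in the committed version: len(tokenizer.tokenize(sentence)) counts tokens while len(chunk) counts characters, so the budget check mixes tokens with characters and chunks come out smaller than max_len allows (the count variable is also incremented but never read). A sketch of the same loop with both terms measured in tokens; this is an assumption about the intent, not what the commit ships:

    def fragment_text_tokens(text, tokenizer):
        # Hypothetical variant: both sides of the budget check count tokens.
        sentences = nltk.tokenize.sent_tokenize(text)
        max_len = tokenizer.max_len_single_sentence
        chunks, chunk = [], ""
        for sentence in sentences:
            combined = len(tokenizer.tokenize(sentence)) + len(tokenizer.tokenize(chunk))
            if combined <= max_len:
                chunk += sentence + " "
            else:
                chunks.append(chunk.strip())
                chunk = sentence + " "
        if chunk:
            chunks.append(chunk.strip())
        return chunks

(nltk.download('punkt') covers sent_tokenize here; newer NLTK releases may additionally ask for the 'punkt_tab' resource.)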
 
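In summarize_text, tokenizer.decode(*output, skip_special_tokens=True) works because generate returns a batch containing one sequence, so the star-unpacking effectively passes output[0]; indexing makes that explicit, and the name input shadows the Python builtin. A sketch of the per-chunk step with those cosmetic fixes plus inference-mode generation; the max_new_tokens value is an illustrative assumption, not a setting from the commit:

    import torch

    def summarize_chunk(chunk, tokenizer, model):
        # Hypothetical helper; mirrors the loop body in summarize_text.
        inputs = tokenizer(chunk, return_tensors='pt', truncation=True)
        with torch.no_grad():  # no gradients needed at inference time
            output_ids = model.generate(**inputs, max_new_tokens=128)
        return tokenizer.decode(output_ids[0], skip_special_tokens=True)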