lossLopes committed
Commit 509c8ba · 1 Parent(s): 5ca455e

Update app.py

Files changed (1)
  1. app.py +0 -123
app.py CHANGED
@@ -1,126 +1,3 @@
- """import gradio as gr
- import nltk
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-
- nltk.download('punkt')
-
- def fragment_text(text, tokenizer):
-     sentences = nltk.tokenize.sent_tokenize(text)
-     max_len = tokenizer.max_len_single_sentence
-
-     chunks = []
-     chunk = ""
-     count = -1
-
-     for sentence in sentences:
-         count += 1
-         combined_length = len(tokenizer.tokenize(sentence)) + len(chunk)
-
-         if combined_length <= max_len:
-             chunk += sentence + " "
-         else:
-             chunks.append(chunk.strip())
-             chunk = sentence + " "
-
-     if chunk != "":
-         chunks.append(chunk.strip())
-
-     return chunks
-
-
- def summarize_text(text, tokenizer, model):
-     chunks = fragment_text(text, tokenizer)
-
-     summaries = []
-     for chunk in chunks:
-         input = tokenizer(chunk, return_tensors='pt')
-         output = model.generate(**input)
-         summary = tokenizer.decode(*output, skip_special_tokens=True)
-         summaries.append(summary)
-
-     final_summary = " ".join(summaries)
-     return final_summary
-
- # Load pre-trained model and tokenizer
- checkpoint = "tclopess/bart_samsum"
- tokenizer = AutoTokenizer.from_pretrained(checkpoint)
- model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
-
- # Define Gradio Interface
- iface = gr.Interface(
-     fn=summarize_text,
-     inputs=gr.Textbox(),
-     outputs=gr.Textbox(),
-     live=True
- )
-
- # Launch the Gradio Interface
- iface.launch()
-
-
- import gradio as gr
- import nltk
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-
- nltk.download('punkt')
-
- def fragment_text(text, tokenizer):
-     sentences = nltk.tokenize.sent_tokenize(text)
-     max_len = tokenizer.max_len_single_sentence
-
-     chunks = []
-     chunk = ""
-     count = -1
-
-     for sentence in sentences:
-         count += 1
-         combined_length = len(tokenizer.tokenize(sentence)) + len(chunk)
-
-         if combined_length <= max_len:
-             chunk += sentence + " "
-         else:
-             chunks.append(chunk.strip())
-             chunk = sentence + " "
-
-     if chunk != "":
-         chunks.append(chunk.strip())
-
-     return chunks
-
-
- def summarize_text(text, tokenizer, model):
-     chunks = fragment_text(text, tokenizer)
-
-     summaries = []
-     for chunk in chunks:
-         input = tokenizer(chunk, return_tensors='pt')
-         output = model.generate(**input)
-         summary = tokenizer.decode(*output, skip_special_tokens=True)
-         summaries.append(summary)
-
-     final_summary = " ".join(summaries)
-     return final_summary
-
- checkpoint = "tclopess/bart_samsum"
- tokenizer = AutoTokenizer.from_pretrained(checkpoint)
- model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
-
- def summarize_and_display(text):
-     summary = summarize_text(text, tokenizer, model)
-     return summary
-
- iface = gr.Interface(
-     fn=summarize_and_display,
-     inputs=gr.Textbox(label="Enter text to summarize:"),
-     outputs=gr.Textbox(label="Summary:"),
-     live=True,
-     title="Text Summarizer with Button",
-     description="Click the 'Summarize' button to generate a summary of the text.",
- )
-
- iface.launch(share=True)
- """"
-
  import gradio as gr
  import nltk
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
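
Note: the block removed above wrapped two earlier drafts of the Space in a string literal that ends with an unbalanced quote sequence (""""), a syntax error, so after this commit app.py keeps only the three imports shown as context. For reference, the following is a minimal runnable sketch of the same summarizer, not the committed code: it keeps the tclopess/bart_samsum checkpoint and the NLTK sentence-based chunking, but measures chunk length in tokens rather than characters, avoids shadowing the built-in name input, and decodes output[0] explicitly.

# app.py -- illustrative sketch only, not the committed file
import gradio as gr
import nltk
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

nltk.download('punkt')

checkpoint = "tclopess/bart_samsum"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)


def fragment_text(text, tokenizer):
    # Group whole sentences into chunks that fit the encoder's input budget.
    sentences = nltk.tokenize.sent_tokenize(text)
    max_len = tokenizer.max_len_single_sentence

    chunks = []
    chunk = ""
    for sentence in sentences:
        # Measure the candidate chunk in tokens, not characters.
        if len(tokenizer.tokenize(chunk + sentence)) <= max_len:
            chunk += sentence + " "
        else:
            if chunk.strip():
                chunks.append(chunk.strip())
            chunk = sentence + " "
    if chunk.strip():
        chunks.append(chunk.strip())
    return chunks


def summarize_text(text):
    # Summarize each chunk independently, then join the partial summaries.
    summaries = []
    for chunk in fragment_text(text, tokenizer):
        inputs = tokenizer(chunk, return_tensors="pt")  # renamed from "input"
        output = model.generate(**inputs)
        summaries.append(tokenizer.decode(output[0], skip_special_tokens=True))
    return " ".join(summaries)


iface = gr.Interface(
    fn=summarize_text,
    inputs=gr.Textbox(label="Enter text to summarize:"),
    outputs=gr.Textbox(label="Summary:"),
    title="Text Summarizer",
    description="Generates a summary of the pasted text.",
)

iface.launch()

The removed drafts also set live=True while describing a "Summarize" button; live mode re-runs the function on every input change, which is slow for a seq2seq model, so dropping live (or wiring an explicit button with gr.Blocks) is the more usual choice here.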