ardavey committed
Commit f20513c · verified · 1 Parent(s): 83a5f58

Update app.py

Files changed (1): app.py +13 -59
app.py CHANGED
@@ -1,50 +1,3 @@
-Hugging Face's logo
-Hugging Face
-Search models, datasets, users...
-Models
-Datasets
-Spaces
-Posts
-Docs
-Solutions
-Pricing
-
-
-
-Spaces:
-
-ardavey
-/
-ber2gpt-indosum-app
-
-
-like
-0
-
-Logs
-App
-Files
-Community
-Settings
-ber2gpt-indosum-app
-/
-app.py
-
-ardavey's picture
-ardavey
-Update app.py
-5f6d6ed
-verified
-1 minute ago
-raw
-
-Copy download link
-history
-blame
-edit
-delete
-
-2.12 kB
 ### STREAMLIT APP ####
 import streamlit as st
 from transformers import BertTokenizer, EncoderDecoderModel, EncoderDecoderConfig
@@ -65,18 +18,20 @@ except Exception as e:
 text = st.text_area('Enter an article to summarize:')
 
 if text and len(text) > 0:
+    # Ensure input is a string
     input_ids = tokenizer.encode(text, return_tensors='pt', padding='longest', truncation=True, max_length=512)
+
     summary_ids = model.generate(input_ids,
-    min_length=40,
-    max_length=200,
-    num_beams=10,
-    repetition_penalty=2.0,
-    length_penalty=1.0,
-    no_repeat_ngram_size=3,
-    use_cache=True,
-    do_sample=False,
-    top_k=50,
-    )
+                                 min_length=40,
+                                 max_length=200,
+                                 num_beams=10,
+                                 repetition_penalty=2.0,
+                                 length_penalty=1.0,
+                                 no_repeat_ngram_size=3,
+                                 use_cache=True,
+                                 do_sample=False,
+                                 top_k=50,
+                                 )
 
     summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 
@@ -101,5 +56,4 @@ if text and len(text) > 0:
     summary_text = summary_text.replace(wrong_term, correct_term)
 
     summary_text = capitalize_sentences(summary_text)
-    st.info(summary_text)
-
+    st.info(summary_text)  # Ensure this is passed a string
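For reference, below is a minimal standalone sketch of the pipeline that app.py runs after this commit, outside Streamlit. The model-loading and error-handling code sits in unchanged context that these hunks do not show, so the checkpoint path below is a hypothetical placeholder rather than the Space's real value; the generate() arguments are copied from the diff.

```python
# Minimal sketch of the post-commit summarization pipeline.
# NOTE: "path/to/bert2gpt-indosum" is a hypothetical placeholder; the real
# checkpoint name is loaded in unchanged code this diff does not show.
from transformers import BertTokenizer, EncoderDecoderModel

checkpoint = "path/to/bert2gpt-indosum"  # placeholder checkpoint
tokenizer = BertTokenizer.from_pretrained(checkpoint)
model = EncoderDecoderModel.from_pretrained(checkpoint)

article = "..."  # article text to summarize

# Same encoding settings as app.py: pad/truncate to the 512-token encoder limit.
input_ids = tokenizer.encode(article, return_tensors='pt',
                             padding='longest', truncation=True, max_length=512)

# Generation arguments copied from the diff. With do_sample=False this is
# deterministic beam search; top_k only applies to sampling, so it is inert here.
summary_ids = model.generate(input_ids,
                             min_length=40,
                             max_length=200,
                             num_beams=10,
                             repetition_penalty=2.0,
                             length_penalty=1.0,
                             no_repeat_ngram_size=3,
                             use_cache=True,
                             do_sample=False,
                             top_k=50)

summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print(summary_text)
```

Since do_sample=False selects pure beam search, the top_k=50 argument has no effect and could be dropped from app.py without changing the output.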
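The final hunk's context also references a capitalize_sentences helper and a wrong_term/correct_term replacement loop defined outside the changed region. Purely as an illustration (a hypothetical reconstruction, not the app's actual helper), such a function might look like:

```python
import re

def capitalize_sentences(text: str) -> str:
    """Uppercase the first letter of every sentence.

    Hypothetical reconstruction: the real helper is defined in a part
    of app.py that this diff does not show.
    """
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    return ' '.join(s[0].upper() + s[1:] for s in sentences if s)
```

For example, capitalize_sentences('ringkasan pertama. kalimat kedua.') returns 'Ringkasan pertama. Kalimat kedua.'.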