ashhadahsan committed on
Commit
4022606
·
1 Parent(s): 647d063

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -74
app.py CHANGED
@@ -58,84 +58,96 @@ if st.button("Process"):
58
  columns = [x.lower() for x in columns]
59
  df.columns = columns
60
  print(summarizer_option)
61
- if summarizer_option == "Custom trained on the dataset":
62
- model = custom_model()
63
- print(summarizer_option)
64
- text = df["text"].values.tolist()
65
- progress_text = "Summarization in progress. Please wait."
66
- summary = []
67
-
68
- for x in stqdm(range(len(text))):
69
- try:
70
- summary.append(
71
- model(
72
- f"summarize: {text[x]}", max_length=50, early_stopping=True
73
- )[0]["summary_text"]
74
- )
75
- except:
76
- pass
77
- output = pd.DataFrame(
78
- {"text": df["text"].values.tolist(), "summary": summary}
79
- )
80
- csv = convert_df(output)
81
- st.download_button(
82
- label="Download data as CSV",
83
- data=csv,
84
- file_name=f"{summarizer_option}_df.csv",
85
- mime="text/csv",
86
- )
87
- if summarizer_option == "t5-base":
88
- model, tokenizer = load_t5()
89
- text = df["text"].values.tolist()
90
- summary = []
91
- for x in stqdm(range(len(text))):
92
-
93
- tokens_input = tokenizer.encode(
94
- "summarize: " + text[x],
95
- return_tensors="pt",
96
- max_length=tokenizer.model_max_length,
97
- truncation=True,
98
  )
99
- summary_ids = model.generate(
100
- tokens_input,
101
- min_length=80,
102
- max_length=150,
103
- length_penalty=20,
104
- num_beams=2,
105
  )
106
- summary_gen = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
107
- summary.append(summary_gen)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
- output = pd.DataFrame(
110
- {"text": df["text"].values.tolist(), "summary": summary}
111
- )
112
- csv = convert_df(output)
113
- st.download_button(
114
- label="Download data as CSV",
115
- data=csv,
116
- file_name=f"{summarizer_option}_df.csv",
117
- mime="text/csv",
118
- )
119
 
120
- if summarizer_option == "t5-one-line-summary":
121
- model = SimpleT5()
122
- text = df["text"].values.tolist()
123
 
124
- load_one_line_summarizer(model=model)
125
 
126
- summary = []
127
- for x in stqdm(range(len(text))):
128
- try:
129
- summary.append(model.predict(text[x])[0])
130
- except:
131
- pass
132
- output = pd.DataFrame(
133
- {"text": df["text"].values.tolist(), "summary": summary}
134
- )
135
- csv = convert_df(output)
136
- st.download_button(
137
- label="Download data as CSV",
138
- data=csv,
139
- file_name=f"{summarizer_option}_df.csv",
140
- mime="text/csv",
 
 
 
 
 
141
  )
 
 
58
  columns = [x.lower() for x in columns]
59
  df.columns = columns
60
  print(summarizer_option)
61
+ try:
62
+
63
+ if summarizer_option == "Custom trained on the dataset":
64
+ model = custom_model()
65
+ print(summarizer_option)
66
+ text = df["text"].values.tolist()
67
+ progress_text = "Summarization in progress. Please wait."
68
+ summary = []
69
+
70
+ for x in stqdm(range(len(text))):
71
+ try:
72
+ summary.append(
73
+ model(
74
+ f"summarize: {text[x]}",
75
+ max_length=50,
76
+ early_stopping=True,
77
+ )[0]["summary_text"]
78
+ )
79
+ except:
80
+ pass
81
+ output = pd.DataFrame(
82
+ {"text": df["text"].values.tolist(), "summary": summary}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  )
84
+ csv = convert_df(output)
85
+ st.download_button(
86
+ label="Download data as CSV",
87
+ data=csv,
88
+ file_name=f"{summarizer_option}_df.csv",
89
+ mime="text/csv",
90
  )
91
+ if summarizer_option == "t5-base":
92
+ model, tokenizer = load_t5()
93
+ text = df["text"].values.tolist()
94
+ summary = []
95
+ for x in stqdm(range(len(text))):
96
+
97
+ tokens_input = tokenizer.encode(
98
+ "summarize: " + text[x],
99
+ return_tensors="pt",
100
+ max_length=tokenizer.model_max_length,
101
+ truncation=True,
102
+ )
103
+ summary_ids = model.generate(
104
+ tokens_input,
105
+ min_length=80,
106
+ max_length=150,
107
+ length_penalty=20,
108
+ num_beams=2,
109
+ )
110
+ summary_gen = tokenizer.decode(
111
+ summary_ids[0], skip_special_tokens=True
112
+ )
113
+ summary.append(summary_gen)
114
 
115
+ output = pd.DataFrame(
116
+ {"text": df["text"].values.tolist(), "summary": summary}
117
+ )
118
+ csv = convert_df(output)
119
+ st.download_button(
120
+ label="Download data as CSV",
121
+ data=csv,
122
+ file_name=f"{summarizer_option}_df.csv",
123
+ mime="text/csv",
124
+ )
125
 
126
+ if summarizer_option == "t5-one-line-summary":
127
+ model = SimpleT5()
128
+ text = df["text"].values.tolist()
129
 
130
+ load_one_line_summarizer(model=model)
131
 
132
+ summary = []
133
+ for x in stqdm(range(len(text))):
134
+ try:
135
+ summary.append(model.predict(text[x])[0])
136
+ except:
137
+ pass
138
+ output = pd.DataFrame(
139
+ {"text": df["text"].values.tolist(), "summary": summary}
140
+ )
141
+ csv = convert_df(output)
142
+ st.download_button(
143
+ label="Download data as CSV",
144
+ data=csv,
145
+ file_name=f"{summarizer_option}_df.csv",
146
+ mime="text/csv",
147
+ )
148
+ except KeyError:
149
+ st.error(
150
+ "Please Make sure that your data must have a column named text",
151
+ icon="🚨",
152
  )
153
+ st.info("Text column must have amazon reviews", icon="ℹ️")