Spaces:
Runtime error
Runtime error
Ryan Kim
committed on
Commit
•
fda5a48
1
Parent(s):
06f9c28
streamlined appearance of overall app
Browse files- README.md +10 -39
- src/main.py +70 -73
README.md
CHANGED
@@ -86,48 +86,24 @@ class ModelImplementation(object):
|
|
86 |
|
87 |
The main idea is that for every model that's needed, we create a new instance of this class. In each case, we can store a reference to the tokenizer, model, and pipeline; the model will then use that tokenizer, model, and pipeline in the `predict()` call. If the output of a model needs to be curated in some way (e.g. we need to post-process the output of a model so that it's more human-readable), we can also pass a custom method alongside the other parameters. This is useful when we are switching between models in the Sentiment Analysis page or between the Sentiment Analysis and Patent Acceptance Prediction pages - we merely have to create or modify an instance of the `ModelImplementation` class with the proper tokenizer, model, pipeline, and post-process method (if needed). Placeholder text for any inputs can also be stored in an array.
|
88 |
|
89 |
-
The Sentiment Analysis and Patent Acceptance Prediction pages are both stored on one interface, with a
|
90 |
|
91 |
````python
|
92 |
# Title
|
93 |
st.title("CSGY-6613 Project")
|
94 |
# Subtitle
|
95 |
st.markdown("_**Ryan Kim (rk2546)**_")
|
96 |
-
st.markdown("---")
|
97 |
-
|
98 |
-
def PageToHome():
|
99 |
-
st.session_state.page = "home"
|
100 |
-
def PageToEmotion():
|
101 |
-
st.session_state.page = "emotion"
|
102 |
-
def PageToPatent():
|
103 |
-
st.session_state.page = "patent"
|
104 |
-
|
105 |
-
with st.sidebar:
|
106 |
-
st.subheader("Toolbox")
|
107 |
-
home_selected = st.button("Home", on_click=PageToHome)
|
108 |
-
emotion_selected = st.button(
|
109 |
-
"Emotion Analysis [Milestone #2]",
|
110 |
-
on_click=PageToEmotion
|
111 |
-
)
|
112 |
-
patent_selected = st.button(
|
113 |
-
"Patent Prediction [Milestone #3]",
|
114 |
-
on_click=PageToPatent
|
115 |
-
)
|
116 |
-
````
|
117 |
-
|
118 |
-
We store the current page of the user inside the `st.session_state` dictionary, which persists every time the page loads or changes. Because **Streamlit** re-renders the page every time a change is made to the interface, variables not stored in the session are reset. Alongside the current page, we also store models and user inputs inside the session, which allows them to persist between **Streamlit** re-renderings.
|
119 |
|
120 |
-
|
|
|
|
|
|
|
121 |
|
122 |
-
|
123 |
-
if st.session_state.page == "emotion":
|
124 |
st.subheader("Sentiment Analysis")
|
125 |
-
|
126 |
-
st.write("Loading model...")
|
127 |
-
else:
|
128 |
-
// ...
|
129 |
|
130 |
-
|
131 |
st.subheader("USPTO Patent Evaluation")
|
132 |
// ...
|
133 |
````
|
@@ -187,13 +163,8 @@ if submit:
|
|
187 |
to_eval = st.session_state.emotion_model.placeholders[0]
|
188 |
else:
|
189 |
to_eval = text_input.strip()
|
190 |
-
st.
|
191 |
-
|
192 |
-
st.write("Using the NLP model:")
|
193 |
-
st.markdown("> {}".format(st.session_state.emotion_model_name))
|
194 |
-
label, score = st.session_state.emotion_model.predict(to_eval)
|
195 |
-
st.markdown("#### Result:")
|
196 |
-
st.markdown("**{}**: {}".format(label,score))
|
197 |
````
|
198 |
|
199 |
### **USPTO Patent Acceptance Prediction**
|
|
|
86 |
|
87 |
The main idea is that for every model that's needed, we create a new instance of this class. In each case, we can store a reference to the tokenizer, model, and pipeline; the model will then use that tokenizer, model, and pipeline in the `predict()` call. If the output of a model needs to be curated in some way (e.g. we need to post-process the output of a model so that it's more human-readable), we can also pass a custom method alongside the other parameters. This is useful when we are switching between models in the Sentiment Analysis page or between the Sentiment Analysis and Patent Acceptance Prediction pages - we merely have to create or modify an instance of the `ModelImplementation` class with the proper tokenizer, model, pipeline, and post-process method (if needed). Placeholder text for any inputs can also be stored in an array.
|
88 |
|
89 |
+
The Sentiment Analysis and Patent Acceptance Prediction pages are both stored on one interface, with a tab menu allowing a user to switch between the two.
|
90 |
|
91 |
````python
|
92 |
# Title
|
93 |
st.title("CSGY-6613 Project")
|
94 |
# Subtitle
|
95 |
st.markdown("_**Ryan Kim (rk2546)**_")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
+
sentimentTab, patentTab = st.tabs([
|
98 |
+
"Emotion Analysis [Milestone #2]",
|
99 |
+
"Patent Prediction [Milestone #3]"
|
100 |
+
])
|
101 |
|
102 |
+
with sentimentTab:
|
|
|
103 |
st.subheader("Sentiment Analysis")
|
104 |
+
// ...
|
|
|
|
|
|
|
105 |
|
106 |
+
with patentTab:
|
107 |
st.subheader("USPTO Patent Evaluation")
|
108 |
// ...
|
109 |
````
|
|
|
163 |
to_eval = st.session_state.emotion_model.placeholders[0]
|
164 |
else:
|
165 |
to_eval = text_input.strip()
|
166 |
+
label, score, output_func = st.session_state.emotion_model.predict(to_eval)
|
167 |
+
output_func("**{}**: {}".format(label,score))
|
|
|
|
|
|
|
|
|
|
|
168 |
````
|
169 |
|
170 |
### **USPTO Patent Acceptance Prediction**
|
src/main.py
CHANGED
@@ -6,10 +6,6 @@ import streamlit as st
|
|
6 |
from transformers import TextClassificationPipeline, pipeline
|
7 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DistilBertTokenizerFast, DistilBertForSequenceClassification
|
8 |
|
9 |
-
# We'll be using Torch this time around
|
10 |
-
import torch
|
11 |
-
import torch.nn.functional as F
|
12 |
-
|
13 |
emotion_model_names = (
|
14 |
"cardiffnlp/twitter-roberta-base-sentiment",
|
15 |
"finiteautomata/beto-sentiment-analysis",
|
@@ -47,14 +43,45 @@ class ModelImplementation(object):
|
|
47 |
def ParseEmotionOutput(self, result):
|
48 |
label = result[0]['label']
|
49 |
score = result[0]['score']
|
|
|
50 |
if self.transformer_model_name == "cardiffnlp/twitter-roberta-base-sentiment":
|
51 |
if label == "LABEL_0":
|
52 |
-
label = "
|
|
|
53 |
elif label == "LABEL_2":
|
54 |
-
label = "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
else:
|
56 |
-
label = "
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
def ParsePatentOutput(self, result):
|
60 |
return result
|
@@ -115,28 +142,13 @@ if "patent_data" not in st.session_state:
|
|
115 |
st.title("CSGY-6613 Project")
|
116 |
# Subtitle
|
117 |
st.markdown("_**Ryan Kim (rk2546)**_")
|
118 |
-
st.markdown("---")
|
119 |
-
|
120 |
-
def PageToHome():
|
121 |
-
st.session_state.page = "home"
|
122 |
-
def PageToEmotion():
|
123 |
-
st.session_state.page = "emotion"
|
124 |
-
def PageToPatent():
|
125 |
-
st.session_state.page = "patent"
|
126 |
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
"Emotion Analysis [Milestone #2]",
|
132 |
-
on_click=PageToEmotion
|
133 |
-
)
|
134 |
-
patent_selected = st.button(
|
135 |
-
"Patent Prediction [Milestone #3]",
|
136 |
-
on_click=PageToPatent
|
137 |
-
)
|
138 |
|
139 |
-
|
140 |
st.subheader("Sentiment Analysis")
|
141 |
if "emotion_model" not in st.session_state:
|
142 |
st.write("Loading model...")
|
@@ -158,15 +170,10 @@ if st.session_state.page == "emotion":
|
|
158 |
to_eval = st.session_state.emotion_model.placeholders[0]
|
159 |
else:
|
160 |
to_eval = text_input.strip()
|
161 |
-
st.
|
162 |
-
|
163 |
-
st.write("Using the NLP model:")
|
164 |
-
st.markdown("> {}".format(st.session_state.emotion_model_name))
|
165 |
-
label, score = st.session_state.emotion_model.predict(to_eval)
|
166 |
-
st.markdown("#### Result:")
|
167 |
-
st.markdown("**{}**: {}".format(label,score))
|
168 |
|
169 |
-
|
170 |
st.subheader("USPTO Patent Evaluation")
|
171 |
st.markdown("Below are two inputs - one for an **ABSTRACT** and another for a list of **CLAIMS**. Enter both and select the \"Submit\" button to evaluate the patenteability of your idea.")
|
172 |
|
@@ -177,8 +184,6 @@ elif st.session_state.page == "patent":
|
|
177 |
key="patent_num",
|
178 |
)
|
179 |
|
180 |
-
print(patent_index_option)
|
181 |
-
|
182 |
if "patent_abstract_model" not in st.session_state or "patent_claim_model" not in st.session_state:
|
183 |
st.write("Loading models...")
|
184 |
else:
|
@@ -188,13 +193,13 @@ elif st.session_state.page == "patent":
|
|
188 |
abstract_input = st.text_area(
|
189 |
"Enter the abstract of the patent below",
|
190 |
placeholder=st.session_state.patent_data[st.session_state.patent_num]["abstract"],
|
191 |
-
height=
|
192 |
)
|
193 |
with col2:
|
194 |
claim_input = st.text_area(
|
195 |
"Enter the claims of the patent below",
|
196 |
placeholder=st.session_state.patent_data[st.session_state.patent_num]["claim"],
|
197 |
-
height=
|
198 |
)
|
199 |
weight_val = st.slider(
|
200 |
"How much do the abstract and claims weight when aggregating a total softmax score?",
|
@@ -219,17 +224,8 @@ elif st.session_state.page == "patent":
|
|
219 |
claim_to_eval = claim_input.strip()
|
220 |
is_custom = True
|
221 |
|
222 |
-
#tokenized_claim = st.session_state.patent_claim_model.tokenizer.encode(claim_to_eval, padding=True, truncation=True, max_length=512, add_special_tokens = True)
|
223 |
-
#untokenized_claim = st.session_state.patent_claim_model.tokenizer.decode(tokenized_claim)
|
224 |
-
#claim_to_eval2 = untokenized_claim.replace("[CLS]","")
|
225 |
-
#claim_to_eval2 = claim_to_eval2.replace("[SEP]","")
|
226 |
-
#print(claim_to_eval2)
|
227 |
-
|
228 |
abstract_response = st.session_state.patent_abstract_model.predict(abstract_to_eval)
|
229 |
claim_response = st.session_state.patent_claim_model.predict(claim_to_eval)
|
230 |
-
print(abstract_response[0])
|
231 |
-
print(claim_response[0])
|
232 |
-
print(weight_val)
|
233 |
|
234 |
claim_weight = (1+weight_val)/2
|
235 |
abstract_weight = 1-claim_weight
|
@@ -238,36 +234,37 @@ elif st.session_state.page == "patent":
|
|
238 |
{'label':'ACCEPTED','score':abstract_response[0][1]['score']*abstract_weight + claim_response[0][1]['score']*claim_weight}
|
239 |
]
|
240 |
aggregate_score_sorted = sorted(aggregate_score, key=lambda d: d['score'], reverse=True)
|
241 |
-
print(aggregate_score_sorted)
|
242 |
-
print(f'Original Rating: {st.session_state.patent_data[st.session_state.patent_num]["label"]}')
|
243 |
|
244 |
-
st.
|
245 |
-
answerCol1, answerCol2 = st.columns(2)
|
246 |
with answerCol1:
|
247 |
-
st.
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
|
|
|
|
252 |
with answerCol2:
|
253 |
-
st.
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
264 |
|
265 |
#if not is_custom:
|
266 |
# st.markdown('**Original Score:**')
|
267 |
# st.markdown(st.session_state.patent_data[st.session_state.patent_num]["label"])
|
268 |
|
269 |
-
|
270 |
-
else:
|
271 |
-
st.write("To get started, access the sidebar on the left (click the arrow in the top-left corner of the screen) and select a tool.")
|
272 |
-
|
273 |
st.write("")
|
|
|
6 |
from transformers import TextClassificationPipeline, pipeline
|
7 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DistilBertTokenizerFast, DistilBertForSequenceClassification
|
8 |
|
|
|
|
|
|
|
|
|
9 |
emotion_model_names = (
|
10 |
"cardiffnlp/twitter-roberta-base-sentiment",
|
11 |
"finiteautomata/beto-sentiment-analysis",
|
|
|
43 |
def ParseEmotionOutput(self, result):
|
44 |
label = result[0]['label']
|
45 |
score = result[0]['score']
|
46 |
+
output_func = st.info
|
47 |
if self.transformer_model_name == "cardiffnlp/twitter-roberta-base-sentiment":
|
48 |
if label == "LABEL_0":
|
49 |
+
label = "NEGATIVE"
|
50 |
+
output_func = st.error
|
51 |
elif label == "LABEL_2":
|
52 |
+
label = "POSITIVE"
|
53 |
+
output_func = st.success
|
54 |
+
else:
|
55 |
+
label = "NEUTRAL"
|
56 |
+
elif self.transformer_model_name == "finiteautomata/beto-sentiment-analysis":
|
57 |
+
if label == "NEG":
|
58 |
+
label = "NEGATIVE"
|
59 |
+
output_func = st.error
|
60 |
+
elif label == "POS":
|
61 |
+
label = "POSITIVE"
|
62 |
+
output_func = st.success
|
63 |
else:
|
64 |
+
label = "NEUTRAL"
|
65 |
+
elif self.transformer_model_name == "bhadresh-savani/distilbert-base-uncased-emotion":
|
66 |
+
if label == "sadness":
|
67 |
+
output_func = st.info
|
68 |
+
elif label == "joy":
|
69 |
+
output_func = st.success
|
70 |
+
elif label == "love":
|
71 |
+
output_func = st.success
|
72 |
+
elif label == "anger":
|
73 |
+
output_func = st.error
|
74 |
+
elif label == "fear":
|
75 |
+
output_func = st.info
|
76 |
+
elif label == "surprise":
|
77 |
+
output_func = st.error
|
78 |
+
label = label.upper()
|
79 |
+
elif self.transformer_model_name == "siebert/sentiment-roberta-large-english":
|
80 |
+
if label == "NEGATIVE":
|
81 |
+
output_func = st.error
|
82 |
+
elif label == "POSITIVE":
|
83 |
+
output_func = st.success
|
84 |
+
return label, score, output_func
|
85 |
|
86 |
def ParsePatentOutput(self, result):
|
87 |
return result
|
|
|
142 |
st.title("CSGY-6613 Project")
|
143 |
# Subtitle
|
144 |
st.markdown("_**Ryan Kim (rk2546)**_")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
|
146 |
+
sentimentTab, patentTab = st.tabs([
|
147 |
+
"Emotion Analysis [Milestone #2]",
|
148 |
+
"Patent Prediction [Milestone #3]"
|
149 |
+
])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
|
151 |
+
with sentimentTab:
|
152 |
st.subheader("Sentiment Analysis")
|
153 |
if "emotion_model" not in st.session_state:
|
154 |
st.write("Loading model...")
|
|
|
170 |
to_eval = st.session_state.emotion_model.placeholders[0]
|
171 |
else:
|
172 |
to_eval = text_input.strip()
|
173 |
+
label, score, output_func = st.session_state.emotion_model.predict(to_eval)
|
174 |
+
output_func("**{}**: {}".format(label,score))
|
|
|
|
|
|
|
|
|
|
|
175 |
|
176 |
+
with patentTab:
|
177 |
st.subheader("USPTO Patent Evaluation")
|
178 |
st.markdown("Below are two inputs - one for an **ABSTRACT** and another for a list of **CLAIMS**. Enter both and select the \"Submit\" button to evaluate the patenteability of your idea.")
|
179 |
|
|
|
184 |
key="patent_num",
|
185 |
)
|
186 |
|
|
|
|
|
187 |
if "patent_abstract_model" not in st.session_state or "patent_claim_model" not in st.session_state:
|
188 |
st.write("Loading models...")
|
189 |
else:
|
|
|
193 |
abstract_input = st.text_area(
|
194 |
"Enter the abstract of the patent below",
|
195 |
placeholder=st.session_state.patent_data[st.session_state.patent_num]["abstract"],
|
196 |
+
height=200
|
197 |
)
|
198 |
with col2:
|
199 |
claim_input = st.text_area(
|
200 |
"Enter the claims of the patent below",
|
201 |
placeholder=st.session_state.patent_data[st.session_state.patent_num]["claim"],
|
202 |
+
height=200
|
203 |
)
|
204 |
weight_val = st.slider(
|
205 |
"How much do the abstract and claims weight when aggregating a total softmax score?",
|
|
|
224 |
claim_to_eval = claim_input.strip()
|
225 |
is_custom = True
|
226 |
|
|
|
|
|
|
|
|
|
|
|
|
|
227 |
abstract_response = st.session_state.patent_abstract_model.predict(abstract_to_eval)
|
228 |
claim_response = st.session_state.patent_claim_model.predict(claim_to_eval)
|
|
|
|
|
|
|
229 |
|
230 |
claim_weight = (1+weight_val)/2
|
231 |
abstract_weight = 1-claim_weight
|
|
|
234 |
{'label':'ACCEPTED','score':abstract_response[0][1]['score']*abstract_weight + claim_response[0][1]['score']*claim_weight}
|
235 |
]
|
236 |
aggregate_score_sorted = sorted(aggregate_score, key=lambda d: d['score'], reverse=True)
|
|
|
|
|
237 |
|
238 |
+
answerCol1, answerCol2, answerCol3 = st.columns(3)
|
|
|
239 |
with answerCol1:
|
240 |
+
st.slider(
|
241 |
+
"Abstract Acceptance Likelihood",
|
242 |
+
min_value=0.0,
|
243 |
+
max_value=100.0,
|
244 |
+
value=abstract_response[0][1]["score"]*100.0,
|
245 |
+
disabled=True
|
246 |
+
)
|
247 |
with answerCol2:
|
248 |
+
output_func = st.info
|
249 |
+
if aggregate_score_sorted[0]["label"] == "REJECTED":
|
250 |
+
output_func = st.error
|
251 |
+
else:
|
252 |
+
output_func = st.success
|
253 |
+
output_func("""
|
254 |
+
**Final Rating: {}**
|
255 |
+
{}%
|
256 |
+
""".format(aggregate_score_sorted[0]["label"],aggregate_score_sorted[0]["score"]*100.0))
|
257 |
+
with answerCol3:
|
258 |
+
st.slider(
|
259 |
+
"Claim Acceptance Likelihood",
|
260 |
+
min_value=0.0,
|
261 |
+
max_value=100.0,
|
262 |
+
value=claim_response[0][1]["score"]*100.0,
|
263 |
+
disabled=True
|
264 |
+
)
|
265 |
|
266 |
#if not is_custom:
|
267 |
# st.markdown('**Original Score:**')
|
268 |
# st.markdown(st.session_state.patent_data[st.session_state.patent_num]["label"])
|
269 |
|
|
|
|
|
|
|
|
|
270 |
st.write("")
|