Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -40,21 +40,11 @@ monochrome = Monochrome()
 
 auth_token = os.environ['HF_TOKEN']
 
-tokenizer_bin = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_token", token=auth_token)
-model_bin = AutoModelForTokenClassification.from_pretrained("AlGe/deberta-v3-large_token", token=auth_token)
-tokenizer_bin.model_max_length = 512
-pipe_bin = pipeline("ner", model=model_bin, tokenizer=tokenizer_bin)
-
 tokenizer_ext = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_AIS-token", token=auth_token)
 model_ext = AutoModelForTokenClassification.from_pretrained("AlGe/deberta-v3-large_AIS-token", token=auth_token)
 tokenizer_ext.model_max_length = 512
 pipe_ext = pipeline("ner", model=model_ext, tokenizer=tokenizer_ext)
 
-model1 = AutoModelForSequenceClassification.from_pretrained("AlGe/deberta-v3-large_Int_segment", num_labels=1, token=auth_token)
-tokenizer1 = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_Int_segment", token=auth_token)
-
-model2 = AutoModelForSequenceClassification.from_pretrained("AlGe/deberta-v3-large_seq_ext", num_labels=1, token=auth_token)
-
 def process_ner(text: str, pipeline) -> dict:
     output = pipeline(text)
     entities = []
@@ -84,39 +74,14 @@ def process_ner(text: str, pipeline) -> dict:
 
     return {"text": text, "entities": entities}
 
-def process_classification(text: str, model1, model2, tokenizer1) -> Tuple[str, str, str]:
-    inputs1 = tokenizer1(text, max_length=512, return_tensors='pt', truncation=True, padding=True)
-
-    with torch.no_grad():
-        outputs1 = model1(**inputs1)
-        outputs2 = model2(**inputs1)
-
-    prediction1 = outputs1[0].item()
-    prediction2 = outputs2[0].item()
-    score = prediction1 / (prediction2 + prediction1)
-
-    return f"{round(prediction1, 1)}", f"{round(prediction2, 1)}", f"{round(score, 2)}"
-
-
 def generate_charts(ner_output_bin: dict, ner_output_ext: dict) -> Tuple[go.Figure, go.Figure, np.ndarray]:
-    entities_bin = [entity['entity'] for entity in ner_output_bin['entities']]
     entities_ext = [entity['entity'] for entity in ner_output_ext['entities']]
 
-    # Counting entities for binary classification
-    entity_counts_bin = {entity: entities_bin.count(entity) for entity in set(entities_bin)}
-    bin_labels = list(entity_counts_bin.keys())
-    bin_sizes = list(entity_counts_bin.values())
-
     # Counting entities for extended classification
     entity_counts_ext = {entity: entities_ext.count(entity) for entity in set(entities_ext)}
     ext_labels = list(entity_counts_ext.keys())
     ext_sizes = list(entity_counts_ext.values())
 
-    bin_color_map = {
-        "External": "#6ad5bc",
-        "Internal": "#ee8bac"
-    }
-
     ext_color_map = {
         "INTemothou": "#FF7F50", # Coral
         "INTpercept": "#FF4500", # OrangeRed
@@ -128,7 +93,6 @@ def generate_charts(ner_output_bin: dict, ner_output_ext: dict) -> Tuple[go.Figure, go.Figure, np.ndarray]:
         "EXTother": "#00CED1", # DarkTurquoise
     }
 
-    bin_colors = [bin_color_map.get(label, "#FFFFFF") for label in bin_labels]
     ext_colors = [ext_color_map.get(label, "#FFFFFF") for label in ext_labels]
 
     # Create pie chart for extended classification
@@ -139,20 +103,11 @@ def generate_charts(ner_output_bin: dict, ner_output_ext: dict) -> Tuple[go.Figure, go.Figure, np.ndarray]:
         paper_bgcolor='rgba(0,0,0,0)'
     )
 
-    # Create bar chart for binary classification
-    fig2 = go.Figure(data=[go.Bar(x=bin_labels, y=bin_sizes, marker=dict(color=bin_colors))])
-    fig2.update_layout(
-        xaxis_title='Entity Type',
-        yaxis_title='Count',
-        template='plotly_dark',
-        plot_bgcolor='rgba(0,0,0,0)',
-        paper_bgcolor='rgba(0,0,0,0)'
-    )
 
     # Generate word cloud
     wordcloud_image = generate_wordcloud(ner_output_ext['entities'], ext_color_map)
 
-    return fig1,
+    return fig1, wordcloud_image
 
 
 def generate_wordcloud(entities: List[Dict], color_map: Dict[str, str]) -> np.ndarray:
@@ -192,15 +147,11 @@ def generate_wordcloud(entities: List[Dict], color_map: Dict[str, str]) -> np.ndarray:
 
 @spaces.GPU
 def all(text: str):
-    ner_output_bin = process_ner(text, pipe_bin)
     ner_output_ext = process_ner(text, pipe_ext)
-    classification_output = process_classification(text, model1, model2, tokenizer1)
 
-    pie_chart,
+    pie_chart, wordcloud_image = generate_charts(ner_output_bin, ner_output_ext)
 
-    return (
-        classification_output[0], classification_output[1], classification_output[2],
-        pie_chart, bar_chart, wordcloud_image)
+    return (ner_output_ext, pie_chart, wordcloud_image)
 
 examples = [
     ['Bevor ich meinen Hund kaufte bin ich immer alleine durch den Park gelaufen. Gestern war ich aber mit dem Hund losgelaufen. Das Wetter war sehr schön, nicht wie sonst im Winter. Ich weiß nicht genau. Mir fällt sonst nichts dazu ein. Wir trafen auf mehrere Spaziergänger. Ein Mann mit seinem Kind. Das Kind hat ein Eis gegessen.'],
@@ -210,11 +161,6 @@ iface = gr.Interface(
     fn=all,
     inputs=gr.Textbox(lines=5, label="Input Text", placeholder="Write about how your breakfast went or anything else that happened or might happen to you ..."),
     outputs=[
-        gr.HighlightedText(label="Binary Sequence Classification",
-                           color_map={
-                               "External": "#6ad5bcff",
-                               "Internal": "#ee8bacff"}
-                           ),
         gr.HighlightedText(label="Extended Sequence Classification",
                            color_map={
                                "INTemothou": "#FF7F50", # Coral
@@ -227,11 +173,7 @@ iface = gr.Interface(
                                "EXTother": "#00CED1", # DarkTurquoise
                            }
                            ),
-        gr.Label(label="Internal Detail Count"),
-        gr.Label(label="External Detail Count"),
-        gr.Label(label="Approximated Internal Detail Ratio"),
         gr.Plot(label="Extended SeqClass Entity Distribution Pie Chart"),
-        gr.Plot(label="Binary SeqClass Entity Count Bar Chart"),
         gr.Image(label="Entity Word Cloud")
     ],
     title="Scoring Demo",
|