umarigan committed on
Commit
8c045a9
·
verified ·
1 Parent(s): c480c1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -4
app.py CHANGED
@@ -93,6 +93,19 @@ def entity_comb(output):
93
  output_comb.append(entity)
94
  return output_comb
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  Run_Button = st.button("Run")
97
 
98
  if Run_Button and input_text:
@@ -118,17 +131,34 @@ if Run_Button and input_text:
118
  # Combine entities
119
  output_comb = entity_comb(all_outputs)
120
 
 
 
 
 
 
 
 
 
 
 
121
  df = pd.DataFrame.from_dict(output_comb)
122
- cols_to_keep = ['word', 'entity_group', 'score', 'start', 'end']
123
  df_final = df[cols_to_keep]
124
 
125
  st.subheader("Recognized Entities")
126
  st.dataframe(df_final)
127
 
128
- # Spacy display logic
129
  spacy_display = {"ents": [], "text": input_text, "title": None}
130
  for entity in output_comb:
131
- spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": entity["entity_group"]})
 
 
 
 
132
 
133
  html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True)
134
- st.write(html, unsafe_allow_html=True)
 
 
 
 
93
  output_comb.append(entity)
94
  return output_comb
95
 
96
def create_mask_dict(entities, excluded_groups=("CARDINAL", "EVENT")):
    """Build a mapping from each distinct entity word to a numbered mask.

    Entities are visited in order. The first time a word is seen it is
    assigned the placeholder ``"<entity_group>_<n>"``, where ``n`` is a
    1-based counter kept separately for every entity group. Later
    occurrences of the same word keep the mask they were given first, even
    if they are tagged with a different group.

    Note that the group name itself may contain underscores, so code that
    needs the numeric suffix of a mask should split from the right
    (``mask.rsplit("_", 1)``) rather than from the left.

    Args:
        entities: Iterable of dicts, each providing at least the keys
            ``"entity_group"`` and ``"word"``.
        excluded_groups: Entity groups that are never masked. Defaults to
            ``("CARDINAL", "EVENT")``, the groups this function has always
            skipped.

    Returns:
        A dict mapping word -> mask string. Words belonging only to
        excluded groups do not appear in it.
    """
    excluded = frozenset(excluded_groups)
    mask_dict = {}
    entity_counters = {}
    for entity in entities:
        group = entity["entity_group"]
        if group in excluded:
            continue
        word = entity["word"]
        if word in mask_dict:
            # Keep the first mask so repeated mentions stay consistent.
            continue
        entity_counters[group] = entity_counters.get(group, 0) + 1
        mask_dict[word] = f"{group}_{entity_counters[group]}"
    return mask_dict
108
+
109
  Run_Button = st.button("Run")
110
 
111
  if Run_Button and input_text:
 
131
  # Combine entities
132
  output_comb = entity_comb(all_outputs)
133
 
134
+ # Create mask dictionary
135
+ mask_dict = create_mask_dict(output_comb)
136
+
137
+ # Apply masking and add masked_word column
138
+ for entity in output_comb:
139
+ if entity['entity_group'] not in ['CARDINAL', 'EVENT']:
140
+ entity['masked_word'] = mask_dict.get(entity['word'], entity['word'])
141
+ else:
142
+ entity['masked_word'] = entity['word']
143
+
144
  df = pd.DataFrame.from_dict(output_comb)
145
+ cols_to_keep = ['word', 'masked_word', 'entity_group', 'score', 'start', 'end']
146
  df_final = df[cols_to_keep]
147
 
148
  st.subheader("Recognized Entities")
149
  st.dataframe(df_final)
150
 
151
+ # Spacy display logic with entity numbering
152
  spacy_display = {"ents": [], "text": input_text, "title": None}
153
  for entity in output_comb:
154
+ if entity['entity_group'] not in ['CARDINAL', 'EVENT']:
155
+ label = f"{entity['entity_group']}_{mask_dict[entity['word']].split('_')[1]}"
156
+ else:
157
+ label = entity['entity_group']
158
+ spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": label})
159
 
160
  html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True)
161
+ st.write(html, unsafe_allow_html=True)
162
+
163
+ st.subheader("Masking Dictionary")
164
+ st.json(mask_dict)