Spaces:
Runtime error
Runtime error
Changing order of fuzzy_remove_duplicate_ent
Browse files
app.py
CHANGED
|
@@ -257,6 +257,20 @@ Non-organised entities with entiities.json
|
|
| 257 |
|
| 258 |
return pre_text
|
| 259 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
def get_who_what_where_when(self):
|
| 261 |
"""Get entity information in a document.
|
| 262 |
|
|
@@ -323,17 +337,6 @@ TIME"""
|
|
| 323 |
return self.entity_df
|
| 324 |
|
| 325 |
|
| 326 |
-
def fuzzy_remove_duplicate_ent(deduped_ents, threshold=85, limit=1):
    """Drop entities that are fuzzy near-duplicates of another entity.

    Each entity is compared, in iteration order, against the other entities
    that are still retained. Any candidate whose fuzzy-match score is strictly
    greater than ``threshold`` is treated as a duplicate and removed, so the
    first entity of a group of near-duplicates is the one that is kept.

    Args:
        deduped_ents: Collection of entities to filter. Exact duplicates are
            expected to have been removed already; entities equal to the one
            being checked are never used as candidates.
        threshold: Score a candidate must exceed to count as a duplicate.
        limit: Maximum number of best matches requested from
            ``process.extract`` for each entity.

    Returns:
        ``deduped_ents`` itself when no duplicate was found, otherwise a new
        list containing the retained entities in their original order.
    """
    remaining = deduped_ents

    # Iterate over a snapshot so that rebinding ``remaining`` below never
    # disturbs the loop.
    for ent in list(deduped_ents):
        # An entity already dropped as a duplicate must not remove the entity
        # it was matched against, otherwise both members of a pair vanish.
        if ent not in remaining:
            continue

        # Build the candidate list explicitly. ``list.remove()`` works in
        # place and returns None, so its result cannot be passed as choices.
        candidates = [other for other in remaining if other != ent]
        if not candidates:
            break

        # process.extract returns the best matches among the candidates
        # together with their scores, as (match, score, ...) tuples.
        matches = process.extract(ent, candidates, limit=limit)
        duplicates_found = [match[0] for match in matches if match[1] > threshold]

        if duplicates_found:
            remaining = [
                entity for entity in remaining if entity not in duplicates_found
            ]

    return remaining
|
| 337 |
|
| 338 |
|
| 339 |
def entity_json(self):
|
|
|
|
| 257 |
|
| 258 |
return pre_text
|
| 259 |
|
| 260 |
+
|
| 261 |
+
def fuzzy_remove_duplicate_ent(deduped_ents, threshold=85, limit=1):
    """Drop entities that are fuzzy near-duplicates of another entity.

    Each entity is compared, in iteration order, against the other entities
    that are still retained. Any candidate whose fuzzy-match score is strictly
    greater than ``threshold`` is treated as a duplicate and removed, so the
    first entity of a group of near-duplicates is the one that is kept.

    Args:
        deduped_ents: Collection of entities to filter. Exact duplicates are
            expected to have been removed already; entities equal to the one
            being checked are never used as candidates.
        threshold: Score a candidate must exceed to count as a duplicate.
        limit: Maximum number of best matches requested from
            ``process.extract`` for each entity.

    Returns:
        ``deduped_ents`` itself when no duplicate was found, otherwise a new
        list containing the retained entities in their original order.
    """
    remaining = deduped_ents

    # Iterate over a snapshot so that rebinding ``remaining`` below never
    # disturbs the loop.
    for ent in list(deduped_ents):
        # An entity already dropped as a duplicate must not remove the entity
        # it was matched against, otherwise both members of a pair vanish.
        if ent not in remaining:
            continue

        # Build the candidate list explicitly. ``list.remove()`` works in
        # place and returns None, so its result cannot be passed as choices.
        candidates = [other for other in remaining if other != ent]
        if not candidates:
            break

        # process.extract returns the best matches among the candidates
        # together with their scores, as (match, score, ...) tuples.
        matches = process.extract(ent, candidates, limit=limit)
        duplicates_found = [match[0] for match in matches if match[1] > threshold]

        if duplicates_found:
            remaining = [
                entity for entity in remaining if entity not in duplicates_found
            ]

    return remaining
|
| 272 |
+
|
| 273 |
+
|
| 274 |
def get_who_what_where_when(self):
|
| 275 |
"""Get entity information in a document.
|
| 276 |
|
|
|
|
| 337 |
return self.entity_df
|
| 338 |
|
| 339 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
|
| 341 |
|
| 342 |
def entity_json(self):
|