Spaces:
Runtime error
Runtime error
Commit
·
3500aa6
1
Parent(s):
fa24845
Update comparativesIdentification.py
Browse files
comparativesIdentification.py
CHANGED
@@ -11,7 +11,6 @@ spacy.cli.download("en_core_web_sm")
|
|
11 |
# use the small spaCy model because it keeps us closer to a BOW model, which is what we care about in our case since we just compare words
|
12 |
nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
|
13 |
|
14 |
-
|
15 |
def find_comptives_symbols(sentence):
|
16 |
"""
|
17 |
Capture unique cases of symbols like <, >, =, <=, >= and ==
|
@@ -28,7 +27,8 @@ def find_comptives_symbols(sentence):
|
|
28 |
|
29 |
found_symbols = []
|
30 |
for matching in matches:
|
31 |
-
found_symbols.append({'comparative': ['symbol', matching]})
|
|
|
32 |
|
33 |
return found_symbols
|
34 |
|
@@ -616,11 +616,17 @@ def identify_double_symbol_comparisons(sentence):
|
|
616 |
|
617 |
comparative_list = [{'comparative': []}]
|
618 |
for phrase, operator in zip(found_phrases, found_operators):
|
619 |
-
comparative_list[0]['comparative'].append(phrase)
|
620 |
-
comparative_list[0]['comparative'].append(operator)
|
|
|
|
|
621 |
|
622 |
-
|
623 |
-
|
|
|
|
|
|
|
|
|
624 |
|
625 |
|
626 |
def check_substrings(lst):
|
@@ -643,6 +649,22 @@ def identify_comparatives(sentence):
|
|
643 |
This function combines the results of all the aforementioned techniques (simple and advanced) to identify bigger-than, smaller-than and equal-to patterns
|
644 |
"""
|
645 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
646 |
# Identify straightforward patterns
|
647 |
straight_comptives = find_comptives_straight_patterns(sentence)
|
648 |
|
@@ -656,8 +678,6 @@ def identify_comparatives(sentence):
|
|
656 |
|
657 |
multi_verb = multiword_verb_comptives(sentence)
|
658 |
|
659 |
-
identify_double_symbols = identify_double_symbol_comparisons(sentence)
|
660 |
-
|
661 |
# return all the patterns that were captured
|
662 |
comparatives = straight_comptives + bigger_smaller_comparatives + equal_to_comparatives + single_verb + multi_verb + identify_double_symbols
|
663 |
|
@@ -748,4 +768,4 @@ def comparatives_binding(sentence):
|
|
748 |
return (0, "COMPARATIVES", "more_symbol_comparatives")
|
749 |
|
750 |
except:
|
751 |
-
return (0, "COMPARATIVES", "unknown_error")
|
|
|
11 |
# use the small spaCy model because it keeps us closer to a BOW model, which is what we care about in our case since we just compare words
|
12 |
nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
|
13 |
|
|
|
14 |
def find_comptives_symbols(sentence):
|
15 |
"""
|
16 |
Capture unique cases of symbols like <, >, =, <=, >= and ==
|
|
|
27 |
|
28 |
found_symbols = []
|
29 |
for matching in matches:
|
30 |
+
# found_symbols.append({'comparative': ['symbol', matching]})
|
31 |
+
found_symbols.append({'comparative': matching})
|
32 |
|
33 |
return found_symbols
|
34 |
|
|
|
616 |
|
617 |
comparative_list = [{'comparative': []}]
|
618 |
for phrase, operator in zip(found_phrases, found_operators):
|
619 |
+
# comparative_list[0]['comparative'].append(phrase)
|
620 |
+
comparative_list[0]['comparative'].append((phrase, operator))
|
621 |
+
|
622 |
+
final_comptives_list = [{'comparative': comparative_list[0]['comparative'][i:i + 2]} for i in range(0, len(comparative_list[0]['comparative']), 2)]
|
623 |
|
624 |
+
final_clean_list = []
|
625 |
+
for item in final_comptives_list:
|
626 |
+
for value in item['comparative']:
|
627 |
+
final_clean_list.append({'comparative': value})
|
628 |
+
|
629 |
+
return final_clean_list
|
630 |
|
631 |
|
632 |
def check_substrings(lst):
|
|
|
649 |
This function combines the results of all the aforementioned techniques (simple and advanced) to identify bigger-than, smaller-than and equal-to patterns
|
650 |
"""
|
651 |
|
652 |
+
# first identify the double symbols (<= >= ==)
|
653 |
+
identify_double_symbols_initial = identify_double_symbol_comparisons(sentence)
|
654 |
+
|
655 |
+
# this is needed because, for example, "bigger than" is a subset of "bigger or equal than", which would otherwise return conflicting matches
|
656 |
+
if identify_double_symbols_initial:
|
657 |
+
for elem in identify_double_symbols_initial:
|
658 |
+
sentence = sentence.replace(elem['comparative'][0], " ")
|
659 |
+
|
660 |
+
identify_double_symbols = []
|
661 |
+
|
662 |
+
for item in identify_double_symbols_initial:
|
663 |
+
for k, v in item.items():
|
664 |
+
if isinstance(v, tuple):
|
665 |
+
item[k] = v[1]
|
666 |
+
identify_double_symbols.append(item)
|
667 |
+
|
668 |
# Identify straightforward patterns
|
669 |
straight_comptives = find_comptives_straight_patterns(sentence)
|
670 |
|
|
|
678 |
|
679 |
multi_verb = multiword_verb_comptives(sentence)
|
680 |
|
|
|
|
|
681 |
# return all the patterns that were captured
|
682 |
comparatives = straight_comptives + bigger_smaller_comparatives + equal_to_comparatives + single_verb + multi_verb + identify_double_symbols
|
683 |
|
|
|
768 |
return (0, "COMPARATIVES", "more_symbol_comparatives")
|
769 |
|
770 |
except:
|
771 |
+
return (0, "COMPARATIVES", "unknown_error")
|