Spaces:

ValadisCERTH
/

NaturalLanguageModule_complete

Runtime error

App Files Files Community

ValadisCERTH committed on May 4, 2023

Commit

3500aa6

1 Parent(s): fa24845

Update comparativesIdentification.py

Browse files

Files changed (1) hide show

comparativesIdentification.py +29 -9

comparativesIdentification.py CHANGED Viewed

@@ -11,7 +11,6 @@ spacy.cli.download("en_core_web_sm")
 # use spacy small because in that way we are closer to a BOW model which is the one we care in our case since we just compare words
 nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
 def find_comptives_symbols(sentence):
     """
     Capture unique cases of symbols like <, >, =, <=, >= and ==
@@ -28,7 +27,8 @@ def find_comptives_symbols(sentence):
     found_symbols = []
     for matching in matches:
-        found_symbols.append({'comparative': ['symbol', matching]})
     return found_symbols
@@ -616,11 +616,17 @@ def identify_double_symbol_comparisons(sentence):
     comparative_list = [{'comparative': []}]
     for phrase, operator in zip(found_phrases, found_operators):
-        comparative_list[0]['comparative'].append(phrase)
-        comparative_list[0]['comparative'].append(operator)
-    return [{'comparative': comparative_list[0]['comparative'][i:i + 2]} for i in
-            range(0, len(comparative_list[0]['comparative']), 2)]
 def check_substrings(lst):
@@ -643,6 +649,22 @@ def identify_comparatives(sentence):
     This function combines the results of all the aforementioned techniques (simple and advanced) to identify bigger than, smaller than, equal to patterns
     """
     # Identify straightforward patterns
     straight_comptives = find_comptives_straight_patterns(sentence)
@@ -656,8 +678,6 @@ def identify_comparatives(sentence):
     multi_verb = multiword_verb_comptives(sentence)
-    identify_double_symbols = identify_double_symbol_comparisons(sentence)
     # return all the patterns that were captured
     comparatives = straight_comptives + bigger_smaller_comparatives + equal_to_comparatives + single_verb + multi_verb + identify_double_symbols
@@ -748,4 +768,4 @@ def comparatives_binding(sentence):
       return (0, "COMPARATIVES", "more_symbol_comparatives")
   except:
-    return (0, "COMPARATIVES", "unknown_error")

 # use spacy small because in that way we are closer to a BOW model which is the one we care in our case since we just compare words
 nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
 def find_comptives_symbols(sentence):
     """
     Capture unique cases of symbols like <, >, =, <=, >= and ==
     found_symbols = []
     for matching in matches:
+        # found_symbols.append({'comparative': ['symbol', matching]})
+        found_symbols.append({'comparative': matching})
     return found_symbols
     comparative_list = [{'comparative': []}]
     for phrase, operator in zip(found_phrases, found_operators):
+        # comparative_list[0]['comparative'].append(phrase)
+        comparative_list[0]['comparative'].append((phrase, operator))
+    final_comptives_list = [{'comparative': comparative_list[0]['comparative'][i:i + 2]} for i in range(0, len(comparative_list[0]['comparative']), 2)]
+    final_clean_list = []
+    for item in final_comptives_list:
+        for value in item['comparative']:
+            final_clean_list.append({'comparative': value})
+    return final_clean_list
 def check_substrings(lst):
     This function combines the results of all the aforementioned techniques (simple and advanced) to identify bigger than, smaller than, equal to patterns
     """
+    # first identify the double symbols (<= >= ==)
+    identify_double_symbols_initial = identify_double_symbol_comparisons(sentence)
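+    # remove the matched double-symbol phrases from the sentence first: for example, "bigger than" is a substring of "bigger or equal than", so the simpler patterns below would otherwise match it again and produce conflicting results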
+    if identify_double_symbols_initial:
+        for elem in identify_double_symbols_initial:
+            sentence = sentence.replace(elem['comparative'][0], " ")
+    identify_double_symbols = []
+    for item in identify_double_symbols_initial:
+        for k, v in item.items():
+            if isinstance(v, tuple):
+                item[k] = v[1]
+        identify_double_symbols.append(item)
     # Identify straightforward patterns
     straight_comptives = find_comptives_straight_patterns(sentence)
     multi_verb = multiword_verb_comptives(sentence)
     # return all the patterns that were captured
     comparatives = straight_comptives + bigger_smaller_comparatives + equal_to_comparatives + single_verb + multi_verb + identify_double_symbols
       return (0, "COMPARATIVES", "more_symbol_comparatives")
   except:
+    return (0, "COMPARATIVES", "unknown_error")