ValadisCERTH committed on
Commit
3500aa6
·
1 Parent(s): fa24845

Update comparativesIdentification.py

Browse files
Files changed (1) hide show
  1. comparativesIdentification.py +29 -9
comparativesIdentification.py CHANGED
@@ -11,7 +11,6 @@ spacy.cli.download("en_core_web_sm")
11
  # use the small spaCy model: it keeps us closer to a bag-of-words (BOW) model, which is what we care about here since we only compare words
12
  nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
13
 
14
-
15
  def find_comptives_symbols(sentence):
16
  """
17
  Capture unique cases of symbols like <, >, =, <=, >= and ==
@@ -28,7 +27,8 @@ def find_comptives_symbols(sentence):
28
 
29
  found_symbols = []
30
  for matching in matches:
31
- found_symbols.append({'comparative': ['symbol', matching]})
 
32
 
33
  return found_symbols
34
 
@@ -616,11 +616,17 @@ def identify_double_symbol_comparisons(sentence):
616
 
617
  comparative_list = [{'comparative': []}]
618
  for phrase, operator in zip(found_phrases, found_operators):
619
- comparative_list[0]['comparative'].append(phrase)
620
- comparative_list[0]['comparative'].append(operator)
 
 
621
 
622
- return [{'comparative': comparative_list[0]['comparative'][i:i + 2]} for i in
623
- range(0, len(comparative_list[0]['comparative']), 2)]
 
 
 
 
624
 
625
 
626
  def check_substrings(lst):
@@ -643,6 +649,22 @@ def identify_comparatives(sentence):
643
  This function combines the results of all the aforementioned techniques (simple and advanced) to identify "bigger than", "smaller than" and "equal to" patterns
644
  """
645
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
646
  # Identify straightforward patterns
647
  straight_comptives = find_comptives_straight_patterns(sentence)
648
 
@@ -656,8 +678,6 @@ def identify_comparatives(sentence):
656
 
657
  multi_verb = multiword_verb_comptives(sentence)
658
 
659
- identify_double_symbols = identify_double_symbol_comparisons(sentence)
660
-
661
  # return all the patterns that were captured
662
  comparatives = straight_comptives + bigger_smaller_comparatives + equal_to_comparatives + single_verb + multi_verb + identify_double_symbols
663
 
@@ -748,4 +768,4 @@ def comparatives_binding(sentence):
748
  return (0, "COMPARATIVES", "more_symbol_comparatives")
749
 
750
  except:
751
- return (0, "COMPARATIVES", "unknown_error")
 
11
  # use the small spaCy model: it keeps us closer to a bag-of-words (BOW) model, which is what we care about here since we only compare words
12
  nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
13
 
 
14
  def find_comptives_symbols(sentence):
15
  """
16
  Capture unique cases of symbols like <, >, =, <=, >= and ==
 
27
 
28
  found_symbols = []
29
  for matching in matches:
30
+ # found_symbols.append({'comparative': ['symbol', matching]})
31
+ found_symbols.append({'comparative': matching})
32
 
33
  return found_symbols
34
 
 
616
 
617
  comparative_list = [{'comparative': []}]
618
  for phrase, operator in zip(found_phrases, found_operators):
619
+ # comparative_list[0]['comparative'].append(phrase)
620
+ comparative_list[0]['comparative'].append((phrase, operator))
621
+
622
+ final_comptives_list = [{'comparative': comparative_list[0]['comparative'][i:i + 2]} for i in range(0, len(comparative_list[0]['comparative']), 2)]
623
 
624
+ final_clean_list = []
625
+ for item in final_comptives_list:
626
+ for value in item['comparative']:
627
+ final_clean_list.append({'comparative': value})
628
+
629
+ return final_clean_list
630
 
631
 
632
  def check_substrings(lst):
 
649
  This function combines the results of all the aforementioned techniques (simple and advanced) to identify "bigger than", "smaller than" and "equal to" patterns
650
  """
651
 
652
+ # first identify the double symbols (<= >= ==)
653
+ identify_double_symbols_initial = identify_double_symbol_comparisons(sentence)
654
+
655
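+ # remove the matched double-symbol phrases from the sentence first, because (for example) "bigger than" is a substring of "bigger or equal than" and the later matchers would otherwise return conflicting results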
656
+ if identify_double_symbols_initial:
657
+ for elem in identify_double_symbols_initial:
658
+ sentence = sentence.replace(elem['comparative'][0], " ")
659
+
660
+ identify_double_symbols = []
661
+
662
+ for item in identify_double_symbols_initial:
663
+ for k, v in item.items():
664
+ if isinstance(v, tuple):
665
+ item[k] = v[1]
666
+ identify_double_symbols.append(item)
667
+
668
  # Identify straightforward patterns
669
  straight_comptives = find_comptives_straight_patterns(sentence)
670
 
 
678
 
679
  multi_verb = multiword_verb_comptives(sentence)
680
 
 
 
681
  # return all the patterns that were captured
682
  comparatives = straight_comptives + bigger_smaller_comparatives + equal_to_comparatives + single_verb + multi_verb + identify_double_symbols
683
 
 
768
  return (0, "COMPARATIVES", "more_symbol_comparatives")
769
 
770
  except:
771
+ return (0, "COMPARATIVES", "unknown_error")