ValadisCERTH committed on
Commit
988bfac
·
1 Parent(s): 66c9e8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -35
app.py CHANGED
@@ -12,7 +12,6 @@ nltk.download('punkt')
12
 
13
  # load the spacy model
14
  spacy.cli.download("en_core_web_sm")
15
- spacy.cli.download("en_core_web_lg")
16
 
17
  # use the small spaCy model: it is closer to a BOW model, which is what we care about here since we just compare words
18
  nlp = spacy.load('en_core_web_sm', disable=["parser", "ner"])
@@ -24,6 +23,7 @@ def find_comptives_symbols(sentence):
24
  If more than one symbols exist, return []
25
  """
26
 
 
27
  pattern = r"(?<![<=>])[%s](?![<=>])" % (re.escape("<=>"))
28
  matches = re.findall(pattern, sentence)
29
 
@@ -38,7 +38,7 @@ def find_comptives_symbols(sentence):
38
 
39
  def find_comptives_straight_patterns(sentence):
40
  """
41
- Function to identivy mentions of compartives. The form is "comparative adverbs/adjectives followed by than", "words like more/less followed by than", "equal to"
42
  """
43
 
44
  doc = nlp(sentence)
@@ -113,7 +113,6 @@ def find_comptives_straight_patterns(sentence):
113
  return comparatives
114
 
115
 
116
-
117
  # helper functions for 'identify_pattern_bigger_smaller'
118
 
119
  def identify_comparison(sentence):
@@ -319,7 +318,7 @@ def find_equal_to_comptives_ngrams(sentence):
319
  similarity = sentence_ngram_doc.similarity(emb_ref)
320
 
321
  if similarity >= max_similarity:
322
- possible_reference_list.append({'comparative': [sentence_ngram_str, emb_ref, similarity, "="]})
323
  break
324
 
325
  # if we have found a possible reference that is similar enough to an n-gram of the input sentence, return the comparative '=', otherwise return 0
@@ -329,6 +328,7 @@ def find_equal_to_comptives_ngrams(sentence):
329
  return []
330
 
331
 
 
332
  def single_verb_comptives(sentence):
333
  """
334
  This function takes a sentence and identifies any mention of bigger than, smaller than, equal to, expressed
@@ -363,10 +363,22 @@ def single_verb_comptives(sentence):
363
  break
364
 
365
  elif any(lemma in equal_references_sg for lemma in syn.lemma_names()):
366
- # print(lemma)
367
  equal_list.append({'comparative': [token.text, "="]})
368
  break
369
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  final_list = bigger_list + smaller_list + equal_list
371
 
372
  if final_list:
@@ -503,6 +515,7 @@ def multiword_verb_comptives(sentence):
503
  return bigger_l + smaller_l + equal_l
504
 
505
 
 
506
  def identify_comparatives(sentence):
507
  """
508
  This function combines the results of all the aforementioned techniques (simple and advanced) to identify bigger than, smaller than, equal to patterns
@@ -535,55 +548,74 @@ def identify_comparatives(sentence):
535
  return unique_output
536
 
537
 
538
- def magnitude_binding(sentence):
539
-
540
- comparative_symbols = find_comptives_symbols(sentence)
541
- comparative_mentions = identify_comparatives(sentence)
542
 
543
- # starting with the symbols, if one was captured
544
- if len(comparative_symbols) == 1:
545
 
546
- # if the rest of the functions are empty (meaning that there are no other references)
547
- if len(comparative_mentions) == 0:
548
- return comparative_symbols
549
 
550
- # in case that there is no symbol
551
- elif len(comparative_symbols) == 0:
552
 
553
- # we need only one mention of comparatives
554
- if len(comparative_mentions) == 1:
555
- return comparative_mentions
556
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
557
  else:
558
- return 0
559
 
560
- # case of multiple symbol references
561
- else:
562
- return 0
563
 
564
 
565
 
566
  from transformers import pipeline
567
  import gradio as gr
568
 
569
- title = "Natural Language module Demo for Comparatives identification"
570
- description = "This is a simple demo just for demonstration purposes, so that Serco team might have the chance to validate the results of the Natural Language module concerning the comparatives identification, while in progress"
571
 
572
  examples = [
573
- ["earthquake located in Ishkoshim higher than 5, Tajikistan in May the ninth with magnitude equal to 6.2"],
574
- ["earthquake located in Ishkoshim, Tajikistan in May the ninth with magnitude < 6.2"],
575
- ["earthquake located in Ishkoshim that is > than the one in Rome, and < than 8.2"],
576
- ["earthquake located in Ishkoshim, Tajikistan in May the ninth with magnitude lesser than 6.2"],
577
- ["earthquake located in Ishkoshim, Tajikistan in May the ninth with magnitude same with 6.2"],
578
- ["I want an earthquake that happend in Rome during 2016 with a magnitude dallying of 5."],
579
- ["I want an earthquake that happend in Rome during 2016 and surpassed the magnitude of 5."],
580
- ["I want an earthquake that happend in Rome during 2016 with a magnitude similar to 5."],
581
- ["I want an earthquaqe event that happend in Italy, Rome during 2016 February with a magnitude that was in a par with 5."]
 
 
 
 
 
 
582
  ]
583
 
584
 
585
  gr.Interface(
586
- fn=magnitude_binding,
587
  inputs="text",
588
  outputs="text",
589
  title=title,
 
12
 
13
  # load the spacy model
14
  spacy.cli.download("en_core_web_sm")
 
15
 
16
  # use the small spaCy model: it is closer to a BOW model, which is what we care about here since we just compare words
17
  nlp = spacy.load('en_core_web_sm', disable=["parser", "ner"])
 
23
  If more than one symbols exist, return []
24
  """
25
 
26
+ # symbols regex pattern
27
  pattern = r"(?<![<=>])[%s](?![<=>])" % (re.escape("<=>"))
28
  matches = re.findall(pattern, sentence)
29
 
 
38
 
39
  def find_comptives_straight_patterns(sentence):
40
  """
41
+ Function to identify mentions of comparatives. The form is "comparative adverbs/adjectives followed by than", "words like more/less followed by than", "equal to"
42
  """
43
 
44
  doc = nlp(sentence)
 
113
  return comparatives
114
 
115
 
 
116
  # helper functions for 'identify_pattern_bigger_smaller'
117
 
118
  def identify_comparison(sentence):
 
318
  similarity = sentence_ngram_doc.similarity(emb_ref)
319
 
320
  if similarity >= max_similarity:
321
+ possible_reference_list.append({'comparative': [sentence_ngram_str, "="]})
322
  break
323
 
324
  # if we have found a possible reference that is similar enough to an n-gram of the input sentence, return the comparative '=', otherwise return 0
 
328
  return []
329
 
330
 
331
+
332
  def single_verb_comptives(sentence):
333
  """
334
  This function takes a sentence and identifies any mention of bigger than, smaller than, equal to, expressed
 
363
  break
364
 
365
  elif any(lemma in equal_references_sg for lemma in syn.lemma_names()):
 
366
  equal_list.append({'comparative': [token.text, "="]})
367
  break
368
 
369
+ # for syn in synsets:
370
+ # antonyms = syn.lemmas()[0].antonyms()
371
+
372
+ # if antonyms and any(lemma in bigger_references_sg for lemma in antonyms[0].name()):
373
+ # return 0
374
+
375
+ # elif antonyms and any(lemma in lesser_references_sg for lemma in antonyms[0].name()):
376
+ # return 0
377
+
378
+
379
+ # elif antonyms and any(lemma in equal_references_sg for lemma in antonyms[0].name()):
380
+ # return 0
381
+
382
  final_list = bigger_list + smaller_list + equal_list
383
 
384
  if final_list:
 
515
  return bigger_l + smaller_l + equal_l
516
 
517
 
518
+
519
  def identify_comparatives(sentence):
520
  """
521
  This function combines the results of all the aforementioned techniques (simple and advanced) to identify bigger than, smaller than, equal to patterns
 
548
  return unique_output
549
 
550
 
 
 
 
 
551
 
552
+ def comparatives_binding(sentence):
 
553
 
554
+ try:
555
+ comparative_symbols = find_comptives_symbols(sentence)
556
+ comparative_mentions = identify_comparatives(sentence)
557
 
558
+ # starting with the symbols, if one was captured
559
+ if len(comparative_symbols) == 1:
560
 
561
+ # if the rest of the functions are empty (meaning that there are no other references)
562
+ if len(comparative_mentions) == 0:
563
+ return comparative_symbols
564
 
565
+ else:
566
+ return (0, "COMPARATIVES", "more_comparatives_mentions")
567
+
568
+ # in case that there is no symbol
569
+ elif len(comparative_symbols) == 0:
570
+
571
+ # we need only one mention of comparatives
572
+ if len(comparative_mentions) == 1:
573
+ return comparative_mentions
574
+
575
+ # case of no comparative mentions
576
+ elif len(comparative_mentions) == 0:
577
+ return (0, "COMPARATIVES", "no_comparatives")
578
+
579
+ # case of more than one comparative mention
580
+ else:
581
+ return (0, "COMPARATIVES", "more_comparatives_mentions")
582
+
583
+ # case of multiple symbol references
584
  else:
585
+ return (0, "COMPARATIVES", "more_symbol_comparatives")
586
 
587
+ except:
588
+ return (0, "COMPARATIVES", "unknown_error")
 
589
 
590
 
591
 
592
  from transformers import pipeline
593
  import gradio as gr
594
 
595
+ title = "Comparatives Demo"
596
+ description = "This is a simple demo just for demonstration purposes for Serco team, to validate the results of the Natural Language module concerning comparatives identification, while in progress"
597
 
598
  examples = [
599
+ ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude > 6.2"],
600
+ ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude = 6.2"],
601
+ ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude bigger than 6.2"],
602
+ ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude more than 6.2"],
603
+ ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude higher than 6.2"],
604
+ ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude smaller than 6.2"],
605
+ ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude lesser than 6.2"],
606
+ ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude equal to 6.2"],
607
+ ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude equivalent to 6.2"],
608
+ ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude surpassing 6.2"],
609
+ ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude lagging of 6.2"],
610
+ ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude that matches 6.2"],
611
+ ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude that is superior of 6.2"],
612
+ ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude that is inferior of 6.2"],
613
+ ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude that is in line with 6.2"]
614
  ]
615
 
616
 
617
  gr.Interface(
618
+ fn=comparatives_binding,
619
  inputs="text",
620
  outputs="text",
621
  title=title,