ValadisCERTH committed on
Commit 9fa76bc · 1 Parent(s): 988bfac

Update app.py

Files changed (1)
  1. app.py +2 -591
app.py CHANGED
@@ -1,597 +1,8 @@
- import spacy
- import re
- import nltk
- from nltk.corpus import wordnet
- import numpy as np
-
- from sklearn.metrics.pairwise import cosine_similarity
-
- nltk.download('wordnet')
- nltk.download('omw-1.4')
- nltk.download('punkt')
-
- # download the spaCy model
- spacy.cli.download("en_core_web_sm")
-
- # use the small spaCy model: it keeps us close to a bag-of-words model, which suits our case since we only compare words
- nlp = spacy.load('en_core_web_sm', disable=["parser", "ner"])
-
-
- def find_comptives_symbols(sentence):
-     """
-     Capture standalone occurrences of the symbols <, >, =
-     (the lookarounds exclude multi-character operators such as <= or >=)
-     """
-
-     # symbols regex pattern
-     pattern = r"(?<![<=>])[%s](?![<=>])" % (re.escape("<=>"))
-     matches = re.findall(pattern, sentence)
-
-     found_symbols = []
-     for matching in matches:
-         # add the symbol to the list for each occurrence found
-         found_symbols.append({'comparative': ['symbol', matching]})
-
-     # return the found symbols
-     return found_symbols
-
-
- def find_comptives_straight_patterns(sentence):
-     """
-     Identify mentions of comparatives of the forms "comparative adverb/adjective followed by 'than'",
-     "more/less followed by 'than'", and "equal to"
-     """
-
-     doc = nlp(sentence)
-     comparatives = []
-
-     for token in doc:
-
-         # find mentions of "equal" followed by "to"
-         if token.text.lower() == "equal":
-             next_token = token.nbor()
-
-             if next_token.text.lower() == "to":
-                 prev_token = token.nbor(-1)
-
-                 if prev_token.pos_ == "NOUN":
-                     comparatives.append({'comparative': ["equal to", "="]})
-
-         # find mentions of "more"/"less" followed by "than"
-         elif token.text.lower() in ["more", "less"]:
-
-             next_token = token.nbor()
-
-             if next_token.text.lower() == "than":
-                 prev_token = token.nbor(-1)
-
-                 # here we could constrain what precedes more/less, e.g. require a NOUN (such as "magnitude")
-                 # or even the word "magnitude" itself; for the moment this check is disabled
-                 # if prev_token.pos_ == "NOUN":
-
-                 if token.text.lower() == 'more':
-                     comparatives.append({'comparative': [token.text + " " + next_token.text, '>']})
-                 elif token.text.lower() == 'less':
-                     comparatives.append({'comparative': [token.text + " " + next_token.text, '<']})
-
-         # find mentions of comparative adjectives or comparative adverbs followed by "than"
-         elif token.tag_ in ("JJR", "RBR"):
-             next_token = token.nbor()
-
-             if next_token.text.lower() == "than" and next_token.nbor().pos_ != "NOUN":
-
-                 # check if the token is a synonym of "bigger"
-
-                 # retrieve a set of synonyms for the concepts of 'big' and 'bigger'
-                 big_synonyms = set(wordnet.synsets('big') + wordnet.synsets('large') + wordnet.synsets('great') + wordnet.synsets('huge') + wordnet.synsets('enormous') + wordnet.synsets('heavy') + wordnet.synsets('strong') + wordnet.synsets('massive') + wordnet.synsets('immense') + wordnet.synsets('substantial'))
-                 bigger_synonyms = set(wordnet.synsets('bigger') + wordnet.synsets('larger') + wordnet.synsets('greater') + wordnet.synsets('higher') + wordnet.synsets('taller') + wordnet.synsets('heavier') + wordnet.synsets('stronger'))
-
-                 bigger_related_words = big_synonyms.union(bigger_synonyms)
-                 bigger_rel_words = [word.name().split('.')[0] for word in bigger_related_words]
-
-                 flag_bigger = 0
-
-                 if token.text.lower() in bigger_rel_words:
-                     flag_bigger = 1
-                     comparatives.append({'comparative': [token.text + " " + next_token.text, '>']})
-
-                 # if no synonym of bigger was found, check the 'smaller' synsets
-                 if not flag_bigger:
-
-                     # retrieve a set of synonyms for the concepts of 'small' and 'smaller'
-                     small_synonyms = set(wordnet.synsets('small') + wordnet.synsets('little') + wordnet.synsets('tiny') + wordnet.synsets('petite') + wordnet.synsets('miniature') + wordnet.synsets('slight') + wordnet.synsets('meager') + wordnet.synsets('inconsequential') + wordnet.synsets('minor'))
-                     smaller_synonyms = set(wordnet.synsets('smaller') + wordnet.synsets('lesser') + wordnet.synsets('lower') + wordnet.synsets('shorter') + wordnet.synsets('lighter') + wordnet.synsets('weaker'))
-
-                     smaller_related_words = small_synonyms.union(smaller_synonyms)
-                     smaller_rel_words = [word.name().split('.')[0] for word in smaller_related_words]
-
-                     if token.text.lower() in smaller_rel_words:
-                         comparatives.append({'comparative': [token.text + " " + next_token.text, '<']})
-
-     return comparatives
-
-
- # helper functions for 'identify_bigger_smaller_advanced'
-
- def identify_comparison(sentence):
-     """
-     Capture patterns of 'word-er' followed by 'than' (e.g. 'better than', 'lesser than', etc.)
-     """
-
-     pattern = r'\b(\w+er than)\b'
-     matches = re.findall(pattern, sentence)
-
-     if matches:
-         return matches
-     else:
-         return 0
-
-
- def find_more_than_reference(sentence):
-     """
-     Capture patterns of 'more' followed by a word followed by 'than' (e.g. 'more advanced than')
-     """
-
-     pattern = r"(more) (\w+) than"
-     matches = re.findall(pattern, sentence)
-
-     if matches:
-         return [' '.join(match) for match in matches]
-     else:
-         return 0
-
-
- def find_less_than_reference(sentence):
-     """
-     Capture patterns of 'less' followed by a word followed by 'than' (e.g. 'less advanced than')
-     """
-
-     pattern = r"(less) (\w+) than"
-     matches = re.findall(pattern, sentence)
-
-     if matches:
-         return [' '.join(match) for match in matches]
-     else:
-         return 0
-
-
- def is_related_to(word, target_word):
-     """
-     Return True if the input 'word' is semantically related to the 'target_word', otherwise False.
-     """
-
-     target_synsets = set(wordnet.synsets(target_word))
-     word_synsets = set(wordnet.synsets(word))
-
-     # related here means the two words share at least one WordNet synset
-     return bool(word_synsets.intersection(target_synsets))
-
-
- def is_related_to_bigger(word):
-     """
-     Return True if the input 'word' is semantically related to the concept 'bigger', otherwise False.
-     """
-
-     if word.lower() == "more" or word.lower().startswith("more "):
-         return True
-
-     # retrieve a set of synonyms for the concepts of 'big' and 'bigger'
-     big_synonyms = set(wordnet.synsets('big') + wordnet.synsets('large') + wordnet.synsets('great') + wordnet.synsets('huge') + wordnet.synsets('enormous') + wordnet.synsets('heavy') + wordnet.synsets('strong') + wordnet.synsets('massive') + wordnet.synsets('immense') + wordnet.synsets('substantial'))
-     bigger_synonyms = set(wordnet.synsets('bigger') + wordnet.synsets('larger') + wordnet.synsets('greater') + wordnet.synsets('higher') + wordnet.synsets('taller') + wordnet.synsets('heavier') + wordnet.synsets('stronger'))
-
-     related_words = big_synonyms.union(bigger_synonyms)
-
-     # check if the input word is semantically related to any of those 'big'/'bigger' synonyms
-     for related_word in related_words:
-         if is_related_to(word, related_word.name().split('.')[0]):
-             return True
-     return False
-
-
- def is_related_to_smaller(word):
-     """
-     Return True if the input 'word' is semantically related to the concept 'smaller', otherwise False.
-     """
-     if word.lower() == "less" or word.lower().startswith("less "):
-         return True
-
-     # retrieve a set of synonyms for the concepts of 'small' and 'smaller'
-     small_synonyms = set(wordnet.synsets('small') + wordnet.synsets('little') + wordnet.synsets('tiny') + wordnet.synsets('petite') + wordnet.synsets('miniature') + wordnet.synsets('slight') + wordnet.synsets('meager') + wordnet.synsets('inconsequential') + wordnet.synsets('minor'))
-     smaller_synonyms = set(wordnet.synsets('smaller') + wordnet.synsets('lesser') + wordnet.synsets('lower') + wordnet.synsets('shorter') + wordnet.synsets('lighter') + wordnet.synsets('weaker'))
-
-     related_words = small_synonyms.union(smaller_synonyms)
-
-     # check if the input word is semantically related to any of those 'small'/'smaller' synonyms
-     for related_word in related_words:
-         if is_related_to(word, related_word.name().split('.')[0]):
-             return True
-     return False
-
-
- def identify_bigger_smaller_advanced(sentence):
-     """
-     Complementary function to capture cases of 'word ending in -er' followed by 'than',
-     and cases of 'more'/'less' followed by a word followed by 'than'
-     """
-
-     # pattern 'word ending in -er' followed by 'than' (pattern1)
-     word_er_than = identify_comparison(sentence)
-
-     # pattern 'more' followed by a word followed by 'than' (pattern2)
-     more_word_than = find_more_than_reference(sentence)
-
-     # pattern 'less' followed by a word followed by 'than' (pattern3)
-     less_word_than = find_less_than_reference(sentence)
-
-     bigger_list = []
-     smaller_list = []
-
-     # in case any pattern is captured
-     if word_er_than or more_word_than or less_word_than:
-
-         # in case of pattern1
-         if word_er_than:
-             for word in word_er_than:
-
-                 # perform the relevant substitutions
-                 target_word = word.replace("than", "").strip()
-
-                 # examine whether it is a bigger-related or a smaller-related word
-                 bigger_word = is_related_to_bigger(target_word)
-                 smaller_word = is_related_to_smaller(target_word)
-
-                 # case of a bigger word
-                 if bigger_word and not smaller_word:
-                     bigger_list.append({"comparative": [word, ">"]})
-
-                 # case of a smaller word
-                 elif smaller_word and not bigger_word:
-                     smaller_list.append({"comparative": [word, "<"]})
-
-         # in case of pattern2
-         if more_word_than:
-             for word in more_word_than:
-
-                 # perform the relevant substitutions
-                 target_word = word.replace("than", "").replace("more", "").strip()
-
-                 # in this case it must be a bigger-related word
-                 bigger_word = is_related_to_bigger(target_word)
-
-                 # case of a bigger word
-                 if bigger_word:
-                     bigger_list.append({"comparative": [word, ">"]})
-
-         # in case of pattern3
-         if less_word_than:
-             for word in less_word_than:
-
-                 # perform the relevant substitutions
-                 target_word = word.replace("than", "").replace("less", "").strip()
-
-                 # in this case it must be a smaller-related word
-                 lesser_word = is_related_to_smaller(target_word)
-
-                 # case of a smaller word
-                 if lesser_word:
-                     smaller_list.append({"comparative": [word, "<"]})
-
-     # return the combined list
-     return bigger_list + smaller_list
-
-
- def find_equal_to_comptives_ngrams(sentence):
-     """
-     Take a sentence as input and return a list of captured 'equal to' comparatives,
-     based on semantic similarity over n-grams. The possible reference phrases are provided as a list.
-     """
-
-     # reference list for the concept of 'equal to'; each entry is a candidate for the semantic-similarity check
-     possible_references = ["equal to", "same as", "similar to", "identical to", "equivalent to", "tantamount to", "corresponding to", "comparable to", "akin to", "commensurate with", "in line with", "on a par with", "indistinguishable from", "corresponding with", "congruent with"]
-
-     # empirically, this threshold is sufficient
-     similarity_threshold = 0.85
-
-     possible_reference_list = []
-
-     # parse each reference with the spaCy model to obtain its embedding
-     embedding_references = []
-     for reference in possible_references:
-         reference_doc = nlp(reference)
-         embedding_references.append(reference_doc)
-
-     # check 2-grams, 3-grams, and 4-grams
-     for n in range(2, 5):
-
-         # get the n-grams
-         sentence_ngrams = list(nltk.ngrams(sentence.split(), n))
-
-         for sent_ngram in sentence_ngrams:
-             sentence_ngram_str = ' '.join(sent_ngram)
-             sentence_ngram_doc = nlp(sentence_ngram_str)
-
-             for emb_ref in embedding_references:
-                 similarity = sentence_ngram_doc.similarity(emb_ref)
-
-                 if similarity >= similarity_threshold:
-                     possible_reference_list.append({'comparative': [sentence_ngram_str, "="]})
-                     break
-
-     # if a reference is similar enough to an n-gram of the input sentence, return the captured '=' comparatives, otherwise return []
-     if possible_reference_list:
-         return possible_reference_list
-     else:
-         return []
-
-
-
- def single_verb_comptives(sentence):
-     """
-     Take a sentence and identify any mention of bigger than, smaller than, or equal to expressed
-     as a single-word verb. WordNet synsets are used to check for synonyms (an antonym check is sketched below but disabled).
-     """
-
-     # base references
-     bigger_references_sg = ["surpass", "exceed", "outstrip", "outdo", "outmatch", "outclass", "eclipse", "overshadow", "outrank", "overtake", "top", "beat", "transcend", "dominate", "prevail", "trump", "vanquish", "outperform", "outgun", "outdistance", "outshine"]
-     lesser_references_sg = ["lag", "trail", "lose", "underperform", "yield", "surrender", "submit", "succumb", "straggle", "dawdle", "lollygag", "loiter", "delay", "defer", "postpone", "procrastinate", "linger", "hesitate", "prolong", "drag"]
-     equal_references_sg = ["match", "equal", "tie", "correspond", "conform", "agree", "harmonize", "coordinate", "comply", "fit", "parallel", "resemble", "mirror", "emulate", "equilibrate", "balance", "counterbalance", "offset", "compensate"]
-
-     doc = nlp(sentence)
-
-     bigger_list = []
-     smaller_list = []
-     equal_list = []
-
-     # find all verbs and compare their lemmas with the synonyms of each of the previous references; assign a label accordingly
-     for token in doc:
-         if token.pos_ == "VERB":
-
-             for lemma in token.lemma_.split('|'):
-                 synsets = wordnet.synsets(lemma, pos='v')
-
-                 for syn in synsets:
-                     if any(name in bigger_references_sg for name in syn.lemma_names()):
-                         bigger_list.append({'comparative': [token.text, ">"]})
-                         break
-
-                     elif any(name in lesser_references_sg for name in syn.lemma_names()):
-                         smaller_list.append({'comparative': [token.text, "<"]})
-                         break
-
-                     elif any(name in equal_references_sg for name in syn.lemma_names()):
-                         equal_list.append({'comparative': [token.text, "="]})
-                         break
-
-                 # (disabled) antonym check:
-                 # for syn in synsets:
-                 #     antonyms = syn.lemmas()[0].antonyms()
-                 #     if antonyms and any(name in bigger_references_sg for name in antonyms[0].name()):
-                 #         return 0
-                 #     elif antonyms and any(name in lesser_references_sg for name in antonyms[0].name()):
-                 #         return 0
-                 #     elif antonyms and any(name in equal_references_sg for name in antonyms[0].name()):
-                 #         return 0
-
-     final_list = bigger_list + smaller_list + equal_list
-
-     if final_list:
-         return final_list
-     else:
-         return []
-
-
- # helper functions for 'multiword_verb_comptives'
-
- # define the multi-word verb lists
- bigger_list = ["is a cut above", "is ahead of", "is superior to", "is greater than", "raise the bar", "climb the ladder", "set the standard", "set the pace", "break the mold", "push the envelope", "raise the game", "is a class apart"]
- smaller_list = ["fall behind", "is inferior to", "is smaller than", "lag behind", "trail behind", "is second to", "bring up the rear", "lose ground", "bring up the tail end", "fall short", "fall beneath", "fail to measure up", "put off"]
- equal_list = ["is in line with", "is equal to", "is on a par with", "is on par with", "is the same as", "is comparable to", "is in sync with", "is in harmony with", "is in step with", "is in tune with", "is in accord with", "is consistent with", "is consonant with", "keep pace with", "keep up with", "is equivalent to", "balance out", "even out"]
-
- # calculate the embeddings of the multi-word verbs (mean of the token vectors)
- bigger_embeddings = [np.mean([token.vector for token in nlp(verb)], axis=0) for verb in bigger_list]
- smaller_embeddings = [np.mean([token.vector for token in nlp(verb)], axis=0) for verb in smaller_list]
- equal_embeddings = [np.mean([token.vector for token in nlp(verb)], axis=0) for verb in equal_list]
-
-
- def check_list(ngram, verb_list):
-     """
-     Check whether an n-gram is in a multi-word verb list
-     """
-
-     return ngram in verb_list
-
-
- def cosine_sim(a, b):
-     """
-     Calculate the cosine similarity of two vectors
-     """
-
-     return cosine_similarity(a.reshape(1, -1), b.reshape(1, -1))[0][0]
-
-
- # we examine the n-grams from longest to shortest, and whenever we find a match we "consume" its tokens,
- # so that shorter n-grams are not matched afterwards (e.g. "is on a par with" would otherwise also match "on a par with", "par with", etc.)
-
- def multiword_verb_comptives(sentence):
-     """
-     Take a sentence and identify any mention of bigger than, smaller than, or equal to expressed
-     as a multi-word verb. Based on three reference lists, it first performs a simple string comparison
-     between each of their elements and the n-grams of the input sentence. If there is no match, it
-     performs the same procedure with cosine similarity to identify any sufficiently similar n-grams.
-     """
-
-     # split the sentence into tokens
-     tokens = sentence.split()
-
-     # track the maximum similarity seen so far
-     max_sim = 0
-
-     # these lists are used to capture any possible reference
-     bigger_l = []
-     smaller_l = []
-     equal_l = []
-
-     # set of tokens that already belong to a matched n-gram
-     matched_ngrams = set()
-
-     # iterate through the n-grams of the sentence, starting with the largest
-     for n in range(5, 1, -1):
-         for i in range(len(tokens)-n+1):
-             ngram = ' '.join(tokens[i:i+n])
-
-             # skip n-grams whose tokens have already been matched
-             if matched_ngrams.intersection(ngram.split()):
-                 continue
-
-             # check if the n-gram is in bigger_list
-             if check_list(ngram, bigger_list):
-                 matched_ngrams.update(set(ngram.split()))
-                 bigger_l.append({"comparative": [ngram, '>']})
-
-             # check if the n-gram is in smaller_list
-             elif check_list(ngram, smaller_list):
-                 matched_ngrams.update(set(ngram.split()))
-                 smaller_l.append({"comparative": [ngram, '<']})
-
-             # check if the n-gram is in equal_list
-             elif check_list(ngram, equal_list):
-                 matched_ngrams.update(set(ngram.split()))
-                 equal_l.append({"comparative": [ngram, '=']})
-
-             # otherwise, compare the n-gram with each list using the pre-calculated embeddings
-             else:
-                 ngram_emb = np.mean([token.vector for token in nlp(ngram)], axis=0)
-                 similarities_bigger = [cosine_sim(ngram_emb, verb_emb) for verb_emb in bigger_embeddings]
-                 max_sim_bigger = max(similarities_bigger)
-
-                 similarities_smaller = [cosine_sim(ngram_emb, verb_emb) for verb_emb in smaller_embeddings]
-                 max_sim_smaller = max(similarities_smaller)
-
-                 similarities_equal = [cosine_sim(ngram_emb, verb_emb) for verb_emb in equal_embeddings]
-                 max_sim_equal = max(similarities_equal)
-
-                 # determine the maximum similarity value among the three lists; append only above the 0.9 threshold
-                 if max_sim_bigger > max_sim_smaller and max_sim_bigger > max_sim_equal and max_sim_bigger > max_sim:
-                     max_sim = max_sim_bigger
-                     matched_ngrams.update(set(ngram.split()))
-                     if max_sim > 0.9:
-                         bigger_l.append({"comparative": [ngram, '>']})
-
-                 elif max_sim_smaller > max_sim_bigger and max_sim_smaller > max_sim_equal and max_sim_smaller > max_sim:
-                     max_sim = max_sim_smaller
-                     matched_ngrams.update(set(ngram.split()))
-                     if max_sim > 0.9:
-                         smaller_l.append({"comparative": [ngram, '<']})
-
-                 elif max_sim_equal > max_sim_bigger and max_sim_equal > max_sim_smaller and max_sim_equal > max_sim:
-                     max_sim = max_sim_equal
-                     matched_ngrams.update(set(ngram.split()))
-                     if max_sim > 0.9:
-                         equal_l.append({"comparative": [ngram, '=']})
-
-     return bigger_l + smaller_l + equal_l
-
-
-
- def identify_comparatives(sentence):
-     """
-     Combine the results of all the aforementioned techniques (simple and advanced) to identify
-     bigger than, smaller than, and equal to patterns
-     """
-
-     # identify straightforward patterns
-     straight_comptives = find_comptives_straight_patterns(sentence)
-
-     # identify advanced bigger/smaller comparatives
-     bigger_smaller_comparatives = identify_bigger_smaller_advanced(sentence)
-
-     # identify advanced equal-to comparatives
-     equal_to_comparatives = find_equal_to_comptives_ngrams(sentence)
-
-     single_verb = single_verb_comptives(sentence)
-     multi_verb = multiword_verb_comptives(sentence)
-
-     # gather all the patterns that were captured
-     comparatives = straight_comptives + bigger_smaller_comparatives + equal_to_comparatives + single_verb + multi_verb
-
-     # since the different techniques might capture the same pattern, keep only the first occurrence of each captured phrase
-     unique_comparatives = {}
-
-     for item in comparatives:
-         if item['comparative'][0] not in unique_comparatives:
-             unique_comparatives[item['comparative'][0]] = item
-
-     unique_output = list(unique_comparatives.values())
-
-     return unique_output
-
-
-
- def comparatives_binding(sentence):
-     """
-     Bind everything together: accept the sentence only when exactly one comparative reference is found
-     """
-
-     try:
-         comparative_symbols = find_comptives_symbols(sentence)
-         comparative_mentions = identify_comparatives(sentence)
-
-         # starting with the symbols, if exactly one was captured
-         if len(comparative_symbols) == 1:
-
-             # if the other functions found nothing (i.e. there are no other references)
-             if len(comparative_mentions) == 0:
-                 return comparative_symbols
-
-             else:
-                 return (0, "COMPARATIVES", "more_comparatives_mentions")
-
-         # in case there is no symbol
-         elif len(comparative_symbols) == 0:
-
-             # we need exactly one mention of a comparative
-             if len(comparative_mentions) == 1:
-                 return comparative_mentions
-
-             # case of no comparative mentions
-             elif len(comparative_mentions) == 0:
-                 return (0, "COMPARATIVES", "no_comparatives")
-
-             # case of more than one comparative mention
-             else:
-                 return (0, "COMPARATIVES", "more_comparatives_mentions")
-
-         # case of multiple symbol references
-         else:
-             return (0, "COMPARATIVES", "more_symbol_comparatives")
-
-     except Exception:
-         return (0, "COMPARATIVES", "unknown_error")
-
-
  from transformers import pipeline
  import gradio as gr
 
+ from helper import comparatives_binding
+
  title = "Comparatives Demo"
  description = "This is a simple demo for the Serco team, to validate the results of the in-progress Natural Language module for comparatives identification"
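
For context, here is a minimal sketch of how the slimmed-down app.py presumably wires the imported comparatives_binding into the Gradio demo. The diff is truncated before the interface code, so the predict wrapper, the "text" input/output types, and the launch call below are illustrative assumptions, not the file's actual contents:

import gradio as gr

from helper import comparatives_binding  # the logic moved out of app.py by this commit

title = "Comparatives Demo"
description = "..."  # the description string defined above

def predict(sentence):
    # hypothetical wrapper: comparatives_binding returns either a single-element
    # list such as [{'comparative': ['bigger than', '>']}] or an error triple
    # such as (0, "COMPARATIVES", "no_comparatives"); render it as text here
    return str(comparatives_binding(sentence))

# interface arguments are assumptions, not part of the commit
demo = gr.Interface(fn=predict, inputs="text", outputs="text",
                    title=title, description=description)
demo.launch()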