ValadisCERTH committed on
Commit
3f018ec
·
1 Parent(s): 6a57ce1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -240
app.py CHANGED
@@ -1,247 +1,19 @@
1
- import spacy
2
- import re
3
- from word2number import w2n
4
-
5
- # load the spacy model
6
- spacy.cli.download("en_core_web_lg")
7
- nlp = spacy.load("en_core_web_lg")
8
-
9
-
10
- def capture_numbers (input_sentence):
11
- '''
12
- This is a function to capture cases of referred numbers either in numeric or free-text form
13
- '''
14
-
15
- try:
16
- # Define the regular expression patterns
17
- pattern1 = r"\b(\w+)\s+(point|decimal|dot|comma)\s+(\w+)\b"
18
-
19
- # Find all matches in the text
20
- matches = re.findall(pattern1, input_sentence)
21
-
22
- # This part is to capture cases like six point five, 5 point five, six point 5, 5 point 5
23
- pattern_numbers = []
24
- for match in matches:
25
- if len(match) == 3:
26
- # add the $pattern string to easily specify them in a subsequent step
27
- full_string = "{} {} {} {}".format(match[0], match[1], match[2], '$pattern')
28
- pattern_numbers.append(full_string)
29
-
30
- for elem in pattern_numbers:
31
- input_sentence = input_sentence.replace(elem, " ")
32
-
33
- if pattern_numbers:
34
-
35
- # Remove duplicates with set and convert back to list
36
- final_numbers = list(set(pattern_numbers))
37
- return final_numbers
38
-
39
- else:
40
-
41
- # Parse the input sentence with Spacy
42
- doc = nlp(input_sentence)
43
-
44
- # This is to capture all the numbers in int and float form, as well as numbers like eight, two, hundred
45
- numbers = [token.text for token in doc if token.like_num]
46
-
47
- # Remove duplicates with set and convert back to list
48
- final_numbers = list(set(numbers))
49
-
50
- # Return the extracted numbers, or 0 if none were found
51
- if final_numbers:
52
- return final_numbers
53
- else:
54
- return 0
55
-
56
- except:
57
- return 0
58
-
59
- def numeric_freetext_dot_freetext(text):
60
- '''
61
- This is a function to convert cases of 'six point five'
62
- '''
63
-
64
- # Define a dictionary to map freetext numbers to numeric values
65
- number_map = {
66
- 'zero': 0,
67
- 'one': 1,
68
- 'two': 2,
69
- 'three': 3,
70
- 'four': 4,
71
- 'five': 5,
72
- 'six': 6,
73
- 'seven': 7,
74
- 'eight': 8,
75
- 'nine': 9,
76
- 'ten': 10,
77
- 'eleven': 11,
78
- 'twelve': 12,
79
- 'thirteen': 13,
80
- 'fourteen': 14,
81
- 'fifteen': 15,
82
- 'sixteen': 16,
83
- 'seventeen': 17,
84
- 'eighteen': 18,
85
- 'nineteen': 19,
86
- 'twenty': 20,
87
- 'thirty': 30,
88
- 'forty': 40,
89
- 'fifty': 50,
90
- 'sixty': 60,
91
- 'seventy': 70,
92
- 'eighty': 80,
93
- 'ninety': 90,
94
- 'hundred': 100,
95
- 'thousand': 1000,
96
- 'million': 1000000,
97
- 'billion': 1000000000,
98
- 'trillion': 1000000000000
99
- }
100
-
101
- try:
102
-
103
- # Define regular expression to match freetext numbers
104
- pattern = re.compile(r'(\w+(?:\s+\w+)*)\s+(point|decimal|dot|comma)\s+(\w+(?:\s+\w+)*)')
105
-
106
- # Extract freetext number and decimal part from input text
107
- match = pattern.search(text)
108
-
109
- if match:
110
- whole_part = match.group(1).lower()
111
- decimal_part = match.group(3).lower()
112
- # Convert whole and decimal parts to numeric form
113
- numeric_whole = sum(number_map[word] * (10 ** (len(whole_part.split()) - i - 1)) for i, word in enumerate(whole_part.split()))
114
- numeric_decimal = sum(number_map[word] * (0.1 ** (i + 1)) for i, word in enumerate(decimal_part.split()))
115
- return numeric_whole + numeric_decimal
116
-
117
- # Return 0 if the input text doesn't match the regular expression
118
- return 0
119
-
120
- except:
121
- return 0
122
-
123
-
124
-
125
- def numeric_number_dot_freetext(text):
126
- '''
127
- This is a function to convert cases of '6 point five and six point 5'
128
- '''
129
-
130
- try:
131
- # Define a dictionary to map words to numbers
132
- num_dict = {"zero":0, "one":1, "two":2, "three":3, "four":4, "five":5,
133
- "six":6, "seven":7, "eight":8, "nine":9}
134
-
135
- # Define a regular expression pattern to extract the numeric form and free text form from input text
136
- pattern = r"(\d+|\w+)(?:\s+(?:decimal|point|dot|comma)\s+)(\d+|\w+)"
137
-
138
- # Use regular expression to extract the numeric form and free text form from input text
139
- match = re.search(pattern, text)
140
-
141
- if match:
142
- num1 = match.group(1)
143
- num2 = match.group(2)
144
-
145
- # If the numeric form is a word, map it to its numerical value
146
- if num1 in num_dict:
147
- num1 = num_dict[num1]
148
-
149
- # If the free text form is a word, map it to its numerical value
150
- if num2 in num_dict:
151
- num2 = num_dict[num2]
152
-
153
- # Convert both parts to float and add them together to get the final decimal value
154
- result = float(num1) + float(num2) / (10 ** len(str(num2)))
155
-
156
- return result
157
-
158
- else:
159
- # If input text doesn't match the expected pattern, return 0
160
- return 0
161
-
162
- except:
163
- return 0
164
-
165
-
166
- def convert_into_numeric(num_list):
167
- '''
168
- This is a function to convert the identified numbers into a numeric form
169
- '''
170
-
171
- if num_list:
172
-
173
- # at first we examine how many numbers were captured. Only one number should exist
174
- if len(num_list) > 1:
175
- return 0
176
-
177
- else:
178
- target_num = num_list[0]
179
-
180
- # case it is an integer or float, convert it, otherwise move to following cases
181
- try:
182
- target_num_float = float(target_num)
183
- return {'Number' : target_num}
184
-
185
- except:
186
- # case that it belongs to one of the patterns of freetext number followed by numeric form etc (all the combinations)
187
- if "$pattern" in target_num:
188
- num, _ = target_num.split("$")
189
-
190
- # try at first with that function for the case of six point five
191
- num_conversion = numeric_freetext_dot_freetext(num)
192
-
193
- if num_conversion:
194
- return {'Number' : num_conversion}
195
-
196
- # if not, try with this function for all the rest of cases (6 point 5, 6 point five, six point 5)
197
- else:
198
- num_conversion = numeric_number_dot_freetext(num)
199
-
200
- if num_conversion:
201
- return {'Number' : num_conversion}
202
-
203
- # if none of the above has worked, then examine the case of freetext numbers without patterns (e.g. two, million, twenty three, etc)
204
- else:
205
- try:
206
- num_conversion = w2n.word_to_num(target_num)
207
- return {'Number' : num_conversion}
208
-
209
- # if none of the above, error.
210
- except:
211
- return 0
212
-
213
- else:
214
- return 0
215
-
216
-
217
-
218
- def magnitude_binding(input_text):
219
-
220
- try:
221
- target_numbers = capture_numbers(input_text)
222
- numeric_target_numbers = convert_into_numeric(target_numbers)
223
-
224
- return numeric_target_numbers
225
-
226
- except:
227
- return 0
228
-
229
-
230
-
231
  from transformers import pipeline
232
  import gradio as gr
233
 
234
- title = "Natural Language module Demo for Magnitude numbers identification"
235
- description = "This is a simple demo just for demonstration purposes, so that Serco team might have the chance to validate the results of the Natural Language module concerning magnitude number identification, while in progress \n\n NOTE: DO NOT ENTER DATES FOR THIS TESTING"
 
 
236
 
237
  examples = [
238
- ["Earthquake located in Ishkoshim, Tajikistan with magnitude greater than 6"],
239
- ["Give me all the earthquakes with magnitude above than 8.23 in the region of Athens"],
240
- ["Earthquake happened in Rome with a magnitude of four"],
241
- ["I want all earthquakes larger than five point six that occurred in Rome"],
242
- ["I want all earthquakes larger than five point 6 that occurred in Rome"],
243
- ["I want all earthquakes larger than 5 point six that occurred in Rome"],
244
- ["I want all earthquakes larger than 5 point 6 that occurred in Rome"]
245
  ]
246
 
247
  gr.Interface(
@@ -252,4 +24,4 @@ gr.Interface(
252
  description=description,
253
  examples=examples,
254
  enable_queue=True,
255
- ).launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from transformers import pipeline
2
  import gradio as gr
3
 
4
+ from helper import magnitude_binding
5
+
6
+ title = "Magnitudes Identification Demo"
7
+ description = "This is a simple demo just for demonstration purposes, so that Serco team might have the chance to validate the first results of the Natural Language module concerning the identification of magnitude numbers \n\n NOTE: DO NOT ENTER DATES FOR THIS TESTING"
8
 
9
  examples = [
10
+ ["I want all earthquakes that are located in Ishkoshim, Tajikistan with a magnitude greater than 6"],
11
+ ["I want all earthquakes that are located in Ishkoshim, Tajikistan with a magnitude greater than 6.5"],
12
+ ["I want all earthquakes that are located in Ishkoshim, Tajikistan with a magnitude greater than six"],
13
+ ["I want all earthquakes that are located in Ishkoshim, Tajikistan with a magnitude greater than six point five"],
14
+ ["I want all earthquakes that are located in Ishkoshim, Tajikistan with a magnitude greater than 6 point five"],
15
+ ["I want all earthquakes that are located in Ishkoshim, Tajikistan with a magnitude greater than six point 5"],
16
+ ["I want all earthquakes that are located in Ishkoshim, Tajikistan with a magnitude greater than 6 point 5"],
17
  ]
18
 
19
  gr.Interface(
 
24
  description=description,
25
  examples=examples,
26
  enable_queue=True,
27
+ ).launch()