ValadisCERTH committed on
Commit
13303bb
·
1 Parent(s): c4aefbf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -70
app.py CHANGED
@@ -3,10 +3,12 @@ import re
3
 
4
  from datetime import datetime
5
 
 
6
  # load the spacy model
7
  spacy.cli.download("en_core_web_lg")
8
  nlp = spacy.load("en_core_web_lg")
9
 
 
10
  # Define a function to extract dates from text
11
  def extract_dates(text):
12
  """
@@ -17,10 +19,11 @@ def extract_dates(text):
17
  # Regular expressions that include the \b word boundary character to ensure that the date pattern only matches if it is not part of a longer pattern that has already been matched
18
  date_patterns = [
19
  r'\b\d{1,2}[-/]\d{1,2}[-/]\d{2,4}\b', # Matches dates like "01/01/22" or "1-1-2022"
20
- r'\b\d{1,2}[-/]\d{1,2}\b(?!\d)', # Matches dates like "01/01" or "1-1", but not "10-10-2022"
21
  r'\b[A-Z][a-z]{2,8} \d{1,2},? \d{2,4}\b', # Matches dates like "January 1, 2022" or "Feb 28, 22"
22
  r'\b\d{1,2} [A-Z][a-z]{2,8} \d{2,4}\b', # Matches dates like "1 January 2022" or "28 Feb 22"
23
  r'\b[A-Z][a-z]{2,8} \d{2,4}\b', # Matches dates like "January 2022" or "Feb 22"
 
24
  ]
25
 
26
  # Find all matches for date patterns in the text
@@ -48,88 +51,128 @@ def extract_dates(text):
48
 
49
 
50
  def convert_dates(date_list):
51
- """
52
- Assign to the identified formatted dates the proper date format and then, on the formatted dates, assign the relevant date tags (e.g. specify which is the day, the month, etc)
53
- """
54
-
55
- output_list = []
56
- for date_str in date_list:
57
- try:
58
- dt = datetime.strptime(date_str, '%B %d, %Y')
59
- output_list.append(f"day:{dt.day}, month:{dt.month}, year:{dt.year}")
60
- except ValueError:
61
-
62
- try:
63
- dt = datetime.strptime(date_str, '%m-%d-%y')
64
- output_list.append(f"day:{dt.day}, month:{dt.month}, year:{dt.year}")
65
- except ValueError:
66
-
67
- try:
68
- dt = datetime.strptime(date_str, '%d/%m')
69
- output_list.append(f"day:{dt.day}, month:{dt.month}")
70
- except ValueError:
71
-
72
- try:
73
- dt = datetime.strptime(date_str, '%B %d')
74
- output_list.append(f"day:{dt.day}, month:{dt.month}")
75
- except ValueError:
76
-
77
- try:
78
- dt = datetime.strptime(date_str, '%b %d')
79
- output_list.append(f"day:{dt.day}, month:{dt.month}")
80
- except ValueError:
81
-
82
- try:
83
- dt = datetime.strptime(date_str, '%B %Y')
84
- output_list.append(f"month:{dt.month}, year:{dt.year}")
85
- except ValueError:
86
-
87
- try:
88
- dt = datetime.strptime(date_str, '%Y')
89
- output_list.append(f"year:{dt.year}")
90
- except ValueError:
91
-
92
- try:
93
- dt = datetime.strptime(date_str, '%d/%m/%y')
94
- output_list.append(f"day:{dt.day}, month:{dt.month}, year:{dt.year}")
95
- except ValueError:
96
-
97
- try:
98
- dt = datetime.strptime(date_str, '%Y %d %m')
99
- output_list.append({'day': dt.day, 'month': dt.month, 'year': dt.year})
100
- except ValueError:
101
-
102
- try:
103
- dt = datetime.strptime(date_str, '%B')
104
- output_list.append(dt.strftime('month:%-m'))
105
- except ValueError as e:
106
- output_list.append(f'INVALID FORMAT: {date_str}')
107
- return output_list
 
 
 
 
 
 
108
 
109
 
110
 
111
  def dates_binding(text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
- identified_dates = extract_dates(text)
114
- formatted_dates = convert_dates(identified_dates)
115
 
116
- return formatted_dates
117
 
118
 
119
  from transformers import pipeline
120
  import gradio as gr
121
 
122
- title = "Natural Language module Demo for Dates"
123
- description = "This is a simple demo just for demonstration purposes, so that Serco team might have the chance to validate the results of the Natural Language module concerning date identification, while in progress"
124
 
125
  examples = [
126
- ["Earthquake located in Ishkoshim, Tajikistan in 2018 05 09 with magnitude greater than 6.2"],
127
- ["Give me all the earthquakes with magnitude above than 6 in the region of Athens for the month of January 2, 2023"],
128
- ["Earthquake happened in Rome during 1986 with a magnitude of 6"],
129
- ["I want all earthquakes larger than 5.0 that occurred in Rome during 3/5/20"],
130
- ["I want all earthquakes happened in Greece 31/12"],
131
- ["I want all earthquakes happened in Greece January 5"],
132
- ["I want all earthquakes happened in Greece 3-4-20"]
 
 
133
  ]
134
 
135
  gr.Interface(
 
3
 
4
  from datetime import datetime
5
 
6
+
7
  # load the spacy model
8
  spacy.cli.download("en_core_web_lg")
9
  nlp = spacy.load("en_core_web_lg")
10
 
11
+
12
  # Define a function to extract dates from text
13
  def extract_dates(text):
14
  """
 
19
  # Regular expressions that include the \b word boundary character to ensure that the date pattern only matches if it is not part of a longer pattern that has already been matched
20
  date_patterns = [
21
  r'\b\d{1,2}[-/]\d{1,2}[-/]\d{2,4}\b', # Matches dates like "01/01/22" or "1-1-2022"
22
+ r'\b\d{1,2}[-/]\d{1,2}\b(?!\d)', # Matches dates like "01/01" or "1-1"
23
  r'\b[A-Z][a-z]{2,8} \d{1,2},? \d{2,4}\b', # Matches dates like "January 1, 2022" or "Feb 28, 22"
24
  r'\b\d{1,2} [A-Z][a-z]{2,8} \d{2,4}\b', # Matches dates like "1 January 2022" or "28 Feb 22"
25
  r'\b[A-Z][a-z]{2,8} \d{2,4}\b', # Matches dates like "January 2022" or "Feb 22"
26
+
27
  ]
28
 
29
  # Find all matches for date patterns in the text
 
51
 
52
 
53
  def convert_dates(date_list):
54
+ """
55
+ Assign to the identified formatted dates the proper date format and then, on the formatted dates, assign the relevant date tags (e.g. specify which is the day, the month, etc)
56
+ """
57
+
58
+ DATE_FORMATS = {
59
+ '%B %d, %Y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
60
+ '%-m-%d-%Y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
61
+ '%m-%d-%y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
62
+ '%d/%m': 'day:{dt.day}, month:{dt.month}',
63
+ '%B %d': 'day:{dt.day}, month:{dt.month}',
64
+ '%b %d': 'day:{dt.day}, month:{dt.month}',
65
+ '%B %Y': 'month:{dt.month}, year:{dt.year}',
66
+ '%Y': 'year:{dt.year}',
67
+ '%d/%m/%y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
68
+ '%B %d, %y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
69
+ '%b %d, %y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
70
+ '%d-%m-%Y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
71
+ '%d/%m/%Y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
72
+ '%d-%m-%y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
73
+ '%m/%d/%y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
74
+ '%m/%d/%Y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
75
+ '%m-%d-%Y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
76
+ '%m-%d-%y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
77
+ '%d/%m/%Y %H:%M:%S': 'day:{dt.day}, month:{dt.month}, year:{dt.year}, time:{dt.strftime("%H:%M:%S")}',
78
+ '%d/%m/%y %H:%M:%S': 'day:{dt.day}, month:{dt.month}, year:{dt.year}, time:{dt.strftime("%H:%M:%S")}',
79
+ '%m/%d/%Y %H:%M:%S': 'day:{dt.day}, month:{dt.month}, year:{dt.year}, time:{dt.strftime("%H:%M:%S")}',
80
+ '%m/%d/%y %H:%M:%S': 'day:{dt.day}, month:{dt.month}, year:{dt.year}, time:{dt.strftime("%H:%M:%S")}',
81
+ '%Y-%m-%d': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
82
+ '%y-%m-%d': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
83
+ '%m-%d-%Y %H:%M:%S': 'day:{dt.day}, month:{dt.month}, year:{dt.year}, time:{dt.strftime("%H:%M:%S")}',
84
+ '%m-%d-%y %H:%M:%S': 'day:{dt.day}, month:{dt.month}, year:{dt.year}, time:{dt.strftime("%H:%M:%S")}',
85
+ '%m-%d': 'month:{dt.month}, day:{dt.day}',
86
+ '%-m-%-d': 'month:{dt.month}, day:{dt.day}',
87
+ '%d %b %y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
88
+ '%d %B %Y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
89
+ '%b %Y': 'month:{dt.month}, year:{dt.year}',
90
+ '%b %d, %Y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}',
91
+ '%d %B %y': 'day:{dt.day}, month:{dt.month}, year:{dt.year}'
92
+ }
93
+
94
+
95
+ output_list = []
96
+ for date_str in date_list:
97
+ valid_format = False
98
+ for fmt, out_fmt in DATE_FORMATS.items():
99
+ try:
100
+ dt = datetime.strptime(date_str, fmt)
101
+ output_list.append(out_fmt.format(dt=dt))
102
+ valid_format = True
103
+ break
104
+ except ValueError:
105
+ pass
106
+ if not valid_format:
107
+ # Attempt to parse using a custom format
108
+ try:
109
+ if '-' in date_str:
110
+ dt = datetime.strptime(date_str, '%m-%d-%y')
111
+ else:
112
+ dt = datetime.strptime(date_str, '%d/%m/%y')
113
+ output_list.append(f'day:{dt.day}, month:{dt.month}, year:{dt.year}')
114
+ except ValueError:
115
+ output_list.append(f'INVALID FORMAT: {date_str}')
116
+ return output_list
117
 
118
 
119
 
120
  def dates_binding(text):
121
+ '''
122
+ This is a function that binds together all the subcomponents of the dates identification, while also controlling for multiple, or zero date references
123
+ '''
124
+
125
+ try:
126
+
127
+ # capture the referred dates
128
+ identified_dates = extract_dates(text)
129
+
130
+ # we only accept a single date reference
131
+ if len(identified_dates) == 1:
132
+
133
+ formatted_dates = convert_dates(identified_dates)
134
+
135
+
136
+ # in case there is a wrong date format then return the appropriate code to prompt back the proper message
137
+ if 'INVALID FORMAT' in formatted_dates[0]:
138
+ return (0,'DATES','wrong_date_format')
139
+
140
+ else:
141
+ return formatted_dates
142
+
143
+ # in case of zero references return the appropriate code (to aid returning the correct prompt)
144
+ elif len(identified_dates) == 0:
145
+ return (0,'DATES','no_date')
146
+
147
+ # in case of more than one reference return the appropriate code (to aid returning the correct prompt)
148
+ elif len(identified_dates) > 1:
149
+ return (0,'DATES','more_dates')
150
+
151
+ # in case of unexpected error return the appropriate code (to aid returning the correct prompt)
152
+ else:
153
+ return (0,'DATES','unknown_error')
154
 
155
+ except:
156
+ return (0,'DATES','unknown_error')
157
 
 
158
 
159
 
160
  from transformers import pipeline
161
  import gradio as gr
162
 
163
+ title = "Dates Demo"
164
+ description = "This is a simple demo just for demonstration purposes for Serco team, to validate the results of the Natural Language module concerning dates identification, while in progress"
165
 
166
  examples = [
167
+ ["Earthquake located in Rome, Italy in 01/01/23 with magnitude greater than 6.2"],
168
+ ["Earthquake located in Rome, Italy in 1-1-2023 with magnitude greater than 6.2"],
169
+ ["Earthquake located in Rome, Italy in 1/1/23 with magnitude greater than 6.2"],
170
+ ["Earthquake located in Rome, Italy in January 1, 2023 with magnitude greater than 6.2"],
171
+ ["Earthquake located in Rome, Italy in Jan 1, 23 with magnitude greater than 6.2"],
172
+ ["Earthquake located in Rome, Italy in 1 January 2023 with magnitude greater than 6.2"],
173
+ ["Earthquake located in Rome, Italy in 1 Jan 23 with magnitude greater than 6.2"],
174
+ ["Earthquake located in Rome, Italy in January 2023 with magnitude greater than 6.2"],
175
+ ["Earthquake located in Rome, Italy in Jan 23 with magnitude greater than 6.2"],
176
  ]
177
 
178
  gr.Interface(