21501A0580 committed on
Commit
8024be0
·
verified ·
1 Parent(s): 44d5108

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -1
app.py CHANGED
@@ -63,7 +63,121 @@ def extract_expiry_dates(text):
63
  r'(\d{1,2}\s*[A-Za-z]{3,}\s*\d{4})', # 20 MAY 2024
64
  r'([A-Za-z]{3,}\s*\d{1,2}[,\s]*\d{4})', # July 20, 2024
65
  r'(\d{4}[\/\-]\d{1,2}[\/\-]\d{1,2})', # 2024/07/20 or 2024-07-20
66
- r'([A-Za-z]{3}[\-]\d{1,2}[\-]\d{4})' # JAN-15-2024
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  ]
68
  dates = []
69
  for pattern in patterns:
 
63
  r'(\d{1,2}\s*[A-Za-z]{3,}\s*\d{4})', # 20 MAY 2024
64
  r'([A-Za-z]{3,}\s*\d{1,2}[,\s]*\d{4})', # July 20, 2024
65
  r'(\d{4}[\/\-]\d{1,2}[\/\-]\d{1,2})', # 2024/07/20 or 2024-07-20
66
+ r'([A-Za-z]{3}[\-]\d{1,2}[\-]\d{4})',
67
+ r'(?:exp(?:iry)?\.?\s*date\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Expiry Date: 20/07/2O24
68
+ r'(?:exp(?:iry)?\.?\s*date\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-]\d{4}))', # Expiry Date: 20/07/2024
69
+ r'(?:exp(?:iry)?\.?\s*date\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Expiry Date: 20/07/2O24
70
+ r'(?:exp(?:iry)?\.?\s*date\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Expiry Date: 20 MAY 2O24
71
+ r'(?:exp(?:iry)?\.?\s*date\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*\d{4}))', # Expiry Date: 20 MAY 2024
72
+ r'(?:exp(?:iry)?\.?\s*date\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Expiry Date: 20 MAY 2O24
73
+ r'(?:exp(?:iry)?\.?\s*date\s*[:\-]?\s*.*?(\d{4}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Expiry Date: 2024/07/2O24
74
+ r'(?:exp(?:iry)?\.?\s*date\s*[:\-]?\s*.*?(\d{4}[\/\-]\d{2}[\/\-]\d{2}))', # Expiry Date: 2024/07/20
75
+ r'(?:best\s*before\s*[:\-]?\s*.*?(\d{4}))', # Best Before: 2025
76
+ r'(?:best\s*before\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Best Before: 20/07/2O24
77
+ r'(?:best\s*before\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-]\d{4}))', # Best Before: 20/07/2024
78
+ r'(?:best\s*before\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Best Before: 20/07/2O24
79
+ r'(?:best\s*before\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Best Before: 20 MAY 2O24
80
+ r'(?:best\s*before\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*\d{4}))', # Best Before: 20 MAY 2024
81
+ r'(?:best\s*before\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Best Before: 20 MAY 2O24
82
+ r'(?:best\s*before\s*[:\-]?\s*.*?(\d{4}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Best Before: 2024/07/2O24
83
+ r'(?:best\s*before\s*[:\-]?\s*.*?(\d{4}[\/\-]\d{2}[\/\-]\d{2}))', # Best Before: 2024/07/20
84
+ r'(?:best\s*before\s*[:\-]?\s*.*?(\d{1,2}\d{2}\d{2}))',
85
+ r'(?:best\s*before\s*[:\-]?\s*(\d{6}))',
86
+ r'(?:consume\s*before\s*[:\-]?\s*.*?(\d{1,2}[A-Za-z]{3,}[0O]\d{2}))', # Consume Before: 3ODEC2O24
87
+ r'(?:consume\s*before\s*[:\-]?\s*.*?(\d{1,2}[A-Za-z]{3,}\d{2}))', # Consume Before: 30DEC23
88
+ r'(?:consume\s*before\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Consume Before: 20/07/2O24
89
+ r'(?:consume\s*before\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-]\d{4}))', # Consume Before: 20/07/2024
90
+ r'(?:consume\s*before\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Consume Before: 20/07/2O24
91
+ r'(?:consume\s*before\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Consume Before: 20 MAY 2O24
92
+ r'(?:consume\s*before\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*\d{4}))', # Consume Before: 20 MAY 2024
93
+ r'(?:consume\s*before\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Consume Before: 20 MAY 2O24
94
+ r'(?:consume\s*before\s*[:\-]?\s*.*?(\d{4}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Consume Before: 2024/07/2O24
95
+ r'(?:consume\s*before\s*[:\-]?\s*.*?(\d{4}[\/\-]\d{2}[\/\-]\d{2}))', # Consume Before: 2024/07/20
96
+ r'(?:exp\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Exp: 20/07/2O24
97
+ r'(?:exp\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-]\d{4}))', # Exp: 20/07/2024
98
+ r'(?:exp\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Exp: 20/07/2O24
99
+ r'(?:exp\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Exp: 20 MAY 2O24
100
+ r'(?:exp\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*\d{4}))', # Exp: 20 MAY 2024
101
+ r'(?:exp\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Exp: 20 MAY 2O24
102
+ r'(?:exp\s*[:\-]?\s*.*?(\d{4}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Exp: 2024/07/2O24
103
+ r'(?:exp\s*[:\-]?\s*.*?(\d{4}[\/\-]\d{2}[\/\-]\d{2}))', # Exp: 2024/07/20
104
+ r'Exp\.Date\s+(\d{2}[A-Z]{3}\d{4})',
105
+ r'(?:exp\s*\.?\s*date\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Exp. Date: 16 MAR 2O30 (with typo)
106
+ r'(?:exp\s*\.?\s*date\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Exp. Date: 15/12/2O30 (with typo)
107
+ r'(?:exp\s*\.?\s*date\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Exp. Date: 15 MAR 2O30 (with typo)
108
+ r'(?:exp\s*\.?\s*date\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Exp. Date cdsyubfuyef 15 MAR 2O30 (with typo)
109
+ r'(\d{2}[\/\-]\d{2}[\/\-]\d{4})', # 20/07/2024
110
+ r'(\d{2}[\/\-]\d{2}[\/\-]\d{2})', # 20/07/24
111
+ r'(\d{2}\s*[A-Za-z]{3,}\s*\d{4})', # 20 MAY 2024
112
+ r'(\d{2}\s*[A-Za-z]{3,}\s*\d{2})', # 20 MAY 24
113
+ r'(\d{4}[\/\-]\d{2}[\/\-]\d{2})', # 2024/07/20
114
+ r'(\d{4}[\/\-]\d{2}[\/\-]\d{2})', # 2024-07-20
115
+ r'(\d{4}[A-Za-z]{3,}\d{2})', # 2024MAY20
116
+ r'(\d{2}[A-Za-z]{3,}\d{4})', # 20MAY2024
117
+ r'(?:exp\.?\s*date\s*[:\-]?\s*(\d{2}\s*[A-Za-z]{3,}\s*(\d{4}|\d{2})))',
118
+ r'(?:exp\.?\s*date\s*[:\-]?\s*(\d{2}\s*\d{2}\s*\d{4}))', # Exp. Date: 20 05 2025
119
+ r'(\d{4}[A-Za-z]{3}\d{2})', # 2025MAY11
120
+ r'(?:best\s*before\s*[:\-]?\s*(\d+)\s*(days?|months?|years?))', # Best Before: 6 months
121
+ r'(?:best\s*before\s*[:\-]?\s*(three)\s*(months?))',
122
+ r'(\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\b\s*\d{4})',
123
+ r'\bUSE BY\s+(\d{1,2}[A-Za-z]{3}\d{4})\b',
124
+ r'Exp\.Date\s*(\d{2}[A-Z]{3}\d{4})',
125
+ r'EXP:\d{4}/\d{2}/\d{4}/\d{1}/[A-Z]', # JAN-15-2024
126
+ r'USE BY[:\-]?\s*(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{4})', # Use by date
127
+ r'BEST BEFORE[:\-]?\s*(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{4})', # Best before date
128
+ r'EXPIRY DATE[:\-]?\s*(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{4})', # Expiry Date
129
+ r'EXPIRY[:\-]?\s*(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{4})', # Expiry
130
+ r'EXP[:\-]?\s*(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{4})', # Exp
131
+ r'VALID UNTIL[:\-]?\s*(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{4})', # Valid Until
132
+ r'CONSUME BY[:\-]?\s*(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{4})', # Consume By
133
+ r'EXPIRES ON[:\-]?\s*(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{4})', # Expires On
134
+ # DDMMMYYYY format
135
+ r'(\d{1,2}[A-Za-z]{3}\d{4})', # DDMMMYYYY format
136
+ # Short year formats
137
+ r'(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2})', # Short year date format (DD/MM/YY)
138
+ r'(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{4})', # General date format (DD/MM/YYYY)
139
+ # Year-month-day formats
140
+ r'(\d{4}[\/\-]\d{1,2}[\/\-]\d{1,2})', # Year-month-day format (YYYY/MM/DD)
141
+ # Month/Year formats
142
+ r'(\d{1,2}[\/\-]\d{1,2})', # MM/DD format
143
+ r'(\d{1,2}[\/\-]\d{2})', # MM/YY format
144
+ # Month name formats
145
+ r'(\d{1,2}\s*[A-Za-z]{3,}\s*\d{4})', # Month name with day and year
146
+ r'(\d{1,2}\s*[A-Za-z]{3,}\s*\d{2})', # Month name with day and short year
147
+ # Year with month name
148
+ r'(\d{4}[A-Za-z]{3,}\d{1,2})', # Year with month name
149
+ r'(\d{1,2}[A-Za-z]{3,}\d{4})', # Day with month name and full year
150
+ # Additional formats
151
+ r'(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2})', # MM/DD/YY format
152
+ r'(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{4})', # MM/DD/YYYY format
153
+ r'(\d{1,2}[\/\-]\d{1,2})', # MM/DD format
154
+ r'(\d{1,2}[\/\-]\d{2})', # MM/YY format
155
+ # Best before phrases
156
+ r'Best before (\d+) months', # Best before in months
157
+ r'Expiration Date[:\-]?\s*(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{4})', # Expiration Date
158
+ r'Expires[:\-]?\s*(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{4})', # Expires
159
+ # Additional variations
160
+ r'(\d{1,2}\s*[A-Za-z]{3,}\s*\d{4})', # Month name with day and year
161
+ r'(\d{1,2}\s*[A-Za-z]{3,}\s*\d{2})', # Month name with day and short year
162
+ # More variations
163
+ r'(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{4})', # MM/DD/YYYY format
164
+ r'(\d{1,2}[\/\-]\d{1,2})', # MM/DD format
165
+ r'(\d{1,2}[\/\-]\d{2})', # MM/YY format
166
+ # Additional expiry phrases
167
+ r'(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{4})', # Expiry in various formats
168
+ r'(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2})', # Expiry in short year formats
169
+ r'(\d{1,2}[\/\-]\d{1,2})', # Expiry in MM/DD format
170
+ r'(\d{1,2}[\/\-]\d{2})', # Expiry in MM/YY format
171
+ # Additional phrases
172
+ r'(\d{1,2}\s*[A-Za-z]{3,}\s*\d{4})', # Month name with day and year
173
+ r'(\d{1,2}\s*[A-Za-z]{3,}\s*\d{2})', # Month name with day and short year
174
+ r'(\d{4}[A-Za-z]{3,}\d{1,2})', # Year with month name
175
+ r'(\d{1,2}[A-Za-z]{3,}\d{4})', # Day with month name and full year
176
+ # Additional expiry phrases
177
+ r'(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{4})', # Expiry in various formats
178
+ r'(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2})', # Expiry in short year formats
179
+ r'(\d{1,2}[\/\-]\d{1,2})', # Expiry in MM/DD format
180
+ r'(\d{1,2}[\/\-]\d{2})', # Expiry in MM/YY format
181
  ]
182
  dates = []
183
  for pattern in patterns: