21501A0580 committed on
Commit
72f6c62
·
1 Parent(s): 449f4e3

Initial Commit

Browse files
Files changed (1) hide show
  1. app.py +87 -9
app.py CHANGED
@@ -56,13 +56,91 @@ def extract_expiry_dates(text):
56
  r'(\d{4}[\/\-]\d{1,2}[\/\-]\d{1,2})',
57
  r'(\d{4}[A-Za-z]{3,}\d{1,2})',
58
  r'(\d{1,2}[A-Za-z]{3,}\d{4})',
59
- r'Best before (\d+) months'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  ]
61
- dates = []
 
 
 
62
  for pattern in expiry_date_patterns:
63
- matches = re.findall(pattern, text, re.IGNORECASE)
64
- dates.extend(matches)
65
- return dates
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  # Streamlit app
68
  st.title("Image Processing Application")
@@ -95,11 +173,11 @@ if uploaded_file is not None:
95
 
96
  # Expiry Date Extraction
97
  st.header("Expiry Date Extraction")
98
- expiry_dates = extract_expiry_dates(text)
99
- if expiry_dates:
100
  st.write("Expiry Dates Found:")
101
- for date in expiry_dates:
102
- st.text(date)
103
  else:
104
  st.write("No expiry dates found.")
105
 
 
56
  r'(\d{4}[\/\-]\d{1,2}[\/\-]\d{1,2})',
57
  r'(\d{4}[A-Za-z]{3,}\d{1,2})',
58
  r'(\d{1,2}[A-Za-z]{3,}\d{4})',
59
+ r'Best before (\d+) months',
60
+ r'(?:exp(?:iry)?\.?\s*date\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Expiry Date: 20/07/2O24
61
+ r'(?:exp(?:iry)?\.?\s*date\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-]\d{4}))', # Expiry Date: 20/07/2024
62
+ r'(?:exp(?:iry)?\.?\s*date\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Expiry Date: 20/07/2O24
63
+ r'(?:exp(?:iry)?\.?\s*date\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Expiry Date: 20 MAY 2O24
64
+ r'(?:exp(?:iry)?\.?\s*date\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*\d{4}))', # Expiry Date: 20 MAY 2024
65
+ r'(?:exp(?:iry)?\.?\s*date\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Expiry Date: 20 MAY 2O24
66
+ r'(?:exp(?:iry)?\.?\s*date\s*[:\-]?\s*.*?(\d{4}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Expiry Date: 2024/07/2O24
67
+ r'(?:exp(?:iry)?\.?\s*date\s*[:\-]?\s*.*?(\d{4}[\/\-]\d{2}[\/\-]\d{2}))', # Expiry Date: 2024/07/20
68
+ r'(?:best\s*before\s*[:\-]?\s*.*?(\d{4}))', # Best Before: 2025
69
+ r'(?:best\s*before\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Best Before: 20/07/2O24
70
+ r'(?:best\s*before\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-]\d{4}))', # Best Before: 20/07/2024
71
+ r'(?:best\s*before\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Best Before: 20/07/2O24
72
+ r'(?:best\s*before\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Best Before: 20 MAY 2O24
73
+ r'(?:best\s*before\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*\d{4}))', # Best Before: 20 MAY 2024
74
+ r'(?:best\s*before\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Best Before: 20 MAY 2O24
75
+ r'(?:best\s*before\s*[:\-]?\s*.*?(\d{4}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Best Before: 2024/07/2O24
76
+ r'(?:best\s*before\s*[:\-]?\s*.*?(\d{4}[\/\-]\d{2}[\/\-]\d{2}))', # Best Before: 2024/07/20
77
+ r'(?:best\s*before\s*[:\-]?\s*.*?(\d{1,2}\d{2}\d{2}))',
78
+ r'(?:best\s*before\s*[:\-]?\s*(\d{6}))',
79
+ r'(?:consume\s*before\s*[:\-]?\s*.*?(\d{1,2}[A-Za-z]{3,}[0O]\d{2}))', # Consume Before: 3ODEC2O24
80
+ r'(?:consume\s*before\s*[:\-]?\s*.*?(\d{1,2}[A-Za-z]{3,}\d{2}))', # Consume Before: 30DEC23
81
+ r'(?:consume\s*before\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Consume Before: 20/07/2O24
82
+ r'(?:consume\s*before\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-]\d{4}))', # Consume Before: 20/07/2024
83
+ r'(?:consume\s*before\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Consume Before: 20/07/2O24
84
+ r'(?:consume\s*before\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Consume Before: 20 MAY 2O24
85
+ r'(?:consume\s*before\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*\d{4}))', # Consume Before: 20 MAY 2024
86
+ r'(?:consume\s*before\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Consume Before: 20 MAY 2O24
87
+ r'(?:consume\s*before\s*[:\-]?\s*.*?(\d{4}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Consume Before: 2024/07/2O24
88
+ r'(?:consume\s*before\s*[:\-]?\s*.*?(\d{4}[\/\-]\d{2}[\/\-]\d{2}))', # Consume Before: 2024/07/20
89
+ r'(?:exp\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Exp: 20/07/2O24
90
+ r'(?:exp\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-]\d{4}))', # Exp: 20/07/2024
91
+ r'(?:exp\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Exp: 20/07/2O24
92
+ r'(?:exp\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Exp: 20 MAY 2O24
93
+ r'(?:exp\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*\d{4}))', # Exp: 20 MAY 2024
94
+ r'(?:exp\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Exp: 20 MAY 2O24
95
+ r'(?:exp\s*[:\-]?\s*.*?(\d{4}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Exp: 2024/07/2O24
96
+ r'(?:exp\s*[:\-]?\s*.*?(\d{4}[\/\-]\d{2}[\/\-]\d{2}))', # Exp: 2024/07/20
97
+ r"Exp\.Date\s+(\d{2}[A-Z]{3}\d{4})",
98
+ r'(?:exp\s*\.?\s*date\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Exp. Date: 16 MAR 2O30 (with typo)
99
+ r'(?:exp\s*\.?\s*date\s*[:\-]?\s*.*?(\d{2}[\/\-]\d{2}[\/\-][0O]\d{2}))', # Exp. Date: 15/12/2O30 (with typo)
100
+ r'(?:exp\s*\.?\s*date\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Exp. Date: 15 MAR 2O30 (with typo)
101
+ r'(?:exp\s*\.?\s*date\s*[:\-]?\s*.?(\d{2}\s[A-Za-z]{3,}\s*[0O]\d{2}))', # Exp. Date cdsyubfuyef 15 MAR 2O30 (with typo)
102
+ r'(\d{2}[\/\-]\d{2}[\/\-]\d{4})', # 20/07/2024
103
+ r'(\d{2}[\/\-]\d{2}[\/\-]\d{2})', # 20/07/24
104
+ r'(\d{2}\s*[A-Za-z]{3,}\s*\d{4})', # 20 MAY 2024
105
+ r'(\d{2}\s*[A-Za-z]{3,}\s*\d{2})', # 20 MAY 24
106
+ r'(\d{4}[\/\-]\d{2}[\/\-]\d{2})', # 2024/07/20
107
+ r'(\d{4}[\/\-]\d{2}[\/\-]\d{2})', # 2024-07-20
108
+ r'(\d{4}[A-Za-z]{3,}\d{2})', # 2024MAY20
109
+ r'(\d{2}[A-Za-z]{3,}\d{4})', # 20MAY2024
110
+ r'(?:DX3\s*[:\-]?\s*(\d{2}\s*\d{2}\s*\d{4}))',
111
+ r'(?:exp\.?\s*date\s*[:\-]?\s*(\d{2}\s*[A-Za-z]{3,}\s*(\d{4}|\d{2})))',
112
+ r'(?:exp\.?\s*date\s*[:\-]?\s*(\d{2}\s*\d{2}\s*\d{4}))', # Exp. Date: 20 05 2025
113
+ r'(\d{4}[A-Za-z]{3}\d{2})', # 2025MAY11
114
+ r'(?:best\s*before\s*[:\-]?\s*(\d+)\s*(days?|months?|years?))', # Best Before: 6 months
115
+ r'(?:best\s*before\s*[:\-]?\s*(three)\s*(months?))',
116
+ r'(\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\b\s*\d{4})',
117
+ r'\bUSE BY\s+(\d{1,2}[A-Za-z]{3}\d{4})\b',
118
+ r"Exp\.Date\s*(\d{2}[A-Z]{3}\d{4})",
119
+ r"EXP:\d{4}/\d{2}/\d{4}/\d{1}/[A-Z]"
120
  ]
121
+
122
+ current_date = datetime.now()
123
+ dates_info = []
124
+
125
  for pattern in expiry_date_patterns:
126
+ match = re.search(pattern, text, re.IGNORECASE)
127
+ if match:
128
+ date_str = match.group(1)
129
+ try:
130
+ # Try parsing the date
131
+ expiry_date = datetime.strptime(date_str, '%d/%m/%Y')
132
+ except ValueError:
133
+ try:
134
+ expiry_date = datetime.strptime(date_str, '%d-%m-%Y')
135
+ except ValueError:
136
+ # Add more date formats as needed
137
+ continue
138
+
139
+ days_left = (expiry_date - current_date).days
140
+ dates_info.append((date_str, days_left))
141
+ break # Stop after finding the first valid date
142
+
143
+ return dates_info
144
 
145
  # Streamlit app
146
  st.title("Image Processing Application")
 
173
 
174
  # Expiry Date Extraction
175
  st.header("Expiry Date Extraction")
176
+ expiry_dates_info = extract_expiry_dates(text)
177
+ if expiry_dates_info:
178
  st.write("Expiry Dates Found:")
179
+ for date_str, days_left in expiry_dates_info:
180
+ st.text(f"{date_str} - {days_left} days left")
181
  else:
182
  st.write("No expiry dates found.")
183