pendar02 committed on
Commit
cf44c2f
·
verified ·
1 Parent(s): ee10f7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -15
app.py CHANGED
@@ -130,29 +130,23 @@ def validate_excel_structure(df):
130
  return False, validation_messages
131
 
132
  try:
133
- # Check abstract length
134
- min_length = df['Abstract'].fillna('').astype(str).str.len().min()
135
- if min_length < 50:
136
- validation_messages.append("Some abstracts are too short (less than 50 characters)")
137
-
138
- # Check publication year format
139
  df['Publication Year'] = pd.to_numeric(df['Publication Year'], errors='coerce')
140
  if df['Publication Year'].isna().any():
141
- validation_messages.append("Some publication years are invalid")
142
  else:
143
  years = df['Publication Year'].dropna()
144
- if len(years) > 0: # Only check if we have valid years
145
  if years.min() < 1900 or years.max() > 2025:
146
  validation_messages.append("Publication years must be between 1900 and 2025")
147
-
148
- # Check DOIs (allow empty DOIs)
149
- doi_pattern = r'10\.\d{4,}/.+'
150
- valid_dois = df['DOI'].fillna('').astype(str).str.contains(doi_pattern, na=True, regex=True)
151
- if not valid_dois.all() and len(valid_dois) > 0:
152
- validation_messages.append("Some DOIs are in invalid format")
153
 
154
  except Exception as e:
155
- validation_messages.append(f"Error validating data: {str(e)}")
156
 
157
  return len(validation_messages) == 0, validation_messages
158
 
 
130
  return False, validation_messages
131
 
132
  try:
133
+ # Check publication year format - this is useful for sorting/filtering
 
 
 
 
 
134
  df['Publication Year'] = pd.to_numeric(df['Publication Year'], errors='coerce')
135
  if df['Publication Year'].isna().any():
136
+ validation_messages.append("Some publication years are invalid. Please ensure all years are in numeric format (e.g., 2024)")
137
  else:
138
  years = df['Publication Year'].dropna()
139
+ if len(years) > 0:
140
  if years.min() < 1900 or years.max() > 2025:
141
  validation_messages.append("Publication years must be between 1900 and 2025")
142
+
143
+ # For short abstracts - just show a warning
144
+ short_abstracts = df['Abstract'].fillna('').astype(str).str.len() < 50
145
+ if short_abstracts.any():
146
+ st.warning("ℹ️ Some abstracts are quite short, but will still be processed")
 
147
 
148
  except Exception as e:
149
+ validation_messages.append(f"Error checking data format: {str(e)}")
150
 
151
  return len(validation_messages) == 0, validation_messages
152