Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -130,29 +130,23 @@ def validate_excel_structure(df):
|
|
130 |
return False, validation_messages
|
131 |
|
132 |
try:
|
133 |
-
# Check abstract length
|
134 |
-
min_length = df['Abstract'].fillna('').astype(str).str.len().min()
|
135 |
-
if min_length < 50:
|
136 |
-
validation_messages.append("Some abstracts are too short (less than 50 characters)")
|
137 |
-
|
138 |
-
# Check publication year format
|
139 |
df['Publication Year'] = pd.to_numeric(df['Publication Year'], errors='coerce')
|
140 |
if df['Publication Year'].isna().any():
|
141 |
-
validation_messages.append("Some publication years are invalid")
|
142 |
else:
|
143 |
years = df['Publication Year'].dropna()
|
144 |
-
if len(years) > 0:
|
145 |
if years.min() < 1900 or years.max() > 2025:
|
146 |
validation_messages.append("Publication years must be between 1900 and 2025")
|
147 |
-
|
148 |
-
# Check DOI format
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
validation_messages.append("Some DOIs are in invalid format")
|
153 |
|
154 |
except Exception as e:
|
155 |
-
validation_messages.append(f"Error
|
156 |
|
157 |
return len(validation_messages) == 0, validation_messages
|
158 |
|
|
|
130 |
return False, validation_messages
|
131 |
|
132 |
try:
|
133 |
+
# Check publication year format - this is useful for sorting/filtering
|
|
|
|
|
|
|
|
|
|
|
134 |
df['Publication Year'] = pd.to_numeric(df['Publication Year'], errors='coerce')
|
135 |
if df['Publication Year'].isna().any():
|
136 |
+
validation_messages.append("Some publication years are invalid. Please ensure all years are in numeric format (e.g., 2024)")
|
137 |
else:
|
138 |
years = df['Publication Year'].dropna()
|
139 |
+
if len(years) > 0:
|
140 |
if years.min() < 1900 or years.max() > 2025:
|
141 |
validation_messages.append("Publication years must be between 1900 and 2025")
|
142 |
+
|
143 |
+
# For short abstracts - just show a warning
|
144 |
+
short_abstracts = df['Abstract'].fillna('').astype(str).str.len() < 50
|
145 |
+
if short_abstracts.any():
|
146 |
+
st.warning("ℹ️ Some abstracts are quite short, but will still be processed")
|
|
|
147 |
|
148 |
except Exception as e:
|
149 |
+
validation_messages.append(f"Error checking data format: {str(e)}")
|
150 |
|
151 |
return len(validation_messages) == 0, validation_messages
|
152 |
|