Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -271,138 +271,135 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
|
|
271 |
|
272 |
return summaries, topic_model
|
273 |
|
274 |
-
#
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
|
|
|
|
281 |
|
282 |
-
# Main app interface
|
283 |
-
st.title("๐ Arabic Poem Analysis")
|
284 |
-
st.write("Upload a CSV or Excel file containing Arabic poems with columns `country` and `poem`.")
|
285 |
|
286 |
-
# File upload
|
287 |
-
uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
|
288 |
|
289 |
-
if uploaded_file is not None:
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
# Validate columns
|
298 |
-
required_columns = ['country', 'poem']
|
299 |
-
if not all(col in df.columns for col in required_columns):
|
300 |
-
st.error("File must contain 'country' and 'poem' columns.")
|
301 |
-
st.stop()
|
302 |
-
|
303 |
-
# Clean data
|
304 |
-
df['country'] = df['country'].str.strip()
|
305 |
-
df = df.dropna(subset=['country', 'poem'])
|
306 |
-
|
307 |
-
# Add topic modeling controls
|
308 |
-
st.subheader("Topic Modeling Settings")
|
309 |
-
col1, col2 = st.columns(2)
|
310 |
-
|
311 |
-
with col1:
|
312 |
-
topic_strategy = st.radio(
|
313 |
-
"Topic Number Strategy",
|
314 |
-
["Auto", "Manual"],
|
315 |
-
help="Choose whether to let the model determine the optimal number of topics or set it manually"
|
316 |
-
)
|
317 |
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
|
|
|
|
|
|
|
|
|
|
332 |
)
|
333 |
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
max_value=100,
|
346 |
-
value=10
|
347 |
-
)
|
348 |
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
|
|
|
|
362 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
363 |
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
else:
|
399 |
-
st.info("๐ Upload a file to get started!")
|
400 |
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
|
|
|
|
|
|
408 |
|
|
|
|
|
|
271 |
|
272 |
return summaries, topic_model
|
273 |
|
274 |
+
# Main application logic
|
275 |
+
def main():
|
276 |
+
# Load models
|
277 |
+
try:
|
278 |
+
bert_tokenizer, bert_model, emotion_classifier = load_models()
|
279 |
+
st.success("Models loaded successfully!")
|
280 |
+
except Exception as e:
|
281 |
+
st.error(f"Error loading models: {str(e)}")
|
282 |
+
st.stop()
|
283 |
|
284 |
+
# Main app interface
|
285 |
+
st.title("๐ Arabic Poem Analysis")
|
286 |
+
st.write("Upload a CSV or Excel file containing Arabic poems with columns `country` and `poem`.")
|
287 |
|
288 |
+
# File upload
|
289 |
+
uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
|
290 |
|
291 |
+
if uploaded_file is not None:
|
292 |
+
try:
|
293 |
+
# Read the file
|
294 |
+
if uploaded_file.name.endswith('.csv'):
|
295 |
+
df = pd.read_csv(uploaded_file)
|
296 |
+
else:
|
297 |
+
df = pd.read_excel(uploaded_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
|
299 |
+
# Validate columns
|
300 |
+
required_columns = ['country', 'poem']
|
301 |
+
if not all(col in df.columns for col in required_columns):
|
302 |
+
st.error("File must contain 'country' and 'poem' columns.")
|
303 |
+
st.stop()
|
304 |
+
|
305 |
+
# Clean data
|
306 |
+
df['country'] = df['country'].str.strip()
|
307 |
+
df = df.dropna(subset=['country', 'poem'])
|
308 |
+
|
309 |
+
# Add topic modeling controls
|
310 |
+
st.subheader("Topic Modeling Settings")
|
311 |
+
col1, col2 = st.columns(2)
|
312 |
+
|
313 |
+
with col1:
|
314 |
+
topic_strategy = st.radio(
|
315 |
+
"Topic Number Strategy",
|
316 |
+
["Auto", "Manual"],
|
317 |
+
help="Choose whether to let the model determine the optimal number of topics or set it manually"
|
318 |
)
|
319 |
|
320 |
+
if topic_strategy == "Manual":
|
321 |
+
n_documents = len(df)
|
322 |
+
max_topics = min(500, int(np.log10(n_documents) * 100))
|
323 |
+
|
324 |
+
n_topics = st.slider(
|
325 |
+
"Number of Topics",
|
326 |
+
min_value=2,
|
327 |
+
max_value=max_topics,
|
328 |
+
value=min(20, max_topics),
|
329 |
+
help=f"Select the desired number of topics (max {max_topics} based on dataset size)"
|
330 |
+
)
|
|
|
|
|
|
|
331 |
|
332 |
+
with col2:
|
333 |
+
top_n = st.number_input(
|
334 |
+
"Number of top topics/emotions to display:",
|
335 |
+
min_value=1,
|
336 |
+
max_value=100,
|
337 |
+
value=10
|
338 |
+
)
|
339 |
+
|
340 |
+
min_topic_size = st.slider(
|
341 |
+
"Minimum Topic Size",
|
342 |
+
min_value=10,
|
343 |
+
max_value=100,
|
344 |
+
value=30,
|
345 |
+
help="Minimum number of documents required to form a topic"
|
346 |
+
)
|
347 |
|
348 |
+
if st.button("Process Data"):
|
349 |
+
with st.spinner("Processing your data..."):
|
350 |
+
summaries, topic_model = process_and_summarize(
|
351 |
+
df,
|
352 |
+
top_n=top_n,
|
353 |
+
topic_strategy=topic_strategy,
|
354 |
+
n_topics=n_topics if topic_strategy == "Manual" else None,
|
355 |
+
min_topic_size=min_topic_size
|
356 |
+
)
|
357 |
|
358 |
+
if summaries:
|
359 |
+
st.success("Analysis complete!")
|
360 |
+
|
361 |
+
# Display results in tabs
|
362 |
+
tab1, tab2 = st.tabs(["Country Summaries", "Global Topics"])
|
363 |
+
|
364 |
+
with tab1:
|
365 |
+
for summary in summaries:
|
366 |
+
with st.expander(f"๐ {summary['country']} ({summary['total_poems']} poems)"):
|
367 |
+
col1, col2 = st.columns(2)
|
368 |
+
|
369 |
+
with col1:
|
370 |
+
st.subheader("Top Topics")
|
371 |
+
for topic in summary['top_topics']:
|
372 |
+
st.write(f"• {topic['topic']}: {topic['count']} poems")
|
373 |
+
|
374 |
+
with col2:
|
375 |
+
st.subheader("Emotions")
|
376 |
+
for emotion in summary['top_emotions']:
|
377 |
+
st.write(f"• {emotion['emotion']}: {emotion['count']} poems")
|
378 |
+
|
379 |
+
with tab2:
|
380 |
+
st.subheader("Global Topic Distribution")
|
381 |
+
topic_info = topic_model.get_topic_info()
|
382 |
+
for _, row in topic_info.iterrows():
|
383 |
+
if row['Topic'] == -1:
|
384 |
+
topic_name = "Miscellaneous"
|
385 |
+
else:
|
386 |
+
words = topic_model.get_topic(row['Topic'])
|
387 |
+
topic_name = " | ".join([word for word, _ in words[:5]])
|
388 |
+
st.write(f"• Topic {row['Topic']}: {topic_name} ({row['Count']} poems)")
|
389 |
+
|
390 |
+
except Exception as e:
|
391 |
+
st.error(f"Error processing file: {str(e)}")
|
|
|
|
|
392 |
|
393 |
+
else:
|
394 |
+
st.info("๐ Upload a file to get started!")
|
395 |
+
|
396 |
+
# Example format
|
397 |
+
st.write("### Expected File Format:")
|
398 |
+
example_df = pd.DataFrame({
|
399 |
+
'country': ['Egypt', 'Palestine'],
|
400 |
+
'poem': ['قصيدة مصرية', 'قصيدة فلسطينية']
|
401 |
+
})
|
402 |
+
st.dataframe(example_df)
|
403 |
|
404 |
+
if __name__ == "__main__":
|
405 |
+
main()
|