kambris committed on
Commit
e9be7bd
·
verified ·
1 Parent(s): e8e9aaf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -11
app.py CHANGED
@@ -14,7 +14,6 @@ import pkg_resources
14
  current_dir = os.path.dirname(os.path.abspath(__file__))
15
  font_path = os.path.join(current_dir, "ArabicR2013-J25x.ttf")
16
 
17
- # Add Arabic stop words
18
  ARABIC_STOP_WORDS = {
19
  'في', 'من', 'إلى', 'على', 'عن', 'مع', 'خلال', 'حتى', 'إذا', 'ثم',
20
  'أو', 'و', 'ف', 'ل', 'ب', 'ك', 'لل', 'ال', 'هذا', 'هذه', 'ذلك',
@@ -31,7 +30,7 @@ ARABIC_STOP_WORDS = {
31
  'اول', 'ضمن', 'انها', 'جميع', 'الذي', 'قبل', 'بعد', 'حول', 'ايضا',
32
  'لازم', 'حاجة', 'علي', 'يجب', 'صار', 'صارت', 'تحت', 'ضد'
33
  }
34
- # Configure page
35
  st.set_page_config(
36
  page_title="Arabic Poem Analysis",
37
  page_icon="📚",
@@ -250,7 +249,6 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
250
  embedding_model=bert_model,
251
  **topic_model_params)
252
 
253
- # Create vectorizer with stop words
254
  vectorizer = CountVectorizer(stop_words=list(ARABIC_STOP_WORDS),
255
  min_df=1,
256
  max_df=1.0)
@@ -316,7 +314,7 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
316
  continue
317
 
318
  return summaries, topic_model
319
- # Load models
320
  try:
321
  bert_tokenizer, bert_model, emotion_classifier = load_models()
322
  st.success("Models loaded successfully!")
@@ -328,28 +326,23 @@ except Exception as e:
328
  st.title("📚 Arabic Poem Analysis")
329
  st.write("Upload a CSV or Excel file containing Arabic poems with columns `country` and `poem`.")
330
 
331
- # File upload
332
  uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
333
 
334
  if uploaded_file is not None:
335
  try:
336
- # Read the file
337
  if uploaded_file.name.endswith('.csv'):
338
  df = pd.read_csv(uploaded_file)
339
  else:
340
  df = pd.read_excel(uploaded_file)
341
 
342
- # Validate columns
343
  required_columns = ['country', 'poem']
344
  if not all(col in df.columns for col in required_columns):
345
  st.error("File must contain 'country' and 'poem' columns.")
346
  st.stop()
347
 
348
- # Clean data
349
  df['country'] = df['country'].str.strip()
350
  df = df.dropna(subset=['country', 'poem'])
351
 
352
- # Add topic modeling controls
353
  st.subheader("Topic Modeling Settings")
354
  col1, col2 = st.columns(2)
355
 
@@ -404,7 +397,6 @@ if uploaded_file is not None:
404
  if summaries:
405
  st.success("Analysis complete!")
406
 
407
- # Display results in tabs
408
  tab1, tab2 = st.tabs(["Country Summaries", "Global Topics"])
409
 
410
  with tab1:
@@ -445,7 +437,6 @@ if uploaded_file is not None:
445
  else:
446
  st.info("👆 Upload a file to get started!")
447
 
448
- # Example format
449
  st.write("### Expected File Format:")
450
  example_df = pd.DataFrame({
451
  'country': ['Egypt', 'Palestine'],
 
14
  current_dir = os.path.dirname(os.path.abspath(__file__))
15
  font_path = os.path.join(current_dir, "ArabicR2013-J25x.ttf")
16
 
 
17
  ARABIC_STOP_WORDS = {
18
  'في', 'من', 'إلى', 'على', 'عن', 'مع', 'خلال', 'حتى', 'إذا', 'ثم',
19
  'أو', 'و', 'ف', 'ل', 'ب', 'ك', 'لل', 'ال', 'هذا', 'هذه', 'ذلك',
 
30
  'اول', 'ضمن', 'انها', 'جميع', 'الذي', 'قبل', 'بعد', 'حول', 'ايضا',
31
  'لازم', 'حاجة', 'علي', 'يجب', 'صار', 'صارت', 'تحت', 'ضد'
32
  }
33
+
34
  st.set_page_config(
35
  page_title="Arabic Poem Analysis",
36
  page_icon="📚",
 
249
  embedding_model=bert_model,
250
  **topic_model_params)
251
 
 
252
  vectorizer = CountVectorizer(stop_words=list(ARABIC_STOP_WORDS),
253
  min_df=1,
254
  max_df=1.0)
 
314
  continue
315
 
316
  return summaries, topic_model
317
+
318
  try:
319
  bert_tokenizer, bert_model, emotion_classifier = load_models()
320
  st.success("Models loaded successfully!")
 
326
  st.title("📚 Arabic Poem Analysis")
327
  st.write("Upload a CSV or Excel file containing Arabic poems with columns `country` and `poem`.")
328
 
 
329
  uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
330
 
331
  if uploaded_file is not None:
332
  try:
 
333
  if uploaded_file.name.endswith('.csv'):
334
  df = pd.read_csv(uploaded_file)
335
  else:
336
  df = pd.read_excel(uploaded_file)
337
 
 
338
  required_columns = ['country', 'poem']
339
  if not all(col in df.columns for col in required_columns):
340
  st.error("File must contain 'country' and 'poem' columns.")
341
  st.stop()
342
 
 
343
  df['country'] = df['country'].str.strip()
344
  df = df.dropna(subset=['country', 'poem'])
345
 
 
346
  st.subheader("Topic Modeling Settings")
347
  col1, col2 = st.columns(2)
348
 
 
397
  if summaries:
398
  st.success("Analysis complete!")
399
 
 
400
  tab1, tab2 = st.tabs(["Country Summaries", "Global Topics"])
401
 
402
  with tab1:
 
437
  else:
438
  st.info("👆 Upload a file to get started!")
439
 
 
440
  st.write("### Expected File Format:")
441
  example_df = pd.DataFrame({
442
  'country': ['Egypt', 'Palestine'],