kambris committed on
Commit
506a6c4
·
verified ·
1 Parent(s): 1cf0ee0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -2
app.py CHANGED
@@ -10,6 +10,10 @@ import os
10
  from wordcloud import WordCloud
11
  import matplotlib.pyplot as plt
12
  import pkg_resources
 
 
 
 
13
 
14
  current_dir = os.path.dirname(os.path.abspath(__file__))
15
  font_path = os.path.join(current_dir, "ArabicR2013-J25x.ttf")
@@ -89,6 +93,40 @@ def get_cached_embeddings(text, tokenizer, model):
89
  """Cache embeddings to avoid recomputation"""
90
  return get_embedding_for_text(text, tokenizer, model)
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  def split_text(text, max_length=512):
94
  """Split text into chunks of maximum token length while preserving word boundaries."""
@@ -430,7 +468,7 @@ if uploaded_file is not None:
430
  if summaries:
431
  st.success("Analysis complete!")
432
 
433
- tab1, tab2 = st.tabs(["Country Summaries", "Global Topics"])
434
 
435
  with tab1:
436
  for summary in summaries:
@@ -463,7 +501,10 @@ if uploaded_file is not None:
463
  words = topic_model.get_topic(row['Topic'])
464
  topic_name = " | ".join([word for word, _ in words[:5]])
465
  st.write(f"• Topic {row['Topic']}: {topic_name} ({row['Count']} poems)")
466
-
 
 
 
467
  except Exception as e:
468
  st.error(f"Error processing file: {str(e)}")
469
 
 
10
  from wordcloud import WordCloud
11
  import matplotlib.pyplot as plt
12
  import pkg_resources
13
+ import folium
14
+ from folium.plugins import HeatMap
15
+ import country_converter as coco
16
+ from streamlit_folium import folium_static
17
 
18
  current_dir = os.path.dirname(os.path.abspath(__file__))
19
  font_path = os.path.join(current_dir, "ArabicR2013-J25x.ttf")
 
93
  """Cache embeddings to avoid recomputation"""
94
  return get_embedding_for_text(text, tokenizer, model)
95
 
96
# Approximate country centroids as (latitude, longitude), keyed by ISO 3166-1
# alpha-2 code. country_converter only translates between naming/classification
# schemes and carries no coordinates, so marker positions come from this table.
_ARAB_WORLD_CENTROIDS = {
    "AE": (23.42, 53.85),
    "BH": (25.93, 50.64),
    "DJ": (11.83, 42.59),
    "DZ": (28.03, 1.66),
    "EG": (26.82, 30.80),
    "IQ": (33.22, 43.68),
    "JO": (30.59, 36.24),
    "KM": (-11.88, 43.87),
    "KW": (29.31, 47.48),
    "LB": (33.85, 35.86),
    "LY": (26.34, 17.23),
    "MA": (31.79, -7.09),
    "MR": (21.01, -10.94),
    "OM": (21.51, 55.92),
    "PS": (31.95, 35.23),
    "QA": (25.35, 51.18),
    "SA": (23.89, 45.08),
    "SD": (12.86, 30.22),
    "SO": (5.15, 46.20),
    "SY": (34.80, 39.00),
    "TN": (33.89, 9.54),
    "YE": (15.55, 48.52),
}


def create_theme_map(summaries, topic_model, country_coords=None):
    """Create an interactive map showing theme distributions across countries.

    One red circle marker is placed per country; its popup lists up to five
    entries from that country's ``top_topics``.

    Args:
        summaries: Iterable of mappings. Each is read for ``'country'`` (a
            country name) and ``'top_topics'`` (a sequence of mappings with
            ``'topic'`` and ``'count'`` keys).
        topic_model: Not used by this function; kept so existing callers that
            pass it continue to work.
        country_coords: Optional mapping of ISO2 code -> ``(lat, lon)`` that
            extends or overrides the built-in centroid table.

    Returns:
        The ``folium.Map`` with one marker per country that could be located.
        Summaries whose country cannot be resolved to a known centroid, or
        that lack the expected keys, are skipped rather than aborting the map.
    """
    import html  # local import: stdlib only, keeps this block self-contained

    centroids = dict(_ARAB_WORLD_CENTROIDS)
    if country_coords:
        for code, position in country_coords.items():
            centroids[str(code).upper()] = tuple(position)

    # Base map centered on the Arab world.
    m = folium.Map(location=[25, 45], zoom_start=4)
    cc = coco.CountryConverter()

    for summary in summaries:
        try:
            country = summary['country']
            top_topics = summary['top_topics'][:5]

            iso2 = cc.convert(names=[country], to='ISO2')
            # convert() may hand back a bare string or a list depending on
            # the input; normalise to a single code.
            if isinstance(iso2, (list, tuple)):
                iso2 = iso2[0] if iso2 else None

            # Unmatched names come back as a placeholder string, which simply
            # misses the table and is skipped here.
            position = centroids.get(str(iso2).upper())
            if position is None:
                continue

            # The text originates from the uploaded file, so escape it before
            # embedding it in popup HTML.
            theme_lines = '<br>'.join(
                f"• {html.escape(str(topic['topic']))}: "
                f"{html.escape(str(topic['count']))}"
                for topic in top_topics
            )
        except (KeyError, TypeError):
            # Malformed summary entry: leave it off the map.
            continue

        popup_content = (
            f"<h4>{html.escape(str(country))}</h4>"
            "<b>Top Themes:</b><br>"
            f"{theme_lines}"
        )

        folium.CircleMarker(
            location=[position[0], position[1]],
            radius=20,
            popup=folium.Popup(popup_content, max_width=300),
            color='red',
            fill=True,
            fill_opacity=0.7,
        ).add_to(m)

    return m
130
 
131
  def split_text(text, max_length=512):
132
  """Split text into chunks of maximum token length while preserving word boundaries."""
 
468
  if summaries:
469
  st.success("Analysis complete!")
470
 
471
+ tab1, tab2, tab3 = st.tabs(["Country Summaries", "Global Topics", "Theme Map"])
472
 
473
  with tab1:
474
  for summary in summaries:
 
501
  words = topic_model.get_topic(row['Topic'])
502
  topic_name = " | ".join([word for word, _ in words[:5]])
503
  st.write(f"• Topic {row['Topic']}: {topic_name} ({row['Count']} poems)")
504
+ with tab3:
505
+ st.subheader("Thematic Distribution Map")
506
+ theme_map = create_theme_map(summaries, topic_model)
507
+ folium_static(theme_map)
508
  except Exception as e:
509
  st.error(f"Error processing file: {str(e)}")
510