kambris committed on
Commit d0f4851 · verified · 1 Parent(s): 188a8f6

Update app.py

Files changed (1):
  1. app.py +61 -33
app.py CHANGED
@@ -172,36 +172,56 @@ class SpeechAnalyzer:
             devices_found[device] = count
         return devices_found
 
-    def create_semantic_network(self, text, top_n=20):
-        """Create semantic network graph"""
-        # Use spaCy for advanced parsing
+    def create_semantic_network(self, text, top_n=20, window_size=10):
+        """Create semantic network graph with weighted edges"""
         doc = nlp(text)
 
         # Create graph
         G = nx.Graph()
 
         # Extract top nouns and their relationships
-        nouns = [token.text for token in doc if token.pos_ == 'NOUN']
+        nouns = [token.text.lower() for token in doc if token.pos_ == 'NOUN']
         noun_freq = nltk.FreqDist(nouns)
-        top_nouns = [noun for noun, _ in noun_freq.most_common(top_n)]
+        top_nouns = [noun for noun, freq in noun_freq.most_common(top_n)]
 
-        # Add nodes and edges
+        # Create co-occurrence matrix
+        cooc_matrix = np.zeros((len(top_nouns), len(top_nouns)))
+        noun_to_idx = {noun: idx for idx, noun in enumerate(top_nouns)}
+
+        # Calculate co-occurrences within window_size
+        words = [token.text.lower() for token in doc]
+        for i in range(len(words)):
+            window_words = words[max(0, i-window_size):min(len(words), i+window_size)]
+            for noun1 in top_nouns:
+                if noun1 in window_words:
+                    for noun2 in top_nouns:
+                        if noun1 != noun2 and noun2 in window_words:
+                            idx1, idx2 = noun_to_idx[noun1], noun_to_idx[noun2]
+                            cooc_matrix[idx1][idx2] += 1
+                            cooc_matrix[idx2][idx1] += 1
+
+        # Add nodes and weighted edges
         for noun in top_nouns:
-            G.add_node(noun)
+            G.add_node(noun, size=noun_freq[noun])
 
-        # Connect related nouns and add positions
-        pos = nx.spring_layout(G)
+        # Add edges with weights based on co-occurrence
+        max_weight = np.max(cooc_matrix)
         for i in range(len(top_nouns)):
             for j in range(i+1, len(top_nouns)):
-                if top_nouns[i] in text and top_nouns[j] in text:
-                    G.add_edge(top_nouns[i], top_nouns[j])
+                weight = cooc_matrix[i][j]
+                if weight > 0:
+                    G.add_edge(top_nouns[i], top_nouns[j],
+                               weight=weight,
+                               width=3 * (weight/max_weight))
+
+        # Calculate layout with weighted edges
+        pos = nx.spring_layout(G, k=1, iterations=50)
 
-        # Store positions in graph
+        # Store positions and attributes in graph
         for node in G.nodes():
             G.nodes[node]['pos'] = pos[node]
 
         return G
-
 def main():
     st.title("🗣️ Advanced Political Speech Analysis Toolkit")
 
@@ -314,24 +334,35 @@ def main():
 
         # Create the network visualization
         network_fig = go.Figure()
+
+        # Add edges with varying widths
+        for edge in semantic_graph.edges():
+            x0, y0 = semantic_graph.nodes[edge[0]]['pos']
+            x1, y1 = semantic_graph.nodes[edge[1]]['pos']
+            width = semantic_graph.edges[edge]['width']
+
+            network_fig.add_trace(go.Scatter(
+                x=[x0, x1, None],
+                y=[y0, y1, None],
+                mode='lines',
+                line=dict(width=width, color='#888'),
+                hoverinfo='none'
+            ))
 
-        # Add edges
-        network_fig.add_trace(go.Scatter(
-            x=edge_x, y=edge_y,
-            mode='lines',
-            line=dict(width=0.5, color='#888'),
-            hoverinfo='none'
-        ))
-
-        # Add nodes
-        network_fig.add_trace(go.Scatter(
-            x=node_x, y=node_y,
-            mode='markers+text',
-            marker=dict(size=10),
-            text=node_text,
-            textposition="top center",
-            hoverinfo='text'
-        ))
+        # Add nodes with varying sizes
+        for node in semantic_graph.nodes():
+            x, y = semantic_graph.nodes[node]['pos']
+            size = semantic_graph.nodes[node]['size']
+
+            network_fig.add_trace(go.Scatter(
+                x=[x],
+                y=[y],
+                mode='markers+text',
+                marker=dict(size=10 + size/2),
+                text=[node],
+                textposition="top center",
+                hoverinfo='text'
+            ))
 
         network_fig.update_layout(
             showlegend=False,
@@ -340,9 +371,6 @@ def main():
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
         )
-
-        st.plotly_chart(network_fig)
-
 
     with tab5:
         st.subheader("Advanced NLP Analysis")
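
For reviewers, a minimal, self-contained sketch of the co-occurrence weighting this commit introduces. The sample tokens, noun list, and window size below are placeholders (in app.py they come from spaCy's nlp(text) and nltk.FreqDist), so this only illustrates the logic of the new create_semantic_network, not the app itself:

# Standalone sketch of the commit's co-occurrence weighting.
# tokens, top_nouns, and window_size are stand-ins for what app.py
# derives with spaCy and NLTK.
import numpy as np
import networkx as nx

tokens = ("economy jobs growth economy trade jobs economy "
          "security trade growth jobs security").split()
top_nouns = ["economy", "jobs", "growth", "trade", "security"]
noun_to_idx = {noun: idx for idx, noun in enumerate(top_nouns)}
window_size = 5

# Count how often two top nouns fall inside the same sliding window,
# mirroring the loop added in create_semantic_network.
cooc_matrix = np.zeros((len(top_nouns), len(top_nouns)))
for i in range(len(tokens)):
    window_words = tokens[max(0, i - window_size):min(len(tokens), i + window_size)]
    for noun1 in top_nouns:
        if noun1 in window_words:
            for noun2 in top_nouns:
                if noun1 != noun2 and noun2 in window_words:
                    idx1, idx2 = noun_to_idx[noun1], noun_to_idx[noun2]
                    cooc_matrix[idx1][idx2] += 1
                    cooc_matrix[idx2][idx1] += 1

# Build the weighted graph the same way the new method does: node size
# from frequency, edge width scaled against the strongest co-occurrence.
G = nx.Graph()
for noun in top_nouns:
    G.add_node(noun, size=tokens.count(noun))
max_weight = np.max(cooc_matrix)
for i in range(len(top_nouns)):
    for j in range(i + 1, len(top_nouns)):
        weight = cooc_matrix[i][j]
        if weight > 0:
            G.add_edge(top_nouns[i], top_nouns[j],
                       weight=weight,
                       width=3 * (weight / max_weight))

pos = nx.spring_layout(G, k=1, iterations=50)
for node in G.nodes():
    G.nodes[node]['pos'] = pos[node]

# The 'width' attribute is what the per-edge Plotly loop in main() reads
# to draw thicker lines for stronger co-occurrences.
print(sorted(G.edges(data=True), key=lambda e: -e[2]['weight'])[:3])

Note that max_weight is only used when weight > 0, so the normalization cannot divide by zero even when no top nouns co-occur.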