Update modules/semantic_analysis.py

modules/semantic_analysis.py  +71 -48
CHANGED
@@ -1,4 +1,4 @@
-#
+#semantic_analysis.py
 import streamlit as st
 import spacy
 import networkx as nx
@@ -84,71 +84,94 @@ POS_TRANSLATIONS = {
 }
 }
 ########################################################################################################################################
+
+def extract_entities(doc):
+    entities = {
+        "Personas": [],
+        "Conceptos": [],
+        "Lugares": [],
+        "Fechas": []
+    }
+
+    for ent in doc.ents:
+        if ent.label_ == "PER":
+            entities["Personas"].append(ent.text)
+        elif ent.label_ in ["LOC", "GPE"]:
+            entities["Lugares"].append(ent.text)
+        elif ent.label_ == "DATE":
+            entities["Fechas"].append(ent.text)
+        else:
+            entities["Conceptos"].append(ent.text)
+
+    return entities
+
+def visualize_context_graph(doc, lang):
+    G = nx.Graph()
+    entities = extract_entities(doc)
+
+    # Add nodes
+    for category, items in entities.items():
+        for item in items:
+            G.add_node(item, category=category)
+
+    # Add edges
+    for sent in doc.sents:
+        sent_entities = [ent.text for ent in sent.ents if ent.text in G.nodes()]
+        for i in range(len(sent_entities)):
+            for j in range(i+1, len(sent_entities)):
+                G.add_edge(sent_entities[i], sent_entities[j])
+
+    # Visualize
+    plt.figure(figsize=(20, 15))
+    pos = nx.spring_layout(G, k=0.5, iterations=50)
+
+    color_map = {"Personas": "lightblue", "Conceptos": "lightgreen", "Lugares": "lightcoral", "Fechas": "lightyellow"}
+    node_colors = [color_map[G.nodes[node]['category']] for node in G.nodes()]
+
+    nx.draw(G, pos, node_color=node_colors, with_labels=True, node_size=3000, font_size=8, font_weight='bold')
+
+    # Add a legend
+    legend_elements = [plt.Rectangle((0, 0), 1, 1, fc=color, edgecolor='none') for color in color_map.values()]
+    plt.legend(legend_elements, color_map.keys(), loc='upper left', bbox_to_anchor=(1, 1))
+
+    plt.title("Análisis de Contexto" if lang == 'es' else "Context Analysis" if lang == 'en' else "Analyse de Contexte", fontsize=20)
+    plt.axis('off')
+
+    return plt
+
+def visualize_semantic_relations(doc, lang):
+    # This function can keep the logic already in visualize_syntax_graph,
+    # with some modifications to focus on semantic relations
     G, word_colors = create_syntax_graph(doc, lang)

-    plt.figure(figsize=(24, 18))
-    pos = nx.spring_layout(G, k=0.9, iterations=50)
+    plt.figure(figsize=(24, 18))
+    pos = nx.spring_layout(G, k=0.9, iterations=50)

     node_colors = [data['color'] for _, data in G.nodes(data=True)]
     node_sizes = [data['size'] for _, data in G.nodes(data=True)]

     nx.draw(G, pos, with_labels=False, node_color=node_colors, node_size=node_sizes, arrows=True,
-            arrowsize=20, width=2, edge_color='gray')
+            arrowsize=20, width=2, edge_color='gray')

     nx.draw_networkx_labels(G, pos, {node: data['label'] for node, data in G.nodes(data=True)},
-                            font_size=10, font_weight='bold')
+                            font_size=10, font_weight='bold')

     edge_labels = nx.get_edge_attributes(G, 'label')
     nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8)

-    plt.title("
-              fontsize=20, fontweight='bold')
+    plt.title("Análisis de Relaciones Semánticas" if lang == 'es' else "Semantic Relations Analysis" if lang == 'en' else "Analyse des Relations Sémantiques",
+              fontsize=20, fontweight='bold')
     plt.axis('off')

     legend_elements = [plt.Rectangle((0, 0), 1, 1, facecolor=color, edgecolor='none',
                                      label=f"{POS_TRANSLATIONS[lang][pos]} ({count_pos(doc)[pos]})")
                        for pos, color in POS_COLORS.items() if pos in set(nx.get_node_attributes(G, 'pos').values())]
-    plt.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=12)
+    plt.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=12)

     return plt
-################################################################################################################################
-def visualize_syntax(text, nlp, lang):
-    max_tokens = 5000
-    doc = nlp(text)
-    if len(doc) > max_tokens:
-        doc = nlp(text[:max_tokens])
-        print(f"Warning: The input text is too long. Only the first {max_tokens} tokens will be visualized.")
-    return visualize_syntax_graph(doc, lang)

+def perform_semantic_analysis(text, nlp, lang):
+    doc = nlp(text)
+    context_graph = visualize_context_graph(doc, lang)
+    relations_graph = visualize_semantic_relations(doc, lang)
+    return context_graph, relations_graph
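
For orientation, here is a minimal sketch of how the new `perform_semantic_analysis` entry point could be wired into a Streamlit page (the module already imports `streamlit`). The spaCy model names and the widget layout are assumptions for illustration, not part of this change; and since both visualization helpers return the `plt` module rather than a `Figure`, the sketch collects the figures that were drawn before handing them to `st.pyplot`.

```python
# Hypothetical caller for the new API in modules/semantic_analysis.py.
# Model names and page layout are assumptions, not part of the diff.
import matplotlib.pyplot as plt
import spacy
import streamlit as st

from modules.semantic_analysis import perform_semantic_analysis

SPACY_MODELS = {  # assumed model names; use whatever models the app installs
    'es': 'es_core_news_sm',
    'en': 'en_core_web_sm',
    'fr': 'fr_core_news_sm',
}

@st.cache_resource
def load_nlp(lang):
    # Cache the pipeline so it is loaded once per language, not on every rerun.
    return spacy.load(SPACY_MODELS[lang])

lang = st.selectbox("Idioma / Language / Langue", list(SPACY_MODELS))
text = st.text_area("Texto / Text / Texte")

if st.button("Analizar / Analyze / Analyser") and text.strip():
    nlp = load_nlp(lang)
    # Both helpers draw onto new pyplot figures and return the plt module,
    # so grab every open figure afterwards and render each one.
    perform_semantic_analysis(text, nlp, lang)
    for num in plt.get_fignums():
        st.pyplot(plt.figure(num))
    plt.close('all')  # free the figures between reruns
```

Returning the `plt` module works, but it leaves figure bookkeeping to the caller as above; a possible follow-up would be to return `plt.gcf()` from each helper so callers can pass the figure straight to `st.pyplot`.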
|