Update app.py
Browse files
app.py
CHANGED
@@ -93,7 +93,7 @@ def process_model(target_word):
|
|
93 |
# Load the Word2Vec model
|
94 |
model = Word2Vec.load("word2vec.model")
|
95 |
unique_words = get_unique(model)
|
96 |
-
|
97 |
# Extract the embedding vector of each word
|
98 |
word_vectors = np.array([model.wv[word] for word in unique_words])
|
99 |
|
@@ -101,26 +101,26 @@ def process_model(target_word):
|
|
101 |
word_vectors_3d = apply_pca(word_vectors)
|
102 |
|
103 |
# Set colors (with added transparency)
|
104 |
-
colors = ['rgba(
|
105 |
|
106 |
# Find the 10 nearest words
|
107 |
if target_word in model.wv:
|
108 |
similar_words = model.wv.most_similar(target_word, topn=10)
|
109 |
similar_word_indices = [unique_words.index(word) for word, _ in similar_words]
|
110 |
for idx in similar_word_indices:
|
111 |
-
colors[idx] = 'rgba(
|
112 |
|
113 |
# Find the 10 farthest words
|
114 |
if target_word in model.wv:
|
115 |
all_words = model.wv.index_to_key # ๋ชจ๋ธ์ ํฌํจ๋ ๋ชจ๋ ๋จ์ด ๋ฆฌ์คํธ
|
116 |
-
dissimilar_words = sorted(
|
117 |
-
|
118 |
-
key=lambda x: x[1]
|
|
|
119 |
|
120 |
dissimilar_word_indices = [unique_words.index(word) for word, _ in dissimilar_words]
|
121 |
for idx in dissimilar_word_indices:
|
122 |
-
colors[idx] = 'rgba(
|
123 |
-
|
124 |
|
125 |
# Create a 3D scatter plot using Plotly
|
126 |
fig = go.Figure(data=[go.Scatter3d(
|
@@ -131,7 +131,7 @@ def process_model(target_word):
|
|
131 |
text=unique_words,
|
132 |
textposition="top center",
|
133 |
marker=dict(
|
134 |
-
size=
|
135 |
color=colors,
|
136 |
)
|
137 |
)])
|
@@ -139,12 +139,12 @@ def process_model(target_word):
|
|
139 |
fig.update_layout(
|
140 |
title="Word Embeddings 3D Visualization",
|
141 |
scene=dict(
|
142 |
-
xaxis_title="
|
143 |
-
yaxis_title="
|
144 |
-
zaxis_title="
|
145 |
),
|
146 |
-
width=
|
147 |
-
height=
|
148 |
)
|
149 |
|
150 |
# Build the list of the 10 nearest words
|
@@ -152,7 +152,11 @@ def process_model(target_word):
|
|
152 |
if target_word in model.wv:
|
153 |
similar_words_text = "๊ฐ์ฅ ๊ฐ๊น์ด ๋จ์ด 10๊ฐ:\n" + "\n".join([f"{word}: {score:.4f}" for word, score in similar_words])
|
154 |
|
155 |
-
|
|
|
|
|
|
|
|
|
156 |
|
157 |
|
158 |
# Start of the Gradio interface
|
@@ -176,7 +180,7 @@ with gr.Blocks(css=".plot-box {width: 70%; height: 500px;}") as iface:
|
|
176 |
dissimilar_words_output = gr.Textbox(label="์ ์ฌํ์ง ์์ ๋จ์ด", interactive=False, lines=5)
|
177 |
|
178 |
submit_btn.click(
|
179 |
-
fn=
|
180 |
inputs=[word_input],
|
181 |
outputs=[plot_output, similar_words_output, dissimilar_words_output]
|
182 |
)
|
|
|
93 |
# Load the Word2Vec model
|
94 |
model = Word2Vec.load("word2vec.model")
|
95 |
unique_words = get_unique(model)
|
96 |
+
|
97 |
# Extract the embedding vector of each word
|
98 |
word_vectors = np.array([model.wv[word] for word in unique_words])
|
99 |
|
|
|
101 |
word_vectors_3d = apply_pca(word_vectors)
|
102 |
|
103 |
# Set colors (with added transparency)
|
104 |
+
colors = ['rgba(255, 255, 255, 0.15)' if word != target_word else 'rgba(255, 20, 147, 0.9)' for word in unique_words]
|
105 |
|
106 |
# Find the 10 nearest words
|
107 |
if target_word in model.wv:
|
108 |
similar_words = model.wv.most_similar(target_word, topn=10)
|
109 |
similar_word_indices = [unique_words.index(word) for word, _ in similar_words]
|
110 |
for idx in similar_word_indices:
|
111 |
+
colors[idx] = 'rgba(255, 165, 0, 1)' # ๊ฐ๊น์ด ๋จ์ด๋ค์ ์ฃผํฉ์์ผ๋ก ํ์
|
112 |
|
113 |
# Find the 10 farthest words
|
114 |
if target_word in model.wv:
|
115 |
all_words = model.wv.index_to_key # ๋ชจ๋ธ์ ํฌํจ๋ ๋ชจ๋ ๋จ์ด ๋ฆฌ์คํธ
|
116 |
+
dissimilar_words = sorted(
|
117 |
+
[(word, model.wv.similarity(target_word, word)) for word in all_words if word != target_word],
|
118 |
+
key=lambda x: x[1]
|
119 |
+
)[:10] # ์ ์ฌ๋๊ฐ ๊ฐ์ฅ ๋ฎ์ 10๊ฐ ๋จ์ด ์ ํ
|
120 |
|
121 |
dissimilar_word_indices = [unique_words.index(word) for word, _ in dissimilar_words]
|
122 |
for idx in dissimilar_word_indices:
|
123 |
+
colors[idx] = 'rgba(138, 43, 226, 0.8)' # ๊ฐ์ฅ ๋จผ ๋จ์ด๋ค์ ๋ณด๋ผ์์ผ๋ก ํ์
|
|
|
124 |
|
125 |
# Create a 3D scatter plot using Plotly
|
126 |
fig = go.Figure(data=[go.Scatter3d(
|
|
|
131 |
text=unique_words,
|
132 |
textposition="top center",
|
133 |
marker=dict(
|
134 |
+
size=4,
|
135 |
color=colors,
|
136 |
)
|
137 |
)])
|
|
|
139 |
fig.update_layout(
|
140 |
title="Word Embeddings 3D Visualization",
|
141 |
scene=dict(
|
142 |
+
xaxis_title="X",
|
143 |
+
yaxis_title="Y",
|
144 |
+
zaxis_title="Z"
|
145 |
),
|
146 |
+
width=800,
|
147 |
+
height=800
|
148 |
)
|
149 |
|
150 |
# Build the list of the 10 nearest words
|
|
|
152 |
if target_word in model.wv:
|
153 |
similar_words_text = "๊ฐ์ฅ ๊ฐ๊น์ด ๋จ์ด 10๊ฐ:\n" + "\n".join([f"{word}: {score:.4f}" for word, score in similar_words])
|
154 |
|
155 |
+
dissimilar_words_text = ""
|
156 |
+
if target_word in model.wv:
|
157 |
+
dissimilar_words_text = "๊ฐ์ฅ ๋จผ ๋จ์ด 10๊ฐ:\n" + "\n".join([f"{word}: {score:.4f}" for word, score in dissimilar_words])
|
158 |
+
|
159 |
+
return fig, similar_words_text, dissimilar_words_text
|
160 |
|
161 |
|
162 |
# Start of the Gradio interface
|
|
|
180 |
dissimilar_words_output = gr.Textbox(label="์ ์ฌํ์ง ์์ ๋จ์ด", interactive=False, lines=5)
|
181 |
|
182 |
submit_btn.click(
|
183 |
+
fn=process_model,
|
184 |
inputs=[word_input],
|
185 |
outputs=[plot_output, similar_words_output, dissimilar_words_output]
|
186 |
)
|