Ridealist committed on
Commit
7112f5e
·
verified ·
1 Parent(s): 8c42cdb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -16
app.py CHANGED
@@ -93,7 +93,7 @@ def process_model(target_word):
93
  # Word2Vec 모델 로드
94
  model = Word2Vec.load("word2vec.model")
95
  unique_words = get_unique(model)
96
-
97
  # ๊ฐ ๋‹จ์–ด์˜ ์ž„๋ฒ ๋”ฉ ๋ฒกํ„ฐ ์ถ”์ถœ
98
  word_vectors = np.array([model.wv[word] for word in unique_words])
99
 
@@ -101,26 +101,26 @@ def process_model(target_word):
101
  word_vectors_3d = apply_pca(word_vectors)
102
 
103
  # 색상 설정 (투명도 추가)
104
- colors = ['rgba(128, 128, 128, 0.15)' if word != target_word else 'rgba(255, 0, 0, 1)' for word in unique_words]
105
 
106
  # 가장 가까운 단어 10개 찾기
107
  if target_word in model.wv:
108
  similar_words = model.wv.most_similar(target_word, topn=10)
109
  similar_word_indices = [unique_words.index(word) for word, _ in similar_words]
110
  for idx in similar_word_indices:
111
- colors[idx] = 'rgba(0, 255, 0, 1)' # ๊ฐ€๊นŒ์šด ๋‹จ์–ด๋“ค์„ ์ดˆ๋ก์ƒ‰์œผ๋กœ ํ‘œ์‹œ
112
 
113
  # 가장 먼 단어 10개 찾기
114
  if target_word in model.wv:
115
  all_words = model.wv.index_to_key # ๋ชจ๋ธ์— ํฌํ•จ๋œ ๋ชจ๋“  ๋‹จ์–ด ๋ฆฌ์ŠคํŠธ
116
- dissimilar_words = sorted([(word, model.wv.similarity(target_word, word))
117
- for word in all_words if word != target_word],
118
- key=lambda x: x[1])[:10] # ์œ ์‚ฌ๋„๊ฐ€ ๊ฐ€์žฅ ๋‚ฎ์€ 10๊ฐœ ๋‹จ์–ด ์„ ํƒ
 
119
 
120
  dissimilar_word_indices = [unique_words.index(word) for word, _ in dissimilar_words]
121
  for idx in dissimilar_word_indices:
122
- colors[idx] = 'rgba(128, 0, 128, 1)' # ๊ฐ€์žฅ ๋จผ ๋‹จ์–ด๋“ค์„ ๋ณด๋ผ์ƒ‰์œผ๋กœ ํ‘œ์‹œ
123
-
124
 
125
  # Plotly๋ฅผ ์‚ฌ์šฉํ•œ 3D ์‚ฐ์ ๋„ ์ƒ์„ฑ
126
  fig = go.Figure(data=[go.Scatter3d(
@@ -131,7 +131,7 @@ def process_model(target_word):
131
  text=unique_words,
132
  textposition="top center",
133
  marker=dict(
134
- size=6,
135
  color=colors,
136
  )
137
  )])
@@ -139,12 +139,12 @@ def process_model(target_word):
139
  fig.update_layout(
140
  title="Word Embeddings 3D Visualization",
141
  scene=dict(
142
- xaxis_title="PCA 1",
143
- yaxis_title="PCA 2",
144
- zaxis_title="PCA 3"
145
  ),
146
- width=1000,
147
- height=1000
148
  )
149
 
150
  # 가장 가까운 단어 10개 목록 생성
@@ -152,7 +152,11 @@ def process_model(target_word):
152
  if target_word in model.wv:
153
  similar_words_text = "๊ฐ€์žฅ ๊ฐ€๊นŒ์šด ๋‹จ์–ด 10๊ฐœ:\n" + "\n".join([f"{word}: {score:.4f}" for word, score in similar_words])
154
 
155
- return fig, similar_words_text
 
 
 
 
156
 
157
 
158
  # Gradio 인터페이스 수정
@@ -176,7 +180,7 @@ with gr.Blocks(css=".plot-box {width: 70%; height: 500px;}") as iface:
176
  dissimilar_words_output = gr.Textbox(label="์œ ์‚ฌํ•˜์ง€ ์•Š์€ ๋‹จ์–ด", interactive=False, lines=5)
177
 
178
  submit_btn.click(
179
- fn=process_text,
180
  inputs=[word_input],
181
  outputs=[plot_output, similar_words_output, dissimilar_words_output]
182
  )
 
93
  # Word2Vec 모델 로드
94
  model = Word2Vec.load("word2vec.model")
95
  unique_words = get_unique(model)
96
+
97
  # ๊ฐ ๋‹จ์–ด์˜ ์ž„๋ฒ ๋”ฉ ๋ฒกํ„ฐ ์ถ”์ถœ
98
  word_vectors = np.array([model.wv[word] for word in unique_words])
99
 
 
101
  word_vectors_3d = apply_pca(word_vectors)
102
 
103
  # 색상 설정 (투명도 추가)
104
+ colors = ['rgba(255, 255, 255, 0.15)' if word != target_word else 'rgba(255, 20, 147, 0.9)' for word in unique_words]
105
 
106
  # 가장 가까운 단어 10개 찾기
107
  if target_word in model.wv:
108
  similar_words = model.wv.most_similar(target_word, topn=10)
109
  similar_word_indices = [unique_words.index(word) for word, _ in similar_words]
110
  for idx in similar_word_indices:
111
+ colors[idx] = 'rgba(255, 165, 0, 1)' # ๊ฐ€๊นŒ์šด ๋‹จ์–ด๋“ค์„ ์ฃผํ™ฉ์ƒ‰์œผ๋กœ ํ‘œ์‹œ
112
 
113
  # 가장 먼 단어 10개 찾기
114
  if target_word in model.wv:
115
  all_words = model.wv.index_to_key # ๋ชจ๋ธ์— ํฌํ•จ๋œ ๋ชจ๋“  ๋‹จ์–ด ๋ฆฌ์ŠคํŠธ
116
+ dissimilar_words = sorted(
117
+ [(word, model.wv.similarity(target_word, word)) for word in all_words if word != target_word],
118
+ key=lambda x: x[1]
119
+ )[:10] # ์œ ์‚ฌ๋„๊ฐ€ ๊ฐ€์žฅ ๋‚ฎ์€ 10๊ฐœ ๋‹จ์–ด ์„ ํƒ
120
 
121
  dissimilar_word_indices = [unique_words.index(word) for word, _ in dissimilar_words]
122
  for idx in dissimilar_word_indices:
123
+ colors[idx] = 'rgba(138, 43, 226, 0.8)' # ๊ฐ€์žฅ ๋จผ ๋‹จ์–ด๋“ค์„ ๋ณด๋ผ์ƒ‰์œผ๋กœ ํ‘œ์‹œ
 
124
 
125
  # Plotly๋ฅผ ์‚ฌ์šฉํ•œ 3D ์‚ฐ์ ๋„ ์ƒ์„ฑ
126
  fig = go.Figure(data=[go.Scatter3d(
 
131
  text=unique_words,
132
  textposition="top center",
133
  marker=dict(
134
+ size=4,
135
  color=colors,
136
  )
137
  )])
 
139
  fig.update_layout(
140
  title="Word Embeddings 3D Visualization",
141
  scene=dict(
142
+ xaxis_title="X",
143
+ yaxis_title="Y",
144
+ zaxis_title="Z"
145
  ),
146
+ width=800,
147
+ height=800
148
  )
149
 
150
  # 가장 가까운 단어 10개 목록 생성
 
152
  if target_word in model.wv:
153
  similar_words_text = "๊ฐ€์žฅ ๊ฐ€๊นŒ์šด ๋‹จ์–ด 10๊ฐœ:\n" + "\n".join([f"{word}: {score:.4f}" for word, score in similar_words])
154
 
155
+ dissimilar_words_text = ""
156
+ if target_word in model.wv:
157
+ dissimilar_words_text = "๊ฐ€์žฅ ๋จผ ๋‹จ์–ด 10๊ฐœ:\n" + "\n".join([f"{word}: {score:.4f}" for word, score in dissimilar_words])
158
+
159
+ return fig, similar_words_text, dissimilar_words_text
160
 
161
 
162
  # Gradio 인터페이스 수정
 
180
  dissimilar_words_output = gr.Textbox(label="์œ ์‚ฌํ•˜์ง€ ์•Š์€ ๋‹จ์–ด", interactive=False, lines=5)
181
 
182
  submit_btn.click(
183
+ fn=process_model,
184
  inputs=[word_input],
185
  outputs=[plot_output, similar_words_output, dissimilar_words_output]
186
  )