Update app.py
Browse files
app.py
CHANGED
@@ -50,8 +50,21 @@ def create_heatmap(embeddings, words):
|
|
50 |
return fig
|
51 |
|
52 |
def create_word_scatter(embeddings, words):
|
53 |
-
#
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
embeddings_2d = tsne.fit_transform(embeddings)
|
56 |
|
57 |
# Create scatter plot
|
@@ -59,11 +72,21 @@ def create_word_scatter(embeddings, words):
|
|
59 |
x=embeddings_2d[:, 0],
|
60 |
y=embeddings_2d[:, 1],
|
61 |
text=words,
|
62 |
-
title='Word Embeddings in 2D Space'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
)
|
64 |
|
65 |
-
fig.update_traces(textposition='top center')
|
66 |
-
fig.update_layout(height=400)
|
67 |
return fig
|
68 |
|
69 |
def main():
|
|
|
50 |
return fig
|
51 |
|
52 |
def create_word_scatter(embeddings, words):
|
53 |
+
# Calculate appropriate perplexity value
|
54 |
+
n_samples = len(embeddings)
|
55 |
+
# Perplexity is typically chosen between 5 and 50 and must be strictly less than n_samples (only the upper cap is enforced below)
|
56 |
+
perplexity = min(30, n_samples - 1) # Default is 30, but ensure it's less than n_samples
|
57 |
+
|
58 |
+
# Reduce dimensions for visualization using t-SNE
|
59 |
+
tsne = TSNE(
|
60 |
+
n_components=2,
|
61 |
+
perplexity=perplexity,
|
62 |
+
random_state=42,
|
63 |
+
init='random',
|
64 |
+
learning_rate='auto'
|
65 |
+
)
|
66 |
+
|
67 |
+
# Perform t-SNE dimensionality reduction
|
68 |
embeddings_2d = tsne.fit_transform(embeddings)
|
69 |
|
70 |
# Create scatter plot
|
|
|
72 |
x=embeddings_2d[:, 0],
|
73 |
y=embeddings_2d[:, 1],
|
74 |
text=words,
|
75 |
+
title=f'Word Embeddings in 2D Space (perplexity={perplexity})'
|
76 |
+
)
|
77 |
+
|
78 |
+
# Update layout for better visualization
|
79 |
+
fig.update_traces(
|
80 |
+
textposition='top center',
|
81 |
+
mode='markers+text'
|
82 |
+
)
|
83 |
+
fig.update_layout(
|
84 |
+
height=400,
|
85 |
+
showlegend=False,
|
86 |
+
xaxis_title="t-SNE dimension 1",
|
87 |
+
yaxis_title="t-SNE dimension 2"
|
88 |
)
|
89 |
|
|
|
|
|
90 |
return fig
|
91 |
|
92 |
def main():
|