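# Word Embedding Demo App (Gradio app for a Hugging Face Space).
# Computes the analogy Word3 + (Word2 - Word1) with a pretrained word2vec model
# and plots the input words, their nearest neighbours, and the result in 2-D.
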
import gensim.downloader
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
model = gensim.downloader.load("word2vec-google-news-300")  # smaller alternative: "glove-wiki-gigaword-50"
cache = "/home/user/app/d.tif"  # path where the plot is saved and returned to the Gradio Image output
# Reduce high-dimensional embeddings to two dimensions for plotting
def reduce_dimensions(data, method='PCA'):
    if method == 'PCA':
        reducer = PCA(n_components=2)
    elif method == 'TSNE':
        # perplexity must stay below the number of points being embedded
        reducer = TSNE(n_components=2, learning_rate='auto', init='random', perplexity=4)
    else:
        raise ValueError(f"Unsupported method: {method}")
    return reducer.fit_transform(data)
# Plot the 2-D projection, labelling each point and drawing the two analogy arrows
def plot_reduced_data(reduced_data, labels, title):
    plt.figure(figsize=(10, 8))
    plt.scatter(reduced_data[:, 0], reduced_data[:, 1], alpha=0.6)
    for i, label in enumerate(labels):
        plt.annotate(" " + label, (reduced_data[i, 0], reduced_data[i, 1]), fontsize=18)
    plt.title(title)

    # Arrow 1: from Word1 (first point) to Word2 (second point)
    start_point = (reduced_data[0, 0], reduced_data[0, 1])
    end_point = (reduced_data[1, 0], reduced_data[1, 1])
    plt.annotate('', xy=end_point, xytext=start_point,
                 arrowprops=dict(arrowstyle="->", color='green', lw=3))

    # Arrow 2: from Word3 (third point) to the computed analogy vector (last point)
    start_point = (reduced_data[2, 0], reduced_data[2, 1])
    end_point = (reduced_data[-1, 0], reduced_data[-1, 1])
    plt.annotate('', xy=end_point, xytext=start_point,
                 arrowprops=dict(arrowstyle="->", color='green', lw=3))

    plt.xlabel('Component 1')
    plt.ylabel('Component 2')
    plt.grid(True)
    plt.savefig(cache, dpi=300)
description = """
### Word Embedding Demo App
Universidade Federal de São Paulo - Escola Paulista de Medicina
The output is Word3 + (Word2 - Word1)
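For example, with Word1 = man, Word2 = woman, and Word3 = king, the app searches for the nearest neighbours of king + (woman - man), which is typically "queen".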
Credits:
* Gensim
* Word2Vec
"""
Word1 = gr.Textbox()
Word2 = gr.Textbox()
Word3 = gr.Textbox()
label = gr.Label(show_label=True, label="Word4")  # defined but not wired into the Interface below
sp = gr.Image()
def inference(word1, word2, word3):
    # Analogy arithmetic: Word3 + (Word2 - Word1)
    transform = model[word3] + model[word2] - model[word1]
    # Nearest neighbours of the resulting vector: a list of (word, cosine similarity) pairs
    output = model.similar_by_vector(transform)
    print(output)
    # Collect the three input words, the top neighbours, and the analogy vector itself
    word_list = [word1, word2, word3]
    word_list.extend([word for word, _ in output[:6]])
    words = {key: model[key] for key in word_list}
    words[word3 + " + (" + word2 + " - " + word1 + ")"] = transform
    data = np.concatenate([x[np.newaxis, :] for x in words.values()], axis=0)
    print(data.shape)
    labels = words.keys()
    reduced_data_pca = reduce_dimensions(data, method='PCA')
    print(reduced_data_pca.shape)
    plot_reduced_data(reduced_data_pca, labels, 'PCA Results')
    return cache
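
# Example analogies shown in the UI; each row is [Word1, Word2, Word3]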
examples = [
    ["woman", "man", "girl"],
    ["woman", "man", "granddaughter"],
    ["woman", "man", "aunt"],
]
iface = gr.Interface(
    fn=inference,
    inputs=[Word1, Word2, Word3],
    outputs=sp,
    description=description,
    examples=examples,
)
iface.launch()