# Spaces: Sleeping  (hosting-page status banner captured along with the file; not part of the program)
import gensim.downloader | |
import gradio as gr | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from sklearn.decomposition import PCA | |
from sklearn.manifold import TSNE | |
# Name of the pretrained embedding to fetch through gensim's downloader.
# The original comment also mentions glove-wiki-gigaword-50, presumably as an
# alternative model name -- TODO confirm.
_EMBEDDING_NAME = "word2vec-google-news-300"
model = gensim.downloader.load(_EMBEDDING_NAME)

# File path the rendered plot is saved to.
cache = "/home/user/app/d.tif"
# Function to reduce dimensions
def reduce_dimensions(data, method='PCA'):
    """Project ``data`` onto two components using PCA or t-SNE.

    Args:
        data: Samples handed unchanged to the reducer's ``fit_transform``.
        method: ``'PCA'`` (default) or ``'TSNE'``.

    Returns:
        The result of the selected reducer's ``fit_transform(data)``.

    Raises:
        ValueError: If ``method`` is not ``'PCA'`` or ``'TSNE'``.
    """
    # The local is called ``reducer`` (not ``model``) so it does not shadow
    # the module-level embedding model of the same name.
    if method == 'PCA':
        reducer = PCA(n_components=2)
    elif method == 'TSNE':
        reducer = TSNE(n_components=2, learning_rate='auto', init='random', perplexity=4)
    else:
        # Previously an unknown method fell through and failed with an
        # UnboundLocalError on the return line; fail with a clear message.
        raise ValueError(
            f"Unsupported reduction method {method!r}; expected 'PCA' or 'TSNE'."
        )
    return reducer.fit_transform(data)
# Plotting function
def plot_reduced_data(reduced_data, labels, title):
    """Scatter the 2-D points, label them, draw two arrows, and save the image.

    Args:
        reduced_data: Array indexed as ``[row, column]``; columns 0 and 1 are
            used as x and y. At least three rows are required because rows
            0, 1, 2 and the last row are used as arrow endpoints.
        labels: Iterable of strings, one per row of ``reduced_data``, in row
            order.
        title: Title placed on the axes.

    Returns:
        None. The figure is written to the module-level ``cache`` path at
        300 dpi.
    """
    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        ax.scatter(reduced_data[:, 0], reduced_data[:, 1], alpha=0.6)
        for i, label in enumerate(labels):
            ax.annotate(" " + label, (reduced_data[i, 0], reduced_data[i, 1]), fontsize=18)
        ax.set_title(title)

        # Arrow 1 goes from row 0 to row 1; arrow 2 goes from row 2 to the
        # last row. Each pair is (start row, end row).
        for start_row, end_row in ((0, 1), (2, -1)):
            ax.annotate(
                '',
                xy=(reduced_data[end_row, 0], reduced_data[end_row, 1]),
                xytext=(reduced_data[start_row, 0], reduced_data[start_row, 1]),
                arrowprops=dict(arrowstyle="->", color='green', lw=3),
            )

        ax.set_xlabel('Component 1')
        ax.set_ylabel('Component 2')
        ax.grid(True)
        fig.savefig(cache, dpi=300)
    finally:
        # pyplot keeps a reference to every figure it creates until it is
        # closed; without this each call would leave another figure in memory.
        plt.close(fig)
# Markdown text handed to the interface as its description.
description = """
### Word Embedding Demo App
Universidade Federal de São Paulo - Escola Paulista de Medicina
The output is Word3 + (Word2 - Word1)
Credits:
* Gensim
* Word2Vec
"""

# Three identical text inputs, created in order.
Word1, Word2, Word3 = (gr.Textbox() for _ in range(3))

# Output components: a label captioned "Word4" and an image.
label = gr.Label(label="Word4", show_label=True)
sp = gr.Image()
def inference(word1, word2, word3):
    """Compute ``word3 + (word2 - word1)`` and plot it with its nearest words.

    The three inputs, up to six of the words most similar to the resulting
    vector, and the resulting vector itself are reduced to 2-D with PCA and
    drawn by ``plot_reduced_data``.

    Args:
        word1: Word whose vector is subtracted.
        word2: Word whose vector is added.
        word3: Word whose vector the difference is added to.

    Returns:
        The module-level ``cache`` path, where the plot image was saved.

    Raises:
        gr.Error: If any input word is not in the embedding vocabulary.
    """
    # Check the vocabulary up front: indexing the model with an unknown word
    # raises a bare KeyError, which gives the user no usable message.
    missing = [w for w in dict.fromkeys((word1, word2, word3)) if w not in model]
    if missing:
        raise gr.Error(
            "Not in the model vocabulary: " + ", ".join(repr(w) for w in missing)
        )

    transform = model[word3] + model[word2] - model[word1]
    output = model.similar_by_vector(transform)
    print(output)

    word_list = [word1, word2, word3]
    word_list.extend(word for word, _score in output[:6])

    # NOTE(review): this dict collapses repeated words, so if two inputs are
    # equal the row positions the plot's arrows rely on will shift.
    words = {key: model[key] for key in word_list}
    words[word3 + " + (" + word2 + " - " + word1 + ")"] = transform

    # One row per entry, in insertion order.
    data = np.stack(list(words.values()))
    print(data.shape)
    labels = words.keys()

    reduced_data_pca = reduce_dimensions(data, method='PCA')
    print(reduced_data_pca.shape)
    plot_reduced_data(reduced_data_pca, labels, 'PCA Results')
    return cache
# Example rows in (word1, word2, word3) order; only the third word varies.
examples = [["woman", "man", third] for third in ("girl", "granddaughter", "aunt")]

# Wire the three text inputs to `inference` and show its result in the image.
iface = gr.Interface(
    inference,
    [Word1, Word2, Word3],
    sp,
    description=description,
    examples=examples,
)
iface.launch()