Spaces:

felipekitamura
/

word_embeddings

Sleeping

File size: 1,949 Bytes

f5d7b87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0acfaf
f5d7b87
 
 
 
ceb61af
f5d7b87
a7e2cd3
f5d7b87
a0828e2
2dafbbf
f5d7b87
 
2dafbbf
d267fce
f5d7b87
a1d9ff4
 
582fe73
810b96d
f5d7b87
d267fce
 
f5d7b87

import gensim.downloader 
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
# Load the "glove-wiki-gigaword-50" vectors through gensim's downloader at
# import time; inference() indexes and queries this object.
model = gensim.downloader.load("glove-wiki-gigaword-50")

# Function to reduce dimensions
def reduce_dimensions(data, method='PCA'):
    """Project ``data`` onto two components with PCA or t-SNE.

    Args:
        data: Array-like passed unchanged to the estimator's
            ``fit_transform`` (one row per sample).
        method: ``'PCA'`` (default) or ``'TSNE'``.

    Returns:
        Whatever the chosen estimator's ``fit_transform`` returns for
        ``n_components=2``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # The estimator is called ``reducer`` so it does not shadow the
    # module-level embedding ``model``.
    if method == 'PCA':
        reducer = PCA(n_components=2)
    elif method == 'TSNE':
        # NOTE(review): perplexity=3 is very low; presumably chosen because
        # only a handful of points are ever plotted.
        reducer = TSNE(n_components=2, learning_rate='auto', init='random', perplexity=3)
    else:
        # Previously an unknown method fell through and crashed with an
        # UnboundLocalError on the return line.
        raise ValueError(
            f"Unsupported reduction method {method!r}; expected 'PCA' or 'TSNE'"
        )
    return reducer.fit_transform(data)

# Markdown text handed to gr.Interface through its `description=` argument.
description = """
### Word Embedding Demo App
Universidade Federal de São Paulo - Escola Paulista de Medicina

The output is Word3 + (Word2 - Word1)

Credits:  
* Gensim
* Glove
"""

# Input textboxes; they are passed to gr.Interface in this order and therefore
# map positionally onto inference(word1, word2, word3).
Word1 = gr.Textbox()
Word2 = gr.Textbox()
Word3 = gr.Textbox()
# NOTE(review): `label` is not referenced anywhere else in the visible code;
# confirm whether it is still needed.
label = gr.Label(show_label=True, label="Word4")
# Output plot. "x", "y" and "color" name columns of the DataFrame returned by
# inference().
sp = gr.ScatterPlot(x="x", y="y", color="color", label="label")


def inference(word1, word2, word3):
    """Solve the analogy ``word3 + (word2 - word1)`` and lay the words out in 2-D.

    The three input words and the four nearest neighbours of the analogy
    vector are looked up in the module-level ``model`` and reduced with PCA.

    Args:
        word1: Word whose vector is subtracted.
        word2: Word whose vector is added.
        word3: Word the offset ``word2 - word1`` is applied to.

    Returns:
        pandas.DataFrame with one row per distinct word and the columns
        ``"x"`` and ``"y"`` (PCA coordinates) and ``"color"`` (the word).

    Raises:
        gr.Error: If any input is empty or missing from the vocabulary.
    """
    # The glove-wiki-gigaword vocabulary is uncased, so normalise user input
    # instead of failing on "Woman" or on stray surrounding whitespace.
    queries = [(word or "").strip().lower() for word in (word1, word2, word3)]

    # Report every unusable input at once with a readable message; a raw
    # KeyError from the lookup would reach the UI as a generic failure.
    missing = [word for word in queries if word not in model]
    if missing:
        raise gr.Error(
            "Not in the vocabulary: " + ", ".join(repr(word) for word in missing)
        )

    first, second, third = (model[word] for word in queries)
    neighbours = model.similar_by_vector(third + second - first)

    # The nearest neighbours may repeat the query words themselves;
    # dict.fromkeys drops repeats while keeping first-seen order.
    candidates = queries + [word for word, _score in neighbours[:4]]
    words = list(dict.fromkeys(candidates))

    data = np.stack([model[word] for word in words])
    coords = reduce_dimensions(data, method='PCA')

    return pd.DataFrame({
        "x": coords[:, 0],
        "y": coords[:, 1],
        "color": words,
    })

# Example rows handed to gr.Interface; each row is in
# [Word1, Word2, Word3] order, matching the inputs list.
examples = [
    ["woman", "man", "aunt"],  
    ["woman", "man", "girl"],
    ["woman", "man", "granddaughter"],
]

# Wire the three textboxes to inference() and send its DataFrame result to the
# scatter plot component.
iface = gr.Interface(
    fn=inference,
    inputs=[Word1, Word2, Word3],
    outputs=sp,
    description=description,
    examples=examples
    )

# Launch the app as soon as the module is executed.
iface.launch()