File size: 687 Bytes
8e40d73
 
 
32d4546
8e40d73
32d4546
2e10db6
8e40d73
b1ca11c
 
8e40d73
2e10db6
 
 
8e40d73
2e10db6
8e40d73
 
 
32d4546
8e40d73
 
4d806ec
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from lynxkite.core.ops import op
import pandas as pd

# Passed as the first argument to every @op(...) registration in this file;
# presumably the name of the environment the ops are registered under
# (NOTE(review): confirm against lynxkite.core.ops.op).
ENV = "LynxKite Graph Analytics"


# Builds a table with one row per word of a fixed online word list: a "word"
# column holding the word and an "embedding" column holding the value the
# "neuml/word2vec-quantized" model returns for it, converted with .tolist().
# Takes no inputs; needs network access to fetch the word list.
@op(ENV, "Word2vec for the top 1000 words", slow=True)
def word2vec_1000():
    # Function-local import: staticvectors is only imported when the op runs.
    from staticvectors import StaticVectors

    encoder = StaticVectors("neuml/word2vec-quantized")

    # The file has no header row, so the single column is named explicitly.
    word_list_url = "https://gist.githubusercontent.com/deekayen/4148741/raw/98d35708fa344717d8eee15d11987de6c8e26d7d/1-1000.txt"
    words = pd.read_csv(word_list_url, names=["word"])

    # Embed every word in one call, then store one list entry per row.
    # (presumably embeddings() returns a NumPy array — TODO confirm)
    vectors = encoder.embeddings(words["word"].tolist())
    words["embedding"] = vectors.tolist()
    return words


# Returns the leading rows of a table.
#   df: the input DataFrame.
#   n:  keyword-only row count forwarded to DataFrame.head (default 10).
@op(ENV, "Take first N")
def first_n(df: pd.DataFrame, *, n=10):
    leading_rows = df.head(n=n)
    return leading_rows


# Returns up to ``n`` randomly chosen rows of a table, without replacement.
#   df: the input DataFrame.
#   n:  keyword-only number of rows requested (default 10). When it exceeds
#       the number of rows in ``df``, every row is returned in random order
#       instead of raising.
# Raises ValueError (from pandas) for a negative ``n``.
@op(ENV, "Sample N")
def sample_n(df: pd.DataFrame, *, n=10):
    # DataFrame.sample draws without replacement by default and raises
    # ValueError when asked for more rows than the frame holds. Cap the
    # request at the row count so a short table is handled the same way
    # DataFrame.head handles an oversized n in the "Take first N" op.
    # None is forwarded untouched so pandas' own default still applies, and a
    # negative n stays negative so pandas still rejects it.
    if n is not None:
        n = min(n, len(df))
    return df.sample(n)