File size: 687 Bytes
1ae7dde
 
 
9a9d700
1ae7dde
9a9d700
cd2383d
1ae7dde
07d606f
 
1ae7dde
cd2383d
 
 
1ae7dde
cd2383d
1ae7dde
 
 
9a9d700
1ae7dde
 
6a3b521
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from lynxkite.core.ops import op
import pandas as pd

# Passed as the first argument to every `@op(...)` registration in this file.
# NOTE(review): presumably the name of the LynxKite environment the ops are
# listed under — confirm against lynxkite.core.ops.
ENV = "LynxKite Graph Analytics"


@op(ENV, "Word2vec for the top 1000 words", slow=True)
def word2vec_1000():
    """Build a table of words and their word2vec embeddings.

    Loads the ``neuml/word2vec-quantized`` static-vector model, downloads a
    word list from a pinned GitHub gist revision, and looks up one embedding
    per word.

    Returns:
        A DataFrame with a ``word`` column (one row per line of the word list)
        and an ``embedding`` column holding each word's vector converted with
        ``.tolist()``.
    """
    # Imported inside the function rather than at module level.
    # NOTE(review): presumably so that loading this module does not require
    # staticvectors to be importable — confirm.
    import staticvectors

    vectors = staticvectors.StaticVectors("neuml/word2vec-quantized")

    # The URL pins a specific gist revision, so the word list does not change.
    word_list_url = "https://gist.githubusercontent.com/deekayen/4148741/raw/98d35708fa344717d8eee15d11987de6c8e26d7d/1-1000.txt"
    # The file has no header row, so the single column is named explicitly.
    words = pd.read_csv(word_list_url, names=["word"])

    # Look up all words in one call, then convert the result to plain lists so
    # that every row stores its own embedding.
    looked_up = vectors.embeddings(words["word"].tolist())
    words["embedding"] = looked_up.tolist()
    return words


@op(ENV, "Take first N")
def first_n(df: pd.DataFrame, *, n=10):
    """Return the leading rows of a table.

    Args:
        df: The table to take rows from.
        n: Row count forwarded to ``DataFrame.head``. Defaults to 10.

    Returns:
        The result of ``df.head(n)``.
    """
    leading_rows = df.head(n)
    return leading_rows


@op(ENV, "Sample N")
def sample_n(df: pd.DataFrame, *, n=10):
    """Return up to ``n`` randomly chosen rows of a table.

    Args:
        df: The table to sample from.
        n: Number of rows to draw. Defaults to 10. When the table has fewer
            than ``n`` rows, all of its rows are returned (in random order)
            instead of raising.

    Returns:
        A DataFrame holding the sampled rows, drawn without replacement.
    """
    # DataFrame.sample raises ValueError when asked for more rows than the
    # table holds (sampling is without replacement by default). Cap the
    # request so short or empty tables work, as they do for "Take first N".
    sample_size = min(n, len(df))
    return df.sample(sample_size)