File size: 323 Bytes
d95db82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import numpy as np
import pandas as pd
from scipy.cluster.vq import kmeans2

def kinds_from_clusters(centroids, labels):
    """Translate k-means cluster indices into "long"/"short" names.

    The index k-means gives to each cluster is arbitrary, so the name is
    derived from the centroid positions instead of the raw index: the cluster
    whose centroid has the largest value in the first column is called
    "long", every other cluster is called "short".

    Args:
        centroids: 2-D array-like with one row per cluster centroid.
        labels: 1-D array-like holding the cluster index of each observation.

    Returns:
        A NumPy array of "long"/"short" strings, one per entry of ``labels``.
    """
    centroids = np.asarray(centroids, dtype=float)
    labels = np.asarray(labels)
    # Column 0 is "duration" in the script below, so this picks the cluster
    # with the longer mean duration.
    long_cluster = np.argmax(centroids[:, 0])
    return np.where(labels == long_cluster, "long", "short")


if __name__ == "__main__":

    # kmeans2 is called without an explicit seed, so seed the global NumPy
    # random state to keep the clustering reproducible between runs.
    np.random.seed(0)

    df = pd.read_csv("raw/geyser.csv")
    df.columns = ["duration", "waiting"]

    # Split the observations into two clusters on the raw (unscaled) columns.
    centroids, labels = kmeans2(df.to_numpy(), 2)

    # Name clusters by centroid position rather than by cluster index, which
    # depends on the random initialisation and could otherwise flip the names.
    df["kind"] = kinds_from_clusters(centroids, labels)

    df.to_csv("geyser.csv", index=False)