File size: 323 Bytes
d95db82 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
import numpy as np
import pandas as pd
from scipy.cluster.vq import kmeans2
def label_eruptions(centroids, assignments):
    """Map k-means cluster indices to the names "long" and "short".

    k-means numbers its clusters arbitrarily, so index 1 is not guaranteed
    to be the long-eruption group. The cluster whose centroid has the
    larger first coordinate (the ``duration`` column) is named "long";
    every other observation is named "short".

    Args:
        centroids: 2-D array-like of cluster centres, one row per cluster,
            with duration in column 0.
        assignments: 1-D array-like of cluster indices, one per observation.

    Returns:
        NumPy array of "long"/"short" strings, aligned with ``assignments``.
    """
    long_cluster = np.argmax(np.asarray(centroids)[:, 0])
    return np.where(np.asarray(assignments) == long_cluster, "long", "short")


if __name__ == "__main__":
    # kmeans2 is called without an explicit seed, so seeding NumPy's global
    # generator is presumably what keeps the clustering reproducible.
    np.random.seed(0)

    df = pd.read_csv("raw/geyser.csv")
    df.columns = ["duration", "waiting"]

    # Keep the centroids as well as the labels: they are needed to decide
    # which of the two clusters is the long-eruption one.
    centroids, z = kmeans2(df, 2)
    df["kind"] = label_eruptions(centroids, z)

    df.to_csv("geyser.csv", index=False)
|