"""Split the geyser data set into two k-means clusters and tag every row.

Reads ``raw/geyser.csv``, renames its two columns to ``duration`` and
``waiting``, clusters the rows into two groups, and writes the table with an
added ``kind`` column ("long" or "short") to ``geyser.csv``.
"""

import numpy as np
import pandas as pd
from scipy.cluster.vq import kmeans2


def name_clusters(centroids, codes):
    """Translate two-cluster k-means codes into "long"/"short" labels.

    k-means numbers its clusters arbitrarily (the numbering follows the
    random initial centroids), so a fixed "code 1 means long" rule can
    silently invert. The cluster whose centroid has the larger value in the
    first column (``duration`` in this script) is named "long" instead.

    Args:
        centroids: Array-like of shape (k, n_features); column 0 is compared.
        codes: Array-like of integer cluster indices, one per observation.

    Returns:
        NumPy array of "long"/"short" strings with the same shape as *codes*.
    """
    centroids = np.asarray(centroids, dtype=float)
    codes = np.asarray(codes)
    long_code = int(np.argmax(centroids[:, 0]))
    return np.where(codes == long_code, "long", "short")


def main():
    """Cluster ``raw/geyser.csv`` and write the labelled rows to ``geyser.csv``."""
    # kmeans2 is called without an explicit seed; seeding NumPy's global
    # generator here is what the original script relied on for repeatable runs.
    np.random.seed(0)

    df = pd.read_csv("raw/geyser.csv")
    df.columns = ["duration", "waiting"]

    # Hand kmeans2 a plain float array; the features are clustered unscaled,
    # exactly as before.
    centroids, codes = kmeans2(df.to_numpy(dtype=float), 2)

    df["kind"] = name_clusters(centroids, codes)
    df.to_csv("geyser.csv", index=False)


if __name__ == "__main__":
    main()