import csv
import tempfile
import warnings

import pandas as pd
from transformers import Trainer, TrainingArguments, set_seed

from tsfm_public import TimeSeriesPreprocessor, get_datasets
from tsfm_public.toolkit.get_model import get_model

# Suppress warnings to keep the console output readable
warnings.filterwarnings("ignore")

# Set seed for reproducibility
SEED = 42
set_seed(SEED)

# TTM model path. The default is the Granite-TTM-R2 release; uncomment one of the alternatives below to use another TTM release.
TTM_MODEL_PATH = "ibm-granite/granite-timeseries-ttm-r2"
# TTM_MODEL_PATH = "ibm-granite/granite-timeseries-ttm-r1"
# TTM_MODEL_PATH = "ibm-research/ttm-research-r2"

# Context length, i.e., the length of the history window fed to the model.
# Currently supported values are 512/1024/1536 for Granite-TTM-R2 and Research-Use-TTM-R2, and 512/1024 for Granite-TTM-R1.
CONTEXT_LENGTH = 512

# Granite-TTM-R2 supports forecast lengths up to 720; Granite-TTM-R1 supports forecast lengths up to 96.
PREDICTION_LENGTH = 96

# Results dir
OUT_DIR = "ttm_finetuned_models/"

# Dataset
TARGET_DATASET = "binance-btcusdt-futures-2020-2021-1s"
dataset_path = "./test.csv"
timestamp_column = "timestamp"
id_columns = []  # columns that uniquely identify a time series; leave empty for a single series.

target_columns = ["bid"]
split_config = {
    "train": 0.1,
    "test": 0.9,
}
# Fraction-based split: the first 10% of rows form the train split and the last
# 90% the test split; anything left between the two would become the validation
# split (empty here).
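# Depending on the tsfm_public version, split_config may also accept explicit
# [start, end) row indices per split instead of fractions (the numbers below are
# purely illustrative, not taken from this dataset):
# split_config = {"train": [0, 100000], "valid": [100000, 120000], "test": [120000, 200000]}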

data = pd.read_csv(
    dataset_path,
    parse_dates=[timestamp_column],
    header=0
)
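# The CSV is expected to provide at least the timestamp column and the target
# column(s) configured above; an illustrative (made-up) excerpt:
#
#   timestamp,bid
#   2020-01-01 00:00:00,7195.24
#   2020-01-01 00:00:01,7195.25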

column_specifiers = {
    "timestamp_column": timestamp_column,
    "id_columns": id_columns,
    "target_columns": target_columns,
    "control_columns": [],
}

def zeroshot_eval(dataset_name, batch_size, context_length=512, forecast_length=96):
    """Zero-shot evaluation of the pretrained TTM model on the target dataset.

    Evaluates the model on the train and test splits and exports per-window
    predictions to results.csv. `dataset_name` is currently unused.
    """
    # Prepare the data
    tsp = TimeSeriesPreprocessor(
        **column_specifiers,
        context_length=context_length,
        prediction_length=forecast_length,
        scaling=True,
        encode_categorical=False,
        scaler_type="standard",
    )
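    # With scaling=True, the preprocessor standard-scales the target columns; in
    # the tsfm_public workflow the scaler is fit on the training split inside
    # get_datasets and then applied to every split.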

    dset_train, dset_valid, dset_test = get_datasets(tsp, data, split_config)
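    # Each dataset item is one sliding window: "past_values" with shape
    # (context_length, n_channels) and "future_values" with shape
    # (forecast_length, n_targets); these keys are read when exporting results below.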

    # Load model
    zeroshot_model = get_model(TTM_MODEL_PATH, context_length=context_length, prediction_length=forecast_length)
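    # get_model is meant to fetch the TTM checkpoint variant that matches the
    # requested context and prediction lengths from the model repository.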

    # The Trainer is used only for evaluation and prediction here, so its output
    # directory can be a throwaway temporary directory.
    temp_dir = tempfile.mkdtemp()
    zeroshot_trainer = Trainer(
        model=zeroshot_model,
        args=TrainingArguments(
            output_dir=temp_dir,
            per_device_eval_batch_size=batch_size,
            seed=SEED,
            report_to="none",
        ),
    )

    # train predictions

    print("+" * 20, "Train MSE zero-shot", "+" * 20)
    zeroshot_output = zeroshot_trainer.evaluate(dset_train)
    print(zeroshot_output)

    predictions_dict = zeroshot_trainer.predict(dset_train)

    predictions_np_train = predictions_dict.predictions[0]
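    # predictions[0] should hold the point forecasts as a numpy array of shape
    # (num_windows, forecast_length, n_targets).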

    # test predictions

    print("+" * 20, "Test MSE zero-shot", "+" * 20)
    zeroshot_output = zeroshot_trainer.evaluate(dset_test)
    print(zeroshot_output)

    predictions_dict = zeroshot_trainer.predict(dset_test)

    predictions_np_test = predictions_dict.predictions[0]

    # Export one row per window: the window timestamp, the last observed value in
    # the context, the forecast at the final horizon step, and the actual value
    # at that step.
    with open("results.csv", "w", newline="") as csvfile:
        writer = csv.writer(csvfile, delimiter=",")
        writer.writerow(["timestamp", "last_observed", "prediction", "actual"])

        for dset, preds in ((dset_train, predictions_np_train), (dset_test, predictions_np_test)):
            for i in range(len(dset)):
                writer.writerow([
                    dset[i]["timestamp"],
                    dset[i]["past_values"][context_length - 1][0].detach().item(),
                    preds[i][forecast_length - 1][0],
                    dset[i]["future_values"][forecast_length - 1][0].detach().item(),
                ])


    # Backbone embeddings (if needed for further analysis):
    # backbone_embedding = predictions_dict.predictions[1]
    # print(backbone_embedding.shape)


zeroshot_eval(
    dataset_name=TARGET_DATASET, context_length=CONTEXT_LENGTH, forecast_length=PREDICTION_LENGTH, batch_size=128
)
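
# To inspect the exported forecasts afterwards (optional sketch; assumes the
# header row written to results.csv above):
# results = pd.read_csv("results.csv", parse_dates=["timestamp"])
# print(results.head())
# print(((results["prediction"] - results["actual"]) ** 2).mean())  # MSE at the final horizon step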