wi-lab committed · verified
Commit 16c29a6 · Parent(s): 309f04a

Upload tutorial.py

Files changed (1): tutorial.py +275 -0
tutorial.py ADDED
@@ -0,0 +1,275 @@
import subprocess
import os
import shutil

def clone_dataset_scenario(repo_url, model_repo_dir="./LWM", scenarios_dir="scenarios"):
    """
    Clones all scenarios from a repository, ensuring all files (small and large) are downloaded.

    Args:
        repo_url (str): URL of the Git repository
        model_repo_dir (str): Path to the model repository
        scenarios_dir (str): Directory name for storing scenarios
    """
    current_dir = os.path.basename(os.getcwd())
    if current_dir == "LWM":
        model_repo_dir = "."

    scenarios_path = os.path.join(model_repo_dir, scenarios_dir)
    original_dir = os.getcwd()

    try:
        # Start from a clean scenarios directory; git clone recreates it below
        if os.path.exists(scenarios_path):
            shutil.rmtree(scenarios_path)

        print("Cloning the dataset repository into the scenarios directory ...")
        subprocess.run(["git", "clone", repo_url, scenarios_path], check=True)

        os.chdir(scenarios_path)

        print("Pulling all files using Git LFS ...")
        subprocess.run(["git", "lfs", "install"], check=True)
        subprocess.run(["git", "lfs", "pull"], check=True)

        print(f"Successfully cloned all scenarios into {scenarios_path}")

    except subprocess.CalledProcessError as e:
        print(f"Error cloning scenarios: {e}")
    finally:
        # Always return to the original working directory, keeping the cloned data
        os.chdir(original_dir)
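
# Optional sanity check, a minimal sketch not in the original tutorial: after
# cloning, `git lfs ls-files` (a standard Git LFS command) lists the tracked
# large files, so an empty listing suggests the LFS pull did not complete.
def verify_lfs_pull(repo_path):
    # List LFS-tracked files inside the cloned repository
    result = subprocess.run(
        ["git", "lfs", "ls-files"],
        cwd=repo_path, capture_output=True, text=True, check=True
    )
    print(result.stdout or "No LFS-tracked files found.")

# Usage (sketch): verify_lfs_pull("./LWM/scenarios")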
#%%
model_repo_url = "https://huggingface.co/wi-lab/lwm"
model_repo_dir = "./LWM"

if not os.path.exists(model_repo_dir):
    print(f"Cloning model repository from {model_repo_url}...")
    subprocess.run(["git", "clone", model_repo_url, model_repo_dir], check=True)
#%%
import numpy as np
dataset_repo_url = "https://huggingface.co/datasets/wi-lab/lwm"
clone_dataset_scenario(dataset_repo_url, model_repo_dir)
#%%
if os.path.exists(model_repo_dir):
    os.chdir(model_repo_dir)
    print(f"Changed working directory to {os.getcwd()}")
else:
    print(f"Directory {model_repo_dir} does not exist. Please check if the repository was cloned properly.")
#%%
from input_preprocess import tokenizer
from lwm_model import lwm
import torch

scenario_names = np.array([
    "city_18_denver", "city_15_indianapolis", "city_19_oklahoma",
    "city_12_fortworth", "city_11_santaclara", "city_7_sandiego"
])
scenario_idxs = np.array([0, 1, 2, 3, 4, 5])[3]  # Select a single scenario by index
selected_scenario_names = scenario_names[scenario_idxs]
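
# Note on the selection above: indexing with a single integer yields one
# scenario name, while an index array keeps the result an array of names.
# A hedged sketch of a multi-scenario selection (this assumes downstream
# functions also accept several names, as the plural parameter name
# selected_scenario_names suggests; either way it is plain NumPy indexing):
multi_scenario_idxs = np.array([0, 3, 5])  # Hypothetical multi-scenario choice
multi_scenario_names = scenario_names[multi_scenario_idxs]
print(f"Example multi-scenario selection: {multi_scenario_names}")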

preprocessed_chs = tokenizer(
    selected_scenario_names=selected_scenario_names,
    manual_data=None,
    gen_raw=True,
    snr_db=None
)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Loading the LWM model on {device} ...")
model = lwm.from_pretrained(device=device)
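
# A quick model summary (a small addition; this assumes lwm is a standard
# torch nn.Module, as its use of torch and from_pretrained suggests):
n_params = sum(p.numel() for p in model.parameters())
print(f"LWM parameter count: {n_params:,}")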
#%%
from inference import lwm_inference, create_raw_dataset
input_types = ['cls_emb', 'channel_emb', 'raw']
selected_input_type = input_types[2]  # 'raw'

if selected_input_type in ['cls_emb', 'channel_emb']:
    dataset = lwm_inference(preprocessed_chs, selected_input_type, model, device)
else:
    dataset = create_raw_dataset(preprocessed_chs, device)
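
# The training code below reshapes `dataset` with .view, so it is a torch
# tensor whose leading dimension indexes samples; printing its shape is a
# cheap way to see what each input type produces:
print(f"Dataset shape for '{selected_input_type}': {tuple(dataset.shape)}")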
#%%
from input_preprocess import create_labels
n_beams = 16
tasks = ['LoS/NLoS Classification', 'Beam Prediction']
task = tasks[0]
labels = create_labels(task, selected_scenario_names, n_beams=n_beams)
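
# Label sanity check (a small addition; assumes `labels` converts cleanly to
# a NumPy array, e.g. a list, ndarray, or CPU tensor). LoS/NLoS labels should
# be binary, and beam labels should span at most n_beams classes:
labels_arr = np.asarray(labels)
print(f"{task}: {labels_arr.shape[0]} labels, classes present: {np.unique(labels_arr)}")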
# %% Dimensionality Reduction Visualization

# Import the dimensionality reduction plotting function
from utils import plot_dimensionality_reduction

# Iterate over tasks (e.g., LoS/NLoS Classification, Beam Prediction)
for task in tasks:

    # Create labels for the current task
    labels = create_labels(task, selected_scenario_names, n_beams=n_beams)

    # Iterate over input types (e.g., raw data or embeddings)
    for input_type_idx, input_type in enumerate(input_types):

        # Select the current input type
        selected_input_type = input_types[input_type_idx]

        # Prepare dataset based on input type
        if selected_input_type in ['cls_emb', 'channel_emb']:
            dataset = lwm_inference(
                preprocessed_chs,
                selected_input_type,
                model,
                device
            )
        else:
            dataset = create_raw_dataset(preprocessed_chs, device)

        # Plot dimensionality reduction for the dataset
        plot_dimensionality_reduction(
            dataset,
            method='all',           # Use all available dimensionality reduction methods
            labels=labels,          # Labels for visualization
            task=task,              # Current task (for title or labeling)
            input_type=input_type   # Current input type (for title or labeling)
        )
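
# For intuition, a minimal sketch of one view that a method='all' call could
# produce, using PCA (hypothetical; the actual reduction methods live in
# utils.py and may differ):
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

def sketch_pca_scatter(features, targets, title):
    # Flatten per-sample features, project to 2-D, and color points by label
    X = np.asarray(features.detach().cpu() if hasattr(features, 'detach') else features)
    X = X.reshape(len(X), -1)
    X2 = PCA(n_components=2).fit_transform(X)
    plt.scatter(X2[:, 0], X2[:, 1], c=np.asarray(targets), s=5, cmap='coolwarm')
    plt.title(title)
    plt.show()

# Usage (sketch): sketch_pca_scatter(dataset, labels, f"{task} / {input_type} (PCA)")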
#%% TRAINING PARAMETERS
task = ['LoS/NLoS Classification', 'Beam Prediction'][0]  # Select the task
n_trials = 10  # Number of trials for each configuration
num_classes = 2 if task == 'LoS/NLoS Classification' else n_beams  # Number of classes depends on the task
input_types = ['raw', 'cls_emb']  # Types of input data
split_ratios = np.array([.005, .0075, .01, .015, .02, .03,
                         .05, .1, .25, .5, .8])  # Dataset split ratios
f1_scores = np.zeros((n_trials, len(input_types), len(split_ratios)))  # F1 scores per trial, input type, and split ratio
labels = create_labels(task, selected_scenario_names, n_beams=n_beams)  # Labels for the selected task

#%% TRAINING
from utils import get_data_loaders, FCN, train_model, plot_metrics
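
# For reference, a minimal fully connected classifier of the kind FCN in
# utils.py may implement (a hedged sketch, not the repository's actual
# definition; hidden_dim is an arbitrary illustrative choice):
import torch.nn as nn

class SimpleFCN(nn.Module):
    def __init__(self, input_dim, num_classes, hidden_dim=128):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes),
        )

    def forward(self, x):
        # Unnormalized class logits for a batch of flattened inputs
        return self.net(x)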

# Iterate over input types (e.g., raw data or embeddings)
for input_type_idx, input_type in enumerate(input_types):

    # Prepare dataset based on input type
    if input_type in ['cls_emb', 'channel_emb']:
        dataset = lwm_inference(preprocessed_chs, input_type, model, device)
    else:
        dataset = create_raw_dataset(preprocessed_chs, device)

    # Reshape dataset for training
    dataset = dataset.view(dataset.size(0), -1)
    input_dim = dataset.shape[-1]  # Get input dimension for the model

    # Iterate over different dataset split ratios
    for split_ratio_idx, split_ratio in enumerate(split_ratios):

        n_train = int(split_ratio * dataset.shape[0])  # Calculate number of training samples

        # Run multiple trials for each split ratio
        for trial in range(n_trials):

            print(f"\ninput type: {input_type}, \nnumber of training samples: {n_train}, \ntrial: {trial}\n")

            torch.manual_seed(trial)  # Set seed for reproducibility
            train_loader, test_loader = get_data_loaders(
                dataset,
                labels,
                batch_size=128,
                split_ratio=split_ratio
            )

            # Initialize the Fully Connected Network (FCN) model
            FCN_model = FCN(input_dim=input_dim, num_classes=num_classes)

            # Train the model and retrieve losses and F1 scores
            train_losses, test_f1_scores = train_model(
                FCN_model,
                train_loader,
                test_loader,
                epochs=120,
                lr=0.0001 if input_type == "raw" else 0.001,  # Learning rate depends on input type
                device=device,
                decay_step=30,
                decay_rate=0.5
            )

            # Store the final F1 score for this trial
            f1_scores[trial, input_type_idx, split_ratio_idx] = test_f1_scores[0, -1]

            # Plot metrics for the current trial
            # plot_metrics(test_f1_scores, [input_type])

# Plot average F1 scores across all trials for each input type and split ratio
plot_metrics(
    np.mean(f1_scores, axis=0),                              # Average F1 scores across trials
    input_types,
    np.asarray(split_ratios * dataset.shape[0], dtype=int),  # Convert split ratios to actual sample counts
    flag=1
)

# %% Few-Shot Learning with Pretrained Embeddings

# Initialize array to store F1 scores for KNN classification
f1_scores_knn = np.zeros((n_trials, len(input_types), len(split_ratios)))

# Import the classification function
from utils import classify_by_euclidean_distance
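
# A minimal sketch in the spirit of classify_by_euclidean_distance
# (hypothetical; the actual implementation is in utils.py): label each test
# sample with the class of its nearest training sample under the Euclidean
# metric, then score with macro F1.
from sklearn.metrics import f1_score

def nearest_neighbor_f1_sketch(train_X, train_y, test_X, test_y):
    dists = torch.cdist(test_X, train_X)  # Pairwise Euclidean distances
    preds = train_y[dists.argmin(dim=1)]  # Label of the nearest training sample
    return f1_score(test_y.cpu(), preds.cpu(), average='macro')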

# Iterate over input types (e.g., raw data or embeddings)
for input_type_idx, input_type in enumerate(input_types):

    # Prepare dataset based on input type
    if input_type in ['cls_emb', 'channel_emb']:
        dataset = lwm_inference(preprocessed_chs, input_type, model, device)
    else:
        dataset = create_raw_dataset(preprocessed_chs, device)

    # Reshape dataset for compatibility
    dataset = dataset.view(dataset.size(0), -1)
    input_dim = dataset.shape[-1]  # Get input dimension

    # Iterate over different dataset split ratios
    for split_ratio_idx, split_ratio in enumerate(split_ratios):

        n_train = int(split_ratio * dataset.shape[0])  # Calculate number of training samples

        # Run multiple trials for each split ratio
        for trial in range(n_trials):

            torch.manual_seed(trial)  # Set seed for reproducibility
            train_loader, test_loader = get_data_loaders(
                dataset,
                labels,
                batch_size=128,
                split_ratio=split_ratio
            )

            # Perform classification using Euclidean distance
            f1 = classify_by_euclidean_distance(
                train_loader,
                test_loader,
                device="cpu"
            )

            # Store the F1 score for this trial
            f1_scores_knn[trial, input_type_idx, split_ratio_idx] = f1

# Plot average F1 scores across all trials for each input type and split ratio
plot_metrics(
    np.mean(f1_scores_knn, axis=0),                          # Average F1 scores across trials
    input_types,
    np.asarray(split_ratios * dataset.shape[0], dtype=int),  # Convert split ratios to actual sample counts
    flag=1
)