kevincluo committed on
Commit e4e80da · 1 Parent(s): 4b9b8de

Upload final_run_concurrent.py

Files changed (1): final_run_concurrent.py (+205, -0)

final_run_concurrent.py ADDED
@@ -0,0 +1,205 @@
# -*- coding: utf-8 -*-
"""Final Run-Concurrent

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/19foLOwCXRH0e0P_Xqqc-9VgpnmjYyAX8
"""

# Install the necessary packages

!pip install datasets huggingface_hub sentence-transformers gradio evaluate
!pip install git+https://github.com/huggingface/accelerate
!pip install transformers==4.28.0

import datasets
from datasets import load_dataset
import pandas
from PIL import Image
import cv2
import os
from pandas import read_csv
from google.colab import drive

drive.mount('/content/drive/')

raw_dataset = load_dataset("imagefolder", data_dir="/content/drive/MyDrive/california_fire_damage_classification_merged/train")
dataset = raw_dataset["train"].train_test_split(test_size=0.2, stratify_by_column="label")

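# A quick sanity check on the split (a sketch; exact sizes depend on the Drive
# folder contents). stratify_by_column requires 'label' to be a ClassLabel
# feature, which the imagefolder loader produces automatically.
# print(dataset)                             # DatasetDict with 'train'/'test'
# print(dataset['train'].features['label'])  # ClassLabel(names=[...])
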
from transformers import ViTImageProcessor, ViTForImageClassification
import torch

device = 'cuda'  # for GPU (unused below: the model is never moved off the CPU)

model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')

model.eval()
# model.to(device)  # uncomment to run on GPU; inputs would then need .to(device) as well

# The image processor plays the same role for images that a tokenizer plays for text
extractor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')

labels = raw_dataset['train'].features['label'].names

labels

from transformers import AutoFeatureExtractor, AutoModelForImageClassification, AutoTokenizer

# Swap in the fine-tuned checkpoint saved to Drive (this replaces the base
# model and processor loaded above)
extractor = AutoFeatureExtractor.from_pretrained("/content/drive/MyDrive/california_fire_damage_classification_merged/saved_model_files")
model = AutoModelForImageClassification.from_pretrained("/content/drive/MyDrive/california_fire_damage_classification_merged/saved_model_files")

import torch

def transform(example_batch):
    inputs = extractor([x.convert("RGB") for x in example_batch['image']], return_tensors='pt')
    inputs['labels'] = example_batch['label']
    return inputs

prepared_ds = dataset.with_transform(transform)

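# with_transform applies `transform` lazily, each time examples are accessed,
# so nothing is precomputed here. A sketch of the expected per-example output:
# sample = prepared_ds['train'][0]
# sample['pixel_values'].shape  # torch.Size([3, 224, 224])
# sample['labels']              # integer class index
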
### RUNNING EVALUATION ON PRETRAINED MODEL

from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    "test_trainer",
    remove_unused_columns=False,  # keep the 'image' column so the lazy transform can see it
)

import numpy as np
from datasets import load_metric

metric = load_metric("accuracy")

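# Note: datasets.load_metric is deprecated in recent releases; the `evaluate`
# package installed at the top provides the same metric as a drop-in:
#   import evaluate
#   metric = evaluate.load("accuracy")
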
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=None,
    eval_dataset=prepared_ds['test'],
    compute_metrics=compute_metrics,
)

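# The Trainer above is configured for evaluation but never invoked in this
# script; a minimal sketch of the presumably intended call:
# metrics = trainer.evaluate()
# print(metrics)  # e.g. {'eval_accuracy': ..., 'eval_loss': ...}
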
# Spot-check one test example against its prediction (y_test_np and
# y_predicts_np are built a few cells below; in the notebook this cell was
# executed after them)
j = 2095

print('Groundtruth: ', y_test_np[j], ' ', labels[y_test_np[j]], 'Prediction: ', y_predicts_np[j], ' ', labels[y_predicts_np[j]])
dataset['test'][j]['image']

# Collect processed pixel values and labels for the whole test split
pixel_values_array = []
y_test = []
counter = 0

for img_pair in prepared_ds['test']:
    pixel_values_array.append(img_pair['pixel_values'])
    y_test.append(img_pair["labels"])
    # pixel_values_tensor = torch.concat((pixel_values_tensor, img_pair['pixel_values']), 0)
    counter += 1
    print(counter)

# Stack the per-image tensors into a single (N, 3, 224, 224) batch; the
# prediction loop below depends on pixel_values_tensor
pixel_values_tensor = torch.stack(pixel_values_array)

pixel_values_tensor

# Compare predictions with ground truth (y_predicts_merged comes from the
# prediction loop below; in the notebook these cells ran after it)
len(pixel_values_tensor)
len(y_predicts_merged)

import numpy as np
y_test_np = np.array(y_test)
y_predicts_np = np.array(y_predicts_merged)

# Indices of the misclassified test images
np.where(y_test_np != y_predicts_np)

y_predicts = []

# Run the model one image at a time and keep the argmax class index
for i in range(len(pixel_values_tensor)):
    logits = model(pixel_values_tensor[i:i+1]).logits
    y_predict = [logit.argmax(-1).item() for logit in logits]
    y_predicts.append(y_predict)

y_predicts

# Flatten the list of single-element lists into one flat list of predictions
y_predicts_merged = [inner for outer in y_predicts for inner in outer]

y_predicts_merged

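# A sketch of the same predictions computed in batches instead of one image at
# a time (batch size 16 is an arbitrary choice; results should match
# y_predicts_merged):
# with torch.no_grad():
#     batch_logits = [model(pixel_values_tensor[k:k+16]).logits
#                     for k in range(0, len(pixel_values_tensor), 16)]
# y_predicts_batched = torch.cat(batch_logits).argmax(-1).tolist()
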
logits = model(pixel_values_tensor[0:1]).logits

logits

y_predict = [logit.argmax(-1).item() for logit in logits]
y_predict

# y_test = [img_pair["labels"] for img_pair in prepared_ds['test']]
y_test = prepared_ds['test'][0:100]["labels"]  # first 100 labels only; must match len(y_predicts_merged) below
y_test

from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y_test, y_predicts_merged))
print(classification_report(y_test, y_predicts_merged))

# Per-class probabilities from the most recently computed logits
probability = torch.nn.functional.softmax(logits, dim=-1)
probability

probs = probability.detach().numpy()
probs

# One {label: probability} dict per image (the format Gradio's Label expects)
confidences = [{label: float(prob[j]) for j, label in enumerate(labels)} for prob in probs]
confidences

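# gradio is installed at the top but never used in this script; a minimal
# sketch of a demo wiring the extractor/model pair to a Label output (the
# `classify` function name and interface layout are illustrative assumptions,
# not part of the original):
import gradio as gr

def classify(image):
    inputs = extractor([image.convert("RGB")], return_tensors='pt')
    with torch.no_grad():
        probs = torch.nn.functional.softmax(model(**inputs).logits, dim=-1)[0]
    return {label: float(probs[i]) for i, label in enumerate(labels)}

# gr.Interface(fn=classify, inputs=gr.Image(type="pil"), outputs=gr.Label()).launch()
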
# First we get the features corresponding to the first test image
# (the processor loaded above is named `extractor`; the model stays on the
# CPU, so the encoding does too)
encoding = extractor(images=dataset['test'][0]['image'], return_tensors="pt")

# Then pass it through the model and get a prediction
outputs = model(**encoding)
logits = outputs.logits

prediction = logits.argmax(-1).item()

print("Predicted class:", model.config.id2label[prediction])

# For 1 sample -> look at the distribution of probabilities assigned

# ViT checkpoints ship an image processor rather than a tokenizer, so this
# text-style tokenizer block is leftover boilerplate and is left disabled:
# tokenizer = AutoTokenizer.from_pretrained("google/vit-base-patch16-224")
#
# def tokenize_function(examples):
#     return tokenizer(examples["text"], padding="max_length", truncation=True)

encoding = extractor(images=[dataset["test"][0]['image']], return_tensors="pt")
outputs = model(**encoding)
logits = outputs.logits
prediction = logits.argmax(-1).item()

print("Predicted class:", model.config.id2label[prediction])

im_test = [dataset['test'][0]['image'], dataset['test'][1]['image']]
features_test = extractor(im_test, return_tensors='pt')
features_test['pixel_values'][0]

features_test['pixel_values'][-1]

# Logits for the two test images, then per-class probabilities
outputs = model(features_test["pixel_values"])
logits = outputs.logits
logits

probability = torch.nn.functional.softmax(logits, dim=-1)
probability

# Probability distribution over the labels for the first of the two images
probs = probability[0].detach().numpy()
confidences = {label: float(probs[i]) for i, label in enumerate(labels)}
confidences

prepared_ds['test'][0]['pixel_values']