seasons_classifier.py
# -*- coding: utf-8 -*-
"""Seasons_Classifier

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1XxlbGT6sV_f6wQHDI89ZEBhmLYFYGjxj

### Links for this code ###

**REMEMBER**! "Save a copy in Drive" to make it your own notebook!

https://shorturl.at/BxhkJ

https://colab.research.google.com/drive/1Pd8rZMQwvo9miqsDSr18av2V5W9_Do9Z?usp=sharing

### If you want state-of-the-art results (4 parts) ###

https://www.kaggle.com/code/jhoward/first-steps-road-to-the-top-part-1/

### Setting up ###

Installing resources... the (!) prefix means the line is a shell command, not Python.

Installing Gradio is optional (useful if you want to test the app locally for faster iterations).

The special command for fastai installs the latest development version, which has some problems fixed but is always a bit riskier. The commented-out line would install the current stable version instead.
"""

!pip install gradio --upgrade

!pip install --upgrade git+https://github.com/fastai/fastai.git  # development version, with some problems fixed, but more risky
# !pip install fastai --upgrade  # stable version. Some known bugs, but less risky

!pip install fastbook
!pip install timm
!pip install huggingface_hub["fastai"]

""" "Loading" resources (some we have just installed, others, like the pathlib library, come by default with Python) """

from pathlib import Path
from time import sleep
from time import time

from fastai.vision.all import *
from fastai.vision.widgets import *
from fastbook import *
from fastcore.parallel import *
from fastdownload import download_url

from google.colab import drive
import gradio as gr
from huggingface_hub import from_pretrained_fastai, notebook_login, push_to_hub_fastai
import timm
from torchvision.models import resnet18

"""## Testing automatic dataset tools...

We need to connect to Drive, so that we can access our files:
"""

drive.mount('/content/drive')

"""We test downloading one image:"""

urls = search_images_ddg('spring scenery', max_images=1)  # URLs returned as a list
urls[0]  # this will print the first element of the list

dest = '/content/drive/My Drive/Colab Notebooks/tmp/spring_scenery.jpg'  # any Path we like!
download_url(urls[0], dest, show_progress=False)  # downloads the image from a URL to a destination path

im = Image.open(dest)  # opens the image as an Image object
im.thumbnail((256,256))
im  # remember, this is a quick way to display things in notebooks; in normal scripts: print(im)

"""! **Remember** !

It is best to download any images not intended for training to a different folder (tmp, for example). Keep your training-data folder clean, or fastai may get the label structure wrong.

# Preparing the data

Here we define our categories, and we will search the Internet for examples of each. We also have "extra query texts": we will append each text in this list to the category name when searching for images.

These specific choices will result in four categories (spring, summer, autumn, winter), and the Internet queries will be "spring scenery", "summer scenery", and so on.
"""

my_categories = 'spring', 'summer', 'autumn', 'winter'  # here you can insert your own categories
extra_query_texts = [" scenery"]  # this is part of the Internet query, but not part of the category name (you can also leave it empty: [""])
main_img_folder = Path('/content/drive/My Drive/Colab Notebooks/scenes')  # this is where you want your training images

"""**Note**: the next part of the code is actually optional. What we need is a subfolder named after each category inside our main folder, with that category's images inside, but you can create this however you prefer."""

max_images = 50
for o in my_categories:
    dest = (main_img_folder / o)
    dest.mkdir(exist_ok=True, parents=True)

    for suffix in extra_query_texts:
        download_images(dest, urls=search_images_ddg(f'{o}{suffix}', max_images=max_images))
        sleep(10)  # pause between searches to avoid overloading the server

    resize_images(dest, max_size=400, dest=dest)  # CAREFUL, DO YOU WANT TO USE THIS?
    # resize_images(dest, max_size=800, dest=dest)  # you can comment out lines that you don't want, like this

"""Here we already have our pictures in their folders. **Stop and go look at them**! Do they look right? Maybe you need a different `extra_query_text` for better results? Do you have all the categories that you need? **Feel free to move or delete any images that seem out of place**.

# Cleaning images...

Good! Now we want to quickly check that the methods can load our images without problems, to avoid errors during training:
"""

# CLEANING STEPS 1-2 (get paths, then try to load the images and return the paths that failed)
image_files = get_image_files(main_img_folder)  # finds the paths of all image files in a folder and its subfolders
failed = verify_images(image_files)  # are these images ok?
failed  # do not advance to the next section until this is empty ([]). Repeat cleaning steps 1 through 3.

# CLEANING STEP 3 (unlink, i.e. delete, the bad images)
failed.map(Path.unlink)  # deletes any bad image files
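
"""If you prefer, the check-and-delete cycle above can be repeated automatically until nothing fails. A minimal sketch, using only the same functions we just tried:"""

# Optional sketch: keep verifying and deleting until every remaining image loads cleanly
failed = verify_images(get_image_files(main_img_folder))
while len(failed) > 0:
    failed.map(Path.unlink)  # delete the unreadable files
    failed = verify_images(get_image_files(main_img_folder))  # re-check what is left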

"""Here we can see the sizes of our images. If they are too big, training will be slow."""

files = get_image_files(main_img_folder)
def f(o): return PILImage.create(o).size
sizes = parallel(f, files, n_workers=8)
pd.Series(sizes).value_counts()

"""# Resizing images

Images from the Internet can be big. Bigger images will not necessarily produce a better model, but they will certainly make training much slower. This is why we resize them (aggressively while experimenting, more conservatively for the final model; 1080 pixels would be much more than safe).
"""

original_main_img_folder = main_img_folder
main_img_folder = Path('/content/drive/My Drive/Colab Notebooks/scenes_small')

resize_images(original_main_img_folder, dest=main_img_folder, max_size=256, recurse=True)

files = get_image_files(main_img_folder)
def f(o): return PILImage.create(o).size
sizes = parallel(f, files, n_workers=8)
pd.Series(sizes).value_counts()

"""# Defining data structures

The `DataBlock` puts together everything the algorithm needs to know about our data (what kind of data is it? where do I get it? how do I make splits? where are the labels? what augmentations would you like?)
"""

data_info = DataBlock(
    blocks=(ImageBlock, CategoryBlock),  # working with images and classification
    get_items=get_image_files,  # get images using the method we tried before
    splitter=RandomSplitter(valid_pct=0.2, seed=42),  # random split for train/validation
    get_y=parent_label,  # the folder name tells us the image category!
    item_tfms=Resize(256),
    batch_tfms=aug_transforms(size=224, min_scale=0.5, flip_vert=False, max_lighting=0.1, max_zoom=1.05, max_warp=0.1))

"""Here you have a lot more info on the available (standard) augmentation methods: https://docs.fast.ai/vision.augment.html

But there are even weirder options! See, for example: https://docs.fast.ai/callback.mixup.html (a MixUp sketch appears after the training step below).

With the info in our `DataBlock` we can now create our dataloaders, which are what we really need for training and metrics. In our case we have to pass a folder here.
"""

dls = data_info.dataloaders(main_img_folder)

dls.valid.show_batch(max_n=8, nrows=2)  # always look at your data before training!

"""Images look good, but we want to look at train as well, so we can get an idea of what the transformations are doing (**the behaviour of transforms (data augmentation) is different when applied to validation**).

Here you can check whether your data augmentation makes sense, and whether it is strong enough. If you are unsure, don't be afraid to try different options.
"""

dls.train.show_batch(max_n=4, nrows=1, unique=True)  # unique means: always use the same example

"""**Remember**: `pretrained` means we are starting with a network already trained on another dataset. We use transfer learning any time we can!

# Training

The "learner" groups everything useful for training, including our newly created dataloaders ("dls" in fastai style), the metrics and the type of architecture to use. Fastai has some predefined architectures, like "resnet18". Others come with the timm library, and we can use them by just typing their name, for example "mobilenetv2_100".
"""

learn = cnn_learner(dls, 'mobilenetv2_100', pretrained=True, metrics=error_rate)
# learn = cnn_learner(dls, resnet18, pretrained=True, metrics=error_rate)  # resnet18 is ideal for fast iterations, but check out other models!
learn = learn.to_fp16()  # mixed-precision training is faster with no real downside

"""`lr_find`, the learning rate finder, implements a method that helps us find a reasonable region for our learning rate. Remember: pick a learning rate where the curve is dropping fast, not where it is at the bottom!"""

learn.lr_find()  # this may take a few minutes...
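
"""If you prefer to read the suggestion programmatically rather than off the plot, `lr_find` also returns it. A minimal sketch, assuming a recent fastai where the returned object exposes a `valley` attribute:"""

# Optional: capture lr_find's suggested learning rate instead of eyeballing the plot
suggestion = learn.lr_find()
print(f"Suggested learning rate (valley): {suggestion.valley}")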

lr = 0.08  # a bit more aggressive than suggested :)

"""The error_rate (% of errors) is useful to understand how good the model is, but judge training by looking at valid_loss instead (error_rate can be "jumpy", since it only considers the most probable category, regardless of confidence)."""

learn.fine_tune(epochs=10, base_lr=lr)  # you can specify how many "epochs" and "freeze_epochs" (default=1) you want
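
"""As promised above, a minimal MixUp sketch: pass the callback when creating the learner and train as usual. The `MixUp` callback comes with fastai (https://docs.fast.ai/callback.mixup.html); `mixup_learn` is just an illustrative name, and whether it helps on this small dataset is something you would have to test."""

# Hedged sketch: the same training recipe as above, but with the MixUp callback
mixup_learn = cnn_learner(dls, 'mobilenetv2_100', pretrained=True,
                          metrics=error_rate, cbs=MixUp())
mixup_learn = mixup_learn.to_fp16()
mixup_learn.fine_tune(epochs=10, base_lr=lr)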

"""That's it! Once we have the data loaders, training a model is really easy. The difficult part is preparing your data and making sure your data loaders are correctly set up.

# Results

A confusion matrix is much more informative than a single number (like "3% error"). This is even more useful when we have many categories.
"""

interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix()

"""It is also very useful to examine the images where the model did worst. This can help identify problems. Remember: if results are very good, the "top losses" will include images that were classified correctly (but with lower confidence)."""

interp.plot_top_losses(9, largest=True, nrows=3, figsize=(15, 11))

"""The confusion matrix can sometimes be hard to read, especially when you deal with many categories. `most_confused` returns your most-confused category pairs, which can be easier to grasp."""

interp.most_confused(min_val=1)

"""# Saving and sharing your work

Do you like your model? Would you like to play with it? Do you want to show off to family and friends? Do you want to make it visible (CV, personal website, etc.)? Then you have already done the hardest part! For more on how to use it with new data ("inference"), and even how to publish a free online app with your model, check out lessons 1 and 2 from Fastai. If you do create an app, **please share the link in your project report** and enjoy many bonus points :)

https://course.fast.ai/

Here we will get individual predictions for some images (this will be a good sanity check for saving and loading models):
"""

# First we download some example images:

spring_dest = '/content/drive/My Drive/Colab Notebooks/tmp/spring.jpg'
summer_dest = '/content/drive/My Drive/Colab Notebooks/tmp/summer.jpg'
autumn_dest = '/content/drive/My Drive/Colab Notebooks/tmp/autumn.jpg'
winter_dest = '/content/drive/My Drive/Colab Notebooks/tmp/winter.jpg'

urls = search_images_ddg('spring scenery', max_images=1)
download_url(urls[0], spring_dest, show_progress=False)

urls = search_images_ddg('summer scenery', max_images=1)
download_url(urls[0], summer_dest, show_progress=False)

urls = search_images_ddg('autumn scenery', max_images=1)
download_url(urls[0], autumn_dest, show_progress=False)

urls = search_images_ddg('winter scenery', max_images=1)
download_url(urls[0], winter_dest, show_progress=False)

"""We define the operation as a function so we don't need to rewrite it later."""

def predict_test_images(learner_):
    learner_.model.eval()
    start = time()  # `time` was imported with `from time import time`, so we call it directly

    pred, _, probs = learner_.predict(PILImage.create(spring_dest))
    print(f"Spring image: {pred} ({probs})")

    pred, _, probs = learner_.predict(PILImage.create(summer_dest))
    print(f"Summer image: {pred} ({probs})")

    pred, _, probs = learner_.predict(PILImage.create(autumn_dest))
    print(f"Autumn image: {pred} ({probs})")

    pred, _, probs = learner_.predict(PILImage.create(winter_dest))
    print(f"Winter image: {pred} ({probs})")

    end = time()
    elapsed = end - start
    print(f"Elapsed: {elapsed:.2f} s")

predict_test_images(learn)

"""# Save / load"""

learn.export('model.pkl')

local_learn = load_learner('model.pkl')

"""We expect the predictions from the loaded model to be the same as they were before!"""

predict_test_images(local_learn)

"""# Uploading our model to HuggingFace

Let's save the model properly, as a HuggingFace model that we can share.

Here you will need to create an account and define a token in Settings (Access Tokens). I created one with "write" privileges.
"""

notebook_login()

"""One line to save or load a model from HuggingFace!"""

push_to_hub_fastai(learner=learn, repo_id="Ener3122/Seasons_Classifier")

hf_learn = from_pretrained_fastai("Ener3122/Seasons_Classifier")

"""Again, we check that the predictions are the same (the first time I tried, they were not, because informatics are cursed).

(There seems to be some kind of bug with printing at this point; don't worry too much if you get no prints after here.)
"""

predict_test_images(hf_learn)

"""# Gradio

Gradio allows us to create apps very easily!

https://www.gradio.app/guides/quickstart

The main thing we need to do is get our model and create a main function (in our case, to predict images). After that, one line is enough to run the app (locally, in this notebook).
"""

# Here we set our model and get our categories (stored as a "vocab")
learn = hf_learn
labels = learn.dls.vocab

# This is the main function, used to predict categories for one image
def predict(img):
    img = PILImage.create(img)
    pred, pred_idx, probs = learn.predict(img)
    return {labels[i]: float(probs[i]) for i in range(len(labels))}

# We can check that the prediction results are what we expect
pred_str = predict(spring_dest)
print(pred_str)
pred_str = predict(summer_dest)
print(pred_str)
pred_str = predict(autumn_dest)
print(pred_str)
pred_str = predict(winter_dest)
print(pred_str)

gr.Interface(fn=predict, inputs=gr.Image(), outputs=gr.Label(num_top_classes=2)).launch(share=True)

"""# Online app

But what good is a local app? We want to be able to share it!

1. Create a HuggingFace account
2. Create a new HuggingFace Space
3. Go to "Files" and click "+ Add file", or edit the existing files

Using git:

1. git clone https:...
2. sudo apt-get install git-lfs
3. git lfs track "*.pkl"
4. git remote set-url origin https://user_name:token@huggingface.co/repo_path

You can copy the structure of this Space: https://huggingface.co/spaces/Pablogps/castles

As you can see, there are three main files:

* requirements.txt, with the things your Space will need (timm, fastai, toml)
* app.py, where you load the model and set up the app, just like we did in this notebook (a sketch follows at the end)
* example images, for a nicer app

Maybe this will be useful too, but note that its app.py contents are a bit outdated and will produce errors (use `gr.Image` instead of `gr.inputs.Image`, and similarly for the outputs):

https://www.tanishq.ai/blog/posts/2021-11-16-gradio-huggingface.html#footnotes

For more references, check the files in my own quick example:

https://huggingface.co/spaces/Pablogps/gradio-test
"""
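
"""To make that last step concrete, here is a hedged sketch of what such an app.py could look like, mirroring what we did above with current Gradio syntax. The repo_id is the model we pushed earlier; the example image filenames are hypothetical and would have to be uploaded to your Space alongside app.py."""

# app.py sketch for a HuggingFace Space (requirements.txt would list: fastai, timm, toml)
import gradio as gr
from fastai.vision.all import PILImage
from huggingface_hub import from_pretrained_fastai

learn = from_pretrained_fastai("Ener3122/Seasons_Classifier")  # the model we pushed above
labels = learn.dls.vocab

def predict(img):
    # Same prediction function as in the notebook: one image in, a dict of probabilities out
    img = PILImage.create(img)
    pred, pred_idx, probs = learn.predict(img)
    return {labels[i]: float(probs[i]) for i in range(len(labels))}

gr.Interface(fn=predict,
             inputs=gr.Image(),
             outputs=gr.Label(num_top_classes=2),
             examples=["spring.jpg", "winter.jpg"]).launch()  # hypothetical example files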