{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "source": [ "# Try out the PhytoClassUCSC model for yourself\n", "\n", "Using this notebook, you should be able to clone the model repo off of Hugging Face, grab a syringe from the Santa Cruz Wharf IFCB dataset on the CalOOS Dashboard instance, and run it through the classifier.\n", "\n", "Using the __GPU Hardware Accelerator__ will significantly reduce the processing time.\n", "\n", "\n", "### REMOVE USERNAME AND PW before publishing" ], "metadata": { "id": "UQP7BLJX1281" } }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vvpmWDgOzzau", "outputId": "7306bae1-982c-425a-ed6b-5e7b38808ae8" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting keras_preprocessing\n", " Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)\n", "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/42.6 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.6/42.6 kB\u001b[0m \u001b[31m1.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from keras_preprocessing) (1.22.4)\n", "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from keras_preprocessing) (1.16.0)\n", "Installing collected packages: keras_preprocessing\n", "Successfully installed keras_preprocessing-1.1.2\n", "Cloning into 'phytoClassUCSC'...\n", "remote: Enumerating objects: 53, done.\u001b[K\n", "remote: Counting objects: 100% (53/53), done.\u001b[K\n", "remote: Compressing objects: 100% 
(48/48), done.\u001b[K\n", "remote: Total 53 (delta 12), reused 0 (delta 0), pack-reused 0\u001b[K\n", "Unpacking objects: 100% (53/53), 557.16 KiB | 1.24 MiB/s, done.\n", "Collecting git+https://github.com/joefutrelle/pyifcb.git\n", " Cloning https://github.com/joefutrelle/pyifcb.git to /tmp/pip-req-build-h_9ypdyg\n", " Running command git clone --filter=blob:none --quiet https://github.com/joefutrelle/pyifcb.git /tmp/pip-req-build-h_9ypdyg\n", " Resolved https://github.com/joefutrelle/pyifcb.git to commit e7ecbd925170ac59f0a728f48a751657a4c40307\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Collecting scipy<1.9.2 (from pyifcb==0.0.1)\n", " Downloading scipy-1.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (43.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.9/43.9 MB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from pyifcb==0.0.1) (1.5.3)\n", "Requirement already satisfied: h5py in /usr/local/lib/python3.10/dist-packages (from pyifcb==0.0.1) (3.8.0)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from pyifcb==0.0.1) (2.27.1)\n", "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from pyifcb==0.0.1) (9.4.0)\n", "Collecting rectpack (from pyifcb==0.0.1)\n", " Downloading rectpack-0.2.2.tar.gz (17 kB)\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: scikit-image in /usr/local/lib/python3.10/dist-packages (from pyifcb==0.0.1) (0.19.3)\n", "Collecting pysmb (from pyifcb==0.0.1)\n", " Downloading pysmb-1.2.9.1.zip (1.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m76.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from pyifcb==0.0.1) (6.0.1)\n", "Requirement already satisfied: numpy<1.25.0,>=1.18.5 in /usr/local/lib/python3.10/dist-packages (from scipy<1.9.2->pyifcb==0.0.1) (1.22.4)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->pyifcb==0.0.1) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->pyifcb==0.0.1) (2022.7.1)\n", "Requirement already satisfied: pyasn1 in /usr/local/lib/python3.10/dist-packages (from pysmb->pyifcb==0.0.1) (0.5.0)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from pysmb->pyifcb==0.0.1) (4.65.0)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->pyifcb==0.0.1) (1.26.16)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->pyifcb==0.0.1) (2023.7.22)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->pyifcb==0.0.1) (2.0.12)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->pyifcb==0.0.1) (3.4)\n", "Requirement already satisfied: networkx>=2.2 in /usr/local/lib/python3.10/dist-packages (from scikit-image->pyifcb==0.0.1) (3.1)\n", "Requirement already satisfied: imageio>=2.4.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image->pyifcb==0.0.1) (2.25.1)\n", "Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image->pyifcb==0.0.1) (2023.7.18)\n", "Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image->pyifcb==0.0.1) (1.4.1)\n", "Requirement already satisfied: packaging>=20.0 in 
/usr/local/lib/python3.10/dist-packages (from scikit-image->pyifcb==0.0.1) (23.1)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->pyifcb==0.0.1) (1.16.0)\n", "Building wheels for collected packages: pyifcb, pysmb, rectpack\n", " Building wheel for pyifcb (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for pyifcb: filename=pyifcb-0.0.1-py3-none-any.whl size=61518 sha256=39bbb112eb2e88ba944d3a7cc51387080e1147dc634b8cd0ed95cc76a86413f6\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-gcsarmzt/wheels/61/fa/8f/c0c33addc3ecffe7d8dc392af68e3f8eb0316b2808da6897e6\n", " Building wheel for pysmb (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for pysmb: filename=pysmb-1.2.9.1-py3-none-any.whl size=84802 sha256=c093dd18f06490f77d7b649d26e85125f2a6941b5e4c0d39eb7eef3f2539aa28\n", " Stored in directory: /root/.cache/pip/wheels/b6/13/a6/22f752798d4429d1f973f90e1fdaf8eb782a899fc691b57f48\n", " Building wheel for rectpack (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", " Created wheel for rectpack: filename=rectpack-0.2.2-py3-none-any.whl size=19333 sha256=ac7f7862793ba19a9f3b379dd5b2c8f9f7f08fd5e1c09b1bbcab36804aa79c9f\n", " Stored in directory: /root/.cache/pip/wheels/e9/ea/e9/cd0237c0ccb9cb7312bb94cc023689592c4f07e4f3b1b9dd00\n", "Successfully built pyifcb pysmb rectpack\n", "Installing collected packages: rectpack, scipy, pysmb, pyifcb\n", " Attempting uninstall: scipy\n", " Found existing installation: scipy 1.10.1\n", " Uninstalling scipy-1.10.1:\n", " Successfully uninstalled scipy-1.10.1\n", "Successfully installed pyifcb-0.0.1 pysmb-1.2.9.1 rectpack-0.2.2 scipy-1.9.1\n" ] } ], "source": [ "!pip install keras_preprocessing\n", "!git clone https://patcdaniel:zozmir-1qempa-kenrAb@huggingface.co/patcdaniel/phytoClassUCSC\n", "!pip install -U git+https://github.com/joefutrelle/pyifcb.git\n" ] }, { "cell_type": "code", "source": [ "import tensorflow as tf\n", "import keras_preprocessing.image as keras_img\n", "import numpy as np\n", "import ifcb\n", "import json, os\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import urllib.request, urllib.parse\n", "from PIL import Image\n", "import pandas as pd" ], "metadata": { "id": "0NXIPkqB049o" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Load the Model (phytoClassUCSC.h5)" ], "metadata": { "id": "doj0y_g02dyI" } }, { "cell_type": "code", "source": [ "model = tf.keras.saving.load_model(\"./phytoClassUCSC/phytoClassUCSC.h5\")\n", "with open(\"./phytoClassUCSC/class_list.json\") as json_file:\n", " class_list = list(json.load(json_file))" ], "metadata": { "id": "nfX2zKtB0-vP" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Grab an entire syringe (.roi, .hdr, .adc) from the CalOOS Dashboard.\n", "\n", "Let's download some images off of the CalOOS IFCB Dashboard ([ifcb.caloos.org](ifcb.caloos.org))\n", "\n" ], "metadata": { "id": "5fZDcm3T2l_V" } }, { 
"cell_type": "code", "source": [ "base_url = \"https://ifcb.caloos.org\"\n", "data_set = \"santa-cruz-municipal-wharf\"\n", "syringe = \"D20230719T064404_IFCB104\"\n", "url = \"/\".join([base_url, data_set, syringe])\n", "\n", "for base in ['.roi', '.adc','.hdr']:\n", " full_url = url + base\n", " save_name = full_url.split(\"/\")[-1]\n", " print(\"Retrieving {} from {}\".format(save_name, full_url))\n", " urllib.request.urlretrieve(full_url, filename=os.path.join(\"/content\",save_name))" ], "metadata": { "id": "3v8h0UnwKGMa", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "ce53a507-bf5c-4f5a-c02d-8ba924217515" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Retrieving D20230719T064404_IFCB104.roi from https://ifcb.caloos.org/santa-cruz-municipal-wharf/D20230719T064404_IFCB104.roi\n", "Retrieving D20230719T064404_IFCB104.adc from https://ifcb.caloos.org/santa-cruz-municipal-wharf/D20230719T064404_IFCB104.adc\n", "Retrieving D20230719T064404_IFCB104.hdr from https://ifcb.caloos.org/santa-cruz-municipal-wharf/D20230719T064404_IFCB104.hdr\n" ] } ] }, { "cell_type": "markdown", "source": [ "The images need to be reshaped into a certain size and format for the model, so the code below loads the image, resizes it, changes it to a three channel R,G,B and returns the image as an array." 
], "metadata": { "id": "Pqwu31gzBFt2" } }, { "cell_type": "code", "source": [ "def prep_image(img_data):\n", "\n", " \"\"\"Load and prep images for model, reshape and normalize rgb to greyscale\"\"\"\n", "\n", " target_size=(224,224)\n", " img = keras_img.img_to_array(Image.fromarray(img_data).resize(target_size))\n", " img /= 255\n", " img = img.reshape((1, img.shape[0], img.shape[1], img.shape[2]))\n", " return img\n", "\n", "\n", "def build_image_stack(roi_fname):\n", "\n", " \"\"\" Return a matric of preprocessed images from a singe syringe\"\"\"\n", "\n", " with ifcb.open_raw(os.path.join(\"/content\",roi_fname)) as roi_data:\n", " array_index = 0\n", " roi_names = []\n", " img_stack = np.empty(shape=(len(roi_data.images),224,224,3))\n", " for roi_num, img_data in roi_data.images.items():\n", " img_stack[array_index,:,:,:] = prep_image(img_data)\n", " array_index += 1\n", " roi_names.append(roi_num)\n", "\n", " # Also return run and inhibit times for sample volume calculation\n", " run_time = roi_data.hdr_attributes['runTime']\n", " inhibit_time = roi_data.hdr_attributes['inhibitTime']\n", "\n", " return img_stack, roi_names, inhibit_time, run_time" ], "metadata": { "id": "7aU9WZCQLoyG" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "img_stack, roi_names, inhibit_time, run_time = build_image_stack(\"D20230719T064404_IFCB104.roi\")\n" ], "metadata": { "id": "3Bjtay0R4LWS" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Use the model to predict the class" ], "metadata": { "id": "j6s9ytvHAa7q" } }, { "cell_type": "code", "source": [ "yhat = model.predict(img_stack)" ], "metadata": { "id": "4ZMlMC-p5oqL", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "9adcacd2-d35c-43f1-9b44-5327f39571a0" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "86/86 [==============================] - 23s 154ms/step\n" ] } ] }, { "cell_type": "markdown", 
"source": [ "Now select the top class for each prediction" ], "metadata": { "id": "BgCICpRbTJEH" } }, { "cell_type": "code", "source": [ "top_ix = np.argmax(yhat,axis=1)\n", "top_prob = []\n", "top_class = []\n", "for i, ix in enumerate(top_ix):\n", " top_prob.append(yhat[i,ix])\n", " top_class.append(class_list[ix])" ], "metadata": { "id": "3KjrVwSxRDu1" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "df_full = pd.DataFrame(data= {\"roi\":roi_names, \"top_class\":top_class,\"top_prob\":top_prob})\n", "df_full['img_url'] = [\"/\".join([base_url,\"data\", syringe + \"_{:04d}.png\".format(r)]) for r in roi_names ]\n", "flowrate = 0.25; # .25 mls per minute\n", "volume_analyzed = round(((run_time - inhibit_time) * flowrate)/60, 3)\n", "print(\"Sample Volume: {} mL\".format(volume_analyzed))\n", "df_full" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 441 }, "id": "aHyEaEGTSgTX", "outputId": "3278b587-e3f4-4cd7-c61f-9756a93c5067" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Sample Volume: 4.108 mL\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ " roi top_class top_prob \\\n", "0 2 Ciliates 0.329502 \n", "1 3 Centric 0.717917 \n", "2 4 Centric 0.861939 \n", "3 5 Eucampia 0.983304 \n", "4 6 Eucampia 0.999946 \n", "... ... ... ... \n", "2723 2726 NanoP_less10 0.878550 \n", "2724 2727 Chaetoceros 0.994962 \n", "2725 2728 Eucampia 0.997788 \n", "2726 2729 Prorocentrum 0.999849 \n", "2727 2730 Eucampia 0.999993 \n", "\n", " img_url \n", "0 https://ifcb.caloos.org/data/D20230719T064404_... \n", "1 https://ifcb.caloos.org/data/D20230719T064404_... \n", "2 https://ifcb.caloos.org/data/D20230719T064404_... \n", "3 https://ifcb.caloos.org/data/D20230719T064404_... \n", "4 https://ifcb.caloos.org/data/D20230719T064404_... \n", "... ... \n", "2723 https://ifcb.caloos.org/data/D20230719T064404_... 
\n", "2724 https://ifcb.caloos.org/data/D20230719T064404_... \n", "2725 https://ifcb.caloos.org/data/D20230719T064404_... \n", "2726 https://ifcb.caloos.org/data/D20230719T064404_... \n", "2727 https://ifcb.caloos.org/data/D20230719T064404_... \n", "\n", "[2728 rows x 4 columns]" ], "text/html": [ "\n", "\n", "
\n", " | roi | \n", "top_class | \n", "top_prob | \n", "img_url | \n", "
---|---|---|---|---|
0 | \n", "2 | \n", "Ciliates | \n", "0.329502 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "
1 | \n", "3 | \n", "Centric | \n", "0.717917 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "
2 | \n", "4 | \n", "Centric | \n", "0.861939 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "
3 | \n", "5 | \n", "Eucampia | \n", "0.983304 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "
4 | \n", "6 | \n", "Eucampia | \n", "0.999946 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2723 | \n", "2726 | \n", "NanoP_less10 | \n", "0.878550 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "
2724 | \n", "2727 | \n", "Chaetoceros | \n", "0.994962 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "
2725 | \n", "2728 | \n", "Eucampia | \n", "0.997788 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "
2726 | \n", "2729 | \n", "Prorocentrum | \n", "0.999849 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "
2727 | \n", "2730 | \n", "Eucampia | \n", "0.999993 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "
2728 rows × 4 columns
\n", "\n", " | roi | \n", "top_class | \n", "top_prob | \n", "img_url | \n", "top_class_thresh | \n", "class_threshold | \n", "thresh_diff | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "2 | \n", "Ciliates | \n", "0.329502 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "Unclassified | \n", "0.49 | \n", "-0.160498 | \n", "
1 | \n", "3 | \n", "Centric | \n", "0.717917 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "Centric | \n", "0.70 | \n", "0.017917 | \n", "
2 | \n", "4 | \n", "Centric | \n", "0.861939 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "Centric | \n", "0.70 | \n", "0.161939 | \n", "
3 | \n", "5 | \n", "Eucampia | \n", "0.983304 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "Eucampia | \n", "0.88 | \n", "0.103304 | \n", "
4 | \n", "6 | \n", "Eucampia | \n", "0.999946 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "Eucampia | \n", "0.88 | \n", "0.119946 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2723 | \n", "2726 | \n", "NanoP_less10 | \n", "0.878550 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "Unclassified | \n", "0.92 | \n", "-0.041450 | \n", "
2724 | \n", "2727 | \n", "Chaetoceros | \n", "0.994962 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "Chaetoceros | \n", "0.89 | \n", "0.104962 | \n", "
2725 | \n", "2728 | \n", "Eucampia | \n", "0.997788 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "Eucampia | \n", "0.88 | \n", "0.117788 | \n", "
2726 | \n", "2729 | \n", "Prorocentrum | \n", "0.999849 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "Prorocentrum | \n", "0.92 | \n", "0.079849 | \n", "
2727 | \n", "2730 | \n", "Eucampia | \n", "0.999993 | \n", "https://ifcb.caloos.org/data/D20230719T064404_... | \n", "Eucampia | \n", "0.88 | \n", "0.119993 | \n", "
2728 rows × 7 columns
\n", "