{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "source": [ "# Try out the PhytoClassUCSC model for yourself\n", "\n", "Using this notebook, you should able to clone the model repo off of Hugging Face, grab an syringe from the Santa Cruz Wharf IFCB dataset on the CalOOS Dashboard instance, and run it through the classifier.\n", "\n", "Using the __GPU Hardware Accelerator__ will significantly increase the processing time.\n", "\n", "\n", "### REMOVE USERNAME AND PW before publishing" ], "metadata": { "id": "UQP7BLJX1281" } }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vvpmWDgOzzau", "outputId": "7306bae1-982c-425a-ed6b-5e7b38808ae8" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting keras_preprocessing\n", " Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)\n", "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/42.6 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.6/42.6 kB\u001b[0m \u001b[31m1.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from keras_preprocessing) (1.22.4)\n", "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from keras_preprocessing) (1.16.0)\n", "Installing collected packages: keras_preprocessing\n", "Successfully installed keras_preprocessing-1.1.2\n", "Cloning into 'phytoClassUCSC'...\n", "remote: Enumerating objects: 53, done.\u001b[K\n", "remote: Counting objects: 100% (53/53), done.\u001b[K\n", "remote: Compressing objects: 100% (48/48), done.\u001b[K\n", "remote: Total 53 (delta 12), reused 0 (delta 0), pack-reused 0\u001b[K\n", "Unpacking objects: 100% (53/53), 557.16 KiB | 1.24 MiB/s, done.\n", "Collecting git+https://github.com/joefutrelle/pyifcb.git\n", " Cloning https://github.com/joefutrelle/pyifcb.git to /tmp/pip-req-build-h_9ypdyg\n", " Running command git clone --filter=blob:none --quiet https://github.com/joefutrelle/pyifcb.git /tmp/pip-req-build-h_9ypdyg\n", " Resolved https://github.com/joefutrelle/pyifcb.git to commit e7ecbd925170ac59f0a728f48a751657a4c40307\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Collecting scipy<1.9.2 (from pyifcb==0.0.1)\n", " Downloading scipy-1.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (43.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.9/43.9 MB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from pyifcb==0.0.1) (1.5.3)\n", "Requirement already satisfied: h5py in /usr/local/lib/python3.10/dist-packages (from pyifcb==0.0.1) (3.8.0)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from pyifcb==0.0.1) (2.27.1)\n", "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from pyifcb==0.0.1) (9.4.0)\n", "Collecting rectpack (from pyifcb==0.0.1)\n", " Downloading rectpack-0.2.2.tar.gz (17 kB)\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: scikit-image in /usr/local/lib/python3.10/dist-packages (from pyifcb==0.0.1) (0.19.3)\n", "Collecting pysmb (from pyifcb==0.0.1)\n", " Downloading pysmb-1.2.9.1.zip (1.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m76.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from pyifcb==0.0.1) (6.0.1)\n", "Requirement already satisfied: numpy<1.25.0,>=1.18.5 in /usr/local/lib/python3.10/dist-packages (from scipy<1.9.2->pyifcb==0.0.1) (1.22.4)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->pyifcb==0.0.1) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->pyifcb==0.0.1) (2022.7.1)\n", "Requirement already satisfied: pyasn1 in /usr/local/lib/python3.10/dist-packages (from pysmb->pyifcb==0.0.1) (0.5.0)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from pysmb->pyifcb==0.0.1) (4.65.0)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->pyifcb==0.0.1) (1.26.16)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->pyifcb==0.0.1) (2023.7.22)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->pyifcb==0.0.1) (2.0.12)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->pyifcb==0.0.1) (3.4)\n", "Requirement already satisfied: networkx>=2.2 in /usr/local/lib/python3.10/dist-packages (from scikit-image->pyifcb==0.0.1) (3.1)\n", "Requirement already satisfied: imageio>=2.4.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image->pyifcb==0.0.1) (2.25.1)\n", "Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image->pyifcb==0.0.1) (2023.7.18)\n", "Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image->pyifcb==0.0.1) (1.4.1)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from scikit-image->pyifcb==0.0.1) (23.1)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->pyifcb==0.0.1) (1.16.0)\n", "Building wheels for collected packages: pyifcb, pysmb, rectpack\n", " Building wheel for pyifcb (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for pyifcb: filename=pyifcb-0.0.1-py3-none-any.whl size=61518 sha256=39bbb112eb2e88ba944d3a7cc51387080e1147dc634b8cd0ed95cc76a86413f6\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-gcsarmzt/wheels/61/fa/8f/c0c33addc3ecffe7d8dc392af68e3f8eb0316b2808da6897e6\n", " Building wheel for pysmb (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for pysmb: filename=pysmb-1.2.9.1-py3-none-any.whl size=84802 sha256=c093dd18f06490f77d7b649d26e85125f2a6941b5e4c0d39eb7eef3f2539aa28\n", " Stored in directory: /root/.cache/pip/wheels/b6/13/a6/22f752798d4429d1f973f90e1fdaf8eb782a899fc691b57f48\n", " Building wheel for rectpack (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for rectpack: filename=rectpack-0.2.2-py3-none-any.whl size=19333 sha256=ac7f7862793ba19a9f3b379dd5b2c8f9f7f08fd5e1c09b1bbcab36804aa79c9f\n", " Stored in directory: /root/.cache/pip/wheels/e9/ea/e9/cd0237c0ccb9cb7312bb94cc023689592c4f07e4f3b1b9dd00\n", "Successfully built pyifcb pysmb rectpack\n", "Installing collected packages: rectpack, scipy, pysmb, pyifcb\n", " Attempting uninstall: scipy\n", " Found existing installation: scipy 1.10.1\n", " Uninstalling scipy-1.10.1:\n", " Successfully uninstalled scipy-1.10.1\n", "Successfully installed pyifcb-0.0.1 pysmb-1.2.9.1 rectpack-0.2.2 scipy-1.9.1\n" ] } ], "source": [ "!pip install keras_preprocessing\n", "!git clone https://patcdaniel:zozmir-1qempa-kenrAb@huggingface.co/patcdaniel/phytoClassUCSC\n", "!pip install -U git+https://github.com/joefutrelle/pyifcb.git\n" ] }, { "cell_type": "code", "source": [ "import tensorflow as tf\n", "import keras_preprocessing.image as keras_img\n", "import numpy as np\n", "import ifcb\n", "import json, os\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import urllib.request, urllib.parse\n", "from PIL import Image\n", "import pandas as pd" ], "metadata": { "id": "0NXIPkqB049o" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Load the Model (phytoClassUCSC.h5)" ], "metadata": { "id": "doj0y_g02dyI" } }, { "cell_type": "code", "source": [ "model = tf.keras.saving.load_model(\"./phytoClassUCSC/phytoClassUCSC.h5\")\n", "with open(\"./phytoClassUCSC/class_list.json\") as json_file:\n", " class_list = list(json.load(json_file))" ], "metadata": { "id": "nfX2zKtB0-vP" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Grab an entire syringe (.roi, .hdr, .adc) from the CalOOS Dashboard.\n", "\n", "Let's download some images off of the CalOOS IFCB Dashboard ([ifcb.caloos.org](ifcb.caloos.org))\n", "\n" ], "metadata": { "id": "5fZDcm3T2l_V" } }, { "cell_type": "code", "source": [ "base_url = \"https://ifcb.caloos.org\"\n", "data_set = \"santa-cruz-municipal-wharf\"\n", "syringe = \"D20230719T064404_IFCB104\"\n", "url = \"/\".join([base_url, data_set, syringe])\n", "\n", "for base in ['.roi', '.adc','.hdr']:\n", " full_url = url + base\n", " save_name = full_url.split(\"/\")[-1]\n", " print(\"Retrieving {} from {}\".format(save_name, full_url))\n", " urllib.request.urlretrieve(full_url, filename=os.path.join(\"/content\",save_name))" ], "metadata": { "id": "3v8h0UnwKGMa", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "ce53a507-bf5c-4f5a-c02d-8ba924217515" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Retrieving D20230719T064404_IFCB104.roi from https://ifcb.caloos.org/santa-cruz-municipal-wharf/D20230719T064404_IFCB104.roi\n", "Retrieving D20230719T064404_IFCB104.adc from https://ifcb.caloos.org/santa-cruz-municipal-wharf/D20230719T064404_IFCB104.adc\n", "Retrieving D20230719T064404_IFCB104.hdr from https://ifcb.caloos.org/santa-cruz-municipal-wharf/D20230719T064404_IFCB104.hdr\n" ] } ] }, { "cell_type": "markdown", "source": [ "The images need to be reshaped into a certain size and format for the model, so the code below loads the image, resizes it, changes it to a three channel R,G,B and returns the image as an array." ], "metadata": { "id": "Pqwu31gzBFt2" } }, { "cell_type": "code", "source": [ "def prep_image(img_data):\n", "\n", " \"\"\"Load and prep images for model, reshape and normalize rgb to greyscale\"\"\"\n", "\n", " target_size=(224,224)\n", " img = keras_img.img_to_array(Image.fromarray(img_data).resize(target_size))\n", " img /= 255\n", " img = img.reshape((1, img.shape[0], img.shape[1], img.shape[2]))\n", " return img\n", "\n", "\n", "def build_image_stack(roi_fname):\n", "\n", " \"\"\" Return a matric of preprocessed images from a singe syringe\"\"\"\n", "\n", " with ifcb.open_raw(os.path.join(\"/content\",roi_fname)) as roi_data:\n", " array_index = 0\n", " roi_names = []\n", " img_stack = np.empty(shape=(len(roi_data.images),224,224,3))\n", " for roi_num, img_data in roi_data.images.items():\n", " img_stack[array_index,:,:,:] = prep_image(img_data)\n", " array_index += 1\n", " roi_names.append(roi_num)\n", "\n", " # Also return run and inhibit times for sample volume calculation\n", " run_time = roi_data.hdr_attributes['runTime']\n", " inhibit_time = roi_data.hdr_attributes['inhibitTime']\n", "\n", " return img_stack, roi_names, inhibit_time, run_time" ], "metadata": { "id": "7aU9WZCQLoyG" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "img_stack, roi_names, inhibit_time, run_time = build_image_stack(\"D20230719T064404_IFCB104.roi\")\n" ], "metadata": { "id": "3Bjtay0R4LWS" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Use the model to predict the class" ], "metadata": { "id": "j6s9ytvHAa7q" } }, { "cell_type": "code", "source": [ "yhat = model.predict(img_stack)" ], "metadata": { "id": "4ZMlMC-p5oqL", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "9adcacd2-d35c-43f1-9b44-5327f39571a0" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "86/86 [==============================] - 23s 154ms/step\n" ] } ] }, { "cell_type": "markdown", "source": [ "Now select the top class for each prediction" ], "metadata": { "id": "BgCICpRbTJEH" } }, { "cell_type": "code", "source": [ "top_ix = np.argmax(yhat,axis=1)\n", "top_prob = []\n", "top_class = []\n", "for i, ix in enumerate(top_ix):\n", " top_prob.append(yhat[i,ix])\n", " top_class.append(class_list[ix])" ], "metadata": { "id": "3KjrVwSxRDu1" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "df_full = pd.DataFrame(data= {\"roi\":roi_names, \"top_class\":top_class,\"top_prob\":top_prob})\n", "df_full['img_url'] = [\"/\".join([base_url,\"data\", syringe + \"_{:04d}.png\".format(r)]) for r in roi_names ]\n", "flowrate = 0.25; # .25 mls per minute\n", "volume_analyzed = round(((run_time - inhibit_time) * flowrate)/60, 3)\n", "print(\"Sample Volume: {} mL\".format(volume_analyzed))\n", "df_full" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 441 }, "id": "aHyEaEGTSgTX", "outputId": "3278b587-e3f4-4cd7-c61f-9756a93c5067" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Sample Volume: 4.108 mL\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ " roi top_class top_prob \\\n", "0 2 Ciliates 0.329502 \n", "1 3 Centric 0.717917 \n", "2 4 Centric 0.861939 \n", "3 5 Eucampia 0.983304 \n", "4 6 Eucampia 0.999946 \n", "... ... ... ... \n", "2723 2726 NanoP_less10 0.878550 \n", "2724 2727 Chaetoceros 0.994962 \n", "2725 2728 Eucampia 0.997788 \n", "2726 2729 Prorocentrum 0.999849 \n", "2727 2730 Eucampia 0.999993 \n", "\n", " img_url \n", "0 https://ifcb.caloos.org/data/D20230719T064404_... \n", "1 https://ifcb.caloos.org/data/D20230719T064404_... \n", "2 https://ifcb.caloos.org/data/D20230719T064404_... \n", "3 https://ifcb.caloos.org/data/D20230719T064404_... \n", "4 https://ifcb.caloos.org/data/D20230719T064404_... \n", "... ... \n", "2723 https://ifcb.caloos.org/data/D20230719T064404_... \n", "2724 https://ifcb.caloos.org/data/D20230719T064404_... \n", "2725 https://ifcb.caloos.org/data/D20230719T064404_... \n", "2726 https://ifcb.caloos.org/data/D20230719T064404_... \n", "2727 https://ifcb.caloos.org/data/D20230719T064404_... \n", "\n", "[2728 rows x 4 columns]" ], "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
roitop_classtop_probimg_url
02Ciliates0.329502https://ifcb.caloos.org/data/D20230719T064404_...
13Centric0.717917https://ifcb.caloos.org/data/D20230719T064404_...
24Centric0.861939https://ifcb.caloos.org/data/D20230719T064404_...
35Eucampia0.983304https://ifcb.caloos.org/data/D20230719T064404_...
46Eucampia0.999946https://ifcb.caloos.org/data/D20230719T064404_...
...............
27232726NanoP_less100.878550https://ifcb.caloos.org/data/D20230719T064404_...
27242727Chaetoceros0.994962https://ifcb.caloos.org/data/D20230719T064404_...
27252728Eucampia0.997788https://ifcb.caloos.org/data/D20230719T064404_...
27262729Prorocentrum0.999849https://ifcb.caloos.org/data/D20230719T064404_...
27272730Eucampia0.999993https://ifcb.caloos.org/data/D20230719T064404_...
\n", "

2728 rows × 4 columns

\n", "
\n", " \n", "\n", "\n", "\n", "
\n", " \n", "
\n", "\n", "\n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 40 } ] }, { "cell_type": "markdown", "source": [ "*Now* lets convert the data output into cell concentrations.\n", "- Calculate the sample volume\n", "- \\# per mL" ], "metadata": { "id": "Lj1gNXIdLpbv" } }, { "cell_type": "code", "source": [ "def load_thresholds(fname):\n", " \"\"\" Load pre-determined class-specific thresholds \"\"\"\n", " with open(fname, 'r') as file:\n", " thresh = json.load(file)\n", " thresh_vals = np.array([thresh[k] for k in thresh.keys()])\n", " return thresh_vals\n", "\n", "threshold_vals = load_thresholds(\"/content/phytoClassUCSC/class_threshold_v1.0.json\")" ], "metadata": { "id": "-l9WN8PLZzx8" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Map the threshold values for each class to the dataframe\n", "thresh = pd.DataFrame(data={\"class_val\":threshold_vals})\n", "thresh.index = class_list[:-1]\n", "df_full['class_threshold'] = df_full['top_class'].map(thresh['class_val'])\n", "\n", "# Prefill the series with value \"Unclassified\" and then replace the rows where the classifier is greater than the\n", "df_full[\"top_class_thresh\"] = \"Unclassified\"\n", "greater_than_thresh = df_full['top_prob'] > df_full[\"class_threshold\"] # Boolean series, True where greater than threshold\n", "df_full[\"top_class_thresh\"][greater_than_thresh] = df_full[\"top_class\"][greater_than_thresh]; # replace \"Unclassifed\" values with actual class, where True\n", "df_full['thresh_diff'] = df_full['top_prob'] - df_full[\"class_threshold\"]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "BIuj5gS6gano", "outputId": "7625df86-c470-4363-917d-4ecfcbd32120" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ ":9: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_full[\"top_class_thresh\"][greater_than_thresh] = df_full[\"top_class\"][greater_than_thresh]; # replace \"Unclassifed\" values with actual class, where True\n" ] } ] }, { "cell_type": "code", "source": [ "df_full" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 687 }, "id": "Pn_ytX-0h8iZ", "outputId": "0ebedd65-d0c0-4213-8e34-e28cc514a260" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " roi top_class top_prob \\\n", "0 2 Ciliates 0.329502 \n", "1 3 Centric 0.717917 \n", "2 4 Centric 0.861939 \n", "3 5 Eucampia 0.983304 \n", "4 6 Eucampia 0.999946 \n", "... ... ... ... \n", "2723 2726 NanoP_less10 0.878550 \n", "2724 2727 Chaetoceros 0.994962 \n", "2725 2728 Eucampia 0.997788 \n", "2726 2729 Prorocentrum 0.999849 \n", "2727 2730 Eucampia 0.999993 \n", "\n", " img_url top_class_thresh \\\n", "0 https://ifcb.caloos.org/data/D20230719T064404_... Unclassified \n", "1 https://ifcb.caloos.org/data/D20230719T064404_... Centric \n", "2 https://ifcb.caloos.org/data/D20230719T064404_... Centric \n", "3 https://ifcb.caloos.org/data/D20230719T064404_... Eucampia \n", "4 https://ifcb.caloos.org/data/D20230719T064404_... Eucampia \n", "... ... ... \n", "2723 https://ifcb.caloos.org/data/D20230719T064404_... Unclassified \n", "2724 https://ifcb.caloos.org/data/D20230719T064404_... Chaetoceros \n", "2725 https://ifcb.caloos.org/data/D20230719T064404_... Eucampia \n", "2726 https://ifcb.caloos.org/data/D20230719T064404_... Prorocentrum \n", "2727 https://ifcb.caloos.org/data/D20230719T064404_... Eucampia \n", "\n", " class_threshold thresh_diff \n", "0 0.49 -0.160498 \n", "1 0.70 0.017917 \n", "2 0.70 0.161939 \n", "3 0.88 0.103304 \n", "4 0.88 0.119946 \n", "... ... ... \n", "2723 0.92 -0.041450 \n", "2724 0.89 0.104962 \n", "2725 0.88 0.117788 \n", "2726 0.92 0.079849 \n", "2727 0.88 0.119993 \n", "\n", "[2728 rows x 7 columns]" ], "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
roitop_classtop_probimg_urltop_class_threshclass_thresholdthresh_diff
02Ciliates0.329502https://ifcb.caloos.org/data/D20230719T064404_...Unclassified0.49-0.160498
13Centric0.717917https://ifcb.caloos.org/data/D20230719T064404_...Centric0.700.017917
24Centric0.861939https://ifcb.caloos.org/data/D20230719T064404_...Centric0.700.161939
35Eucampia0.983304https://ifcb.caloos.org/data/D20230719T064404_...Eucampia0.880.103304
46Eucampia0.999946https://ifcb.caloos.org/data/D20230719T064404_...Eucampia0.880.119946
........................
27232726NanoP_less100.878550https://ifcb.caloos.org/data/D20230719T064404_...Unclassified0.92-0.041450
27242727Chaetoceros0.994962https://ifcb.caloos.org/data/D20230719T064404_...Chaetoceros0.890.104962
27252728Eucampia0.997788https://ifcb.caloos.org/data/D20230719T064404_...Eucampia0.880.117788
27262729Prorocentrum0.999849https://ifcb.caloos.org/data/D20230719T064404_...Prorocentrum0.920.079849
27272730Eucampia0.999993https://ifcb.caloos.org/data/D20230719T064404_...Eucampia0.880.119993
\n", "

2728 rows × 7 columns

\n", "
\n", " \n", "\n", "\n", "\n", "
\n", " \n", "
\n", "\n", "\n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 89 } ] }, { "cell_type": "markdown", "source": [ "## Count number of images for each class ##" ], "metadata": { "id": "wfIWHO46mHZW" } }, { "cell_type": "code", "source": [ "total = df_full.groupby('top_class_thresh')['roi'].count()\n", "total = total.sort_values(ascending=False)\n", "labels = total.index\n", "total_val = total.values" ], "metadata": { "id": "NwPpJK_4mNzR" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "fig, ax = plt.subplots()\n", "fig.set_size_inches(12,6)\n", "sns.barplot(x=labels,y=total_val, palette=\"Blues_d\")\n", "ax.tick_params(axis='x', rotation=90)\n", "ax.set_yscale('log')\n", "ax.set_ylabel(\"# per syringe\")\n", "for i, count in enumerate(total_val):\n", " div_by = 1/len(total_val)\n", " ax.text(div_by*i + .004,.03,str(int(count)),rotation='vertical', transform=ax.transAxes,c='w',weight='bold')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 669 }, "id": "OhXYZ3JpmN5G", "outputId": "76b20351-732b-42f4-ffb1-2d88478f0c92" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "HpjV5SgGo_wW" }, "execution_count": null, "outputs": [] } ] }