# Try out the PhytoClassUCSC model for yourself

Using this notebook, you should be able to clone the model repo from Hugging Face, grab a syringe from the Santa Cruz Wharf IFCB dataset on the CalOOS Dashboard instance, and run it through the classifier.

Using the __GPU Hardware Accelerator__ will significantly reduce the processing time. keras_img\n", "import numpy as np\n", "import ifcb\n", "import json, os\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import urllib.request, urllib.parse\n", "from PIL import Image\n", "import pandas as pd" ], "metadata": { "id": "0NXIPkqB049o" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Load the Model (phytoClassUCSC.h5)" ], "metadata": { "id": "doj0y_g02dyI" } }, { "cell_type": "code", "source": [ "model = tf.keras.saving.load_model(\"./phytoClassUCSC/phytoClassUCSC.h5\")\n", "with open(\"./phytoClassUCSC/class_list.json\") as json_file:\n", " class_list = list(json.load(json_file))" ], "metadata": { "id": "nfX2zKtB0-vP" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Grab an entire syringe (.roi, .hdr, .adc) from the CalOOS Dashboard.\n", "\n", "Let's download some images off of the CalOOS IFCB Dashboard ([ifcb.caloos.org](https://ifcb.caloos.org))\n", "\n" ], "metadata": { "id": "5fZDcm3T2l_V" } }, { "cell_type": "code", "source": [ "base_url = \"https://ifcb.caloos.org\"\n", "data_set = \"santa-cruz-municipal-wharf\"\n", "syringe = \"D20230719T064404_IFCB104\"\n", "url = \"/\".join([base_url, data_set, syringe])\n", "\n", "for base in ['.roi', '.adc','.hdr']:\n", " full_url = url + base\n", " save_name = full_url.split(\"/\")[-1]\n", " print(\"Retrieving {} from {}\".format(save_name, full_url))\n", " urllib.request.urlretrieve(full_url, filename=os.path.join(\"/content\",save_name))" ], "metadata": { "id": "3v8h0UnwKGMa", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "ce53a507-bf5c-4f5a-c02d-8ba924217515" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Retrieving D20230719T064404_IFCB104.roi from https://ifcb.caloos.org/santa-cruz-municipal-wharf/D20230719T064404_IFCB104.roi\n", "Retrieving 
D20230719T064404_IFCB104.adc from https://ifcb.caloos.org/santa-cruz-municipal-wharf/D20230719T064404_IFCB104.adc\n", "Retrieving D20230719T064404_IFCB104.hdr from https://ifcb.caloos.org/santa-cruz-municipal-wharf/D20230719T064404_IFCB104.hdr\n" ] } ] }, { "cell_type": "markdown", "source": [ "The images need to be reshaped into a certain size and format for the model, so the code below loads the image, resizes it, changes it to a three channel R,G,B and returns the image as an array." ], "metadata": { "id": "Pqwu31gzBFt2" } }, { "cell_type": "code", "source": [ "def prep_image(img_data):\n", "\n", " \"\"\"Load and prep images for model, reshape and normalize rgb to greyscale\"\"\"\n", "\n", " target_size=(224,224)\n", " img = keras_img.img_to_array(Image.fromarray(img_data).resize(target_size))\n", " img /= 255\n", " img = img.reshape((1, img.shape[0], img.shape[1], img.shape[2]))\n", " return img\n", "\n", "\n", "def build_image_stack(roi_fname):\n", "\n", " \"\"\" Return a matric of preprocessed images from a singe syringe\"\"\"\n", "\n", " with ifcb.open_raw(os.path.join(\"/content\",roi_fname)) as roi_data:\n", " array_index = 0\n", " roi_names = []\n", " img_stack = np.empty(shape=(len(roi_data.images),224,224,3))\n", " for roi_num, img_data in roi_data.images.items():\n", " img_stack[array_index,:,:,:] = prep_image(img_data)\n", " array_index += 1\n", " roi_names.append(roi_num)\n", "\n", " # Also return run and inhibit times for sample volume calculation\n", " run_time = roi_data.hdr_attributes['runTime']\n", " inhibit_time = roi_data.hdr_attributes['inhibitTime']\n", "\n", " return img_stack, roi_names, inhibit_time, run_time" ], "metadata": { "id": "7aU9WZCQLoyG" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "img_stack, roi_names, inhibit_time, run_time = build_image_stack(\"D20230719T064404_IFCB104.roi\")\n" ], "metadata": { "id": "3Bjtay0R4LWS" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", 
"source": [ "Use the model to predict the class" ], "metadata": { "id": "j6s9ytvHAa7q" } }, { "cell_type": "code", "source": [ "yhat = model.predict(img_stack)" ], "metadata": { "id": "4ZMlMC-p5oqL", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "9adcacd2-d35c-43f1-9b44-5327f39571a0" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "86/86 [==============================] - 23s 154ms/step\n" ] } ] }, { "cell_type": "markdown", "source": [ "Now select the top class for each prediction" ], "metadata": { "id": "BgCICpRbTJEH" } }, { "cell_type": "code", "source": [ "top_ix = np.argmax(yhat,axis=1)\n", "top_prob = []\n", "top_class = []\n", "for i, ix in enumerate(top_ix):\n", " top_prob.append(yhat[i,ix])\n", " top_class.append(class_list[ix])" ], "metadata": { "id": "3KjrVwSxRDu1" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "df_full = pd.DataFrame(data= {\"roi\":roi_names, \"top_class\":top_class,\"top_prob\":top_prob})\n", "df_full['img_url'] = [\"/\".join([base_url,\"data\", syringe + \"_{:04d}.png\".format(r)]) for r in roi_names ]\n", "flowrate = 0.25; # .25 mls per minute\n", "volume_analyzed = round(((run_time - inhibit_time) * flowrate)/60, 3)\n", "print(\"Sample Volume: {} mL\".format(volume_analyzed))\n", "df_full" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 441 }, "id": "aHyEaEGTSgTX", "outputId": "3278b587-e3f4-4cd7-c61f-9756a93c5067" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Sample Volume: 4.108 mL\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ " roi top_class top_prob \\\n", "0 2 Ciliates 0.329502 \n", "1 3 Centric 0.717917 \n", "2 4 Centric 0.861939 \n", "3 5 Eucampia 0.983304 \n", "4 6 Eucampia 0.999946 \n", "... ... ... ... 
\n", "2723 2726 NanoP_less10 0.878550 \n", "2724 2727 Chaetoceros 0.994962 \n", "2725 2728 Eucampia 0.997788 \n", "2726 2729 Prorocentrum 0.999849 \n", "2727 2730 Eucampia 0.999993 \n", "\n", " img_url \n", "0 https://ifcb.caloos.org/data/D20230719T064404_... \n", "1 https://ifcb.caloos.org/data/D20230719T064404_... \n", "2 https://ifcb.caloos.org/data/D20230719T064404_... \n", "3 https://ifcb.caloos.org/data/D20230719T064404_... \n", "4 https://ifcb.caloos.org/data/D20230719T064404_... \n", "... ... \n", "2723 https://ifcb.caloos.org/data/D20230719T064404_... \n", "2724 https://ifcb.caloos.org/data/D20230719T064404_... \n", "2725 https://ifcb.caloos.org/data/D20230719T064404_... \n", "2726 https://ifcb.caloos.org/data/D20230719T064404_... \n", "2727 https://ifcb.caloos.org/data/D20230719T064404_... \n", "\n", "[2728 rows x 4 columns]" ], "text/html": [ "\n", "\n", "
