Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

README.md +396 -0
config.json +360 -0
local_compartment_classifier_bd_boxes.skops +0 -0
train.py +359 -0

README.md ADDED Viewed

	@@ -0,0 +1,396 @@

+---
+license: mit
+library_name: sklearn
+tags:
+- sklearn
+- skops
+- tabular-classification
+model_format: skops
+model_file: local_compartment_classifier_bd_boxes.skops
+widget:
+- structuredData:
+    area_nm2:
+    - 693824.0
+    - 4852608.0
+    - 17088896.0
+    area_nm2_neighbor_mean:
+    - 10181485.714285716
+    - 9884429.714285716
+    - 9010409.142857144
+    area_nm2_neighbor_std:
+    - 8312409.263207569
+    - 8587259.418816902
+    - 8418630.640116522
+    max_dt_nm:
+    - 69.0
+    - 543.0
+    - 1287.0
+    max_dt_nm_neighbor_mean:
+    - 664.7142857142857
+    - 630.8571428571429
+    - 577.7142857142857
+    max_dt_nm_neighbor_std:
+    - 479.64240342658945
+    - 504.9563358340017
+    - 468.41868657651344
+    mean_dt_nm:
+    - 24.4375
+    - 156.5
+    - 416.0
+    mean_dt_nm_neighbor_mean:
+    - 198.62946428571428
+    - 189.19642857142856
+    - 170.66071428571428
+    mean_dt_nm_neighbor_std:
+    - 150.614304054458
+    - 157.4368957825056
+    - 143.32375093543624
+    pca_ratio_01:
+    - 1.3849340770961909
+    - 1.181656878273399
+    - 1.128046800200765
+    pca_ratio_01_neighbor_mean:
+    - 1.8575624906424115
+    - 1.8760422359899387
+    - 1.880915879451087
+    pca_ratio_01_neighbor_std:
+    - 0.641580757345606
+    - 0.6228187048854344
+    - 0.6165585104590592
+    pca_unwrapped_0:
+    - -0.0046539306640625
+    - -0.497314453125
+    - -0.258544921875
+    pca_unwrapped_0_neighbor_mean:
+    - 0.039224624633789
+    - 0.0840119448575106
+    - 0.0623056238347833
+    pca_unwrapped_0_neighbor_std:
+    - 0.3114910605258688
+    - 0.2573427692683507
+    - 0.296254177168357
+    pca_unwrapped_1:
+    - 0.7392578125
+    - -0.11553955078125
+    - 0.2169189453125
+    pca_unwrapped_1_neighbor_mean:
+    - 0.0941687497225674
+    - 0.1718776009299538
+    - 0.1416541012850674
+    pca_unwrapped_1_neighbor_std:
+    - 0.3179467337379631
+    - 0.3628551035117971
+    - 0.372447324946889
+    pca_unwrapped_2:
+    - -0.673828125
+    - -0.85986328125
+    - 0.94140625
+    pca_unwrapped_2_neighbor_mean:
+    - 0.2258744673295454
+    - 0.2427867542613636
+    - 0.0790349786931818
+    pca_unwrapped_2_neighbor_std:
+    - 0.9134250264562896
+    - 0.8928014788058292
+    - 0.9167197839332804
+    pca_unwrapped_3:
+    - -0.0302886962890625
+    - -0.86572265625
+    - 0.57177734375
+    pca_unwrapped_3_neighbor_mean:
+    - -0.2933238636363636
+    - -0.2173753218217329
+    - -0.3480571400035511
+    pca_unwrapped_3_neighbor_std:
+    - 0.6203425764161097
+    - 0.5938304683645145
+    - 0.5600074530240728
+    pca_unwrapped_4:
+    - 0.67333984375
+    - -0.0005474090576171
+    - 0.81982421875
+    pca_unwrapped_4_neighbor_mean:
+    - 0.2915762121027166
+    - 0.3528386896306818
+    - 0.2782594507390802
+    pca_unwrapped_4_neighbor_std:
+    - 0.6415192812587974
+    - 0.6430080201673403
+    - 0.6308895861182334
+    pca_unwrapped_5:
+    - 0.73876953125
+    - 0.50048828125
+    - -0.03192138671875
+    pca_unwrapped_5_neighbor_mean:
+    - 0.2028697620738636
+    - 0.2245316938920454
+    - 0.2729325727982954
+    pca_unwrapped_5_neighbor_std:
+    - 0.265173781606759
+    - 0.2994363858938455
+    - 0.2968562365279343
+    pca_unwrapped_6:
+    - 0.99951171875
+    - 0.05828857421875
+    - -0.77880859375
+    pca_unwrapped_6_neighbor_mean:
+    - -0.2386505820534446
+    - -0.1530848416415128
+    - -0.0769850990988991
+    pca_unwrapped_6_neighbor_std:
+    - 0.6776577717043619
+    - 0.7717860533115238
+    - 0.7447135522384378
+    pca_unwrapped_7:
+    - 0.023834228515625
+    - -0.9931640625
+    - 0.52978515625
+    pca_unwrapped_7_neighbor_mean:
+    - -0.4803272594105113
+    - -0.3878728693181818
+    - -0.5263227982954546
+    pca_unwrapped_7_neighbor_std:
+    - 0.4799926318285017
+    - 0.4691567465869561
+    - 0.3891669942534205
+    pca_unwrapped_8:
+    - 0.0192413330078125
+    - 0.0997314453125
+    - -0.3359375
+    pca_unwrapped_8_neighbor_mean:
+    - -0.0384375832297585
+    - -0.0457548661665482
+    - -0.0061485984108664
+    pca_unwrapped_8_neighbor_std:
+    - 0.3037878488292577
+    - 0.3010843368506175
+    - 0.2874409267860334
+    pca_val_unwrapped_0:
+    - 15657.09765625
+    - 40668.40625
+    - 66863.0
+    pca_val_unwrapped_0_neighbor_mean:
+    - 69378.52059659091
+    - 67104.76526988637
+    - 64723.43856534091
+    pca_val_unwrapped_0_neighbor_std:
+    - 20242.245019019712
+    - 24702.906417865197
+    - 25959.16138296664
+    pca_val_unwrapped_1:
+    - 11305.3017578125
+    - 34416.42578125
+    - 59273.25
+    pca_val_unwrapped_1_neighbor_mean:
+    - 41190.40261008523
+    - 39089.39133522727
+    - 36829.68004261364
+    pca_val_unwrapped_1_neighbor_std:
+    - 16625.870141811894
+    - 18875.56976212627
+    - 17666.778281657556
+    pca_val_unwrapped_2:
+    - 1270.4095458984375
+    - 13551.6748046875
+    - 47764.625
+    pca_val_unwrapped_2_neighbor_mean:
+    - 28717.50048828125
+    - 27601.021828391335
+    - 24490.75362881747
+    pca_val_unwrapped_2_neighbor_std:
+    - 14988.204981576571
+    - 16601.48080038032
+    - 15622.078784778376
+    post_synapse_count:
+    - 0.0
+    - 0.0
+    - 0.0
+    post_synapse_count_neighbor_mean:
+    - 0.0
+    - 0.0
+    - 0.0
+    post_synapse_count_neighbor_std:
+    - 0.0
+    - 0.0
+    - 0.0
+    pre_synapse_count:
+    - 0.0
+    - 0.0
+    - 0.0
+    pre_synapse_count_neighbor_mean:
+    - 0.0
+    - 0.0
+    - 0.0
+    pre_synapse_count_neighbor_std:
+    - 0.0
+    - 0.0
+    - 0.0
+    size_nm3:
+    - 12771840.0
+    - 697943040.0
+    - 7550330880.0
+    size_nm3_neighbor_mean:
+    - 3233702034.285714
+    - 3184761234.285714
+    - 2695304960.0
+    size_nm3_neighbor_std:
+    - 3650678969.7909584
+    - 3691650923.5639486
+    - 3518520747.0511127
+---
+# Model description
+[More Information Needed]
+## Intended uses & limitations
+[More Information Needed]
+## Training Procedure
+[More Information Needed]
+### Hyperparameters
+<details>
+<summary> Click to expand </summary>
+| Hyperparameter                     | Value                                                                                                                     |
+|------------------------------------|---------------------------------------------------------------------------------------------------------------------------|
+| memory                             |                                                                                                                           |
+| steps                              | [('transformer', QuantileTransformer(output_distribution='normal')), ('lda', LinearDiscriminantAnalysis(n_components=3))] |
+| verbose                            | False                                                                                                                     |
+| transformer                        | QuantileTransformer(output_distribution='normal')                                                                         |
+| lda                                | LinearDiscriminantAnalysis(n_components=3)                                                                                |
+| transformer__copy                  | True                                                                                                                      |
+| transformer__ignore_implicit_zeros | False                                                                                                                     |
+| transformer__n_quantiles           | 1000                                                                                                                      |
+| transformer__output_distribution   | normal                                                                                                                    |
+| transformer__random_state          |                                                                                                                           |
+| transformer__subsample             | 10000                                                                                                                     |
+| lda__covariance_estimator          |                                                                                                                           |
+| lda__n_components                  | 3                                                                                                                         |
+| lda__priors                        |                                                                                                                           |
+| lda__shrinkage                     |                                                                                                                           |
+| lda__solver                        | svd                                                                                                                       |
+| lda__store_covariance              | False                                                                                                                     |
+| lda__tol                           | 0.0001                                                                                                                    |
+</details>
+### Model Plot
+<style>#sk-container-id-4 {/* Definition of color scheme common for light and dark mode */--sklearn-color-text: black;--sklearn-color-line: gray;/* Definition of color scheme for unfitted estimators */--sklearn-color-unfitted-level-0: #fff5e6;--sklearn-color-unfitted-level-1: #f6e4d2;--sklearn-color-unfitted-level-2: #ffe0b3;--sklearn-color-unfitted-level-3: chocolate;/* Definition of color scheme for fitted estimators */--sklearn-color-fitted-level-0: #f0f8ff;--sklearn-color-fitted-level-1: #d4ebff;--sklearn-color-fitted-level-2: #b3dbfd;--sklearn-color-fitted-level-3: cornflowerblue;/* Specific color for light theme */--sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));--sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));--sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));--sklearn-color-icon: #696969;@media (prefers-color-scheme: dark) {/* Redefinition of color scheme for dark theme */--sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));--sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));--sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));--sklearn-color-icon: #878787;}
+}#sk-container-id-4 {color: var(--sklearn-color-text);
+}#sk-container-id-4 pre {padding: 0;
+}#sk-container-id-4 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;
+}#sk-container-id-4 div.sk-dashed-wrapped {border: 1px dashed var(--sklearn-color-line);margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: var(--sklearn-color-background);
+}#sk-container-id-4 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }`but bootstrap.min.css set `[hidden] { display: none !important; }`so we also need the `!important` here to be able to override thedefault hidden behavior on the sphinx rendered scikit-learn.org.See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;
+}#sk-container-id-4 div.sk-text-repr-fallback {display: none;
+}div.sk-parallel-item,
+div.sk-serial,
+div.sk-item {/* draw centered vertical line to link estimators */background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));background-size: 2px 100%;background-repeat: no-repeat;background-position: center center;
+}/* Parallel-specific style estimator block */#sk-container-id-4 div.sk-parallel-item::after {content: "";width: 100%;border-bottom: 2px solid var(--sklearn-color-text-on-default-background);flex-grow: 1;
+}#sk-container-id-4 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: var(--sklearn-color-background);position: relative;
+}#sk-container-id-4 div.sk-parallel-item {display: flex;flex-direction: column;
+}#sk-container-id-4 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;
+}#sk-container-id-4 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;
+}#sk-container-id-4 div.sk-parallel-item:only-child::after {width: 0;
+}/* Serial-specific style estimator block */#sk-container-id-4 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: var(--sklearn-color-background);padding-right: 1em;padding-left: 1em;
+}/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is
+clickable and can be expanded/collapsed.
+- Pipeline and ColumnTransformer use this feature and define the default style
+- Estimators will overwrite some part of the style using the `sk-estimator` class
+*//* Pipeline and ColumnTransformer style (default) */#sk-container-id-4 div.sk-toggleable {/* Default theme specific background. It is overwritten whether we have aspecific estimator or a Pipeline/ColumnTransformer */background-color: var(--sklearn-color-background);
+}/* Toggleable label */
+#sk-container-id-4 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.5em;box-sizing: border-box;text-align: center;
+}#sk-container-id-4 label.sk-toggleable__label-arrow:before {/* Arrow on the left of the label */content: "▸";float: left;margin-right: 0.25em;color: var(--sklearn-color-icon);
+}#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {color: var(--sklearn-color-text);
+}/* Toggleable content - dropdown */#sk-container-id-4 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;/* unfitted */background-color: var(--sklearn-color-unfitted-level-0);
+}#sk-container-id-4 div.sk-toggleable__content.fitted {/* fitted */background-color: var(--sklearn-color-fitted-level-0);
+}#sk-container-id-4 div.sk-toggleable__content pre {margin: 0.2em;border-radius: 0.25em;color: var(--sklearn-color-text);/* unfitted */background-color: var(--sklearn-color-unfitted-level-0);
+}#sk-container-id-4 div.sk-toggleable__content.fitted pre {/* unfitted */background-color: var(--sklearn-color-fitted-level-0);
+}#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {/* Expand drop-down */max-height: 200px;max-width: 100%;overflow: auto;
+}#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: "▾";
+}/* Pipeline/ColumnTransformer-specific style */#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {color: var(--sklearn-color-text);background-color: var(--sklearn-color-unfitted-level-2);
+}#sk-container-id-4 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: var(--sklearn-color-fitted-level-2);
+}/* Estimator-specific style *//* Colorize estimator box */
+#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {/* unfitted */background-color: var(--sklearn-color-unfitted-level-2);
+}#sk-container-id-4 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {/* fitted */background-color: var(--sklearn-color-fitted-level-2);
+}#sk-container-id-4 div.sk-label label.sk-toggleable__label,
+#sk-container-id-4 div.sk-label label {/* The background is the default theme color */color: var(--sklearn-color-text-on-default-background);
+}/* On hover, darken the color of the background */
+#sk-container-id-4 div.sk-label:hover label.sk-toggleable__label {color: var(--sklearn-color-text);background-color: var(--sklearn-color-unfitted-level-2);
+}/* Label box, darken color on hover, fitted */
+#sk-container-id-4 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {color: var(--sklearn-color-text);background-color: var(--sklearn-color-fitted-level-2);
+}/* Estimator label */#sk-container-id-4 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;
+}#sk-container-id-4 div.sk-label-container {text-align: center;
+}/* Estimator-specific */
+#sk-container-id-4 div.sk-estimator {font-family: monospace;border: 1px dotted var(--sklearn-color-border-box);border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;/* unfitted */background-color: var(--sklearn-color-unfitted-level-0);
+}#sk-container-id-4 div.sk-estimator.fitted {/* fitted */background-color: var(--sklearn-color-fitted-level-0);
+}/* on hover */
+#sk-container-id-4 div.sk-estimator:hover {/* unfitted */background-color: var(--sklearn-color-unfitted-level-2);
+}#sk-container-id-4 div.sk-estimator.fitted:hover {/* fitted */background-color: var(--sklearn-color-fitted-level-2);
+}/* Specification for estimator info (e.g. "i" and "?") *//* Common style for "i" and "?" */.sk-estimator-doc-link,
+a:link.sk-estimator-doc-link,
+a:visited.sk-estimator-doc-link {float: right;font-size: smaller;line-height: 1em;font-family: monospace;background-color: var(--sklearn-color-background);border-radius: 1em;height: 1em;width: 1em;text-decoration: none !important;margin-left: 1ex;/* unfitted */border: var(--sklearn-color-unfitted-level-1) 1pt solid;color: var(--sklearn-color-unfitted-level-1);
+}.sk-estimator-doc-link.fitted,
+a:link.sk-estimator-doc-link.fitted,
+a:visited.sk-estimator-doc-link.fitted {/* fitted */border: var(--sklearn-color-fitted-level-1) 1pt solid;color: var(--sklearn-color-fitted-level-1);
+}/* On hover */
+div.sk-estimator:hover .sk-estimator-doc-link:hover,
+.sk-estimator-doc-link:hover,
+div.sk-label-container:hover .sk-estimator-doc-link:hover,
+.sk-estimator-doc-link:hover {/* unfitted */background-color: var(--sklearn-color-unfitted-level-3);color: var(--sklearn-color-background);text-decoration: none;
+}div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,
+.sk-estimator-doc-link.fitted:hover,
+div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,
+.sk-estimator-doc-link.fitted:hover {/* fitted */background-color: var(--sklearn-color-fitted-level-3);color: var(--sklearn-color-background);text-decoration: none;
+}/* Span, style for the box shown on hovering the info icon */
+.sk-estimator-doc-link span {display: none;z-index: 9999;position: relative;font-weight: normal;right: .2ex;padding: .5ex;margin: .5ex;width: min-content;min-width: 20ex;max-width: 50ex;color: var(--sklearn-color-text);box-shadow: 2pt 2pt 4pt #999;/* unfitted */background: var(--sklearn-color-unfitted-level-0);border: .5pt solid var(--sklearn-color-unfitted-level-3);
+}.sk-estimator-doc-link.fitted span {/* fitted */background: var(--sklearn-color-fitted-level-0);border: var(--sklearn-color-fitted-level-3);
+}.sk-estimator-doc-link:hover span {display: block;
+}/* "?"-specific style due to the `<a>` HTML tag */#sk-container-id-4 a.estimator_doc_link {float: right;font-size: 1rem;line-height: 1em;font-family: monospace;background-color: var(--sklearn-color-background);border-radius: 1rem;height: 1rem;width: 1rem;text-decoration: none;/* unfitted */color: var(--sklearn-color-unfitted-level-1);border: var(--sklearn-color-unfitted-level-1) 1pt solid;
+}#sk-container-id-4 a.estimator_doc_link.fitted {/* fitted */border: var(--sklearn-color-fitted-level-1) 1pt solid;color: var(--sklearn-color-fitted-level-1);
+}/* On hover */
+#sk-container-id-4 a.estimator_doc_link:hover {/* unfitted */background-color: var(--sklearn-color-unfitted-level-3);color: var(--sklearn-color-background);text-decoration: none;
+}#sk-container-id-4 a.estimator_doc_link.fitted:hover {/* fitted */background-color: var(--sklearn-color-fitted-level-3);
+}
+</style><div id="sk-container-id-4" class="sk-top-container" style="overflow: auto;"><div class="sk-text-repr-fallback"><pre>Pipeline(steps=[(&#x27;transformer&#x27;,QuantileTransformer(output_distribution=&#x27;normal&#x27;)),(&#x27;lda&#x27;, LinearDiscriminantAnalysis(n_components=3))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class="sk-container" hidden><div class="sk-item sk-dashed-wrapped"><div class="sk-label-container"><div class="sk-label fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-10" type="checkbox" ><label for="sk-estimator-id-10" class="sk-toggleable__label fitted sk-toggleable__label-arrow fitted">&nbsp;&nbsp;Pipeline<a class="sk-estimator-doc-link fitted" rel="noreferrer" target="_blank" href="https://scikit-learn.org/1.4/modules/generated/sklearn.pipeline.Pipeline.html">?<span>Documentation for Pipeline</span></a><span class="sk-estimator-doc-link fitted">i<span>Fitted</span></span></label><div class="sk-toggleable__content fitted"><pre>Pipeline(steps=[(&#x27;transformer&#x27;,QuantileTransformer(output_distribution=&#x27;normal&#x27;)),(&#x27;lda&#x27;, LinearDiscriminantAnalysis(n_components=3))])</pre></div> </div></div><div class="sk-serial"><div class="sk-item"><div class="sk-estimator fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-11" type="checkbox" ><label for="sk-estimator-id-11" class="sk-toggleable__label fitted sk-toggleable__label-arrow fitted">&nbsp;QuantileTransformer<a class="sk-estimator-doc-link fitted" rel="noreferrer" target="_blank" href="https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.QuantileTransformer.html">?<span>Documentation for QuantileTransformer</span></a></label><div class="sk-toggleable__content 
fitted"><pre>QuantileTransformer(output_distribution=&#x27;normal&#x27;)</pre></div> </div></div><div class="sk-item"><div class="sk-estimator fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-12" type="checkbox" ><label for="sk-estimator-id-12" class="sk-toggleable__label fitted sk-toggleable__label-arrow fitted">&nbsp;LinearDiscriminantAnalysis<a class="sk-estimator-doc-link fitted" rel="noreferrer" target="_blank" href="https://scikit-learn.org/1.4/modules/generated/sklearn.discriminant_analysis.LinearDiscriminantAnalysis.html">?<span>Documentation for LinearDiscriminantAnalysis</span></a></label><div class="sk-toggleable__content fitted"><pre>LinearDiscriminantAnalysis(n_components=3)</pre></div> </div></div></div></div></div></div>
+## Evaluation Results
+[More Information Needed]
+# How to Get Started with the Model
+[More Information Needed]
+# Model Card Authors
+This model card is written by the following authors:
+[More Information Needed]
+# Model Card Contact
+You can contact the model card authors through the following channels:
+[More Information Needed]
+# Citation
+Below you can find information related to citation.
+**BibTeX:**
+```
+[More Information Needed]
+```
+# model_card_authors
+bdpedigo
+# model_description
+This is a model trained to classify pieces of neurons as axon, dendrite, soma, or glia, based only on their local shape and synapse features. The model is a linear discriminant classifier which was trained on compartment labels generated by Bethanny Danskin for three 6x6x6 um boxes in the Minnie65 Phase3 dataset.

config.json ADDED Viewed

	@@ -0,0 +1,360 @@

+{
+    "sklearn": {
+        "columns": [
+            "area_nm2",
+            "max_dt_nm",
+            "mean_dt_nm",
+            "size_nm3",
+            "pca_unwrapped_0",
+            "pca_unwrapped_1",
+            "pca_unwrapped_2",
+            "pca_unwrapped_3",
+            "pca_unwrapped_4",
+            "pca_unwrapped_5",
+            "pca_unwrapped_6",
+            "pca_unwrapped_7",
+            "pca_unwrapped_8",
+            "pca_val_unwrapped_0",
+            "pca_val_unwrapped_1",
+            "pca_val_unwrapped_2",
+            "pca_ratio_01",
+            "pre_synapse_count",
+            "post_synapse_count",
+            "area_nm2_neighbor_mean",
+            "area_nm2_neighbor_std",
+            "max_dt_nm_neighbor_mean",
+            "max_dt_nm_neighbor_std",
+            "mean_dt_nm_neighbor_mean",
+            "mean_dt_nm_neighbor_std",
+            "size_nm3_neighbor_mean",
+            "size_nm3_neighbor_std",
+            "pca_unwrapped_0_neighbor_mean",
+            "pca_unwrapped_0_neighbor_std",
+            "pca_unwrapped_1_neighbor_mean",
+            "pca_unwrapped_1_neighbor_std",
+            "pca_unwrapped_2_neighbor_mean",
+            "pca_unwrapped_2_neighbor_std",
+            "pca_unwrapped_3_neighbor_mean",
+            "pca_unwrapped_3_neighbor_std",
+            "pca_unwrapped_4_neighbor_mean",
+            "pca_unwrapped_4_neighbor_std",
+            "pca_unwrapped_5_neighbor_mean",
+            "pca_unwrapped_5_neighbor_std",
+            "pca_unwrapped_6_neighbor_mean",
+            "pca_unwrapped_6_neighbor_std",
+            "pca_unwrapped_7_neighbor_mean",
+            "pca_unwrapped_7_neighbor_std",
+            "pca_unwrapped_8_neighbor_mean",
+            "pca_unwrapped_8_neighbor_std",
+            "pca_val_unwrapped_0_neighbor_mean",
+            "pca_val_unwrapped_0_neighbor_std",
+            "pca_val_unwrapped_1_neighbor_mean",
+            "pca_val_unwrapped_1_neighbor_std",
+            "pca_val_unwrapped_2_neighbor_mean",
+            "pca_val_unwrapped_2_neighbor_std",
+            "pca_ratio_01_neighbor_mean",
+            "pca_ratio_01_neighbor_std",
+            "pre_synapse_count_neighbor_mean",
+            "pre_synapse_count_neighbor_std",
+            "post_synapse_count_neighbor_mean",
+            "post_synapse_count_neighbor_std"
+        ],
+        "environment": [
+            "scikit-learn",
+            "caveclient"
+        ],
+        "example_input": {
+            "area_nm2": [
+                693824.0,
+                4852608.0,
+                17088896.0
+            ],
+            "area_nm2_neighbor_mean": [
+                10181485.714285716,
+                9884429.714285716,
+                9010409.142857144
+            ],
+            "area_nm2_neighbor_std": [
+                8312409.263207569,
+                8587259.418816902,
+                8418630.640116522
+            ],
+            "max_dt_nm": [
+                69.0,
+                543.0,
+                1287.0
+            ],
+            "max_dt_nm_neighbor_mean": [
+                664.7142857142857,
+                630.8571428571429,
+                577.7142857142857
+            ],
+            "max_dt_nm_neighbor_std": [
+                479.64240342658945,
+                504.9563358340017,
+                468.41868657651344
+            ],
+            "mean_dt_nm": [
+                24.4375,
+                156.5,
+                416.0
+            ],
+            "mean_dt_nm_neighbor_mean": [
+                198.62946428571428,
+                189.19642857142856,
+                170.66071428571428
+            ],
+            "mean_dt_nm_neighbor_std": [
+                150.614304054458,
+                157.4368957825056,
+                143.32375093543624
+            ],
+            "pca_ratio_01": [
+                1.3849340770961909,
+                1.181656878273399,
+                1.128046800200765
+            ],
+            "pca_ratio_01_neighbor_mean": [
+                1.8575624906424115,
+                1.8760422359899387,
+                1.880915879451087
+            ],
+            "pca_ratio_01_neighbor_std": [
+                0.641580757345606,
+                0.6228187048854344,
+                0.6165585104590592
+            ],
+            "pca_unwrapped_0": [
+                -0.0046539306640625,
+                -0.497314453125,
+                -0.258544921875
+            ],
+            "pca_unwrapped_0_neighbor_mean": [
+                0.039224624633789,
+                0.0840119448575106,
+                0.0623056238347833
+            ],
+            "pca_unwrapped_0_neighbor_std": [
+                0.3114910605258688,
+                0.2573427692683507,
+                0.296254177168357
+            ],
+            "pca_unwrapped_1": [
+                0.7392578125,
+                -0.11553955078125,
+                0.2169189453125
+            ],
+            "pca_unwrapped_1_neighbor_mean": [
+                0.0941687497225674,
+                0.1718776009299538,
+                0.1416541012850674
+            ],
+            "pca_unwrapped_1_neighbor_std": [
+                0.3179467337379631,
+                0.3628551035117971,
+                0.372447324946889
+            ],
+            "pca_unwrapped_2": [
+                -0.673828125,
+                -0.85986328125,
+                0.94140625
+            ],
+            "pca_unwrapped_2_neighbor_mean": [
+                0.2258744673295454,
+                0.2427867542613636,
+                0.0790349786931818
+            ],
+            "pca_unwrapped_2_neighbor_std": [
+                0.9134250264562896,
+                0.8928014788058292,
+                0.9167197839332804
+            ],
+            "pca_unwrapped_3": [
+                -0.0302886962890625,
+                -0.86572265625,
+                0.57177734375
+            ],
+            "pca_unwrapped_3_neighbor_mean": [
+                -0.2933238636363636,
+                -0.2173753218217329,
+                -0.3480571400035511
+            ],
+            "pca_unwrapped_3_neighbor_std": [
+                0.6203425764161097,
+                0.5938304683645145,
+                0.5600074530240728
+            ],
+            "pca_unwrapped_4": [
+                0.67333984375,
+                -0.0005474090576171,
+                0.81982421875
+            ],
+            "pca_unwrapped_4_neighbor_mean": [
+                0.2915762121027166,
+                0.3528386896306818,
+                0.2782594507390802
+            ],
+            "pca_unwrapped_4_neighbor_std": [
+                0.6415192812587974,
+                0.6430080201673403,
+                0.6308895861182334
+            ],
+            "pca_unwrapped_5": [
+                0.73876953125,
+                0.50048828125,
+                -0.03192138671875
+            ],
+            "pca_unwrapped_5_neighbor_mean": [
+                0.2028697620738636,
+                0.2245316938920454,
+                0.2729325727982954
+            ],
+            "pca_unwrapped_5_neighbor_std": [
+                0.265173781606759,
+                0.2994363858938455,
+                0.2968562365279343
+            ],
+            "pca_unwrapped_6": [
+                0.99951171875,
+                0.05828857421875,
+                -0.77880859375
+            ],
+            "pca_unwrapped_6_neighbor_mean": [
+                -0.2386505820534446,
+                -0.1530848416415128,
+                -0.0769850990988991
+            ],
+            "pca_unwrapped_6_neighbor_std": [
+                0.6776577717043619,
+                0.7717860533115238,
+                0.7447135522384378
+            ],
+            "pca_unwrapped_7": [
+                0.023834228515625,
+                -0.9931640625,
+                0.52978515625
+            ],
+            "pca_unwrapped_7_neighbor_mean": [
+                -0.4803272594105113,
+                -0.3878728693181818,
+                -0.5263227982954546
+            ],
+            "pca_unwrapped_7_neighbor_std": [
+                0.4799926318285017,
+                0.4691567465869561,
+                0.3891669942534205
+            ],
+            "pca_unwrapped_8": [
+                0.0192413330078125,
+                0.0997314453125,
+                -0.3359375
+            ],
+            "pca_unwrapped_8_neighbor_mean": [
+                -0.0384375832297585,
+                -0.0457548661665482,
+                -0.0061485984108664
+            ],
+            "pca_unwrapped_8_neighbor_std": [
+                0.3037878488292577,
+                0.3010843368506175,
+                0.2874409267860334
+            ],
+            "pca_val_unwrapped_0": [
+                15657.09765625,
+                40668.40625,
+                66863.0
+            ],
+            "pca_val_unwrapped_0_neighbor_mean": [
+                69378.52059659091,
+                67104.76526988637,
+                64723.43856534091
+            ],
+            "pca_val_unwrapped_0_neighbor_std": [
+                20242.245019019712,
+                24702.906417865197,
+                25959.16138296664
+            ],
+            "pca_val_unwrapped_1": [
+                11305.3017578125,
+                34416.42578125,
+                59273.25
+            ],
+            "pca_val_unwrapped_1_neighbor_mean": [
+                41190.40261008523,
+                39089.39133522727,
+                36829.68004261364
+            ],
+            "pca_val_unwrapped_1_neighbor_std": [
+                16625.870141811894,
+                18875.56976212627,
+                17666.778281657556
+            ],
+            "pca_val_unwrapped_2": [
+                1270.4095458984375,
+                13551.6748046875,
+                47764.625
+            ],
+            "pca_val_unwrapped_2_neighbor_mean": [
+                28717.50048828125,
+                27601.021828391335,
+                24490.75362881747
+            ],
+            "pca_val_unwrapped_2_neighbor_std": [
+                14988.204981576571,
+                16601.48080038032,
+                15622.078784778376
+            ],
+            "post_synapse_count": [
+                0.0,
+                0.0,
+                0.0
+            ],
+            "post_synapse_count_neighbor_mean": [
+                0.0,
+                0.0,
+                0.0
+            ],
+            "post_synapse_count_neighbor_std": [
+                0.0,
+                0.0,
+                0.0
+            ],
+            "pre_synapse_count": [
+                0.0,
+                0.0,
+                0.0
+            ],
+            "pre_synapse_count_neighbor_mean": [
+                0.0,
+                0.0,
+                0.0
+            ],
+            "pre_synapse_count_neighbor_std": [
+                0.0,
+                0.0,
+                0.0
+            ],
+            "size_nm3": [
+                12771840.0,
+                697943040.0,
+                7550330880.0
+            ],
+            "size_nm3_neighbor_mean": [
+                3233702034.285714,
+                3184761234.285714,
+                2695304960.0
+            ],
+            "size_nm3_neighbor_std": [
+                3650678969.7909584,
+                3691650923.5639486,
+                3518520747.0511127
+            ]
+        },
+        "model": {
+            "file": "local_compartment_classifier_bd_boxes.skops"
+        },
+        "model_format": "skops",
+        "task": "tabular-classification",
+        "use_intelex": false
+    }
+}

local_compartment_classifier_bd_boxes.skops ADDED Viewed

Binary file (515 kB). View file

train.py ADDED Viewed

	@@ -0,0 +1,359 @@

+# %%
+from pathlib import Path
+import caveclient as cc
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import classification_report
+from sklearn.model_selection import KFold
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import QuantileTransformer
+from skops.io import dump
+client = cc.CAVEclient("minnie65_phase3_v1")
+out_path = Path("./troglobyte-sandbox/models/")
+model_name = "local_compartment_classifier_bd_boxes"
+data_path = Path("./troglobyte-sandbox/data/bounding_box_labels")
+files = list(data_path.glob("*.csv"))
+# %%
+label_df = pd.read_csv(out_path / model_name / "labels.csv", index_col=[0, 1])
+label_df = label_df.rename(columns=lambda x: x.replace(".1", ""))
+# # %%
+# X_df = wrangler.features_.copy()
+# X_df = X_df.drop(columns=[col for col in X_df.columns if "rep_coord" in col])
+# %%
+X_df = pd.read_csv(out_path / model_name / "features.csv", index_col=[0, 1])
+# %%
+def box_train_test_split(
+    train_box_indices, test_box_indices, X_df, label_df, label_column
+):
+    train_label_df = label_df.loc[train_box_indices + 1].droplevel("bbox_id")
+    test_label_df = label_df.loc[test_box_indices + 1].droplevel("bbox_id")
+    train_X_df = X_df.loc[train_label_df["root_id"]]
+    test_X_df = X_df.loc[test_label_df["root_id"]]
+    train_X_df = train_X_df.dropna()
+    test_X_df = test_X_df.dropna()
+    train_l2_y = train_X_df.index.get_level_values("object_id").map(
+        train_label_df[label_column]
+    )
+    test_l2_y = test_X_df.index.get_level_values("object_id").map(
+        test_label_df[label_column]
+    )
+    # TODO do something more fair here w/ evaluation on the uncertains
+    train_X_df = train_X_df.loc[train_l2_y.notna()]
+    train_l2_y = train_l2_y[train_l2_y.notna()].values.astype(str)
+    test_X_df = test_X_df.loc[test_l2_y.notna()]
+    test_l2_y = test_l2_y[test_l2_y.notna()].values.astype(str)
+    return train_X_df, test_X_df, train_l2_y, test_l2_y
+def aggregate_votes_by_object(X_df, l2_node_predictions):
+    l2_node_predictions = pd.Series(
+        index=X_df.index, data=l2_node_predictions, name="label"
+    )
+    object_prediction_counts = (
+        l2_node_predictions.groupby(level="object_id").value_counts().to_frame()
+    )
+    object_n_predictions = object_prediction_counts.groupby("object_id").sum()
+    sufficient_data_index = object_n_predictions.query("count > 3").index
+    object_prediction_counts = object_prediction_counts.loc[sufficient_data_index]
+    object_prediction_probs = object_prediction_counts.unstack(fill_value=0)
+    object_prediction_probs = object_prediction_probs.div(
+        object_prediction_probs.sum(axis=1), axis=0
+    )
+    object_prediction_counts.reset_index(drop=False, inplace=True)
+    max_locs = object_prediction_counts.groupby("object_id")["count"].idxmax()
+    max_predictions = object_prediction_counts.loc[max_locs]
+    max_predictions["proportion"] = (
+        max_predictions["count"]
+        / object_n_predictions.loc[max_predictions["object_id"]]["count"].values
+    )
+    max_predictions = max_predictions.set_index("object_id")
+    return max_predictions, object_prediction_probs
+# models to evaluate
+def get_lda(n_classes):
+    lda = Pipeline(
+        [
+            ("transformer", QuantileTransformer(output_distribution="normal")),
+            ("lda", LinearDiscriminantAnalysis(n_components=n_classes - 1)),
+        ]
+    )
+    return lda
+rf = RandomForestClassifier(n_estimators=500, max_depth=4)
+box_indices = np.arange(1, 4)
+rows = []
+for fold, (train_box_indices, test_box_indices) in enumerate(
+    KFold(n_splits=3).split(box_indices.reshape(-1, 1))
+):
+    for label_column in ["axon_label", "simple_label"]:
+        train_X_df, test_X_df, train_l2_y, test_l2_y = box_train_test_split(
+            train_box_indices, test_box_indices, X_df, label_df, label_column
+        )
+        n_classes = label_df[label_column].nunique()
+        models = {"rf": rf, "lda": get_lda(n_classes)}
+        for model_type, model in models.items():
+            model.fit(train_X_df, train_l2_y)
+            train_preds = model.predict(train_X_df)
+            test_preds = model.predict(test_X_df)
+            # evaluate at the L2 level
+            train_report = classification_report(
+                train_l2_y, train_preds, output_dict=True
+            )
+            rows.append(
+                {
+                    "model": model_type,
+                    "fold": fold,
+                    "accuracy": train_report["accuracy"],
+                    "macro_f1": train_report["macro avg"]["f1-score"],
+                    "weighted_f1": train_report["weighted avg"]["f1-score"],
+                    "evaluation": "train",
+                    "labeling": label_column,
+                    "level": "level2",
+                }
+            )
+            test_report = classification_report(test_l2_y, test_preds, output_dict=True)
+            rows.append(
+                {
+                    "model": model_type,
+                    "fold": fold,
+                    "accuracy": test_report["accuracy"],
+                    "macro_f1": test_report["macro avg"]["f1-score"],
+                    "weighted_f1": test_report["weighted avg"]["f1-score"],
+                    "evaluation": "test",
+                    "labeling": label_column,
+                    "level": "level2",
+                }
+            )
+            # evaluate at the object level
+            train_object_predictions, train_object_probs = aggregate_votes_by_object(
+                train_X_df, train_preds
+            )
+            train_object_y = (
+                label_df.droplevel(0)
+                .loc[train_object_predictions.index, label_column]
+                .values.astype(str)
+            )
+            train_object_report = classification_report(
+                train_object_y, train_object_predictions["label"], output_dict=True
+            )
+            rows.append(
+                {
+                    "model": model_type + "-vote",
+                    "fold": fold,
+                    "accuracy": train_object_report["accuracy"],
+                    "macro_f1": train_object_report["macro avg"]["f1-score"],
+                    "weighted_f1": train_object_report["weighted avg"]["f1-score"],
+                    "evaluation": "train",
+                    "labeling": label_column,
+                    "level": "root",
+                }
+            )
+            test_object_predictions, test_object_probs = aggregate_votes_by_object(
+                test_X_df, test_preds
+            )
+            test_object_y = (
+                label_df.droplevel(0)
+                .loc[test_object_predictions.index, label_column]
+                .values.astype(str)
+            )
+            test_object_report = classification_report(
+                test_object_y, test_object_predictions["label"], output_dict=True
+            )
+            rows.append(
+                {
+                    "model": model_type + "-vote",
+                    "fold": fold,
+                    "accuracy": test_object_report["accuracy"],
+                    "macro_f1": train_object_report["macro avg"]["f1-score"],
+                    "weighted_f1": train_object_report["weighted avg"]["f1-score"],
+                    "evaluation": "test",
+                    "labeling": label_column,
+                    "level": "root",
+                }
+            )
+# %%
+evaluation_df = pd.DataFrame(rows)
+sns.set_context("talk")
+fig, axs = plt.subplots(2, 3, figsize=(15, 10), constrained_layout=True, sharey="col")
+for i, labeling in enumerate(["simple_label", "axon_label"]):
+    for j, metric in enumerate(["accuracy", "weighted_f1", "macro_f1"]):
+        ax = axs[i, j]
+        show_legend = (i == 0) & (j == 0)
+        sns.stripplot(
+            data=evaluation_df.query("labeling == @labeling"),
+            x="model",
+            y=metric,
+            hue="evaluation",
+            ax=ax,
+            legend=show_legend,
+            s=10,
+            jitter=True,
+        )
+        ax.spines[["right", "top"]].set_visible(False)
+        if j == 1:
+            ax.set_title("Labeling: " + labeling)
+# %%
+lda = model
+train_X_transformed = lda.transform(train_X_df)
+# %%
+fig, ax = plt.subplots(1, 1, figsize=(10, 10))
+sns.scatterplot(
+    x=train_X_transformed[:, 0],
+    y=train_X_transformed[:, 1],
+    hue=train_l2_y,
+    ax=ax,
+    s=10,
+    alpha=0.7,
+)
+ax.set(xticks=[], yticks=[], xlabel="LDA1", ylabel="LDA2")
+ax.spines[["right", "top"]].set_visible(False)
+# %%
+fig, ax = plt.subplots(1, 1, figsize=(10, 10))
+sns.scatterplot(
+    x=train_X_transformed[:, 0],
+    y=train_X_transformed[:, 2],
+    hue=train_l2_y,
+    ax=ax,
+    s=10,
+    alpha=0.7,
+)
+ax.set(xticks=[], yticks=[], xlabel="LDA1", ylabel="LDA3")
+ax.spines[["right", "top"]].set_visible(False)
+# %%
+final_lda = Pipeline(
+    [
+        ("transformer", QuantileTransformer(output_distribution="normal")),
+        ("lda", LinearDiscriminantAnalysis(n_components=n_classes - 1)),
+    ]
+)
+train_X_df, test_X_df, train_l2_y, test_l2_y = box_train_test_split(
+    np.array([0, 1, 2]), np.array([]), X_df, label_df, label_column
+)
+final_lda.fit(train_X_df, train_l2_y)
+# %%
+model_pickle_file = out_path / model_name / f"{model_name}.skops"
+with open(model_pickle_file, mode="bw") as f:
+    dump(final_lda, file=f)
+# %%
+from pathlib import Path
+from skops import card, hub_utils
+hub_out_path = Path(
+    "troglobyte-sandbox/models/local_compartment_classifier_bd_boxes/hub"
+)
+if not hub_out_path.exists():
+    hub_utils.init(
+        model=model_pickle_file,
+        requirements=["scikit-learn", "caveclient"],
+        dst=hub_out_path,
+        task="tabular-classification",
+        data=train_X_df,
+    )
+hub_utils.add_files(__file__, dst=hub_out_path, exist_ok=True)
+model_card = card.Card(model, metadata=card.metadata_from_config(hub_out_path))
+model_card.metadata.license = "mit"
+model_description = (
+    "This is a model trained to classify pieces of neuron as axon, dendrite, soma, or"
+    "glia, "
+    "based only on their local shape and synapse features."
+    "The model is a linear discriminant classifier which was trained on compartment "
+    "labels generated by Bethanny Danskin for 3 6x6x6 um boxes in the Minnie65 Phase3 "
+    "dataset."
+)
+model_card_authors = "bdpedigo"
+model_card.add(
+    model_card_authors=model_card_authors,
+    model_description=model_description,
+)
+model_card.save(hub_out_path / "README.md")
+hub_utils.push(
+    repo_id=f"bdpedigo/{model_name}",
+    source=hub_out_path,
+    create_remote=False,
+    private=False,
+)
+# %%
+syn_features = [col for col in X_df.columns if "syn" in col]
+train_X_df_no_syn = train_X_df.drop(columns=syn_features)
+final_lda_no_syn = Pipeline(
+    [
+        ("transformer", QuantileTransformer(output_distribution="normal")),
+        ("lda", LinearDiscriminantAnalysis(n_components=n_classes - 1)),
+    ]
+)
+final_lda_no_syn.fit(train_X_df_no_syn, train_l2_y)
+print(classification_report(train_l2_y, final_lda_no_syn.predict(train_X_df_no_syn)))
+with open(out_path / model_name / f"{model_name}_no_syn.skops", mode="bw") as f:
+    dump(final_lda_no_syn, file=f)