---
pipeline_tag: tabular-classification
tags:
- sklearn
language:
- ko
library_name: sklearn
---

# label_encoder_map

```python
# One LabelEncoder per categorical pill attribute. Each key is the name of the
# source column in `ds`; `ds` and `LabelEncoder` come from the full script.
label_encoder_map = {
    "의약품제형": LabelEncoder(),  # dosage form
    "색상앞": LabelEncoder(),  # colour (front)
    "색상뒤": LabelEncoder(),  # colour (back)
    "분할선앞": LabelEncoder(),  # score line (front)
    "분할선뒤": LabelEncoder(),  # score line (back)
    "제형코드명": LabelEncoder(),  # formulation code name
}

# Convert to pandas once instead of once per column; only the original
# (un-encoded) columns are read, so the snapshot taken here is sufficient.
frame = ds.to_pandas()

# Fit each encoder on its column and append the integer codes to the dataset
# as "<column>_encoded".
for column, encoder in label_encoder_map.items():
    ds = ds.add_column(f"{column}_encoded", encoder.fit_transform(frame[column]))
```

```python
# Label-encoded attribute columns used as the classifier's features.
feature_columns = [
    '의약품제형_encoded',
    '색상앞_encoded',
    '색상뒤_encoded',
    '분할선앞_encoded',
    '분할선뒤_encoded',
    '제형코드명_encoded',
]

# 5-nearest-neighbour classifier with cosine distance; the target is the
# "품목명" (item name) column of the same dataset.
knn = KNeighborsClassifier(n_neighbors=5, metric='cosine')
knn.fit(
    ds.select_columns(feature_columns).to_pandas(),
    ds.select_columns("품목명").to_pandas(),
)
```

[Full code](https://gist.github.com/brainer3220/4176af5b013c9cd1dd419626f1a7b0d9)

# Condensed-Co-Graph-And-Size-Graph

```python
from datasets import load_dataset, disable_caching, Value
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Node and edge tables of each graph configuration ('train' split only).
co_graph_edges = load_dataset('brainer/pill_identification_graph', 'co-graph-edges')['train']
co_graph_nodes = load_dataset('brainer/pill_identification_graph', 'co-graph-nodes')['train']
size_graph_edges = load_dataset('brainer/pill_identification_graph', 'size-graph-edges')['train']
size_graph_nodes = load_dataset('brainer/pill_identification_graph', 'size-graph-nodes')['train']
pill_ingredients_edges = load_dataset(
    'brainer/pill_identification_graph', 'merge-hira-pill_identification-edges'
)['train']
pill_ingredients_nodes = load_dataset(
    'brainer/pill_identification_graph', 'merge-hira-pill_identification-nodes'
)['train']

# Tabular pill identification data; kept as the full DatasetDict.
pill_identification_data = load_dataset('brainer/pill_identification_data', 'default')

drug_name_encoder = LabelEncoder()
gnl_nm_encoder = LabelEncoder()

# "품목일련번호" (item serial number) is cast to string before it is pooled with
# the graph ids below -- presumably because those ids are strings; confirm
# against the dataset schema.
item_serial_number = pill_identification_data.cast_column(
    '품목일련번호', Value(dtype='string')
)['train']['품목일련번호']

# Fit the drug encoder on the distinct values pooled from the size-graph node
# ids, the size-graph and ingredient edge targets, and the item serial numbers.
drug_name_encoder.fit(
    list(
        set(
            np.asarray(
                size_graph_nodes['id']
                + size_graph_edges['target']
                + pill_ingredients_edges['target']
                + item_serial_number
            )
        )
    )
)

# Fit the second encoder on the distinct node ids of the co-graph and the
# ingredient graph.
gnl_nm_encoder.fit(
    list(set(np.asarray(co_graph_nodes['id'] + pill_ingredients_nodes['id'])))
)
```