Spaces:
Runtime error
Runtime error
lyangas
committed on
Commit
•
da7535b
1
Parent(s):
afc3da6
add voting
Browse files
- Dockerfile +2 -0
- app.py +71 -18
- classifiers/.DS_Store +0 -0
- classifiers/codes/.DS_Store +0 -0
- model_finetuned_clear.pkl → classifiers/codes/mlp_codes.pkl +2 -2
- classifiers/codes/svc_codes.pkl +3 -0
- classifiers/codes/xgboost_codes.pkl +3 -0
- classifiers/groups/.DS_Store +0 -0
- classifiers/groups/mlp_groups.pkl +3 -0
- classifiers/groups/svc_groups.pkl +3 -0
- classifiers/groups/xgboost_groups.pkl +3 -0
- embedder/embedder.pkl +3 -0
- required_classes.py +1 -1
- requirements.txt +2 -1
Dockerfile
CHANGED
@@ -6,6 +6,8 @@ COPY ./requirements.txt /code/requirements.txt
|
|
6 |
|
7 |
RUN pip install --upgrade -r /code/requirements.txt
|
8 |
|
|
|
|
|
9 |
COPY ./model_finetuned_clear.pkl ./model_finetuned_clear.pkl
|
10 |
COPY ./required_classes.py ./required_classes.py
|
11 |
COPY ./app.py ./app.py
|
|
|
6 |
|
7 |
RUN pip install --upgrade -r /code/requirements.txt
|
8 |
|
9 |
+
COPY ./embedder ./embedder
|
10 |
+
COPY ./classifiers ./classifiers
|
11 |
COPY ./model_finetuned_clear.pkl ./model_finetuned_clear.pkl
|
12 |
COPY ./required_classes.py ./required_classes.py
|
13 |
COPY ./app.py ./app.py
|
app.py
CHANGED
@@ -4,33 +4,86 @@ from flask import Flask, request
|
|
4 |
import json
|
5 |
import pickle
|
6 |
import numpy as np
|
|
|
7 |
|
8 |
from required_classes import BertEmbedder, PredictModel
|
9 |
|
10 |
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
try:
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
|
|
|
|
|
|
17 |
except Exception as e:
|
18 |
-
print(f"ERROR: loading
|
|
|
19 |
|
20 |
def classify_code(text, top_n):
|
21 |
-
embed =
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
25 |
return preds
|
26 |
|
|
|
27 |
def classify_group(text, top_n):
|
28 |
-
embed =
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
32 |
return preds
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
app = Flask(__name__)
|
36 |
|
@@ -49,8 +102,6 @@ def read_root():
|
|
49 |
base64_bytes = str(data['textB64']).encode("ascii")
|
50 |
sample_string_bytes = base64.b64decode(base64_bytes)
|
51 |
text = sample_string_bytes.decode("ascii")
|
52 |
-
print(text)
|
53 |
-
# text = str(data['text'])
|
54 |
top_n = int(data['top_n'])
|
55 |
|
56 |
if top_n < 1:
|
@@ -60,11 +111,13 @@ def read_root():
|
|
60 |
|
61 |
pred_codes = classify_code(text, top_n)
|
62 |
pred_groups = classify_group(text, top_n)
|
|
|
|
|
63 |
result = {
|
64 |
"icd10":
|
65 |
-
{'result':
|
66 |
"dx_group":
|
67 |
-
{'result':
|
68 |
}
|
69 |
return result
|
70 |
|
|
|
4 |
import json
|
5 |
import pickle
|
6 |
import numpy as np
|
7 |
+
import os
|
8 |
|
9 |
from required_classes import BertEmbedder, PredictModel
|
10 |
|
11 |
|
12 |
+
# Per-classifier weights used when the individual predictions are combined
# into a single vote (see get_top_result).
CLS_WEIGHTS = {'mlp': 0.3, 'svc': 0.4, 'xgboost': 0.3}

print('INFO: loading models')

# --- embedder ---------------------------------------------------------------
# Loading is best-effort: a failure is only reported on stdout, in which case
# `embedder` stays undefined.
try:
    with open('embedder/embedder.pkl', 'rb') as fh:
        embedder = pickle.load(fh)
    print('INFO: embedder loaded')
except Exception as e:
    print(f"ERROR: loading embedder failed with: {str(e)}")


# --- classifiers for codes ----------------------------------------------------
# Maps the file name up to its first '.' (e.g. 'mlp_codes') to the unpickled
# model. Entries starting with '.' (such as .DS_Store) are ignored.
classifiers_codes = {}
try:
    for clf_name in os.listdir('classifiers/codes'):
        if clf_name.startswith('.'):
            continue
        with open(f'classifiers/codes/{clf_name}', 'rb') as fh:
            loaded = pickle.load(fh)
        classifiers_codes[clf_name.partition('.')[0]] = loaded
        print(f'INFO: classifier {clf_name} loaded')
except Exception as e:
    # Models stored before the failure remain registered.
    print(f"ERROR: loading classifiers failed with: {str(e)}")

# --- classifiers for groups ---------------------------------------------------
# Same layout as above, read from 'classifiers/groups'.
classifiers_groups = {}
try:
    for clf_name in os.listdir('classifiers/groups'):
        if clf_name.startswith('.'):
            continue
        with open(f'classifiers/groups/{clf_name}', 'rb') as fh:
            loaded = pickle.load(fh)
        classifiers_groups[clf_name.partition('.')[0]] = loaded
        print(f'INFO: classifier {clf_name} loaded')
except Exception as e:
    # Models stored before the failure remain registered.
    print(f"ERROR: loading classifiers failed with: {str(e)}")
|
46 |
+
|
47 |
|
48 |
def classify_code(text, top_n):
    """Return the ``top_n`` highest-probability classes from every code classifier.

    Args:
        text: input passed to the module-level ``embedder``.
        top_n: how many of the highest-probability classes to keep per classifier.

    Returns:
        ``{classifier_name: {class_label: probability}}`` with one entry per
        model in ``classifiers_codes``; within each entry the classes are
        ordered from most to least probable.
    """
    # predict_proba is given a batch holding this single embedding.
    features = [embedder(text)]

    preds = {}
    for clf_name, model in classifiers_codes.items():
        probs = model.predict_proba(features)
        # argsort is ascending: take the last top_n indices, then reverse them.
        ranked = np.argsort(probs, axis=1)[0, -top_n:][::-1]
        preds[clf_name] = {
            str(model.classes_[idx]): float(probs[0][idx]) for idx in ranked
        }
    return preds
|
58 |
|
59 |
+
|
60 |
def classify_group(text, top_n):
    """Return the ``top_n`` highest-probability classes from every group classifier.

    Args:
        text: input passed to the module-level ``embedder``.
        top_n: how many of the highest-probability classes to keep per classifier.

    Returns:
        ``{classifier_name: {class_label: probability}}`` with one entry per
        model in ``classifiers_groups``; within each entry the classes are
        ordered from most to least probable.
    """
    # predict_proba is given a batch holding this single embedding.
    features = [embedder(text)]

    preds = {}
    for clf_name, model in classifiers_groups.items():
        probs = model.predict_proba(features)
        # argsort is ascending: take the last top_n indices, then reverse them.
        ranked = np.argsort(probs, axis=1)[0, -top_n:][::-1]
        preds[clf_name] = {
            str(model.classes_[idx]): float(probs[0][idx]) for idx in ranked
        }
    return preds
|
70 |
|
71 |
+
def get_top_result(preds, weights=None, threshold=0.5):
    """Combine per-classifier scores into a weighted vote and pick the winner.

    Args:
        preds: mapping ``{classifier_name: {class_name: score}}``. Any
            ``_codes`` / ``_groups`` part of a classifier name is removed
            before its weight is looked up.
        weights: mapping ``{classifier_name: weight}``. Defaults to the
            module-level ``CLS_WEIGHTS``.
        threshold: the winning weighted total must be strictly greater than
            this value for a class to be returned.

    Returns:
        The class name with the highest weighted total, or ``None`` when
        ``preds`` contains no scores or the best total does not exceed
        ``threshold``. On ties the class seen first wins.

    Raises:
        KeyError: if a classifier that contributed scores has no weight.
    """
    if weights is None:
        weights = CLS_WEIGHTS

    total_scores = {}
    for clf_name, scores in preds.items():
        weight_key = clf_name.replace('_codes', '').replace('_groups', '')
        for class_name, score in scores.items():
            total_scores[class_name] = (
                total_scores.get(class_name, 0.0) + weights[weight_key] * score
            )

    # Nothing to vote on: report "no confident result" instead of failing.
    if not total_scores:
        return None

    # The previous implementation called np.array(dict.values()).argmax();
    # that wraps the dict view in a 0-d object array, so argmax() was always 0
    # and the first class was chosen regardless of its score.
    best_class = max(total_scores, key=total_scores.get)
    if total_scores[best_class] > threshold:
        return best_class
    return None
|
86 |
+
|
87 |
|
88 |
app = Flask(__name__)
|
89 |
|
|
|
102 |
base64_bytes = str(data['textB64']).encode("ascii")
|
103 |
sample_string_bytes = base64.b64decode(base64_bytes)
|
104 |
text = sample_string_bytes.decode("ascii")
|
|
|
|
|
105 |
top_n = int(data['top_n'])
|
106 |
|
107 |
if top_n < 1:
|
|
|
111 |
|
112 |
pred_codes = classify_code(text, top_n)
|
113 |
pred_groups = classify_group(text, top_n)
|
114 |
+
pred_codes_top = get_top_result(pred_codes)
|
115 |
+
pred_groups_top = get_top_result(pred_groups)
|
116 |
result = {
|
117 |
"icd10":
|
118 |
+
{'result': pred_codes_top, 'details': pred_codes},
|
119 |
"dx_group":
|
120 |
+
{'result': pred_groups_top, 'details': pred_groups}
|
121 |
}
|
122 |
return result
|
123 |
|
classifiers/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
classifiers/codes/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
model_finetuned_clear.pkl → classifiers/codes/mlp_codes.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39eda1dc2f583525dd689e048b396f476835c42ed2a8fead4884b6c87bad639d
|
3 |
+
size 5185392
|
classifiers/codes/svc_codes.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:669ddbb9dea179f38dcc5e94282074240eac2cb361e839416a4fc74ea718050d
|
3 |
+
size 865684575
|
classifiers/codes/xgboost_codes.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49a24f3740770aca1f546ccf44e2d2346ba18105920691255b587026cba962d7
|
3 |
+
size 17972293
|
classifiers/groups/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
classifiers/groups/mlp_groups.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f494f298b214d3239be0a53ae986b6eadf6822ccb77fcfdf8149b1aad80c87f2
|
3 |
+
size 4924655
|
classifiers/groups/svc_groups.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85d1f0874ebdc3986d412e6d8508d979a324c278d70b29dc4491477b209e69fa
|
3 |
+
size 574657606
|
classifiers/groups/xgboost_groups.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3f1d2fd1f46c18e3bf6b0b67ae8a6c6b52c8b4e06961eff4568cebee7f68989
|
3 |
+
size 8361773
|
embedder/embedder.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b27f378ed88ce2ae9e01e9fd4e5706fc2072957b665c4d12e02a84bbe4f4c4d8
|
3 |
+
size 434228972
|
required_classes.py
CHANGED
@@ -25,7 +25,7 @@ class BertEmbedder:
|
|
25 |
truncation=True).to(self.device)
|
26 |
model_output = self.embedder(**encoded_input)
|
27 |
text_embed = model_output.pooler_output[0].cpu()
|
28 |
-
return text_embed
|
29 |
|
30 |
def batch_predict(self, texts: List[str]):
|
31 |
encoded_input = self.tokenizer(texts,
|
|
|
25 |
truncation=True).to(self.device)
|
26 |
model_output = self.embedder(**encoded_input)
|
27 |
text_embed = model_output.pooler_output[0].cpu()
|
28 |
+
return text_embed.tolist()
|
29 |
|
30 |
def batch_predict(self, texts: List[str]):
|
31 |
encoded_input = self.tokenizer(texts,
|
requirements.txt
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
numpy==1.22.4
|
2 |
torch==2.0.1
|
3 |
-
scikit-learn==1.
|
4 |
transformers==4.29.2
|
5 |
flask==2.0.3
|
|
|
|
1 |
numpy==1.22.4
|
2 |
torch==2.0.1
|
3 |
+
scikit-learn==1.3.0
|
4 |
transformers==4.29.2
|
5 |
flask==2.0.3
|
6 |
+
xgboost==1.7.6
|