ngocminhta committed
Commit 667fbf3 · 1 Parent(s): 9770ff8
update faidsetv2
Browse files
- app.py +1 -1
- core/seen_db/index.faiss +3 -0
- core/seen_db/index_meta.faiss +3 -0
- core/seen_db/is_mixed_dict.pkl +3 -0
- core/seen_db/label_dict.pkl +3 -0
- core/seen_db/write_model_dict.pkl +3 -0
- infer.py +1 -37
app.py CHANGED
@@ -64,7 +64,7 @@ async def predict(request: Request):
             label_dict=label_dict,
             is_mixed_dict=is_mixed_dict,
             text_list=text_list,
-            K=
+            K=21)
         return JSONResponse(content={"results": results})
     elif mode == "advanced":
         return 0
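The one-line change above completes the infer_3_class call so that it explicitly passes K=21 neighbours. For context, a minimal sketch of what the surrounding predict handler plausibly looks like; the FastAPI wiring, the request parsing, and the name of the non-"advanced" branch are assumptions, since only the keyword arguments and the two return statements are visible in this diff:

    # Hypothetical reconstruction of the handler in app.py around the changed lines.
    from fastapi import FastAPI, Request
    from fastapi.responses import JSONResponse

    from infer import infer_3_class

    app = FastAPI()

    # Placeholders: in the real Space these objects are loaded once at startup.
    model = tokenizer = index = label_dict = is_mixed_dict = None

    @app.post("/predict")
    async def predict(request: Request):
        body = await request.json()           # request format assumed
        text_list = body.get("texts", [])
        mode = body.get("mode", "normal")     # branch name assumed; only "advanced" appears in the diff
        if mode == "normal":
            results = infer_3_class(
                model=model,
                tokenizer=tokenizer,
                index=index,
                label_dict=label_dict,
                is_mixed_dict=is_mixed_dict,
                text_list=text_list,
                K=21)                          # the value introduced by this commit
            return JSONResponse(content={"results": results})
        elif mode == "advanced":               # still a stub in the committed code
            return 0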
core/seen_db/index.faiss ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afedda60bfd82c2579bbc7bf15a7ab59a0cb3f885377f28f1e9c5f06c756ca1e
+size 398736429
core/seen_db/index_meta.faiss ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc9346766d599b9d87e3521d77ea908d21ac9263b02bf46a0ede282f29a92ac8
+size 1297873
core/seen_db/is_mixed_dict.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5879f5bbcec0881436a54bcbed6be999269017cc8aa4542eb55f8a8689590fa
+size 1555703
core/seen_db/label_dict.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce731039f21189f4a7911ce9eb30566a5b03ae4912d5ba938a22ac9e350ee128
+size 1555703
core/seen_db/write_model_dict.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f18b37b64ef2556406fa303a31db1d17e64b220859fe2fc48e09c8d46311497e
+size 1555703
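Each of the five files added under core/seen_db/ is tracked with Git LFS, so the repository only carries a three-line pointer (version, oid, size); the actual payloads (a roughly 398 MB FAISS index, its metadata, and three pickled dictionaries) are materialized with git lfs pull. A hedged sketch of how these artifacts would be loaded, modeled on the commented-out loading code this commit deletes from infer.py; the Indexer wrapper and the embedding dimension of 768 come from that removed code and are assumptions about the rest of the repo:

    import os
    import pickle

    def load_pkl(path):
        # Mirrors the load_pkl helper that remains in infer.py.
        with open(path, "rb") as f:
            return pickle.load(f)

    database_path = "core/seen_db"  # directory added in this commit

    # The FAISS wrapper comes from the code removed from infer.py (left commented
    # here because Indexer and its module path are not shown in this commit):
    #     index = Indexer(768)
    #     index.deserialize_from(database_path)

    label_dict = load_pkl(os.path.join(database_path, "label_dict.pkl"))
    is_mixed_dict = load_pkl(os.path.join(database_path, "is_mixed_dict.pkl"))
    write_model_dict = load_pkl(os.path.join(database_path, "write_model_dict.pkl"))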
infer.py CHANGED
@@ -45,21 +45,6 @@ def load_pkl(path):
         return pickle.load(f)
 
 def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list, K):
-    # model = TextEmbeddingModel(opt.model_name).cuda()
-    # state_dict = torch.load(opt.model_path, map_location=model.model.device)
-    # new_state_dict={}
-    # for key in state_dict.keys():
-    #     if key.startswith('model.'):
-    #         new_state_dict[key[6:]]=state_dict[key]
-    # model.load_state_dict(state_dict)
-    # tokenizer=model.tokenizer
-
-    # index = Indexer(opt.embedding_dim)
-    # index.deserialize_from(opt.database_path)
-    # label_dict=load_pkl(os.path.join(opt.database_path,'label_dict.pkl'))
-    # is_mixed_dict=load_pkl(os.path.join(opt.database_path,'is_mixed_dict.pkl'))
-
-    # text = opt.text
     encoded_text = tokenizer.batch_encode_plus(
         text_list,
         return_tensors="pt",
@@ -72,7 +57,6 @@ def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list, K):
     top_ids_and_scores = index.search_knn(embeddings, K)
     pred = []
     for i, (ids, scores) in enumerate(top_ids_and_scores):
-        # print(f"Top {K} results for text:")
         sorted_scores = np.argsort(scores)
         sorted_scores = sorted_scores[::-1]
 
@@ -94,25 +78,5 @@ def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list, K):
         final[0] = round(fuzzy_cnt[(1,0)] / total_score*100,2)
         final[1] = round(fuzzy_cnt[(0,10^3)] / total_score*100,2)
         final[2] = round(fuzzy_cnt[(1,1)] / total_score*100,2)
-        # print(f"Final prediction: {final}")
         pred.append(final)
-    return pred
-
-
-# if __name__ == "__main__":
-#     parser = argparse.ArgumentParser()
-#     parser.add_argument('--embedding_dim', type=int, default=768)
-#     parser.add_argument('--database_path', type=str, default="database", help="Path to the index file")
-
-#     parser.add_argument("--model_path", type=str, default="core/model.pth",\
-#                          help="Path to the embedding model checkpoint")
-#     parser.add_argument('--model_name', type=str, default="ZurichNLPZurichNLP/unsup-simcse-xlm-roberta-base", help="Model name")
-
-#     parser.add_argument('--K', type=int, default=20, help="Search [1,K] nearest neighbors,choose the best K")
-#     parser.add_argument('--pooling', type=str, default="average", help="Pooling method, average or cls")
-#     parser.add_argument('--text', type=str, default="")
-#     parser.add_argument('--seed', type=int, default=0)
-
-#     opt = parser.parse_args()
-#     set_seed(opt.seed)
-#     infer(opt)
+    return pred
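With the commented-out loading code and the old argparse entry point removed, infer_3_class is the single entry point left in infer.py: it tokenizes text_list, embeds it, retrieves the K nearest stored examples with index.search_knn, and turns the neighbour counts in fuzzy_cnt into a three-element percentage vector per input. A hedged usage sketch follows; the model, tokenizer and index objects are stubbed out because their construction is not part of this diff, and the meaning of the three output slots is not spelled out in the commit:

    # Hypothetical direct call into infer.py, mirroring what app.py does over HTTP.
    from infer import infer_3_class

    texts = [
        "An example paragraph to classify.",
        "Another paragraph, possibly from a different source.",
    ]

    # model, tokenizer, index, label_dict, is_mixed_dict must be the embedding model,
    # its tokenizer, the deserialized FAISS index and the pickled dictionaries from
    # core/seen_db (see the loading sketch above); None is only a placeholder here.
    model = tokenizer = index = label_dict = is_mixed_dict = None

    results = infer_3_class(
        model, tokenizer, index, label_dict, is_mixed_dict,
        text_list=texts,
        K=21,  # matches the value app.py now passes
    )

    for text, final in zip(texts, results):
        # final is the triple built in the diff above: each slot is
        # round(count / total_score * 100, 2), i.e. roughly a percentage.
        print(text[:40], final)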