ngocminhta committed on
Commit
667fbf3
·
1 Parent(s): 9770ff8

update faidsetv2

Browse files
app.py CHANGED
@@ -64,7 +64,7 @@ async def predict(request: Request):
64
  label_dict=label_dict,
65
  is_mixed_dict=is_mixed_dict,
66
  text_list=text_list,
67
- K=20)
68
  return JSONResponse(content={"results": results})
69
  elif mode == "advanced":
70
  return 0
 
64
  label_dict=label_dict,
65
  is_mixed_dict=is_mixed_dict,
66
  text_list=text_list,
67
+ K=21)
68
  return JSONResponse(content={"results": results})
69
  elif mode == "advanced":
70
  return 0
core/seen_db/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afedda60bfd82c2579bbc7bf15a7ab59a0cb3f885377f28f1e9c5f06c756ca1e
3
+ size 398736429
core/seen_db/index_meta.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc9346766d599b9d87e3521d77ea908d21ac9263b02bf46a0ede282f29a92ac8
3
+ size 1297873
core/seen_db/is_mixed_dict.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5879f5bbcec0881436a54bcbed6be999269017cc8aa4542eb55f8a8689590fa
3
+ size 1555703
core/seen_db/label_dict.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce731039f21189f4a7911ce9eb30566a5b03ae4912d5ba938a22ac9e350ee128
3
+ size 1555703
core/seen_db/write_model_dict.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f18b37b64ef2556406fa303a31db1d17e64b220859fe2fc48e09c8d46311497e
3
+ size 1555703
infer.py CHANGED
@@ -45,21 +45,6 @@ def load_pkl(path):
45
  return pickle.load(f)
46
 
47
  def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list, K):
48
- # model = TextEmbeddingModel(opt.model_name).cuda()
49
- # state_dict = torch.load(opt.model_path, map_location=model.model.device)
50
- # new_state_dict={}
51
- # for key in state_dict.keys():
52
- # if key.startswith('model.'):
53
- # new_state_dict[key[6:]]=state_dict[key]
54
- # model.load_state_dict(state_dict)
55
- # tokenizer=model.tokenizer
56
-
57
- # index = Indexer(opt.embedding_dim)
58
- # index.deserialize_from(opt.database_path)
59
- # label_dict=load_pkl(os.path.join(opt.database_path,'label_dict.pkl'))
60
- # is_mixed_dict=load_pkl(os.path.join(opt.database_path,'is_mixed_dict.pkl'))
61
-
62
- # text = opt.text
63
  encoded_text = tokenizer.batch_encode_plus(
64
  text_list,
65
  return_tensors="pt",
@@ -72,7 +57,6 @@ def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list,
72
  top_ids_and_scores = index.search_knn(embeddings, K)
73
  pred = []
74
  for i, (ids, scores) in enumerate(top_ids_and_scores):
75
- # print(f"Top {K} results for text:")
76
  sorted_scores = np.argsort(scores)
77
  sorted_scores = sorted_scores[::-1]
78
 
@@ -94,25 +78,5 @@ def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list,
94
  final[0] = round(fuzzy_cnt[(1,0)] / total_score*100,2)
95
  final[1] = round(fuzzy_cnt[(0,10^3)] / total_score*100,2)
96
  final[2] = round(fuzzy_cnt[(1,1)] / total_score*100,2)
97
- # print(f"Final prediction: {final}")
98
  pred.append(final)
99
- return pred
100
-
101
-
102
- # if __name__ == "__main__":
103
- # parser = argparse.ArgumentParser()
104
- # parser.add_argument('--embedding_dim', type=int, default=768)
105
- # parser.add_argument('--database_path', type=str, default="database", help="Path to the index file")
106
-
107
- # parser.add_argument("--model_path", type=str, default="core/model.pth",\
108
- # help="Path to the embedding model checkpoint")
109
- # parser.add_argument('--model_name', type=str, default="ZurichNLPZurichNLP/unsup-simcse-xlm-roberta-base", help="Model name")
110
-
111
- # parser.add_argument('--K', type=int, default=20, help="Search [1,K] nearest neighbors,choose the best K")
112
- # parser.add_argument('--pooling', type=str, default="average", help="Pooling method, average or cls")
113
- # parser.add_argument('--text', type=str, default="")
114
- # parser.add_argument('--seed', type=int, default=0)
115
-
116
- # opt = parser.parse_args()
117
- # set_seed(opt.seed)
118
- # infer(opt)
 
45
  return pickle.load(f)
46
 
47
  def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list, K):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  encoded_text = tokenizer.batch_encode_plus(
49
  text_list,
50
  return_tensors="pt",
 
57
  top_ids_and_scores = index.search_knn(embeddings, K)
58
  pred = []
59
  for i, (ids, scores) in enumerate(top_ids_and_scores):
 
60
  sorted_scores = np.argsort(scores)
61
  sorted_scores = sorted_scores[::-1]
62
 
 
78
  final[0] = round(fuzzy_cnt[(1,0)] / total_score*100,2)
79
  final[1] = round(fuzzy_cnt[(0,10^3)] / total_score*100,2)
80
  final[2] = round(fuzzy_cnt[(1,1)] / total_score*100,2)
 
81
  pred.append(final)
82
+ return pred