lihuigu committed on
Commit
c9fbbef
·
1 Parent(s): c8709b2
configs/datasets.yaml CHANGED
@@ -5,7 +5,7 @@ DEFAULT:
5
  log_dir: ./log
6
  # embedding: sentence-transformers/all-MiniLM-L6-v2
7
  # embedding: BAAI/llm-embedder
8
- embedding: jina-embeddings-v3
9
  embedding_task: text-matching # ONLY FOR JINA_v3, retrieval.passage, text-matching, retrieval.query
10
  embedding_database: text-matching # ONLY FOR JINA_v3, retrieval.passage, text-matching, retrieval.query
11
 
@@ -39,4 +39,4 @@ RETRIEVE:
39
  s_summary: 0.0
40
  s_abstract: 0.0
41
  similarity_threshold: 0.95
42
- # similarity_threshold: 0.55
 
5
  log_dir: ./log
6
  # embedding: sentence-transformers/all-MiniLM-L6-v2
7
  # embedding: BAAI/llm-embedder
8
+ embedding: jinaai/jina-embeddings-v3
9
  embedding_task: text-matching # ONLY FOR JINA_v3, retrieval.passage, text-matching, retrieval.query
10
  embedding_database: text-matching # ONLY FOR JINA_v3, retrieval.passage, text-matching, retrieval.query
11
 
 
39
  s_summary: 0.0
40
  s_abstract: 0.0
41
  similarity_threshold: 0.95
42
+ # similarity_threshold: 0.55
src/paper_manager.py CHANGED
@@ -664,7 +664,7 @@ class PaperManager:
664
  postfix_set = {
665
  "sentence-transformers/all-MiniLM-L6-v2": "",
666
  "BAAI/llm-embedder": "_llm_embedder",
667
- "jina-embeddings-v3": "_jina_v3"
668
  }
669
  postfix = postfix_set[self.config.DEFAULT.embedding]
670
  if "jina" in postfix:
 
664
  postfix_set = {
665
  "sentence-transformers/all-MiniLM-L6-v2": "",
666
  "BAAI/llm-embedder": "_llm_embedder",
667
+ "jinaai/jina-embeddings-v3": "_jina_v3"
668
  }
669
  postfix = postfix_set[self.config.DEFAULT.embedding]
670
  if "jina" in postfix:
src/utils/hash.py CHANGED
@@ -32,7 +32,7 @@ def check_embedding(repo_id):
32
  "vocab.txt",
33
  ]
34
  elif repo_id in [
35
- "jina-embeddings-v3",
36
  ]:
37
  files_to_download = [
38
  "model.safetensors",
 
32
  "vocab.txt",
33
  ]
34
  elif repo_id in [
35
+ "jinaai/jina-embeddings-v3",
36
  ]:
37
  files_to_download = [
38
  "model.safetensors",
src/utils/paper_retriever.py CHANGED
@@ -110,7 +110,7 @@ class Retriever(object):
110
  self.embedding_postfix = ""
111
  elif self.config.DEFAULT.embedding == "BAAI/llm-embedder":
112
  self.embedding_postfix = "_llm_embedder"
113
- elif self.config.DEFAULT.embedding == "jina-embeddings-v3":
114
  self.embedding_postfix = "_jina_v3"
115
  if self.config.DEFAULT.embedding_database == "text-matching":
116
  self.embedding_postfix += "_text_matching"
 
110
  self.embedding_postfix = ""
111
  elif self.config.DEFAULT.embedding == "BAAI/llm-embedder":
112
  self.embedding_postfix = "_llm_embedder"
113
+ elif self.config.DEFAULT.embedding == "jinaai/jina-embeddings-v3":
114
  self.embedding_postfix = "_jina_v3"
115
  if self.config.DEFAULT.embedding_database == "text-matching":
116
  self.embedding_postfix += "_text_matching"