jina name
Browse files
- configs/datasets.yaml +2 -2
- src/paper_manager.py +1 -1
- src/utils/hash.py +1 -1
- src/utils/paper_retriever.py +1 -1
configs/datasets.yaml
CHANGED
@@ -5,7 +5,7 @@ DEFAULT:
|
|
5 |
log_dir: ./log
|
6 |
# embedding: sentence-transformers/all-MiniLM-L6-v2
|
7 |
# embedding: BAAI/llm-embedder
|
8 |
-
embedding: jina-embeddings-v3
|
9 |
embedding_task: text-matching # ONLY FOR JINA_v3, retrieval.passage, text-matching, retrieval.query
|
10 |
embedding_database: text-matching # ONLY FOR JINA_v3, retrieval.passage, text-matching, retrieval.query
|
11 |
|
@@ -39,4 +39,4 @@ RETRIEVE:
|
|
39 |
s_summary: 0.0
|
40 |
s_abstract: 0.0
|
41 |
similarity_threshold: 0.95
|
42 |
-
# similarity_threshold: 0.55
|
|
|
5 |
log_dir: ./log
|
6 |
# embedding: sentence-transformers/all-MiniLM-L6-v2
|
7 |
# embedding: BAAI/llm-embedder
|
8 |
+
embedding: jinaai/jina-embeddings-v3
|
9 |
embedding_task: text-matching # ONLY FOR JINA_v3, retrieval.passage, text-matching, retrieval.query
|
10 |
embedding_database: text-matching # ONLY FOR JINA_v3, retrieval.passage, text-matching, retrieval.query
|
11 |
|
|
|
39 |
s_summary: 0.0
|
40 |
s_abstract: 0.0
|
41 |
similarity_threshold: 0.95
|
42 |
+
# similarity_threshold: 0.55
|
src/paper_manager.py
CHANGED
@@ -664,7 +664,7 @@ class PaperManager:
|
|
664 |
postfix_set = {
|
665 |
"sentence-transformers/all-MiniLM-L6-v2": "",
|
666 |
"BAAI/llm-embedder": "_llm_embedder",
|
667 |
-
"jina-embeddings-v3": "_jina_v3"
|
668 |
}
|
669 |
postfix = postfix_set[self.config.DEFAULT.embedding]
|
670 |
if "jina" in postfix:
|
|
|
664 |
postfix_set = {
|
665 |
"sentence-transformers/all-MiniLM-L6-v2": "",
|
666 |
"BAAI/llm-embedder": "_llm_embedder",
|
667 |
+
"jinaai/jina-embeddings-v3": "_jina_v3"
|
668 |
}
|
669 |
postfix = postfix_set[self.config.DEFAULT.embedding]
|
670 |
if "jina" in postfix:
|
src/utils/hash.py
CHANGED
@@ -32,7 +32,7 @@ def check_embedding(repo_id):
|
|
32 |
"vocab.txt",
|
33 |
]
|
34 |
elif repo_id in [
|
35 |
-
"jina-embeddings-v3",
|
36 |
]:
|
37 |
files_to_download = [
|
38 |
"model.safetensors",
|
|
|
32 |
"vocab.txt",
|
33 |
]
|
34 |
elif repo_id in [
|
35 |
+
"jinaai/jina-embeddings-v3",
|
36 |
]:
|
37 |
files_to_download = [
|
38 |
"model.safetensors",
|
src/utils/paper_retriever.py
CHANGED
@@ -110,7 +110,7 @@ class Retriever(object):
|
|
110 |
self.embedding_postfix = ""
|
111 |
elif self.config.DEFAULT.embedding == "BAAI/llm-embedder":
|
112 |
self.embedding_postfix = "_llm_embedder"
|
113 |
-
elif self.config.DEFAULT.embedding == "jina-embeddings-v3":
|
114 |
self.embedding_postfix = "_jina_v3"
|
115 |
if self.config.DEFAULT.embedding_database == "text-matching":
|
116 |
self.embedding_postfix += "_text_matching"
|
|
|
110 |
self.embedding_postfix = ""
|
111 |
elif self.config.DEFAULT.embedding == "BAAI/llm-embedder":
|
112 |
self.embedding_postfix = "_llm_embedder"
|
113 |
+
elif self.config.DEFAULT.embedding == "jinaai/jina-embeddings-v3":
|
114 |
self.embedding_postfix = "_jina_v3"
|
115 |
if self.config.DEFAULT.embedding_database == "text-matching":
|
116 |
self.embedding_postfix += "_text_matching"
|