File size: 848 Bytes
d7fdb42 360f505 d7fdb42 8b1c859 eeafaaa 34b78ab d7fdb42 eeafaaa d7fdb42 34b78ab 8b1c859 34b78ab 8b1c859 eeafaaa 8b1c859 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# Module-level configuration constants: data paths, LanceDB table/column names,
# the selected embedding model, and top-k sizes.
# NOTE(review): the per-constant descriptions below are inferred from the names
# only; confirm them against the code that reads these values.
# Presumably the directory of markdown documents to be indexed.
MARKDOWN_SOURCE_DIR = "data/transformers/docs/source/en/"
# Presumably the on-disk location of the LanceDB database and the table used in it.
LANCEDB_DIRECTORY = "data/lancedb"
LANCEDB_TABLE_NAME = "table"
# Presumably the column names of that table: embedding vector, chunk text, source path.
VECTOR_COLUMN_NAME = "embedding"
TEXT_COLUMN_NAME = "text"
DOCUMENT_PATH_COLUMN_NAME = "document_path"
# Selected embedding model. The commented-out line is an alternative choice.
# The active value also appears as a key in emb_sizes, thresh_distances and
# context_lengths below.
# EMBED_NAME = "sentence-transformers/all-MiniLM-L6-v2"
EMBED_NAME = "text-embedding-ada-002"
# Presumably: number of candidates fetched by the first-stage search, and how
# many of them are kept after reranking -- TODO confirm.
TOP_K_RANK = 50
TOP_K_RERANK = 5
# Integer size registered for each embedding model name.
# NOTE(review): presumably the embedding vector dimensionality -- confirm
# against the code that creates the vector column.
emb_sizes = {
    "sentence-transformers/all-MiniLM-L6-v2": 384,
    "thenlper/gte-large": 1024,
    "text-embedding-ada-002": 1536,
}
# Distance threshold registered for each embedding model name.
# NOTE(review): presumably a cut-off on the vector-search distance; the metric
# and the direction of the comparison are not visible here -- confirm with the
# caller.
# NOTE(review): "thenlper/gte-large" has entries in emb_sizes and
# context_lengths but none here, so a direct lookup for that model would raise
# KeyError. Add a threshold before selecting that model.
thresh_distances = {
    "sentence-transformers/all-MiniLM-L6-v2": 1.2,
    "text-embedding-ada-002": 0.5,
}
# Context length registered for each model name; the last three keys are the
# same embedding model names that appear in emb_sizes.
# NOTE(review): presumably the maximum input length in tokens -- confirm the
# unit with the code that truncates or budgets prompts.
context_lengths = {
    "mistralai/Mistral-7B-Instruct-v0.1": 4096,
    "GeneZC/MiniChat-3B": 4096,
    "gpt-3.5-turbo": 4096,
    "sentence-transformers/all-MiniLM-L6-v2": 128,
    "thenlper/gte-large": 512,
    "text-embedding-ada-002": 8191,
}
|