File size: 1,454 Bytes
37c2a8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import src.constants.credentials as cred
import os


# Service configuration dictionary for the mxbai / MSC Direct setup.
#
# NOTE: this literal is evaluated at import time. ``os.environ[...]`` raises
# ``KeyError`` if the ``msc_direct_s3_path`` environment variable is not set,
# so importing this module requires that variable to be defined.
service_mxbai_msc_direct_config = {
    # Where and how the input data is read: path comes from the environment,
    # credentials from the project's credentials module, format is parquet.
    "reader_config": {
        "input_path": os.environ["msc_direct_s3_path"],
        "credentials": cred.credentials_backblaze,
        "format": "parquet",
    },
    "sample_size": 32,
    # Model identifiers for the embedding model and the re-ranking model.
    "sentence_transformer_name": "mixedbread-ai/mxbai-embed-large-v1",
    "cross_encoder_name": "mixedbread-ai/mxbai-rerank-large-v1",
    "batch_size": 4,
    "dataset_size": 32,
    "seq_len": 256,
    "top_k": 50,
    # Columns listed for semantic use (presumably the text fields fed to the
    # embedding model -- confirm against the consumer of this config).
    "semantic_column_names": [
        "name",
        "price",
        "brand",
        "keyword",
        "description",
        "specifications",
    ],
    "programmatic_search_config": {
        # Both bounds are numeric so they can be compared with / used as
        # numbers consistently (max_value was previously the string "10000"
        # while min_value was the int 0).
        "scalar_columns": [
            {"column_name": "price", "min_value": 0, "max_value": 10000},
        ],
        "discrete_columns": [
            {"column_name": "brand", "default_values": []},
        ],
        "columns_to_drop": ["similarities", "embeddings", "index"],
    },
}