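# NOTE: the header below is residue from a web file viewer, not Python source.
# It is kept as a comment so that the module parses:
#   bibliotecadebabel / first commit / 37c2a8d / raw / history blame / 4.7 kB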
import src.constants.credentials as cred
import os
# Service configuration for the "made in china" dataset using the mxbai
# embedding and reranking models.
#
# The input path is read from the `made_in_china_s3_path` environment variable
# when this module is imported; a missing variable raises KeyError.
service_mxbai_made_in_china_config = {
    "reader_config": {
        "input_path": os.environ["made_in_china_s3_path"],
        "credentials": cred.credentials_backblaze,
        "format": "parquet",
    },
    "sample_size": 32,
    "sentence_transformer_name": "mixedbread-ai/mxbai-embed-large-v1",
    "cross_encoder_name": "mixedbread-ai/mxbai-rerank-large-v1",
    "batch_size": 4,
    "dataset_size": 32,
    "seq_len": 256,
    "top_k": 1000,
    "programmatic_search_config": {
        # NOTE(review): "max_value" is a str while "min_value" is an int --
        # confirm that the consumer expects this mix of types.
        "scalar_columns": [
            {"column_name": "price", "min_value": 0, "max_value": "10000"},
        ],
        "discrete_columns": [
            {
                "column_name": "supplierName",
                # No values pre-selected; an example entry would be
                # 'Zhongshan Norye Hardware Co., Ltd.'.
                "default_values": [],
            },
            {
                "column_name": "warranty",
                # No values pre-selected.
                "default_values": [],
            },
        ],
        "columns_to_drop": ["similarities", "embeddings"],
    },
}
# Service configuration for the MSC Direct sample dataset using the mxbai
# embedding and reranking models.
#
# The input path is read from the `msc_direct_s3_path` environment variable
# when this module is imported; a missing variable raises KeyError.
#
# NOTE(review): every value here is the same as in
# `service_mxbai_msc_direct_config` (including the input path) -- confirm
# whether the sample config is meant to point at a different dataset.
service_mxbai_msc_direct_sample_config = {
    "reader_config": {
        "input_path": os.environ["msc_direct_s3_path"],
        "credentials": cred.credentials_backblaze,
        "format": "parquet",
    },
    "sample_size": 32,
    "sentence_transformer_name": "mixedbread-ai/mxbai-embed-large-v1",
    "cross_encoder_name": "mixedbread-ai/mxbai-rerank-large-v1",
    "batch_size": 4,
    "dataset_size": 32,
    "seq_len": 256,
    "top_k": 50,
    "semantic_column_names": [
        "name",
        "price",
        "brand",
        "keyword",
        "description",
        "specifications",
    ],
    "programmatic_search_config": {
        # NOTE(review): "max_value" is a str while "min_value" is an int --
        # confirm that the consumer expects this mix of types.
        "scalar_columns": [
            {"column_name": "price", "min_value": 0, "max_value": "10000"},
        ],
        "discrete_columns": [
            {"column_name": "brand", "default_values": []},
        ],
        "columns_to_drop": ["similarities", "embeddings", "index"],
    },
}
# Service configuration for the MSC Direct dataset using the mxbai embedding
# and reranking models.
#
# The input path is read from the `msc_direct_s3_path` environment variable
# when this module is imported; a missing variable raises KeyError.
#
# NOTE(review): every value here is the same as in
# `service_mxbai_msc_direct_sample_config` (including the input path) --
# confirm whether the two configs are meant to differ.
service_mxbai_msc_direct_config = {
    "reader_config": {
        "input_path": os.environ["msc_direct_s3_path"],
        "credentials": cred.credentials_backblaze,
        "format": "parquet",
    },
    "sample_size": 32,
    "sentence_transformer_name": "mixedbread-ai/mxbai-embed-large-v1",
    "cross_encoder_name": "mixedbread-ai/mxbai-rerank-large-v1",
    "batch_size": 4,
    "dataset_size": 32,
    "seq_len": 256,
    "top_k": 50,
    "semantic_column_names": [
        "name",
        "price",
        "brand",
        "keyword",
        "description",
        "specifications",
    ],
    "programmatic_search_config": {
        # NOTE(review): "max_value" is a str while "min_value" is an int --
        # confirm that the consumer expects this mix of types.
        "scalar_columns": [
            {"column_name": "price", "min_value": 0, "max_value": "10000"},
        ],
        "discrete_columns": [
            {"column_name": "brand", "default_values": []},
        ],
        "columns_to_drop": ["similarities", "embeddings", "index"],
    },
}