# Model definition for "bert-cpp-minilm-v6": an embeddings model served
# through the bert-embeddings backend.
backend: bert-embeddings
embeddings: true
f16: true

# NOTE(review): presumably the number of model layers offloaded to the GPU;
# confirm against the backend's documentation.
gpu_layers: 90
mmap: true
name: bert-cpp-minilm-v6

parameters:
  # Must match the `filename` entry under download_files below.
  model: bert-MiniLM-L6-v2q4_0.bin

# Files fetched for this model: local filename, expected SHA-256 digest,
# and source URI.
download_files:
- filename: "bert-MiniLM-L6-v2q4_0.bin"
  sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad"
  uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin"

# Help text (literal block scalar); the "model" value in the example request
# is the `name` defined above.
usage: |
    You can test this model with curl like this:

    curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
      "input": "Your text string goes here",
      "model": "bert-cpp-minilm-v6"
    }'