Disable forcing CUDA
Browse files
~550 ms for 100 embeddings on T4
- handler.py +1 -1
handler.py
CHANGED
@@ -14,5 +14,5 @@ class EndpointHandler():
|
|
14 |
A :obj:`list` | `dict`: will be serialized and returned
|
15 |
"""
|
16 |
sentences = data.pop("inputs",data)
|
17 |
-
embeddings = self.model.encode(sentences,
|
18 |
return embeddings.tolist()
|
|
|
14 |
A :obj:`list` | `dict`: will be serialized and returned
|
15 |
"""
|
16 |
sentences = data.pop("inputs",data)
|
17 |
+
embeddings = self.model.encode(sentences, batch_size=100)
|
18 |
return embeddings.tolist()
|