nbpe97 committed on
Commit
0e189b6
·
1 Parent(s): abb0f6b

Initial commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ mxbai-embed-large-v1.mlpackage filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,9 +1,99 @@
1
- ---
2
- license: apache-2.0
3
- base_model:
4
- - mixedbread-ai/mxbai-embed-large-v1
5
- tags:
6
- - coreml
7
- - embedding model
8
- - bert
9
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CoreML Conversion of the mxbai-embed-large-v1 sentence embedding model
2
+
3
+ After extensive testing (and a lot of debugging with ChatGPT), I was able to convert the mxbai-embed-large-v1 model to CoreML and run it mostly on the GPU.
4
+
5
+ ```python
6
+ import torch
7
+ from transformers import AutoModel, AutoTokenizer
8
+ import coremltools as ct
9
+
10
+ # Define a wrapper class for the AutoModel to return only the last_hidden_state
11
+ class ModelWrapper(torch.nn.Module):
12
+ def __init__(self, model):
13
+ super(ModelWrapper, self).__init__()
14
+ self.model = model
15
+
16
+ def forward(self, input_ids, attention_mask):
17
+ # Extract the 'last_hidden_state' from the model output
18
+ output = self.model(input_ids=input_ids, attention_mask=attention_mask)
19
+ return output.last_hidden_state # or use 'pooler_output' if needed
20
+
21
+ # Load the Hugging Face Transformers model and tokenizer
22
+ model_name = "mixedbread-ai/mxbai-embed-large-v1" # Replace with your model
23
+ model = AutoModel.from_pretrained(model_name)
24
+ model.eval()
25
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
26
+
27
+ # Wrap the model to return only the tensor output
28
+ wrapped_model = ModelWrapper(model)
29
+ wrapped_model.eval()
30
+
31
+ # Sample input to export the model
32
+ dummy_input = tokenizer("This is a sample input", return_tensors="pt")
33
+
34
+ # Trace the model using tensor inputs (input_ids, attention_mask)
35
+ traced_model = torch.jit.trace(wrapped_model, (dummy_input['input_ids'], dummy_input['attention_mask']))
36
+
37
+ # Convert the traced PyTorch model to CoreML using the ML Program format
38
+ model_from_torch = ct.convert(
39
+ traced_model,
40
+ inputs=[
41
+ ct.TensorType(name="input_ids", shape=(1, ct.RangeDim(1, 512))),
42
+ ct.TensorType(name="attention_mask", shape=(1, ct.RangeDim(1, 512)))
43
+ ],
44
+ minimum_deployment_target=ct.target.iOS17,
45
+ convert_to="mlprogram",
46
+ compute_precision=ct.precision.FLOAT32
47
+ )
48
+
49
+ # Save the CoreML model as an mlpackage
50
+ model_from_torch.save("mxbai-embed-large-v1.mlpackage")
51
+ ```
52
+
53
+
54
+ It can be run like this:
55
+ ```python
56
+ import coremltools as ct
57
+ from transformers import AutoTokenizer
58
+ import numpy as np
59
+
60
+ # Load the CoreML model
61
+ model = ct.models.MLModel("mxbai-embed-large-v1.mlpackage")
62
+
63
+ # Load the tokenizer
64
+ tokenizer = AutoTokenizer.from_pretrained("mixedbread-ai/mxbai-embed-large-v1")
65
+
66
+ # Prepare some input text
67
+ input_text = "This is a test sentence for the CoreML model"
68
+ inputs = tokenizer(input_text, return_tensors="np", padding=True, truncation=True, max_length=512)
69
+
70
+ # Extract input tensors
71
+ input_ids = inputs['input_ids'].astype(np.float32) # CoreML expects float32
72
+ attention_mask = inputs['attention_mask'].astype(np.float32)
73
+
74
+ # Prepare inputs for the CoreML model
75
+ coreml_input = {"input_ids": input_ids, "attention_mask": attention_mask}
76
+
77
+ predictions = model.predict(coreml_input)
78
+
79
+ hidden_states = predictions['hidden_states']
80
+ cls_embedding = hidden_states[0, 0, :]
81
+ np.set_printoptions(threshold=np.inf)
82
+
83
+ # Print the CLS token embedding, which is a 1024-dimensional vector
84
+ print("CLS Token Embedding:", cls_embedding, len(cls_embedding))
85
+ ```
86
+
87
+ I verified the output against Ollama:
88
+
89
+ ```
90
+ curl http://localhost:11434/api/embeddings -d '{
91
+ "model": "mxbai-embed-large",
92
+ "prompt": "This is a test sentence for the CoreML model"
93
+ }'
94
+ ```
95
+
96
+ Environment: Python 3.11,
97
+ coremltools 8.0,
98
+ sentence-transformers 3.1.0,
99
+ transformers 4.44.2
mxbai-embed-large-v1.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d1bae3b1c903b800466ccaffe181f50a888b920730c3b6a1ec1d1f2020c3dc4
3
+ size 409884
mxbai-embed-large-v1.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d5a25f7de6794e1d89755e79d9b6eb37921baf723780713d62851f60058db48
3
+ size 1336394112
mxbai-embed-large-v1.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "D33F0FF9-224F-430E-AB16-D6E0AF12FDA8": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "FF0B3830-4E7E-4515-8982-24A9E6B4F2FE": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "FF0B3830-4E7E-4515-8982-24A9E6B4F2FE"
18
+ }