Update README.md
README.md CHANGED
@@ -16,17 +16,12 @@ wget -O buddy.jpeg https://raw.githubusercontent.com/neuralmagic/deepsparse/main
 wget -O thailand.jpg https://raw.githubusercontent.com/neuralmagic/deepsparse/main/src/deepsparse/yolact/sample_images/thailand.jpg
 ```
 
-
+For this model there is a second input that is the length of tokens, so run this input override before making the pipeline:
 ```python
 import numpy as np
-from deepsparse import
-from deepsparse.clip import (
-    CLIPTextInput,
-    CLIPVisualInput,
-    CLIPZeroShotInput
-)
+from deepsparse.clip import CLIPTextPipeline
 
-def new_process_inputs(self, inputs: CLIPTextInput):
+def custom_process_inputs(self, inputs):
     if not isinstance(inputs.text, list):
         inputs.text = [inputs.text]
     if not isinstance(inputs.text[0], str):
@@ -36,21 +31,38 @@ def new_process_inputs(self, inputs: CLIPTextInput):
     tokens_lengths = np.array(tokens.shape[0] * [tokens.shape[1] - 1])
     return [tokens, tokens_lengths]
 
-# This overrides the process_inputs function globally for all CLIPTextPipeline classes
-
-
+# This overrides the process_inputs function globally for all CLIPTextPipeline classes
+CLIPTextPipeline.process_inputs = custom_process_inputs
+```
+
+Then make and run a pipeline in Python:
+```python
+from deepsparse import Pipeline
+from deepsparse.clip import (
+    CLIPTextInput,
+    CLIPVisualInput,
+    CLIPZeroShotInput
+)
+from huggingface_hub import snapshot_download
+
+# Download the model from HF
+model_folder = snapshot_download(repo_id="mgoin/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K-quant-ds")
 
 possible_classes = ["ice cream", "an elephant", "a dog", "a building", "a church"]
 images = ["basilica.jpg", "buddy.jpeg", "thailand.jpg"]
 
-
+# Load the model into DeepSparse
+pipeline = Pipeline.create(
+    task="clip_zeroshot",
+    visual_model_path=model_folder + "/visual.onnx",
+    text_model_path=model_folder + "/textual.onnx"
+)
 
-
+output = pipeline(
     image=CLIPVisualInput(images=images),
     text=CLIPTextInput(text=possible_classes),
-)
+).text_scores
 
-output = pipeline(pipeline_input).text_scores
 for i in range(len(output)):
     prediction = possible_classes[np.argmax(output[i])]
     print(f"Image {images[i]} is a picture of {prediction}")
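As a side note (not part of the README change itself): the extra input that `custom_process_inputs` adds is just one length value per prompt, derived from the shape of the tokenized batch. The standalone sketch below reuses the same expression from the diff with a made-up batch of 3 prompts padded to 77 token ids, to show what the override returns as its second output:

```python
import numpy as np

# Hypothetical token batch: 3 prompts, each padded to 77 token ids
tokens = np.zeros((3, 77), dtype=np.int64)

# Same expression as in custom_process_inputs above:
# one entry per prompt, each equal to tokens.shape[1] - 1
tokens_lengths = np.array(tokens.shape[0] * [tokens.shape[1] - 1])

print(tokens_lengths.shape)  # (3,)
print(tokens_lengths)        # [76 76 76]
```

Because `CLIPTextPipeline.process_inputs = custom_process_inputs` patches the class itself, the override applies to every `CLIPTextPipeline` instance, in line with the README's instruction to run it before making the pipeline.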