mgoin committed on
Commit
43517d5
·
1 Parent(s): 6f1ad15

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +27 -15
README.md CHANGED
@@ -16,17 +16,12 @@ wget -O buddy.jpeg https://raw.githubusercontent.com/neuralmagic/deepsparse/main
16
  wget -O thailand.jpg https://raw.githubusercontent.com/neuralmagic/deepsparse/main/src/deepsparse/yolact/sample_images/thailand.jpg
17
  ```
18
 
19
- Then make and run a pipeline in Python:
20
  ```python
21
  import numpy as np
22
- from deepsparse import Pipeline
23
- from deepsparse.clip import (
24
- CLIPTextInput,
25
- CLIPVisualInput,
26
- CLIPZeroShotInput
27
- )
28
 
29
- def new_process_inputs(self, inputs: CLIPTextInput):
30
  if not isinstance(inputs.text, list):
31
  inputs.text = [inputs.text]
32
  if not isinstance(inputs.text[0], str):
@@ -36,21 +31,38 @@ def new_process_inputs(self, inputs: CLIPTextInput):
36
  tokens_lengths = np.array(tokens.shape[0] * [tokens.shape[1] - 1])
37
  return [tokens, tokens_lengths]
38
 
39
- # This overrides the process_inputs function globally for all CLIPTextPipeline classes,
40
- # so when we make a zeroshot pipeline later that uses this class, it will use this edit!
41
- CLIPTextPipeline.process_inputs = new_process_inputs
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  possible_classes = ["ice cream", "an elephant", "a dog", "a building", "a church"]
44
  images = ["basilica.jpg", "buddy.jpeg", "thailand.jpg"]
45
 
46
- pipeline = Pipeline.create(task="clip_zeroshot", visual_model_path="visual.onnx", text_model_path="textual.onnx")
 
 
 
 
 
47
 
48
- pipeline_input = CLIPZeroShotInput(
49
  image=CLIPVisualInput(images=images),
50
  text=CLIPTextInput(text=possible_classes),
51
- )
52
 
53
- output = pipeline(pipeline_input).text_scores
54
  for i in range(len(output)):
55
  prediction = possible_classes[np.argmax(output[i])]
56
  print(f"Image {images[i]} is a picture of {prediction}")
 
16
  wget -O thailand.jpg https://raw.githubusercontent.com/neuralmagic/deepsparse/main/src/deepsparse/yolact/sample_images/thailand.jpg
17
  ```
18
 
19
+ This model takes a second input, the length of the tokens, so apply the following input override before creating the pipeline:
20
  ```python
21
  import numpy as np
22
+ from deepsparse.clip import CLIPTextPipeline
 
 
 
 
 
23
 
24
+ def custom_process_inputs(self, inputs):
25
  if not isinstance(inputs.text, list):
26
  inputs.text = [inputs.text]
27
  if not isinstance(inputs.text[0], str):
 
31
  tokens_lengths = np.array(tokens.shape[0] * [tokens.shape[1] - 1])
32
  return [tokens, tokens_lengths]
33
 
34
+ # This overrides the process_inputs method on the CLIPTextPipeline class, so it applies globally to every CLIPTextPipeline instance
35
+ CLIPTextPipeline.process_inputs = custom_process_inputs
36
+ ```
37
+
38
+ Then make and run a pipeline in Python:
39
+ ```python
40
+ from deepsparse import Pipeline
41
+ from deepsparse.clip import (
42
+ CLIPTextInput,
43
+ CLIPVisualInput,
44
+ CLIPZeroShotInput
45
+ )
46
+ from huggingface_hub import snapshot_download
47
+
48
+ # Download the model from HF
49
+ model_folder = snapshot_download(repo_id="mgoin/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K-quant-ds")
50
 
51
  possible_classes = ["ice cream", "an elephant", "a dog", "a building", "a church"]
52
  images = ["basilica.jpg", "buddy.jpeg", "thailand.jpg"]
53
 
54
+ # Load the model into DeepSparse
55
+ pipeline = Pipeline.create(
56
+ task="clip_zeroshot",
57
+ visual_model_path=model_folder + "/visual.onnx",
58
+ text_model_path=model_folder + "/textual.onnx"
59
+ )
60
 
61
+ output = pipeline(
62
  image=CLIPVisualInput(images=images),
63
  text=CLIPTextInput(text=possible_classes),
64
+ ).text_scores
65
 
 
66
  for i in range(len(output)):
67
  prediction = possible_classes[np.argmax(output[i])]
68
  print(f"Image {images[i]} is a picture of {prediction}")