gmastrapas committed • Commit 6c8a548 • Parent(s): ede5490

feat: add onnx runtime usage
README.md CHANGED
This dual capability makes it an excellent tool for multimodal retrieval-augmented generation (MuRAG) applications, enabling seamless text-to-text and text-to-image searches within a single model.


## Data, Parameters, Training

An updated version of our [technical report](https://arxiv.org/abs/2405.20204) with details on `jina-clip-v2` is coming soon. Stay tuned!


## Usage

</details>

<details>
<summary>via <a href="https://huggingface.co/docs/transformers/en/index">transformers</a></summary>

```python
# !pip install transformers einops timm pillow
from transformers import AutoModel

# Initialize the model
model = AutoModel.from_pretrained('jinaai/jina-clip-v2', trust_remote_code=True)

# Corpus
sentences = [
    'غروب جميل على الشاطئ', # Arabic
    '海滩上美丽的日落', # Chinese
    'Un beau coucher de soleil sur la plage', # French
    'Ein wunderschöner Sonnenuntergang am Strand', # German
    'Ένα όμορφο ηλιοβασίλεμα πάνω από την παραλία', # Greek
    'समुद्र तट पर एक खूबसूरत सूर्यास्त', # Hindi
    'Un bellissimo tramonto sulla spiaggia', # Italian
    '浜辺に沈む美しい夕日', # Japanese
    '해변 위로 아름다운 일몰', # Korean
]

# Public image URLs or PIL Images
image_urls = ['https://i.ibb.co/nQNGqL0/beach1.jpg', 'https://i.ibb.co/r5w8hG8/beach2.jpg']

# Choose a matryoshka dimension, set to None to get the full 1024-dim vectors
truncate_dim = 512

text_embeddings = model.encode_text(sentences, truncate_dim=truncate_dim)
image_embeddings = model.encode_image(
    image_urls, truncate_dim=truncate_dim
) # also accepts PIL.Image.Image, local filenames, dataURI

# Encode query text
query = 'beautiful sunset over the beach' # English
query_embeddings = model.encode_text(
    query, task='retrieval.query', truncate_dim=truncate_dim
)

# Text to Image
print('En -> Img: ' + str(query_embeddings @ image_embeddings[0].T))
# Image to Image
print('Img -> Img: ' + str(image_embeddings[0] @ image_embeddings[1].T))
# Text to Text
print('En -> Ar: ' + str(query_embeddings @ text_embeddings[0].T))
print('En -> Zh: ' + str(query_embeddings @ text_embeddings[1].T))
print('En -> Fr: ' + str(query_embeddings @ text_embeddings[2].T))
print('En -> De: ' + str(query_embeddings @ text_embeddings[3].T))
print('En -> Gr: ' + str(query_embeddings @ text_embeddings[4].T))
print('En -> Hi: ' + str(query_embeddings @ text_embeddings[5].T))
print('En -> It: ' + str(query_embeddings @ text_embeddings[6].T))
print('En -> Jp: ' + str(query_embeddings @ text_embeddings[7].T))
print('En -> Ko: ' + str(query_embeddings @ text_embeddings[8].T))
```
</details>

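The similarity scores above are plain `@` dot products, which works because the returned vectors are normalized (see the ONNX section below). If you keep the full 1024-dim vectors with `truncate_dim=None`, you can also apply matryoshka truncation yourself later. A minimal sketch continuing from the snippet above; the re-normalization step is our assumption about how truncated vectors should be used, not a documented API:

```python
import numpy as np

# Encode once at full dimensionality ...
full_embeddings = model.encode_text(sentences, truncate_dim=None)

# ... then keep the first k matryoshka dimensions and re-normalize
k = 256
truncated = full_embeddings[:, :k]
truncated = truncated / np.linalg.norm(truncated, axis=1, keepdims=True)
```
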
<details>
<summary>via <a href="https://sbert.net/">sentence-transformers</a></summary>

```python
# !pip install sentence-transformers einops timm pillow
from sentence_transformers import SentenceTransformer

# Choose a matryoshka dimension
truncate_dim = 512

# Initialize the model
model = SentenceTransformer(
    'jinaai/jina-clip-v2', trust_remote_code=True, truncate_dim=truncate_dim
)

# Corpus
sentences = [
    'غروب جميل على الشاطئ', # Arabic
    '海滩上美丽的日落', # Chinese
    'Un beau coucher de soleil sur la plage', # French
    'Ein wunderschöner Sonnenuntergang am Strand', # German
    'Ένα όμορφο ηλιοβασίλεμα πάνω από την παραλία', # Greek
    'समुद्र तट पर एक खूबसूरत सूर्यास्त', # Hindi
    'Un bellissimo tramonto sulla spiaggia', # Italian
    '浜辺に沈む美しい夕日', # Japanese
    '해변 위로 아름다운 일몰', # Korean
]

# Public image URLs or PIL Images
image_urls = ['https://i.ibb.co/nQNGqL0/beach1.jpg', 'https://i.ibb.co/r5w8hG8/beach2.jpg']

# Encode text and images
text_embeddings = model.encode(sentences)
image_embeddings = model.encode(image_urls) # also accepts PIL.Image.Image, local filenames, dataURI

# Encode query text
query = 'beautiful sunset over the beach' # English
query_embeddings = model.encode(query, prompt_name='retrieval.query')
```
</details>

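To rank the corpus against the query without writing the dot products by hand, recent sentence-transformers releases (v3+) expose a `similarity` helper that defaults to cosine similarity. A short sketch continuing from the snippet above:

```python
# Score the English query against the images and the multilingual sentences
print(model.similarity(query_embeddings, image_embeddings))
print(model.similarity(query_embeddings, text_embeddings))
```
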
<details>
<summary>via the <a href="https://onnxruntime.ai/">ONNX Runtime</a></summary>

```python
# !pip install transformers onnxruntime pillow
import onnxruntime as ort
from transformers import AutoImageProcessor, AutoTokenizer

# Load tokenizer and image processor using transformers
tokenizer = AutoTokenizer.from_pretrained('jinaai/jina-clip-v2', trust_remote_code=True)
image_processor = AutoImageProcessor.from_pretrained(
    'jinaai/jina-clip-v2', trust_remote_code=True
)

# Corpus
sentences = [
    'غروب جميل على الشاطئ', # Arabic
    '海滩上美丽的日落', # Chinese
    'Un beau coucher de soleil sur la plage', # French
    'Ein wunderschöner Sonnenuntergang am Strand', # German
    'Ένα όμορφο ηλιοβασίλεμα πάνω από την παραλία', # Greek
    'समुद्र तट पर एक खूबसूरत सूर्यास्त', # Hindi
    'Un bellissimo tramonto sulla spiaggia', # Italian
    '浜辺に沈む美しい夕日', # Japanese
    '해변 위로 아름다운 일몰', # Korean
]

# Public image URLs or PIL Images
image_urls = ['https://i.ibb.co/nQNGqL0/beach1.jpg', 'https://i.ibb.co/r5w8hG8/beach2.jpg']

# Tokenize input texts (padded to a common length) and transform input images
input_ids = tokenizer(sentences, padding=True, return_tensors='np')['input_ids']
pixel_values = image_processor(image_urls)['pixel_values']

# Start an ONNX Runtime session
session = ort.InferenceSession('jina-clip-v2/onnx/model.onnx')

# Run inference
output = session.run(None, {'input_ids': input_ids, 'pixel_values': pixel_values})

# Keep the normalized embeddings; the first two outputs are un-normalized
_, _, text_embeddings, image_embeddings = output
```

</details>
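The `InferenceSession` call above assumes a local copy of the repository. If you only need the ONNX weights, one way to fetch them is via `huggingface_hub`; a sketch continuing from the snippet above (the exact filename inside the repo is inferred from the path used there):

```python
# !pip install huggingface_hub
from huggingface_hub import hf_hub_download

# Download the ONNX graph from the Hub into the local cache
model_path = hf_hub_download(repo_id='jinaai/jina-clip-v2', filename='onnx/model.onnx')
session = ort.InferenceSession(model_path)
```

Since the last two outputs are already normalized, plain dot products between `text_embeddings` and `image_embeddings` give cosine similarities, as in the transformers example above.
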
## License

`jina-clip-v2` is listed on AWS & Azure. If you need to use it beyond those platforms or on-premises within your company, note that the model is licensed under CC BY-NC 4.0. For commercial usage inquiries, feel free to [contact us](https://jina.ai/contact-sales/).

## Contact

Join our [Discord community](https://discord.jina.ai) and chat with other community members about ideas.