mszel committed
Commit a8b8b90 · 1 Parent(s): bf42c5b

bugfix of text input handling

examples/LynxScribe Image RAG CHANGED
@@ -1,19 +1,5 @@
 {
 "edges": [
-{
-"id": "LynxScribe Image Describer 1 LynxScribe Image RAG Builder 1",
-"source": "LynxScribe Image Describer 1",
-"sourceHandle": "output",
-"target": "LynxScribe Image RAG Builder 1",
-"targetHandle": "image_describer"
-},
-{
-"id": "LynxScribe RAG Vector Store 1 LynxScribe Image RAG Builder 1",
-"source": "LynxScribe RAG Vector Store 1",
-"sourceHandle": "output",
-"target": "LynxScribe Image RAG Builder 1",
-"targetHandle": "rag_graph"
-},
 {
 "id": "GCP Image Loader 1 LynxScribe Image RAG Builder 1",
 "source": "GCP Image Loader 1",
@@ -34,79 +20,31 @@
 "sourceHandle": "output",
 "target": "LynxScribe Image RAG Query 1",
 "targetHandle": "text"
+},
+{
+"id": "LynxScribe Image Describer 1 LynxScribe Image RAG Builder 1",
+"source": "LynxScribe Image Describer 1",
+"sourceHandle": "output",
+"target": "LynxScribe Image RAG Builder 1",
+"targetHandle": "image_describer"
+},
+{
+"id": "LynxScribe RAG Vector Store 1 LynxScribe Image RAG Builder 1",
+"source": "LynxScribe RAG Vector Store 1",
+"sourceHandle": "output",
+"target": "LynxScribe Image RAG Builder 1",
+"targetHandle": "rag_graph"
+},
+{
+"id": "LynxScribe Image RAG Query 1 View image 1",
+"source": "LynxScribe Image RAG Query 1",
+"sourceHandle": "output",
+"target": "View image 1",
+"targetHandle": "embedding_similarities"
 }
 ],
 "env": "LynxScribe",
 "nodes": [
-{
-"data": {
-"__execution_delay": 0.0,
-"collapsed": false,
-"display": null,
-"error": null,
-"meta": {
-"inputs": {},
-"name": "LynxScribe Image Describer",
-"outputs": {
-"output": {
-"name": "output",
-"position": "top",
-"type": {
-"type": "None"
-}
-}
-},
-"params": {
-"llm_interface": {
-"default": "openai",
-"name": "llm_interface",
-"type": {
-"type": "<class 'str'>"
-}
-},
-"llm_prompt_name": {
-"default": "cot_picture_descriptor",
-"name": "llm_prompt_name",
-"type": {
-"type": "<class 'str'>"
-}
-},
-"llm_prompt_path": {
-"default": "/Users/mszel/git/lynxscribe-demos/component_tutorials/04_image_search/image_description_prompts.yaml",
-"name": "llm_prompt_path",
-"type": {
-"type": "<class 'str'>"
-}
-},
-"llm_visual_model": {
-"default": "gpt-4o",
-"name": "llm_visual_model",
-"type": {
-"type": "<class 'str'>"
-}
-}
-},
-"type": "basic"
-},
-"params": {
-"llm_interface": "openai",
-"llm_prompt_name": "cot_picture_descriptor",
-"llm_prompt_path": "lynxkite-lynxscribe/promptdb/image_description_prompts.yaml",
-"llm_visual_model": "gpt-4o"
-},
-"status": "done",
-"title": "LynxScribe Image Describer"
-},
-"dragHandle": ".bg-primary",
-"height": 358.0,
-"id": "LynxScribe Image Describer 1",
-"position": {
-"x": 97.54029108623294,
-"y": 622.6506477264763
-},
-"type": "basic",
-"width": 376.0
-},
 {
 "data": {
 "__execution_delay": 0.0,
@@ -160,87 +98,6 @@
 "type": "basic",
 "width": 282.0
 },
-{
-"data": {
-"__execution_delay": 0.0,
-"collapsed": null,
-"display": null,
-"error": null,
-"meta": {
-"inputs": {},
-"name": "LynxScribe RAG Vector Store",
-"outputs": {
-"output": {
-"name": "output",
-"position": "top",
-"type": {
-"type": "None"
-}
-}
-},
-"params": {
-"collection_name": {
-"default": "lynx",
-"name": "collection_name",
-"type": {
-"type": "<class 'str'>"
-}
-},
-"name": {
-"default": "faiss",
-"name": "name",
-"type": {
-"type": "<class 'str'>"
-}
-},
-"num_dimensions": {
-"default": 3072.0,
-"name": "num_dimensions",
-"type": {
-"type": "<class 'int'>"
-}
-},
-"text_embedder_interface": {
-"default": "openai",
-"name": "text_embedder_interface",
-"type": {
-"type": "<class 'str'>"
-}
-},
-"text_embedder_model_name_or_path": {
-"default": "text-embedding-3-large",
-"name": "text_embedder_model_name_or_path",
-"type": {
-"type": "<class 'str'>"
-}
-}
-},
-"position": {
-"x": 807.0,
-"y": 315.0
-},
-"type": "basic"
-},
-"params": {
-"collection_name": "lynx",
-"name": "faiss",
-"num_dimensions": 3072.0,
-"text_embedder_interface": "openai",
-"text_embedder_model_name_or_path": "text-embedding-3-large"
-},
-"status": "active",
-"title": "LynxScribe RAG Vector Store"
-},
-"dragHandle": ".bg-primary",
-"height": 435.0,
-"id": "LynxScribe RAG Vector Store 1",
-"position": {
-"x": 507.56541832959726,
-"y": 625.9615546166448
-},
-"type": "basic",
-"width": 283.0
-},
 {
 "data": {
 "__execution_delay": 0.0,
@@ -344,7 +201,7 @@
 "type": "basic"
 },
 "params": {
-"chat": "Show me two cyclists!"
+"chat": "Show me a picture about meditation!"
 },
 "status": "done",
 "title": "Input chat"
@@ -422,6 +279,219 @@
 },
 "type": "basic",
 "width": 200.0
+},
+{
+"data": {
+"display": null,
+"error": null,
+"meta": {
+"inputs": {},
+"name": "LynxScribe Image Describer",
+"outputs": {
+"output": {
+"name": "output",
+"position": "top",
+"type": {
+"type": "None"
+}
+}
+},
+"params": {
+"api_key_name": {
+"default": "OPENAI_API_KEY",
+"name": "api_key_name",
+"type": {
+"type": "<class 'str'>"
+}
+},
+"llm_interface": {
+"default": "openai",
+"name": "llm_interface",
+"type": {
+"type": "<class 'str'>"
+}
+},
+"llm_prompt_name": {
+"default": "cot_picture_descriptor",
+"name": "llm_prompt_name",
+"type": {
+"type": "<class 'str'>"
+}
+},
+"llm_prompt_path": {
+"default": "lynxkite-lynxscribe/promptdb/image_description_prompts.yaml",
+"name": "llm_prompt_path",
+"type": {
+"type": "<class 'str'>"
+}
+},
+"llm_visual_model": {
+"default": "gpt-4o",
+"name": "llm_visual_model",
+"type": {
+"type": "<class 'str'>"
+}
+}
+},
+"position": {
+"x": 879.0,
+"y": 686.0
+},
+"type": "basic"
+},
+"params": {
+"api_key_name": "OPENAI_API_KEY",
+"llm_interface": "openai",
+"llm_prompt_name": "cot_picture_descriptor",
+"llm_prompt_path": "lynxkite-lynxscribe/promptdb/image_description_prompts.yaml",
+"llm_visual_model": "gpt-4o"
+},
+"status": "done",
+"title": "LynxScribe Image Describer"
+},
+"dragHandle": ".bg-primary",
+"height": 457.0,
+"id": "LynxScribe Image Describer 1",
+"position": {
+"x": 41.706162883849856,
+"y": 609.2665142696401
+},
+"type": "basic",
+"width": 357.0
+},
+{
+"data": {
+"display": null,
+"error": null,
+"meta": {
+"inputs": {},
+"name": "LynxScribe RAG Vector Store",
+"outputs": {
+"output": {
+"name": "output",
+"position": "top",
+"type": {
+"type": "None"
+}
+}
+},
+"params": {
+"api_key_name": {
+"default": "OPENAI_API_KEY",
+"name": "api_key_name",
+"type": {
+"type": "<class 'str'>"
+}
+},
+"collection_name": {
+"default": "lynx",
+"name": "collection_name",
+"type": {
+"type": "<class 'str'>"
+}
+},
+"name": {
+"default": "faiss",
+"name": "name",
+"type": {
+"type": "<class 'str'>"
+}
+},
+"num_dimensions": {
+"default": 3072.0,
+"name": "num_dimensions",
+"type": {
+"type": "<class 'int'>"
+}
+},
+"text_embedder_interface": {
+"default": "openai",
+"name": "text_embedder_interface",
+"type": {
+"type": "<class 'str'>"
+}
+},
+"text_embedder_model_name_or_path": {
+"default": "text-embedding-3-large",
+"name": "text_embedder_model_name_or_path",
+"type": {
+"type": "<class 'str'>"
+}
+}
+},
+"position": {
+"x": 1010.0,
+"y": 713.0
+},
+"type": "basic"
+},
+"params": {
+"api_key_name": "OPENAI_API_KEY",
+"collection_name": "lynx",
+"name": "faiss",
+"num_dimensions": 3072.0,
+"text_embedder_interface": "openai",
+"text_embedder_model_name_or_path": "text-embedding-3-large"
+},
+"status": "active",
+"title": "LynxScribe RAG Vector Store"
+},
+"dragHandle": ".bg-primary",
+"height": 515.0,
+"id": "LynxScribe RAG Vector Store 1",
+"position": {
+"x": 470.84700620705013,
+"y": 609.2665142696401
+},
+"type": "basic",
+"width": 350.0
+},
+{
+"data": {
+"__execution_delay": null,
+"collapsed": false,
+"display": null,
+"error": null,
+"meta": {
+"inputs": {
+"embedding_similarities": {
+"name": "embedding_similarities",
+"position": "left",
+"type": {
+"type": "<class 'inspect._empty'>"
+}
+}
+},
+"name": "View image",
+"outputs": {},
+"params": {
+"idx": {
+"default": 0.0,
+"name": "idx",
+"type": {
+"type": "<class 'int'>"
+}
+}
+},
+"position": {
+"x": 1418.0,
+"y": 272.0
+},
+"type": "table_view"
+},
+"params": {},
+"status": "done",
+"title": "View image"
+},
+"dragHandle": ".bg-primary",
+"height": 342.0,
+"id": "View image 1",
+"position": {
+"x": 1424.7240705678887,
+"y": -202.17696978464585
+},
+"type": "table_view",
+"width": 652.0
 }
 ]
 }
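
For orientation, the workspace above is plain JSON with an "edges" list and a "nodes" list; this commit rewires the Image Describer and RAG Vector Store into the builder and routes the query output into the new "View image 1" node through its "embedding_similarities" handle. A small illustrative script, not part of the commit, that prints the wiring (it assumes it is run from the repository root so the example path resolves):

import json

# Illustrative only: load the LynxKite workspace and list its node wiring.
with open("examples/LynxScribe Image RAG") as f:
    workspace = json.load(f)

for edge in workspace["edges"]:
    # e.g. "LynxScribe Image RAG Query 1" -> "View image 1" (embedding_similarities)
    print(f'{edge["source"]} -> {edge["target"]} ({edge["targetHandle"]})')

print([node["data"]["title"] for node in workspace["nodes"]])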
lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py CHANGED
@@ -7,6 +7,7 @@ from copy import deepcopy
 import asyncio
 import pandas as pd
 import os
+import joblib

 from lynxscribe.core.llm.base import get_llm_engine
 from lynxscribe.core.vector_store.base import get_vector_store
@@ -36,6 +37,8 @@ from lynxkite.core.executors import one_by_one

 ENV = "LynxScribe"
 one_by_one.register(ENV)
+os.makedirs("../joblib-cache", exist_ok=True)
+mem = joblib.Memory("../joblib-cache")
 op = ops.op_registration(ENV)
 output_on_top = ops.output_position(output="top")

@@ -49,7 +52,6 @@ def gcp_image_loader(
     """
     Gives back the list of URLs of all the images in the GCP storage.
     """
-
     client = storage.Client()
     bucket = client.bucket(gcp_bucket)
     blobs = bucket.list_blobs(prefix=prefix)
@@ -63,6 +65,7 @@

 @output_on_top
 @op("LynxScribe RAG Vector Store")
+# @mem.cache
 def ls_rag_graph(
     *,
     name: str = "faiss",
@@ -101,6 +104,7 @@ def ls_rag_graph(

 @output_on_top
 @op("LynxScribe Image Describer")
+# @mem.cache
 def ls_image_describer(
     *,
     llm_interface: str = "openai",
@@ -132,6 +136,7 @@ def ls_image_describer(

 @ops.input_position(image_describer="bottom", rag_graph="bottom")
 @op("LynxScribe Image RAG Builder")
+# @mem.cache
 async def ls_image_rag_builder(
     image_urls,
     image_describer,
@@ -253,9 +258,12 @@ def ls_save_rag_graph(
 @ops.input_position(rag_graph="bottom")
 @op("LynxScribe Image RAG Query")
 async def search_context(rag_graph, text, *, top_k=3):
+    message = text["text"]
+    rag_graph = rag_graph[0]["knowledge_base"]
+
     # get all similarities
     emb_similarities = await rag_graph.search_context(
-        text, max_results=top_k, unique_metadata_key="image_url"
+        message, max_results=top_k, unique_metadata_key="image_url"
     )

     # get the image urls, scores and descriptions
@@ -273,6 +281,15 @@ async def search_context(rag_graph, text, *, top_k=3):
     return {"embedding_similarities": result_list}


+@op("View image", view="image")
+def view_image(embedding_similarities):
+    """
+    Plotting the selected image.
+    """
+    embedding_similarities = embedding_similarities["embedding_similarities"]
+    return embedding_similarities[0]["image_url"]
+
+
 @output_on_top
 @op("Vector store")
 def vector_store(*, name="chromadb", collection_name="lynx"):
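
The "bugfix of text input handling" is the unpacking added at the top of search_context: the chat input now arrives as a dict carrying the message under a "text" key, and the RAG-graph input as a one-element list whose item exposes the knowledge base. A minimal sketch of those shapes, inferred from the diff and not an official LynxScribe API, with a placeholder object standing in for the actual knowledge base:

# Illustrative payload shapes only; the real inputs come from the "Input chat"
# and "LynxScribe RAG Vector Store" boxes in the workspace.
text_payload = {"text": "Show me a picture about meditation!"}
rag_payload = [{"knowledge_base": object()}]  # placeholder for the FAISS-backed RAG graph

# What the fixed op now does before querying:
message = text_payload["text"]
knowledge_base = rag_payload[0]["knowledge_base"]
print(message, type(knowledge_base).__name__)

The query op then returns {"embedding_similarities": [...]}, and the new View image op simply renders embedding_similarities[0]["image_url"], which is the shape the added workspace edge feeds it.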