mszel committed
Commit 0754675 · 1 Parent(s): c3044a4

adding the query part

examples/LynxScribe Image RAG CHANGED
@@ -20,6 +20,20 @@
       "sourceHandle": "output",
       "target": "LynxScribe Image RAG Builder 1",
       "targetHandle": "image_urls"
+    },
+    {
+      "id": "LynxScribe Image RAG Builder 1 LynxScribe Image RAG Query 1",
+      "source": "LynxScribe Image RAG Builder 1",
+      "sourceHandle": "output",
+      "target": "LynxScribe Image RAG Query 1",
+      "targetHandle": "rag_graph"
+    },
+    {
+      "id": "Input chat 1 LynxScribe Image RAG Query 1",
+      "source": "Input chat 1",
+      "sourceHandle": "output",
+      "target": "LynxScribe Image RAG Query 1",
+      "targetHandle": "text"
     }
   ],
   "env": "LynxScribe",
@@ -95,6 +109,8 @@
     },
     {
       "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
         "display": null,
         "error": null,
         "meta": {
@@ -280,9 +296,7 @@
           },
           "type": "basic"
         },
-        "params": {
-          "image_rag_out_path": "image_test_rag_graph.pickle"
-        },
+        "params": {},
         "status": "done",
         "title": "LynxScribe Image RAG Builder"
       },
@@ -295,6 +309,117 @@
       },
       "type": "basic",
       "width": 479.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "meta": {
+          "inputs": {},
+          "name": "Input chat",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "chat": {
+              "default": null,
+              "name": "chat",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          },
+          "position": {
+            "x": 1336.0,
+            "y": 378.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "chat": "Show me two cyclists!"
+        },
+        "status": "done",
+        "title": "Input chat"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 214.0,
+      "id": "Input chat 1",
+      "position": {
+        "x": -310.1420152146455,
+        "y": -139.39548490290966
+      },
+      "type": "basic",
+      "width": 387.0
+    },
+    {
+      "data": {
+        "display": null,
+        "error": null,
+        "meta": {
+          "inputs": {
+            "rag_graph": {
+              "name": "rag_graph",
+              "position": "bottom",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            },
+            "text": {
+              "name": "text",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          },
+          "name": "LynxScribe Image RAG Query",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "top_k": {
+              "default": 3.0,
+              "name": "top_k",
+              "type": {
+                "type": "<class 'int'>"
+              }
+            }
+          },
+          "position": {
+            "x": 1419.0,
+            "y": 371.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "top_k": 3.0
+        },
+        "status": "done",
+        "title": "LynxScribe Image RAG Query"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 200.0,
+      "id": "LynxScribe Image RAG Query 1",
+      "position": {
+        "x": 908.9211080204011,
+        "y": -132.3031800030364
+      },
+      "type": "basic",
+      "width": 200.0
     }
   ]
 }
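
The two new edges wire the output of "LynxScribe Image RAG Builder 1" into the rag_graph input of the new query node, and the new "Input chat 1" node into its text input. The sketch below (hypothetical driver code, not part of the commit) shows what the query node computes at runtime; the rag_graph.search_context(...) call and the result fields are taken from the search_context op in the Python diff that follows.

import asyncio

async def run_image_query(rag_graph, text: str, top_k: int = 3):
    # Same call the "LynxScribe Image RAG Query" op makes: the top_k most
    # similar image descriptions, deduplicated by the "image_url" metadata key.
    emb_similarities = await rag_graph.search_context(
        text, max_results=top_k, unique_metadata_key="image_url"
    )
    # Flatten each hit into image_url / score / description, as the op does.
    return [
        {
            "image_url": s.embedding.metadata["image_url"],
            "score": s.score,
            "description": s.embedding.document,
        }
        for s in emb_similarities
    ]

# e.g. asyncio.run(run_image_query(rag_graph, "Show me two cyclists!")),
# matching the chat text stored in the workflow above.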
lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py CHANGED
@@ -6,6 +6,7 @@ from google.cloud import storage
 from copy import deepcopy
 import asyncio
 import pandas as pd
+import os
 
 from lynxscribe.core.llm.base import get_llm_engine
 from lynxscribe.core.vector_store.base import get_vector_store
@@ -28,6 +29,11 @@ from lynxkite.core import ops
 import json
 from lynxkite.core.executors import one_by_one
 
+# logger
+# import logging
+# logging.basicConfig(level=logging.INFO)
+# logger = logging.getLogger(__name__)
+
 ENV = "LynxScribe"
 one_by_one.register(ENV)
 op = ops.op_registration(ENV)
@@ -64,13 +70,17 @@ def ls_rag_graph(
     collection_name: str = "lynx",
     text_embedder_interface: str = "openai",
     text_embedder_model_name_or_path: str = "text-embedding-3-large",
+    api_key_name: str = "OPENAI_API_KEY",
 ):
     """
     Returns with a vector store instance.
     """
 
     # getting the text embedder instance
-    llm = get_llm_engine(name=text_embedder_interface)
+    llm_params = {"name": text_embedder_interface}
+    if api_key_name:
+        llm_params["api_key"] = os.getenv(api_key_name)
+    llm = get_llm_engine(**llm_params)
     text_embedder = TextEmbedder(llm=llm, model=text_embedder_model_name_or_path)
 
     # getting the vector store
@@ -95,15 +105,20 @@ def ls_image_describer(
     *,
     llm_interface: str = "openai",
     llm_visual_model: str = "gpt-4o",
-    llm_prompt_path: str = "/Users/mszel/git/lynxscribe-demos/component_tutorials/04_image_search/image_description_prompts.yaml",
+    llm_prompt_path: str = "lynxkite-lynxscribe/promptdb/image_description_prompts.yaml",
     llm_prompt_name: str = "cot_picture_descriptor",
+    api_key_name: str = "OPENAI_API_KEY",
 ):
     """
     Returns with an image describer instance.
     TODO: adding a relative path to the prompt path + adding model kwargs
     """
 
-    llm = get_llm_engine(name=llm_interface)
+    llm_params = {"name": llm_interface}
+    if api_key_name:
+        llm_params["api_key"] = os.getenv(api_key_name)
+    llm = get_llm_engine(**llm_params)
+
     prompt_base = load_config(llm_prompt_path)[llm_prompt_name]
 
     return {
@@ -135,7 +150,7 @@ async def ls_image_rag_builder(
 
     # handling inputs
     image_describer = image_describer[0]["image_describer"]
-    image_urls = image_urls[0]["image_urls"]
+    image_urls = image_urls["image_urls"]
    rag_graph = rag_graph[0]["rag_graph"]
 
     # generate prompts from inputs
@@ -215,10 +230,47 @@ async def ls_image_rag_builder(
     # adding the embeddings to the RAG graph
     rag_graph.kg_base.vector_store.upsert(embedding_list)
 
-    # saving the RAG graph
-    rag_graph.kg_base.save(image_rag_out_path)
+    # # saving the RAG graph
+    # rag_graph.kg_base.save(image_rag_out_path)
 
-    return {"image_rag_path": image_rag_out_path} # TODO: do we need an output?
+    return {"knowledge_base": rag_graph}
+
+
+@op("LynxScribe RAG Graph Saver")
+def ls_save_rag_graph(
+    knowledge_base,
+    *,
+    image_rag_out_path: str = "image_test_rag_graph.pickle",
+):
+    """
+    Saves the RAG graph to a pickle file.
+    """
+
+    knowledge_base.kg_base.save(image_rag_out_path)
+    return None
+
+
+@ops.input_position(rag_graph="bottom")
+@op("LynxScribe Image RAG Query")
+async def search_context(rag_graph, text, *, top_k=3):
+    # get all similarities
+    emb_similarities = await rag_graph.search_context(
+        text, max_results=top_k, unique_metadata_key="image_url"
+    )
+
+    # get the image urls, scores and descriptions
+    result_list = []
+
+    for emb_sim in emb_similarities:
+        image_url = emb_sim.embedding.metadata["image_url"]
+        score = emb_sim.score
+        description = emb_sim.embedding.document
+        result_list.append(
+            {"image_url": image_url, "score": score, "description": description}
+        )
+
+    print(result_list)
+    return {"embedding_similarities": result_list}
 
 
 @output_on_top
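
The API-key handling added to ls_rag_graph and ls_image_describer follows a single pattern: the op stores only the name of an environment variable (api_key_name) and resolves it with os.getenv when building the LLM engine, so the secret itself never lands in a saved workflow. A minimal sketch of that pattern, assuming get_llm_engine accepts the api_key keyword exactly as used in the diff above (the helper name make_llm is hypothetical):

import os

from lynxscribe.core.llm.base import get_llm_engine

def make_llm(interface: str = "openai", api_key_name: str = "OPENAI_API_KEY"):
    # Build the kwargs for get_llm_engine; mirrors the diff above.
    llm_params = {"name": interface}
    if api_key_name:
        # os.getenv returns None when the variable is unset, in which case
        # api_key=None is passed through to get_llm_engine.
        llm_params["api_key"] = os.getenv(api_key_name)
    return get_llm_engine(**llm_params)

Passing an empty api_key_name skips the lookup entirely, presumably leaving key resolution to the engine's own defaults.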