mszel committed on
Commit
35248ff
·
1 Parent(s): 5384241

review fixes 1

Browse files
examples/LynxScribe Image RAG CHANGED
@@ -1,12 +1,5 @@
1
  {
2
  "edges": [
3
- {
4
- "id": "GCP Image Loader 1 LynxScribe Image RAG Builder 1",
5
- "source": "GCP Image Loader 1",
6
- "sourceHandle": "output",
7
- "target": "LynxScribe Image RAG Builder 1",
8
- "targetHandle": "image_urls"
9
- },
10
  {
11
  "id": "LynxScribe Image RAG Builder 1 LynxScribe Image RAG Query 1",
12
  "source": "LynxScribe Image RAG Builder 1",
@@ -41,63 +34,17 @@
41
  "sourceHandle": "output",
42
  "target": "LynxScribe Image RAG Builder 1",
43
  "targetHandle": "rag_graph"
 
 
 
 
 
 
 
44
  }
45
  ],
46
  "env": "LynxScribe",
47
  "nodes": [
48
- {
49
- "data": {
50
- "__execution_delay": 0.0,
51
- "collapsed": null,
52
- "display": null,
53
- "error": null,
54
- "meta": {
55
- "inputs": {},
56
- "name": "GCP Image Loader",
57
- "outputs": {
58
- "output": {
59
- "name": "output",
60
- "position": "right",
61
- "type": {
62
- "type": "None"
63
- }
64
- }
65
- },
66
- "params": {
67
- "gcp_bucket": {
68
- "default": "lynxkite_public_data",
69
- "name": "gcp_bucket",
70
- "type": {
71
- "type": "<class 'str'>"
72
- }
73
- },
74
- "prefix": {
75
- "default": "lynxscribe-images/image-rag-test",
76
- "name": "prefix",
77
- "type": {
78
- "type": "<class 'str'>"
79
- }
80
- }
81
- },
82
- "type": "basic"
83
- },
84
- "params": {
85
- "gcp_bucket": "lynxkite_public_data",
86
- "prefix": "lynxscribe-images/image-rag-test"
87
- },
88
- "status": "done",
89
- "title": "GCP Image Loader"
90
- },
91
- "dragHandle": ".bg-primary",
92
- "height": 225.0,
93
- "id": "GCP Image Loader 1",
94
- "position": {
95
- "x": -316.75434318246107,
96
- "y": 245.50177834264986
97
- },
98
- "type": "basic",
99
- "width": 282.0
100
- },
101
  {
102
  "data": {
103
  "__execution_delay": 0.0,
@@ -193,7 +140,7 @@
193
  "type": "basic"
194
  },
195
  "params": {
196
- "chat": "Show me a picture about healthy life"
197
  },
198
  "status": "done",
199
  "title": "Input chat"
@@ -272,7 +219,7 @@
272
  "data": {
273
  "__execution_delay": null,
274
  "collapsed": false,
275
- "display": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test/food-405521_1280.jpg",
276
  "error": null,
277
  "meta": {
278
  "inputs": {
@@ -452,6 +399,61 @@
452
  },
453
  "type": "basic",
454
  "width": 442.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
  }
456
  ]
457
  }
 
1
  {
2
  "edges": [
 
 
 
 
 
 
 
3
  {
4
  "id": "LynxScribe Image RAG Builder 1 LynxScribe Image RAG Query 1",
5
  "source": "LynxScribe Image RAG Builder 1",
 
34
  "sourceHandle": "output",
35
  "target": "LynxScribe Image RAG Builder 1",
36
  "targetHandle": "rag_graph"
37
+ },
38
+ {
39
+ "id": "Cloud-sourced Image Loader 1 LynxScribe Image RAG Builder 1",
40
+ "source": "Cloud-sourced Image Loader 1",
41
+ "sourceHandle": "output",
42
+ "target": "LynxScribe Image RAG Builder 1",
43
+ "targetHandle": "image_urls"
44
  }
45
  ],
46
  "env": "LynxScribe",
47
  "nodes": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  {
49
  "data": {
50
  "__execution_delay": 0.0,
 
140
  "type": "basic"
141
  },
142
  "params": {
143
+ "chat": "Show me a picture about meditation"
144
  },
145
  "status": "done",
146
  "title": "Input chat"
 
219
  "data": {
220
  "__execution_delay": null,
221
  "collapsed": false,
222
+ "display": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test/yoga-3053487_1280.jpg",
223
  "error": null,
224
  "meta": {
225
  "inputs": {
 
399
  },
400
  "type": "basic",
401
  "width": 442.0
402
+ },
403
+ {
404
+ "data": {
405
+ "display": null,
406
+ "error": null,
407
+ "meta": {
408
+ "inputs": {},
409
+ "name": "Cloud-sourced Image Loader",
410
+ "outputs": {
411
+ "output": {
412
+ "name": "output",
413
+ "position": "right",
414
+ "type": {
415
+ "type": "None"
416
+ }
417
+ }
418
+ },
419
+ "params": {
420
+ "cloud_provider": {
421
+ "default": "gcp",
422
+ "name": "cloud_provider",
423
+ "type": {
424
+ "type": "<class 'str'>"
425
+ }
426
+ },
427
+ "folder_URL": {
428
+ "default": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test",
429
+ "name": "folder_URL",
430
+ "type": {
431
+ "type": "<class 'str'>"
432
+ }
433
+ }
434
+ },
435
+ "position": {
436
+ "x": 141.0,
437
+ "y": 421.0
438
+ },
439
+ "type": "basic"
440
+ },
441
+ "params": {
442
+ "cloud_provider": "gcp",
443
+ "folder_URL": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test"
444
+ },
445
+ "status": "done",
446
+ "title": "Cloud-sourced Image Loader"
447
+ },
448
+ "dragHandle": ".bg-primary",
449
+ "height": 267.0,
450
+ "id": "Cloud-sourced Image Loader 1",
451
+ "position": {
452
+ "x": -451.2626989986675,
453
+ "y": 225.06333310959974
454
+ },
455
+ "type": "basic",
456
+ "width": 437.0
457
  }
458
  ]
459
  }
lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py CHANGED
@@ -6,7 +6,6 @@ from google.cloud import storage
6
  from copy import deepcopy
7
  import asyncio
8
  import pandas as pd
9
- import os
10
  import joblib
11
 
12
  import pathlib
@@ -38,30 +37,43 @@ from lynxkite.core.executors import one_by_one
38
 
39
  ENV = "LynxScribe"
40
  one_by_one.register(ENV)
41
- os.makedirs("../joblib-cache", exist_ok=True)
42
- mem = joblib.Memory("../joblib-cache")
43
  op = ops.op_registration(ENV)
44
  output_on_top = ops.output_position(output="top")
45
 
46
 
47
- @op("GCP Image Loader")
48
- def gcp_image_loader(
49
  *,
50
- gcp_bucket: str = "lynxkite_public_data",
51
- prefix: str = "lynxscribe-images/image-rag-test",
52
  ):
53
  """
54
- Gives back the list of URLs of all the images in the GCP storage.
 
55
  """
56
- client = storage.Client()
57
- bucket = client.bucket(gcp_bucket)
58
- blobs = bucket.list_blobs(prefix=prefix)
59
- image_urls = [
60
- blob.public_url
61
- for blob in blobs
62
- if blob.name.endswith((".jpg", ".jpeg", ".png"))
63
- ]
64
- return {"image_urls": image_urls}
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
 
67
  @output_on_top
@@ -142,8 +154,6 @@ async def ls_image_rag_builder(
142
  image_urls,
143
  image_describer,
144
  rag_graph,
145
- *,
146
- image_rag_out_path: str = "image_test_rag_graph.pickle",
147
  ):
148
  """
149
  Based on an input image folder (currently only supports GCP storage),
@@ -278,7 +288,6 @@ async def search_context(rag_graph, text, *, top_k=3):
278
  {"image_url": image_url, "score": score, "description": description}
279
  )
280
 
281
- print(result_list)
282
  return {"embedding_similarities": result_list}
283
 
284
 
 
6
  from copy import deepcopy
7
  import asyncio
8
  import pandas as pd
 
9
  import joblib
10
 
11
  import pathlib
 
37
 
38
  ENV = "LynxScribe"
39
  one_by_one.register(ENV)
40
+ mem = joblib.Memory("joblib-cache")
 
41
  op = ops.op_registration(ENV)
42
  output_on_top = ops.output_position(output="top")
43
 
44
 
45
+ @op("Cloud-sourced Image Loader")
46
+ def cloud_image_loader(
47
  *,
48
+ cloud_provider: str = "gcp",
49
+ folder_URL: str = "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test",
50
  ):
51
  """
52
+ Gives back the list of URLs of all the images from a cloud-based folder.
53
+ Currently only supports GCP storage.
54
  """
55
+ if folder_URL[-1].endswith("/"):
56
+ folder_URL = folder_URL[:-1]
57
+
58
+ if cloud_provider == "gcp":
59
+ client = storage.Client()
60
+ url_useful_part = folder_URL.split(".com/")[-1]
61
+ bucket_name = url_useful_part.split("/")[0]
62
+ if bucket_name == url_useful_part:
63
+ prefix = ""
64
+ else:
65
+ prefix = url_useful_part.split(bucket_name + "/")[-1]
66
+
67
+ bucket = client.bucket(bucket_name)
68
+ blobs = bucket.list_blobs(prefix=prefix)
69
+ image_urls = [
70
+ blob.public_url
71
+ for blob in blobs
72
+ if blob.name.endswith((".jpg", ".jpeg", ".png"))
73
+ ]
74
+ return {"image_urls": image_urls}
75
+ else:
76
+ raise ValueError(f"Cloud provider '{cloud_provider}' is not supported.")
77
 
78
 
79
  @output_on_top
 
154
  image_urls,
155
  image_describer,
156
  rag_graph,
 
 
157
  ):
158
  """
159
  Based on an input image folder (currently only supports GCP storage),
 
288
  {"image_url": image_url, "score": score, "description": description}
289
  )
290
 
 
291
  return {"embedding_similarities": result_list}
292
 
293