Spaces:

lynx-analytics
/

lynxkite

Running

App Files Files Community

mszel committed on Mar 17

Commit

35248ff

1 Parent(s): 5384241

review fixes 1

Browse files

Files changed (2) hide show

examples/LynxScribe Image RAG +64 -62
lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py +29 -20

examples/LynxScribe Image RAG CHANGED Viewed

@@ -1,12 +1,5 @@
 {
   "edges": [
-    {
-      "id": "GCP Image Loader 1 LynxScribe Image RAG Builder 1",
-      "source": "GCP Image Loader 1",
-      "sourceHandle": "output",
-      "target": "LynxScribe Image RAG Builder 1",
-      "targetHandle": "image_urls"
-    },
     {
       "id": "LynxScribe Image RAG Builder 1 LynxScribe Image RAG Query 1",
       "source": "LynxScribe Image RAG Builder 1",
@@ -41,63 +34,17 @@
       "sourceHandle": "output",
       "target": "LynxScribe Image RAG Builder 1",
       "targetHandle": "rag_graph"
     }
   ],
   "env": "LynxScribe",
   "nodes": [
-    {
-      "data": {
-        "__execution_delay": 0.0,
-        "collapsed": null,
-        "display": null,
-        "error": null,
-        "meta": {
-          "inputs": {},
-          "name": "GCP Image Loader",
-          "outputs": {
-            "output": {
-              "name": "output",
-              "position": "right",
-              "type": {
-                "type": "None"
-              }
-            }
-          },
-          "params": {
-            "gcp_bucket": {
-              "default": "lynxkite_public_data",
-              "name": "gcp_bucket",
-              "type": {
-                "type": "<class 'str'>"
-              }
-            },
-            "prefix": {
-              "default": "lynxscribe-images/image-rag-test",
-              "name": "prefix",
-              "type": {
-                "type": "<class 'str'>"
-              }
-            }
-          },
-          "type": "basic"
-        },
-        "params": {
-          "gcp_bucket": "lynxkite_public_data",
-          "prefix": "lynxscribe-images/image-rag-test"
-        },
-        "status": "done",
-        "title": "GCP Image Loader"
-      },
-      "dragHandle": ".bg-primary",
-      "height": 225.0,
-      "id": "GCP Image Loader 1",
-      "position": {
-        "x": -316.75434318246107,
-        "y": 245.50177834264986
-      },
-      "type": "basic",
-      "width": 282.0
-    },
     {
       "data": {
         "__execution_delay": 0.0,
@@ -193,7 +140,7 @@
           "type": "basic"
         },
         "params": {
-          "chat": "Show me a picture about healthy life"
         },
         "status": "done",
         "title": "Input chat"
@@ -272,7 +219,7 @@
       "data": {
         "__execution_delay": null,
         "collapsed": false,
-        "display": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test/food-405521_1280.jpg",
         "error": null,
         "meta": {
           "inputs": {
@@ -452,6 +399,61 @@
       },
       "type": "basic",
       "width": 442.0
     }
   ]
 }

 {
   "edges": [
     {
       "id": "LynxScribe Image RAG Builder 1 LynxScribe Image RAG Query 1",
       "source": "LynxScribe Image RAG Builder 1",
       "sourceHandle": "output",
       "target": "LynxScribe Image RAG Builder 1",
       "targetHandle": "rag_graph"
+    },
+    {
+      "id": "Cloud-sourced Image Loader 1 LynxScribe Image RAG Builder 1",
+      "source": "Cloud-sourced Image Loader 1",
+      "sourceHandle": "output",
+      "target": "LynxScribe Image RAG Builder 1",
+      "targetHandle": "image_urls"
     }
   ],
   "env": "LynxScribe",
   "nodes": [
     {
       "data": {
         "__execution_delay": 0.0,
           "type": "basic"
         },
         "params": {
+          "chat": "Show me a picture about meditation"
         },
         "status": "done",
         "title": "Input chat"
       "data": {
         "__execution_delay": null,
         "collapsed": false,
+        "display": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test/yoga-3053487_1280.jpg",
         "error": null,
         "meta": {
           "inputs": {
       },
       "type": "basic",
       "width": 442.0
+    },
+    {
+      "data": {
+        "display": null,
+        "error": null,
+        "meta": {
+          "inputs": {},
+          "name": "Cloud-sourced Image Loader",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "cloud_provider": {
+              "default": "gcp",
+              "name": "cloud_provider",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            "folder_URL": {
+              "default": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test",
+              "name": "folder_URL",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          },
+          "position": {
+            "x": 141.0,
+            "y": 421.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "cloud_provider": "gcp",
+          "folder_URL": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test"
+        },
+        "status": "done",
+        "title": "Cloud-sourced Image Loader"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 267.0,
+      "id": "Cloud-sourced Image Loader 1",
+      "position": {
+        "x": -451.2626989986675,
+        "y": 225.06333310959974
+      },
+      "type": "basic",
+      "width": 437.0
     }
   ]
 }

lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py CHANGED Viewed

@@ -6,7 +6,6 @@ from google.cloud import storage
 from copy import deepcopy
 import asyncio
 import pandas as pd
-import os
 import joblib
 import pathlib
@@ -38,30 +37,43 @@ from lynxkite.core.executors import one_by_one
 ENV = "LynxScribe"
 one_by_one.register(ENV)
-os.makedirs("../joblib-cache", exist_ok=True)
-mem = joblib.Memory("../joblib-cache")
 op = ops.op_registration(ENV)
 output_on_top = ops.output_position(output="top")
-@op("GCP Image Loader")
-def gcp_image_loader(
     *,
-    gcp_bucket: str = "lynxkite_public_data",
-    prefix: str = "lynxscribe-images/image-rag-test",
 ):
     """
-    Gives back the list of URLs of all the images in the GCP storage.
     """
-    client = storage.Client()
-    bucket = client.bucket(gcp_bucket)
-    blobs = bucket.list_blobs(prefix=prefix)
-    image_urls = [
-        blob.public_url
-        for blob in blobs
-        if blob.name.endswith((".jpg", ".jpeg", ".png"))
-    ]
-    return {"image_urls": image_urls}
 @output_on_top
@@ -142,8 +154,6 @@ async def ls_image_rag_builder(
     image_urls,
     image_describer,
     rag_graph,
-    *,
-    image_rag_out_path: str = "image_test_rag_graph.pickle",
 ):
     """
     Based on an input image folder (currently only supports GCP storage),
@@ -278,7 +288,6 @@ async def search_context(rag_graph, text, *, top_k=3):
             {"image_url": image_url, "score": score, "description": description}
         )
-    print(result_list)
     return {"embedding_similarities": result_list}

 from copy import deepcopy
 import asyncio
 import pandas as pd
 import joblib
 import pathlib
 ENV = "LynxScribe"
 one_by_one.register(ENV)
+mem = joblib.Memory("joblib-cache")
 op = ops.op_registration(ENV)
 output_on_top = ops.output_position(output="top")
def _split_gcs_folder_url(folder_URL: str):
    """Split a GCS folder URL into a ``(bucket_name, prefix)`` pair.

    Everything up to and including the first ``".com/"`` is treated as the
    scheme and host and is discarded. When the input has no ``".com/"`` it is
    taken to be a bare ``"bucket/prefix"`` path already. Trailing slashes are
    ignored.

    Raises:
        ValueError: If no bucket name can be extracted from ``folder_URL``.
    """
    # Split on the FIRST ".com/" so that an object path which itself contains
    # ".com/" cannot be mistaken for the host boundary.
    _, host_sep, after_host = folder_URL.partition(".com/")
    path = (after_host if host_sep else folder_URL).rstrip("/")

    # The first path segment is the bucket; the remainder (possibly empty) is
    # the object prefix. Partitioning once keeps the prefix intact even when
    # the bucket name reappears later in the path.
    bucket_name, _, prefix = path.partition("/")
    if not bucket_name:
        raise ValueError(f"No bucket name found in folder URL '{folder_URL}'.")
    return bucket_name, prefix


@op("Cloud-sourced Image Loader")
def cloud_image_loader(
    *,
    cloud_provider: str = "gcp",
    folder_URL: str = "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test",
):
    """
    Gives back the list of URLs of all the images from a cloud-based folder.
    Currently only supports GCP storage.

    Args:
        cloud_provider: Identifier of the cloud provider; only "gcp" is accepted.
        folder_URL: URL of the folder, i.e. the host followed by the bucket
            name and an optional object prefix. A trailing slash is ignored.

    Returns:
        A dict with the single key "image_urls", holding the public URLs of
        the blobs under the folder whose names end in .jpg, .jpeg or .png.

    Raises:
        ValueError: If the cloud provider is not supported, or if no bucket
            name can be extracted from ``folder_URL``.
    """
    # Validate the cheap inputs first so that bad arguments fail before a
    # storage client is created.
    if cloud_provider != "gcp":
        raise ValueError(f"Cloud provider '{cloud_provider}' is not supported.")
    bucket_name, prefix = _split_gcs_folder_url(folder_URL)

    client = storage.Client()
    blobs = client.bucket(bucket_name).list_blobs(prefix=prefix)
    image_urls = [
        blob.public_url
        for blob in blobs
        if blob.name.endswith((".jpg", ".jpeg", ".png"))
    ]
    return {"image_urls": image_urls}
 @output_on_top
     image_urls,
     image_describer,
     rag_graph,
 ):
     """
     Based on an input image folder (currently only supports GCP storage),
             {"image_url": image_url, "score": score, "description": description}
         )
     return {"embedding_similarities": result_list}