Spaces:

lynx-analytics
/

lynxkite

Running

App Files Files Community

mszel committed on Apr 22

Commit

76e89e6

1 Parent(s): 2148b2a

adding task solver box n xmpl

Browse files

Files changed (4) hide show

examples/LynxScribe Data Cleaning.lynxkite.json +345 -0
examples/LynxScribe Image Search.lynxkite.json +2 -14
examples/uploads/task_solver_examples.xlsx +0 -0
lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py +135 -1

examples/LynxScribe Data Cleaning.lynxkite.json ADDED Viewed

	@@ -0,0 +1,345 @@

+{
+  "edges": [
+    {
+      "id": "LynxScribe Task Solver 1 View DataFrame 1",
+      "source": "LynxScribe Task Solver 1",
+      "sourceHandle": "output",
+      "target": "View DataFrame 1",
+      "targetHandle": "input"
+    },
+    {
+      "id": "Read Excel 1 LynxScribe Task Solver 1",
+      "source": "Read Excel 1",
+      "sourceHandle": "output",
+      "target": "LynxScribe Task Solver 1",
+      "targetHandle": "dataframe"
+    },
+    {
+      "id": "LynxScribe Message 3 LynxScribe Task Solver 1",
+      "source": "LynxScribe Message 3",
+      "sourceHandle": "output",
+      "target": "LynxScribe Task Solver 1",
+      "targetHandle": "system_prompt"
+    },
+    {
+      "id": "LynxScribe Message 1 LynxScribe Task Solver 1",
+      "source": "LynxScribe Message 1",
+      "sourceHandle": "output",
+      "target": "LynxScribe Task Solver 1",
+      "targetHandle": "instruction_prompt"
+    }
+  ],
+  "env": "LynxScribe",
+  "nodes": [
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "input_metadata": null,
+        "meta": {
+          "inputs": {
+            "dataframe": {
+              "name": "dataframe",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            },
+            "instruction_prompt": {
+              "name": "instruction_prompt",
+              "position": "bottom",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            },
+            "system_prompt": {
+              "name": "system_prompt",
+              "position": "bottom",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          },
+          "name": "LynxScribe Task Solver",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "llm_interface": {
+              "default": "openai",
+              "name": "llm_interface",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            "llm_model_name": {
+              "default": "gpt-4o",
+              "name": "llm_model_name",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            "new_column_names": {
+              "default": "processed_field",
+              "name": "new_column_names",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          },
+          "type": "basic"
+        },
+        "params": {
+          "llm_interface": "openai",
+          "llm_model_name": "gpt-4o",
+          "new_column_names": "zip_code, country, state_or_county, city, district, street_name, house_number, floor, flat_number, additional_info"
+        },
+        "status": "done",
+        "title": "LynxScribe Task Solver"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 292.0,
+      "id": "LynxScribe Task Solver 1",
+      "position": {
+        "x": 576.0,
+        "y": 209.0
+      },
+      "type": "basic",
+      "width": 432.0
+    },
+    {
+      "data": {
+        "display": null,
+        "error": null,
+        "input_metadata": null,
+        "meta": {
+          "inputs": {
+            "input": {
+              "name": "input",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          },
+          "name": "View DataFrame",
+          "outputs": {},
+          "params": {},
+          "position": {
+            "x": 2162.0,
+            "y": 266.0
+          },
+          "type": "table_view"
+        },
+        "params": {},
+        "status": "done",
+        "title": "View DataFrame"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 309.0,
+      "id": "View DataFrame 1",
+      "position": {
+        "x": 1350.0,
+        "y": 210.0
+      },
+      "type": "table_view",
+      "width": 662.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "input_metadata": null,
+        "meta": {
+          "inputs": {},
+          "name": "Read Excel",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "columns": {
+              "default": "",
+              "name": "columns",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            "file_path": {
+              "default": null,
+              "name": "file_path",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            "sheet_name": {
+              "default": "Sheet1",
+              "name": "sheet_name",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          },
+          "position": {
+            "x": 429.0,
+            "y": 234.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "columns": "",
+          "file_path": "uploads/task_solver_examples.xlsx",
+          "sheet_name": "address_example"
+        },
+        "status": "done",
+        "title": "Read Excel"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 296.0,
+      "id": "Read Excel 1",
+      "position": {
+        "x": 11.0,
+        "y": 207.0
+      },
+      "type": "basic",
+      "width": 400.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "input_metadata": null,
+        "meta": {
+          "inputs": {},
+          "name": "LynxScribe Message",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "top",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "prompt_content": {
+              "default": null,
+              "name": "prompt_content",
+              "type": {
+                "format": "textarea"
+              }
+            },
+            "prompt_role": {
+              "default": null,
+              "name": "prompt_role",
+              "type": {
+                "enum": [
+                  "SYSTEM",
+                  "USER"
+                ]
+              }
+            }
+          },
+          "position": {
+            "x": 653.0,
+            "y": 954.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "prompt_content": "You are an AI assistant designed to clean and extract structured address information from raw text.\nYour goal is to identify and extract relevant address components while ignoring any unrelated information.\nThe output must be formatted as a structured dictionary.\n\nYour task is to parse an address from raw text and return a dictionary with the following keys:\n - zip_code: The postal or ZIP code.\n - country: The country name.\n - state_or_county: The state, province, or county (if applicable).\n - city: The city or town name.\n - district: The district or borough name (if mentioned).\n - street_type: The type of public space (e.g., street, avenue, boulevard, square).\n - street_name: The name of the public space (e.g., Main, Baker, Champs-\u00c9lys\u00e9es).\n - house_number: The house or building number.\n - floor: The floor number (if mentioned).\n - flat_number: The apartment or unit number (if mentioned).\n - additional_info: Any other useful details, such as building names, or known landmarks.\n\nIf any information is missing from the input, leave the corresponding key as an empty string.\n\nYou must return only a python dictionary with the following keys:\n`zip_code`, `country`, `state_or_county`, `city`, `district`, `street_name`, \n`house_number`, `floor`, `flat_number`, `additional_info`.\n\nDo not include any extra text, comments, or explanations\u2014only return the dictionary.",
+          "prompt_role": null
+        },
+        "status": "done",
+        "title": "LynxScribe Message"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 354.0,
+      "id": "LynxScribe Message 3",
+      "position": {
+        "x": 36.0,
+        "y": 561.0
+      },
+      "type": "basic",
+      "width": 740.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "input_metadata": null,
+        "meta": {
+          "inputs": {},
+          "name": "LynxScribe Message",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "top",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "prompt_content": {
+              "default": null,
+              "name": "prompt_content",
+              "type": {
+                "format": "textarea"
+              }
+            },
+            "prompt_role": {
+              "default": null,
+              "name": "prompt_role",
+              "type": {
+                "enum": [
+                  "SYSTEM",
+                  "USER"
+                ]
+              }
+            }
+          },
+          "position": {
+            "x": 1498.0,
+            "y": 660.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "prompt_content": "Extract structured address information from the following text: {message_parts}",
+          "prompt_role": "USER"
+        },
+        "status": "done",
+        "title": "LynxScribe Message"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 347.0,
+      "id": "LynxScribe Message 1",
+      "position": {
+        "x": 817.0,
+        "y": 566.0
+      },
+      "type": "basic",
+      "width": 498.0
+    }
+  ]
+}

examples/LynxScribe Image Search.lynxkite.json CHANGED Viewed

@@ -292,14 +292,10 @@
               }
             }
           },
-          "position": {
-            "x": 1260.0,
-            "y": 166.0
-          },
           "type": "basic"
         },
         "params": {
-          "chat": "Show me a picture about doctors and patients!"
         },
         "status": "done",
         "title": "Input chat"
@@ -355,10 +351,6 @@
               }
             }
           },
-          "position": {
-            "x": 1987.0,
-            "y": 365.0
-          },
           "type": "basic"
         },
         "params": {
@@ -379,7 +371,7 @@
     },
     {
       "data": {
-        "display": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test/bethesda-naval-medical-center-80380_1280.jpg",
         "error": null,
         "input_metadata": null,
         "meta": {
@@ -395,10 +387,6 @@
           "name": "LynxScribe Image Result Viewer",
           "outputs": {},
           "params": {},
-          "position": {
-            "x": 2326.0,
-            "y": 319.0
-          },
           "type": "image"
         },
         "params": {},

               }
             }
           },
           "type": "basic"
         },
         "params": {
+          "chat": "Show me a picture about fruits"
         },
         "status": "done",
         "title": "Input chat"
               }
             }
           },
           "type": "basic"
         },
         "params": {
     },
     {
       "data": {
+        "display": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test/food-405521_1280.jpg",
         "error": null,
         "input_metadata": null,
         "meta": {
           "name": "LynxScribe Image Result Viewer",
           "outputs": {},
           "params": {},
           "type": "image"
         },
         "params": {},

examples/uploads/task_solver_examples.xlsx ADDED Viewed

Binary file (11.3 kB). View file

lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py CHANGED Viewed

@@ -28,7 +28,7 @@ from lynxscribe.components.chat.processors import (
     TruncateHistory,
 )
 from lynxscribe.components.chat.api import ChatAPI
-from lynxscribe.core.models.prompts import ChatCompletionPrompt
 from lynxscribe.components.rag.loaders import FAQTemplateLoader
 from lynxkite.core import ops
@@ -56,6 +56,11 @@ class RAGVersion(Enum):
     V2 = "v2"
 class RAGTemplate(BaseModel):
     """
     Model for RAG templates consisting of three tables: they are connected via scenario names.
@@ -672,6 +677,113 @@ def chat_processor(processor, *, _ctx: one_by_one.Context):
     return {"chat_processor": chat_processor, **cfg}
 @output_on_top
 @op("Truncate history")
 def truncate_history(*, max_tokens=10000):
@@ -718,6 +830,28 @@ def input_chat(*, chat: str):
     return {"text": chat}
 @op("View", view="table_view")
 def view(input):
     columns = [str(c) for c in input.keys() if not str(c).startswith("_")]

     TruncateHistory,
 )
 from lynxscribe.components.chat.api import ChatAPI
+from lynxscribe.core.models.prompts import ChatCompletionPrompt, Message
 from lynxscribe.components.rag.loaders import FAQTemplateLoader
 from lynxkite.core import ops
     V2 = "v2"
+class MessageRole(Enum):
+    """Role of a prompt message; `lynxscribe_message` passes the member's value as `role` to `Message`."""
+    SYSTEM = "system"
+    USER = "user"
 class RAGTemplate(BaseModel):
     """
     Model for RAG templates consisting of three tables: they are connected via scenario names.
     return {"chat_processor": chat_processor, **cfg}
+@output_on_top
+@op("LynxScribe Message")
+def lynxscribe_message(*, prompt_role: MessageRole, prompt_content: ops.LongStr):
+    return_message = Message(role=prompt_role.value, content=prompt_content.strip())
+    return {"prompt_message": return_message}
+@op("Read Excel")
+def read_excel(*, file_path: str, sheet_name: str = "Sheet1", columns: str = ""):
+    """
+    Reads an Excel file and returns the content of the specified sheet.
+    The columns parameter can be used to specify which columns to include in the output.
+    If not specified, all columns will be included (separate the values by comma).
+    TODO: more general: several input/output versions.
+    """
+    df = pd.read_excel(file_path, sheet_name=sheet_name)
+    if columns:
+        columns = [c.strip() for c in columns.split(",") if c.strip()]
+        columns = [c for c in columns if c in df.columns]
+        if len(columns) == 0:
+            raise ValueError("No valid columns specified.")
+        df = df[columns].copy()
+    return df  # {"dataframe": df}
+@ops.input_position(system_prompt="bottom", instruction_prompt="bottom", df="left")
+@op("LynxScribe Task Solver")
+@mem.cache
+async def ls_task_solver(
+    system_prompt,
+    instruction_prompt,
+    df,
+    *,
+    llm_interface: str = "openai",
+    llm_model_name: str = "gpt-4o",
+    new_column_names: str = "processed_field",
+    # api_key_name: str = "OPENAI_API_KEY",
+):
+    """
+    Solving the described task on a data frame and put the results into a new column.
+    If there are multiple new_column_names provided, the structured dictionary output
+    will be split into multiple columns.
+    """
+    # handling inputs
+    system_message = system_prompt[0]["prompt_message"]
+    instruction_message = instruction_prompt[0]["prompt_message"]
+    # preparing output
+    out_df = df.copy()
+    # connecting to the LLM
+    llm_params = {"name": llm_interface}
+    # if api_key_name:
+    #     llm_params["api_key"] = os.getenv(api_key_name)
+    llm = get_llm_engine(**llm_params)
+    # getting the list of fieldnames used in the instruction message
+    fieldnames = []
+    for pot_fieldname in df.columns:
+        if "{" + pot_fieldname + "}" in instruction_message.content:
+            fieldnames.append(pot_fieldname)
+    # generate a list of instruction messages (from fieldnames)
+    # each row of the df is a separate instruction message
+    # TODO: make it fast for large dataframes
+    instruction_messages = []
+    for i in range(len(df)):
+        instruction_message_i = deepcopy(instruction_message)
+        for fieldname in fieldnames:
+            instruction_message_i.content = instruction_message_i.content.replace(
+                "{" + fieldname + "}", str(df.iloc[i][fieldname])
+            )
+        instruction_messages.append(instruction_message_i)
+    # generate completition prompt
+    completion_prompts = [
+        ChatCompletionPrompt(
+            model=llm_model_name,
+            messages=[system_message, instruction_message_j],
+        )
+        for instruction_message_j in instruction_messages
+    ]
+    # get the answers
+    tasks = [llm.acreate_completion(completion_prompt=_prompt) for _prompt in completion_prompts]
+    out_completions = await asyncio.gather(*tasks)
+    # answer post-processing: 1 vs more columns
+    col_list = [_c.strip() for _c in new_column_names.split(",") if _c.strip()]
+    if len(col_list) == 0:
+        raise ValueError("No valid column names specified.")
+    elif len(col_list) == 1:
+        out_df[col_list[0]] = [result.choices[0].message.content for result in out_completions]
+    else:
+        answers = [
+            dictionary_corrector(result.choices[0].message.content, expected_keys=col_list)
+            for result in out_completions
+        ]
+        for i, col in enumerate(col_list):
+            out_df[col] = [answer[col] for answer in answers]
+    return out_df  # {"dataframe": out_df}
 @output_on_top
 @op("Truncate history")
 def truncate_history(*, max_tokens=10000):
     return {"text": chat}
+@op("View DataFrame", view="table_view")
+def view_df(input, *, _ctx: one_by_one.Context):
+    """
+    TODO: This part is not working
+    """
+    v = _ctx.last_result
+    if v:
+        columns = v["dataframes"]["df"]["columns"]
+        v["dataframes"]["df"]["data"].append([input[c] for c in columns])
+    else:
+        columns = [str(c) for c in input.keys() if not str(c).startswith("_")]
+        v = {
+            "dataframes": {
+                "df": {
+                    "columns": columns,
+                    "data": [[input[c] for c in columns]],
+                }
+            }
+        }
+    return v
 @op("View", view="table_view")
 def view(input):
     columns = [str(c) for c in input.keys() if not str(c).startswith("_")]