mszel commited on
Commit
98383bd
·
1 Parent(s): 48202b3

adding FAQ chatbot

Browse files
examples/LynxScribe FAQ Chatbot Builder.lynxkite.json ADDED
@@ -0,0 +1,605 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "edges": [
3
+ {
4
+ "id": "LynxScribe FAQ to RAG 1 LynxScribe RAG Graph Chatbot Builder 1",
5
+ "source": "LynxScribe FAQ to RAG 1",
6
+ "sourceHandle": "output",
7
+ "target": "LynxScribe RAG Graph Chatbot Builder 1",
8
+ "targetHandle": "rag_graph"
9
+ },
10
+ {
11
+ "id": "LynxScribe RAG Graph Chatbot Builder 1 LynxScribe RAG Graph Chatbot Backend 1",
12
+ "source": "LynxScribe RAG Graph Chatbot Builder 1",
13
+ "sourceHandle": "output",
14
+ "target": "LynxScribe RAG Graph Chatbot Backend 1",
15
+ "targetHandle": "knowledge_base"
16
+ },
17
+ {
18
+ "id": "Chat processor 1 LynxScribe RAG Graph Chatbot Backend 1",
19
+ "source": "Chat processor 1",
20
+ "sourceHandle": "output",
21
+ "target": "LynxScribe RAG Graph Chatbot Backend 1",
22
+ "targetHandle": "chat_processor"
23
+ },
24
+ {
25
+ "id": "Truncate history 1 Chat processor 1",
26
+ "source": "Truncate history 1",
27
+ "sourceHandle": "output",
28
+ "target": "Chat processor 1",
29
+ "targetHandle": "processor"
30
+ },
31
+ {
32
+ "id": "LynxScribe RAG Graph Chatbot Backend 1 Test Chat API 1",
33
+ "source": "LynxScribe RAG Graph Chatbot Backend 1",
34
+ "sourceHandle": "output",
35
+ "target": "Test Chat API 1",
36
+ "targetHandle": "chat_api"
37
+ },
38
+ {
39
+ "id": "Input chat 1 Test Chat API 1",
40
+ "source": "Input chat 1",
41
+ "sourceHandle": "output",
42
+ "target": "Test Chat API 1",
43
+ "targetHandle": "message"
44
+ },
45
+ {
46
+ "id": "Test Chat API 1 View 1",
47
+ "source": "Test Chat API 1",
48
+ "sourceHandle": "output",
49
+ "target": "View 1",
50
+ "targetHandle": "input"
51
+ }
52
+ ],
53
+ "env": "LynxScribe",
54
+ "nodes": [
55
+ {
56
+ "data": {
57
+ "__execution_delay": 0.0,
58
+ "collapsed": false,
59
+ "display": null,
60
+ "error": null,
61
+ "input_metadata": null,
62
+ "meta": {
63
+ "inputs": {},
64
+ "name": "LynxScribe FAQ to RAG",
65
+ "outputs": {
66
+ "output": {
67
+ "name": "output",
68
+ "position": "right",
69
+ "type": {
70
+ "type": "None"
71
+ }
72
+ }
73
+ },
74
+ "params": {
75
+ "faq_excel_path": {
76
+ "default": "uploads/organon_demo/organon_en_copy.xlsx",
77
+ "name": "faq_excel_path",
78
+ "type": {
79
+ "type": "<class 'str'>"
80
+ }
81
+ },
82
+ "scenario_cluster_distance_pct": {
83
+ "default": 30.0,
84
+ "name": "scenario_cluster_distance_pct",
85
+ "type": {
86
+ "type": "<class 'float'>"
87
+ }
88
+ },
89
+ "text_embedder_interface": {
90
+ "default": "openai",
91
+ "name": "text_embedder_interface",
92
+ "type": {
93
+ "type": "<class 'str'>"
94
+ }
95
+ },
96
+ "text_embedder_model_name_or_path": {
97
+ "default": "text-embedding-3-large",
98
+ "name": "text_embedder_model_name_or_path",
99
+ "type": {
100
+ "type": "<class 'str'>"
101
+ }
102
+ },
103
+ "vdb_collection_name": {
104
+ "default": "lynx",
105
+ "name": "vdb_collection_name",
106
+ "type": {
107
+ "type": "<class 'str'>"
108
+ }
109
+ },
110
+ "vdb_num_dimensions": {
111
+ "default": 3072.0,
112
+ "name": "vdb_num_dimensions",
113
+ "type": {
114
+ "type": "<class 'int'>"
115
+ }
116
+ },
117
+ "vdb_provider_name": {
118
+ "default": "faiss",
119
+ "name": "vdb_provider_name",
120
+ "type": {
121
+ "type": "<class 'str'>"
122
+ }
123
+ }
124
+ },
125
+ "type": "basic"
126
+ },
127
+ "params": {
128
+ "faq_excel_path": "uploads/organon_demo/organon_en_copy.xlsx",
129
+ "scenario_cluster_distance_pct": "30",
130
+ "text_embedder_interface": "openai",
131
+ "text_embedder_model_name_or_path": "text-embedding-3-large",
132
+ "vdb_collection_name": "lynx",
133
+ "vdb_num_dimensions": 3072.0,
134
+ "vdb_provider_name": "faiss"
135
+ },
136
+ "status": "done",
137
+ "title": "LynxScribe FAQ to RAG"
138
+ },
139
+ "dragHandle": ".bg-primary",
140
+ "height": 620.0,
141
+ "id": "LynxScribe FAQ to RAG 1",
142
+ "position": {
143
+ "x": -1180.0,
144
+ "y": -76.0
145
+ },
146
+ "type": "basic",
147
+ "width": 415.0
148
+ },
149
+ {
150
+ "data": {
151
+ "__execution_delay": 0.0,
152
+ "collapsed": false,
153
+ "display": null,
154
+ "error": null,
155
+ "input_metadata": null,
156
+ "meta": {
157
+ "inputs": {
158
+ "rag_graph": {
159
+ "name": "rag_graph",
160
+ "position": "left",
161
+ "type": {
162
+ "type": "<class 'inspect._empty'>"
163
+ }
164
+ }
165
+ },
166
+ "name": "LynxScribe RAG Graph Chatbot Builder",
167
+ "outputs": {
168
+ "output": {
169
+ "name": "output",
170
+ "position": "top",
171
+ "type": {
172
+ "type": "None"
173
+ }
174
+ }
175
+ },
176
+ "params": {
177
+ "node_types": {
178
+ "default": "intent_cluster",
179
+ "name": "node_types",
180
+ "type": {
181
+ "type": "<class 'str'>"
182
+ }
183
+ },
184
+ "scenario_file": {
185
+ "default": "uploads/lynx_chatbot_scenario_selector.yaml",
186
+ "name": "scenario_file",
187
+ "type": {
188
+ "type": "<class 'str'>"
189
+ }
190
+ },
191
+ "scenario_meta_name": {
192
+ "default": "scenario_name",
193
+ "name": "scenario_meta_name",
194
+ "type": {
195
+ "type": "<class 'str'>"
196
+ }
197
+ }
198
+ },
199
+ "position": {
200
+ "x": 1569.0,
201
+ "y": 528.0
202
+ },
203
+ "type": "basic"
204
+ },
205
+ "params": {
206
+ "node_types": "intent_cluster",
207
+ "scenario_file": "uploads/organon_demo/backend-scenarios-en.yaml",
208
+ "scenario_meta_name": "scenario_name"
209
+ },
210
+ "status": "done",
211
+ "title": "LynxScribe RAG Graph Chatbot Builder"
212
+ },
213
+ "dragHandle": ".bg-primary",
214
+ "height": 296.0,
215
+ "id": "LynxScribe RAG Graph Chatbot Builder 1",
216
+ "position": {
217
+ "x": -591.0,
218
+ "y": 86.0
219
+ },
220
+ "type": "basic",
221
+ "width": 547.0
222
+ },
223
+ {
224
+ "data": {
225
+ "__execution_delay": 0.0,
226
+ "collapsed": null,
227
+ "display": null,
228
+ "error": null,
229
+ "input_metadata": null,
230
+ "meta": {
231
+ "inputs": {
232
+ "chat_processor": {
233
+ "name": "chat_processor",
234
+ "position": "bottom",
235
+ "type": {
236
+ "type": "<class 'inspect._empty'>"
237
+ }
238
+ },
239
+ "knowledge_base": {
240
+ "name": "knowledge_base",
241
+ "position": "bottom",
242
+ "type": {
243
+ "type": "<class 'inspect._empty'>"
244
+ }
245
+ }
246
+ },
247
+ "name": "LynxScribe RAG Graph Chatbot Backend",
248
+ "outputs": {
249
+ "output": {
250
+ "name": "output",
251
+ "position": "top",
252
+ "type": {
253
+ "type": "None"
254
+ }
255
+ }
256
+ },
257
+ "params": {
258
+ "llm_interface": {
259
+ "default": "openai",
260
+ "name": "llm_interface",
261
+ "type": {
262
+ "type": "<class 'str'>"
263
+ }
264
+ },
265
+ "llm_model_name": {
266
+ "default": "gpt-4o",
267
+ "name": "llm_model_name",
268
+ "type": {
269
+ "type": "<class 'str'>"
270
+ }
271
+ },
272
+ "negative_answer": {
273
+ "default": "I'm sorry, but the data I've been trained on does not contain any information related to your question.",
274
+ "name": "negative_answer",
275
+ "type": {
276
+ "type": "<class 'str'>"
277
+ }
278
+ },
279
+ "retriever_limits_by_type": {
280
+ "default": "{}",
281
+ "name": "retriever_limits_by_type",
282
+ "type": {
283
+ "type": "<class 'str'>"
284
+ }
285
+ },
286
+ "retriever_max_iterations": {
287
+ "default": 3.0,
288
+ "name": "retriever_max_iterations",
289
+ "type": {
290
+ "type": "<class 'int'>"
291
+ }
292
+ },
293
+ "retriever_overall_chunk_limit": {
294
+ "default": 20.0,
295
+ "name": "retriever_overall_chunk_limit",
296
+ "type": {
297
+ "type": "<class 'int'>"
298
+ }
299
+ },
300
+ "retriever_overall_token_limit": {
301
+ "default": 3000.0,
302
+ "name": "retriever_overall_token_limit",
303
+ "type": {
304
+ "type": "<class 'int'>"
305
+ }
306
+ },
307
+ "retriever_strict_limits": {
308
+ "default": true,
309
+ "name": "retriever_strict_limits",
310
+ "type": {
311
+ "type": "<class 'bool'>"
312
+ }
313
+ }
314
+ },
315
+ "position": {
316
+ "x": 1280.0,
317
+ "y": 450.0
318
+ },
319
+ "type": "basic"
320
+ },
321
+ "params": {
322
+ "llm_interface": "openai",
323
+ "llm_model_name": "gpt-4o",
324
+ "negative_answer": "I'm sorry, but the data I've been trained on does not contain any information related to your question.",
325
+ "retriever_limits_by_type": "{\"faq_question\": [0, 0], \"faq_answer\": [3, 3]}",
326
+ "retriever_max_iterations": "3",
327
+ "retriever_overall_chunk_limit": "3",
328
+ "retriever_overall_token_limit": "30000",
329
+ "retriever_strict_limits": true
330
+ },
331
+ "status": "done",
332
+ "title": "LynxScribe RAG Graph Chatbot Backend"
333
+ },
334
+ "dragHandle": ".bg-primary",
335
+ "height": 382.0,
336
+ "id": "LynxScribe RAG Graph Chatbot Backend 1",
337
+ "position": {
338
+ "x": -427.131476508498,
339
+ "y": -465.1194966607713
340
+ },
341
+ "type": "basic",
342
+ "width": 791.0
343
+ },
344
+ {
345
+ "data": {
346
+ "display": null,
347
+ "error": null,
348
+ "input_metadata": null,
349
+ "meta": {
350
+ "inputs": {
351
+ "processor": {
352
+ "name": "processor",
353
+ "position": "bottom",
354
+ "type": {
355
+ "type": "<class 'inspect._empty'>"
356
+ }
357
+ }
358
+ },
359
+ "name": "Chat processor",
360
+ "outputs": {
361
+ "output": {
362
+ "name": "output",
363
+ "position": "top",
364
+ "type": {
365
+ "type": "None"
366
+ }
367
+ }
368
+ },
369
+ "params": {},
370
+ "position": {
371
+ "x": 1291.0,
372
+ "y": 718.0
373
+ },
374
+ "type": "basic"
375
+ },
376
+ "params": {},
377
+ "status": "done",
378
+ "title": "Chat processor"
379
+ },
380
+ "dragHandle": ".bg-primary",
381
+ "height": 200.0,
382
+ "id": "Chat processor 1",
383
+ "position": {
384
+ "x": 252.7291107206022,
385
+ "y": 81.86852349150202
386
+ },
387
+ "type": "basic",
388
+ "width": 200.0
389
+ },
390
+ {
391
+ "data": {
392
+ "display": null,
393
+ "error": null,
394
+ "input_metadata": null,
395
+ "meta": {
396
+ "inputs": {},
397
+ "name": "Truncate history",
398
+ "outputs": {
399
+ "output": {
400
+ "name": "output",
401
+ "position": "top",
402
+ "type": {
403
+ "type": "None"
404
+ }
405
+ }
406
+ },
407
+ "params": {
408
+ "max_tokens": {
409
+ "default": 10000.0,
410
+ "name": "max_tokens",
411
+ "type": {
412
+ "type": "<class 'int'>"
413
+ }
414
+ }
415
+ },
416
+ "position": {
417
+ "x": 1440.0,
418
+ "y": 936.0
419
+ },
420
+ "type": "basic"
421
+ },
422
+ "params": {
423
+ "max_tokens": 10000.0
424
+ },
425
+ "status": "done",
426
+ "title": "Truncate history"
427
+ },
428
+ "dragHandle": ".bg-primary",
429
+ "height": 200.0,
430
+ "id": "Truncate history 1",
431
+ "position": {
432
+ "x": 253.59374153502728,
433
+ "y": 386.4661577036063
434
+ },
435
+ "type": "basic",
436
+ "width": 200.0
437
+ },
438
+ {
439
+ "data": {
440
+ "__execution_delay": 0.0,
441
+ "collapsed": null,
442
+ "display": null,
443
+ "error": null,
444
+ "input_metadata": null,
445
+ "meta": {
446
+ "inputs": {},
447
+ "name": "Input chat",
448
+ "outputs": {
449
+ "output": {
450
+ "name": "output",
451
+ "position": "right",
452
+ "type": {
453
+ "type": "None"
454
+ }
455
+ }
456
+ },
457
+ "params": {
458
+ "chat": {
459
+ "default": null,
460
+ "name": "chat",
461
+ "type": {
462
+ "type": "<class 'str'>"
463
+ }
464
+ }
465
+ },
466
+ "position": {
467
+ "x": 449.0,
468
+ "y": 172.0
469
+ },
470
+ "type": "basic"
471
+ },
472
+ "params": {
473
+ "chat": "I had headache after taking the pill"
474
+ },
475
+ "status": "done",
476
+ "title": "Input chat"
477
+ },
478
+ "dragHandle": ".bg-primary",
479
+ "height": 204.0,
480
+ "id": "Input chat 1",
481
+ "position": {
482
+ "x": -1115.7774404622555,
483
+ "y": -747.1320865489535
484
+ },
485
+ "type": "basic",
486
+ "width": 552.0
487
+ },
488
+ {
489
+ "data": {
490
+ "__execution_delay": 0.0,
491
+ "collapsed": null,
492
+ "display": null,
493
+ "error": null,
494
+ "input_metadata": null,
495
+ "meta": {
496
+ "inputs": {
497
+ "chat_api": {
498
+ "name": "chat_api",
499
+ "position": "bottom",
500
+ "type": {
501
+ "type": "<class 'inspect._empty'>"
502
+ }
503
+ },
504
+ "message": {
505
+ "name": "message",
506
+ "position": "left",
507
+ "type": {
508
+ "type": "<class 'inspect._empty'>"
509
+ }
510
+ }
511
+ },
512
+ "name": "Test Chat API",
513
+ "outputs": {
514
+ "output": {
515
+ "name": "output",
516
+ "position": "right",
517
+ "type": {
518
+ "type": "None"
519
+ }
520
+ }
521
+ },
522
+ "params": {
523
+ "show_details": {
524
+ "default": false,
525
+ "name": "show_details",
526
+ "type": {
527
+ "type": "<class 'bool'>"
528
+ }
529
+ }
530
+ },
531
+ "position": {
532
+ "x": 937.0,
533
+ "y": 213.0
534
+ },
535
+ "type": "basic"
536
+ },
537
+ "params": {
538
+ "show_details": false
539
+ },
540
+ "status": "done",
541
+ "title": "Test Chat API"
542
+ },
543
+ "dragHandle": ".bg-primary",
544
+ "height": 200.0,
545
+ "id": "Test Chat API 1",
546
+ "position": {
547
+ "x": -131.54900620226195,
548
+ "y": -745.4660726292032
549
+ },
550
+ "type": "basic",
551
+ "width": 200.0
552
+ },
553
+ {
554
+ "data": {
555
+ "display": {
556
+ "dataframes": {
557
+ "df": {
558
+ "columns": [
559
+ "answer"
560
+ ],
561
+ "data": [
562
+ [
563
+ "I'm not equipped to handle adverse events or other product-related queries. Your safety is important to us, and we want to ensure you receive the appropriate support. Please report any adverse events or concerns to our dedicated support team. They can be reached at [email protected]. If you have any questions related to contraceptives or women's health, please feel free to ask, and I'll provide you with the information you need.\n"
564
+ ]
565
+ ]
566
+ }
567
+ }
568
+ },
569
+ "error": null,
570
+ "input_metadata": null,
571
+ "meta": {
572
+ "inputs": {
573
+ "input": {
574
+ "name": "input",
575
+ "position": "left",
576
+ "type": {
577
+ "type": "<class 'inspect._empty'>"
578
+ }
579
+ }
580
+ },
581
+ "name": "View",
582
+ "outputs": {},
583
+ "params": {},
584
+ "position": {
585
+ "x": 1547.0,
586
+ "y": 222.0
587
+ },
588
+ "type": "table_view"
589
+ },
590
+ "params": {},
591
+ "status": "done",
592
+ "title": "View"
593
+ },
594
+ "dragHandle": ".bg-primary",
595
+ "height": 483.0,
596
+ "id": "View 1",
597
+ "position": {
598
+ "x": 540.6544350347407,
599
+ "y": -886.065865503576
600
+ },
601
+ "type": "table_view",
602
+ "width": 707.0
603
+ }
604
+ ]
605
+ }
examples/LynxScribe RAG Chatbot.lynxkite.json CHANGED
@@ -7,20 +7,6 @@
7
  "target": "LynxScribe Text RAG Loader 1",
8
  "targetHandle": "file_urls"
9
  },
10
- {
11
- "id": "LynxScribe Text RAG Loader 1 LynxScribe RAG Graph Chatbot Builder 1",
12
- "source": "LynxScribe Text RAG Loader 1",
13
- "sourceHandle": "output",
14
- "target": "LynxScribe RAG Graph Chatbot Builder 1",
15
- "targetHandle": "rag_graph"
16
- },
17
- {
18
- "id": "LynxScribe RAG Graph Chatbot Builder 1 LynxScribe RAG Graph Chatbot Backend 1",
19
- "source": "LynxScribe RAG Graph Chatbot Builder 1",
20
- "sourceHandle": "output",
21
- "target": "LynxScribe RAG Graph Chatbot Backend 1",
22
- "targetHandle": "knowledge_base"
23
- },
24
  {
25
  "id": "Truncate history 1 Chat processor 1",
26
  "source": "Truncate history 1",
@@ -62,6 +48,20 @@
62
  "sourceHandle": "output",
63
  "target": "View 1",
64
  "targetHandle": "input"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  }
66
  ],
67
  "env": "LynxScribe",
@@ -112,10 +112,6 @@
112
  }
113
  }
114
  },
115
- "position": {
116
- "x": 530.0,
117
- "y": 350.0
118
- },
119
  "type": "basic"
120
  },
121
  "params": {
@@ -210,10 +206,6 @@
210
  }
211
  }
212
  },
213
- "position": {
214
- "x": 1048.0,
215
- "y": 762.0
216
- },
217
  "type": "basic"
218
  },
219
  "params": {
@@ -237,70 +229,6 @@
237
  "type": "basic",
238
  "width": 290.0
239
  },
240
- {
241
- "data": {
242
- "display": null,
243
- "error": null,
244
- "input_metadata": null,
245
- "meta": {
246
- "inputs": {
247
- "rag_graph": {
248
- "name": "rag_graph",
249
- "position": "left",
250
- "type": {
251
- "type": "<class 'inspect._empty'>"
252
- }
253
- }
254
- },
255
- "name": "LynxScribe RAG Graph Chatbot Builder",
256
- "outputs": {
257
- "output": {
258
- "name": "output",
259
- "position": "top",
260
- "type": {
261
- "type": "None"
262
- }
263
- }
264
- },
265
- "params": {
266
- "node_types": {
267
- "default": "intent_cluster",
268
- "name": "node_types",
269
- "type": {
270
- "type": "<class 'str'>"
271
- }
272
- },
273
- "scenario_file": {
274
- "default": "uploads/lynx_chatbot_scenario_selector.yaml",
275
- "name": "scenario_file",
276
- "type": {
277
- "type": "<class 'str'>"
278
- }
279
- }
280
- },
281
- "position": {
282
- "x": 1451.0,
283
- "y": 752.0
284
- },
285
- "type": "basic"
286
- },
287
- "params": {
288
- "node_types": "intent_cluster",
289
- "scenario_file": "uploads/lynx_chatbot_scenario_selector.yaml"
290
- },
291
- "status": "done",
292
- "title": "LynxScribe RAG Graph Chatbot Builder"
293
- },
294
- "dragHandle": ".bg-primary",
295
- "height": 208.0,
296
- "id": "LynxScribe RAG Graph Chatbot Builder 1",
297
- "position": {
298
- "x": 245.0,
299
- "y": 421.0
300
- },
301
- "type": "basic",
302
- "width": 407.0
303
- },
304
  {
305
  "data": {
306
  "__execution_delay": 0.0,
@@ -393,10 +321,6 @@
393
  }
394
  }
395
  },
396
- "position": {
397
- "x": 1658.0,
398
- "y": 587.0
399
- },
400
  "type": "basic"
401
  },
402
  "params": {
@@ -404,7 +328,7 @@
404
  "llm_model_name": "gpt-4o",
405
  "negative_answer": "I'm sorry, but the data I've been trained on does not contain any information related to your question.",
406
  "retriever_limits_by_type": "{\"information\": [1, 5], \"summary\": [0, 2], \"template_qna\": [1, 3], \"QnA question\": [0, 0]}",
407
- "retriever_max_iterations": 3.0,
408
  "retriever_overall_chunk_limit": 20.0,
409
  "retriever_overall_token_limit": 3000.0,
410
  "retriever_strict_limits": true
@@ -448,10 +372,6 @@
448
  }
449
  },
450
  "params": {},
451
- "position": {
452
- "x": 1742.0,
453
- "y": 847.0
454
- },
455
  "type": "basic"
456
  },
457
  "params": {},
@@ -462,8 +382,8 @@
462
  "height": 220.0,
463
  "id": "Chat processor 1",
464
  "position": {
465
- "x": 921.2787659034059,
466
- "y": 431.57233488216445
467
  },
468
  "type": "basic",
469
  "width": 387.0
@@ -494,10 +414,6 @@
494
  }
495
  }
496
  },
497
- "position": {
498
- "x": 1991.0,
499
- "y": 1042.0
500
- },
501
  "type": "basic"
502
  },
503
  "params": {
@@ -565,10 +481,6 @@
565
  }
566
  }
567
  },
568
- "position": {
569
- "x": 2003.0,
570
- "y": 1053.0
571
- },
572
  "type": "basic"
573
  },
574
  "params": {
@@ -618,10 +530,6 @@
618
  }
619
  }
620
  },
621
- "position": {
622
- "x": 2012.0,
623
- "y": 475.0
624
- },
625
  "type": "basic"
626
  },
627
  "params": {
@@ -683,10 +591,6 @@
683
  }
684
  }
685
  },
686
- "position": {
687
- "x": 1238.0,
688
- "y": 211.0
689
- },
690
  "type": "basic"
691
  },
692
  "params": {
@@ -715,7 +619,7 @@
715
  ],
716
  "data": [
717
  [
718
- "The CEO of Lynx Analytics is Gyorgy Lajtai. He co-founded the company in 2010 and has a background in CRM, marketing automation, and systems."
719
  ]
720
  ]
721
  }
@@ -736,10 +640,6 @@
736
  "name": "View",
737
  "outputs": {},
738
  "params": {},
739
- "position": {
740
- "x": 1746.0,
741
- "y": 232.0
742
- },
743
  "type": "table_view"
744
  },
745
  "params": {},
@@ -755,6 +655,80 @@
755
  },
756
  "type": "table_view",
757
  "width": 995.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
758
  }
759
  ]
760
  }
 
7
  "target": "LynxScribe Text RAG Loader 1",
8
  "targetHandle": "file_urls"
9
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "id": "Truncate history 1 Chat processor 1",
12
  "source": "Truncate history 1",
 
48
  "sourceHandle": "output",
49
  "target": "View 1",
50
  "targetHandle": "input"
51
+ },
52
+ {
53
+ "id": "LynxScribe Text RAG Loader 1 LynxScribe RAG Graph Chatbot Builder 1",
54
+ "source": "LynxScribe Text RAG Loader 1",
55
+ "sourceHandle": "output",
56
+ "target": "LynxScribe RAG Graph Chatbot Builder 1",
57
+ "targetHandle": "rag_graph"
58
+ },
59
+ {
60
+ "id": "LynxScribe RAG Graph Chatbot Builder 1 LynxScribe RAG Graph Chatbot Backend 1",
61
+ "source": "LynxScribe RAG Graph Chatbot Builder 1",
62
+ "sourceHandle": "output",
63
+ "target": "LynxScribe RAG Graph Chatbot Backend 1",
64
+ "targetHandle": "knowledge_base"
65
  }
66
  ],
67
  "env": "LynxScribe",
 
112
  }
113
  }
114
  },
 
 
 
 
115
  "type": "basic"
116
  },
117
  "params": {
 
206
  }
207
  }
208
  },
 
 
 
 
209
  "type": "basic"
210
  },
211
  "params": {
 
229
  "type": "basic",
230
  "width": 290.0
231
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  {
233
  "data": {
234
  "__execution_delay": 0.0,
 
321
  }
322
  }
323
  },
 
 
 
 
324
  "type": "basic"
325
  },
326
  "params": {
 
328
  "llm_model_name": "gpt-4o",
329
  "negative_answer": "I'm sorry, but the data I've been trained on does not contain any information related to your question.",
330
  "retriever_limits_by_type": "{\"information\": [1, 5], \"summary\": [0, 2], \"template_qna\": [1, 3], \"QnA question\": [0, 0]}",
331
+ "retriever_max_iterations": "3",
332
  "retriever_overall_chunk_limit": 20.0,
333
  "retriever_overall_token_limit": 3000.0,
334
  "retriever_strict_limits": true
 
372
  }
373
  },
374
  "params": {},
 
 
 
 
375
  "type": "basic"
376
  },
377
  "params": {},
 
382
  "height": 220.0,
383
  "id": "Chat processor 1",
384
  "position": {
385
+ "x": 907.3546850533578,
386
+ "y": 381.09754180073975
387
  },
388
  "type": "basic",
389
  "width": 387.0
 
414
  }
415
  }
416
  },
 
 
 
 
417
  "type": "basic"
418
  },
419
  "params": {
 
481
  }
482
  }
483
  },
 
 
 
 
484
  "type": "basic"
485
  },
486
  "params": {
 
530
  }
531
  }
532
  },
 
 
 
 
533
  "type": "basic"
534
  },
535
  "params": {
 
591
  }
592
  }
593
  },
 
 
 
 
594
  "type": "basic"
595
  },
596
  "params": {
 
619
  ],
620
  "data": [
621
  [
622
+ "The CEO of Lynx Analytics is Gyorgy Lajtai. He is also a co-founder of the company and has a rich background in CRM, marketing automation, and systems."
623
  ]
624
  ]
625
  }
 
640
  "name": "View",
641
  "outputs": {},
642
  "params": {},
 
 
 
 
643
  "type": "table_view"
644
  },
645
  "params": {},
 
655
  },
656
  "type": "table_view",
657
  "width": 995.0
658
+ },
659
+ {
660
+ "data": {
661
+ "__execution_delay": 0.0,
662
+ "collapsed": null,
663
+ "display": null,
664
+ "error": null,
665
+ "input_metadata": null,
666
+ "meta": {
667
+ "inputs": {
668
+ "rag_graph": {
669
+ "name": "rag_graph",
670
+ "position": "left",
671
+ "type": {
672
+ "type": "<class 'inspect._empty'>"
673
+ }
674
+ }
675
+ },
676
+ "name": "LynxScribe RAG Graph Chatbot Builder",
677
+ "outputs": {
678
+ "output": {
679
+ "name": "output",
680
+ "position": "top",
681
+ "type": {
682
+ "type": "None"
683
+ }
684
+ }
685
+ },
686
+ "params": {
687
+ "node_types": {
688
+ "default": "intent_cluster",
689
+ "name": "node_types",
690
+ "type": {
691
+ "type": "<class 'str'>"
692
+ }
693
+ },
694
+ "scenario_file": {
695
+ "default": "uploads/lynx_chatbot_scenario_selector.yaml",
696
+ "name": "scenario_file",
697
+ "type": {
698
+ "type": "<class 'str'>"
699
+ }
700
+ },
701
+ "scenario_meta_name": {
702
+ "default": "scenario_name",
703
+ "name": "scenario_meta_name",
704
+ "type": {
705
+ "type": "<class 'str'>"
706
+ }
707
+ }
708
+ },
709
+ "position": {
710
+ "x": 1121.0,
711
+ "y": 813.0
712
+ },
713
+ "type": "basic"
714
+ },
715
+ "params": {
716
+ "node_types": "intent_cluster",
717
+ "scenario_file": "uploads/lynx_chatbot_scenario_selector.yaml",
718
+ "scenario_meta_name": ""
719
+ },
720
+ "status": "done",
721
+ "title": "LynxScribe RAG Graph Chatbot Builder"
722
+ },
723
+ "dragHandle": ".bg-primary",
724
+ "height": 297.0,
725
+ "id": "LynxScribe RAG Graph Chatbot Builder 1",
726
+ "position": {
727
+ "x": 328.41755532473496,
728
+ "y": 378.2277574498554
729
+ },
730
+ "type": "basic",
731
+ "width": 396.0
732
  }
733
  ]
734
  }
lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py CHANGED
@@ -9,6 +9,7 @@ from enum import Enum
9
  import asyncio
10
  import pandas as pd
11
  import joblib
 
12
 
13
  import pathlib
14
  from lynxscribe.core.llm.base import get_llm_engine
@@ -16,6 +17,7 @@ from lynxscribe.core.vector_store.base import get_vector_store
16
  from lynxscribe.common.config import load_config
17
  from lynxscribe.components.text.embedder import TextEmbedder
18
  from lynxscribe.core.models.embedding import Embedding
 
19
 
20
  from lynxscribe.components.rag.rag_graph import RAGGraph
21
  from lynxscribe.components.rag.knowledge_base_graph import PandasKnowledgeBaseGraph
@@ -27,6 +29,7 @@ from lynxscribe.components.chat.processors import (
27
  )
28
  from lynxscribe.components.chat.api import ChatAPI
29
  from lynxscribe.core.models.prompts import ChatCompletionPrompt
 
30
 
31
  from lynxkite.core import ops
32
  import json
@@ -53,6 +56,60 @@ class RAGVersion(Enum):
53
  V2 = "v2"
54
 
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  @op("Cloud-sourced File Listing")
57
  def cloud_file_loader(
58
  *,
@@ -397,14 +454,102 @@ def ls_text_rag_loader(
397
  return {"rag_graph": rag_graph}
398
 
399
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
  @output_on_top
401
  @op("LynxScribe RAG Graph Chatbot Builder")
402
- @mem.cache
403
  def ls_rag_chatbot_builder(
404
  rag_graph,
405
  *,
406
  scenario_file: str = "uploads/lynx_chatbot_scenario_selector.yaml",
407
  node_types: str = "intent_cluster",
 
408
  ):
409
  """
410
  Builds up a RAG Graph-based chatbot (basically the loaded RAG graph +
@@ -422,11 +567,15 @@ def ls_rag_chatbot_builder(
422
  # rag_graph = rag_graph[0]["rag_graph"] TODO: check why is it bad
423
  rag_graph = rag_graph["rag_graph"]
424
 
 
 
 
 
 
 
 
425
  # loading the scenarios
426
- scenario_selector = ScenarioSelector(
427
- scenarios=[Scenario(**scenario) for scenario in scenarios],
428
- node_types=node_types,
429
- )
430
 
431
  # TODO: later we should unify this "knowledge base" object across the functions
432
  # this could be always an input of a RAG Chatbot, but also for other apps.
@@ -554,7 +703,12 @@ async def test_chat_api(message, chat_api, *, show_details=False):
554
  messages=[{"role": "user", "content": message["text"]}],
555
  )
556
  response = await chat_api.answer(request, stream=False)
557
- answer = response.choices[0].message.content
 
 
 
 
 
558
  if show_details:
559
  return {"answer": answer, **response.__dict__}
560
  else:
 
9
  import asyncio
10
  import pandas as pd
11
  import joblib
12
+ from pydantic import BaseModel, ConfigDict
13
 
14
  import pathlib
15
  from lynxscribe.core.llm.base import get_llm_engine
 
17
  from lynxscribe.common.config import load_config
18
  from lynxscribe.components.text.embedder import TextEmbedder
19
  from lynxscribe.core.models.embedding import Embedding
20
+ from lynxscribe.components.embedding_clustering import FclusterBasedClustering
21
 
22
  from lynxscribe.components.rag.rag_graph import RAGGraph
23
  from lynxscribe.components.rag.knowledge_base_graph import PandasKnowledgeBaseGraph
 
29
  )
30
  from lynxscribe.components.chat.api import ChatAPI
31
  from lynxscribe.core.models.prompts import ChatCompletionPrompt
32
+ from lynxscribe.components.rag.loaders import FAQTemplateLoader
33
 
34
  from lynxkite.core import ops
35
  import json
 
56
  V2 = "v2"
57
 
58
 
59
class RAGTemplate(BaseModel):
    """
    Model for RAG templates consisting of three tables: they are connected via scenario names.

    One table (FAQs) contains scenario-denoted nodes to upsert into the knowledge base, the other
    two tables serve as the configuration for the scenario selector.

    Attributes:
        faq_data:
            Table where each row is an FAQ question, and possibly its answer pair. Will be fed into
            `FAQTemplateLoader.load_nodes_and_edges()`. For configuration of this table see the
            loader's init arguments.
        scenario_data:
            Table where each row is a Scenario, column names are thus scenario attributes. Will be
            fed into `ScenarioSelector.from_data()`.
        prompt_codes:
            Optional helper for the scenario table, may contain prompt code mappings to real prompt
            messages. It's enough then to use the codes instead of the full messages in the
            scenarios table. Will be fed into `ScenarioSelector.from_data()`.
    """

    # pandas DataFrames are not pydantic-native types, so arbitrary types must be allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    faq_data: pd.DataFrame
    scenario_data: pd.DataFrame
    # Mutable default is safe here: pydantic deep-copies field defaults per instance.
    prompt_codes: dict[str, str] = {}

    @classmethod
    def from_excel_path(
        cls,
        path: str,
        faq_data_sheet_name: str,
        scenario_data_sheet_name: str,
        prompt_codes_sheet_name: str | None = None,
    ) -> "RAGTemplate":
        """Create a RAGTemplate from an Excel file containing the two needed (plus one optional) sheets.

        Args:
            path: Path to the Excel workbook.
            faq_data_sheet_name: Sheet holding the FAQ table.
            scenario_data_sheet_name: Sheet holding the scenario table.
            prompt_codes_sheet_name: Optional sheet holding a two-column
                (code, prompt message) mapping. When omitted, `prompt_codes`
                defaults to an empty dict.

        Raises:
            ValueError: If the prompt codes sheet does not have exactly 2 columns.
        """

        def transform_codes(prompt_codes: pd.DataFrame) -> dict[str, str]:
            """Check and transform prompt codes table into a code dictionary."""
            if (len_columns := len(prompt_codes.columns)) != 2:
                raise ValueError(
                    f"Prompt codes should contain exactly 2 columns, {len_columns} found."
                )
            # First column is the code, second column is the full prompt message.
            return prompt_codes.set_index(prompt_codes.columns[0])[
                prompt_codes.columns[1]
            ].to_dict()

        return cls(
            faq_data=pd.read_excel(path, sheet_name=faq_data_sheet_name),
            scenario_data=pd.read_excel(path, sheet_name=scenario_data_sheet_name),
            prompt_codes=transform_codes(pd.read_excel(path, sheet_name=prompt_codes_sheet_name))
            if prompt_codes_sheet_name
            else {},
        )
111
+
112
+
113
  @op("Cloud-sourced File Listing")
114
  def cloud_file_loader(
115
  *,
 
454
  return {"rag_graph": rag_graph}
455
 
456
 
457
+ @op("LynxScribe FAQ to RAG")
458
+ @mem.cache
459
+ async def ls_faq_to_rag(
460
+ *,
461
+ faq_excel_path: str = "uploads/organon_demo/organon_en_copy.xlsx",
462
+ vdb_provider_name: str = "faiss",
463
+ vdb_num_dimensions: int = 3072,
464
+ vdb_collection_name: str = "lynx",
465
+ text_embedder_interface: str = "openai",
466
+ text_embedder_model_name_or_path: str = "text-embedding-3-large",
467
+ scenario_cluster_distance_pct: int = 30,
468
+ ):
469
+ """
470
+ Loading a text-based RAG graph from saved files (getting pandas readable links).
471
+ """
472
+
473
+ # getting the text embedder instance
474
+ llm_params = {"name": text_embedder_interface}
475
+ llm = get_llm_engine(**llm_params)
476
+ text_embedder = TextEmbedder(llm=llm, model=text_embedder_model_name_or_path)
477
+
478
+ # getting the vector store
479
+ if vdb_provider_name == "chromadb":
480
+ vector_store = get_vector_store(name=vdb_provider_name, collection_name=vdb_collection_name)
481
+ elif vdb_provider_name == "faiss":
482
+ vector_store = get_vector_store(name=vdb_provider_name, num_dimensions=vdb_num_dimensions)
483
+ else:
484
+ raise ValueError(f"Vector store name '{vdb_provider_name}' is not supported.")
485
+
486
+ # building up the RAG graph
487
+ rag_graph = RAGGraph(
488
+ PandasKnowledgeBaseGraph(vector_store=vector_store, text_embedder=text_embedder)
489
+ )
490
+
491
+ # loading the knowledge base from the FAQ file
492
+ rag_template = RAGTemplate.from_excel_path(
493
+ path=faq_excel_path,
494
+ faq_data_sheet_name="scenario_examples",
495
+ scenario_data_sheet_name="scenario_scripts",
496
+ prompt_codes_sheet_name="prompt_dictionary",
497
+ )
498
+
499
+ faq_loader_params = {
500
+ "id_column": "scenario_example_ID",
501
+ "timestamp_column": "last_modified_timestamp",
502
+ "validity_column": "valid_flg",
503
+ "question_type_contents_id": ["faq_question", "faq_question", "q_{id}"],
504
+ "answer_type_contents_id": ["faq_answer", "{faq_question}\n\n{faq_answer}", "a_{id}"],
505
+ "question_to_answer_edge_type_weight": ["qna", 1.0],
506
+ }
507
+
508
+ nodes, edges = FAQTemplateLoader(**faq_loader_params).load_nodes_and_edges(
509
+ rag_template.faq_data
510
+ )
511
+
512
+ await rag_graph.kg_base.upsert_nodes(*nodes)
513
+ rag_graph.kg_base.upsert_edges(edges)
514
+
515
+ # Generating scenario clusters
516
+ question_ids = [_id for _id in nodes[0] if _id.startswith("q_")]
517
+ stored_embeddings = rag_graph.kg_base.vector_store.get(
518
+ question_ids, include=["embeddings", "metadatas"]
519
+ )
520
+ embedding_vals = pd.Series([_emb.value for _emb in stored_embeddings], index=question_ids)
521
+ labels = pd.Series(
522
+ [_emb.metadata["scenario_name"] for _emb in stored_embeddings], index=question_ids
523
+ )
524
+ temp_cls = FclusterBasedClustering(distance_percentile=scenario_cluster_distance_pct)
525
+ temp_cls.fit(embedding_vals, labels)
526
+ df_tempclusters = temp_cls.get_cluster_centers()
527
+
528
+ # Adding the scenario clusters to the RAG Graph
529
+ df_tempclusters["template_id"] = "t_" + df_tempclusters.index.astype(str)
530
+ df_tempclusters["embedding"] = df_tempclusters.apply(
531
+ lambda row: Embedding(
532
+ id=row["template_id"],
533
+ value=row["cluster_center"],
534
+ metadata={"scenario_name": row["control_label"], "type": "intent_cluster"},
535
+ ),
536
+ axis=1,
537
+ )
538
+ embedding_list = df_tempclusters["embedding"].tolist()
539
+ rag_graph.kg_base.vector_store.upsert(embedding_list)
540
+
541
+ return {"rag_graph": rag_graph}
542
+
543
+
544
  @output_on_top
545
  @op("LynxScribe RAG Graph Chatbot Builder")
546
+ # @mem.cache
547
  def ls_rag_chatbot_builder(
548
  rag_graph,
549
  *,
550
  scenario_file: str = "uploads/lynx_chatbot_scenario_selector.yaml",
551
  node_types: str = "intent_cluster",
552
+ scenario_meta_name: str = "",
553
  ):
554
  """
555
  Builds up a RAG Graph-based chatbot (basically the loaded RAG graph +
 
567
  # rag_graph = rag_graph[0]["rag_graph"] TODO: check why is it bad
568
  rag_graph = rag_graph["rag_graph"]
569
 
570
+ parameters = {
571
+ "scenarios": [Scenario(**scenario) for scenario in scenarios],
572
+ "node_types": node_types,
573
+ }
574
+ if len(scenario_meta_name) > 0:
575
+ parameters["get_scenario_name"] = lambda node: node.metadata[scenario_meta_name]
576
+
577
  # loading the scenarios
578
+ scenario_selector = ScenarioSelector(**parameters)
 
 
 
579
 
580
  # TODO: later we should unify this "knowledge base" object across the functions
581
  # this could be always an input of a RAG Chatbot, but also for other apps.
 
703
  messages=[{"role": "user", "content": message["text"]}],
704
  )
705
  response = await chat_api.answer(request, stream=False)
706
+ if len(response.choices) == 0:
707
+ answer = "The following FAQ items are similar to the question:\n"
708
+ for item in response.sources:
709
+ answer += f"------------------------------------------------------ \n{item.body}\n\n"
710
+ else:
711
+ answer = response.choices[0].message.content
712
  if show_details:
713
  return {"answer": answer, **response.__dict__}
714
  else: