mszel committed on
Commit
6ba5741
·
1 Parent(s): 8883a14

follow ups on RAG graph part

Browse files
examples/LynxScribe Image RAG CHANGED
@@ -68,7 +68,7 @@
68
  "type": "basic"
69
  },
70
  "params": {
71
- "chat": "show me a picture about 2 doctors"
72
  },
73
  "status": "done",
74
  "title": "Input chat"
@@ -304,9 +304,9 @@
304
  },
305
  "params": {
306
  "text_embedder_interface": "openai",
307
- "text_embedder_model_name_or_path": "text-embedding-3-small",
308
  "vdb_collection_name": "lynx",
309
- "vdb_num_dimensions": "1536",
310
  "vdb_provider_name": "faiss"
311
  },
312
  "status": "done",
@@ -388,7 +388,7 @@
388
  },
389
  {
390
  "data": {
391
- "display": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test/surgery-1807541_1280.jpg",
392
  "error": null,
393
  "meta": {
394
  "inputs": {
@@ -414,11 +414,11 @@
414
  "title": "LynxScribe Image Result Viewer"
415
  },
416
  "dragHandle": ".bg-primary",
417
- "height": 622.0,
418
  "id": "LynxScribe Image Result Viewer 1",
419
  "position": {
420
- "x": 1550.5086064306404,
421
- "y": -349.93521115271193
422
  },
423
  "type": "image",
424
  "width": 802.0
 
68
  "type": "basic"
69
  },
70
  "params": {
71
+ "chat": "show me a picture about cyclists"
72
  },
73
  "status": "done",
74
  "title": "Input chat"
 
304
  },
305
  "params": {
306
  "text_embedder_interface": "openai",
307
+ "text_embedder_model_name_or_path": "text-embedding-3-large",
308
  "vdb_collection_name": "lynx",
309
+ "vdb_num_dimensions": "3072",
310
  "vdb_provider_name": "faiss"
311
  },
312
  "status": "done",
 
388
  },
389
  {
390
  "data": {
391
+ "display": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test/riding-7661658_1280.jpg",
392
  "error": null,
393
  "meta": {
394
  "inputs": {
 
414
  "title": "LynxScribe Image Result Viewer"
415
  },
416
  "dragHandle": ".bg-primary",
417
+ "height": 574.0,
418
  "id": "LynxScribe Image Result Viewer 1",
419
  "position": {
420
+ "x": 1551.9130513063783,
421
+ "y": -326.059648265166
422
  },
423
  "type": "image",
424
  "width": 802.0
examples/LynxScribe demo CHANGED
@@ -50,25 +50,25 @@
50
  "targetHandle": "chat_processor"
51
  },
52
  {
53
- "id": "Cloud-sourced File Loader 1 LynxScribe RAG Graph Chatbot Builder 1",
54
- "source": "Cloud-sourced File Loader 1",
55
  "sourceHandle": "output",
56
- "target": "LynxScribe RAG Graph Chatbot Builder 1",
57
  "targetHandle": "file_urls"
58
  },
 
 
 
 
 
 
 
59
  {
60
  "id": "LynxScribe RAG Graph Chatbot Builder 1 LynxScribe RAG Graph Chatbot Backend 1",
61
  "source": "LynxScribe RAG Graph Chatbot Builder 1",
62
  "sourceHandle": "output",
63
  "target": "LynxScribe RAG Graph Chatbot Backend 1",
64
  "targetHandle": "knowledge_base"
65
- },
66
- {
67
- "id": "LynxScribe RAG Graph Vector Store 1 LynxScribe RAG Graph Chatbot Builder 1",
68
- "source": "LynxScribe RAG Graph Vector Store 1",
69
- "sourceHandle": "output",
70
- "target": "LynxScribe RAG Graph Chatbot Builder 1",
71
- "targetHandle": "rag_graph"
72
  }
73
  ],
74
  "env": "LynxScribe",
@@ -103,7 +103,7 @@
103
  "type": "basic"
104
  },
105
  "params": {
106
- "chat": "What products does Lynx have?"
107
  },
108
  "status": "done",
109
  "title": "Input chat"
@@ -129,7 +129,7 @@
129
  ],
130
  "data": [
131
  [
132
- "Lynx Analytics offers a range of data analytics products and solutions tailored for various industries. Here are some of our key offerings:\n\n- **Generative AI**: We provide innovative solutions such as chatbots specifically designed for the pharmaceutical and service provider sectors.\n \n- **Graph AI**: Our advanced graph reasoning tools help in areas like transport scenario planning and predicting patient outcomes with graph representation learning.\n\n- **Pharma and Life Sciences**: We focus on marketing support, including Next Best Action predictions and Brand Adoption Ladder analysis, as well as supporting drug discovery and medical analytics.\n\n- **Retail Solutions**: Our products include Price AI, Assort AI, and Promo AI to optimize pricing, assortment, and promotion strategies for retailers.\n\n- **Financial Services**: We offer digital banking analytics solutions and a Customer Happiness Index to enhance customer experience and retention.\n\n- **Telecommunications**: While briefly mentioned, our telecom solutions help optimize operations, including fibre CAPEX optimization and churn prevention management.\n\nThese products are complemented by consulting services to help businesses make data-driven decisions. If you need more specific information or a demo, feel free to get in touch.\n\nCould you share what prompted your visit to our website today? Or may I know which specific domain or industry you are interested in or work in?"
133
  ]
134
  ]
135
  }
@@ -256,6 +256,8 @@
256
  },
257
  {
258
  "data": {
 
 
259
  "display": null,
260
  "error": null,
261
  "meta": {
@@ -324,6 +326,8 @@
324
  },
325
  {
326
  "data": {
 
 
327
  "display": null,
328
  "error": null,
329
  "meta": {
@@ -456,8 +460,23 @@
456
  "display": null,
457
  "error": null,
458
  "meta": {
459
- "inputs": {},
460
- "name": "LynxScribe RAG Graph Vector Store",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  "outputs": {
462
  "output": {
463
  "name": "output",
@@ -468,67 +487,91 @@
468
  }
469
  },
470
  "params": {
471
- "collection_name": {
472
- "default": "lynx",
473
- "name": "collection_name",
474
  "type": {
475
  "type": "<class 'str'>"
476
  }
477
  },
478
- "name": {
479
- "default": "faiss",
480
- "name": "name",
481
  "type": {
482
  "type": "<class 'str'>"
483
  }
484
  },
485
- "num_dimensions": {
486
- "default": 3072.0,
487
- "name": "num_dimensions",
488
  "type": {
489
- "type": "<class 'int'>"
490
  }
491
  },
492
- "text_embedder_interface": {
493
- "default": "openai",
494
- "name": "text_embedder_interface",
495
  "type": {
496
  "type": "<class 'str'>"
497
  }
498
  },
499
- "text_embedder_model_name_or_path": {
500
- "default": "text-embedding-3-large",
501
- "name": "text_embedder_model_name_or_path",
502
  "type": {
503
- "type": "<class 'str'>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
  }
505
  }
506
  },
507
  "position": {
508
- "x": 189.0,
509
- "y": 489.0
510
  },
511
  "type": "basic"
512
  },
513
  "params": {
514
- "collection_name": "lynx",
515
- "name": "faiss",
516
- "num_dimensions": "1536",
517
- "text_embedder_interface": "openai",
518
- "text_embedder_model_name_or_path": "text-embedding-ada-002"
 
 
 
519
  },
520
  "status": "done",
521
- "title": "LynxScribe RAG Graph Vector Store"
522
  },
523
  "dragHandle": ".bg-primary",
524
- "height": 443.0,
525
- "id": "LynxScribe RAG Graph Vector Store 1",
526
  "position": {
527
- "x": -2019.279230344727,
528
- "y": 1080.1955856484205
529
  },
530
  "type": "basic",
531
- "width": 336.0
532
  },
533
  {
534
  "data": {
@@ -537,8 +580,16 @@
537
  "display": null,
538
  "error": null,
539
  "meta": {
540
- "inputs": {},
541
- "name": "Cloud-sourced File Loader",
 
 
 
 
 
 
 
 
542
  "outputs": {
543
  "output": {
544
  "name": "output",
@@ -549,51 +600,78 @@
549
  }
550
  },
551
  "params": {
552
- "accepted_file_types": {
553
- "default": ".jpg, .jpeg, .png",
554
- "name": "accepted_file_types",
 
 
 
 
 
 
 
 
 
 
555
  "type": {
556
  "type": "<class 'str'>"
557
  }
558
  },
559
- "cloud_provider": {
560
- "default": "gcp",
561
- "name": "cloud_provider",
562
  "type": {
563
  "type": "<class 'str'>"
564
  }
565
  },
566
- "folder_URL": {
567
- "default": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test",
568
- "name": "folder_URL",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
569
  "type": {
570
  "type": "<class 'str'>"
571
  }
572
  }
573
  },
574
  "position": {
575
- "x": 189.0,
576
- "y": 412.0
577
  },
578
  "type": "basic"
579
  },
580
  "params": {
581
- "accepted_file_types": ".pickle",
582
- "cloud_provider": "gcp",
583
- "folder_URL": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-knowledge-graphs/lynx-chatbot"
 
 
 
584
  },
585
  "status": "done",
586
- "title": "Cloud-sourced File Loader"
587
  },
588
  "dragHandle": ".bg-primary",
589
- "height": 325.0,
590
- "id": "Cloud-sourced File Loader 1",
591
  "position": {
592
- "x": -2884.195823214815,
593
- "y": 630.6408986626046
594
  },
595
  "type": "basic",
596
- "width": 622.0
597
  },
598
  {
599
  "data": {
@@ -602,118 +680,67 @@
602
  "display": null,
603
  "error": null,
604
  "meta": {
605
- "inputs": {
606
- "chat_processor": {
607
- "name": "chat_processor",
608
- "position": "bottom",
609
- "type": {
610
- "type": "<class 'inspect._empty'>"
611
- }
612
- },
613
- "knowledge_base": {
614
- "name": "knowledge_base",
615
- "position": "bottom",
616
- "type": {
617
- "type": "<class 'inspect._empty'>"
618
- }
619
- }
620
- },
621
- "name": "LynxScribe RAG Graph Chatbot Backend",
622
  "outputs": {
623
  "output": {
624
  "name": "output",
625
- "position": "top",
626
  "type": {
627
  "type": "None"
628
  }
629
  }
630
  },
631
  "params": {
632
- "llm_interface": {
633
- "default": "openai",
634
- "name": "llm_interface",
635
- "type": {
636
- "type": "<class 'str'>"
637
- }
638
- },
639
- "llm_model_name": {
640
- "default": "gpt-4o",
641
- "name": "llm_model_name",
642
  "type": {
643
  "type": "<class 'str'>"
644
  }
645
  },
646
- "negative_answer": {
647
- "default": "I'm sorry, but the data I've been trained on does not contain any information related to your question.",
648
- "name": "negative_answer",
649
  "type": {
650
- "type": "<class 'str'>"
 
 
 
 
651
  }
652
  },
653
- "retriever_limits_by_type": {
654
- "default": "{}",
655
- "name": "retriever_limits_by_type",
656
  "type": {
657
  "type": "<class 'str'>"
658
  }
659
- },
660
- "retriever_max_iterations": {
661
- "default": 3.0,
662
- "name": "retriever_max_iterations",
663
- "type": {
664
- "type": "<class 'int'>"
665
- }
666
- },
667
- "retriever_overall_chunk_limit": {
668
- "default": 20.0,
669
- "name": "retriever_overall_chunk_limit",
670
- "type": {
671
- "type": "<class 'int'>"
672
- }
673
- },
674
- "retriever_overall_token_limit": {
675
- "default": 3000.0,
676
- "name": "retriever_overall_token_limit",
677
- "type": {
678
- "type": "<class 'int'>"
679
- }
680
- },
681
- "retriever_strict_limits": {
682
- "default": true,
683
- "name": "retriever_strict_limits",
684
- "type": {
685
- "type": "<class 'bool'>"
686
- }
687
  }
688
  },
689
  "position": {
690
- "x": 543.0,
691
- "y": 256.0
692
  },
693
  "type": "basic"
694
  },
695
  "params": {
696
- "llm_interface": "openai",
697
- "llm_model_name": "gpt-4o",
698
- "negative_answer": "I'm sorry, but the data I've been trained on does not contain any information related to your question.",
699
- "retriever_limits_by_type": "{\"information\": [1, 5], \"summary\": [0, 2], \"template_qna\": [1, 3], \"QnA question\": [0, 0]}",
700
- "retriever_max_iterations": 3.0,
701
- "retriever_overall_chunk_limit": "20",
702
- "retriever_overall_token_limit": 3000.0,
703
- "retriever_strict_limits": true
704
  },
705
  "status": "done",
706
- "title": "LynxScribe RAG Graph Chatbot Backend"
707
  },
708
  "dragHandle": ".bg-primary",
709
- "height": 556.0,
710
- "id": "LynxScribe RAG Graph Chatbot Backend 1",
711
  "position": {
712
- "x": -2020.0,
713
- "y": -188.33333333333334
714
  },
715
  "type": "basic",
716
- "width": 903.0
717
  },
718
  {
719
  "data": {
@@ -721,16 +748,9 @@
721
  "error": null,
722
  "meta": {
723
  "inputs": {
724
- "file_urls": {
725
- "name": "file_urls",
726
- "position": "left",
727
- "type": {
728
- "type": "<class 'inspect._empty'>"
729
- }
730
- },
731
  "rag_graph": {
732
  "name": "rag_graph",
733
- "position": "bottom",
734
  "type": {
735
  "type": "<class 'inspect._empty'>"
736
  }
@@ -747,13 +767,6 @@
747
  }
748
  },
749
  "params": {
750
- "input_type": {
751
- "default": "v1",
752
- "name": "input_type",
753
- "type": {
754
- "type": "<class 'str'>"
755
- }
756
- },
757
  "node_types": {
758
  "default": "intent_cluster",
759
  "name": "node_types",
@@ -770,13 +783,12 @@
770
  }
771
  },
772
  "position": {
773
- "x": 466.0,
774
- "y": 478.0
775
  },
776
  "type": "basic"
777
  },
778
  "params": {
779
- "input_type": "v1",
780
  "node_types": "intent_cluster",
781
  "scenario_file": "uploads/lynx_chatbot_scenario_selector.yaml"
782
  },
@@ -784,14 +796,14 @@
784
  "title": "LynxScribe RAG Graph Chatbot Builder"
785
  },
786
  "dragHandle": ".bg-primary",
787
- "height": 364.0,
788
  "id": "LynxScribe RAG Graph Chatbot Builder 1",
789
  "position": {
790
- "x": -2101.666666666667,
791
- "y": 611.6666666666666
792
  },
793
  "type": "basic",
794
- "width": 500.0
795
  }
796
  ]
797
  }
 
50
  "targetHandle": "chat_processor"
51
  },
52
  {
53
+ "id": "Cloud-sourced File Listing 1 LynxScribe Text RAG Loader 1",
54
+ "source": "Cloud-sourced File Listing 1",
55
  "sourceHandle": "output",
56
+ "target": "LynxScribe Text RAG Loader 1",
57
  "targetHandle": "file_urls"
58
  },
59
+ {
60
+ "id": "LynxScribe Text RAG Loader 1 LynxScribe RAG Graph Chatbot Builder 1",
61
+ "source": "LynxScribe Text RAG Loader 1",
62
+ "sourceHandle": "output",
63
+ "target": "LynxScribe RAG Graph Chatbot Builder 1",
64
+ "targetHandle": "rag_graph"
65
+ },
66
  {
67
  "id": "LynxScribe RAG Graph Chatbot Builder 1 LynxScribe RAG Graph Chatbot Backend 1",
68
  "source": "LynxScribe RAG Graph Chatbot Builder 1",
69
  "sourceHandle": "output",
70
  "target": "LynxScribe RAG Graph Chatbot Backend 1",
71
  "targetHandle": "knowledge_base"
 
 
 
 
 
 
 
72
  }
73
  ],
74
  "env": "LynxScribe",
 
103
  "type": "basic"
104
  },
105
  "params": {
106
+ "chat": "Who is Gabor?"
107
  },
108
  "status": "done",
109
  "title": "Input chat"
 
129
  ],
130
  "data": [
131
  [
132
+ "Lynx Analytics has two notable professionals named G\u00e1bor. Could you please specify which G\u00e1bor you are inquiring about?\n\n- **G\u00e1bor Benedek**: Chief Innovation Officer & Co-founder at Lynx Analytics. He specializes in economic and business simulations, social network analysis, data mining, and predictive analytics. He has an academic background as a former Associate Professor at Corvinus University of Budapest and has founded several data-related companies.\n\n- **G\u00e1bor Kriv\u00e1chy**: Country Manager at Lynx Analytics in Hungary. He is an experienced technology executive with a background in system implementation, integration, and project management, particularly in SAP implementations.\n\nLet me know which G\u00e1bor's details you would like to learn more about!"
133
  ]
134
  ]
135
  }
 
256
  },
257
  {
258
  "data": {
259
+ "__execution_delay": 0.0,
260
+ "collapsed": null,
261
  "display": null,
262
  "error": null,
263
  "meta": {
 
326
  },
327
  {
328
  "data": {
329
+ "__execution_delay": 0.0,
330
+ "collapsed": null,
331
  "display": null,
332
  "error": null,
333
  "meta": {
 
460
  "display": null,
461
  "error": null,
462
  "meta": {
463
+ "inputs": {
464
+ "chat_processor": {
465
+ "name": "chat_processor",
466
+ "position": "bottom",
467
+ "type": {
468
+ "type": "<class 'inspect._empty'>"
469
+ }
470
+ },
471
+ "knowledge_base": {
472
+ "name": "knowledge_base",
473
+ "position": "bottom",
474
+ "type": {
475
+ "type": "<class 'inspect._empty'>"
476
+ }
477
+ }
478
+ },
479
+ "name": "LynxScribe RAG Graph Chatbot Backend",
480
  "outputs": {
481
  "output": {
482
  "name": "output",
 
487
  }
488
  },
489
  "params": {
490
+ "llm_interface": {
491
+ "default": "openai",
492
+ "name": "llm_interface",
493
  "type": {
494
  "type": "<class 'str'>"
495
  }
496
  },
497
+ "llm_model_name": {
498
+ "default": "gpt-4o",
499
+ "name": "llm_model_name",
500
  "type": {
501
  "type": "<class 'str'>"
502
  }
503
  },
504
+ "negative_answer": {
505
+ "default": "I'm sorry, but the data I've been trained on does not contain any information related to your question.",
506
+ "name": "negative_answer",
507
  "type": {
508
+ "type": "<class 'str'>"
509
  }
510
  },
511
+ "retriever_limits_by_type": {
512
+ "default": "{}",
513
+ "name": "retriever_limits_by_type",
514
  "type": {
515
  "type": "<class 'str'>"
516
  }
517
  },
518
+ "retriever_max_iterations": {
519
+ "default": 3.0,
520
+ "name": "retriever_max_iterations",
521
  "type": {
522
+ "type": "<class 'int'>"
523
+ }
524
+ },
525
+ "retriever_overall_chunk_limit": {
526
+ "default": 20.0,
527
+ "name": "retriever_overall_chunk_limit",
528
+ "type": {
529
+ "type": "<class 'int'>"
530
+ }
531
+ },
532
+ "retriever_overall_token_limit": {
533
+ "default": 3000.0,
534
+ "name": "retriever_overall_token_limit",
535
+ "type": {
536
+ "type": "<class 'int'>"
537
+ }
538
+ },
539
+ "retriever_strict_limits": {
540
+ "default": true,
541
+ "name": "retriever_strict_limits",
542
+ "type": {
543
+ "type": "<class 'bool'>"
544
  }
545
  }
546
  },
547
  "position": {
548
+ "x": 543.0,
549
+ "y": 256.0
550
  },
551
  "type": "basic"
552
  },
553
  "params": {
554
+ "llm_interface": "openai",
555
+ "llm_model_name": "gpt-4o",
556
+ "negative_answer": "I'm sorry, but the data I've been trained on does not contain any information related to your question.",
557
+ "retriever_limits_by_type": "{\"information\": [1, 5], \"summary\": [0, 2], \"template_qna\": [1, 3], \"QnA question\": [0, 0]}",
558
+ "retriever_max_iterations": 3.0,
559
+ "retriever_overall_chunk_limit": "20",
560
+ "retriever_overall_token_limit": 3000.0,
561
+ "retriever_strict_limits": true
562
  },
563
  "status": "done",
564
+ "title": "LynxScribe RAG Graph Chatbot Backend"
565
  },
566
  "dragHandle": ".bg-primary",
567
+ "height": 556.0,
568
+ "id": "LynxScribe RAG Graph Chatbot Backend 1",
569
  "position": {
570
+ "x": -2020.0,
571
+ "y": -188.33333333333334
572
  },
573
  "type": "basic",
574
+ "width": 903.0
575
  },
576
  {
577
  "data": {
 
580
  "display": null,
581
  "error": null,
582
  "meta": {
583
+ "inputs": {
584
+ "file_urls": {
585
+ "name": "file_urls",
586
+ "position": "left",
587
+ "type": {
588
+ "type": "<class 'inspect._empty'>"
589
+ }
590
+ }
591
+ },
592
+ "name": "LynxScribe Text RAG Loader",
593
  "outputs": {
594
  "output": {
595
  "name": "output",
 
600
  }
601
  },
602
  "params": {
603
+ "input_type": {
604
+ "default": "v1",
605
+ "name": "input_type",
606
+ "type": {
607
+ "enum": [
608
+ "V1",
609
+ "V2"
610
+ ]
611
+ }
612
+ },
613
+ "text_embedder_interface": {
614
+ "default": "openai",
615
+ "name": "text_embedder_interface",
616
  "type": {
617
  "type": "<class 'str'>"
618
  }
619
  },
620
+ "text_embedder_model_name_or_path": {
621
+ "default": "text-embedding-3-large",
622
+ "name": "text_embedder_model_name_or_path",
623
  "type": {
624
  "type": "<class 'str'>"
625
  }
626
  },
627
+ "vdb_collection_name": {
628
+ "default": "lynx",
629
+ "name": "vdb_collection_name",
630
+ "type": {
631
+ "type": "<class 'str'>"
632
+ }
633
+ },
634
+ "vdb_num_dimensions": {
635
+ "default": 3072.0,
636
+ "name": "vdb_num_dimensions",
637
+ "type": {
638
+ "type": "<class 'int'>"
639
+ }
640
+ },
641
+ "vdb_provider_name": {
642
+ "default": "faiss",
643
+ "name": "vdb_provider_name",
644
  "type": {
645
  "type": "<class 'str'>"
646
  }
647
  }
648
  },
649
  "position": {
650
+ "x": 870.0,
651
+ "y": 926.0
652
  },
653
  "type": "basic"
654
  },
655
  "params": {
656
+ "input_type": "V1",
657
+ "text_embedder_interface": "openai",
658
+ "text_embedder_model_name_or_path": "text-embedding-ada-002",
659
+ "vdb_collection_name": "lynx",
660
+ "vdb_num_dimensions": "1536",
661
+ "vdb_provider_name": "faiss"
662
  },
663
  "status": "done",
664
+ "title": "LynxScribe Text RAG Loader"
665
  },
666
  "dragHandle": ".bg-primary",
667
+ "height": 520.0,
668
+ "id": "LynxScribe Text RAG Loader 1",
669
  "position": {
670
+ "x": -2980.4063452955706,
671
+ "y": 787.1039827859594
672
  },
673
  "type": "basic",
674
+ "width": 318.0
675
  },
676
  {
677
  "data": {
 
680
  "display": null,
681
  "error": null,
682
  "meta": {
683
+ "inputs": {},
684
+ "name": "Cloud-sourced File Listing",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
685
  "outputs": {
686
  "output": {
687
  "name": "output",
688
+ "position": "right",
689
  "type": {
690
  "type": "None"
691
  }
692
  }
693
  },
694
  "params": {
695
+ "accepted_file_types": {
696
+ "default": ".jpg, .jpeg, .png",
697
+ "name": "accepted_file_types",
 
 
 
 
 
 
 
698
  "type": {
699
  "type": "<class 'str'>"
700
  }
701
  },
702
+ "cloud_provider": {
703
+ "default": "gcp",
704
+ "name": "cloud_provider",
705
  "type": {
706
+ "enum": [
707
+ "GCP",
708
+ "AWS",
709
+ "AZURE"
710
+ ]
711
  }
712
  },
713
+ "folder_URL": {
714
+ "default": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test",
715
+ "name": "folder_URL",
716
  "type": {
717
  "type": "<class 'str'>"
718
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
719
  }
720
  },
721
  "position": {
722
+ "x": 451.0,
723
+ "y": 505.0
724
  },
725
  "type": "basic"
726
  },
727
  "params": {
728
+ "accepted_file_types": ".pickle",
729
+ "cloud_provider": "GCP",
730
+ "folder_URL": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-knowledge-graphs/lynx-chatbot"
 
 
 
 
 
731
  },
732
  "status": "done",
733
+ "title": "Cloud-sourced File Listing"
734
  },
735
  "dragHandle": ".bg-primary",
736
+ "height": 324.0,
737
+ "id": "Cloud-sourced File Listing 1",
738
  "position": {
739
+ "x": -3827.1644268005352,
740
+ "y": 883.7859821532916
741
  },
742
  "type": "basic",
743
+ "width": 613.0
744
  },
745
  {
746
  "data": {
 
748
  "error": null,
749
  "meta": {
750
  "inputs": {
 
 
 
 
 
 
 
751
  "rag_graph": {
752
  "name": "rag_graph",
753
+ "position": "left",
754
  "type": {
755
  "type": "<class 'inspect._empty'>"
756
  }
 
767
  }
768
  },
769
  "params": {
 
 
 
 
 
 
 
770
  "node_types": {
771
  "default": "intent_cluster",
772
  "name": "node_types",
 
783
  }
784
  },
785
  "position": {
786
+ "x": 1314.0,
787
+ "y": 1003.0
788
  },
789
  "type": "basic"
790
  },
791
  "params": {
 
792
  "node_types": "intent_cluster",
793
  "scenario_file": "uploads/lynx_chatbot_scenario_selector.yaml"
794
  },
 
796
  "title": "LynxScribe RAG Graph Chatbot Builder"
797
  },
798
  "dragHandle": ".bg-primary",
799
+ "height": 238.0,
800
  "id": "LynxScribe RAG Graph Chatbot Builder 1",
801
  "position": {
802
+ "x": -2453.755433834285,
803
+ "y": 927.5600547745715
804
  },
805
  "type": "basic",
806
+ "width": 448.0
807
  }
808
  ]
809
  }
lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py CHANGED
@@ -34,11 +34,6 @@ from lynxkite.core.executors import one_by_one
34
 
35
  DEFAULT_NEGATIVE_ANSWER = "I'm sorry, but the data I've been trained on does not contain any information related to your question."
36
 
37
- # logger
38
- # import logging
39
- # logging.basicConfig(level=logging.INFO)
40
- # logger = logging.getLogger(__name__)
41
-
42
  ENV = "LynxScribe"
43
  one_by_one.register(ENV)
44
  mem = joblib.Memory("joblib-cache")
@@ -53,6 +48,11 @@ class CloudProvider(Enum):
53
  AZURE = "azure"
54
 
55
 
 
 
 
 
 
56
  @op("Cloud-sourced File Listing")
57
  def cloud_file_loader(
58
  *,
@@ -88,43 +88,43 @@ def cloud_file_loader(
88
  raise ValueError(f"Cloud provider '{cloud_provider}' is not supported.")
89
 
90
 
91
- @output_on_top
92
- @op("LynxScribe RAG Graph Vector Store")
93
- @mem.cache
94
- def ls_rag_graph(
95
- *,
96
- name: str = "faiss",
97
- num_dimensions: int = 3072,
98
- collection_name: str = "lynx",
99
- text_embedder_interface: str = "openai",
100
- text_embedder_model_name_or_path: str = "text-embedding-3-large",
101
- # api_key_name: str = "OPENAI_API_KEY",
102
- ):
103
- """
104
- Returns with a vector store instance.
105
- """
106
-
107
- # getting the text embedder instance
108
- llm_params = {"name": text_embedder_interface}
109
- # if api_key_name:
110
- # llm_params["api_key"] = os.getenv(api_key_name)
111
- llm = get_llm_engine(**llm_params)
112
- text_embedder = TextEmbedder(llm=llm, model=text_embedder_model_name_or_path)
113
-
114
- # getting the vector store
115
- if name == "chromadb":
116
- vector_store = get_vector_store(name=name, collection_name=collection_name)
117
- elif name == "faiss":
118
- vector_store = get_vector_store(name=name, num_dimensions=num_dimensions)
119
- else:
120
- raise ValueError(f"Vector store name '{name}' is not supported.")
121
-
122
- # building up the RAG graph
123
- rag_graph = RAGGraph(
124
- PandasKnowledgeBaseGraph(vector_store=vector_store, text_embedder=text_embedder)
125
- )
126
 
127
- return {"rag_graph": rag_graph}
128
 
129
 
130
  @op("LynxScribe Image Describer")
@@ -296,7 +296,7 @@ async def ls_image_rag_builder(
296
  # # saving the RAG graph
297
  # rag_graph.kg_base.save(image_rag_out_path)
298
 
299
- return {"knowledge_base": rag_graph}
300
 
301
 
302
  @op("LynxScribe RAG Graph Saver")
@@ -320,7 +320,7 @@ def ls_save_rag_graph(
320
  @op("LynxScribe Image RAG Query")
321
  async def search_context(rag_graph, text, *, top_k=3):
322
  message = text["text"]
323
- rag_graph = rag_graph[0]["knowledge_base"]
324
 
325
  # get all similarities
326
  emb_similarities = await rag_graph.search_context(
@@ -352,90 +352,92 @@ def view_image(embedding_similarities):
352
  return embedding_similarities[0]["image_url"]
353
 
354
 
355
- # @output_on_top
356
- # @op("Vector store")
357
- # def vector_store(*, name="chromadb", collection_name="lynx"):
358
- # vector_store = get_vector_store(name=name, collection_name=collection_name)
359
- # return {"vector_store": vector_store}
 
 
 
 
 
 
 
 
 
 
 
360
 
 
 
361
 
362
- # @output_on_top
363
- # @op("LLM")
364
- # def llm(*, name="openai"):
365
- # llm = get_llm_engine(name=name)
366
- # return {"llm": llm}
 
367
 
 
 
 
 
 
 
 
 
 
 
 
368
 
369
- # @output_on_top
370
- # @ops.input_position(llm="bottom")
371
- # @op("Text embedder")
372
- # def text_embedder(llm, *, model="text-embedding-ada-002"):
373
- # llm = llm[0]["llm"]
374
- # text_embedder = TextEmbedder(llm=llm, model=model)
375
- # return {"text_embedder": text_embedder}
376
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
 
378
- # @output_on_top
379
- # @ops.input_position(vector_store="bottom", text_embedder="bottom")
380
- # @op("RAG graph")
381
- # def rag_graph(vector_store, text_embedder):
382
- # vector_store = vector_store[0]["vector_store"]
383
- # text_embedder = text_embedder[0]["text_embedder"]
384
- # rag_graph = RAGGraph(
385
- # PandasKnowledgeBaseGraph(vector_store=vector_store, text_embedder=text_embedder)
386
- # )
387
- # return {"rag_graph": rag_graph}
388
 
389
 
390
  @output_on_top
391
- @ops.input_position(rag_graph="bottom")
392
  @op("LynxScribe RAG Graph Chatbot Builder")
393
  @mem.cache
394
  def ls_rag_chatbot_builder(
395
- file_urls,
396
  rag_graph,
397
  *,
398
  scenario_file: str = "uploads/lynx_chatbot_scenario_selector.yaml",
399
  node_types: str = "intent_cluster",
400
- input_type: str = "v1",
401
  ):
402
  """
403
- Builds up a RAG Graph-based chatbot. It could load the chatbot from
404
- an existing folder (v1 or v2).
405
-
406
- TODO: Later, we should not use these saved files, but we should build
407
- up the chatbot from scratch - will be added soon). That time we will
408
- add the summarizer-related parameters (LLM interface and model).
409
 
410
  TODO: Later, the scenario selector can be built up synthetically from
411
- the input documents - or semi-automated.
412
-
413
- TODO: Currently, we do not affected by the embedder, as the files are
414
- pre-loaded, so the text embedder should have the same model as the
415
- one used in the files...
416
  """
417
 
418
  scenarios = load_config(scenario_file)
419
  node_types = [t.strip() for t in node_types.split(",")]
420
 
421
  # handling inputs
422
- file_urls = file_urls["file_urls"]
423
- rag_graph = rag_graph[0]["rag_graph"]
424
-
425
- # loading v1 knowledge base (shitty solution, but temporary)
426
- if input_type == "v1":
427
- node_file = [f for f in file_urls if "nodes.p" in f][0]
428
- edge_file = [f for f in file_urls if "edges.p" in f][0]
429
- tempcluster_file = [f for f in file_urls if "clusters.p" in f][0]
430
- rag_graph.kg_base.load_v1_knowledge_base(
431
- nodes_path=node_file,
432
- edges_path=edge_file,
433
- template_cluster_path=tempcluster_file,
434
- )
435
- elif input_type == "v2":
436
- raise ValueError("Currently only v1 input type is supported.")
437
- else:
438
- raise ValueError(f"Input type '{input_type}' is not supported.")
439
 
440
  # loading the scenarios
441
  scenario_selector = ScenarioSelector(
@@ -453,18 +455,6 @@ def ls_rag_chatbot_builder(
453
  }
454
 
455
 
456
- # @output_on_top
457
- # @op("Scenario selector")
458
- # def scenario_selector(*, scenario_file: str, node_types="intent_cluster"):
459
- # scenarios = load_config(scenario_file)
460
- # node_types = [t.strip() for t in node_types.split(",")]
461
- # scenario_selector = ScenarioSelector(
462
- # scenarios=[Scenario(**scenario) for scenario in scenarios],
463
- # node_types=node_types,
464
- # )
465
- # return {"scenario_selector": scenario_selector}
466
-
467
-
468
  @output_on_top
469
  @ops.input_position(knowledge_base="bottom", chat_processor="bottom")
470
  @op("LynxScribe RAG Graph Chatbot Backend")
@@ -526,35 +516,6 @@ def ls_rag_chatbot_backend(
526
  return {"chat_api": c}
527
 
528
 
529
- # @output_on_top
530
- # @ops.input_position(rag_graph="bottom", scenario_selector="bottom", llm="bottom")
531
- # @op("RAG chatbot")
532
- # def rag_chatbot(
533
- # rag_graph,
534
- # scenario_selector,
535
- # llm,
536
- # *,
537
- # negative_answer=DEFAULT_NEGATIVE_ANSWER,
538
- # limits_by_type="{}",
539
- # strict_limits=True,
540
- # max_results=5,
541
- # ):
542
- # rag_graph = rag_graph[0]["rag_graph"]
543
- # scenario_selector = scenario_selector[0]["scenario_selector"]
544
- # llm = llm[0]["llm"]
545
- # limits_by_type = json.loads(limits_by_type)
546
- # rag_chatbot = RAGChatbot(
547
- # rag_graph=rag_graph,
548
- # scenario_selector=scenario_selector,
549
- # llm=llm,
550
- # negative_answer=negative_answer,
551
- # limits_by_type=limits_by_type,
552
- # strict_limits=strict_limits,
553
- # max_results=max_results,
554
- # )
555
- # return {"chatbot": rag_chatbot}
556
-
557
-
558
  @output_on_top
559
  @ops.input_position(processor="bottom")
560
  @op("Chat processor")
@@ -622,39 +583,6 @@ def input_chat(*, chat: str):
622
  return {"text": chat}
623
 
624
 
625
- # @output_on_top
626
- # @ops.input_position(chatbot="bottom", chat_processor="bottom", knowledge_base="bottom")
627
- # @op("Chat API")
628
- # def chat_api(chatbot, chat_processor, knowledge_base, *, model="gpt-4o-mini"):
629
- # chatbot = chatbot[0]["chatbot"]
630
- # chat_processor = chat_processor[0]["chat_processor"]
631
- # knowledge_base = knowledge_base[0]
632
- # c = ChatAPI(
633
- # chatbot=chatbot,
634
- # chat_processor=chat_processor,
635
- # model=model,
636
- # )
637
- # if knowledge_base:
638
- # c.chatbot.rag_graph.kg_base.load_v1_knowledge_base(**knowledge_base)
639
- # c.chatbot.scenario_selector.check_compatibility(c.chatbot.rag_graph)
640
- # return {"chat_api": c}
641
-
642
-
643
- # @output_on_top
644
- # @op("Knowledge base")
645
- # def knowledge_base(
646
- # *,
647
- # nodes_path="nodes.pickle",
648
- # edges_path="edges.pickle",
649
- # template_cluster_path="tempclusters.pickle",
650
- # ):
651
- # return {
652
- # "nodes_path": nodes_path,
653
- # "edges_path": edges_path,
654
- # "template_cluster_path": template_cluster_path,
655
- # }
656
-
657
-
658
  @op("View", view="table_view")
659
  def view(input):
660
  columns = [str(c) for c in input.keys() if not str(c).startswith("_")]
@@ -772,7 +700,7 @@ def dictionary_corrector(dict_string: str, expected_keys: list | None = None) ->
772
  dstring_prc = "}".join(dstring_prc.split("}")[:-1]) + "}"
773
 
774
  try:
775
- trf_dict = eval(dstring_prc)
776
  if expected_keys:
777
  for _key in expected_keys:
778
  if _key in trf_dict:
 
34
 
35
  DEFAULT_NEGATIVE_ANSWER = "I'm sorry, but the data I've been trained on does not contain any information related to your question."
36
 
 
 
 
 
 
37
  ENV = "LynxScribe"
38
  one_by_one.register(ENV)
39
  mem = joblib.Memory("joblib-cache")
 
48
  AZURE = "azure"
49
 
50
 
51
+ class RAGVersion(Enum):
52
+ V1 = "v1"
53
+ V2 = "v2"
54
+
55
+
56
  @op("Cloud-sourced File Listing")
57
  def cloud_file_loader(
58
  *,
 
88
  raise ValueError(f"Cloud provider '{cloud_provider}' is not supported.")
89
 
90
 
91
+ # @output_on_top
92
+ # @op("LynxScribe RAG Graph Vector Store")
93
+ # @mem.cache
94
+ # def ls_rag_graph(
95
+ # *,
96
+ # name: str = "faiss",
97
+ # num_dimensions: int = 3072,
98
+ # collection_name: str = "lynx",
99
+ # text_embedder_interface: str = "openai",
100
+ # text_embedder_model_name_or_path: str = "text-embedding-3-large",
101
+ # # api_key_name: str = "OPENAI_API_KEY",
102
+ # ):
103
+ # """
104
+ # Returns with a vector store instance.
105
+ # """
106
+
107
+ # # getting the text embedder instance
108
+ # llm_params = {"name": text_embedder_interface}
109
+ # # if api_key_name:
110
+ # # llm_params["api_key"] = os.getenv(api_key_name)
111
+ # llm = get_llm_engine(**llm_params)
112
+ # text_embedder = TextEmbedder(llm=llm, model=text_embedder_model_name_or_path)
113
+
114
+ # # getting the vector store
115
+ # if name == "chromadb":
116
+ # vector_store = get_vector_store(name=name, collection_name=collection_name)
117
+ # elif name == "faiss":
118
+ # vector_store = get_vector_store(name=name, num_dimensions=num_dimensions)
119
+ # else:
120
+ # raise ValueError(f"Vector store name '{name}' is not supported.")
121
+
122
+ # # building up the RAG graph
123
+ # rag_graph = RAGGraph(
124
+ # PandasKnowledgeBaseGraph(vector_store=vector_store, text_embedder=text_embedder)
125
+ # )
126
 
127
+ # return {"rag_graph": rag_graph}
128
 
129
 
130
  @op("LynxScribe Image Describer")
 
296
  # # saving the RAG graph
297
  # rag_graph.kg_base.save(image_rag_out_path)
298
 
299
+ return {"rag_graph": rag_graph}
300
 
301
 
302
  @op("LynxScribe RAG Graph Saver")
 
320
  @op("LynxScribe Image RAG Query")
321
  async def search_context(rag_graph, text, *, top_k=3):
322
  message = text["text"]
323
+ rag_graph = rag_graph[0]["rag_graph"]
324
 
325
  # get all similarities
326
  emb_similarities = await rag_graph.search_context(
 
352
  return embedding_similarities[0]["image_url"]
353
 
354
 
355
+ @op("LynxScribe Text RAG Loader")
356
+ @mem.cache
357
+ def ls_text_rag_loader(
358
+ file_urls,
359
+ *,
360
+ input_type: RAGVersion = RAGVersion.V1,
361
+ vdb_provider_name: str = "faiss",
362
+ vdb_num_dimensions: int = 3072,
363
+ vdb_collection_name: str = "lynx",
364
+ text_embedder_interface: str = "openai",
365
+ text_embedder_model_name_or_path: str = "text-embedding-3-large",
366
+ # api_key_name: str = "OPENAI_API_KEY",
367
+ ):
368
+ """
369
+ Loading a text-based RAG graph from saved files (getting pandas readable links).
370
+ """
371
 
372
+ # handling inputs
373
+ file_urls = file_urls["file_urls"]
374
 
375
+ # getting the text embedder instance
376
+ llm_params = {"name": text_embedder_interface}
377
+ # if api_key_name:
378
+ # llm_params["api_key"] = os.getenv(api_key_name)
379
+ llm = get_llm_engine(**llm_params)
380
+ text_embedder = TextEmbedder(llm=llm, model=text_embedder_model_name_or_path)
381
 
382
+ # getting the vector store
383
+ if vdb_provider_name == "chromadb":
384
+ vector_store = get_vector_store(
385
+ name=vdb_provider_name, collection_name=vdb_collection_name
386
+ )
387
+ elif vdb_provider_name == "faiss":
388
+ vector_store = get_vector_store(
389
+ name=vdb_provider_name, num_dimensions=vdb_num_dimensions
390
+ )
391
+ else:
392
+ raise ValueError(f"Vector store name '{vdb_provider_name}' is not supported.")
393
 
394
+ # building up the RAG graph
395
+ rag_graph = RAGGraph(
396
+ PandasKnowledgeBaseGraph(vector_store=vector_store, text_embedder=text_embedder)
397
+ )
 
 
 
398
 
399
+ # loading the knowledge base (temporary + TODO: adding v2)
400
+ if input_type == RAGVersion.V1:
401
+ node_file = [f for f in file_urls if "nodes.p" in f][0]
402
+ edge_file = [f for f in file_urls if "edges.p" in f][0]
403
+ tempcluster_file = [f for f in file_urls if "clusters.p" in f][0]
404
+ rag_graph.kg_base.load_v1_knowledge_base(
405
+ nodes_path=node_file,
406
+ edges_path=edge_file,
407
+ template_cluster_path=tempcluster_file,
408
+ )
409
+ elif input_type == RAGVersion.V2:
410
+ raise ValueError("Currently only v1 input type is supported.")
411
+ else:
412
+ raise ValueError(f"Input type '{input_type}' is not supported.")
413
 
414
+ return {"rag_graph": rag_graph}
 
 
 
 
 
 
 
 
 
415
 
416
 
417
  @output_on_top
 
418
  @op("LynxScribe RAG Graph Chatbot Builder")
419
  @mem.cache
420
  def ls_rag_chatbot_builder(
 
421
  rag_graph,
422
  *,
423
  scenario_file: str = "uploads/lynx_chatbot_scenario_selector.yaml",
424
  node_types: str = "intent_cluster",
 
425
  ):
426
  """
427
+ Builds up a RAG Graph-based chatbot (basically the loaded RAG graph +
428
+ a scenario selector).
 
 
 
 
429
 
430
  TODO: Later, the scenario selector can be built up synthetically from
431
+ the input documents - or semi-automated, not just from the scenario
432
+ yaml.
 
 
 
433
  """
434
 
435
  scenarios = load_config(scenario_file)
436
  node_types = [t.strip() for t in node_types.split(",")]
437
 
438
  # handling inputs
439
+ # rag_graph = rag_graph[0]["rag_graph"] TODO: check why is it bad
440
+ rag_graph = rag_graph["rag_graph"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
 
442
  # loading the scenarios
443
  scenario_selector = ScenarioSelector(
 
455
  }
456
 
457
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  @output_on_top
459
  @ops.input_position(knowledge_base="bottom", chat_processor="bottom")
460
  @op("LynxScribe RAG Graph Chatbot Backend")
 
516
  return {"chat_api": c}
517
 
518
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
519
  @output_on_top
520
  @ops.input_position(processor="bottom")
521
  @op("Chat processor")
 
583
  return {"text": chat}
584
 
585
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
586
  @op("View", view="table_view")
587
  def view(input):
588
  columns = [str(c) for c in input.keys() if not str(c).startswith("_")]
 
700
  dstring_prc = "}".join(dstring_prc.split("}")[:-1]) + "}"
701
 
702
  try:
703
+ trf_dict = json.loads(dstring_prc)
704
  if expected_keys:
705
  for _key in expected_keys:
706
  if _key in trf_dict: