Pratik Bhavsar committed on
Commit
91da2cc
·
1 Parent(s): c8ff2be

improved dataset table

Browse files
Files changed (1) hide show
  1. data_loader.py +88 -31
data_loader.py CHANGED
@@ -659,6 +659,64 @@ METHODOLOGY = """
659
  width: 100%;
660
  padding: 2rem 0;
661
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
662
 
663
  [Rest of the CSS remains the same]
664
  </style>
@@ -698,64 +756,63 @@ METHODOLOGY = """
698
  <tbody>
699
  <tr>
700
  <td rowspan="4">Single-Turn</td>
701
- <td>200</td>
702
- <td>Single Function Call</td>
703
- <td>xlam_single_tool_single_call</td>
704
- <td>Basic ability to read documentation and make single function calls</td>
705
  </tr>
706
  <tr>
707
- <td>250</td>
708
- <td>Multiple Function Call</td>
709
- <td>xlam_multiple_tool_multiple_call</td>
710
- <td>Parallel execution and result aggregation capabilities</td>
711
  </tr>
712
  <tr>
713
  <td>100</td>
714
- <td>Irrelevant Query</td>
715
- <td>BFCL_v3_irrelevance</td>
716
- <td>Recognition of tool mismatches with user needs</td>
717
  </tr>
718
  <tr>
719
  <td>100</td>
720
- <td>Long Context</td>
721
- <td>tau_long_context</td>
722
- <td>Extended interactions and complex instructions</td>
723
  </tr>
724
  <tr>
725
  <td rowspan="5">Multi-Turn</td>
726
- <td>80</td>
727
- <td>Single Function Call</td>
728
- <td>BFCL_v3_multi_turn_base_single_func_call</td>
729
- <td>Conversational function calling abilities</td>
730
  </tr>
731
  <tr>
732
  <td>50</td>
733
- <td>Multiple Function Call</td>
734
- <td>BFCL_v3_multi_turn_base_multi_func_call</td>
735
- <td>Multiple function calls in conversation</td>
736
  </tr>
737
  <tr>
738
  <td>100</td>
739
- <td>Missing Function</td>
740
- <td>BFCL_v3_multi_turn_miss_func</td>
741
- <td>Graceful handling of unavailable tools</td>
742
  </tr>
743
  <tr>
744
  <td>100</td>
745
- <td>Missing Parameters</td>
746
- <td>BFCL_v3_multi_turn_miss_param</td>
747
- <td>Parameter collection and incomplete information</td>
748
  </tr>
749
  <tr>
750
  <td>100</td>
751
- <td>Composite</td>
752
- <td>BFCL_v3_multi_turn_composite</td>
753
- <td>Overall robustness in complex scenarios</td>
754
  </tr>
755
  </tbody>
756
  </table>
757
  </div>
758
- </div>
759
 
760
  <!-- Features Grid Section -->
761
  <div class="features-grid">
 
659
  width: 100%;
660
  padding: 2rem 0;
661
  }
662
+
663
+ .dataset-table {
664
+ width: 100%;
665
+ border-collapse: separate;
666
+ border-spacing: 0;
667
+ margin: 2rem 0;
668
+ background: var(--bg-tertiary);
669
+ border-radius: 1rem;
670
+ overflow: hidden;
671
+ box-shadow: 0 4px 20px var(--shadow-color);
672
+ }
673
+
674
+ .dataset-table thead {
675
+ background: linear-gradient(90deg, var(--accent-blue), var(--accent-purple));
676
+ }
677
+
678
+ .dataset-table th {
679
+ padding: 1.25rem 1rem;
680
+ text-align: left;
681
+ color: white;
682
+ font-weight: 600;
683
+ font-size: 1rem;
684
+ }
685
+
686
+ .dataset-table td {
687
+ padding: 1rem;
688
+ border-bottom: 1px solid var(--border-primary);
689
+ color: var(--text-secondary);
690
+ transition: all 0.2s ease;
691
+ }
692
+
693
+ .dataset-table tbody tr:hover td {
694
+ background: var(--card-hover-bg);
695
+ color: var(--text-primary);
696
+ }
697
+
698
+ .dataset-table td[rowspan] {
699
+ background: var(--bg-secondary);
700
+ color: var(--accent-blue);
701
+ font-weight: 600;
702
+ border-right: 1px solid var(--border-primary);
703
+ }
704
+
705
+ .purpose-cell {
706
+ max-width: 300px;
707
+ line-height: 1.5;
708
+ }
709
+
710
+ .category-cell {
711
+ color: var(--accent-purple);
712
+ font-weight: 500;
713
+ }
714
+
715
+ .dataset-name {
716
+ font-family: monospace;
717
+ color: var(--accent-pink);
718
+ font-size: 0.9rem;
719
+ }
720
 
721
  [Rest of the CSS remains the same]
722
  </style>
 
756
  <tbody>
757
  <tr>
758
  <td rowspan="4">Single-Turn</td>
759
+ <td>100 + 100</td>
760
+ <td class="category-cell">Single Function Call</td>
761
+ <td class="dataset-name">xlam_single_tool_single_call</td>
762
+ <td class="purpose-cell">Evaluates basic ability to read documentation and make single function calls</td>
763
  </tr>
764
  <tr>
765
+ <td>200 + 50</td>
766
+ <td class="category-cell">Multiple Function Call</td>
767
+ <td class="dataset-name">xlam_multiple_tool_multiple_call, xlam_single_tool_multiple_call</td>
768
+ <td class="purpose-cell">Tests parallel execution and result aggregation capabilities</td>
769
  </tr>
770
  <tr>
771
  <td>100</td>
772
+ <td class="category-cell">Irrelevant Query</td>
773
+ <td class="dataset-name">BFCL_v3_irrelevance</td>
774
+ <td class="purpose-cell">Tests ability to recognize when available tools don't match user needs</td>
775
  </tr>
776
  <tr>
777
  <td>100</td>
778
+ <td class="category-cell">Long Context</td>
779
+ <td class="dataset-name">tau_long_context</td>
780
+ <td class="purpose-cell">Assesses handling of extended interactions and complex instructions</td>
781
  </tr>
782
  <tr>
783
  <td rowspan="5">Multi-Turn</td>
784
+ <td>50 + 30</td>
785
+ <td class="category-cell">Single Function Call</td>
786
+ <td class="dataset-name">BFCL_v3_multi_turn_base_single_func_call, toolscs_single_func_call</td>
787
+ <td class="purpose-cell">Tests basic conversational function calling abilities</td>
788
  </tr>
789
  <tr>
790
  <td>50</td>
791
+ <td class="category-cell">Multiple Function Call</td>
792
+ <td class="dataset-name">BFCL_v3_multi_turn_base_multi_func_call</td>
793
+ <td class="purpose-cell">Evaluates handling of multiple function calls in conversation</td>
794
  </tr>
795
  <tr>
796
  <td>100</td>
797
+ <td class="category-cell">Missing Function</td>
798
+ <td class="dataset-name">BFCL_v3_multi_turn_miss_func</td>
799
+ <td class="purpose-cell">Tests graceful handling of unavailable tools</td>
800
  </tr>
801
  <tr>
802
  <td>100</td>
803
+ <td class="category-cell">Missing Parameters</td>
804
+ <td class="dataset-name">BFCL_v3_multi_turn_miss_param</td>
805
+ <td class="purpose-cell">Assesses parameter collection and handling incomplete information</td>
806
  </tr>
807
  <tr>
808
  <td>100</td>
809
+ <td class="category-cell">Composite</td>
810
+ <td class="dataset-name">BFCL_v3_multi_turn_composite</td>
811
+ <td class="purpose-cell">Tests overall robustness in complex scenarios</td>
812
  </tr>
813
  </tbody>
814
  </table>
815
  </div>
 
816
 
817
  <!-- Features Grid Section -->
818
  <div class="features-grid">