Me1oy committed on
Commit
9970286
·
verified ·
1 Parent(s): 322df4e

Update src/about.py

Browse files
Files changed (1) hide show
  1. src/about.py +45 -10
src/about.py CHANGED
@@ -56,21 +56,56 @@ NUM_FEWSHOT = 0 # Change with your few shot
56
  TITLE = """<h1 align="center" id="space-title">🐲 The FinBen FLARE Leaderboard</h1>"""
57
 
58
  # What does your leaderboard evaluate?
59
- INTRODUCTION_TEXT = """📊 The FinBen FLARE Leaderboard is designed to rigorously track, rank, and evaluate state-of-the-art models in financial Natural Language Understanding and Prediction.
60
-
61
- 📈 Unique to FLARE, our leaderboard not only covers standard NLP tasks but also incorporates financial prediction tasks such as stock movement and credit scoring, offering a more comprehensive evaluation for real-world financial applications.
62
-
63
- 📚 Our evaluation metrics include, but are not limited to, Accuracy, F1 Score, ROUGE score, BERTScore, and Matthews correlation coefficient (MCC), providing a multidimensional assessment of model performance.
64
-
65
- 🔗 For more details, refer to our GitHub page [here](https://github.com/The-FinAI/PIXIU).
66
  """
67
 
68
  # Which evaluations are you running? how can people reproduce what you have?
69
  LLM_BENCHMARKS_TEXT = f"""
70
- ## How it works
 
 
 
71
 
72
- ## Reproducibility
73
- To reproduce our results, here is the commands you can run:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  """
76
 
 
56
  TITLE = """<h1 align="center" id="space-title">🐲 The FinBen FLARE Leaderboard</h1>"""
57
 
58
  # What does your leaderboard evaluate?
59
+ INTRODUCTION_TEXT = """
 
 
 
 
 
 
60
  """
61
 
62
  # Which evaluations are you running? how can people reproduce what you have?
63
  LLM_BENCHMARKS_TEXT = f"""
64
+ ## Introduction
65
+ 📊 The FinBen FLARE Leaderboard is designed to rigorously track, rank, and evaluate state-of-the-art models in financial Natural Language Understanding and Prediction.
66
+
67
+ 📈 Unique to FLARE, our leaderboard not only covers standard NLP tasks but also incorporates financial prediction tasks such as stock movement and credit scoring, offering a more comprehensive evaluation for real-world financial applications.
68
 
69
+ ## Metrics
70
+ 📚 Our evaluation metrics include, but are not limited to, Accuracy, F1 Score, ROUGE score, BERTScore, and Matthews correlation coefficient (MCC), providing a multidimensional assessment of model performance.
71
+ Metrics for specific tasks are as follows:
72
+ FPB-F1
73
+ FiQA-SA-F1
74
+ TSA-RMSE
75
+ Headlines-AvgF1
76
+ FOMC-F1
77
+ FinArg-ACC-MicroF1
78
+ FinArg-ARC-MicroF1
79
+ Multifin-MicroF1
80
+ MA-MicroF1
81
+ MLESG-MicroF1
82
+ NER-EntityF1
83
+ FINER-ORD-EntityF1
84
+ FinRED-F1
85
+ SC-F1
86
+ CD-F1
87
+ FinQA-EmAcc
88
+ TATQA-EmAcc
89
+ ConvFinQA-EmAcc
90
+ FNXL-EntityF1
91
+ FSRL-EntityF1
92
+ EDTSUM-Rouge-1
93
+ ECTSUM-Rouge-1
94
+ BigData22-Acc
95
+ ACL18-Acc
96
+ CIKM18-Acc
97
+ German-F1
98
+ Australian-F1
99
+ LendingClub-F1
100
+ ccf-F1
101
+ ccfraud-F1
102
+ polish-F1
103
+ taiwan-F1
104
+ portoseguro-F1
105
+ travelinsurance-F1
106
+
107
+ ## REPRODUCIBILITY
108
+ 🔗 For more details, refer to our GitHub page [here](https://github.com/The-FinAI/PIXIU).
109
 
110
  """
111