lewtun HF staff committed on
Commit
b66bb5e
1 Parent(s): b0781a3
Files changed (1) hide show
  1. app.py +8 -5
app.py CHANGED
@@ -38,7 +38,6 @@ def download_submissions():
38
  tags = extract_tags(dataset)
39
  if tags.get("benchmark") == "ought/raft" and tags.get("type") == "evaluation":
40
  submissions.append(dataset)
41
- submissions = sorted(submissions, key=lambda x: int(x["id"].split("-")[-1]))
42
  return submissions
43
 
44
 
@@ -47,7 +46,7 @@ def format_submissions(submissions):
47
 
48
  # TODO(lewtun): delete / filter all the junk repos from development
49
  # The following picks the latest submissions which adhere to the model card schema
50
- for submission in submissions[-2:]:
51
  submission_id = submission["id"]
52
  response = requests.get(
53
  f"http://huggingface.co/api/datasets/{submission_id}?full=true",
@@ -80,15 +79,19 @@ def format_submissions(submissions):
80
  ###########
81
  st.set_page_config(layout="wide")
82
  st.title("RAFT: Real-world Annotated Few-shot Tasks")
83
- st.markdown("""
84
- Large pre-trained language models have shown promise for few-shot learning, completing text-based tasks given only a few task-specific examples. Will models soon solve classification tasks that have so far been reserved for human research assistants? RAFT is a few-shot classification benchmark that tests language models:
 
 
 
85
 
86
  - across multiple domains (lit review, tweets, customer interaction, etc.)
87
  - on economically valuable classification tasks (someone inherently cares about the task)
88
  - in a setting that mirrors deployment (50 examples per task, info retrieval allowed, hidden test set)
89
 
90
  To submit to RAFT, follow the instruction posted on [this page](https://github.com/oughtinc/raft_submission).
91
- """)
 
92
  submissions = download_submissions()
93
  df = format_submissions(submissions)
94
  # hack to remove index column from https://github.com/streamlit/streamlit/issues/641
 
38
  tags = extract_tags(dataset)
39
  if tags.get("benchmark") == "ought/raft" and tags.get("type") == "evaluation":
40
  submissions.append(dataset)
 
41
  return submissions
42
 
43
 
 
46
 
47
  # TODO(lewtun): delete / filter all the junk repos from development
48
  # The following picks the latest submissions which adhere to the model card schema
49
+ for submission in submissions:
50
  submission_id = submission["id"]
51
  response = requests.get(
52
  f"http://huggingface.co/api/datasets/{submission_id}?full=true",
 
79
  ###########
80
  st.set_page_config(layout="wide")
81
  st.title("RAFT: Real-world Annotated Few-shot Tasks")
82
+ st.markdown(
83
+ """
84
+ Large pre-trained language models have shown promise for few-shot learning, completing text-based tasks given only a few task-specific examples. Will models soon solve classification tasks that have so far been reserved for human research assistants?
85
+
86
+ [RAFT](https://raft.elicit.org) is a few-shot classification benchmark that tests language models:
87
 
88
  - across multiple domains (lit review, tweets, customer interaction, etc.)
89
  - on economically valuable classification tasks (someone inherently cares about the task)
90
  - in a setting that mirrors deployment (50 examples per task, info retrieval allowed, hidden test set)
91
 
92
  To submit to RAFT, follow the instruction posted on [this page](https://github.com/oughtinc/raft_submission).
93
+ """
94
+ )
95
  submissions = download_submissions()
96
  df = format_submissions(submissions)
97
  # hack to remove index column from https://github.com/streamlit/streamlit/issues/641