Yu (Hope) Hou committed on
Commit 7b5f267 · 1 Parent(s): 5683255

update full instructions V1

Files changed (2)
  1. app.py +10 -10
  2. src/about.py +74 -6
app.py CHANGED
@@ -287,7 +287,7 @@ with demo:
 
             with gr.Row():
                 with gr.Column():
-                    model_name_textbox = gr.Textbox(label="QA Model name")
+                    model_name_textbox = gr.Textbox(label="QA model name")
                     revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
                     model_type = gr.Dropdown(
                         choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
@@ -330,15 +330,15 @@ with demo:
             submission_result,
         )
 
-    with gr.Row():
-        with gr.Accordion("📙 More about the task", open=False):
-            citation_button = gr.Textbox(
-                value=CITATION_BUTTON_TEXT,
-                label=CITATION_BUTTON_LABEL,
-                lines=20,
-                elem_id="citation-button",
-                show_copy_button=True,
-            )
+    # with gr.Row():
+    #     with gr.Accordion("📙 More about the task", open=False):
+    #         citation_button = gr.Textbox(
+    #             value=CITATION_BUTTON_TEXT,
+    #             label=CITATION_BUTTON_LABEL,
+    #             lines=20,
+    #             elem_id="citation-button",
+    #             show_copy_button=True,
+    #         )
 
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
src/about.py CHANGED
@@ -25,20 +25,88 @@ TITLE = """<h1 align="center" id="space-title">Grounded QA leaderboard</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-Build an open-domain QA system that can answer any question posed by humans!
+Build an open-domain QA system that can answer any question posed by humans! For more details, see https://sites.google.com/view/qanta/home
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
-## Generative QA
-If you are working on a generative QA model, you are expected to submit your system by filling the `Model name`.
-
-## Extractive QA
-If you are working on a generative QA model, you are expected to submit your QA system by filling the `Model name` and your retriever in `Base model`.
+## QA variants
+
+### Generative QA
+This type of QA system aims to generate an answer to a given question directly.
+
+#### Input
+(1) `question` string
+
+```
+E.g. qa_pipe(question)
+```
+
+#### Output
+Return in JSON format: (1) a `guess` string, and (2) a `confidence` score, a float representing the probability (0-1) of your guess.
+
+```
+E.g. {'guess': 'Apple', 'confidence': 0.02}
+```
+
+Reminder: check the tutorial provided to see how to calculate the probability of the generated tokens!
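For illustration, here is one way that calculation can look with the `transformers` generation utilities (a minimal sketch, not the tutorial's code: the `gpt2` checkpoint and the geometric-mean aggregation are assumptions):

```
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Stand-in checkpoint; swap in your own generative QA model.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Where is UMD?", return_tensors="pt")

# Ask generate() to return the per-step scores alongside the sequences.
out = model.generate(
    **inputs,
    max_new_tokens=10,
    do_sample=False,
    output_scores=True,
    return_dict_in_generate=True,
)

# Log-probabilities of the tokens that were actually generated.
log_probs = model.compute_transition_scores(
    out.sequences, out.scores, normalize_logits=True
)

# One 0-1 confidence: the geometric mean of the token probabilities,
# i.e. exp of the mean log-probability over the generated tokens.
confidence = torch.exp(log_probs[0].mean()).item()

guess = tokenizer.decode(
    out.sequences[0, inputs["input_ids"].shape[1]:], skip_special_tokens=True
)
print({"guess": guess.strip(), "confidence": confidence})
```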
+
+### Extractive QA
+This type of QA system aims to extract an answer span from a context passage for a given question.
+
+#### Input
+(1) `question` string, and (2) `context` string
+
+```
+E.g. qa_pipe(question=question, context=context)
+```
+
+#### Output
+Return in JSON format: (1) a `guess` string, and (2) a `confidence` score, a float representing the probability (0-1) of your guess.
+
+```
+E.g. {'guess': 'Apple', 'confidence': 0.02}
+```
+
+Reminder: if you are already working with an extractive QA model, Hugging Face QA pipelines output a `score` field, so you only need to rename `score` to `confidence`.
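That wrapping step might look like the following (a minimal sketch; the checkpoint name is only an example):

```
from transformers import pipeline

# Any extractive QA checkpoint works here; this one is just an example.
extractive_pipe = pipeline(
    "question-answering", model="distilbert-base-cased-distilled-squad"
)

def answer(question: str, context: str) -> dict:
    # HF question-answering pipelines return {'score', 'start', 'end', 'answer'};
    # rename the fields to the leaderboard's expected JSON keys.
    result = extractive_pipe(question=question, context=context)
    return {"guess": result["answer"], "confidence": float(result["score"])}

print(answer("Where is UMD?", "UMD is in Maryland."))
```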
+
+#### Customized retriever
+If you do not submit a retriever, we will fill the `context` string with our pre-loaded context. However, we do provide the option to customize the retriever model and the dataset you wish to retrieve from. Please check the tutorial example for more details.
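The tutorial example is the reference; roughly, a customized retriever just produces the `context` string before the pipeline call. A minimal sketch with a TF-IDF stand-in (the corpus and the retriever choice here are assumptions):

```
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Stand-in corpus; in practice this is the dataset you chose for retrieval.
docs = [
    "UMD is in Maryland.",
    "The Eiffel Tower is in Paris.",
]

vectorizer = TfidfVectorizer()
doc_vecs = vectorizer.fit_transform(docs)

def retrieve(question: str) -> str:
    # Return the document most similar to the question as the context.
    q_vec = vectorizer.transform([question])
    best = cosine_similarity(q_vec, doc_vecs).argmax()
    return docs[best]

context = retrieve("Where is UMD?")
# ...then feed it to your extractive pipeline:
# extractive_pipe(question="Where is UMD?", context=context)
```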
+
+## Evaluation Metric
+Each question in the test set is parsed into multiple runs, and each run is fed as the question to your pipeline. The confidence scores calculated for all runs are then used to compute the Buzz Confidence.
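Assuming a run is an incremental prefix of the question text, QANTA-style (an assumption; the official parsing may differ), the per-question loop looks roughly like:

```
def make_runs(question: str, n_runs: int = 4) -> list[str]:
    # Assumption: a "run" is an incremental prefix of the question,
    # revealed a chunk of words at a time (QANTA-style).
    words = question.split()
    step = max(1, len(words) // n_runs)
    return [" ".join(words[: i + step]) for i in range(0, len(words), step)]

def confidences_for(question: str, qa_pipe) -> list[float]:
    # Feed each run to the pipeline and collect its confidence score;
    # these per-run confidences feed into the Buzz Confidence metric.
    return [qa_pipe(run)["confidence"] for run in make_runs(question)]
```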
+
+## FAQ
+What if my system type is not specified here or not supported yet?
+- Please make a private post to the instructors so we can check how to adapt the leaderboard for your purpose. Thanks!
+
+I don't understand where to start building a QA system for submission.
+- Please check our submission tutorials. From there, you can fine-tune or build anything on top of the base models.
 """
 
 EVALUATION_QUEUE_TEXT = """
-We have the generative QA tutorial [here](https://docs.google.com/document/d/1ZwyEnrF7OjkDdNpEsiSU1RPEilepAmmFJZgVypzm9Es)!
+**Step 1: Make sure it works locally**
+After you have a QA system uploaded to Hugging Face (with a license specified), run the following example code to check that your pipeline returns the guess and confidence score in **JSON** format.
+
+```
+from transformers import pipeline
+qa_pipe = pipeline(model="...", trust_remote_code=True)
+
+# If it is a Generative QA pipeline
+qa_pipe("Where is UMD?")
+
+# If it is an Extractive QA pipeline
+qa_pipe(question="Where is UMD?", context="UMD is in Maryland.")
+```
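A quick sanity check of that output shape before submitting might look like this (a sketch; the leaderboard's own validation may differ):

```
import json

def check_output(result) -> None:
    # The leaderboard expects a JSON object with a string guess and a
    # float confidence in [0, 1].
    assert isinstance(result, dict), "output must be a JSON object/dict"
    assert isinstance(result.get("guess"), str), "'guess' must be a string"
    confidence = result.get("confidence")
    assert isinstance(confidence, float), "'confidence' must be a float"
    assert 0.0 <= confidence <= 1.0, "'confidence' must be in [0, 1]"
    print("looks good:", json.dumps(result))

check_output({"guess": "Apple", "confidence": 0.02})
```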
+
+**Step 2: Fill in the submission form**
+(1) Fill in the `QA model name`.
+(2) Fill in the `Revision commit`; if you leave it empty, it defaults to `main`.
+(3) Fill in the `Model type`.
+(4) `Precision` defaults to `float16`. You can update it as needed.
+(5) If you have a trained retriever and want to submit an Extractive QA system, please also fill in the `Retrieved dataset name` and `Retriever model`.
+
+Here is a tutorial on how to write pipeline wrappers for submissions: [Colab](https://colab.research.google.com/drive/1bCt2870SdY6tI4uE3JPG8_3nLmNJXX6_?usp=sharing)
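The Colab notebook is the reference; the general shape of such a wrapper is a callable that hides the model behind the expected signature and JSON output. A minimal sketch (the `gpt2` checkpoint and the placeholder confidence are assumptions):

```
from transformers import pipeline

class GenerativeQAPipe:
    """Minimal wrapper giving a text-generation model the expected
    call signature and JSON output (a sketch, not the tutorial's code)."""

    def __init__(self, model_name: str = "gpt2"):  # stand-in model name
        self.generator = pipeline("text-generation", model=model_name)

    def __call__(self, question: str) -> dict:
        out = self.generator(question, max_new_tokens=10, return_full_text=False)
        guess = out[0]["generated_text"].strip()
        # Placeholder confidence; compute it from token probabilities
        # as sketched in the Generative QA section above.
        return {"guess": guess, "confidence": 0.5}

qa_pipe = GenerativeQAPipe()
print(qa_pipe("Where is UMD?"))
```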
 """
 
 CITATION_BUTTON_LABEL = "Copy the following link to check more details"