Spaces:
Build error
Build error
Upload 8 files
Browse files
.gitignore
CHANGED
@@ -157,4 +157,4 @@ cython_debug/
|
|
157 |
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
-
#.idea/
|
|
|
157 |
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
+
#.idea/
|
README.md
CHANGED
@@ -1,10 +1,11 @@
|
|
|
|
1 |
title: QASports Website - Basketball
|
2 |
emoji: 🏀
|
3 |
colorFrom: purple
|
4 |
colorTo: green
|
5 |
sdk: streamlit
|
6 |
sdk_version: 1.33.0
|
7 |
-
python_version: 3.
|
8 |
suggested_hardware: t4-small
|
9 |
app_file: app.py
|
10 |
pinned: false
|
@@ -14,8 +15,27 @@ tags:
|
|
14 |
- question-answering
|
15 |
- open-domain-qa
|
16 |
- extractive-qa
|
17 |
-
short_description: QASports the first large sports-themed QA dataset
|
18 |
models:
|
19 |
- deepset/roberta-base-squad2
|
20 |
datasets:
|
21 |
-
- PedroCJardim/QASports
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
title: QASports Website - Basketball
|
3 |
emoji: 🏀
|
4 |
colorFrom: purple
|
5 |
colorTo: green
|
6 |
sdk: streamlit
|
7 |
sdk_version: 1.33.0
|
8 |
+
python_version: 3.10
|
9 |
suggested_hardware: t4-small
|
10 |
app_file: app.py
|
11 |
pinned: false
|
|
|
15 |
- question-answering
|
16 |
- open-domain-qa
|
17 |
- extractive-qa
|
18 |
+
short_description: "QASports the first large sports-themed QA dataset"
|
19 |
models:
|
20 |
- deepset/roberta-base-squad2
|
21 |
datasets:
|
22 |
+
- PedroCJardim/QASports
|
23 |
+
---
|
24 |
+
|
25 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
26 |
+
|
27 |
+
# Website
|
28 |
+
|
29 |
+
This website presents a collection of documents from the "QASports" dataset, the first large sports question answering dataset for open questions. QASports contains real data about players, teams, and matches from soccer, basketball, and American football.
|
30 |
+
|
31 |
+
- 💻 Website: https://huggingface.co/spaces/leomaurodesenv/qasports-website
|
32 |
+
- 🔧 Scripts: https://github.com/leomaurodesenv/qasports-dataset-website
|
33 |
+
|
34 |
+
> **Note**: In this first version, we focus only on basketball data.
|
35 |
+
|
36 |
+
## Dataset Summary
|
37 |
+
|
38 |
+
QASports is the first large sports-themed question answering dataset, with over 1.5 million questions and answers covering 54k preprocessed wiki pages. Its documents come from the wikis of three of the most popular sports in the world: soccer, American football, and basketball. Each sport can be downloaded individually as a subset with train, test, and validation splits, or all three can be downloaded together.
|
39 |
+
|
40 |
+
- 🎲 Dataset: https://huggingface.co/datasets/PedroCJardim/QASports
|
41 |
+
- 🔧 Scripts: https://github.com/leomaurodesenv/qasports-dataset-scripts/
|
app.py
CHANGED
@@ -13,6 +13,7 @@ from utils import get_unique_docs
|
|
13 |
def load_documents():
|
14 |
"""
|
15 |
Load the documents from the dataset considering only unique documents.
|
|
|
16 |
Returns:
|
17 |
- documents: list of dictionaries with the documents.
|
18 |
"""
|
@@ -33,6 +34,7 @@ def load_documents():
|
|
33 |
def get_document_store(documents):
|
34 |
"""
|
35 |
Index the files in the document store.
|
|
|
36 |
Args:
|
37 |
- files: list of dictionaries with the documents.
|
38 |
"""
|
@@ -47,8 +49,10 @@ def get_document_store(documents):
|
|
47 |
def get_question_pipeline(_doc_store):
|
48 |
"""
|
49 |
Create the pipeline with the retriever and reader components.
|
|
|
50 |
Args:
|
51 |
- doc_store: instance of the document store.
|
|
|
52 |
Returns:
|
53 |
- pipe: instance of the pipeline.
|
54 |
"""
|
@@ -68,9 +72,11 @@ def get_question_pipeline(_doc_store):
|
|
68 |
def search(pipeline, question: str):
|
69 |
"""
|
70 |
Search for the answer to a question in the documents.
|
|
|
71 |
Args:
|
72 |
- pipeline: instance of the pipeline.
|
73 |
- question: string with the question.
|
|
|
74 |
Returns:
|
75 |
- answer: dictionary with the answer.
|
76 |
"""
|
|
|
13 |
def load_documents():
|
14 |
"""
|
15 |
Load the documents from the dataset considering only unique documents.
|
16 |
+
|
17 |
Returns:
|
18 |
- documents: list of dictionaries with the documents.
|
19 |
"""
|
|
|
34 |
def get_document_store(documents):
|
35 |
"""
|
36 |
Index the files in the document store.
|
37 |
+
|
38 |
Args:
|
39 |
- documents: list of dictionaries with the documents.
|
40 |
"""
|
|
|
49 |
def get_question_pipeline(_doc_store):
|
50 |
"""
|
51 |
Create the pipeline with the retriever and reader components.
|
52 |
+
|
53 |
Args:
|
54 |
- _doc_store: instance of the document store.
|
55 |
+
|
56 |
Returns:
|
57 |
- pipe: instance of the pipeline.
|
58 |
"""
|
|
|
72 |
def search(pipeline, question: str):
|
73 |
"""
|
74 |
Search for the answer to a question in the documents.
|
75 |
+
|
76 |
Args:
|
77 |
- pipeline: instance of the pipeline.
|
78 |
- question: string with the question.
|
79 |
+
|
80 |
Returns:
|
81 |
- answer: dictionary with the answer.
|
82 |
"""
|
utils.py
CHANGED
@@ -6,8 +6,10 @@ from haystack import Document
|
|
6 |
|
7 |
def get_unique_docs(dataset, unique_docs: set):
|
8 |
"""Get unique documents from dataset
|
|
|
9 |
Args:
|
10 |
dataset: list of dictionaries
|
|
|
11 |
Returns:
|
12 |
docs: list of haystack.Document
|
13 |
"""
|
|
|
6 |
|
7 |
def get_unique_docs(dataset, unique_docs: set):
|
8 |
"""Get unique documents from dataset
|
9 |
+
|
10 |
Args:
|
11 |
dataset: list of dictionaries
|
12 |
+
|
13 |
Returns:
|
14 |
docs: list of haystack.Document
|
15 |
"""
|