Ono-Enzo committed on
Commit
770d2f5
·
verified ·
1 Parent(s): 2e2eee1

Upload 8 files

Browse files
Files changed (4) hide show
  1. .gitignore +1 -1
  2. README.md +23 -3
  3. app.py +6 -0
  4. utils.py +2 -0
.gitignore CHANGED
@@ -157,4 +157,4 @@ cython_debug/
157
  # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
  # and can be added to the global gitignore or merged into this file. For a more nuclear
159
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
- #.idea/
 
157
  # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
  # and can be added to the global gitignore or merged into this file. For a more nuclear
159
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
README.md CHANGED
@@ -1,10 +1,11 @@
 
1
  title: QASports Website - Basketball
2
  emoji: πŸ‘
3
  colorFrom: purple
4
  colorTo: green
5
  sdk: streamlit
6
  sdk_version: 1.33.0
7
- python_version: 3.1
8
  suggested_hardware: t4-small
9
  app_file: app.py
10
  pinned: false
@@ -14,8 +15,27 @@ tags:
14
  - question-answering
15
  - open-domain-qa
16
  - extractive-qa
17
- short_description: QASports the first large sports-themed QA dataset
18
  models:
19
  - deepset/roberta-base-squad2
20
  datasets:
21
- - PedroCJardim/QASports
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
  title: QASports Website - Basketball
3
  emoji: πŸ‘
4
  colorFrom: purple
5
  colorTo: green
6
  sdk: streamlit
7
  sdk_version: 1.33.0
8
+ python_version: 3.10
9
  suggested_hardware: t4-small
10
  app_file: app.py
11
  pinned: false
 
15
  - question-answering
16
  - open-domain-qa
17
  - extractive-qa
18
+ short_description: "QASports the first large sports-themed QA dataset"
19
  models:
20
  - deepset/roberta-base-squad2
21
  datasets:
22
+ - PedroCJardim/QASports
23
+ ---
24
+
25
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
26
+
27
+ # Website
28
+
29
+ This website presents a collection of documents from the "QASports" dataset, the first large sports question answering dataset for open questions. QASports contains real data about players, teams, and matches from three sports: soccer, basketball, and American football.
30
+
31
+ - 💻 Website: https://huggingface.co/spaces/leomaurodesenv/qasports-website
32
+ - 🔧 Scripts: https://github.com/leomaurodesenv/qasports-dataset-website
33
+
34
+ > **Note**: As a first version, we are focusing only on basketball data.
35
+
36
+ ## Dataset Summary
37
+
38
+ QASports is the first large sports-themed question answering dataset, with over 1.5 million questions and answers about 54k preprocessed wiki pages. Its documents come from the wikis of three of the most popular sports in the world: soccer, American football, and basketball. Each sport can be downloaded individually as a subset with train, test, and validation splits, or all three can be downloaded together.
39
+
40
+ - 🎲 Dataset: https://huggingface.co/datasets/PedroCJardim/QASports
41
+ - 🔧 Scripts: https://github.com/leomaurodesenv/qasports-dataset-scripts/
app.py CHANGED
@@ -13,6 +13,7 @@ from utils import get_unique_docs
13
  def load_documents():
14
  """
15
  Load the documents from the dataset considering only unique documents.
 
16
  Returns:
17
  - documents: list of dictionaries with the documents.
18
  """
@@ -33,6 +34,7 @@ def load_documents():
33
  def get_document_store(documents):
34
  """
35
  Index the files in the document store.
 
36
  Args:
37
  - files: list of dictionaries with the documents.
38
  """
@@ -47,8 +49,10 @@ def get_document_store(documents):
47
  def get_question_pipeline(_doc_store):
48
  """
49
  Create the pipeline with the retriever and reader components.
 
50
  Args:
51
  - doc_store: instance of the document store.
 
52
  Returns:
53
  - pipe: instance of the pipeline.
54
  """
@@ -68,9 +72,11 @@ def get_question_pipeline(_doc_store):
68
  def search(pipeline, question: str):
69
  """
70
  Search for the answer to a question in the documents.
 
71
  Args:
72
  - pipeline: instance of the pipeline.
73
  - question: string with the question.
 
74
  Returns:
75
  - answer: dictionary with the answer.
76
  """
 
13
  def load_documents():
14
  """
15
  Load the documents from the dataset considering only unique documents.
16
+
17
  Returns:
18
  - documents: list of dictionaries with the documents.
19
  """
 
34
  def get_document_store(documents):
35
  """
36
  Index the files in the document store.
37
+
38
  Args:
39
  - files: list of dictionaries with the documents.
40
  """
 
49
  def get_question_pipeline(_doc_store):
50
  """
51
  Create the pipeline with the retriever and reader components.
52
+
53
  Args:
54
  - doc_store: instance of the document store.
55
+
56
  Returns:
57
  - pipe: instance of the pipeline.
58
  """
 
72
  def search(pipeline, question: str):
73
  """
74
  Search for the answer to a question in the documents.
75
+
76
  Args:
77
  - pipeline: instance of the pipeline.
78
  - question: string with the question.
79
+
80
  Returns:
81
  - answer: dictionary with the answer.
82
  """
utils.py CHANGED
@@ -6,8 +6,10 @@ from haystack import Document
6
 
7
  def get_unique_docs(dataset, unique_docs: set):
8
  """Get unique documents from dataset
 
9
  Args:
10
  dataset: list of dictionaries
 
11
  Returns:
12
  docs: list of haystack.Document
13
  """
 
6
 
7
  def get_unique_docs(dataset, unique_docs: set):
8
  """Get unique documents from dataset
9
+
10
  Args:
11
  dataset: list of dictionaries
12
+
13
  Returns:
14
  docs: list of haystack.Document
15
  """