dejanseo's picture
Upload 12 files
fffb0cd verified
raw
history blame
2.15 kB
{
"builder_name": "google_wellformed_query",
"citation": "@misc{faruqui2018identifying,\n title={Identifying Well-formed Natural Language Questions},\n author={Manaal Faruqui and Dipanjan Das},\n year={2018},\n eprint={1808.09419},\n archivePrefix={arXiv},\n primaryClass={cs.CL}\n}\n",
"config_name": "default",
"dataset_name": "google_wellformed_query",
"dataset_size": 1230988,
"description": "Google's query wellformedness dataset was created by crowdsourcing well-formedness annotations for 25,100 queries from the Paralex corpus. Each query was annotated by five raters, each of whom gave a 1/0 rating indicating whether or not the query is well-formed.\n",
"download_checksums": {
"https://raw.githubusercontent.com/google-research-datasets/query-wellformedness/master/train.tsv": {
"num_bytes": 805818,
"checksum": null
},
"https://raw.githubusercontent.com/google-research-datasets/query-wellformedness/master/test.tsv": {
"num_bytes": 178070,
"checksum": null
},
"https://raw.githubusercontent.com/google-research-datasets/query-wellformedness/master/dev.tsv": {
"num_bytes": 173131,
"checksum": null
}
},
"download_size": 1157019,
"features": {
"rating": {
"dtype": "float32",
"_type": "Value"
},
"content": {
"dtype": "string",
"_type": "Value"
}
},
"homepage": "https://github.com/google-research-datasets/query-wellformedness",
"license": "",
"size_in_bytes": 2388007,
"splits": {
"train": {
"name": "train",
"num_bytes": 857383,
"num_examples": 17500,
"dataset_name": "google_wellformed_query"
},
"test": {
"name": "test",
"num_bytes": 189499,
"num_examples": 3850,
"dataset_name": "google_wellformed_query"
},
"validation": {
"name": "validation",
"num_bytes": 184106,
"num_examples": 3750,
"dataset_name": "google_wellformed_query"
}
},
"version": {
"version_str": "0.0.0",
"major": 0,
"minor": 0,
"patch": 0
}
}