pathfinder_v3 / data /dataset_info.json
kiyer
major upgrade to v2.0
d1fa2c0
raw
history blame
4.79 kB
{
"builder_name": "parquet",
"citation": "",
"config_name": "default",
"dataset_name": "pathfinder_arxiv_data",
"dataset_size": 5770056875,
"description": "",
"download_checksums": {
"hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00000-of-00012.parquet": {
"num_bytes": 384481705,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00001-of-00012.parquet": {
"num_bytes": 383347319,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00002-of-00012.parquet": {
"num_bytes": 383133689,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00003-of-00012.parquet": {
"num_bytes": 384399351,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00004-of-00012.parquet": {
"num_bytes": 382810245,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00005-of-00012.parquet": {
"num_bytes": 382870394,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00006-of-00012.parquet": {
"num_bytes": 364849142,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00007-of-00012.parquet": {
"num_bytes": 363965178,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00008-of-00012.parquet": {
"num_bytes": 376639054,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00009-of-00012.parquet": {
"num_bytes": 384035100,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00010-of-00012.parquet": {
"num_bytes": 355126903,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00011-of-00012.parquet": {
"num_bytes": 359912183,
"checksum": null
}
},
"download_size": 4505570263,
"features": {
"ads_id": {
"dtype": "string",
"_type": "Value"
},
"arxiv_id": {
"dtype": "string",
"_type": "Value"
},
"title": {
"dtype": "string",
"_type": "Value"
},
"abstract": {
"dtype": "string",
"_type": "Value"
},
"embed": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"_type": "Sequence"
},
"umap_x": {
"dtype": "float32",
"_type": "Value"
},
"umap_y": {
"dtype": "float32",
"_type": "Value"
},
"date": {
"dtype": "date32",
"_type": "Value"
},
"cites": {
"dtype": "int64",
"_type": "Value"
},
"bibcode": {
"dtype": "string",
"_type": "Value"
},
"keywords": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"ads_keywords": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"read_count": {
"dtype": "int64",
"_type": "Value"
},
"doi": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"authors": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"aff": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"cite_bibcodes": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"ref_bibcodes": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
}
},
"homepage": "",
"license": "",
"size_in_bytes": 10275627138,
"splits": {
"train": {
"name": "train",
"num_bytes": 5770056875,
"num_examples": 499142,
"shard_lengths": [
42596,
43596,
43595,
42595,
43595,
43595,
46595,
44595,
43595,
43595,
43595,
17595
],
"dataset_name": "pathfinder_arxiv_data"
}
},
"version": {
"version_str": "0.0.0",
"major": 0,
"minor": 0,
"patch": 0
}
}