pathfinder_v3 / data /dataset_info.json
kiyer's picture
Upload 13 files
a354504 verified
raw
history blame
4 kB
{
"builder_name": "parquet",
"citation": "",
"config_name": "default",
"dataset_name": "pathfinder_arxiv_data",
"dataset_size": 4065510154,
"description": "",
"download_checksums": {
"hf://datasets/kiyer/pathfinder_arxiv_data@1a8eaa7eef5a503386a1487e20f13bedba605245/data/train-00000-of-00008.parquet": {
"num_bytes": 406754152,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@1a8eaa7eef5a503386a1487e20f13bedba605245/data/train-00001-of-00008.parquet": {
"num_bytes": 405109745,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@1a8eaa7eef5a503386a1487e20f13bedba605245/data/train-00002-of-00008.parquet": {
"num_bytes": 405466052,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@1a8eaa7eef5a503386a1487e20f13bedba605245/data/train-00003-of-00008.parquet": {
"num_bytes": 406784839,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@1a8eaa7eef5a503386a1487e20f13bedba605245/data/train-00004-of-00008.parquet": {
"num_bytes": 404752067,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@1a8eaa7eef5a503386a1487e20f13bedba605245/data/train-00005-of-00008.parquet": {
"num_bytes": 404624503,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@1a8eaa7eef5a503386a1487e20f13bedba605245/data/train-00006-of-00008.parquet": {
"num_bytes": 392634525,
"checksum": null
},
"hf://datasets/kiyer/pathfinder_arxiv_data@1a8eaa7eef5a503386a1487e20f13bedba605245/data/train-00007-of-00008.parquet": {
"num_bytes": 397101125,
"checksum": null
}
},
"download_size": 3223227008,
"features": {
"ads_id": {
"dtype": "string",
"_type": "Value"
},
"arxiv_id": {
"dtype": "string",
"_type": "Value"
},
"title": {
"dtype": "string",
"_type": "Value"
},
"abstract": {
"dtype": "string",
"_type": "Value"
},
"embed": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"_type": "Sequence"
},
"umap_x": {
"dtype": "float32",
"_type": "Value"
},
"umap_y": {
"dtype": "float32",
"_type": "Value"
},
"date": {
"dtype": "date32",
"_type": "Value"
},
"cites": {
"dtype": "int64",
"_type": "Value"
},
"bibcode": {
"dtype": "string",
"_type": "Value"
},
"keywords": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"ads_keywords": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"read_count": {
"dtype": "int64",
"_type": "Value"
},
"doi": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"authors": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"aff": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"cite_bibcodes": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"ref_bibcodes": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
}
},
"homepage": "",
"license": "",
"size_in_bytes": 7288737162,
"splits": {
"train": {
"name": "train",
"num_bytes": 4065510154,
"num_examples": 352194,
"shard_lengths": [
43000,
43025,
43025,
43024,
44024,
44024,
46024,
44024,
2024
],
"dataset_name": "pathfinder_arxiv_data"
}
},
"version": {
"version_str": "0.0.0",
"major": 0,
"minor": 0,
"patch": 0
}
}