{ "builder_name": "parquet", "citation": "", "config_name": "default", "dataset_name": "pathfinder_arxiv_data", "dataset_size": 5770056875, "description": "", "download_checksums": { "hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00000-of-00012.parquet": { "num_bytes": 384481705, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00001-of-00012.parquet": { "num_bytes": 383347319, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00002-of-00012.parquet": { "num_bytes": 383133689, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00003-of-00012.parquet": { "num_bytes": 384399351, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00004-of-00012.parquet": { "num_bytes": 382810245, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00005-of-00012.parquet": { "num_bytes": 382870394, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00006-of-00012.parquet": { "num_bytes": 364849142, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00007-of-00012.parquet": { "num_bytes": 363965178, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00008-of-00012.parquet": { "num_bytes": 376639054, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00009-of-00012.parquet": { "num_bytes": 384035100, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00010-of-00012.parquet": { "num_bytes": 355126903, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@66fc52fb3d7d82779c3d73b0cb0c14218cb02e63/data/train-00011-of-00012.parquet": { "num_bytes": 359912183, "checksum": null } }, "download_size": 4505570263, "features": { "ads_id": { "dtype": "string", "_type": "Value" }, "arxiv_id": { "dtype": "string", "_type": "Value" }, "title": { "dtype": "string", "_type": "Value" }, "abstract": { "dtype": "string", "_type": "Value" }, "embed": { "feature": { "dtype": "float32", "_type": "Value" }, "_type": "Sequence" }, "umap_x": { "dtype": "float32", "_type": "Value" }, "umap_y": { "dtype": "float32", "_type": "Value" }, "date": { "dtype": "date32", "_type": "Value" }, "cites": { "dtype": "int64", "_type": "Value" }, "bibcode": { "dtype": "string", "_type": "Value" }, "keywords": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "ads_keywords": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "read_count": { "dtype": "int64", "_type": "Value" }, "doi": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "authors": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "aff": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "cite_bibcodes": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "ref_bibcodes": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" } }, "homepage": "", "license": "", "size_in_bytes": 10275627138, "splits": { "train": { "name": "train", "num_bytes": 5770056875, "num_examples": 499142, "shard_lengths": [ 42596, 43596, 43595, 42595, 43595, 43595, 46595, 44595, 43595, 43595, 43595, 17595 ], "dataset_name": "pathfinder_arxiv_data" } }, "version": { "version_str": "0.0.0", "major": 0, "minor": 0, "patch": 0 } }