{ "builder_name": "parquet", "citation": "", "config_name": "default", "dataset_name": "pathfinder_arxiv_data_galaxy", "dataset_size": 505886100, "description": "", "download_checksums": { "hf://datasets/kiyer/pathfinder_arxiv_data_galaxy@29754b03f3cd82e4051ece1cf96605f8756bc197/data/train-00000-of-00001.parquet": { "num_bytes": 379674094, "checksum": null } }, "download_size": 379674094, "features": { "ads_id": { "dtype": "string", "_type": "Value" }, "arxiv_id": { "dtype": "string", "_type": "Value" }, "title": { "dtype": "string", "_type": "Value" }, "abstract": { "dtype": "string", "_type": "Value" }, "embed": { "feature": { "dtype": "float32", "_type": "Value" }, "_type": "Sequence" }, "umap_x": { "dtype": "float32", "_type": "Value" }, "umap_y": { "dtype": "float32", "_type": "Value" }, "date": { "dtype": "date32", "_type": "Value" }, "cites": { "dtype": "int64", "_type": "Value" }, "bibcode": { "dtype": "string", "_type": "Value" }, "keywords": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "ads_keywords": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "read_count": { "dtype": "int64", "_type": "Value" }, "doi": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "authors": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "aff": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "cite_bibcodes": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "ref_bibcodes": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" } }, "homepage": "", "license": "", "size_in_bytes": 885560194, "splits": { "train": { "name": "train", "num_bytes": 505886100, "num_examples": 41195, "shard_lengths": [ 41000, 195 ], "dataset_name": "pathfinder_arxiv_data_galaxy" } }, "version": { "version_str": "0.0.0", "major": 0, "minor": 0, "patch": 0 } }