File size: 2,609 Bytes
4351936
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
{
  "builder_name": "parquet",
  "citation": "",
  "config_name": "default",
  "dataset_name": "pathfinder_arxiv_data_galaxy",
  "dataset_size": 505886100,
  "description": "",
  "download_checksums": {
    "hf://datasets/kiyer/pathfinder_arxiv_data_galaxy@29754b03f3cd82e4051ece1cf96605f8756bc197/data/train-00000-of-00001.parquet": {
      "num_bytes": 379674094,
      "checksum": null
    }
  },
  "download_size": 379674094,
  "features": {
    "ads_id": {
      "dtype": "string",
      "_type": "Value"
    },
    "arxiv_id": {
      "dtype": "string",
      "_type": "Value"
    },
    "title": {
      "dtype": "string",
      "_type": "Value"
    },
    "abstract": {
      "dtype": "string",
      "_type": "Value"
    },
    "embed": {
      "feature": {
        "dtype": "float32",
        "_type": "Value"
      },
      "_type": "Sequence"
    },
    "umap_x": {
      "dtype": "float32",
      "_type": "Value"
    },
    "umap_y": {
      "dtype": "float32",
      "_type": "Value"
    },
    "date": {
      "dtype": "date32",
      "_type": "Value"
    },
    "cites": {
      "dtype": "int64",
      "_type": "Value"
    },
    "bibcode": {
      "dtype": "string",
      "_type": "Value"
    },
    "keywords": {
      "feature": {
        "dtype": "string",
        "_type": "Value"
      },
      "_type": "Sequence"
    },
    "ads_keywords": {
      "feature": {
        "dtype": "string",
        "_type": "Value"
      },
      "_type": "Sequence"
    },
    "read_count": {
      "dtype": "int64",
      "_type": "Value"
    },
    "doi": {
      "feature": {
        "dtype": "string",
        "_type": "Value"
      },
      "_type": "Sequence"
    },
    "authors": {
      "feature": {
        "dtype": "string",
        "_type": "Value"
      },
      "_type": "Sequence"
    },
    "aff": {
      "feature": {
        "dtype": "string",
        "_type": "Value"
      },
      "_type": "Sequence"
    },
    "cite_bibcodes": {
      "feature": {
        "dtype": "string",
        "_type": "Value"
      },
      "_type": "Sequence"
    },
    "ref_bibcodes": {
      "feature": {
        "dtype": "string",
        "_type": "Value"
      },
      "_type": "Sequence"
    }
  },
  "homepage": "",
  "license": "",
  "size_in_bytes": 885560194,
  "splits": {
    "train": {
      "name": "train",
      "num_bytes": 505886100,
      "num_examples": 41195,
      "shard_lengths": [
        41000,
        195
      ],
      "dataset_name": "pathfinder_arxiv_data_galaxy"
    }
  },
  "version": {
    "version_str": "0.0.0",
    "major": 0,
    "minor": 0,
    "patch": 0
  }
}