charlieoneill committed on
Commit
8ca8ccd
·
verified ·
1 Parent(s): f8be7ed

Upload astro_paper_metadata/train/dataset_info.json with huggingface_hub

Browse files
astro_paper_metadata/train/dataset_info.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "parquet",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "astro_paper_corpus",
6
+ "dataset_size": 4128813829,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00000-of-00009.parquet": {
10
+ "num_bytes": 240072323,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00001-of-00009.parquet": {
14
+ "num_bytes": 235851056,
15
+ "checksum": null
16
+ },
17
+ "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00002-of-00009.parquet": {
18
+ "num_bytes": 236413937,
19
+ "checksum": null
20
+ },
21
+ "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00003-of-00009.parquet": {
22
+ "num_bytes": 237728419,
23
+ "checksum": null
24
+ },
25
+ "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00004-of-00009.parquet": {
26
+ "num_bytes": 236710419,
27
+ "checksum": null
28
+ },
29
+ "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00005-of-00009.parquet": {
30
+ "num_bytes": 239567004,
31
+ "checksum": null
32
+ },
33
+ "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00006-of-00009.parquet": {
34
+ "num_bytes": 234863979,
35
+ "checksum": null
36
+ },
37
+ "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00007-of-00009.parquet": {
38
+ "num_bytes": 232662046,
39
+ "checksum": null
40
+ },
41
+ "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00008-of-00009.parquet": {
42
+ "num_bytes": 237444927,
43
+ "checksum": null
44
+ }
45
+ },
46
+ "download_size": 2131314110,
47
+ "features": {
48
+ "id": {
49
+ "dtype": "string",
50
+ "_type": "Value"
51
+ },
52
+ "author": {
53
+ "feature": {
54
+ "dtype": "string",
55
+ "_type": "Value"
56
+ },
57
+ "_type": "Sequence"
58
+ },
59
+ "bibcode": {
60
+ "dtype": "string",
61
+ "_type": "Value"
62
+ },
63
+ "title": {
64
+ "feature": {
65
+ "dtype": "string",
66
+ "_type": "Value"
67
+ },
68
+ "_type": "Sequence"
69
+ },
70
+ "citation_count": {
71
+ "dtype": "int64",
72
+ "_type": "Value"
73
+ },
74
+ "aff": {
75
+ "feature": {
76
+ "dtype": "string",
77
+ "_type": "Value"
78
+ },
79
+ "_type": "Sequence"
80
+ },
81
+ "citation": {
82
+ "feature": {
83
+ "dtype": "string",
84
+ "_type": "Value"
85
+ },
86
+ "_type": "Sequence"
87
+ },
88
+ "database": {
89
+ "feature": {
90
+ "dtype": "string",
91
+ "_type": "Value"
92
+ },
93
+ "_type": "Sequence"
94
+ },
95
+ "read_count": {
96
+ "dtype": "int64",
97
+ "_type": "Value"
98
+ },
99
+ "keyword": {
100
+ "feature": {
101
+ "dtype": "string",
102
+ "_type": "Value"
103
+ },
104
+ "_type": "Sequence"
105
+ },
106
+ "reference": {
107
+ "feature": {
108
+ "dtype": "string",
109
+ "_type": "Value"
110
+ },
111
+ "_type": "Sequence"
112
+ },
113
+ "doi": {
114
+ "feature": {
115
+ "dtype": "string",
116
+ "_type": "Value"
117
+ },
118
+ "_type": "Sequence"
119
+ },
120
+ "subfolder": {
121
+ "dtype": "string",
122
+ "_type": "Value"
123
+ },
124
+ "filename": {
125
+ "dtype": "string",
126
+ "_type": "Value"
127
+ },
128
+ "introduction": {
129
+ "dtype": "string",
130
+ "_type": "Value"
131
+ },
132
+ "conclusions": {
133
+ "dtype": "string",
134
+ "_type": "Value"
135
+ },
136
+ "year": {
137
+ "dtype": "int64",
138
+ "_type": "Value"
139
+ },
140
+ "month": {
141
+ "dtype": "int64",
142
+ "_type": "Value"
143
+ },
144
+ "arxiv_id": {
145
+ "dtype": "string",
146
+ "_type": "Value"
147
+ },
148
+ "abstract": {
149
+ "dtype": "string",
150
+ "_type": "Value"
151
+ },
152
+ "failed_ids": {
153
+ "dtype": "bool",
154
+ "_type": "Value"
155
+ },
156
+ "keyword_search": {
157
+ "feature": {
158
+ "dtype": "string",
159
+ "_type": "Value"
160
+ },
161
+ "_type": "Sequence"
162
+ },
163
+ "umap_x": {
164
+ "dtype": "float32",
165
+ "_type": "Value"
166
+ },
167
+ "umap_y": {
168
+ "dtype": "float32",
169
+ "_type": "Value"
170
+ },
171
+ "clust_id": {
172
+ "dtype": "int64",
173
+ "_type": "Value"
174
+ }
175
+ },
176
+ "homepage": "",
177
+ "license": "",
178
+ "size_in_bytes": 6260127939,
179
+ "splits": {
180
+ "train": {
181
+ "name": "train",
182
+ "num_bytes": 4128813829,
183
+ "num_examples": 271544,
184
+ "shard_lengths": [
185
+ 33172,
186
+ 33172,
187
+ 33172,
188
+ 33172,
189
+ 33172,
190
+ 33171,
191
+ 34171,
192
+ 34171,
193
+ 4171
194
+ ],
195
+ "dataset_name": "astro_paper_corpus"
196
+ }
197
+ },
198
+ "version": {
199
+ "version_str": "0.0.0",
200
+ "major": 0,
201
+ "minor": 0,
202
+ "patch": 0
203
+ }
204
+ }