raannakasturi committed on
Commit
9edc728
·
verified ·
1 Parent(s): a8d745c

Update arvix.py

Browse files
Files changed (1) hide show
  1. arvix.py +5 -5
arvix.py CHANGED
@@ -54,7 +54,7 @@ def extract_data(category):
54
  if category in ["hep-ex", "hep-lat", "hep-ph", "hep-th"]:
55
  category_list = []
56
  for id in data:
57
- if len(category_list) >= 3:
58
  break
59
  if tools.check_data_in_file(id, 'arxiv.txt'):
60
  continue
@@ -65,7 +65,7 @@ def extract_data(category):
65
  tools.write_data_to_file(id, 'arxiv.txt')
66
  else:
67
  for id in data:
68
- if len(sanitized_data) >= 12:
69
  break
70
  if tools.check_data_in_file(id, 'arxiv.txt'):
71
  continue
@@ -104,7 +104,7 @@ def extract_arxiv_data():
104
  temp_id_storage = []
105
  for subcategory in subcategories:
106
  ids = extract_data(subcategory)
107
- if len(ids) == 12:
108
  for id in ids:
109
  temp_id_storage.append(id)
110
  else:
@@ -113,8 +113,8 @@ def extract_arxiv_data():
113
  for temp_id in temp_id_storage:
114
  all_ids.append(temp_id)
115
  random.shuffle(all_ids)
116
- if len(all_ids) > 12:
117
- print(f"Found more than 12 papers for {category}.")
118
  all_ids = all_ids
119
  category_data['count'] = len(all_ids)
120
  category_data['ids'] = all_ids
 
54
  if category in ["hep-ex", "hep-lat", "hep-ph", "hep-th"]:
55
  category_list = []
56
  for id in data:
57
+ if len(category_list) >= 1:
58
  break
59
  if tools.check_data_in_file(id, 'arxiv.txt'):
60
  continue
 
65
  tools.write_data_to_file(id, 'arxiv.txt')
66
  else:
67
  for id in data:
68
+ if len(sanitized_data) >= 4:
69
  break
70
  if tools.check_data_in_file(id, 'arxiv.txt'):
71
  continue
 
104
  temp_id_storage = []
105
  for subcategory in subcategories:
106
  ids = extract_data(subcategory)
107
+ if len(ids) == 4:
108
  for id in ids:
109
  temp_id_storage.append(id)
110
  else:
 
113
  for temp_id in temp_id_storage:
114
  all_ids.append(temp_id)
115
  random.shuffle(all_ids)
116
+ if len(all_ids) > 4:
117
+ print(f"Found more than 4 papers for {category}.")
118
  all_ids = all_ids
119
  category_data['count'] = len(all_ids)
120
  category_data['ids'] = all_ids