HaileyStorm committed on
Commit
d3da7b8
1 Parent(s): 5122a87

Update filter_lichess_multi.py

Browse files
Files changed (1) hide show
  1. filter_lichess_multi.py +6 -7
filter_lichess_multi.py CHANGED
@@ -14,10 +14,11 @@ def process_pgn_chunk(pgn_data, output_file, start_index, end_index):
14
  csv_writer = csv.writer(csv_file)
15
 
16
  pgn = chess.pgn.read_game(chess.pgn.StringIO(pgn_data.decode('utf-8')))
17
-
18
  games_seen = 0
19
  games_added = 0
20
- while pgn is not None and games_seen < end_index - start_index:
 
 
21
  games_seen += 1
22
 
23
  # Filter games based on the specified criteria
@@ -54,18 +55,16 @@ def process_pgn_file(input_file, output_file):
54
 
55
  file_size = os.path.getsize(input_file)
56
  chunk_size = (file_size - start_at) // num_threads
57
-
58
  with open(input_file, 'rb') as pgn_file:
59
  with mmap.mmap(pgn_file.fileno(), 0, access=mmap.ACCESS_READ) as pgn_mmap:
60
- threads = []
61
  for i in range(num_threads):
62
  start_index = start_at + i * chunk_size
63
  end_index = start_at + (i + 1) * chunk_size
64
  if i == num_threads - 1:
65
  end_index = file_size
66
-
67
- pgn_chunk = pgn_mmap[start_index:end_index]
68
- thread = threading.Thread(target=process_pgn_chunk, args=(pgn_chunk, f"{output_file[:-4]}_{i}.csv", start_index, end_index))
69
  threads.append(thread)
70
  thread.start()
71
 
 
14
  csv_writer = csv.writer(csv_file)
15
 
16
  pgn = chess.pgn.read_game(chess.pgn.StringIO(pgn_data.decode('utf-8')))
 
17
  games_seen = 0
18
  games_added = 0
19
+ while pgn is not None:
20
+ if games_seen >= end_index - start_index:
21
+ break
22
  games_seen += 1
23
 
24
  # Filter games based on the specified criteria
 
55
 
56
  file_size = os.path.getsize(input_file)
57
  chunk_size = (file_size - start_at) // num_threads
58
+ threads = []
59
  with open(input_file, 'rb') as pgn_file:
60
  with mmap.mmap(pgn_file.fileno(), 0, access=mmap.ACCESS_READ) as pgn_mmap:
 
61
  for i in range(num_threads):
62
  start_index = start_at + i * chunk_size
63
  end_index = start_at + (i + 1) * chunk_size
64
  if i == num_threads - 1:
65
  end_index = file_size
66
+ pgn_data = pgn_mmap[start_index:end_index]
67
+ thread = threading.Thread(target=process_pgn_chunk, args=(pgn_data, f"{output_file[:-4]}_{i}.csv", start_index, end_index))
 
68
  threads.append(thread)
69
  thread.start()
70