HaileyStorm
committed on
Commit
•
d3da7b8
1
Parent(s):
5122a87
Update filter_lichess_multi.py
Browse files - filter_lichess_multi.py +6 -7
filter_lichess_multi.py
CHANGED
@@ -14,10 +14,11 @@ def process_pgn_chunk(pgn_data, output_file, start_index, end_index):
|
|
14 |
csv_writer = csv.writer(csv_file)
|
15 |
|
16 |
pgn = chess.pgn.read_game(chess.pgn.StringIO(pgn_data.decode('utf-8')))
|
17 |
-
|
18 |
games_seen = 0
|
19 |
games_added = 0
|
20 |
-
while pgn is not None
|
|
|
|
|
21 |
games_seen += 1
|
22 |
|
23 |
# Filter games based on the specified criteria
|
@@ -54,18 +55,16 @@ def process_pgn_file(input_file, output_file):
|
|
54 |
|
55 |
file_size = os.path.getsize(input_file)
|
56 |
chunk_size = (file_size - start_at) // num_threads
|
57 |
-
|
58 |
with open(input_file, 'rb') as pgn_file:
|
59 |
with mmap.mmap(pgn_file.fileno(), 0, access=mmap.ACCESS_READ) as pgn_mmap:
|
60 |
-
threads = []
|
61 |
for i in range(num_threads):
|
62 |
start_index = start_at + i * chunk_size
|
63 |
end_index = start_at + (i + 1) * chunk_size
|
64 |
if i == num_threads - 1:
|
65 |
end_index = file_size
|
66 |
-
|
67 |
-
|
68 |
-
thread = threading.Thread(target=process_pgn_chunk, args=(pgn_chunk, f"{output_file[:-4]}_{i}.csv", start_index, end_index))
|
69 |
threads.append(thread)
|
70 |
thread.start()
|
71 |
|
|
|
14 |
csv_writer = csv.writer(csv_file)
|
15 |
|
16 |
pgn = chess.pgn.read_game(chess.pgn.StringIO(pgn_data.decode('utf-8')))
|
|
|
17 |
games_seen = 0
|
18 |
games_added = 0
|
19 |
+
while pgn is not None:
|
20 |
+
if games_seen >= end_index - start_index:
|
21 |
+
break
|
22 |
games_seen += 1
|
23 |
|
24 |
# Filter games based on the specified criteria
|
|
|
55 |
|
56 |
file_size = os.path.getsize(input_file)
|
57 |
chunk_size = (file_size - start_at) // num_threads
|
58 |
+
threads = []
|
59 |
with open(input_file, 'rb') as pgn_file:
|
60 |
with mmap.mmap(pgn_file.fileno(), 0, access=mmap.ACCESS_READ) as pgn_mmap:
|
|
|
61 |
for i in range(num_threads):
|
62 |
start_index = start_at + i * chunk_size
|
63 |
end_index = start_at + (i + 1) * chunk_size
|
64 |
if i == num_threads - 1:
|
65 |
end_index = file_size
|
66 |
+
pgn_data = pgn_mmap[start_index:end_index]
|
67 |
+
thread = threading.Thread(target=process_pgn_chunk, args=(pgn_data, f"{output_file[:-4]}_{i}.csv", start_index, end_index))
|
|
|
68 |
threads.append(thread)
|
69 |
thread.start()
|
70 |
|