HaileyStorm
/

chess-mamba-vs-xformer

Model card Files Files and versions Community

HaileyStorm committed on Mar 26

Commit

22cfeab

•

1 Parent(s): d3da7b8

Delete filter_lichess_multi.py

Browse files

Files changed (1) hide show

filter_lichess_multi.py +0 -77

filter_lichess_multi.py DELETED Viewed

@@ -1,77 +0,0 @@
-import chess
-import chess.pgn
-import csv
-import os
-import threading
-import mmap
-start_at = 0  # byte offset into the input file where chunking begins
-total_games = 92055571  # denominator of the progress percentage; presumably the game count of the input dump -- confirm
-num_threads = 8  # number of chunks the file is split into, one worker thread per chunk
-def process_pgn_chunk(pgn_data, output_file, start_index, end_index):
-    with open(output_file, 'a', newline='') as csv_file:
-        csv_writer = csv.writer(csv_file)
-        pgn = chess.pgn.read_game(chess.pgn.StringIO(pgn_data.decode('utf-8')))
-        games_seen = 0
-        games_added = 0
-        while pgn is not None:
-            if games_seen >= end_index - start_index:
-                break
-            games_seen += 1
-            # Filter games based on the specified criteria
-            if (
-                pgn.headers['Result'] == '1-0' and
-                'Rated' in pgn.headers['Event'] and
-                1500 < int(pgn.headers['WhiteElo']) < 2400 and
-                1400 < int(pgn.headers['BlackElo']) < 2800
-            ):
-                board = chess.Board()
-                moves = []
-                move_number = 1
-                for move in pgn.mainline_moves():
-                    if board.turn == chess.WHITE:
-                        moves.append(f"{move_number}.")
-                        move_number += 1
-                    san = board.san(move)
-                    moves.append(san + " ")
-                    board.push(board.parse_san(san))
-                if board.is_game_over() and board.result() == "1-0":
-                    transcript = ''.join(moves)
-                    csv_writer.writerow([transcript.rstrip()])
-                    games_added += 1
-                    if games_added % 100 == 0:
-                        print(f"Thread {threading.current_thread().name} - Added {games_added} of {games_seen} games. {(games_seen+start_index)/float(total_games):.2%} complete.")
-            pgn = chess.pgn.read_game(chess.pgn.StringIO(pgn_data.decode('utf-8')))
-def process_pgn_file(input_file, output_file):
-    with open(output_file, 'w', newline='') as csv_file:
-        csv_writer = csv.writer(csv_file)
-        csv_writer.writerow(['transcript'])
-    file_size = os.path.getsize(input_file)
-    chunk_size = (file_size - start_at) // num_threads
-    threads = []
-    with open(input_file, 'rb') as pgn_file:
-        with mmap.mmap(pgn_file.fileno(), 0, access=mmap.ACCESS_READ) as pgn_mmap:
-            for i in range(num_threads):
-                start_index = start_at + i * chunk_size
-                end_index = start_at + (i + 1) * chunk_size
-                if i == num_threads - 1:
-                    end_index = file_size
-                pgn_data = pgn_mmap[start_index:end_index]
-                thread = threading.Thread(target=process_pgn_chunk, args=(pgn_data, f"{output_file[:-4]}_{i}.csv", start_index, end_index))
-                threads.append(thread)
-                thread.start()
-            for thread in threads:
-                thread.join()
-# Usage example (runs whenever this module is executed or imported; there is no __main__ guard)
-input_file = './chess-mamba-vs-xformer/lichess_db_standard_rated_2022-07.pgn'  # PGN file to filter
-output_file = './chess-mamba-vs-xformer/lichess_transcripts_phase2_stable.csv'  # gets the header row; workers write to <stem>_<i>.csv
-process_pgn_file(input_file, output_file)