{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
 "# Read both grid columns explicitly as strings: each character is one sudoku cell,\n",
 "# and an 81-digit solution must never be type-inferred as a number.\n",
 "new_data = pd.read_csv(\"sudoku-3m.csv\", dtype={\"puzzle\": str, \"solution\": str})"
] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idpuzzlesolutioncluesdifficulty
011..5.37..6.3..8.9......98...1.......8761.........1985437266432785915276198439147352688761924352...272.2
12...81.....2........1.9..7...7..25.934.2..........9348172567286534196159427381764258934523981673...230.0
23..5...74.3..6...19.....1..5...7...2.9....58..7...2159837463876542194692713855387169249413258677...252.6
34........5.2...9....9..2...373..481.....36....5...4738169256285397411954278637329481569413652785...261.4
45.4.1..............653.....1.8.9..74...24..91.....9471536821286493576532874913819267455724389164...251.1
\n", "
" ], "text/plain": [ " id puzzle \\\n", "0 1 1..5.37..6.3..8.9......98...1.......8761......... \n", "1 2 ...81.....2........1.9..7...7..25.934.2.......... \n", "2 3 ..5...74.3..6...19.....1..5...7...2.9....58..7... \n", "3 4 ........5.2...9....9..2...373..481.....36....5... \n", "4 5 .4.1..............653.....1.8.9..74...24..91..... \n", "\n", " solution clues difficulty \n", "0 1985437266432785915276198439147352688761924352... 27 2.2 \n", "1 9348172567286534196159427381764258934523981673... 23 0.0 \n", "2 2159837463876542194692713855387169249413258677... 25 2.6 \n", "3 4738169256285397411954278637329481569413652785... 26 1.4 \n", "4 9471536821286493576532874913819267455724389164... 25 1.1 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_data.head()" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [
 "# Blank cells are written as '.' in the CSV; map them to '0' so every character is a digit.\n",
 "# regex=False keeps the replacement literal: as a regex, '.' would match every character.\n",
 "new_data['puzzle'] = new_data['puzzle'].str.replace('.', '0', regex=False)"
] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [
 "def digits_to_grids(digit_strings):\n",
 "    \"\"\"Convert a Series of 81-character digit strings into an (n, 9, 9) uint8 array.\n",
 "\n",
 "    The whole column is decoded in one vectorized pass; a Python-level\n",
 "    iterrows() loop with per-character int() is very slow for millions of rows.\n",
 "\n",
 "    Args:\n",
 "        digit_strings: pandas Series of str, one flattened 9x9 grid per row.\n",
 "\n",
 "    Returns:\n",
 "        np.ndarray of dtype uint8 and shape (len(digit_strings), 9, 9).\n",
 "\n",
 "    Raises:\n",
 "        ValueError: if a string is not 81 characters long or holds a non-digit.\n",
 "    \"\"\"\n",
 "    if not digit_strings.str.len().eq(81).all():\n",
 "        raise ValueError('every puzzle/solution string must have exactly 81 characters')\n",
 "    ascii_codes = np.frombuffer(''.join(digit_strings).encode('ascii'), dtype=np.uint8)\n",
 "    if ascii_codes.size and (ascii_codes.min() < ord('0') or ascii_codes.max() > ord('9')):\n",
 "        raise ValueError('puzzle/solution strings may only contain the digits 0-9')\n",
 "    # Subtracting the code of '0' turns ASCII digits into their numeric values;\n",
 "    # the subtraction also yields a fresh writable array (frombuffer is read-only).\n",
 "    return (ascii_codes - ord('0')).astype(np.uint8).reshape(-1, 9, 9)\n",
 "\n",
 "\n",
 "quizzes = digits_to_grids(new_data['puzzle'])\n",
 "solutions = digits_to_grids(new_data['solution'])"
] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((3000000, 9, 9), (3000000, 9, 9))" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# digits_to_grids already returns stacked arrays, so this cell only checks and shows\n",
 "# the shapes (and is safe to re-run).\n",
 "assert quizzes.shape == solutions.shape == (len(new_data), 9, 9)\n",
 "quizzes.shape, solutions.shape"
] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [
 "def one_hot_planes(grids):\n",
 "    \"\"\"One-hot encode (n, 9, 9) digit grids into an (n, 2, 9, 9, 9) uint8 array.\n",
 "\n",
 "    For digit d = i + 1, out[:, 1, :, :, i] is 1 where the cell holds d and\n",
 "    out[:, 0, :, :, i] is 1 where the cell holds a different non-zero digit.\n",
 "    Empty cells (value 0) are 0 in both planes.\n",
 "    \"\"\"\n",
 "    planes = np.zeros((grids.shape[0], 2, 9, 9, 9), dtype=np.uint8)\n",
 "    filled = grids != 0\n",
 "    for i in range(9):\n",
 "        is_digit = grids == i + 1\n",
 "        planes[:, 1, :, :, i] = is_digit\n",
 "        planes[:, 0, :, :, i] = filled & ~is_digit\n",
 "    return planes\n",
 "\n",
 "\n",
 "# Only the first N_SAVED puzzles are written to disk, so encode just that slice.\n",
 "# Encoding every row first would allocate 2 * 9 * 9 * 9 bytes per puzzle for each of\n",
 "# the two arrays (several GB in total) only to discard most of it.\n",
 "N_SAVED = 1_000_000\n",
 "quizz = one_hot_planes(quizzes[:N_SAVED])\n",
 "sol = one_hot_planes(solutions[:N_SAVED])\n",
 "\n",
 "# NOTE: the file name says '3_million', but the archive holds only the first N_SAVED\n",
 "# puzzles; the name is kept unchanged so existing loaders still find the file.\n",
 "np.savez(\"sudoku_reshaped_3_million.npz\", quizzes=quizz, solutions=sol)"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }