The example below is working however, if more zeros (empty cells) are added to the sudoku grid g
, it takes longer to run, if it ever finishes. Not asking for a code review here, just I might've overlooked something, and I'd appreciate pointing it out.
def is_solved(grid):
for row, col in zip(grid, [*zip(*grid)]):
for i in range(1, 10):
if (i not in row) or (i not in col):
return False
return True
def square_available(rows, x, y, n):
if 0 <= x < 3:
rows = rows[:3]
elif 3 <= x < 6:
rows = rows[3:6]
else:
rows = rows[6:]
if 0 <= y < 3:
return not any([n in r[:3] for r in rows])
elif 3 <= y < 6:
return not any([n in r[3:6] for r in rows])
else:
return not any([n in r[6:] for r in rows])
def is_valid(grid, x, y, n):
columns = [*zip(*grid)]
return (
square_available(grid, x, y, n) and n not in grid[x] and (n not in columns[y])
)
def solve(grid, empty_cells):
if is_solved(grid):
return grid
for x, y in empty_cells:
for n in range(1, 10):
if is_valid(grid, x, y, n):
grid[x][y] = n
empty_cells.remove((x, y))
if solve(grid, empty_cells):
return grid
else:
grid[x][y] = 0
empty_cells.append((x, y))
if __name__ == '__main__':
solution = [
[5, 3, 4, 6, 7, 8, 9, 1, 2],
[6, 7, 2, 1, 9, 5, 3, 4, 8],
[1, 9, 8, 3, 4, 2, 5, 6, 7],
[8, 5, 9, 7, 6, 1, 4, 2, 3],
[4, 2, 6, 8, 5, 3, 7, 9, 1],
[7, 1, 3, 9, 2, 4, 8, 5, 6],
[9, 6, 1, 5, 3, 7, 2, 8, 4],
[2, 8, 7, 4, 1, 9, 6, 3, 5],
[3, 4, 5, 2, 8, 6, 1, 7, 9],
]
g = [
[0, 0, 0, 6, 0, 8, 9, 1, 0],
[6, 0, 2, 0, 9, 0, 3, 4, 0],
[1, 9, 8, 3, 0, 0, 0, 6, 7],
[0, 5, 9, 0, 0, 0, 4, 2, 3],
[4, 0, 0, 8, 0, 3, 0, 0, 1],
[7, 1, 3, 0, 2, 0, 8, 0, 0],
[9, 6, 0, 5, 3, 7, 2, 8, 0],
[2, 0, 0, 4, 1, 0, 0, 3, 0],
[3, 4, 0, 2, 8, 0, 1, 7, 9],
]
empty = []
for i in range(9):
for j in range(9):
if not g[i][j]:
empty.append((i, j))
solved = solve(g, empty)
assert g == solution
I tried reimplementing the whole thing as shown below, and the results got worse. It's not even capable of solving what's already possible with the implementation above.
from collections import defaultdict
def get_possibilities(rows, columns, x, y, visited):
if (x, y) in visited:
return visited[x, y]
x0 = (x // 3) * 3
x1 = x0 + 3
y0 = (y // 3) * 3
y1 = y0 + 3
possibilities = set()
for n in range(1, 10):
square_rows = rows[x0:x1]
for row in square_rows:
if n in row[y0:y1]:
continue
if (n not in rows[x]) and (n not in columns[y]):
visited[x, y].add(n)
possibilities.add(n)
return possibilities
def solve(rows, columns, empty_cells, visited):
if not empty_cells:
return rows
for x, y in empty_cells:
for n in get_possibilities(rows, columns, x, y, visited):
rows[x][y] = n
columns[y][x] = n
visited[x, y].remove(n)
if solve(rows, columns, empty_cells - {(x, y)}, visited):
return rows
else:
rows[x][y] = 0
columns[y][x] = 0
visited[x, y].add(n)
if __name__ == '__main__':
solution = [
[5, 3, 4, 6, 7, 8, 9, 1, 2],
[6, 7, 2, 1, 9, 5, 3, 4, 8],
[1, 9, 8, 3, 4, 2, 5, 6, 7],
[8, 5, 9, 7, 6, 1, 4, 2, 3],
[4, 2, 6, 8, 5, 3, 7, 9, 1],
[7, 1, 3, 9, 2, 4, 8, 5, 6],
[9, 6, 1, 5, 3, 7, 2, 8, 4],
[2, 8, 7, 4, 1, 9, 6, 3, 5],
[3, 4, 5, 2, 8, 6, 1, 7, 9],
]
r = [
[0, 0, 0, 6, 0, 8, 9, 1, 0],
[6, 0, 2, 0, 9, 0, 3, 4, 0],
[1, 9, 8, 3, 0, 0, 0, 6, 7],
[0, 5, 9, 0, 0, 0, 4, 2, 3],
[4, 0, 0, 8, 0, 3, 0, 0, 1],
[7, 1, 3, 0, 2, 0, 8, 0, 0],
[9, 6, 0, 5, 3, 7, 2, 8, 0],
[2, 0, 0, 4, 1, 0, 0, 3, 0],
[3, 4, 0, 2, 8, 0, 1, 7, 9],
]
c = [list(r) for r in [*zip(*r)]]
cells = set()
for i in range(9):
for j in range(9):
if not r[i][j]:
cells.add((i, j))
v = defaultdict(set)
solved = solve(r, c, cells, v)
assert r == solution
Looking only at the first version of your code, there are a few things you can improve in the solve
function:
(A problem also in the second version:) You shouldn't loop over all empty_cells
. Instead consider that you must find a solution for the first empty cell, and if after doing all of the recursive effort there is no good value for this first empty cell, then it makes no sense to continue with other empty cells. The first one represents a problem that is unsolvable, so any effort on any other empty cell is useless. So remove that for
loop, and just work with one empty cell per recursion level.
You don't need to call is_solved
to know that the grid has been completed. You did this better in the second version. Just test not empty_cells
or something similar.
Don't call empty_cells.remove
and append
in each iteration of the inner loop as it is always the same that is removed and added again. Moreover, remove
is overkill as you don't really want to search for the entry -- you know at which index it sits. Just do the removal at the start of the process, and add it again at the very end.
With the above changes the performance will greatly improve. Still some little improvements are possible:
Leave the empty_cells
list unmutated, and instead pass along an index in that list. We can assume that all empty cells before that index have been filled in, and the others are still to do.
Instead of calling is_valid
on each digit, collect the valid moves -- an idea you tried to implement in the second version.
Here is the improved version:
def valid_moves(grid, x, y):
ybox = y - y % 3
xbox = x - x % 3
return set(range(1, 10)).difference(
grid[x]
).difference(
(row[y] for row in grid)
).difference(
(num for row in grid[xbox: xbox + 3]
for num in row[ybox: ybox + 3])
)
def solve(grid, empty_cells, i=0):
if i >= len(empty_cells):
return grid
x, y = empty_cells[i]
i += 1
for n in valid_moves(grid, x, y):
grid[x][y] = n
if solve(grid, empty_cells, i):
return grid
grid[x][y] = 0