generated from mwc/lab_tic_tac_toe
69 lines
2.5 KiB
Python
69 lines
2.5 KiB
Python
class NimGame:
    """Models a Nim game.

    A state is a dict with two keys:

    * ``"board"``: a list holding the number of lines left in each row.
    * ``"first_player"``: ``True`` when it is the first player's turn.

    An action is a ``(row, lines_to_remove)`` tuple.
    """

    # Number of lines in each row at the start of a game.
    INITIAL_BOARD = (1, 3, 5, 7)
    # Largest number of lines a player may remove from a row in one turn.
    MAX_LINES_PER_TURN = 3

    def get_initial_state(self):
        """Return the game's initial state.

        A fresh board list is built on every call so callers can never
        share (and accidentally mutate) the same list.
        """
        return {
            "board": list(self.INITIAL_BOARD),
            "first_player": True,
        }

    def get_next_state(self, state, action):
        """Given a state and an action, return the resulting state.

        In the resulting state the chosen lines have been removed from the
        chosen row and it is the opposite player's turn. The input state is
        left untouched.

        The action is not validated here; it is expected to be one of the
        tuples produced by ``get_actions``.
        """
        row, lines_to_remove = action
        # Copy the board so the caller's state is not modified.
        board = list(state["board"])
        board[row] -= lines_to_remove
        return {
            "board": board,
            "first_player": not state["first_player"],
        }

    def get_actions(self, state):
        """Return a list of possible moves.

        Each move is a ``(row, lines_to_remove)`` tuple. A player may take
        between 1 and ``MAX_LINES_PER_TURN`` lines from a single row, and
        never more than the row still holds. Moves are listed row by row,
        smallest removal first.
        """
        return [
            (row, lines_to_remove)
            for row, lines in enumerate(state["board"])
            for lines_to_remove in range(1, self.MAX_LINES_PER_TURN + 1)
            if lines >= lines_to_remove
        ]

    def get_reward(self, state):
        """Determine the reward associated with reaching this state.

        For Nim, the two opponents each want a different game outcome.
        If the game is over when it is first_player's turn the reward is -1;
        if it is over on the other player's (Computer's) turn the reward
        is 1. All other states (unfinished games) are worth 0.
        """
        if not self.is_over(state):
            return 0
        return -1 if state["first_player"] else 1

    def is_over(self, state):
        """Check whether the game is over (the board has no lines left)."""
        return self.board_is_empty(state)

    def board_is_empty(self, state):
        """Check whether all the lines in the board are gone."""
        return all(lines == 0 for lines in state["board"])

    def get_objective(self, state):
        """Return a player's objective: a function describing what they want.

        The returned function chooses the best value from a list. In Nim the
        players want opposite things, so first_player's objective is the
        built-in ``max`` (which chooses the largest number) and the other
        player's (Computer's) objective is the built-in ``min``.
        """
        return max if state["first_player"] else min