# lab_tic_tac_toe/nim/game.py

class NimGame:
    """Models a game of Nim played on rows of lines.

    A state is a dict with two keys:

    - ``"board"``: a list of ints, the number of lines left in each row.
    - ``"first_player"``: a bool, True when it is the first player's turn.

    An action is a ``(row, lines_to_remove)`` pair.

    Subclasses may override ``INITIAL_BOARD`` and ``MAX_LINES_PER_TURN`` to
    play variants with a different layout or move limit.
    """

    # Number of lines in each row at the start of the game.
    INITIAL_BOARD = (1, 3, 5, 7)
    # The most lines get_actions offers to remove from one row in one turn.
    MAX_LINES_PER_TURN = 3

    def get_initial_state(self):
        """Return the game's initial state.

        A fresh board list is built on every call, so mutating a returned
        state never affects states returned by later calls.
        """
        return {
            "board": list(self.INITIAL_BOARD),
            "first_player": True,
        }

    def get_next_state(self, state, action):
        """Return the state that results from taking ``action`` in ``state``.

        In the resulting state the chosen lines have been removed from the
        chosen row and it is the opposite player's turn. ``state`` itself is
        left unmodified.

        Args:
            state: The current state dict.
            action: A ``(row, lines_to_remove)`` pair.

        Raises:
            IndexError: If ``row`` is not a valid row index. Negative indices
                are rejected too, so they cannot silently wrap around to
                another row.
            ValueError: If ``lines_to_remove`` is less than 1 or greater than
                the number of lines left in the row.
        """
        row, lines_to_remove = action
        board = list(state["board"])

        if not 0 <= row < len(board):
            raise IndexError(f"row {row} is not on the board")
        # Only moves that would corrupt the board are rejected here; the
        # per-turn limit used by get_actions is not enforced, so callers that
        # apply larger (but still possible) moves keep working.
        if not 1 <= lines_to_remove <= board[row]:
            raise ValueError(
                f"cannot remove {lines_to_remove} lines from row {row}, "
                f"which has {board[row]}"
            )

        board[row] -= lines_to_remove
        return {
            "board": board,
            "first_player": not state["first_player"],
        }

    def get_actions(self, state):
        """Return a list of the possible moves in ``state``.

        Each move is a ``(row, lines_to_remove)`` pair. Moves are ordered by
        row, then by increasing number of lines removed, and never remove
        more than ``MAX_LINES_PER_TURN`` lines or more lines than the row has.
        """
        limit = self.MAX_LINES_PER_TURN
        return [
            (row, lines_to_remove)
            for row, lines in enumerate(state["board"])
            for lines_to_remove in range(1, min(lines, limit) + 1)
        ]

    def get_reward(self, state):
        """Return the reward associated with reaching ``state``.

        The two opponents each want a different outcome. If the game is over
        when it is the first player's turn, the first player loses and the
        reward is -1; if it is over on the other player's turn, the reward
        is 1. Every unfinished game is worth 0.
        """
        if not self.is_over(state):
            return 0
        return -1 if state["first_player"] else 1

    def is_over(self, state):
        """Return True when the game is over, i.e. the board is empty."""
        return self.board_is_empty(state)

    def board_is_empty(self, state):
        """Return True when every row of the board has no lines left."""
        return all(lines == 0 for lines in state["board"])

    def get_objective(self, state):
        """Return the objective of the player whose turn it is.

        The objective is a function that picks the best value from a
        collection of values. The players want opposite things, so the first
        player's objective is the built-in ``max`` and the other player's is
        the built-in ``min``.
        """
        return max if state["first_player"] else min