Refactor lab with Strategy classes
This commit is contained in:
parent
f49e78c35f
commit
6ad2672bd3
4
play.py
4
play.py
|
@ -9,6 +9,6 @@ view = TTTView()
|
||||||
|
|
||||||
view.greet(game)
|
view.greet(game)
|
||||||
while not game.is_over():
|
while not game.is_over():
|
||||||
move = view.get_move(game)
|
action = view.get_action(game)
|
||||||
game.play_move(move)
|
game.play_action(action)
|
||||||
view.conclude(game)
|
view.conclude(game)
|
||||||
|
|
|
@ -0,0 +1,98 @@
|
||||||
|
from types import MethodType
|
||||||
|
from random import choice
|
||||||
|
|
||||||
|
class RandomStrategy:
|
||||||
|
"""A Strategy which randomly chooses a move. Not a great choice.
|
||||||
|
"""
|
||||||
|
def __init__(self, game):
|
||||||
|
self.validate_game(game)
|
||||||
|
self.game = game
|
||||||
|
|
||||||
|
def choose_action(self, state):
|
||||||
|
possible_actions = game.get_actions(state)
|
||||||
|
return choice(possible_actions)
|
||||||
|
|
||||||
|
class LookaheadStrategy:
|
||||||
|
"""A Strategy which considers the future consequences of an action.
|
||||||
|
|
||||||
|
To initialize a LookaheadStrategy, pass in an instance of a game containing
|
||||||
|
the following methods. These methods encode the rules of the game,
|
||||||
|
which a LookaheadStrategy needs to know in order to determine which move is best.
|
||||||
|
|
||||||
|
- get_next_state: state, action -> state
|
||||||
|
- get_actions: state -> [actions]
|
||||||
|
- get_reward: state -> int
|
||||||
|
- is_over: state -> bool
|
||||||
|
- get_objective: str -> function
|
||||||
|
|
||||||
|
Optionally, pass the following arguments to control the behavior of the LookaheadStrategy:
|
||||||
|
|
||||||
|
- max_depth: int. A game may be too complex to search the full state tree.
|
||||||
|
Setting max_depth will set a cutoff on how far ahead the LookaheadStrategy will look.
|
||||||
|
- deterministic: bool. It's possible that there are multiple equally-good actions.
|
||||||
|
When deterministic is True, LookaheadStrategy will always choose the first of the
|
||||||
|
equally-good actions, so that LookaheadStrategy will always play out the same game.
|
||||||
|
When deterministic is False, LookaheadStrategy will choose randomly from all actions
|
||||||
|
which are equally-good.
|
||||||
|
- Explain: When set to True, LookaheadStrategy will print out its reasoning.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, game, max_depth=None, deterministic=True, explain=False):
|
||||||
|
self.validate_game(game)
|
||||||
|
self.game = game
|
||||||
|
self.max_depth = max_depth
|
||||||
|
self.deterministic = deterministic
|
||||||
|
self.explain = explain
|
||||||
|
|
||||||
|
def choose_action(self, state):
|
||||||
|
"""Given a state, chooses an action.
|
||||||
|
This is the most important method of a Strategy, corresponding to the situation where
|
||||||
|
it's a player's turn to play a game and she needs to decide what to do.
|
||||||
|
|
||||||
|
Strategy chooses an action by considering all possible actions, and finding the
|
||||||
|
total current and future reward which would come from playing that action.
|
||||||
|
Then we use the game's objective to choose the "best" reward. Usually bigger is better,
|
||||||
|
but in zero-sum games like tic tac toe, the players want opposite outcomes. One player
|
||||||
|
wants the reward to be high, while the other wants the reward to be low.
|
||||||
|
|
||||||
|
Once we know which reward is best, we choose an action which will lead to that reward.
|
||||||
|
"""
|
||||||
|
possible_actions = self.game.get_actions(state)
|
||||||
|
rewards = {}
|
||||||
|
for action in possible_actions:
|
||||||
|
future_state = self.game.get_next_state(state, action)
|
||||||
|
rewards[action] = self.game.get_reward(future_state)
|
||||||
|
objective = self.game.get_objective(state)
|
||||||
|
best_reward = objective(rewards.values())
|
||||||
|
best_actions = [action for action in possible_actions if rewards[action] == best_reward]
|
||||||
|
if self.deterministic:
|
||||||
|
return best_actions[0]
|
||||||
|
else:
|
||||||
|
return choice(best_actions)
|
||||||
|
|
||||||
|
def get_current_and_future_reward(self, state):
|
||||||
|
"""Calculates the reward from this state, and from all future states which would be
|
||||||
|
reached, assuming all players are using this Strategy.
|
||||||
|
"""
|
||||||
|
reward = self.game.get_reward(state)
|
||||||
|
if not self.game.is_over(state):
|
||||||
|
future_state = self.choose_action(state)
|
||||||
|
reward += self.get_current_and_future_reward(future_state)
|
||||||
|
return reward
|
||||||
|
|
||||||
|
def validate_game(self, game):
|
||||||
|
"Checks that the game has all the required methods."
|
||||||
|
required_methods = [
|
||||||
|
"get_next_state",
|
||||||
|
"get_actions",
|
||||||
|
"get_reward",
|
||||||
|
"is_over",
|
||||||
|
"get_objective",
|
||||||
|
]
|
||||||
|
for method in required_methods:
|
||||||
|
if not (hasattr(game, method) and isinstance(getattr(game, method), MethodType)):
|
||||||
|
message = f"Game {game} does not have method {method}."
|
||||||
|
raise ValueError(message)
|
||||||
|
|
||||||
|
|
||||||
|
|
88
ttt_game.py
88
ttt_game.py
|
@ -2,47 +2,81 @@ class TTTGame:
|
||||||
"Models a tic-tac-toe game."
|
"Models a tic-tac-toe game."
|
||||||
|
|
||||||
def __init__(self, playerX, playerO):
|
def __init__(self, playerX, playerO):
|
||||||
self.board = [None] * 9
|
self.state = self.get_initial_state()
|
||||||
self.turn_index = 0
|
|
||||||
self.players = {
|
self.players = {
|
||||||
'X': playerX,
|
'X': playerX,
|
||||||
'O': playerO,
|
'O': playerO,
|
||||||
}
|
}
|
||||||
|
|
||||||
def play_move(self, move):
|
def get_initial_state(self):
|
||||||
"Updates the game's state by recording a move"
|
"Returns the game's initial state."
|
||||||
if not self.is_valid_move(move):
|
return {
|
||||||
raise ValueError(f"Illegal move {move} with board {self.board}.")
|
"board": ['-', '-', '-', '-', '-', '-', '-', '-', '-'],
|
||||||
self.board[move] = self.get_current_player_symbol()
|
"player": "X",
|
||||||
self.turn_index += 1
|
}
|
||||||
|
|
||||||
def get_valid_moves(self):
|
def get_next_state(self, state, action):
|
||||||
|
"""Given a state and an action, returns the resulting state.
|
||||||
|
In the resulting state, the current player's symbol has been placed
|
||||||
|
in an empty board space, and it is the opposite player's turn.
|
||||||
|
"""
|
||||||
|
new_board = state["board"]
|
||||||
|
new_board[action] = state["player"]
|
||||||
|
if state["player"] == "O":
|
||||||
|
new_player = "X"
|
||||||
|
else:
|
||||||
|
new_player = "O"
|
||||||
|
return {
|
||||||
|
"board": new_board,
|
||||||
|
"player": new_player,
|
||||||
|
}
|
||||||
|
|
||||||
|
def get_actions(self, state):
|
||||||
"Returns a list of the indices of empty spaces"
|
"Returns a list of the indices of empty spaces"
|
||||||
return [index for index in range(9) if self.board[index] is None]
|
return [index for index in range(9) if state["board"][index] == '-']
|
||||||
|
|
||||||
|
def is_over(self):
|
||||||
|
"Checks whether the game is over."
|
||||||
|
return self.board_is_full() or self.check_winner('X') or self.check_winner('O')
|
||||||
|
|
||||||
|
def get_reward(self, state):
|
||||||
|
"""Determines the reward associated with reaching this state.
|
||||||
|
For tic-tac-toe, the two opponents each want a different game outcome. So
|
||||||
|
we set the reward for X winning to 1 and the reward for O winning to -1.
|
||||||
|
All other states (unfinished games and games which ended in a draw) are worth 0.
|
||||||
|
"""
|
||||||
|
if self.check_winner('X'):
|
||||||
|
return 1
|
||||||
|
elif self.check_winner('O'):
|
||||||
|
return -1
|
||||||
|
else:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def get_objective(self, state):
|
||||||
|
"""Returns a player's objective, or a function describing what a player wants.
|
||||||
|
This function should choose the best value from a list. In tic tac toe, the players
|
||||||
|
want opposite things, so we set X's objective to the built-in function `max`
|
||||||
|
(which chooses the largest number), and we set O's objective to the built-in function `min`.
|
||||||
|
"""
|
||||||
|
if state["player"] == 'X':
|
||||||
|
return max
|
||||||
|
elif state["player"] == 'O':
|
||||||
|
return min
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unrecognized player {state['player']}")
|
||||||
|
|
||||||
|
def play_action(self, action):
|
||||||
|
"Plays a move, updating the game's state."
|
||||||
|
self.state = self.get_next_state(self.state, action)
|
||||||
|
|
||||||
def is_valid_move(self, move):
|
def is_valid_move(self, move):
|
||||||
"Checks whether a move is valid"
|
"Checks whether a move is valid"
|
||||||
return move in self.get_valid_moves()
|
return move in self.get_valid_moves()
|
||||||
|
|
||||||
def get_current_player_symbol(self):
|
|
||||||
"Returns the symbol of the current player"
|
|
||||||
if self.turn_index % 2 == 0:
|
|
||||||
return 'X'
|
|
||||||
else:
|
|
||||||
return 'O'
|
|
||||||
|
|
||||||
def get_current_player(self):
|
|
||||||
"Returns the symbol of the current player and the current player"
|
|
||||||
return self.players[self.get_current_player_symbol()]
|
|
||||||
|
|
||||||
def is_over(self):
|
|
||||||
"Checks whether the game is over."
|
|
||||||
return self.board_is_full() or self.check_winner('X') or self.check_winner('O')
|
|
||||||
|
|
||||||
def board_is_full(self):
|
def board_is_full(self):
|
||||||
"Checks whether all the spaces in the board are occupied."
|
"Checks whether all the spaces in the board are occupied."
|
||||||
for space in self.board:
|
for space in self.state["board"]:
|
||||||
if space == None:
|
if space == '-':
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
45
ttt_learn.py
45
ttt_learn.py
|
@ -9,16 +9,6 @@
|
||||||
from itertools import count
|
from itertools import count
|
||||||
counter = count()
|
counter = count()
|
||||||
|
|
||||||
def get_next_state(state, action):
|
|
||||||
"""Returns the state which would result from taking an action at a particular state.
|
|
||||||
"""
|
|
||||||
new_board = [space for space in state["board"]]
|
|
||||||
new_board[action] = state["player"]
|
|
||||||
return {
|
|
||||||
"board": new_board,
|
|
||||||
"player": get_opponent(state["player"]),
|
|
||||||
}
|
|
||||||
|
|
||||||
def get_actions(state):
|
def get_actions(state):
|
||||||
"""Given a board state, returns a dictionary whose keys are possible actions and whose
|
"""Given a board state, returns a dictionary whose keys are possible actions and whose
|
||||||
values are the resulting state from each action. If the game is over, returns an empty
|
values are the resulting state from each action. If the game is over, returns an empty
|
||||||
|
@ -29,7 +19,7 @@ def get_actions(state):
|
||||||
else:
|
else:
|
||||||
actions = {}
|
actions = {}
|
||||||
for i in range(9):
|
for i in range(9):
|
||||||
if state["board"][i] is None or state["board"][i] == '-':
|
if state["board"][i] is None or state["board"][i] == ' ':
|
||||||
actions[i] = get_next_state(state, i)
|
actions[i] = get_next_state(state, i)
|
||||||
return actions
|
return actions
|
||||||
|
|
||||||
|
@ -72,34 +62,6 @@ def get_value(state, depth=0, explain=False):
|
||||||
# ================================== HELPERS ==============================================
|
# ================================== HELPERS ==============================================
|
||||||
# =========================================================================================
|
# =========================================================================================
|
||||||
|
|
||||||
def get_opponent(player):
|
|
||||||
"Returns 'X' when player is 'O' and 'O' when player is 'X'"
|
|
||||||
if player == 'X':
|
|
||||||
return 'O'
|
|
||||||
elif player == 'O':
|
|
||||||
return 'X'
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unrecognized player {player}")
|
|
||||||
|
|
||||||
def is_over(state):
|
|
||||||
"Returns True if the game is over"
|
|
||||||
return is_draw(state) or is_win(state, 'X') or is_win(state, 'O')
|
|
||||||
|
|
||||||
def is_draw(state):
|
|
||||||
"Returns True if the game ended in a draw."
|
|
||||||
for space in state["board"]:
|
|
||||||
if space is None or space == '-':
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
def is_win(state, player):
|
|
||||||
"Returns True if `player` has won the game."
|
|
||||||
win_lines = [[0,1,2], [3,4,5], [6,7,8], [0,3,6], [1,4,7], [2,5,8], [0, 4, 8], [2, 4, 6]]
|
|
||||||
for a, b, c in win_lines:
|
|
||||||
if state["board"][a] == player and state["board"][b] == player and state["board"][c] == player:
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
def pose_question(state, index, depth):
|
def pose_question(state, index, depth):
|
||||||
"Logs a question asking about a player's best move at a state."
|
"Logs a question asking about a player's best move at a state."
|
||||||
board = format_board_inline(state['board'])
|
board = format_board_inline(state['board'])
|
||||||
|
@ -114,8 +76,3 @@ def log(message, index, depth):
|
||||||
"Prints a message at the appropriate depth, with each message numbered."
|
"Prints a message at the appropriate depth, with each message numbered."
|
||||||
indent = ' ' * depth
|
indent = ' ' * depth
|
||||||
print(f"{indent}{index}. {message}")
|
print(f"{indent}{index}. {message}")
|
||||||
|
|
||||||
def format_board_inline(board):
|
|
||||||
"Formats a board like '[ OOX | -X- | --- ]' "
|
|
||||||
symbols = [sym or '-' for sym in board]
|
|
||||||
return '[ ' + ' | '.join([''.join(symbols[:3]), ''.join(symbols[3:6]), ''.join(symbols[6:])]) + ' ]'
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
from click import Choice, prompt
|
from click import Choice, prompt
|
||||||
|
from strategy import RandomStrategy
|
||||||
import random
|
import random
|
||||||
|
|
||||||
class TTTHumanPlayer:
|
class TTTHumanPlayer:
|
||||||
|
@ -8,9 +9,9 @@ class TTTHumanPlayer:
|
||||||
"Sets up the player."
|
"Sets up the player."
|
||||||
self.name = name
|
self.name = name
|
||||||
|
|
||||||
def choose_move(self, game):
|
def choose_action(self, game):
|
||||||
"Chooses a move by prompting the player for a choice."
|
"Chooses an action by prompting the player for a choice."
|
||||||
choices = Choice([str(i) for i in game.get_valid_moves()])
|
choices = Choice([str(i) for i in game.get_actions(game.state)])
|
||||||
move = prompt("> ", type=choices, show_choices=False)
|
move = prompt("> ", type=choices, show_choices=False)
|
||||||
return int(move)
|
return int(move)
|
||||||
|
|
||||||
|
@ -21,10 +22,11 @@ class TTTComputerPlayer:
|
||||||
"Sets up the player."
|
"Sets up the player."
|
||||||
self.name = name
|
self.name = name
|
||||||
|
|
||||||
def choose_move(self, game):
|
def choose_action(self, game):
|
||||||
"Chooses a random move from the moves available."
|
"Chooses a random move from the moves available."
|
||||||
move = random.choice(game.get_valid_moves())
|
strategy = RandomStrategy(game)
|
||||||
print(f"{self.name} chooses {move}.")
|
action = strategy.choose_action(game.state)
|
||||||
|
print(f"{self.name} chooses {action}.")
|
||||||
return move
|
return move
|
||||||
|
|
||||||
def get_symbol(self, game):
|
def get_symbol(self, game):
|
||||||
|
|
16
ttt_view.py
16
ttt_view.py
|
@ -17,18 +17,19 @@ class TTTView:
|
||||||
print(f"{x_name} will play as X.")
|
print(f"{x_name} will play as X.")
|
||||||
print(f"{o_name} will play as O.")
|
print(f"{o_name} will play as O.")
|
||||||
|
|
||||||
def get_move(self, game):
|
def get_action(self, game):
|
||||||
"Shows the board and asks the current player for their choice of move."
|
"Shows the board and asks the current player for their choice of action."
|
||||||
self.print_board_with_options(game)
|
self.print_board_with_options(game)
|
||||||
player = game.get_current_player()
|
current_player_symbol = game.state["player"]
|
||||||
|
player = game.players[current_player_symbol]
|
||||||
print(f"{player.name}, it's your move.")
|
print(f"{player.name}, it's your move.")
|
||||||
return player.choose_move(game)
|
return player.choose_action(game)
|
||||||
|
|
||||||
def print_board_with_options(self, game):
|
def print_board_with_options(self, game):
|
||||||
"Prints the current board, showing indices of available spaces"
|
"Prints the current board, showing indices of available spaces"
|
||||||
print(self.format_row(game, [0, 1, 2]))
|
print(self.format_row(game, [0, 1, 2]))
|
||||||
print(self.divider)
|
print(self.divider)
|
||||||
print(self.format_row(game, [3, 4, 6]))
|
print(self.format_row(game, [3, 4, 5]))
|
||||||
print(self.divider)
|
print(self.divider)
|
||||||
print(self.format_row(game, [6, 7, 8]))
|
print(self.format_row(game, [6, 7, 8]))
|
||||||
|
|
||||||
|
@ -42,9 +43,9 @@ class TTTView:
|
||||||
If the game board already has a symbol in that space, formats that value for the Terminal.
|
If the game board already has a symbol in that space, formats that value for the Terminal.
|
||||||
If the space is empty, instead formats the index of the space.
|
If the space is empty, instead formats the index of the space.
|
||||||
"""
|
"""
|
||||||
if game.board[index] == 'X':
|
if game.state["board"][index] == 'X':
|
||||||
return click.style('X', fg=self.x_color)
|
return click.style('X', fg=self.x_color)
|
||||||
elif game.board[index] == 'O':
|
elif game.state["board"][index] == 'O':
|
||||||
return click.style('O', fg=self.o_color)
|
return click.style('O', fg=self.o_color)
|
||||||
else:
|
else:
|
||||||
return click.style(index, fg=self.option_color)
|
return click.style(index, fg=self.option_color)
|
||||||
|
@ -52,6 +53,7 @@ class TTTView:
|
||||||
def conclude(self, game):
|
def conclude(self, game):
|
||||||
"""Says goodbye.
|
"""Says goodbye.
|
||||||
"""
|
"""
|
||||||
|
self.print_board_with_options(game)
|
||||||
if game.check_winner('X'):
|
if game.check_winner('X'):
|
||||||
winner = game.players['X']
|
winner = game.players['X']
|
||||||
elif game.check_winner('O'):
|
elif game.check_winner('O'):
|
||||||
|
|
Loading…
Reference in New Issue