# generated from mwc/lab_tic_tac_toe
from types import MethodType
class LookaheadStrategy:
    """A Strategy which considers the future consequences of an action.

    To initialize a LookaheadStrategy, pass in an instance of a game containing
    the following methods. These methods encode the rules of the game, which a
    LookaheadStrategy needs to know in order to determine which move is best.

    - get_next_state: state, action -> state
    - get_actions: state -> [actions]
    - get_reward: state -> int
    - is_over: state -> bool
    - get_objective: state -> function

    Optional keyword arguments tune the strategy's behavior:

    - max_depth: int. A game may be too complex to search the full state tree;
      setting max_depth caps how far ahead the LookaheadStrategy will look.
    - deterministic: bool. Several actions may be equally good. When True, ties
      always resolve to the first such action, so the same game plays out every
      time. When False, ties are broken uniformly at random.
    - explain: bool. When True, the strategy prints out its reasoning.
    """

    def __init__(self, game, max_depth=None, deterministic=True, explain=False):
        # Fail fast if the game lacks any of the required rule methods.
        self.validate_game(game)
        self.game = game
        self.max_depth = max_depth
        self.deterministic = deterministic
        self.explain = explain

    def choose_action(self, state, depth=0):
        """Given a state, chooses an action.

        This is the most important method of a Strategy, corresponding to the
        situation where it's a player's turn to play and she must decide what
        to do. Every legal action is scored by the total current-plus-future
        reward that would follow from playing it, then the game's objective
        (usually max, but min for the opposing player in a zero-sum game like
        tic tac toe) picks the "best" reward, and an action leading to that
        reward is returned.
        """
        possible_actions = self.game.get_actions(state)
        # Score each action by the total reward reachable through it.
        rewards = {
            action: self.get_current_and_future_reward(
                self.game.get_next_state(state, action), depth=depth
            )
            for action in possible_actions
        }
        # The game's objective decides which reward counts as "best" here.
        objective = self.game.get_objective(state)
        best_reward = objective(rewards.values())
        candidates = [a for a in possible_actions if rewards[a] == best_reward]
        chosen = candidates[0] if self.deterministic else choice(candidates)
        if self.explain:
            self.print_explanation(state, chosen, rewards[chosen], depth)
        return chosen

    def get_current_and_future_reward(self, state, depth=0):
        """Calculates the reward from this state, and from all future states
        which would be reached, assuming all players are using this Strategy.
        """
        reward = self.game.get_reward(state)
        # Stop looking ahead once past the depth cutoff (when one is set).
        if self.max_depth is not None and depth > self.max_depth:
            return reward
        # Terminal states have no future to explore.
        if self.game.is_over(state):
            return reward
        # Otherwise, assume play continues with this same strategy.
        action = self.choose_action(state, depth=depth + 1)
        future_state = self.game.get_next_state(state, action)
        return reward + self.get_current_and_future_reward(future_state, depth=depth + 1)

    def validate_game(self, game):
        "Checks that the game has all the required methods."
        required = (
            "get_next_state",
            "get_actions",
            "get_reward",
            "is_over",
            "get_objective",
        )
        for method in required:
            # Each required name must exist and be a bound method.
            if not isinstance(getattr(game, method, None), MethodType):
                raise ValueError(f"Game {game} does not have method {method}.")

    def print_explanation(self, state, action, reward, depth):
        """Prints out the current state of exploration of the state tree"""
        if depth > 0:
            indent = '│ ' * (depth - 1) + '├ '
        else:
            indent = ''
        print(f"{indent}[{reward}] Best action: {action} {state}")