lab_tic_tac_toe/strategy/lookahead_strategy.py

97 lines
4.5 KiB
Python

from types import MethodType
from random import choice
class LookaheadStrategy:
"""A Strategy which considers the future consequences of an action.
To initialize a LookaheadStrategy, pass in an instance of a game containing
the following methods. These methods encode the rules of the game,
which a LookaheadStrategy needs to know in order to determine which move is best.
- get_next_state: state, action -> state
- get_actions: state -> [actions]
- get_reward: state -> int
- is_over: state -> bool
- get_objective: str -> function
Optionally, pass the following arguments to control the behavior of the LookaheadStrategy:
- max_depth: int. A game may be too complex to search the full state tree.
Setting max_depth will set a cutoff on how far ahead the LookaheadStrategy will look.
- deterministic: bool. It's possible that there are multiple equally-good actions.
When deterministic is True, LookaheadStrategy will always choose the first of the
equally-good actions, so that LookaheadStrategy will always play out the same game.
When deterministic is False, LookaheadStrategy will choose randomly from all actions
which are equally-good.
- Explain: When set to True, LookaheadStrategy will print out its reasoning.
"""
def __init__(self, game, max_depth=None, deterministic=True, explain=False):
self.validate_game(game)
self.game = game
self.max_depth = max_depth
self.deterministic = deterministic
self.explain = explain
def choose_action(self, state, depth=0):
"""Given a state, chooses an action.
This is the most important method of a Strategy, corresponding to the situation where
it's a player's turn to play a game and she needs to decide what to do.
Strategy chooses an action by considering all possible actions, and finding the
total current and future reward which would come from playing that action.
Then we use the game's objective to choose the "best" reward. Usually bigger is better,
but in zero-sum games like tic tac toe, the players want opposite outcomes. One player
wants the reward to be high, while the other wants the reward to be low.
Once we know which reward is best, we choose an action which will lead to that reward.
"""
possible_actions = self.game.get_actions(state)
rewards = {}
for action in possible_actions:
future_state = self.game.get_next_state(state, action)
rewards[action] = self.get_current_and_future_reward(future_state, depth=depth)
objective = self.game.get_objective(state)
best_reward = objective(rewards.values())
best_actions = [action for action in possible_actions if rewards[action] == best_reward]
if self.deterministic:
action = best_actions[0]
else:
action = choice(best_actions)
if self.explain:
self.print_explanation(state, action, rewards[action], depth)
return action
def get_current_and_future_reward(self, state, depth=0):
"""Calculates the reward from this state, and from all future states which would be
reached, assuming all players are using this Strategy.
"""
reward = self.game.get_reward(state)
if (self.max_depth is None or depth <= self.max_depth) and not self.game.is_over(state):
action = self.choose_action(state, depth=depth+1)
future_state = self.game.get_next_state(state, action)
reward += self.get_current_and_future_reward(future_state, depth=depth+1)
return reward
def validate_game(self, game):
"Checks that the game has all the required methods."
required_methods = [
"get_next_state",
"get_actions",
"get_reward",
"is_over",
"get_objective",
]
for method in required_methods:
if not (hasattr(game, method) and isinstance(getattr(game, method), MethodType)):
message = f"Game {game} does not have method {method}."
raise ValueError(message)
def print_explanation(self, state, action, reward, depth):
"""Prints out the current state of exploration of the state tree"""
indent = '' * (max(0, depth-1)) + ('' if depth > 0 else '')
print(f"{indent}[{reward}] Best action: {action} {state}")