generated from mwc/lab_tic_tac_toe
	Initial commit
This commit is contained in:
		
							
								
								
									
										96
									
								
								strategy/lookahead_strategy.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										96
									
								
								strategy/lookahead_strategy.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,96 @@
 | 
			
		||||
from types import MethodType
 | 
			
		||||
from random import choice
 | 
			
		||||
 | 
			
		||||
class LookaheadStrategy:
 | 
			
		||||
    """A Strategy which considers the future consequences of an action.
 | 
			
		||||
 | 
			
		||||
    To initialize a LookaheadStrategy, pass in an instance of a game containing
 | 
			
		||||
    the following methods. These methods encode the rules of the game, 
 | 
			
		||||
    which a LookaheadStrategy needs to know in order to determine which move is best.
 | 
			
		||||
 | 
			
		||||
        - get_next_state: state, action -> state
 | 
			
		||||
        - get_actions: state -> [actions]
 | 
			
		||||
        - get_reward: state -> int
 | 
			
		||||
        - is_over: state -> bool
 | 
			
		||||
        - get_objective: str -> function
 | 
			
		||||
 | 
			
		||||
    Optionally, pass the following arguments to control the behavior of the LookaheadStrategy:
 | 
			
		||||
 | 
			
		||||
        - max_depth: int. A game may be too complex to search the full state tree.
 | 
			
		||||
          Setting max_depth will set a cutoff on how far ahead the LookaheadStrategy will look.
 | 
			
		||||
        - deterministic: bool. It's possible that there are multiple equally-good actions. 
 | 
			
		||||
          When deterministic is True, LookaheadStrategy will always choose the first of the 
 | 
			
		||||
          equally-good actions, so that LookaheadStrategy will always play out the same game. 
 | 
			
		||||
          When deterministic is False, LookaheadStrategy will choose randomly from all actions
 | 
			
		||||
          which are equally-good. 
 | 
			
		||||
        - Explain: When set to True, LookaheadStrategy will print out its reasoning. 
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    def __init__(self, game, max_depth=None, deterministic=True, explain=False):
 | 
			
		||||
        self.validate_game(game)
 | 
			
		||||
        self.game = game
 | 
			
		||||
        self.max_depth = max_depth
 | 
			
		||||
        self.deterministic = deterministic
 | 
			
		||||
        self.explain = explain
 | 
			
		||||
 | 
			
		||||
    def choose_action(self, state, depth=0):
 | 
			
		||||
        """Given a state, chooses an action.
 | 
			
		||||
        This is the most important method of a Strategy, corresponding to the situation where
 | 
			
		||||
        it's a player's turn to play a game and she needs to decide what to do. 
 | 
			
		||||
 | 
			
		||||
        Strategy chooses an action by considering all possible actions, and finding the 
 | 
			
		||||
        total current and future reward which would come from playing that action. 
 | 
			
		||||
        Then we use the game's objective to choose the "best" reward. Usually bigger is better, 
 | 
			
		||||
        but in zero-sum games like tic tac toe, the players want opposite outcomes. One player
 | 
			
		||||
        wants the reward to be high, while the other wants the reward to be low.
 | 
			
		||||
 | 
			
		||||
        Once we know which reward is best, we choose an action which will lead to that reward.
 | 
			
		||||
        """
 | 
			
		||||
        possible_actions = self.game.get_actions(state)
 | 
			
		||||
        rewards = {}
 | 
			
		||||
        for action in possible_actions:
 | 
			
		||||
            future_state = self.game.get_next_state(state, action)
 | 
			
		||||
            rewards[action] = self.get_current_and_future_reward(future_state, depth=depth)
 | 
			
		||||
        objective = self.game.get_objective(state)
 | 
			
		||||
        best_reward = objective(rewards.values())
 | 
			
		||||
        best_actions = [action for action in possible_actions if rewards[action] == best_reward]
 | 
			
		||||
        if self.deterministic:
 | 
			
		||||
            action = best_actions[0]
 | 
			
		||||
        else:
 | 
			
		||||
            action = choice(best_actions)
 | 
			
		||||
        if self.explain:
 | 
			
		||||
            self.print_explanation(state, action, rewards[action], depth)
 | 
			
		||||
        return action
 | 
			
		||||
 | 
			
		||||
    def get_current_and_future_reward(self, state, depth=0):
 | 
			
		||||
        """Calculates the reward from this state, and from all future states which would be 
 | 
			
		||||
        reached, assuming all players are using this Strategy.
 | 
			
		||||
        """
 | 
			
		||||
        reward = self.game.get_reward(state)
 | 
			
		||||
        if (self.max_depth is None or depth <= self.max_depth) and not self.game.is_over(state):
 | 
			
		||||
            action = self.choose_action(state, depth=depth+1)
 | 
			
		||||
            future_state = self.game.get_next_state(state, action)
 | 
			
		||||
            reward += self.get_current_and_future_reward(future_state, depth=depth+1)
 | 
			
		||||
        return reward
 | 
			
		||||
 | 
			
		||||
    def validate_game(self, game):
 | 
			
		||||
        "Checks that the game has all the required methods."
 | 
			
		||||
        required_methods = [
 | 
			
		||||
            "get_next_state",
 | 
			
		||||
            "get_actions",
 | 
			
		||||
            "get_reward",
 | 
			
		||||
            "is_over",
 | 
			
		||||
            "get_objective",
 | 
			
		||||
        ]
 | 
			
		||||
        for method in required_methods:
 | 
			
		||||
            if not (hasattr(game, method) and isinstance(getattr(game, method), MethodType)):
 | 
			
		||||
                message = f"Game {game} does not have method {method}."
 | 
			
		||||
                raise ValueError(message)
 | 
			
		||||
 | 
			
		||||
    def print_explanation(self, state, action, reward, depth):
 | 
			
		||||
        """Prints out the current state of exploration of the state tree"""
 | 
			
		||||
        indent = '│ ' * (max(0, depth-1)) + ('├ ' if depth > 0 else '')
 | 
			
		||||
        print(f"{indent}[{reward}] Best action: {action} {state}")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										11
									
								
								strategy/random_strategy.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								strategy/random_strategy.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,11 @@
 | 
			
		||||
from random import choice
 | 
			
		||||
 | 
			
		||||
class RandomStrategy:
 | 
			
		||||
    """A Strategy which randomly chooses a move. Not a great choice.
 | 
			
		||||
    """
 | 
			
		||||
    def __init__(self, game):
 | 
			
		||||
        self.game = game
 | 
			
		||||
 | 
			
		||||
    def choose_action(self, state):
 | 
			
		||||
        possible_actions = self.game.get_actions(state)
 | 
			
		||||
        return choice(possible_actions)
 | 
			
		||||
		Reference in New Issue
	
	Block a user