Initial commit

2023-08-25 14:34:55 +00:00
commit da4a3bc4c0
15 changed files with 503 additions and 0 deletions
--- a/strategy/lookahead_strategy.py
+++ b/strategy/lookahead_strategy.py
@@ -0,0 +1,96 @@
+from types import MethodType
+from random import choice
+
+class LookaheadStrategy:
+    """A Strategy which considers the future consequences of an action.
+
+    To initialize a LookaheadStrategy, pass in an instance of a game containing
+    the following methods. These methods encode the rules of the game, 
+    which a LookaheadStrategy needs to know in order to determine which move is best.
+
+        - get_next_state: state, action -> state
+        - get_actions: state -> [actions]
+        - get_reward: state -> int
+        - is_over: state -> bool
+        - get_objective: str -> function
+
+    Optionally, pass the following arguments to control the behavior of the LookaheadStrategy:
+
+        - max_depth: int. A game may be too complex to search the full state tree.
+          Setting max_depth will set a cutoff on how far ahead the LookaheadStrategy will look.
+        - deterministic: bool. It's possible that there are multiple equally-good actions. 
+          When deterministic is True, LookaheadStrategy will always choose the first of the 
+          equally-good actions, so that LookaheadStrategy will always play out the same game. 
+          When deterministic is False, LookaheadStrategy will choose randomly from all actions
+          which are equally-good. 
+        - Explain: When set to True, LookaheadStrategy will print out its reasoning. 
+    """
+
+    def __init__(self, game, max_depth=None, deterministic=True, explain=False):
+        self.validate_game(game)
+        self.game = game
+        self.max_depth = max_depth
+        self.deterministic = deterministic
+        self.explain = explain
+
+    def choose_action(self, state, depth=0):
+        """Given a state, chooses an action.
+        This is the most important method of a Strategy, corresponding to the situation where
+        it's a player's turn to play a game and she needs to decide what to do. 
+
+        Strategy chooses an action by considering all possible actions, and finding the 
+        total current and future reward which would come from playing that action. 
+        Then we use the game's objective to choose the "best" reward. Usually bigger is better, 
+        but in zero-sum games like tic tac toe, the players want opposite outcomes. One player
+        wants the reward to be high, while the other wants the reward to be low.
+
+        Once we know which reward is best, we choose an action which will lead to that reward.
+        """
+        possible_actions = self.game.get_actions(state)
+        rewards = {}
+        for action in possible_actions:
+            future_state = self.game.get_next_state(state, action)
+            rewards[action] = self.get_current_and_future_reward(future_state, depth=depth)
+        objective = self.game.get_objective(state)
+        best_reward = objective(rewards.values())
+        best_actions = [action for action in possible_actions if rewards[action] == best_reward]
+        if self.deterministic:
+            action = best_actions[0]
+        else:
+            action = choice(best_actions)
+        if self.explain:
+            self.print_explanation(state, action, rewards[action], depth)
+        return action
+
+    def get_current_and_future_reward(self, state, depth=0):
+        """Calculates the reward from this state, and from all future states which would be 
+        reached, assuming all players are using this Strategy.
+        """
+        reward = self.game.get_reward(state)
+        if (self.max_depth is None or depth <= self.max_depth) and not self.game.is_over(state):
+            action = self.choose_action(state, depth=depth+1)
+            future_state = self.game.get_next_state(state, action)
+            reward += self.get_current_and_future_reward(future_state, depth=depth+1)
+        return reward
+
+    def validate_game(self, game):
+        "Checks that the game has all the required methods."
+        required_methods = [
+            "get_next_state",
+            "get_actions",
+            "get_reward",
+            "is_over",
+            "get_objective",
+        ]
+        for method in required_methods:
+            if not (hasattr(game, method) and isinstance(getattr(game, method), MethodType)):
+                message = f"Game {game} does not have method {method}."
+                raise ValueError(message)
+
+    def print_explanation(self, state, action, reward, depth):
+        """Prints out the current state of exploration of the state tree"""
+        indent = '│ ' * (max(0, depth-1)) + ('├ ' if depth > 0 else '')
+        print(f"{indent}[{reward}] Best action: {action} {state}")
+
+
+
--- a/strategy/random_strategy.py
+++ b/strategy/random_strategy.py
@@ -0,0 +1,11 @@
+from random import choice
+
+class RandomStrategy:
+    """A Strategy which randomly chooses a move. Not a great choice.
+    """
+    def __init__(self, game):
+        self.game = game
+
+    def choose_action(self, state):
+        possible_actions = self.game.get_actions(state)
+        return choice(possible_actions)