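Fix strategy: score actions by recursive lookahead, apply the chosen action before recursing, and add depth-indented explain output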

2022-05-06 17:17:17 -04:00 · 2022-05-06 17:17:17 -04:00 · d76cea824d
parent cf366e02c9
commit d76cea824d
1 changed file with 13 additions and 5 deletions
--- a/strategy.py
+++ b/strategy.py
@@ -44,7 +44,7 @@ class LookaheadStrategy:
        self.deterministic = deterministic
        self.explain = explain

-    def choose_action(self, state):
+    def choose_action(self, state, depth=0):
        """Given a state, chooses an action.
        This is the most important method of a Strategy, corresponding to the situation where
        it's a player's turn to play a game and she needs to decide what to do. 
@@ -57,11 +57,13 @@ class LookaheadStrategy:

        Once we know which reward is best, we choose an action which will lead to that reward.
        """
+        if self.explain:
+            self.print_explanation(state, depth)
        possible_actions = self.game.get_actions(state)
        rewards = {}
        for action in possible_actions:
            future_state = self.game.get_next_state(state, action)
-            rewards[action] = self.game.get_reward(future_state)
+            rewards[action] = self.get_current_and_future_reward(future_state, depth=depth)
        objective = self.game.get_objective(state)
        best_reward = objective(rewards.values())
        best_actions = [action for action in possible_actions if rewards[action] == best_reward]
@@ -70,14 +72,15 @@ class LookaheadStrategy:
        else:
            return choice(best_actions)

-    def get_current_and_future_reward(self, state):
+    def get_current_and_future_reward(self, state, depth=0):
        """Calculates the reward from this state, and from all future states which would be 
        reached, assuming all players are using this Strategy.
        """
        reward = self.game.get_reward(state)
        if not self.game.is_over(state):
-            future_state = self.choose_action(state)
-            reward += self.get_current_and_future_reward(future_state)
+            action = self.choose_action(state, depth=depth)
+            future_state = self.game.get_next_state(state, action)
+            reward += self.get_current_and_future_reward(future_state, depth=depth+1)
        return reward

    def validate_game(self, game):
@@ -94,5 +97,10 @@ class LookaheadStrategy:
                message = f"Game {game} does not have method {method}."
                raise ValueError(message)

+    def print_explanation(self, state, depth):
+        """Prints out the current state of exploration of the state tree"""
+        indent = '  ' * depth
+        print(f"{indent}{state}")
+