Better explanation in lookahead strategy

2022-05-12 15:32:47 -04:00 · 2022-05-12 15:32:47 -04:00 · c26930b9f1
parent a214bb654b
commit c26930b9f1
1 changed file with 8 additions and 7 deletions
--- a/strategy/lookahead_strategy.py
+++ b/strategy/lookahead_strategy.py
@@ -46,8 +46,6 @@ class LookaheadStrategy:

        Once we know which reward is best, we choose an action which will lead to that reward.
        """
-        if self.explain:
-            self.print_explanation(state, depth)
        possible_actions = self.game.get_actions(state)
        rewards = {}
        for action in possible_actions:
@@ -57,9 +55,12 @@ class LookaheadStrategy:
        best_reward = objective(rewards.values())
        best_actions = [action for action in possible_actions if rewards[action] == best_reward]
        if self.deterministic:
-            return best_actions[0]
+            action = best_actions[0]
        else:
-            return choice(best_actions)
+            action = choice(best_actions)
+        if self.explain:
+            self.print_explanation(state, action, rewards[action], depth)
+        return action

    def get_current_and_future_reward(self, state, depth=0):
        """Calculates the reward from this state, and from all future states which would be 
@@ -86,10 +87,10 @@ class LookaheadStrategy:
                message = f"Game {game} does not have method {method}."
                raise ValueError(message)

-    def print_explanation(self, state, depth):
+    def print_explanation(self, state, action, reward, depth):
        """Prints out the current state of exploration of the state tree"""
-        indent = '  ' * depth
-        print(f"{indent}{state}")
+        indent = '│ ' * (max(0, depth-1)) + ('├ ' if depth > 0 else '')
+        print(f"{indent}[{reward}] Best action: {action} {state}")