Better explanation in lookahead strategy

This commit is contained in:
Chris Proctor 2022-05-12 15:32:47 -04:00
parent a214bb654b
commit c26930b9f1
1 changed file with 8 additions and 7 deletions

View File

@@ -46,8 +46,6 @@ class LookaheadStrategy:
Once we know which reward is best, we choose an action which will lead to that reward. Once we know which reward is best, we choose an action which will lead to that reward.
""" """
if self.explain:
self.print_explanation(state, depth)
possible_actions = self.game.get_actions(state) possible_actions = self.game.get_actions(state)
rewards = {} rewards = {}
for action in possible_actions: for action in possible_actions:
@@ -57,9 +55,12 @@ class LookaheadStrategy:
best_reward = objective(rewards.values()) best_reward = objective(rewards.values())
best_actions = [action for action in possible_actions if rewards[action] == best_reward] best_actions = [action for action in possible_actions if rewards[action] == best_reward]
if self.deterministic: if self.deterministic:
return best_actions[0] action = best_actions[0]
else: else:
return choice(best_actions) action = choice(best_actions)
if self.explain:
self.print_explanation(state, action, rewards[action], depth)
return action
def get_current_and_future_reward(self, state, depth=0): def get_current_and_future_reward(self, state, depth=0):
"""Calculates the reward from this state, and from all future states which would be """Calculates the reward from this state, and from all future states which would be
@@ -86,10 +87,10 @@ class LookaheadStrategy:
message = f"Game {game} does not have method {method}." message = f"Game {game} does not have method {method}."
raise ValueError(message) raise ValueError(message)
def print_explanation(self, state, depth): def print_explanation(self, state, action, reward, depth):
"""Prints out the current state of exploration of the state tree""" """Prints out the current state of exploration of the state tree"""
indent = ' ' * depth indent = '' * (max(0, depth-1)) + ('' if depth > 0 else '')
print(f"{indent}{state}") print(f"{indent}[{reward}] Best action: {action} {state}")