Fix strategy

This commit is contained in:
Chris Proctor 2022-05-06 17:17:17 -04:00
parent cf366e02c9
commit d76cea824d
1 changed file with 13 additions and 5 deletions

View File

@ -44,7 +44,7 @@ class LookaheadStrategy:
self.deterministic = deterministic self.deterministic = deterministic
self.explain = explain self.explain = explain
def choose_action(self, state): def choose_action(self, state, depth=0):
"""Given a state, chooses an action. """Given a state, chooses an action.
This is the most important method of a Strategy, corresponding to the situation where This is the most important method of a Strategy, corresponding to the situation where
it's a player's turn to play a game and she needs to decide what to do. it's a player's turn to play a game and she needs to decide what to do.
@ -57,11 +57,13 @@ class LookaheadStrategy:
Once we know which reward is best, we choose an action which will lead to that reward. Once we know which reward is best, we choose an action which will lead to that reward.
""" """
if self.explain:
self.print_explanation(state, depth)
possible_actions = self.game.get_actions(state) possible_actions = self.game.get_actions(state)
rewards = {} rewards = {}
for action in possible_actions: for action in possible_actions:
future_state = self.game.get_next_state(state, action) future_state = self.game.get_next_state(state, action)
rewards[action] = self.game.get_reward(future_state) rewards[action] = self.get_current_and_future_reward(future_state, depth=depth)
objective = self.game.get_objective(state) objective = self.game.get_objective(state)
best_reward = objective(rewards.values()) best_reward = objective(rewards.values())
best_actions = [action for action in possible_actions if rewards[action] == best_reward] best_actions = [action for action in possible_actions if rewards[action] == best_reward]
@ -70,14 +72,15 @@ class LookaheadStrategy:
else: else:
return choice(best_actions) return choice(best_actions)
def get_current_and_future_reward(self, state, depth=0):
    """Calculate the reward from this state and from all future states which
    would be reached, assuming all players are using this Strategy.

    Args:
        state: The game state to evaluate.
        depth: How many moves ahead of the starting state this call is. It is
            forwarded to `choose_action` and incremented for each further
            state explored.

    Returns:
        The reward of `state` plus the rewards of every state reached by
        repeatedly playing the action this Strategy chooses, until the game
        reports that it is over.
    """
    reward = self.game.get_reward(state)
    if not self.game.is_over(state):
        # choose_action returns an *action*, not a state: it has to be applied
        # to the current state before recursing. Recursing on the action
        # itself was the bug this block previously had.
        action = self.choose_action(state, depth=depth)
        future_state = self.game.get_next_state(state, action)
        reward += self.get_current_and_future_reward(future_state, depth=depth + 1)
    return reward
def validate_game(self, game): def validate_game(self, game):
@ -94,5 +97,10 @@ class LookaheadStrategy:
message = f"Game {game} does not have method {method}." message = f"Game {game} does not have method {method}."
raise ValueError(message) raise ValueError(message)
def print_explanation(self, state, depth):
    """Print the current state of exploration of the state tree.

    Args:
        state: The state being explored; it is rendered with its default
            string formatting.
        depth: Number of times the indent string is repeated before the
            state, so deeper states are printed further to the right.
    """
    indent = ' ' * depth
    print(f"{indent}{state}")