Add explanation code
This commit is contained in:
parent
ffd47a8c8f
commit
f49e78c35f
53
ttt_learn.py
53
ttt_learn.py
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
# A state is a dictionary with two keys, "board" and "player." Here's an example:
|
# A state is a dictionary with two keys, "board" and "player." Here's an example:
|
||||||
#
|
#
|
||||||
# {
|
# {
|
||||||
|
@ -5,18 +6,17 @@
|
||||||
# "player": "X",
|
# "player": "X",
|
||||||
# }
|
# }
|
||||||
|
|
||||||
|
from itertools import count
|
||||||
|
counter = count()
|
||||||
|
|
||||||
def get_next_state(state, action):
    """Returns the state which would result from taking an action at a particular state.

    The given state is left untouched: its board is copied, the current
    player's mark is written at index `action`, and the turn passes to the
    opponent.

    Args:
        state: A dictionary with the keys "board" and "player".
        action: Index of the board space the current player marks.

    Returns:
        A new state dictionary holding the updated board and the opposing player.

    Raises:
        ValueError: If the space at `action` is already occupied.
    """
    # An empty space may be written as None or as '-'; anything else is a mark.
    if state["board"][action] not in (None, '-'):
        raise ValueError(f"Action {action} is illegal at state {state}; the space is occupied.")
    new_board = list(state["board"])
    new_board[action] = state["player"]
    return {
        "board": new_board,
        "player": get_opponent(state["player"]),
    }
|
||||||
|
|
||||||
def get_actions(state):
|
def get_actions(state):
|
||||||
|
@ -29,26 +29,33 @@ def get_actions(state):
|
||||||
else:
|
else:
|
||||||
actions = {}
|
actions = {}
|
||||||
for i in range(9):
|
for i in range(9):
|
||||||
if state["board"][i] is None:
|
if state["board"][i] is None or state["board"][i] == '-':
|
||||||
actions[i] = get_next_state(state, i)
|
actions[i] = get_next_state(state, i)
|
||||||
return actions
|
return actions
|
||||||
|
|
||||||
def choose_best_action(state, depth=0, explain=False):
    """Given a state, returns the best action, its resulting state, and that state's value.

    For each possible action, we find the value of the resulting state.
    Then, if the player is 'X', choose the action corresponding to the highest
    value. If the player is 'O', choose the action corresponding to the lowest
    value.

    Args:
        state: A dictionary with the keys "board" and "player".
        depth: Passed on to get_value and to the explanation log calls.
        explain: When true, a numbered question is logged before the search
            and its answer is logged once the best action is known.

    Returns:
        A tuple of (action, resulting state, value).
    """
    if explain:
        question_number = next(counter)
        pose_question(state, question_number, depth)
    actions = get_actions(state)
    # Value comes first in each pair so max/min rank by value; equal values
    # fall back to comparing the action itself.
    values_and_actions = [
        (get_value(result, depth=depth, explain=explain), action)
        for action, result in actions.items()
    ]
    if state["player"] == "X":
        value, action = max(values_and_actions)
    else:
        value, action = min(values_and_actions)
    if explain:
        answer_question(state, action, question_number, depth)
    return action, actions[action], value
|
||||||
|
|
||||||
def get_value(state, depth=0, debug=False):
|
def get_value(state, depth=0, explain=False):
|
||||||
"""Determines the value of the state.
|
"""Determines the value of the state.
|
||||||
"""
|
"""
|
||||||
if is_win(state, 'X'):
|
if is_win(state, 'X'):
|
||||||
|
@ -58,10 +65,12 @@ def get_value(state, depth=0, debug=False):
|
||||||
elif is_draw(state):
|
elif is_draw(state):
|
||||||
return 0
|
return 0
|
||||||
else:
|
else:
|
||||||
action, result, value = choose_best_action(state)
|
action, result, value = choose_best_action(state, depth=depth+1, explain=explain)
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
# =========================================================================================
|
||||||
# ================================== HELPERS ==============================================
|
# ================================== HELPERS ==============================================
|
||||||
|
# =========================================================================================
|
||||||
|
|
||||||
def get_opponent(player):
|
def get_opponent(player):
|
||||||
"Returns 'X' when player is 'O' and 'O' when player is 'X'"
|
"Returns 'X' when player is 'O' and 'O' when player is 'X'"
|
||||||
|
@ -79,7 +88,7 @@ def is_over(state):
|
||||||
def is_draw(state):
    """Returns True if the game ended in a draw.

    This only checks that no empty space (None or '-') remains on the board.
    It does not look for a winning line itself, so callers should rule out a
    win first, as get_value does before calling this.

    Args:
        state: A dictionary with a "board" key holding the board spaces.

    Returns:
        True when every space on the board is filled, False otherwise.
    """
    return not any(space is None or space == '-' for space in state["board"])
|
||||||
|
|
||||||
|
@ -90,3 +99,23 @@ def is_win(state, player):
|
||||||
if state["board"][a] == player and state["board"][b] == player and state["board"][c] == player:
|
if state["board"][a] == player and state["board"][b] == player and state["board"][c] == player:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def pose_question(state, index, depth):
    """Logs a question asking about a player's best move at a state.

    Args:
        state: A dictionary with the keys "board" and "player".
        index: Number passed to log for this question.
        depth: Depth passed to log for this question.
    """
    inline_board = format_board_inline(state['board'])
    player = state['player']
    question = f"What is the best action for {player} at {inline_board}?"
    log(question, index, depth)
|
||||||
|
|
||||||
|
def answer_question(state, action, index, depth):
    """Logs the answer to a question about a player's best move at a state.

    Args:
        state: A dictionary with the keys "board" and "player".
        action: The action being reported as best.
        index: Number passed to log for this answer.
        depth: Depth passed to log for this answer.
    """
    inline_board = format_board_inline(state['board'])
    player = state['player']
    answer = f"The best action for {player} at {inline_board} is {action}"
    log(answer, index, depth)
|
||||||
|
|
||||||
|
def log(message, index, depth):
    """Prints a message at the appropriate depth, with each message numbered.

    Args:
        message: Text to print.
        index: Number printed in front of the message.
        depth: How many times the indent string is repeated before the number.
    """
    numbered = f"{index}. {message}"
    print(' ' * depth + numbered)
|
||||||
|
|
||||||
|
def format_board_inline(board):
    """Formats a board like '[ OOX | -X- | --- ]'.

    Falsy spaces (such as None) are shown as '-'. The board is split into
    three groups: the first three spaces, the next three, and the rest.

    Args:
        board: A sequence of board spaces, each a string or a falsy value.

    Returns:
        The board as a single-line string.
    """
    symbols = [sym or '-' for sym in board]
    # The final group is open-ended so it takes everything from index 6 on.
    bounds = ((0, 3), (3, 6), (6, None))
    rows = [''.join(symbols[start:stop]) for start, stop in bounds]
    return '[ ' + ' | '.join(rows) + ' ]'
|
||||||
|
|
Loading…
Reference in New Issue