WIP added ttt_learn
This commit is contained in:
parent
487aa30ced
commit
7ccf8f84d9
|
@ -0,0 +1,93 @@
|
|||
# A state is a dictionary with two keys, "board" and "player." Here's an example:
|
||||
#
|
||||
# {
|
||||
# "board": [None, None, "X", None, "O", None, "X", "O", None],
|
||||
# "player": "X",
|
||||
# }
|
||||
|
||||
|
||||
def get_next_state(state, action):
|
||||
"""Returns the state which would result from taking an action at a particular state.
|
||||
"""
|
||||
if state["board"][action] is not None:
|
||||
raise ValueError(f"Action {action} is illegal at state {state}; the space is occupied.")
|
||||
new_board = state["board"].copy()
|
||||
new_board[action] = state["player"]
|
||||
new_player = get_opponent(state["player"])
|
||||
return {
|
||||
"board": new_board,
|
||||
"player": new_player,
|
||||
}
|
||||
|
||||
def get_actions(state):
|
||||
"""Given a board state, returns a dictionary whose keys are possible actions and whose
|
||||
values are the resulting state from each action. If the game is over, returns an empty
|
||||
dictionary because no further moves are possible.
|
||||
"""
|
||||
if is_over(state):
|
||||
return {}
|
||||
else:
|
||||
actions = {}
|
||||
for i in range(9):
|
||||
if state["board"][i] is None:
|
||||
actions[i] = get_next_state(state, i)
|
||||
return actions
|
||||
|
||||
def choose_best_action(state):
|
||||
"""Given a state, returns the best action, its resulting state, and that state's value.
|
||||
For each possible action, we find the value of the resulting state.
|
||||
Then, if the player is 'X', choose the action corresponding to the highest
|
||||
value. If the player is 'O', choose the action corresponding to the lowest
|
||||
value.
|
||||
"""
|
||||
actions = get_actions(state)
|
||||
values_and_actions = [[get_value(result), action] for action, result in actions.items()]
|
||||
if state["player"] == "X":
|
||||
value, action = max(values_and_actions)
|
||||
else:
|
||||
value, action = min(values_and_actions)
|
||||
return action, actions[action], value
|
||||
|
||||
def get_value(state, depth=0, debug=False):
|
||||
"""Determines the value of the state.
|
||||
"""
|
||||
if is_win(state, state["player"]):
|
||||
return 1
|
||||
elif is_win(state, get_opponent(state["player"])):
|
||||
return -1
|
||||
elif is_draw(state):
|
||||
return 0
|
||||
else:
|
||||
action, result, value = choose_best_action(state)
|
||||
return value
|
||||
|
||||
|
||||
# ================================== HELPERS ==============================================
|
||||
|
||||
def get_opponent(player):
|
||||
"Returns 'X' when player is 'O' and 'O' when player is 'X'"
|
||||
if player == 'X':
|
||||
return 'O'
|
||||
elif player == 'O':
|
||||
return 'X'
|
||||
else:
|
||||
raise ValueError(f"Unrecognized player {player}")
|
||||
|
||||
def is_over(state):
|
||||
"Returns True if the game is over"
|
||||
return is_draw(state) or is_win(state, 'X') or is_win(state, 'O')
|
||||
|
||||
def is_draw(state):
|
||||
"Returns True if the game ended in a draw."
|
||||
for space in state["board"]:
|
||||
if space is None:
|
||||
return False
|
||||
return True
|
||||
|
||||
def is_win(state, player):
|
||||
"Returns True if `player` has won the game."
|
||||
win_lines = [[0,1,2], [3,4,5], [6,7,8], [0,3,6], [1,4,7], [2,5,8], [0, 4, 8], [2, 4, 6]]
|
||||
for a, b, c in win_lines:
|
||||
if state["board"][a] == player and state["board"][b] == player and state["board"][c] == player:
|
||||
return True
|
||||
return False
|
Loading…
Reference in New Issue