diff --git a/notes.md b/notes.md index d0bb1d7..d3a6201 100644 --- a/notes.md +++ b/notes.md @@ -31,9 +31,15 @@ and it's your turn, which action would you take? Why? ---+---+--- ---+---+--- ---+---+--- ---+---+--- | | | | O | | | | +board #1: position 0, to block the O's from getting 3 in a row. +board #2: position 6, to block the O's from getting 3 in a row. +board #3: position 0, because I do not need to block O's and this will allow me to have 2 places to get 3 in a row. +board #4: position 4, because I will be trying to get 3 in a row. + ### Initial game state You can get the inital game state using game.get_initial_state(). What is the current and future reward for this state? What does this mean? +Currently there is no reward for the initial state because all the spots are blank and nobody has won yet. We could also be at this state after the game resets. Either a player will gain a point by winning or nobody gets a point if there is no winner. diff --git a/play_ttt.py b/play_ttt.py index ef5530a..92542ba 100644 --- a/play_ttt.py +++ b/play_ttt.py @@ -3,7 +3,7 @@ from ttt.view import TTTView from ttt.player import TTTHumanPlayer, TTTComputerPlayer player0 = TTTHumanPlayer("Player 1") -player1 = TTTHumanPlayer("Player 2") +player1 = TTTComputerPlayer("Robot") game = TTTGame() view = TTTView(player0, player1) diff --git a/ttt/player.py b/ttt/player.py index bfbbe15..300f919 100644 --- a/ttt/player.py +++ b/ttt/player.py @@ -1,5 +1,5 @@ from click import Choice, prompt -from strategy.random_strategy import RandomStrategy +from strategy.lookahead_strategy import LookaheadStrategy from ttt.game import TTTGame import random @@ -24,7 +24,7 @@ class TTTComputerPlayer: def __init__(self, name): "Sets up the player." self.name = name - self.strategy = RandomStrategy(TTTGame()) + self.strategy = LookaheadStrategy(TTTGame(), deterministic=False) def choose_action(self, state): "Chooses a random move from the moves available."