I implemented the three-way AI using AlphaBeta method, but it doesn't work the same way as MiniMax method.

Asked 1 years ago, Updated 1 years ago, 318 views

We implement AI in three-in-a-row using AlphaBeta method.
The triplet arrangement is based on the following site and is implemented using the Minimax method, but we have changed it to AlphaBeta method.

https://github.com/koki0702/tictactoe-ai-youtube

AlphaBeta method is a forward compatible Minimax method and is an algorithm with less computation, so I hope they will do the same, but the results are different.

I can't solve any problem on any site, so I'd like to ask for your help. Thank you.

The source code is as follows.

main program

import Board as b
import NPC
import PLAYER

board=b.Board()
players = [NPC.AplhaBeta(0), PLAYER.HumanPlayer(1)]
# players = [NPC.MiniMax(0), PLAYER.HumanPlayer(1)]
player=1#0 or 1

while True:
    p=players [player]
    p.play(board)
    board.render()

    if board.is_win(player):
        break
    elif board.is_end():
        print ("Draw")
        break

    player=1 if player==0 else0

Board Class

import copy

Class Board:
    def__init__(self)->None:
        self.state=[-1]*9#Type of Place (O or X)
        self.count = 0

    defrender(self):
        MARKS = {0:'X', 1:'O'}
        text=""
                0|1|2
                -----
                3|4|5
                -----
                6|7|8
                """
        for idx, x in enumerate (self.state):
            if x is not-1:
                text=text.replace(str(idx), MARKS[x])#4->X
        print(text)

    def put (self, player, idx):
        if self.state [idx]!=-1 or (not(0<=idx<=8)):
            return False

        self.state [idx] = player
        self.count+=1
        return True

    def take (self, idx):
        self.count -=1
        self.state [idx] = -1

    defis_win(self, player):
        s=self.state
        if(
            s[0] == s[1] == s[2] == player or
            s[3] == s[4] == s[5] == player or
            s[6] == s[7] == s[8] == player or
            s[0] == s[3] == s[6] == player or
            s[1] == s[4] == s[7] == player or
            s[2] == s[5] == s[8] == player or
            s[0] == s[4] == s[8] == player or
            s[2]==s[4]==s[6]==player
        ):
            return True
        return False

    def eva_value (self, player):
        opp=0 if player==1 else1
        if self.is_win(player):
            return1
        elif self.is_win(opp):
            return-1
        else:
            return 0

    defis_end(self):
        if-1 in self.state:
            return False
        
        return True

    default_puts(self):
        puts=[]#Candidates to be placed
        for idx, player in enumerate (self.state):
            if player==-1:
                puts.append(idx)
        return puts

    defboard_result(self,idx):
        tmp=copy.deepcopy(self)
        n_player=tmp.next_player()
        tmp.put(n_player,idx)
        return tmp

    def next_player(self):
        # 0...first f,1...second attack s
        state = self.state
        f = s = 0
        if self.count == 0:
            return 0
        
        For pin state:
            ifp == 0:
                f + = 1
            elif == 1:
                s+ = 1

        if f==s:
            return 0
        eliff>s:
            return1
        else:
            return-1

NPC Class # This program has MiniMax and AlphaBeta

import random
import Board 3x3 asbo

defmain():
    board=bo.Board()
    cpu=AplhaBeta(0)

    # score, idx=alphabeta(board, cpu.player, cpu.depth, float('-inf'), float('inf')))

    board.put(0,8)
    board.put(1,4)
    board.put(0,7)
    board.put(1,6)

    # score, idx=alphabeta(board, cpu.player, cpu.depth, float('-inf'), float('inf')))
    score, idx=minimax(board, 0)

    print(score, idx)

class RandomPlay:
    def__init__(self, player):
        self.player=player

    defplay(self, board):
        idx=random.randint(0,15)
        return board.put(self.player,idx),idx

def minimumax (board, player):
    maximize_player = 0
    minimize_player=1

    if board.is_win(maximize_player):
        return(1, None)
    elif board.is_win(minimize_player):
        return (-1, None)
    elif board.is_end():
        return(0, None)

    opp=1 if player==0 else0

    if player==maximize_player:
        max_score=float('-inf')
        max_idx = None

        for idx inboard.valid_puts():
            board.put (player, idx)
            score, next_idx=minimax(board, opp)
            if max_score<score:
                max_score=score
                max_idx = idx
            board.take(idx)
        
        return(max_score, max_idx)
    else:
        min_score=float('inf')
        min_idx = None

        for idx inboard.valid_puts():
            board.put (player, idx)
            score, next_idx=minimax(board, opp)
            if min_score>score:
                min_score=score
                min_idx = idx
            board.take(idx)
        
        return(min_score,min_idx)

defalfabeta(board, player, depth, alpha, beta):
    # print("depth=", depth)
    maximize_player = 0
    minimize_player=1

    # print(depth)

    if board.is_win(maximize_player):
        return(1, None)
    elif board.is_win(minimize_player):
        return (-1, None)
    elif board.is_end() or depth==0:
        return(0, None)
    

    opp=1 if player==0 else0

    if player==maximize_player:
        for put in board.valid_puts():
            score, next_idx = alpha (board.board_result(put), opp, depth-1, alpha, beta)
            alpha = max (alpha, score)
            if alpha>=beta:
                break
            next_idx=put
        return alpha, next_idx

    else:
        for put in board.valid_puts():
            score, next_idx = alpha (board.board_result(put), opp, depth-1, alpha, beta)
            beta=min(beta,score)
            if alpha<=beta:
                break
            next_idx=put
        return beta, next_idx


class MiniMax:
    def__init__(self, player):
        self.player=player
    
    defplay(self, board):
        score, idx=minimax(board,self.player,)
        return board.put(self.player,idx),idx

classAplhaBeta:
    def__init__(self, player):
        self.player=player
        self.depth=float('inf')
        self.depth=7

    defplay(self, board):
        score, idx=alphabeta(board, self.player, self.depth, float('-inf'), float('inf')))
        # idx=alphabeta (board, self.player, self.depth, -500,500)
        return board.put(self.player,idx),idx

if__name__=="__main__":
    main()

PLAYER CLASS

class Player:
    def__init__(self, player):
        self.player=player

    defplay(self, board, idx):
        return board.put (self.player, idx)


Class HumanPlayer:
    def__init__(self, player):
        self.player=player
        
    defplay(self, board):
        while True:
            print('Please enter a number from 0 to 8:', end="")
            idx=input()

            try:
                idx=int(idx)
                success=board.put(self.player, idx)
                if success:
                    break
                else:
                    print("Please enter the appropriate number")
            exceptionValueError:
                pass

python algorithm

2022-12-29 02:33

1 Answers

Here is a descriptive example of a modification based on minimax() because the proposed behavior of alphabeta() could not be understood.Also, regarding the description of alpha beta method, we used Wikipedia's pseudocode, but minimax() handles the termination without using depth.

defalphabeta(board, player, alpha, beta):
    maximize_player = 0
    minimize_player=1

    if board.is_win(maximize_player):
        return(1, None)
    if board.is_win(minimize_player):
        return (-1, None)
    if board.is_end():
        return(0, None)

    opp=1 if player==0 else0

    if player==maximize_player:
        max_score=float('-inf')
        max_idx = None
        for idx inboard.valid_puts():
            board.put (player, idx)
            score,_=alphabeta(board, opp, alpha, beta)
            if score>max_score:
                max_score=score
                max_idx = idx
            board.take(idx)
            alpha = max (alpha, score)
            if alpha>=beta:
                break
        return(max_score, max_idx)
    else:
        min_score=float('inf')
        min_idx = None
        for idx inboard.valid_puts():
            board.put (player, idx)
            score,_=alphabeta(board, opp, alpha, beta)
            if score<min_score:
                min_score=score
                min_idx = idx
            board.take(idx)
            beta=min(beta,score)
            if alpha>=beta:
                break
        return(min_score,min_idx)
class AlphaBeta:
    def__init__(self, player):
        self.player=player

    defplay(self, board):
        _,idx=alphabeta(board,self.player,
                           float('-inf', float('inf')))
        return board.put(self.player,idx),idx

In addition, we confirmed that the same procedure and results can be obtained by performing a total of 10 matches (automatic matches) from the beginning (nine types) fixed.On the other hand, the processing time was less than 1/10th by using the alpha beta method (environment: macOS 13.1(M1), Python 3.10.8).

 players = [NPC.MiniMax(0), NPC.MiniMax(1)]
players = [NPC.MiniMax(0), NPC.AlphaBeta(1)]
players = [NPC.AlphaBeta(0), NPC.MiniMax(1)]
players = [NPC.AlphaBeta(0), NPC.AlphaBeta(1)]


2022-12-31 00:21

If you have any answers or tips


© 2024 OneMinuteCode. All rights reserved.