Understanding Pythorch Output

The program below identifies the handwritten numeric dataset MNIST into 10 classes.
The number of correct answers and the identification rate are determined by the validate function, and we used a confusion matrix to find out which data was classified into which classes.However, running this program does not match the sum of the ncorrect values and the physical components of the confusion matrix.How do I match it?

Learning Programs

importos
import sys
import numpy as np
import datetime

import torch
import torch.nn asn
import torch.nn.functional asF
import torch.optim as optim

import getdata

PATH_MNIST='./mnist'
PATH_RESULT = 'result_mnist'

### definition of the network
#
class NN (nn.Module):

    def__init__(self):
        super(NN,self).__init__()
        Hin1, Win1 = 28,28
        self.conv1 = nn.Conv2d (1,32, kernel_size=5)
        Hout1,Wout1 =_output_shape(Hin1,Win1,self.conv1) #24x24
        self.conv2 = nn.Conv2d (self.conv1.out_channels, 64, kernel_size = 5) 
        Hout2, Wout2 =_output_shape(Hout1//2, Wout1//2, self.conv2)#8x8
        self.fc1 = nn.Linear (64*(Hout2//2)*(Wout2//2), 1024) # 64 x 4
        self.fc2 = nn.Linear (self.fc1.out_features, 10)

    def forward (self, X):
        X = F.relu(F.max_pool2d(self.conv1(X),2))
        X = F.relu(F.max_pool2d(self.conv2(X),2))
        X = X.view (-1, self.fc1.in_features)
        X = F.relu(self.fc1(X))
        X = self.fc2(X)
        return F.log_softmax(X,dim=1)

def_output_shape(Hin, Win, conv2d):
        Hout=int(np.floor((Hin+2*conv2d.padding[0]-conv2d.dilation[0]*(conv2d.kernel_size[0]-1)-1)/conv2d.stride[0]+1)))
        Wout=int(np.floor(Win+2*conv2d.padding[1]-conv2d.dilation[1]*(conv2d.kernel_size[1]-1)-1)/conv2d.stride[1]+1)))
        return Hout, Wout

default (model, X, Y, bindex):
    A = np.zeros ((10,10))
    nbatch=bindex.shape [0]
    loss = 0
    ncorrect = 0
    with torch.no_grad():
        for ib in range (nbatch):
            ii=np.where(bindex[ib,:])[0]
            output=model(X[ii,::])
            
            #loss+=F.nll_loss(output,Y[ii],size_average=False).item()
            loss+=F.nll_loss(output,Y[ii],reduction='sum').item()
            pred=output.max(1,keepdim=True)[1]#argmax of the output# Find the classification results here
            for i in range (100):
                A[Y[i]][pred[i]]+=1
            ncorrect+=pred.eq(Y[ii].view_as(pred)) .sum().item()
            

    loss / = X.shape [0]
    acc=ncorrect/X.shape [0]
    print(A)
    print(np.trace(A), np.sum(A))
    return loss, acc, ncorrect

    

if__name__=='__main__':

    ### device
    #
    use_gpu_if_available = True
    if use_gpu_if_available and torch.cuda.is_available():
        device=torch.device('cuda')
    else:
        device=torch.device('cpu')
    print('#using',device')
    
    ### reading and preparing the training data
    #
    data=getdata.Data(PATH_MNIST,nV=10000)

    D=data.nrow*data.ncol
    K = data.nclass
    datLraw,labL=data.getData('L')
    datL = datLraw.reshape ((-1, 1, data.nrow, data.ncol))
    datVraw,labV=data.getData('V')
    datV = datVraw.reshape ((-1, 1, data.nrow, data.ncol))
    NV = datV.shape [0]
    NL = datL.shape [0]

    ### toorch.Tensor
    #
    XL=torch.from_numpy(datL.astype(np.float32)).to(device)
    YL=torch.from_numpy(labL).to(device)
    XV = torch.from_numpy(datV.astype(np.float32)).to(device)
    YV=torch.from_numpy(labV).to(device)
    
    ### initializing the network
    #
    Seed = 20
    torch.manual_seed (Seed)
    nn = NN()
    model=nn.to(device)
    print(nn)
    optimizer=optim.SGD (model.parameters(), lr=0.01, momentum=0.9)
    print(optimizer)

    ### training
    #
    batchsize = 100
    bindexL=getdata.makeBatchIndex(NL,batchsize)
    nbatchL=bindexL.shape[0]
    bindexV=getdata.makeBatchIndex(NV,batchsize)
    nbatchV=bindexV.shape [0]

    nitr = 10001
    nd = 0
    start = datetime.datetime.now()

    for i in range (nitr):

        if(i!=0) and(i%500==0):
            model.eval()#setting the module in evaluation mode
            lossL, accL, ncorrectL = validate (model, XL, YL, bindexL)
            # lossV, accV = validate (model, XV, YV, bindexV)
            print('#epoch{}'.format(nd/NL), end='')
            print('{:.4f}{:2f}{}.format(lossL, accL*100, ncorrectL))
            fnModel=os.path.join(PATH_RESULT,os.path.splitext(sys.argv[0])[0]+'seed{}-{}'.format(str(seed), str(int(nd/NL))))+'-params.pickle')
            with open(fnModel, mode='wb') asf:
                torch.save(model.state_dict(),f)
        model.train()#setting the module in training mode
        ib = np.random.randint(0,nbatchL)
        ii=np.where(bindexL[ib,:])[0]
        optimizer.zero_grad()
        output=model(XL[ii,:])
        loss=F.nll_loss(output, YL[ii])
        loss.backward()
        optimizer.step()

        nd+=ii.shape [0]

    print('#elapped time:', datetime.datetime.now()-start)
    
    ### saving the model
    #
    fnModel=os.path.join(PATH_RESULT,os.path.splitext(sys.argv[0])[0]+'seed{}-params.pickle'.format(Seed))
    with open(fnModel, mode='wb') asf:
        torch.save(model.state_dict(),f)
        print('#The model is saved to', fnModel)

Test Programs

importos
import sys
import pickle
import numpy as np
import datetime

import torch
import torch.nn asn
import torch.nn.functional asF

import getdata
import cnnL


if__name__=='__main__':

    ### device
    #
    use_gpu_if_available = True
    if use_gpu_if_available and torch.cuda.is_available():
        device=torch.device('cuda')
    else:
        device=torch.device('cpu')
    print('#using',device')

    ### initializing the network
    #
    fnModel=os.path.join(cnnL.PATH_RESULT, 'cnnL-params.pickle')
    torch.manual_seed(0)
    nn = cnnL.NN()
    with open(fnModel, mode='rb') asf:
        nn.load_state_dict(torch.load(f))
    model=nn.to(device)

    ### reading and preparing the training data
    #
    data=getdata.Data(cnnL.PATH_MNIST,nV=10000)

    D=data.nrow*data.ncol
    K = data.nclass
    datTraw,labT=data.getData('T')
    datT=dataTraw.reshape(-1,1,data.nrow,data.ncol))
    NT = datT.shape [0]

    ### toorch.Tensor
    #
    XT=torch.from_numpy(datT.astype(np.float32)).to(device)
    YT=torch.from_numpy(labT).to(device)
    
    ### evaluation
    #
    batchsize = 100
    bindexT=getdata.makeBatchIndex(NT,batchsize)
    model.eval()#setting the module in evaluation mode
    start = datetime.datetime.now()
    lossT, accT = cnnL.evaluate (model, XT, YT, bindexT)
    print('#elapped time:', datetime.datetime.now()-start)
    print('{:.4f}{:2f}'.format(lossT,accT*100))

python machine-learning numpy pytorch

2022-09-29 21:34

1 Answers

I'm sorry I couldn't catch up on the code, but I'd like to make a suggestion to isolate the problem.

First of all, it is likely that the cause is due to the validate implementation, so consider separating it from the nn implementation.

Replace the arguments X, Y, bindex in the function validate and output=model(X[ii,::]) in the function with a simple hand-made array, and look at the mixed matrix and ncorrect output.Which is different from the expected value?

For better answers, please refer to this as well.

How to write a short reproducible sample code - stackoverflow

2022-09-29 21:34

If you have any answers or tips

Popular Tags

python x 4647

android x 1593

java x 1494

javascript x 1427

c x 927

c++ x 878

ruby-on-rails x 696

php x 692

python3 x 685

html x 656