I'm studying how to measure the accuracy of a two-class (binary) classifier on the Wisconsin breast cancer dataset from scikit-learn.
There are 569 rows (samples) and 30 columns (features). The code I studied previously reached 82% accuracy for binary classification (positive vs. negative) using all 30 features. What should I change in the code if I want to measure the accuracy using only the feature in the fourth column?
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Load the Wisconsin breast cancer dataset and show the shapes of the
# feature matrix and the target vector.
cancer = load_breast_cancer()
print(cancer.data.shape, cancer.target.shape)

# Box plot of every feature column to compare their value ranges.
plt.boxplot(cancer.data)
plt.xlabel('feature')
plt.ylabel('value')
plt.show()

# Names of the features at column indices 3, 13 and 23.
print(cancer.feature_names[[3, 13, 23]])

# Use all feature columns as input.
# NOTE: cancer.data[3] selects ROW 3 (one sample with 30 values), not the
# 4th feature, so train_test_split then sees 30 "samples" in x against 569
# in y. To train on only the 4th feature, slice the column instead and keep
# x two-dimensional: x = cancer.data[:, 3:4]
x = cancer.data
y = cancer.target

# Stratified 80/20 train/test split with a fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, stratify=y, test_size=0.2, random_state=42
)
#Implementing logistic regression
class LogisticNeuron:
    """Single logistic-regression neuron trained one sample at a time.

    ``fit`` creates the attributes ``w`` (one weight per feature) and ``b``
    (the intercept); both are ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        self.w = None  # weight vector, set by fit()
        self.b = None  # intercept, set by fit()

    def forpass(self, x):
        """Return the linear score ``sum(x * w) + b`` for one sample ``x``."""
        z = np.sum(x * self.w) + self.b  # equation of the straight line
        return z

    def backprop(self, x, err):
        """Return ``(w_grad, b_grad)`` for one sample given its error."""
        w_grad = x * err  # gradient for the weights
        b_grad = 1 * err  # gradient for the intercept
        return w_grad, b_grad

    def fit(self, x, y, epochs=100):
        """Train the neuron on samples ``x`` with binary targets ``y``.

        Args:
            x: 2-D array of shape (n_samples, n_features). A 1-D array is
                treated as n_samples values of a single feature.
            y: Sequence of 0/1 targets, one per sample.
            epochs: Number of full passes over the data.
        """
        x = np.asarray(x)
        if x.ndim == 1:
            # A single feature column such as data[:, 3]: make it (n, 1) so
            # that x.shape[1] below is defined.
            x = x.reshape(-1, 1)
        self.w = np.ones(x.shape[1])  # weight initialization
        self.b = 0  # intercept initialization
        for _ in range(epochs):  # repeat for the number of epochs
            for x_i, y_i in zip(x, y):  # iterate over all samples
                z = self.forpass(x_i)  # forward pass
                a = self.activation(z)  # apply the activation function
                err = -(y_i - a)  # error between prediction and target
                w_grad, b_grad = self.backprop(x_i, err)  # backward pass
                self.w -= w_grad  # update the weights
                self.b -= b_grad  # update the intercept

    def activation(self, z):
        """Return the sigmoid ``1 / (1 + exp(-z))`` of ``z``."""
        # Bound z from below so np.exp(-z) cannot overflow on large negative
        # scores (common with unscaled features); the result is still ~0.
        z = np.maximum(z, -100)
        a = 1 / (1 + np.exp(-z))
        return a

    def predict(self, x):
        """Return a boolean array: True where the predicted probability > 0.5."""
        z = [self.forpass(x_i) for x_i in x]  # forward pass for every sample
        a = self.activation(np.array(z))  # apply the activation function
        return a > 0.5
# Train the logistic regression model on the training split.
neuron = LogisticNeuron()
neuron.fit(x_train, y_train)
# Evaluate the model on the test set: the accuracy is the fraction of test
# samples whose prediction matches the true label. It is stored and printed
# so the value is also visible outside a notebook.
accuracy = np.mean(neuron.predict(x_test) == y_test)
print(accuracy)
==> Result: 0.8245614035087719 (about 82%)
What I want is this: instead of
x = cancer.data
which uses all 30 features, I want to check the accuracy using only the 4th feature.
So I changed that line to
x = cancer.data[3]
but after that change, an error occurs at the following line.
x_train, x_test, y_train, y_test = train_test_split(x, y, stratify = y, test_size=0.2, random_state=42)
ValueError Traceback (most recent call last)
<ipython-input-51-49b444fce24c> in <module>()
----> 1 x_train, x_test, y_train, y_test = train_test_split(x, y, stratify = y, test_size=0.2, random_state=42)
2 frames
/usr/local/lib/python3.7/dist-packages/sklearn/utils/validation.py in check_consistent_length(*arrays)
210 if len(uniques) > 1:
211 raise ValueError("Found input variables with inconsistent numbers of"
--> 212 " samples: %r" % [int(l) for l in lengths])
213
214
ValueError: Found input variables with inconsistent numbers of samples: [30, 569]
# Take every row of column index 3 (the 4th feature); the integer index
# drops the column axis, so x is 1-D.
x = cancer.data[:, 3]
# Same column, but the 3:4 slice keeps the column axis so x stays 2-D with a
# single column -- the form fit() needs, because it reads x.shape[1].
x = cancer.data[:, 3:4]
© 2024 OneMinuteCode. All rights reserved.