Machine learning algorithms
Machine learning algorithms — see the accompanying GitHub repository for the full notebook series.
[1]:
import numpy as np
import sklearn
In [2]:
# Human-readable form of the training data (weight in grams, skin texture):
#features = [[140, 'smooth'], [130, 'smooth'], [150, 'bumpy'], [170, 'bumpy']]
#labels = ['apple', 'apple', 'orange', 'orange']
# sklearn needs numeric input, so encode texture (smooth = 1, bumpy = 0)
# and the labels (apple = 0, orange = 1). This is what train data looks like.
features = [[140, 1], [130, 1], [150, 0], [170, 0]]
labels = [0, 0, 1, 1]
In [3]:
# Using a decision-tree classifier; classifiers are explored in depth in later cells.
from sklearn import tree
clf = tree.DecisionTreeClassifier()
# The learning step: fit builds the tree from the labelled examples above.
clf = clf.fit(features, labels)
clf.predict([[150, 0]])  # must predict orange = 1
Out[3]:
In [ ]:
In [4]:
#working with iris dataset
#import dataset
#train classifier
#Predict for the flower
In [5]:
# The iris dataset ships with sklearn, so no download is needed.
from sklearn.datasets import load_iris
iris = load_iris()
# Four feature columns (sepal/petal measurements) and three species labels.
print(iris.feature_names)
print(iris.target_names)
In [6]:
# Feature vector of the first example.
print(iris.data[0])
In [7]:
# Label of the first example.
iris.target[0]  # 0 = setosa
Out[7]:
In [8]:
# Walk the whole dataset, printing each example's label and feature vector.
# The for-loop manages the index itself: the original `i = 0` before the loop
# and `i += 1` inside it were dead code — `for i in range(...)` rebinds i on
# every iteration, so the manual counter never had any effect.
for i in range(len(iris.target)):
    print(f'Example {i}: label {iris.target[i]}, features {iris.data[i]}')
In [9]:
# Build a train/test split by hand: remove one example of each class from
# the original data to use as a held-out test set (these rows are then
# not part of the training data).
test_idx = [0, 50, 100]
# Remove the held-out rows from both the labels and the features.
train_target = np.delete(iris.target, test_idx)  # labels: 1-D array, so no axis needed
train_data = np.delete(iris.data, test_idx, axis = 0)  # features: delete whole rows
# The removed rows become the test set.
test_target = iris.target[test_idx]
test_data = iris.data[test_idx]
In [10]:
# Train a fresh decision-tree classifier on the remaining 147 examples.
clf = tree.DecisionTreeClassifier()
clf.fit(train_data, train_target)
# True labels of the held-out rows, for comparison with the next cell.
print(test_target)
In [11]:
# Predictions on the held-out rows; should match the true labels above.
print(clf.predict(test_data))
In [ ]:
In [12]:
# What makes a good feature — illustrated below with a toy dog-breed example.
In [13]:
import matplotlib.pyplot as plt

# Toy example: simulate heights (inches) for two dog breeds to show how a
# single feature can partially — but not fully — separate two classes.
greyhounds = 500
labs = 500
# Greyhounds average 28", labs 24", both with a standard deviation of 4".
grey_height = 28 + 4 * np.random.randn(greyhounds)
lab_height = 24 + 4 * np.random.randn(labs)
# Labels + legend added so the two colors are identifiable: the original
# plot had no legend, leaving the red/blue bars meaningless to a reader.
plt.hist([grey_height, lab_height], stacked= True,color = ['r', 'b'],
         label=['greyhound', 'lab'])
plt.xlabel('height (inches)')
plt.legend()
# The overlapping middle region is where height alone cannot decide the breed.
plt.show()
In [ ]:
Machine learning: a classifier is a function, similar to
def classify(features):
do some logic
return label
In [14]:
# Lesson 4: the full supervised-learning pipeline — load, split, train, score.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.metrics import accuracy_score

iris = datasets.load_iris()
# Features are X and labels are y, because a classifier models y = f(X).
X = iris.data
y = iris.target
# Hold out half of the examples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.5)
# Build and train the classifier, then score it on the unseen half.
my_classifier = tree.DecisionTreeClassifier()
my_classifier.fit(X_train, y_train)
predictions = my_classifier.predict(X_test)
print(accuracy_score(y_test, predictions))
In [15]:
# Same pipeline as above with a different classifier swapped in — only the
# two "create classifier" lines change; everything else stays identical.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

iris = datasets.load_iris()
X = iris.data    # features
y = iris.target  # labels, since a classifier models y = f(X)
# Half the data is held out for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.5)
# k-nearest-neighbours instead of a decision tree.
my_classifier = KNeighborsClassifier()
my_classifier.fit(X_train, y_train)
predictions = my_classifier.predict(X_test)
print(accuracy_score(y_test, predictions))
In [16]:
#a neural network is a more sophisticated type of classifier, like the decision tree and k-NN above
In [ ]:
In [17]:
#creating my own classifier
In [21]:
import random


class ScrappyKNN():
    """Baseline scratch 'classifier' that guesses a random training label."""

    def fit(self, X_train, y_train):
        # Nothing is learned — just memorize the training set.
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        # One randomly chosen training label per test row
        # (expected ~33% accuracy on the 3-class iris data).
        return [random.choice(self.y_train) for _ in X_test]
# Evaluate the scratch classifier with the exact same iris pipeline as before.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

iris = datasets.load_iris()
X = iris.data    # features
y = iris.target  # labels, since a classifier models y = f(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.5)
# Drop-in replacement for the sklearn classifiers used earlier — the only
# change is which class gets instantiated here.
my_classifier = ScrappyKNN()
my_classifier.fit(X_train, y_train)
predictions = my_classifier.predict(X_test)
print(accuracy_score(y_test, predictions))
In [ ]:
In [ ]:
#increasing accuracy: replace the random guess with a nearest-neighbour lookup
In [29]:
import random
from scipy.spatial import distance


def euc(a, b):
    """Euclidean distance between two feature vectors."""
    return distance.euclidean(a, b)


class ScrappyKNN():
    """Scratch 1-nearest-neighbour classifier: each test row takes the
    label of the closest training example."""

    def fit(self, X_train, y_train):
        # Lazy learner — just memorize the training set; all the work
        # happens at prediction time.
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        # Label every test row with its nearest neighbour's label.
        return [self.closest(row) for row in X_test]

    def closest(self, row):
        # Linear scan for the training example nearest to `row`; ties keep
        # the earliest index, matching a strict-less-than search.
        nearest = min(
            range(len(self.X_train)),
            key=lambda i: euc(row, self.X_train[i]),
        )
        return self.y_train[nearest]
# Score the 1-NN version with the same iris pipeline; nearest-neighbour
# lookup lifts accuracy from ~33% (random) to roughly 96%.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

iris = datasets.load_iris()
X = iris.data    # features
y = iris.target  # labels, since a classifier models y = f(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.5)
# Only this instantiation differs from the earlier sklearn pipelines.
my_classifier = ScrappyKNN()
my_classifier.fit(X_train, y_train)
predictions = my_classifier.predict(X_test)
print(accuracy_score(y_test, predictions))