#Import pandas and load the handwritten digits dataset
import pandas as pd
from sklearn.datasets import load_digits
digits = load_digits()
#List the attribute names available on the loaded digits object
dir(digits)
['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']
#Display the first five digit images
%matplotlib inline
import matplotlib.pyplot as plt
#Switch matplotlib's default colormap to grayscale before drawing
plt.gray()
#Draw digits.images[0] .. digits.images[4]; matshow opens a new figure per call
for i in range(5):
    plt.matshow(digits.images[i])
<Figure size 432x288 with 0 Axes>
#Show the first five rows of the digits.data array
digits.data[:5]
array([[ 0., 0., 5., 13., 9., 1., 0., 0., 0., 0., 13., 15., 10.,
15., 5., 0., 0., 3., 15., 2., 0., 11., 8., 0., 0., 4.,
12., 0., 0., 8., 8., 0., 0., 5., 8., 0., 0., 9., 8.,
0., 0., 4., 11., 0., 1., 12., 7., 0., 0., 2., 14., 5.,
10., 12., 0., 0., 0., 0., 6., 13., 10., 0., 0., 0.],
[ 0., 0., 0., 12., 13., 5., 0., 0., 0., 0., 0., 11., 16.,
9., 0., 0., 0., 0., 3., 15., 16., 6., 0., 0., 0., 7.,
15., 16., 16., 2., 0., 0., 0., 0., 1., 16., 16., 3., 0.,
0., 0., 0., 1., 16., 16., 6., 0., 0., 0., 0., 1., 16.,
16., 6., 0., 0., 0., 0., 0., 11., 16., 10., 0., 0.],
[ 0., 0., 0., 4., 15., 12., 0., 0., 0., 0., 3., 16., 15.,
14., 0., 0., 0., 0., 8., 13., 8., 16., 0., 0., 0., 0.,
1., 6., 15., 11., 0., 0., 0., 1., 8., 13., 15., 1., 0.,
0., 0., 9., 16., 16., 5., 0., 0., 0., 0., 3., 13., 16.,
16., 11., 5., 0., 0., 0., 0., 3., 11., 16., 9., 0.],
[ 0., 0., 7., 15., 13., 1., 0., 0., 0., 8., 13., 6., 15.,
4., 0., 0., 0., 2., 1., 13., 13., 0., 0., 0., 0., 0.,
2., 15., 11., 1., 0., 0., 0., 0., 0., 1., 12., 12., 1.,
0., 0., 0., 0., 0., 1., 10., 8., 0., 0., 0., 8., 4.,
5., 14., 9., 0., 0., 0., 7., 13., 13., 9., 0., 0.],
[ 0., 0., 0., 1., 11., 0., 0., 0., 0., 0., 0., 7., 8.,
0., 0., 0., 0., 0., 1., 13., 6., 2., 2., 0., 0., 0.,
7., 15., 0., 9., 8., 0., 0., 5., 16., 10., 0., 16., 6.,
0., 0., 4., 15., 16., 13., 16., 1., 0., 0., 0., 0., 3.,
15., 10., 0., 0., 0., 0., 0., 2., 16., 4., 0., 0.]])
#Wrap the digits.data array in a pandas DataFrame and preview its first rows
df = pd.DataFrame(data=digits.data)
df.head(n=5)
#Inspect digits.target, the array that is stored as the 'target' column below
digits.target
array([0, 1, 2, ..., 8, 9, 8])
#Append digits.target to the DataFrame as a new 'target' column and preview the result
df['target'] = digits.target
df.head(n=5)
#Split the dataset into training and testing data (20% held out for testing).
#Features are every column except 'target'; labels are digits.target.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(['target'], axis='columns'),
    digits.target,
    test_size=0.2,
)
#Create a RandomForestClassifier with default settings and train it on the training split
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()
model.fit(X=X_train, y=y_train)
RandomForestClassifier()
#Score the trained model on the held-out test split
model.score(X=X_test, y=y_test)
0.9638888888888889
#Predict labels for the X_test data and display them
y_pred = model.predict(X=X_test)
y_pred
array([9, 3, 4, 7, 4, 0, 2, 4, 2, 7, 9, 9, 2, 3, 8, 8, 9, 1, 3, 3, 5, 3,
5, 7, 8, 1, 4, 6, 6, 3, 6, 7, 8, 2, 6, 1, 5, 8, 1, 4, 8, 6, 9, 2,
2, 5, 8, 4, 0, 4, 7, 5, 6, 9, 2, 1, 1, 0, 8, 4, 4, 4, 1, 0, 3, 1,
7, 8, 8, 0, 3, 9, 7, 2, 2, 9, 3, 7, 8, 8, 8, 7, 6, 4, 8, 2, 4, 1,
4, 7, 1, 8, 4, 6, 4, 5, 4, 1, 9, 3, 2, 1, 1, 4, 1, 0, 1, 7, 7, 9,
5, 2, 0, 2, 5, 7, 4, 5, 4, 6, 3, 6, 5, 4, 5, 1, 7, 8, 7, 3, 4, 3,
9, 8, 0, 6, 1, 8, 1, 7, 4, 2, 2, 7, 3, 1, 3, 5, 9, 8, 6, 2, 0, 0,
6, 7, 3, 4, 8, 3, 2, 4, 6, 4, 6, 1, 6, 5, 1, 6, 5, 7, 6, 6, 7, 1,
2, 5, 5, 0, 5, 6, 0, 9, 9, 6, 4, 6, 4, 8, 6, 0, 0, 1, 7, 0, 4, 3,
9, 3, 6, 0, 9, 0, 6, 6, 5, 0, 5, 5, 8, 9, 1, 3, 3, 2, 8, 3, 8, 5,
2, 7, 4, 9, 0, 6, 8, 4, 0, 4, 3, 1, 5, 8, 9, 7, 1, 7, 4, 3, 7, 6,
7, 1, 1, 9, 4, 0, 4, 7, 4, 7, 0, 8, 8, 1, 8, 4, 0, 1, 7, 6, 4, 6,
6, 0, 1, 2, 5, 3, 7, 3, 6, 2, 7, 5, 5, 9, 0, 2, 3, 7, 4, 7, 7, 7,
0, 2, 4, 7, 4, 6, 5, 7, 6, 1, 7, 1, 2, 0, 0, 5, 7, 9, 2, 3, 0, 5,
3, 1, 2, 9, 0, 0, 2, 6, 6, 5, 0, 4, 0, 1, 3, 8, 2, 7, 6, 5, 7, 5,
7, 9, 4, 5, 4, 4, 5, 6, 6, 1, 9, 0, 7, 1, 8, 4, 9, 0, 7, 7, 0, 5,
0, 8, 5, 6, 9, 8, 8, 2])
#Build the confusion matrix comparing the true test labels with the predictions
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm
array([[37, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 36, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 30, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 30, 0, 0, 0, 1, 0, 0],
[ 0, 0, 0, 0, 44, 0, 0, 3, 0, 0],
[ 0, 0, 0, 0, 0, 32, 0, 0, 0, 1],
[ 0, 0, 0, 0, 0, 0, 40, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 39, 0, 0],
[ 0, 1, 0, 0, 1, 1, 0, 1, 33, 0],
[ 0, 0, 0, 0, 0, 2, 0, 1, 1, 26]], dtype=int64)
#Print the accuracy computed by accuracy_score() from the true and predicted labels
from sklearn.metrics import accuracy_score
print(accuracy_score(y_true=y_test, y_pred=y_pred))
0.9638888888888889
#Plot the confusion matrix (cm) as a heatmap
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
#Open a 10x7 figure, draw cm as an annotated heatmap, and label both axes
plt.figure(figsize=(10, 7))
ax = sns.heatmap(cm, annot=True)
ax.set_xlabel('Predicted')
ax.set_ylabel('Truth')
Text(69.0, 0.5, 'Truth')
Silan Software is one of India's leading providers of offline & online training for Java, Python, AI (Machine Learning, Deep Learning), Data Science, Software Development & many more emerging technologies.
We provide Academic Training || Industrial Training || Corporate Training || Internship || Java || Python || AI using Python || Data Science, etc.