IRIS Flower Classification

# import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from warnings import filterwarnings
# NOTE: with no category/module filter this silences every warning for the
# rest of the session, including library deprecation and convergence warnings.
filterwarnings(action='ignore')


# Load the dataset from "iris.csv" (path is relative to the current working
# directory) into a pandas DataFrame.
iris=pd.read_csv("iris.csv")

# (number of rows, number of columns) of the loaded DataFrame
iris.shape
Output:
(150, 5)

# Display summary statistics for the data via DataFrame.describe()
iris.describe()
Output:
img
# Count the missing (NaN/None) values in each column
iris.isna().sum()

Output:
sepal_length      0
sepal_width       0
petal_length      0
petal_width       0
species             0
dtype: int64

# Display the first five rows
iris.head()
Output:
img
# Check class balance: count the rows belonging to each species.

# Boolean mask selecting the versicolor rows.
versicolor_rows = iris['species'] == 'Iris-versicolor'
n = len(iris.loc[versicolor_rows])
print("No of Versicolor in Dataset:", n)

No of Versicolor in Dataset: 50


# Boolean mask selecting the virginica rows.
virginica_rows = iris['species'] == 'Iris-virginica'
n1 = len(iris.loc[virginica_rows])
print("No of Virginica in Dataset:", n1)

No of Virginica in Dataset: 50


# Boolean mask selecting the setosa rows.
setosa_rows = iris['species'] == 'Iris-setosa'
n2 = len(iris.loc[setosa_rows])
print("No of Setosa in Dataset:", n2)

No of Setosa in Dataset: 50


# Pie chart of the class distribution.
# The wedge sizes are derived from the data instead of being hard-coded, so
# the chart cannot silently disagree with the dataset. The mapping keeps each
# display label tied to the exact value stored in the 'species' column.
species_by_label = {
    'Versicolor': 'Iris-versicolor',
    'Setosa': 'Iris-setosa',
    'Virginica': 'Iris-virginica',
}
species_counts = iris['species'].value_counts()

fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1])
ax.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
l = list(species_by_label)
# A species missing from the data gets a count of 0 rather than raising.
s = [int(species_counts.get(species, 0)) for species in species_by_label.values()]
ax.pie(s, labels=l, autopct='%1.2f%%')
plt.show()
img
#Checking for outliers
import matplotlib.pyplot as plt

# Draw one box plot per measurement column, each in its own numbered figure
# (figures 1-4, in the column order listed below).
measurement_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
for figure_number, column in enumerate(measurement_columns, start=1):
    plt.figure(figure_number)
    plt.boxplot([iris[column]])
plt.show()
img img img img
# Density plots, one subplot per plotted column, arranged on a 3x3 grid;
# sharex=False gives every subplot its own x-axis.
iris.plot(kind ='density',subplots = True, layout =(3,3),sharex = False)
Output:
array([[<AxesSubplot:ylabel='Density'>, <AxesSubplot:ylabel='Density'>,
	<AxesSubplot:ylabel='Density'>],
 	[<AxesSubplot:ylabel='Density'>, <AxesSubplot:ylabel='Density'>,
 	<AxesSubplot:ylabel='Density'>],
 	[<AxesSubplot:ylabel='Density'>, <AxesSubplot:ylabel='Density'>,
 	<AxesSubplot:ylabel='Density'>]], dtype=object)

img
# Seaborn pair plot of the dataset, with points coloured by the 'species' column
sns.pairplot(iris,hue='species')
Output:

<seaborn.axisgrid.PairGrid at 0x18963832280>

img
#Heat Maps
# Annotated correlation heatmap of the numeric measurement columns.
fig=plt.gcf()
fig.set_size_inches(10,7)
# DataFrame.corr() must only see numeric columns: on pandas >= 2.0 the string
# 'species' column makes iris.corr() raise instead of being dropped silently.
numeric_corr = iris.select_dtypes(include='number').corr()
# NOTE: sns.heatmap returns the Axes it drew on, so `fig` is rebound to an
# Axes here; the original binding is kept so the name stays defined.
fig = sns.heatmap(numeric_corr, annot=True, cmap='cubehelix', linewidths=1, linecolor='k',
                  square=True, mask=False, vmin=-1, vmax=1,
                  cbar_kws={"orientation": "vertical"}, cbar=True)

img
# Feature matrix (every column except the label) and the target labels.
X = iris.drop(['species'],axis=1)
y = iris['species']
#Correlation
# Computed on X rather than on iris: the string 'species' column makes
# iris.corr() raise on pandas >= 2.0, and X already excludes it.
corr_mat = X.corr()
corr_mat
Output:
img
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
# Hold out 25% of the rows as the test set; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=101,
)
#Using LogisticRegression
# Fit on the training split, then score the predictions on the held-out split.
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# accuracy_score's signature is (y_true, y_pred). Accuracy itself is symmetric,
# so the printed value is unchanged, but the correct order matches the other
# model cells and stays right if an asymmetric metric is swapped in later.
print('Accuracy:',metrics.accuracy_score(y_test,y_pred))

Accuracy: 1.0


#Using Support Vector
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

# Support-vector classifier with default settings; fit() returns the fitted
# estimator, so construction and training are chained.
model1 = SVC().fit(X_train, y_train)
y_pred_svc = model1.predict(X_test)

svc_accuracy = accuracy_score(y_test, y_pred_svc)
print("Acc=", svc_accuracy)

Acc= 1.0


#Using KNN Neighbors
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier voting over the 5 closest training samples.
model2 = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred_KNN = model2.predict(X_test)

knn_accuracy = accuracy_score(y_test, y_pred_KNN)
print("Accuracy Score:", knn_accuracy)

Accuracy Score: 1.0


#Using GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes classifier with default settings.
model3 = GaussianNB().fit(X_train, y_train)
y_pred_GaussianNB = model3.predict(X_test)

gaussian_nb_accuracy = accuracy_score(y_test, y_pred_GaussianNB)
print("Accuracy Score:", gaussian_nb_accuracy)

Accuracy Score: 0.9736842105263158


#Using Decision Tree
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

# Decision tree split on information gain (entropy); random_state is fixed so
# the fitted tree is reproducible.
model4 = DecisionTreeClassifier(criterion='entropy', random_state=7).fit(X_train, y_train)
y_pred_DT = model4.predict(X_test)

decision_tree_accuracy = accuracy_score(y_test, y_pred_DT)
print("Accuracy Score:", decision_tree_accuracy)

Accuracy Score: 0.9736842105263158


About the Author



Silan Software is one of India's leading providers of offline & online training for Java, Python, AI (Machine Learning, Deep Learning), Data Science, Software Development & many more emerging technologies.

We provide Academic Training || Industrial Training || Corporate Training || Internship || Java || Python || AI using Python || Data Science, etc.





 PreviousNext