Heart Disease Prediction in Machine Learning

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set_style("whitegrid")

#Create the dataframe


# Load the heart-disease CSV into a DataFrame.
# A raw string is used so the Windows backslashes are never parsed as escape
# sequences ("\d" and "\h" are invalid escapes and warn on current Python).
# NOTE(review): this is an absolute local path — adjust it to where heart.csv lives.
df=pd.read_csv(r"E:\dataset\heart.csv")
df.head()

Output:

img
# Display the number of rows and columns in the dataset as a (rows, columns) tuple.
df.shape

Output:
(303, 14)


# Print a per-column summary: non-null counts, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0  age      303 non-null   int64
1  sex      303 non-null   int64
2  cp       303 non-null   int64
3  trestbps 303 non-null   int64
4  chol     303 non-null   int64
5  fbs      303 non-null   int64
6  restecg  303 non-null   int64
7  thalach  303 non-null   int64
8  exang    303 non-null   int64
9  oldpeak  303 non-null   float64
10 slope    303 non-null   int64
11 ca       303 non-null   int64
12 thal     303 non-null   int64
13 target   303 non-null   int64
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


# Count the missing (NaN) values in each column.
df.isna().sum()

Output:

age      0
sex      0
cp       0
trestbps 0
chol     0
fbs      0
restecg  0
thalach  0
exang    0
oldpeak  0
slope    0
ca       0
thal     0
target   0
dtype: int64

# List the distinct label values present in the 'target' column.
df.target.unique()

Output:
array([1, 0], dtype=int64)


# Count how many rows fall into each 'target' class to check class balance.
df.target.value_counts()

Output:

1 165
0 138
Name: target, dtype: int64
# Bar chart of the per-class row counts of the 'target' column;
# bars are coloured in the order value_counts() returns them.
df["target"].value_counts().plot.bar(color=["red", "green"])
plt.show()
img
# Choose X and y: X holds every column except 'target', y is the 'target' column.
X = df.drop('target', axis=1)
y = df.target
# Alternative positional selection (all but the last column / column index 13),
# left disabled:
#X=df.iloc[:,:-1]
#y=df.iloc[:,13]
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows as the test set; random_state=0 fixes the shuffle
# so the same split is produced on every run.
X_train, X_test, y_train, y_test=train_test_split(X,y,test_size=0.3,random_state=0)
# Show the (rows, columns) shape of the training features.
print(X_train.shape)

Output:
(212, 13)


# Print the held-out feature rows produced by the split above.
print(X_test)
img
#Create LogisticRegression model
from sklearn.linear_model import LogisticRegression
# The default iteration budget (max_iter=100) is too small for the lbfgs solver
# on these unscaled features: fit() stops early with a ConvergenceWarning.
# A larger budget lets the solver run to convergence; it still stops as soon
# as it converges, so the extra headroom costs nothing.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\linear_model\_logistic.p
y:762: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html (https://scik
it-learn.org/stable/modules/preprocessing.html)
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regre
ssion (https://scikit-learn.org/stable/modules/linear_model.html#logistic-re
gression)
n_iter_i = _check_optimize_result(

Output:
LogisticRegression()


# Predict labels for the training rows with the fitted logistic-regression model.
y_pred=model.predict(X_train)
y_pred
array([1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1,
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0,
0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,
1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1,
0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0,
0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0,
0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0,
1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1], dtype=int64)

# Predict labels for the held-out test rows with the same model.
y_predicted=model.predict(X_test)
y_predicted
array([0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0,
0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0,
0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1,
1, 0, 0], dtype=int64)

from sklearn.metrics import accuracy_score
# Fraction of correctly predicted labels on each split for logistic regression.
train_score = accuracy_score(y_train, y_pred)
test_score = accuracy_score(y_test, y_predicted)
# Test accuracy is printed first, then training accuracy.
print(test_score)
print(train_score)

0.8351648351648352
0.8632075471698113


# One-row summary table holding the logistic-regression scores.
# Values are accuracy fractions as returned by accuracy_score.
result_df = pd.DataFrame(
    {
        'Model': ["Logistic Regression"],
        'Training Accuracy %': [train_score],
        'Testing Accuracy %': [test_score],
    }
)
result_df

Output:

img
# Create the SVM model with scikit-learn's default SVC settings.
# NOTE(review): the features are passed in unscaled; SVC is usually sensitive
# to feature scale, which may explain the low scores below — confirm.
from sklearn.svm import SVC
obj=SVC()

# Train the SVM model on the training split.
obj.fit(X_train,y_train)

Output:
SVC()


# Predict training labels with the SVM.
# This rebinds y_pred, replacing the logistic-regression training predictions.
y_pred=obj.predict(X_train)
y_pred

Output:

array([1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,
1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0,
0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,
1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1], dtype=int64)
# Predict labels for the held-out test rows with the SVM.
y_prediction=obj.predict(X_test)
y_prediction

Output:

array([0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1,
0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0,
0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1,
1, 0, 1], dtype=int64)

from sklearn.metrics import accuracy_score
# Fraction of correctly predicted labels on each split for the SVM.
test_accuracy=accuracy_score(y_test,y_prediction)
train_accuracy=accuracy_score(y_train,y_pred)
# Training accuracy is printed first, then test accuracy.
print(train_accuracy)
print(test_accuracy)

0.6650943396226415
0.6703296703296703


# Build a one-row table with the SVM scores and stack it under the
# logistic-regression row. The inner list and the outer list are both closed
# before `columns=`, and pd.concat is used because DataFrame.append was
# removed in pandas 2.0.
results_df_1 = pd.DataFrame(data=[["Support Vector Machine", train_accuracy, test_accuracy]],
                            columns=['Model', 'Training Accuracy %', 'Testing Accuracy %'])
results_df_2 = pd.concat([result_df, results_df_1], ignore_index=True)
results_df_2

Output:

img
#Create DecisionTree Model
from sklearn.tree import DecisionTreeClassifier
# Seed the tree so the fitted model (and therefore its test accuracy) is the
# same on every run; without random_state, tie-breaking between equally good
# splits can differ from run to run.
obj1=DecisionTreeClassifier(random_state=42)
#Train the model on the training split
obj1.fit(X_train,y_train)

Output:
DecisionTreeClassifier()


# Predict labels for the training rows with the fitted decision tree.
y_pred_train=obj1.predict(X_train)
y_pred_train

Output:

array([1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0,
0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1,
0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0,
0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,
0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1,
0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0,
0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0,
0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0,
1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0], dtype=int64)

# Predict labels for the held-out test rows with the decision tree.
y_pred_test=obj1.predict(X_test)
y_pred_test

Output:

array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0,
0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0,
1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1,
0, 1, 0], dtype=int64)

from sklearn.metrics import accuracy_score
# Fraction of correctly predicted labels on each split for the decision tree.
test_accuracy_tree=accuracy_score(y_test,y_pred_test)
train_accuracy_tree=accuracy_score(y_train,y_pred_train)
# Training accuracy is printed first, then test accuracy.
print(train_accuracy_tree)
print(test_accuracy_tree)

1.0
0.7582417582417582


# Build a one-row table with the decision-tree scores and stack it under the
# rows collected so far. The list literal is closed before `columns=`, and
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
results_df_3 = pd.DataFrame(data=[["DecisionTree", train_accuracy_tree, test_accuracy_tree]],
                            columns=['Model', 'Training Accuracy %', 'Testing Accuracy %'])
results_df_4 = pd.concat([results_df_2, results_df_3], ignore_index=True)
results_df_4

Output:

img
# Create the RandomForest model: 1000 trees, with random_state=42 so the
# same seed is used on every run.
from sklearn.ensemble import RandomForestClassifier
obj2=RandomForestClassifier(n_estimators=1000, random_state=42)
# Train the forest on the training split.
obj2.fit(X_train,y_train)

Output:
RandomForestClassifier(n_estimators=1000, random_state=42)


# Predict labels for the training rows with the fitted random forest.
y_pred_forest_train=obj2.predict(X_train)
y_pred_forest_train

Output:

array([1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0,
0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1,
0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0,
0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,
0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1,
0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0,
0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0,
0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0,
1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0], dtype=int64)

# Predict labels for the held-out test rows with the random forest.
y_pred_forest_test=obj2.predict(X_test)
y_pred_forest_test
array([0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0,
0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0,
0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1,
1, 1, 0], dtype=int64)
from sklearn.metrics import accuracy_score
# Fraction of correctly predicted labels on each split for the random forest.
test_accuracy_forest=accuracy_score(y_test,y_pred_forest_test)
train_accuracy_forest=accuracy_score(y_train,y_pred_forest_train)
# Training accuracy is printed first, then test accuracy.
print(train_accuracy_forest)
print(test_accuracy_forest)

1.0
0.8351648351648352

# Build a one-row table with the random-forest scores and stack it under the
# rows collected so far to get the final comparison table. The truncated
# `test_accuracy_forest` name and the list literal are completed, and
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
results_df_5 = pd.DataFrame(data=[["RandomForest", train_accuracy_forest, test_accuracy_forest]],
                            columns=['Model', 'Training Accuracy %', 'Testing Accuracy %'])
results_df_6 = pd.concat([results_df_4, results_df_5], ignore_index=True)
results_df_6

Output:

img

About the Author



Silan Software is one of India's leading providers of offline & online training for Java, Python, AI (Machine Learning, Deep Learning), Data Science, Software Development & many more emerging technologies.

We provide Academic Training || Industrial Training || Corporate Training || Internship || Java || Python || AI using Python || Data Science, etc.





 PreviousNext