The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. The attribute to be predicted is the class of iris plant. The classes are as follows: 1. Iris Setosa, 2. Iris Versicolour, 3. Iris Virginica
There are 4 features: sepal length, sepal width, petal length, and petal width (all measured in cm).
There are 3 classes representing the class label of the iris flower: {1, 2, 3}.
Dr. Ryan @STEMplicity
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Switch matplotlib over to seaborn's default look for every figure below.
sns.set()

# Read the Iris measurements straight from the hosted CSV into a DataFrame.
url = "https://datascienceschools.github.io/Machine_Learning/Classification_Models_CaseStudies/Iris.csv"
df = pd.read_csv(filepath_or_buffer=url)

# Peek at the first five rows. The result is only displayed when this runs as
# the last expression of a notebook cell; in a plain script it is discarded.
df.head(n=5)
# Scatter plots of the raw measurements, coloured by species: sepal
# length/width in the first cell of a 2x2 grid, petal length/width in the
# second.
plt.figure(figsize=(10, 10))

# x/y are passed by keyword: seaborn 0.11 deprecated positional x/y and from
# 0.12 on `data` is the only positional argument, so the positional form
# raises a TypeError on current releases.
plt.subplot(2, 2, 1)
sns.scatterplot(data=df, x='SepalLengthCm', y='SepalWidthCm', hue='Species')

plt.subplot(2, 2, 2)
sns.scatterplot(data=df, x='PetalLengthCm', y='PetalWidthCm', hue='Species')

plt.show()
# One violin plot per measurement, grouped by species, laid out on a 2x2 grid
# (petal measurements on the top row, sepal measurements on the bottom row).
f, ax = plt.subplots(2, 2, figsize=(10, 10))

# x/y are passed by keyword: seaborn 0.11 deprecated positional x/y and from
# 0.12 on `data` is the only positional argument, so the positional form
# raises a TypeError on current releases.
f00 = sns.violinplot(data=df, x='Species', y='PetalLengthCm', ax=ax[0, 0])
f01 = sns.violinplot(data=df, x='Species', y='PetalWidthCm', ax=ax[0, 1])
f10 = sns.violinplot(data=df, x='Species', y='SepalLengthCm', ax=ax[1, 0])
f11 = sns.violinplot(data=df, x='Species', y='SepalWidthCm', ax=ax[1, 1])

# Pairwise scatter/distribution grid over the numeric columns, coloured by
# species; pairplot draws on its own new figure.
sns.pairplot(df, hue='Species')

# Display both the violin figure and the pairplot figure.
plt.show()
# Correlation matrix of the numeric columns only. Since pandas 2.0,
# DataFrame.corr() no longer silently drops non-numeric columns and raises on
# the string-valued 'Species' column, so the numeric columns are selected
# explicitly (this also works on older pandas versions).
corr = df.select_dtypes(include='number').corr()

# Boolean mask that hides the upper triangle and the diagonal, leaving each
# pair of columns shown once. Built from the matrix shape rather than from the
# correlation values, so a coefficient of exactly 0 is still masked.
matrix = np.triu(np.ones(corr.shape, dtype=bool))

sns.heatmap(corr, annot=True, mask=matrix)
plt.show()
from sklearn.preprocessing import LabelEncoder

# Select the four measurement columns by name instead of "everything except
# the last column". Positional slicing would also feed any extra non-label
# column to the classifier -- e.g. a row-id column.
# NOTE(review): the widely distributed Kaggle copy of Iris.csv has an `Id`
# column; confirm whether the file at `url` does too. A row id in a file that
# is sorted by species would leak the label.
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
X = df[feature_columns].values

# Target: the species name, addressed by column name rather than by position.
y = df['Species'].values

# Encode the species names as integer class labels; `le` keeps the mapping
# (le.classes_) so predictions can be translated back to names.
le = LabelEncoder()
y = le.fit_transform(y)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=1,
)

# Standardise the features. The scaler learns its mean/variance from the
# training portion only and the same transformation is then applied to both
# portions, so no test-set statistics reach the model.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbours classifier; the Minkowski metric with p=2 is the
# ordinary Euclidean distance. fit() returns the estimator itself, so the
# construction and training are chained.
model = KNeighborsClassifier(
    n_neighbors=5,
    metric='minkowski',
    p=2,
).fit(X_train, y_train)

# Predicted class labels for the held-out samples.
y_pred = model.predict(X_test)
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Compare the held-out labels with the predictions: confusion matrix
# (rows = true class, columns = predicted class) and overall accuracy.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the test-set accuracy as a percentage.
print("Accuracy is: {:.2f} %".format(accuracy * 100))

# Draw the confusion matrix with integer cell annotations.
sns.heatmap(cm, annot=True, fmt="d")
plt.show()

# Per-class precision, recall, F1 score and support.
print(classification_report(y_true=y_test, y_pred=y_pred))
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# 10-fold cross-validation on the training portion.
#
# X_train was standardised earlier with statistics computed over the whole
# training set, so cross-validating the bare classifier would let every
# validation fold influence the scaling of the data it is scored on. Wrapping
# the classifier in a pipeline makes the scaler be re-fitted on the training
# folds only. Standardising data that is already standardised yields the same
# z-scores as standardising the raw values (the transform is invariant to a
# prior per-feature shift and positive rescale), so this is equivalent to
# running the pipeline on the unscaled features.
#
# cross_val_score fits clones of the estimator, so the already-fitted `model`
# is left untouched.
cv_estimator = make_pipeline(StandardScaler(), model)
accuracies = cross_val_score(estimator=cv_estimator, X=X_train, y=y_train, cv=10)

# Mean and spread of the per-fold accuracies, as percentages.
print("Accuracy: {:.2f} %".format(accuracies.mean() * 100))
print("Standard Deviation: {:.2f} %".format(accuracies.std() * 100))