import pandas as pd
# Load the HR satisfaction dataset from the working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer='hr_satisfaction.csv')
# Preview the first five rows (only rendered when run as a notebook cell).
df.head(n=5)
- Converting Categorical data to Numerical data
categorial = ['department','salary']
df = pd.get_dummies(df, columns=categorial, drop_first=True)
- Removing the label values from training data
X = df.drop(['left'],axis=1).values
- Assigning label values to Y dataset
Y = df['left'].values
- Splitting data -> 70:30 Ratio Train:Test
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
- Data Normalization
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# One-hot encode the categorical columns; drop_first=True omits the first
# level of each column so the dummy columns are not redundant.
categorial = ['department', 'salary']
df = pd.get_dummies(data=df, columns=categorial, drop_first=True)

# 'left' is the label column: Y holds it, X holds every other column.
Y = df['left'].values
X = df.drop(columns=['left']).values

# Hold out 30% of the rows for testing (70:30 train:test split).
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

# Standardise the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics are used.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
# --- Logistic Regression: fit on the training split, evaluate on the test split ---
logreg_clf = LogisticRegression()
logreg_model = logreg_clf.fit(X_train, Y_train)
logreg_prediction = logreg_clf.predict(X_test)

# scikit-learn metric functions take (y_true, y_pred) in that order. Passing
# the predictions first transposes the confusion matrix and swaps precision
# with recall in the classification report, so the true labels go first.
Accuracy = 100 * accuracy_score(Y_test, logreg_prediction)
Confusion_Matrix = confusion_matrix(Y_test, logreg_prediction)
Classification_Report = classification_report(Y_test, logreg_prediction)

print("Accuracy is {0:.2f}%\n".format(Accuracy))
# Rows are the true classes, columns are the predicted classes.
print("Confusion Matrix:\n", Confusion_Matrix)
print("\nClassification Report:\n", Classification_Report)
# --- Random Forest: fit on the training split, evaluate on the test split ---
ranfor_clf = RandomForestClassifier()
ranfor_model = ranfor_clf.fit(X_train, Y_train)
ranfor_prediction = ranfor_clf.predict(X_test)

# scikit-learn metric functions take (y_true, y_pred) in that order. Passing
# the predictions first transposes the confusion matrix and swaps precision
# with recall in the classification report, so the true labels go first.
Accuracy = 100 * accuracy_score(Y_test, ranfor_prediction)
Confusion_Matrix = confusion_matrix(Y_test, ranfor_prediction)
Classification_Report = classification_report(Y_test, ranfor_prediction)

print("Accuracy is {0:.2f}%\n".format(Accuracy))
# Rows are the true classes, columns are the predicted classes.
print("Confusion Matrix:\n", Confusion_Matrix)
print("\nClassification Report:\n", Classification_Report)
# --- Support Vector Classifier: fit on the training split, evaluate on the test split ---
svm_clf = SVC()
svm_model = svm_clf.fit(X_train, Y_train)
svm_prediction = svm_clf.predict(X_test)

# scikit-learn metric functions take (y_true, y_pred) in that order. Passing
# the predictions first transposes the confusion matrix and swaps precision
# with recall in the classification report, so the true labels go first.
Accuracy = 100 * accuracy_score(Y_test, svm_prediction)
Confusion_Matrix = confusion_matrix(Y_test, svm_prediction)
Classification_Report = classification_report(Y_test, svm_prediction)

print("Accuracy is {0:.2f}%\n".format(Accuracy))
# Rows are the true classes, columns are the predicted classes.
print("Confusion Matrix:\n", Confusion_Matrix)
print("\nClassification Report:\n", Classification_Report)
# --- K-Nearest Neighbours: fit on the training split, evaluate on the test split ---
knn_clf = KNeighborsClassifier()
knn_model = knn_clf.fit(X_train, Y_train)
knn_prediction = knn_clf.predict(X_test)

# scikit-learn metric functions take (y_true, y_pred) in that order. Passing
# the predictions first transposes the confusion matrix and swaps precision
# with recall in the classification report, so the true labels go first.
Accuracy = 100 * accuracy_score(Y_test, knn_prediction)
Confusion_Matrix = confusion_matrix(Y_test, knn_prediction)
Classification_Report = classification_report(Y_test, knn_prediction)

print("Accuracy is {0:.2f}%\n".format(Accuracy))
# Rows are the true classes, columns are the predicted classes.
print("Confusion Matrix:\n", Confusion_Matrix)
print("\nClassification Report:\n", Classification_Report)
Random Forest Accuracy is 98.29%