Data were extracted from images taken of genuine and forged banknote-like specimens. For digitization, an industrial camera normally used for print inspection was used. The final images have 400 x 400 pixels. Due to the object lens and the distance to the investigated object, gray-scale pictures with a resolution of about 660 dpi were obtained. A wavelet transform tool was used to extract features from the images.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the banknote authentication dataset from its hosted CSV file.
url = "https://datascienceschools.github.io/Machine_Learning/Classification_Models_CaseStudies/BankNote_Authentication.csv"
df = pd.read_csv(url)
df.head()

# Every column except the last becomes the feature matrix X;
# the last column becomes the label vector y.
feature_frame = df.iloc[:, :-1]
target_series = df.iloc[:, -1]
X = feature_frame.values
y = target_series.values
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Seed the forest as well. Its bootstrap sampling and feature selection are
# random, so without a seed the fitted model (and every metric computed from
# it) changes between runs even though the split above is fixed.
model = RandomForestClassifier(random_state=0)
model.fit(X_train, y_train)
# Pair each feature column name with the importance score reported by the
# fitted model.
df_feature = df.drop(columns='class')
feature_importances = pd.DataFrame(
    df_feature.columns.values, columns=['Features']
).assign(Importance=model.feature_importances_)

# Bare expression: the sorted copy is not stored, so it is only visible when an
# interactive front end echoes it; feature_importances itself stays unsorted.
feature_importances.sort_values(by='Importance', ascending=False)
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Score the fitted model on the held-out split.
y_pred = model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
accuracy_percent = accuracy * 100
print("Accuracy is: {:.2f} %".format(accuracy_percent))

# Draw the confusion matrix as a heatmap with integer cell annotations.
sns.heatmap(cm, annot=True, fmt='d')
plt.show()

# Text summary of the main classification metrics.
report = classification_report(y_test, y_pred)
print(report)
from sklearn.model_selection import cross_val_score

# 9-fold cross-validation of the model on the training split only.
accuracies = cross_val_score(model, X_train, y_train, cv=9)

# Report the mean and spread of the per-fold scores as percentages.
mean_percent = accuracies.mean() * 100
std_percent = accuracies.std() * 100
print("Accuracy: {:.2f} %".format(mean_percent))
print("Standard Deviation: {:.2f} %".format(std_percent))
import pickle

# Persist the fitted classifier to disk. The context manager closes the file
# even if pickle.dump raises, which the manual open()/close() pair did not.
with open("RFClassifier.pkl", "wb") as pickle_out:
    pickle.dump(model, pickle_out)
# Score a second CSV file with the trained model; every column of this file is
# passed to the model as a feature.
url = "https://datascienceschools.github.io/Machine_Learning/Classification_Models_CaseStudies/BankNote_Authentication_Test.csv"
new_data = pd.read_csv(url)
new_data.head()

# Use a dedicated name for the new feature matrix. Rebinding X_test here would
# leave it out of step with y_test and break any re-run of the evaluation code.
X_new = new_data.values
y_pred_test = model.predict(X_new)

# Store the predictions under a name matching this dataset's 'class' target
# (the previous 'predicted_Survive' label was a leftover from another dataset).
new_data['predicted_class'] = y_pred_test
new_data