Principal Component Analysis

Breast Cancer Wisconsin

In [1]:
#Importing the Relevant Libraries
#---------------------------------------------------------------------

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


#Importing the Dataset from SkLearn &  Converting to Dataframe
#---------------------------------------------------------------------

from sklearn.datasets import load_breast_cancer

dataset = load_breast_cancer()

df = pd.DataFrame(dataset.data, columns= dataset.feature_names)


#Feature Scaling
#---------------------------------------------------------------------

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

df_scaled = sc.fit_transform(df)


#Principal Component Analysis (PCA)
#---------------------------------------------------------------------

from sklearn.decomposition import PCA

pca = PCA(n_components = 2)

df_pca = pca.fit_transform(df_scaled)


#Shape of dataframe before & after Applying PCA
#---------------------------------------------------------------------

print('Before Applying PCA:', df.shape)

print('\nAfter Applying PCA:', df_pca.shape)
Before Applying PCA: (569, 30)

After Applying PCA: (569, 2)

Data Visulalisation (Breast Cancer Wisconsin After PCA)

In [2]:
plt.figure(figsize=(8,6))

plt.scatter(df_pca[:,0],df_pca[:,1],c = dataset['target'])

plt.xlabel('First principle component')

plt.ylabel('Second principle component')
Out[2]:
Text(0, 0.5, 'Second principle component')

IRIS Flower

In [3]:
#Importing the Relevant Libraries
#---------------------------------------------------------------------

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns



#Importing the Dataset from SkLearn &  Converting to Dataframe
#---------------------------------------------------------------------

from sklearn.datasets import load_iris

dataset = load_iris()

df = pd.DataFrame(dataset.data, columns= dataset.feature_names)


#Feature Scaling
#---------------------------------------------------------------------

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

df_scaled = sc.fit_transform(df)


#Principal Component Analysis (PCA)
#---------------------------------------------------------------------

from sklearn.decomposition import PCA

pca = PCA(n_components = 2)

df_pca = pca.fit_transform(df_scaled)


#Shape of dataframe before & after Applying PCA
#---------------------------------------------------------------------

print('Before Applying PCA:', df.shape)

print('\nAfter Applying PCA:', df_pca.shape)
Before Applying PCA: (150, 4)

After Applying PCA: (150, 2)

Data Visulalisation (IRIS Flower After PCA)

In [4]:
plt.figure(figsize=(8,6))

plt.scatter(df_pca[:,0],df_pca[:,1],c = dataset['target'])

plt.xlabel('First principle component')

plt.ylabel('Second principle component')
Out[4]:
Text(0, 0.5, 'Second principle component')