- Normalization -> scaling a variable so that its values lie between 0 and 1
1. Converting categorical data to numerical data
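2. Scaling variables to a common range (normalization maps values to between 0 and 1; note that the StandardScaler used below instead standardizes each feature to zero mean and unit variance)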
import pandas as pd
# Read the HR satisfaction data set and preview its first rows.
df = pd.read_csv('hr_satisfaction.csv')
df.head()

# Show which columns are neither int nor float, i.e. still need encoding.
df.select_dtypes(exclude=['int', 'float']).columns

# One-hot encode the listed columns; drop_first=True drops the first level
# of each so the resulting dummy columns are not perfectly collinear.
categorial = ['department', 'salary']
df = pd.get_dummies(data=df, columns=categorial, drop_first=True)
df.head()
- Removing the label values from the training data
# Feature matrix: every column except the 'left' label, as a NumPy array.
X = df.drop(columns=['left']).values
- Assigning label values to Y dataset
# Label vector: the 'left' column, as a NumPy array.
Y = df.loc[:, 'left'].values
- Splitting data -> 70:30 Ratio Train:Test
# Import before first use: calling train_test_split ahead of this line
# raises NameError when the file is run top to bottom.
from sklearn.model_selection import train_test_split

# Features are every column except the 'left' label; labels are 'left'.
X = df.drop(['left'], axis=1).values
Y = df['left'].values

# Hold out 30% of the rows for testing (70:30 train:test split).
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
# Import before first use: instantiating StandardScaler ahead of this line
# raises NameError when the file is run top to bottom.
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then apply the same fitted
# transformation to the test features so both sets share one scaling.
# Scaling is done exactly once; re-fitting on already-scaled data is redundant.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Inspect the scaled arrays directly.
X_train
X_test

# Wrap the scaled training features in a DataFrame for a tabular preview.
df_X_train = pd.DataFrame(data=X_train)
df_X_train.head()

# Same tabular preview for the scaled test features.
df_X_test = pd.DataFrame(data=X_test)
df_X_test.head()