Four ways to check for missing values:
- DataFrame.isnull().sum()
- DataFrame.isnull()
- DataFrame.notnull()
- DataFrame.columns[df.isnull().any()]
import pandas as pd
import numpy as np
# Load the raw table from disk.
df = pd.read_csv('carsinfo.csv')

# Each listed column has its '?' entries replaced by NaN and its remaining
# entries cast to the given numeric type.  The cast runs only on the
# non-missing entries; assigning the result back aligns on the index, so the
# rows dropped before the cast reappear as NaN in the stored column.
for _column, _target_type in (
    ("normalized_losses", int),
    ("bore", float),
    ("stroke", float),
    ("horsepower", float),
    ("peak_rpm", float),
    ("price", float),
):
    _with_nan = df[_column].replace({'?': np.nan})
    df[_column] = _with_nan.dropna().astype(_target_type)
DataFrame.isnull().sum()
- You can sort values by adding -> .sort_values(ascending=False)
# Count the missing values in each column and list the counts largest first.
df.isnull().sum().sort_values(ascending=False)
DataFrame.isnull()
# Boolean mask of the frame (True where a value is missing);
# the [:5] slice keeps only the first 5 rows.
df.isnull()[:5]
DataFrame.notnull()
# Boolean mask of the frame (True where a value is present);
# the [:5] slice keeps only the first 5 rows.
df.notnull()[:5]
DataFrame.columns[df.isnull().any()]
- Showing names of columns having missing values
# Names of the columns that contain at least one missing value:
# df.isnull().any() yields one boolean per column, used here to mask df.columns.
df.columns[df.isnull().any()]
DataFrame.loc[:,df.isnull().any()]
# Keep every row but only the columns that contain missing values,
# then show the first 6 rows of that sub-frame.
df.loc[:,df.isnull().any()][:6]
def missing_zero_values_table(dataframe):
    """Summarize zero and missing values for every column of *dataframe*.

    Prints a short message with the frame's column/row counts and the number
    of columns that contain missing values, then returns the summary table.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        The frame to inspect.  Only this argument is read; no module-level
        frame is consulted.

    Returns
    -------
    pandas.DataFrame
        One row per column of *dataframe* that has at least one missing
        value, sorted by '% of Total Values' in descending order and rounded
        to one decimal place.  Columns of the result:

        - 'Zero Values': number of cells equal to 0
        - 'Missing Values': number of null cells
        - '% of Total Values': missing cells as a percentage of the row count
        - 'Total Zero Missing Values': zero cells plus missing cells
        - '% Total Zero Missing Values': that total as a percentage of rows
        - 'Data Type': dtype of the inspected column

    Notes
    -----
    The returned table can be written out with ``DataFrame.to_excel`` if a
    spreadsheet copy is wanted.
    """
    row_count = len(dataframe)

    zero_val = (dataframe == 0.00).astype(int).sum(axis=0)
    mis_val = dataframe.isnull().sum()
    mis_val_percent = 100 * mis_val / row_count

    # Name the columns while concatenating instead of renaming 0/1/2 afterwards.
    mz_table = pd.concat(
        [zero_val, mis_val, mis_val_percent],
        axis=1,
        keys=['Zero Values', 'Missing Values', '% of Total Values'],
    )
    mz_table['Total Zero Missing Values'] = (
        mz_table['Zero Values'] + mz_table['Missing Values']
    )
    mz_table['% Total Zero Missing Values'] = (
        100 * mz_table['Total Zero Missing Values'] / row_count
    )
    mz_table['Data Type'] = dataframe.dtypes

    # Only columns that actually have missing values are reported.
    has_missing = mz_table['Missing Values'] != 0
    mz_table = (
        mz_table[has_missing]
        .sort_values('% of Total Values', ascending=False)
        .round(1)
    )

    print("Your selected dataframe has " + str(dataframe.shape[1])
          + " columns and " + str(dataframe.shape[0]) + " Rows.\n"
          "There are " + str(mz_table.shape[0])
          + " columns that have missing values.")
    return mz_table
# Build and display the zero/missing-value summary for df.
missing_zero_values_table(df)