Summary Statistics

In [16]:
import pandas as pd 

# Load the semicolon-delimited bank dataset into a DataFrame.
df = pd.read_csv("bank.csv", delimiter=";")

# Preview the first 10 rows as the cell's rich output.
df.head(n=10)
Out[16]:
age job marital education default balance housing loan contact day month duration campaign pdays previous poutcome y
0 30 unemployed married primary no 1787 no no cellular 19 oct 79 1 -1 0 unknown no
1 33 services NaN secondary no 4789 yes yes cellular 11 may 220 1 339 4 failure no
2 35 management single tertiary no 1350 yes no cellular 16 apr 185 1 330 1 failure no
3 30 management married tertiary no 1476 yes yes unknown 3 jun 199 4 -1 0 unknown no
4 59 blue-collar married secondary no 0 yes no unknown 5 may 226 1 -1 0 unknown no
5 35 management single tertiary no 747 no no cellular 23 feb 141 2 176 3 failure no
6 36 self-employed married tertiary no 307 yes no cellular 14 may 341 1 330 2 other no
7 39 technician married secondary no 147 yes no cellular 6 may 151 2 -1 0 unknown no
8 41 entrepreneur married tertiary no 221 yes no unknown 14 may 57 2 -1 0 unknown no
9 43 services married primary no -88 yes yes cellular 17 apr 313 1 147 2 failure no
In [17]:
# Arithmetic mean of the `duration` column over the whole dataset.

df["duration"].mean()
Out[17]:
263.96129174961294
In [20]:
# Median (50th percentile) of the `duration` column over the whole dataset.

df["duration"].median()
Out[20]:
185.0
In [30]:
# Group the rows by their `education` value; reused by the following cell.
gb = df.groupby(by="education")
In [34]:
# Per-education-group summary statistics (count, mean, std, min, quartiles, max)
# for each described column, displayed as the cell's output.
gb.describe()
Out[34]:
age balance ... pdays previous
count mean std min 25% 50% 75% max count mean ... 75% max count mean std min 25% 50% 75% max
education
primary 678.0 46.833333 11.200085 19.0 39.0 46.0 55.0 87.0 678.0 1411.544248 ... -1.0 461.0 678.0 0.460177 1.857726 0.0 0.0 0.0 0.0 24.0
secondary 2306.0 40.062446 10.226439 19.0 32.0 38.0 47.0 86.0 2306.0 1196.814397 ... -1.0 808.0 2306.0 0.528621 1.599432 0.0 0.0 0.0 0.0 25.0
tertiary 1350.0 39.645926 9.612536 22.0 32.0 37.0 46.0 78.0 1350.0 1775.423704 ... -1.0 871.0 1350.0 0.612593 1.787525 0.0 0.0 0.0 0.0 22.0
unknown 187.0 45.299465 11.373718 19.0 37.0 47.0 54.0 79.0 187.0 1701.245989 ... -1.0 683.0 187.0 0.508021 1.482402 0.0 0.0 0.0 0.0 13.0

4 rows × 56 columns