Summary Statistics

In [16]:

import pandas as pd 

# Load the semicolon-delimited bank dataset into a DataFrame.
df = pd.read_csv("bank.csv", delimiter=';')

# Preview the first 10 rows of the loaded data.
df.head(n=10)

Out[16]:

	age	job	marital	education	default	balance	housing	loan	contact	day	month	duration	campaign	pdays	previous	poutcome	y
0	30	unemployed	married	primary	no	1787	no	no	cellular	19	oct	79	1	-1	0	unknown	no
1	33	services	NaN	secondary	no	4789	yes	yes	cellular	11	may	220	1	339	4	failure	no
2	35	management	single	tertiary	no	1350	yes	no	cellular	16	apr	185	1	330	1	failure	no
3	30	management	married	tertiary	no	1476	yes	yes	unknown	3	jun	199	4	-1	0	unknown	no
4	59	blue-collar	married	secondary	no	0	yes	no	unknown	5	may	226	1	-1	0	unknown	no
5	35	management	single	tertiary	no	747	no	no	cellular	23	feb	141	2	176	3	failure	no
6	36	self-employed	married	tertiary	no	307	yes	no	cellular	14	may	341	1	330	2	other	no
7	39	technician	married	secondary	no	147	yes	no	cellular	6	may	151	2	-1	0	unknown	no
8	41	entrepreneur	married	tertiary	no	221	yes	no	unknown	14	may	57	2	-1	0	unknown	no
9	43	services	married	primary	no	-88	yes	yes	cellular	17	apr	313	1	147	2	failure	no

In [17]:

# Arithmetic mean of the 'duration' column.

df.loc[:, 'duration'].mean()

Out[17]:

263.96129174961294

In [20]:

# Median (50th percentile) of the 'duration' column.

df.loc[:, 'duration'].median()

Out[20]:

185.0

In [30]:

# Group the rows by their 'education' value so later cells can aggregate per group.
gb = df.groupby(by="education")

In [34]:

# Per-education-group descriptive statistics (count, mean, std, min,
# 25%/50%/75% quantiles, max) for each summarized column.
gb.describe()

Out[34]:

	age								balance		...	pdays		previous
	count	mean	std	min	25%	50%	75%	max	count	mean	...	75%	max	count	mean	std	min	25%	50%	75%	max
education
primary	678.0	46.833333	11.200085	19.0	39.0	46.0	55.0	87.0	678.0	1411.544248	...	-1.0	461.0	678.0	0.460177	1.857726	0.0	0.0	0.0	0.0	24.0
secondary	2306.0	40.062446	10.226439	19.0	32.0	38.0	47.0	86.0	2306.0	1196.814397	...	-1.0	808.0	2306.0	0.528621	1.599432	0.0	0.0	0.0	0.0	25.0
tertiary	1350.0	39.645926	9.612536	22.0	32.0	37.0	46.0	78.0	1350.0	1775.423704	...	-1.0	871.0	1350.0	0.612593	1.787525	0.0	0.0	0.0	0.0	22.0
unknown	187.0	45.299465	11.373718	19.0	37.0	47.0	54.0	79.0	187.0	1701.245989	...	-1.0	683.0	187.0	0.508021	1.482402	0.0	0.0	0.0	0.0	13.0

4 rows × 56 columns