import pandas as pd
# Load the semicolon-separated bank dataset, then preview its first 10 rows.
df = pd.read_csv("bank.csv", sep=";")
df.head(n=10)
| | age | job | marital | education | default | balance | housing | loan | contact | day | month | duration | campaign | pdays | previous | poutcome | y |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 30 | unemployed | married | primary | no | 1787 | no | no | cellular | 19 | oct | 79 | 1 | -1 | 0 | unknown | no |
1 | 33 | services | NaN | secondary | no | 4789 | yes | yes | cellular | 11 | may | 220 | 1 | 339 | 4 | failure | no |
2 | 35 | management | single | tertiary | no | 1350 | yes | no | cellular | 16 | apr | 185 | 1 | 330 | 1 | failure | no |
3 | 30 | management | married | tertiary | no | 1476 | yes | yes | unknown | 3 | jun | 199 | 4 | -1 | 0 | unknown | no |
4 | 59 | blue-collar | married | secondary | no | 0 | yes | no | unknown | 5 | may | 226 | 1 | -1 | 0 | unknown | no |
5 | 35 | management | single | tertiary | no | 747 | no | no | cellular | 23 | feb | 141 | 2 | 176 | 3 | failure | no |
6 | 36 | self-employed | married | tertiary | no | 307 | yes | no | cellular | 14 | may | 341 | 1 | 330 | 2 | other | no |
7 | 39 | technician | married | secondary | no | 147 | yes | no | cellular | 6 | may | 151 | 2 | -1 | 0 | unknown | no |
8 | 41 | entrepreneur | married | tertiary | no | 221 | yes | no | unknown | 14 | may | 57 | 2 | -1 | 0 | unknown | no |
9 | 43 | services | married | primary | no | -88 | yes | yes | cellular | 17 | apr | 313 | 1 | 147 | 2 | failure | no |
# Arithmetic mean of the 'duration' column
df.loc[:, "duration"].mean()
263.96129174961294
# Median (50th percentile) of the 'duration' column
df.loc[:, "duration"].median()
185.0
# Group the rows by their 'education' value and show per-group summary
# statistics (count, mean, std, min, quartiles, max) for each described column.
gb = df.groupby(by="education")
gb.describe()
| | age | | | | | | | | balance | | ... | pdays | | previous | | | | | | | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| | count | mean | std | min | 25% | 50% | 75% | max | count | mean | ... | 75% | max | count | mean | std | min | 25% | 50% | 75% | max |
education | |||||||||||||||||||||
primary | 678.0 | 46.833333 | 11.200085 | 19.0 | 39.0 | 46.0 | 55.0 | 87.0 | 678.0 | 1411.544248 | ... | -1.0 | 461.0 | 678.0 | 0.460177 | 1.857726 | 0.0 | 0.0 | 0.0 | 0.0 | 24.0 |
secondary | 2306.0 | 40.062446 | 10.226439 | 19.0 | 32.0 | 38.0 | 47.0 | 86.0 | 2306.0 | 1196.814397 | ... | -1.0 | 808.0 | 2306.0 | 0.528621 | 1.599432 | 0.0 | 0.0 | 0.0 | 0.0 | 25.0 |
tertiary | 1350.0 | 39.645926 | 9.612536 | 22.0 | 32.0 | 37.0 | 46.0 | 78.0 | 1350.0 | 1775.423704 | ... | -1.0 | 871.0 | 1350.0 | 0.612593 | 1.787525 | 0.0 | 0.0 | 0.0 | 0.0 | 22.0 |
unknown | 187.0 | 45.299465 | 11.373718 | 19.0 | 37.0 | 47.0 | 54.0 | 79.0 | 187.0 | 1701.245989 | ... | -1.0 | 683.0 | 187.0 | 0.508021 | 1.482402 | 0.0 | 0.0 | 0.0 | 0.0 | 13.0 |
4 rows × 56 columns