- Crosstab -> pandas.crosstab(index, columns, values=None, rownames=None,
colnames=None, aggfunc=None, margins=False, dropna=True)
- A frequency table of the factors unless an array of values and an aggregation function are passed
import pandas as pd
import numpy as np
df = pd.read_csv('tele_customers')
df.head()
customerID | gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | ... | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 7590-VHVEG | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
1 | 5575-GNVDE | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | ... | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No |
2 | 3668-QPYBK | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | ... | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
3 | 7795-CFOCW | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | ... | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
4 | 9237-HQITU | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
5 rows × 21 columns
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 7043 entries, 0 to 7042 Data columns (total 21 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 customerID 7043 non-null object 1 gender 7043 non-null object 2 SeniorCitizen 7043 non-null int64 3 Partner 7043 non-null object 4 Dependents 7043 non-null object 5 tenure 7043 non-null int64 6 PhoneService 7043 non-null object 7 MultipleLines 7043 non-null object 8 InternetService 7043 non-null object 9 OnlineSecurity 7043 non-null object 10 OnlineBackup 7043 non-null object 11 DeviceProtection 7043 non-null object 12 TechSupport 7043 non-null object 13 StreamingTV 7043 non-null object 14 StreamingMovies 7043 non-null object 15 Contract 7043 non-null object 16 PaperlessBilling 7043 non-null object 17 PaymentMethod 7043 non-null object 18 MonthlyCharges 7043 non-null float64 19 TotalCharges 7043 non-null object 20 Churn 7043 non-null object dtypes: float64(1), int64(2), object(18) memory usage: 1.1+ MB
df.drop(['customerID','MonthlyCharges', 'TotalCharges', 'tenure'], axis=1, inplace=True)
df.head()
gender | SeniorCitizen | Partner | Dependents | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | Churn | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Female | 0 | Yes | No | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | No |
1 | Male | 0 | No | No | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | No |
2 | Male | 0 | No | No | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | Yes |
3 | Male | 0 | No | No | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | No |
4 | Female | 0 | No | No | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | Yes |
crosstab = pd.concat([pd.crosstab(df[x], df.Churn) for x in df.columns[:-1]], keys = df.columns[:-1])
crosstab
Churn | No | Yes | |
---|---|---|---|
gender | Female | 2549 | 939 |
Male | 2625 | 930 | |
SeniorCitizen | 0 | 4508 | 1393 |
1 | 666 | 476 | |
Partner | No | 2441 | 1200 |
Yes | 2733 | 669 | |
Dependents | No | 3390 | 1543 |
Yes | 1784 | 326 | |
PhoneService | No | 512 | 170 |
Yes | 4662 | 1699 | |
MultipleLines | No | 2541 | 849 |
No phone service | 512 | 170 | |
Yes | 2121 | 850 | |
InternetService | DSL | 1962 | 459 |
Fiber optic | 1799 | 1297 | |
No | 1413 | 113 | |
OnlineSecurity | No | 2037 | 1461 |
No internet service | 1413 | 113 | |
Yes | 1724 | 295 | |
OnlineBackup | No | 1855 | 1233 |
No internet service | 1413 | 113 | |
Yes | 1906 | 523 | |
DeviceProtection | No | 1884 | 1211 |
No internet service | 1413 | 113 | |
Yes | 1877 | 545 | |
TechSupport | No | 2027 | 1446 |
No internet service | 1413 | 113 | |
Yes | 1734 | 310 | |
StreamingTV | No | 1868 | 942 |
No internet service | 1413 | 113 | |
Yes | 1893 | 814 | |
StreamingMovies | No | 1847 | 938 |
No internet service | 1413 | 113 | |
Yes | 1914 | 818 | |
Contract | Month-to-month | 2220 | 1655 |
One year | 1307 | 166 | |
Two year | 1647 | 48 | |
PaperlessBilling | No | 2403 | 469 |
Yes | 2771 | 1400 | |
PaymentMethod | Bank transfer (automatic) | 1286 | 258 |
Credit card (automatic) | 1290 | 232 | |
Electronic check | 1294 | 1071 | |
Mailed check | 1304 | 308 |
crosstab['Churn_Percentage'] = crosstab['Yes']*100 / (crosstab['No'] + crosstab['Yes'])
crosstab
Churn | No | Yes | Churn_Percentage | |
---|---|---|---|---|
gender | Female | 2549 | 939 | 26.920872 |
Male | 2625 | 930 | 26.160338 | |
SeniorCitizen | 0 | 4508 | 1393 | 23.606168 |
1 | 666 | 476 | 41.681261 | |
Partner | No | 2441 | 1200 | 32.957979 |
Yes | 2733 | 669 | 19.664903 | |
Dependents | No | 3390 | 1543 | 31.279140 |
Yes | 1784 | 326 | 15.450237 | |
PhoneService | No | 512 | 170 | 24.926686 |
Yes | 4662 | 1699 | 26.709637 | |
MultipleLines | No | 2541 | 849 | 25.044248 |
No phone service | 512 | 170 | 24.926686 | |
Yes | 2121 | 850 | 28.609896 | |
InternetService | DSL | 1962 | 459 | 18.959108 |
Fiber optic | 1799 | 1297 | 41.892765 | |
No | 1413 | 113 | 7.404980 | |
OnlineSecurity | No | 2037 | 1461 | 41.766724 |
No internet service | 1413 | 113 | 7.404980 | |
Yes | 1724 | 295 | 14.611194 | |
OnlineBackup | No | 1855 | 1233 | 39.928756 |
No internet service | 1413 | 113 | 7.404980 | |
Yes | 1906 | 523 | 21.531494 | |
DeviceProtection | No | 1884 | 1211 | 39.127625 |
No internet service | 1413 | 113 | 7.404980 | |
Yes | 1877 | 545 | 22.502064 | |
TechSupport | No | 2027 | 1446 | 41.635474 |
No internet service | 1413 | 113 | 7.404980 | |
Yes | 1734 | 310 | 15.166341 | |
StreamingTV | No | 1868 | 942 | 33.523132 |
No internet service | 1413 | 113 | 7.404980 | |
Yes | 1893 | 814 | 30.070188 | |
StreamingMovies | No | 1847 | 938 | 33.680431 |
No internet service | 1413 | 113 | 7.404980 | |
Yes | 1914 | 818 | 29.941435 | |
Contract | Month-to-month | 2220 | 1655 | 42.709677 |
One year | 1307 | 166 | 11.269518 | |
Two year | 1647 | 48 | 2.831858 | |
PaperlessBilling | No | 2403 | 469 | 16.330084 |
Yes | 2771 | 1400 | 33.565092 | |
PaymentMethod | Bank transfer (automatic) | 1286 | 258 | 16.709845 |
Credit card (automatic) | 1290 | 232 | 15.243101 | |
Electronic check | 1294 | 1071 | 45.285412 | |
Mailed check | 1304 | 308 | 19.106700 |