Seaborn

Importing the Relevant Libraries

In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
# Activate seaborn's default plot styling for the figures drawn below.
sns.set()

Importing the Dataset

In [2]:
# Remote location of the dataset. The explicit scheme is required for pandas
# to treat the string as a URL rather than a relative file path.
# NOTE(review): assumed to host the same CSV as the local file - confirm.
url = "https://DataScienceSchools.github.io/Data_Visualization/Movie_Ratings.csv"

# Prefer the local copy (works offline); fall back to the remote copy so the
# notebook still runs from a fresh checkout that lacks the CSV.
try:
    df = pd.read_csv('Movie_Ratings.csv')
except FileNotFoundError:
    df = pd.read_csv(url)

df.head()
Out[2]:
Film Genre Rotten Tomatoes Ratings % Audience Ratings % Budget (million $) Year of release
0 (500) Days of Summer Comedy 87 81 8 2009
1 10,000 B.C. Adventure 9 44 105 2008
2 12 Rounds Action 30 52 20 2009
3 127 Hours Adventure 93 84 18 2010
4 17 Again Comedy 55 70 20 2009

Changing the Column Names

In [3]:
# Short, identifier-friendly column names, assigned by position
# (the order must match the six columns of the loaded frame).
new_names = [
    'Film',
    'Genre',
    'Critic_Ratings',
    'Audience_Ratings',
    'Budget_Million',
    'Year',
]
df.columns = new_names

df.head()
Out[3]:
Film Genre Critic_Ratings Audience_Ratings Budget_Million Year
0 (500) Days of Summer Comedy 87 81 8 2009
1 10,000 B.C. Adventure 9 44 105 2008
2 12 Rounds Action 30 52 20 2009
3 127 Hours Adventure 93 84 18 2010
4 17 Again Comedy 55 70 20 2009

Checking Data Types

In [4]:
# Summarize the frame: per-column non-null counts and dtypes.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 559 entries, 0 to 558
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Film              559 non-null    object
 1   Genre             559 non-null    object
 2   Critic_Ratings    559 non-null    int64 
 3   Audience_Ratings  559 non-null    int64 
 4   Budget_Million    559 non-null    int64 
 5   Year              559 non-null    int64 
dtypes: int64(4), object(2)
memory usage: 26.3+ KB

Changing the Data Type of 'Film' and 'Genre' to 'category'

In [5]:
# Convert the two text columns to pandas' categorical dtype, one at a time.
for column in ('Film', 'Genre'):
    df[column] = df[column].astype('category')

# Confirm the new dtypes.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 559 entries, 0 to 558
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   Film              559 non-null    category
 1   Genre             559 non-null    category
 2   Critic_Ratings    559 non-null    int64   
 3   Audience_Ratings  559 non-null    int64   
 4   Budget_Million    559 non-null    int64   
 5   Year              559 non-null    int64   
dtypes: category(2), int64(4)
memory usage: 44.0 KB

Jointplot

Sample 1

In [6]:
# Joint view of critic vs. audience ratings using jointplot's default kind.
rating_axes = dict(x='Critic_Ratings', y='Audience_Ratings')
sns.jointplot(data=df, **rating_axes)

plt.show()

Sample 2

In [7]:
# Same pair of rating columns, drawn with kind='hex'.
sns.jointplot(
    data=df,
    x='Critic_Ratings',
    y='Audience_Ratings',
    kind='hex',
)

plt.show()

Histogram

Sample 1

In [8]:
# Distribution of audience ratings: density-normalised histogram with a KDE
# curve. `sns.distplot` is deprecated; `histplot(kde=True, stat='density')`
# is its documented replacement (cut=3 mirrors distplot's KDE extent).
sns.histplot(
    data=df,
    x='Audience_Ratings',
    kde=True,
    stat='density',
    kde_kws=dict(cut=3),
)

plt.show()

Sample 2

In [9]:
# Distribution of critic ratings in 15 bins: density-normalised histogram with
# a KDE curve. `sns.distplot` is deprecated; `histplot(kde=True,
# stat='density')` is its documented replacement (cut=3 mirrors distplot's
# KDE extent).
sns.histplot(
    data=df,
    x='Critic_Ratings',
    bins=15,
    kde=True,
    stat='density',
    kde_kws=dict(cut=3),
)

plt.show()

Sample 3

In [10]:
# Plain matplotlib histogram of audience ratings under the 'darkgrid' style.
sns.set_style('darkgrid')

audience_ratings = df['Audience_Ratings']
plt.hist(audience_ratings)

plt.show()

Sample 4

In [11]:
# Matplotlib histogram of critic ratings (15 bins) under the 'white' style.
sns.set_style('white')

critic_ratings = df['Critic_Ratings']
plt.hist(critic_ratings, bins=15)

plt.show()

Sample 5

In [12]:
# Matplotlib histogram of budgets under the "dark" style.
sns.set_style("dark")

budgets = df['Budget_Million']
plt.hist(budgets)

plt.show()

Stacked Histogram

Sample 1

In [13]:
# Draw one 15-bin budget histogram per genre on the same axes, in this order.
for genre_name in ('Action', 'Drama', 'Thriller'):
    genre_budgets = df.loc[df["Genre"] == genre_name, 'Budget_Million']
    plt.hist(genre_budgets, bins=15)

plt.show()

Sample 2

In [14]:
# Stacked budget histogram with one layer per genre.
# The per-genre series are kept in `budgets_by_genre` rather than a variable
# named `list`, which would shadow the builtin for the rest of the session.
labels = [genre for genre in df['Genre'].unique()]
budgets_by_genre = [
    df.loc[df["Genre"] == genre, 'Budget_Million'] for genre in labels
]

plt.hist(budgets_by_genre, bins=30, stacked=True, rwidth=2, label=labels)

plt.legend()

plt.show()

lmplot

Sample 1

In [15]:
# Critic vs. audience ratings, coloured by genre, with lmplot's defaults.
lm_args = dict(x='Critic_Ratings', y='Audience_Ratings', hue='Genre')
sns.lmplot(data=df, **lm_args)

plt.show()

Sample 2

In [16]:
# Same plot with fit_reg switched off and an explicit facet size.
sns.lmplot(
    data=df,
    x='Critic_Ratings',
    y='Audience_Ratings',
    hue='Genre',
    fit_reg=False,
    height=5,
    aspect=1,
)

plt.show()

KDE Plot

Sample 1

In [17]:
# Bivariate KDE of critic vs. audience ratings.
# Columns are passed by keyword (data/x/y): the positional two-series form is
# the legacy seaborn API and is rejected by current releases.
sns.kdeplot(data=df, x='Critic_Ratings', y='Audience_Ratings')

plt.show()

Sample 2

In [18]:
# Filled bivariate KDE of critic vs. audience ratings.
# `fill=True` replaces the deprecated `shade=True`; `thresh=0.05` is the
# documented replacement for `shade_lowest=False` (lowest level left unfilled).
sns.kdeplot(
    data=df,
    x='Critic_Ratings',
    y='Audience_Ratings',
    fill=True,
    thresh=0.05,
    cmap='Blues',
)

plt.show()

Sample 3

In [19]:
# Contour-only bivariate KDE of the two rating columns in the 'Blues' colormap.
# Keyword data/x/y replaces the legacy positional two-series call.
sns.kdeplot(data=df, x='Critic_Ratings', y='Audience_Ratings', cmap='Blues')

plt.show()

Sample 4

In [20]:
# Layer 1: filled density. `fill`/`thresh` replace the deprecated
# `shade`/`shade_lowest` (thresh=0.05 corresponds to shade_lowest=False).
sns.kdeplot(
    data=df,
    x='Critic_Ratings',
    y='Audience_Ratings',
    fill=True,
    thresh=0.05,
    cmap='Blues',
)

# Layer 2: contour lines drawn over the filled layer on the same axes.
sns.kdeplot(data=df, x='Critic_Ratings', y='Audience_Ratings', cmap='Blues')

plt.show()

Sample 5

In [21]:
# Bivariate KDE of budget vs. audience ratings.
# Keyword data/x/y replaces the legacy positional two-series call.
sns.kdeplot(data=df, x='Budget_Million', y='Audience_Ratings')

plt.show()

Sample 6

In [22]:
# Bivariate KDE of budget vs. critic ratings.
# Keyword data/x/y replaces the legacy positional two-series call.
sns.kdeplot(data=df, x='Budget_Million', y='Critic_Ratings')

plt.show()

Sample 7 - Subplot

In [23]:
# Two side-by-side panels that share both axes.
f, ax = plt.subplots(1, 2, figsize=(12, 6), sharex=True, sharey=True)

# Keyword data/x/y replaces the legacy positional two-series kdeplot call.
f1 = sns.kdeplot(data=df, x='Budget_Million', y='Audience_Ratings', ax=ax[0])
f2 = sns.kdeplot(data=df, x='Budget_Million', y='Critic_Ratings', ax=ax[1])

# The x axis is shared (sharex=True), so setting the limits on the first
# panel applies to the second as well.
f1.set(xlim=(-20, 160))

plt.show()

Boxplot

Sample 1

In [24]:
# Critic ratings per genre as box plots; tick labels rotated by 60 degrees.
box_axes = dict(x='Genre', y='Critic_Ratings')
sns.boxplot(data=df, **box_axes)

plt.xticks(rotation=60)

plt.show()

Sample 2

In [25]:
# Box plot of critic ratings restricted to rows whose Genre is 'Drama'.
drama_rows = df[df['Genre'] == 'Drama']
sns.boxplot(data=drama_rows, x='Genre', y='Critic_Ratings')

plt.xticks(rotation=60)

plt.show()

Violin Plot

Sample 1

In [26]:
# Critic ratings per genre as violin plots; tick labels rotated by 60 degrees.
violin_axes = dict(x='Genre', y='Critic_Ratings')
sns.violinplot(data=df, **violin_axes)

plt.xticks(rotation=60)

plt.show()

Sample 2

In [27]:
# Critic ratings of 'Drama' rows only, one violin per release year.
drama_rows = df[df['Genre'] == 'Drama']
sns.violinplot(data=drama_rows, x='Year', y='Critic_Ratings')

plt.show()

Facet Grid

Sample 1

In [28]:
# Marker styling forwarded to plt.scatter for every facet.
kws = {'s': 50, 'linewidth': 0.5, 'edgecolor': 'black'}

# Genre-by-year grid of audience vs. critic rating scatter plots.
g = sns.FacetGrid(df, row='Genre', col='Year', hue='Genre').map(
    plt.scatter, 'Audience_Ratings', 'Critic_Ratings', **kws
)

Sample 2

In [29]:
# Genre-by-year grid of audience-rating histograms.
g = sns.FacetGrid(df, row='Genre', col='Year', hue='Genre').map(
    plt.hist, 'Audience_Ratings'
)

Sample 3

In [30]:
# Genre-by-year grid of critic-rating histograms.
g = sns.FacetGrid(df, row='Genre', col='Year', hue='Genre').map(
    plt.hist, 'Critic_Ratings'
)

Controlling Axes and Adding Diagonals

In [31]:
# Marker styling forwarded to plt.scatter for every facet.
kws = {'s': 50, 'linewidth': 0.5, 'edgecolor': 'black'}

# Genre-by-year grid of audience vs. critic rating scatter plots.
g = sns.FacetGrid(df, row='Genre', col='Year', hue='Genre').map(
    plt.scatter, 'Audience_Ratings', 'Critic_Ratings', **kws
)

# Fix both axes to the 0-100 range and add the hue legend.
rating_range = (0, 100)
g.set(xlim=rating_range, ylim=rating_range)
g.add_legend()

# Dashed red line from (0, 0) to (100, 100) on every panel.
for panel in g.axes.flat:
    panel.plot(rating_range, rating_range, c='red', ls="--")

Dashboard

In [32]:
# 2x3 dashboard that combines the earlier plots on one figure.
# All kdeplot calls use keyword data/x/y and fill/thresh: the positional
# two-series form and shade/shade_lowest belong to the legacy seaborn API.
f, ax = plt.subplots(2, 3, figsize=(30, 30))

# Top row, left and middle: budget vs. each rating column.
f1 = sns.kdeplot(data=df, x='Budget_Million', y='Audience_Ratings', ax=ax[0, 0])

f2 = sns.kdeplot(data=df, x='Budget_Million', y='Critic_Ratings', ax=ax[0, 1])

# Bottom-left: filled density (thresh=0.05 corresponds to shade_lowest=False)
# with contour lines drawn over it on the same axes.
f3 = sns.kdeplot(data=df, x='Critic_Ratings', y='Audience_Ratings',
                 fill=True, thresh=0.05, cmap='Blues', ax=ax[1, 0])

f3 = sns.kdeplot(data=df, x='Critic_Ratings', y='Audience_Ratings',
                 cmap='Blues', ax=ax[1, 0])

# Bottom-middle and bottom-right: critic ratings per genre.
f4 = sns.violinplot(data=df, x='Genre', y='Critic_Ratings', ax=ax[1, 1])

f5 = sns.boxplot(data=df, x='Genre', y='Critic_Ratings', ax=ax[1, 2])

# Top-right: a non-seaborn plot is drawn by calling the Axes method directly.
ax[0, 2].hist(df['Audience_Ratings'])

# Use the same budget range on both budget panels.
f1.set(xlim=(-20, 160))
f2.set(xlim=(-20, 160))

plt.show()

Styling Dashboard

In [33]:
# 3x2 dashboard on a black axes background.
# All kdeplot calls use keyword data/x/y and fill/thresh: the positional
# two-series form and shade/shade_lowest belong to the legacy seaborn API.
# thresh=0 corresponds to shade_lowest=True, thresh=0.05 to shade_lowest=False.
sns.set_style('dark', {'axes.facecolor': 'black'})

f, ax = plt.subplots(3, 2, figsize=(30, 30))

# Row 0, left: budget vs. audience ratings - filled layer, then contour lines.
f1 = sns.kdeplot(data=df, x='Budget_Million', y='Audience_Ratings',
                 fill=True, thresh=0, cmap='inferno', ax=ax[0, 0])

f1 = sns.kdeplot(data=df, x='Budget_Million', y='Audience_Ratings',
                 cmap='cool', ax=ax[0, 0])


# Row 0, right: budget vs. critic ratings - filled layer, then contour lines.
f2 = sns.kdeplot(data=df, x='Budget_Million', y='Critic_Ratings',
                 fill=True, thresh=0, cmap='inferno', ax=ax[0, 1])

f2 = sns.kdeplot(data=df, x='Budget_Million', y='Critic_Ratings',
                 cmap='cool', ax=ax[0, 1])


# Row 1, left: critic vs. audience ratings - filled layer, then contour lines.
f3 = sns.kdeplot(data=df, x='Critic_Ratings', y='Audience_Ratings',
                 fill=True, thresh=0.05, cmap='Blues_r', ax=ax[1, 0])

f3 = sns.kdeplot(data=df, x='Critic_Ratings', y='Audience_Ratings',
                 cmap='gist_yarg_r', ax=ax[1, 0])


# Row 1, right: critic ratings per genre as violins.
f4 = sns.violinplot(data=df, x='Genre', y='Critic_Ratings', palette='YlOrRd', ax=ax[1, 1])


# Row 2, left: critic ratings per genre as boxes.
f5 = sns.boxplot(data=df, x='Genre', y='Critic_Ratings', ax=ax[2, 0])


# Row 2, right: plain matplotlib histogram with custom fill and edge colours.
ax[2, 1].hist(df['Audience_Ratings'], color="purple", ec="blue", lw=5)

# Use the same budget range on both budget panels.
f1.set(xlim=(-20, 160))
f2.set(xlim=(-20, 160))

plt.show()

Supported cmap Values

source

'Accent', 'Accent_r', 'Blues', 'Blues_r', 'BrBG', 'BrBG_r', 'BuGn', 'BuGn_r', 'BuPu', 'BuPu_r', 'CMRmap', 'CMRmap_r', 'Dark2', 'Dark2_r', 'GnBu', 'GnBu_r', 'Greens', 'Greens_r', 'Greys', 'Greys_r', 'OrRd', 'OrRd_r', 'Oranges', 'Oranges_r', 'PRGn', 'PRGn_r', 'Paired', 'Paired_r', 'Pastel1', 'Pastel1_r', 'Pastel2', 'Pastel2_r', 'PiYG', 'PiYG_r', 'PuBu', 'PuBuGn', 'PuBuGn_r', 'PuBu_r', 'PuOr', 'PuOr_r', 'PuRd', 'PuRd_r', 'Purples', 'Purples_r', 'RdBu', 'RdBu_r', 'RdGy', 'RdGy_r', 'RdPu', 'RdPu_r', 'RdYlBu', 'RdYlBu_r', 'RdYlGn', 'RdYlGn_r', 'Reds', 'Reds_r', 'Set1', 'Set1_r', 'Set2', 'Set2_r', 'Set3', 'Set3_r', 'Spectral', 'Spectral_r', 'Wistia', 'Wistia_r', 'YlGn', 'YlGnBu', 'YlGnBu_r', 'YlGn_r', 'YlOrBr', 'YlOrBr_r', 'YlOrRd', 'YlOrRd_r', 'afmhot', 'afmhot_r', 'autumn', 'autumn_r', 'binary', 'binary_r', 'bone', 'bone_r', 'brg', 'brg_r', 'bwr', 'bwr_r', 'cividis', 'cividis_r', 'cool', 'cool_r', 'coolwarm', 'coolwarm_r', 'copper', 'copper_r', 'cubehelix', 'cubehelix_r', 'flag', 'flag_r', 'gist_earth', 'gist_earth_r', 'gist_gray', 'gist_gray_r', 'gist_heat', 'gist_heat_r', 'gist_ncar', 'gist_ncar_r', 'gist_rainbow', 'gist_rainbow_r', 'gist_stern', 'gist_stern_r', 'gist_yarg', 'gist_yarg_r', 'gnuplot', 'gnuplot2', 'gnuplot2_r', 'gnuplot_r', 'gray', 'gray_r', 'hot', 'hot_r', 'hsv', 'hsv_r', 'icefire', 'icefire_r', 'inferno', 'inferno_r', 'jet', 'jet_r', 'magma', 'magma_r', 'mako', 'mako_r', 'nipy_spectral', 'nipy_spectral_r', 'ocean', 'ocean_r', 'pink', 'pink_r', 'plasma', 'plasma_r', 'prism', 'prism_r', 'rainbow', 'rainbow_r', 'rocket', 'rocket_r', 'seismic', 'seismic_r', 'spring', 'spring_r', 'summer', 'summer_r', 'tab10', 'tab10_r', 'tab20', 'tab20_r', 'tab20b', 'tab20b_r', 'tab20c', 'tab20c_r', 'terrain', 'terrain_r', 'twilight', 'twilight_r', 'twilight_shifted', 'twilight_shifted_r', 'viridis', 'viridis_r', 'vlag', 'vlag_r', 'winter', 'winter_r'

Styles

In [34]:
# Stacked budget histogram per genre with custom title, labels and fonts.
sns.set_style('dark')

fig, ax = plt.subplots(figsize=(15, 6))

# The per-genre series are kept in `budgets_by_genre` rather than a variable
# named `list`, which would shadow the builtin for the rest of the session.
labels = [genre for genre in df['Genre'].unique()]
budgets_by_genre = [
    df.loc[df["Genre"] == genre, 'Budget_Million'] for genre in labels
]

# Draw on the Axes created above instead of relying on pyplot's current axes.
ax.hist(budgets_by_genre, bins=30, stacked=True, rwidth=2, label=labels)

# Shared text styling; font sizes are numeric rather than numeric strings.
heading_style = dict(fontsize=30, color='darkblue', fontname='DejaVu Sans')
ax.set_title('Movie Budget Distribution', **heading_style)
ax.set_xlabel('Budget', **heading_style)
ax.set_ylabel('Number of Movies', **heading_style)

# Tick label size on both axes.
ax.tick_params(axis='both', labelsize=20)

font = {'family': 'DejaVu Sans', 'size': 15}
ax.legend(prop=font)

plt.show()