import pandas as pd
# Download the raw dataset into a DataFrame.
url = "http://datascienceschools.github.io/Amazon_Fires.csv"
df = pd.read_csv(url)
df.head()

# Translate the Portuguese column headers to English.
new_columns = dict(
    ano='year',
    estado='state',
    mes='month',
    numero='number_of_fires',
    encontro='date',
)
df = df.rename(columns=new_columns)
df.head()
- Converting month from Portuguese to English
# Portuguese month names and their English equivalents, in calendar order.
portuguese_months = [
    'Janeiro', 'Fevereiro', 'Março', 'Abril', 'Maio', 'Junho',
    'Julho', 'Agosto', 'Setembro', 'Outubro', 'Novembro', 'Dezembro',
]
english_months = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
month_English = dict(zip(portuguese_months, english_months))

# Series.map leaves NaN for any value that has no entry in the mapping.
translated_months = df["month"].map(month_English)
df["month"] = translated_months
df.head()
- Column indexes start from 0, counting left to right
- Reordering columns -> date, month, year
# Reorder the columns by integer position (positions count from 0, left to right).
new_order = [4, 1, 0, 2, 3]
df = df[df.columns[new_order]]
df.head()
df.columns

# Reorder again, this time by column label. Take an explicit copy so the
# column assignments below write to an independent frame instead of a
# possible view of the previous one (avoids SettingWithCopyWarning).
new_order = ['date', 'month', 'year', 'state', 'number_of_fires']
df = df[new_order].copy()
df.head(10)
df.dtypes

# Convert the date column to pandas datetime values.
df['date'] = pd.to_datetime(df['date'])
df.dtypes

# Remove the literal " Fires" suffix from each value.
# str.strip(" Fires") would NOT do this: its argument is a *set of characters*
# stripped from both ends, not a suffix. So trim whitespace first, then remove
# the suffix with an end-anchored pattern.
df['number_of_fires'] = (
    df['number_of_fires']
    .str.strip()
    .str.replace(r"\s*Fires$", "", regex=True)
)
df["number_of_fires"] = df["number_of_fires"].astype(float)
df.dtypes

# Count missing values per column, then show the rows missing a fire count.
df.isnull().sum()
df[df['number_of_fires'].isnull()]
- Removing rows with missing values -> df.dropna(axis=0)
- axis=0 -> means rows
- Reset index & drop old index column -> df.reset_index(drop=True)
# Discard every row that contains a missing value, then renumber the rows
# from 0 without keeping the old index as a column.
df = df.dropna(axis="index").reset_index(drop=True)
df.head()
- Filling null values with 0
# Replace missing fire counts with 0. If the dropna step earlier in this file
# has already run, no nulls remain and this leaves the data unchanged.
df = df.fillna({'number_of_fires': 0})
- axis=1 -> means columns
# Drop a single column by label.
df = df.drop(columns=["date"])
df.head()

# Drop several columns at once. "date" was already removed by the statement
# above, and DataFrame.drop raises KeyError for a missing label by default,
# so errors="ignore" is needed for this to run when the file is executed
# top to bottom.
df = df.drop(columns=["year", "date"], errors="ignore")
df.head()
- Dropping rows by position using the df.index attribute
# Remove the first row (looked up by position, dropped by its index label),
# then renumber the remaining rows from 0.
df = df.drop(index=df.index[0]).reset_index(drop=True)
df.head()

# Remove the rows currently at positions 2 and 3. The index is not reset
# afterwards, so the surviving rows keep their existing labels.
rows_to_remove = df.index[[2, 3]]
df = df.drop(index=rows_to_remove)
df.head()

# Remove the rows currently at positions 1 and 4 of the remaining frame.
rows_to_remove = df.index[[1, 4]]
df = df.drop(index=rows_to_remove)
df.head()

# Write the cleaned data to disk without the index column.
df.to_csv('Amazon_Fires_editted.csv', index=False)