Z-unlock Challenge: Data Visualization
We will analyze how temperature changes correlate with energy use, land cover, waste use, and deforestation by answering these questions.
- What are the areas with biggest/smallest change in temperature?
- Are there any correlations between the hottest changes and other phenomena (like land coverage, land fires, CO2 emissions, etc.)?
- What does the seasonal temperature change look like?
- How does this vary by continent? Particularly South America?
# Print the full path of every file found anywhere below the Kaggle input
# directory (run the cell with "Run" or Shift+Enter).
import os

for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
# Load the temperature-change table and preview its first rows.
temperature_csv = "/kaggle/input/z-unlocked-challenge-1-data-visualization/temperature_change_data_11-29-2021.csv"
df_temperature = pd.read_csv(temperature_csv)
df_temperature.head()
# Largest and smallest "Value" recorded for each area, ranked so that the
# most extreme areas come first.
# NOTE(review): every row of df_temperature is included; if the table mixes
# several kinds of measurement in "Value", filter before ranking — TODO confirm.
temp_max = df_temperature.groupby("Area")["Value"].max().sort_values(ascending=False).reset_index()
temp_min = df_temperature.groupby("Area")["Value"].min().sort_values().reset_index()


def _plot_top5_bar(ranked, title):
    """Draw a labelled bar chart of the first five rows of *ranked*.

    ranked: frame with "Area" and "Value" columns, already sorted so the
        rows of interest come first.
    title: figure title.

    Returns the five-row slice that was plotted.
    """
    top5 = ranked[:5]
    plt.figure(figsize=(10, 7))
    plt.bar(top5['Area'], top5['Value'], width=0.3)
    for i, val in enumerate(top5['Value'].values):
        # Keep the label outside the bar: above a positive bar, below a
        # negative one (a fixed 'bottom' alignment draws it inside the bar
        # for negative values).
        plt.text(i, val, round(float(val)), horizontalalignment='center',
                 verticalalignment='bottom' if val >= 0 else 'top',
                 fontdict={'fontweight': 500, 'size': 16})
    # Restyle the existing categorical tick labels instead of overwriting
    # them with set_xticklabels.
    plt.xticks(rotation=60, fontsize=14)
    plt.title(title, fontsize=22)
    plt.ylabel("Temperature", fontsize=16)
    plt.xlabel("Area", fontsize=16)
    plt.show()
    return top5


d2 = _plot_top5_bar(temp_max, "Max temperature Change for top 5 Area")
d2 = _plot_top5_bar(temp_min, "Min temperature Change for top 5 Area")
# Load the remaining challenge tables. Each CSV is read exactly once (the
# energy table used to be read and previewed twice); the .head() calls only
# preview the first rows.
DATA_DIR = "/kaggle/input/z-unlocked-challenge-1-data-visualization"

energy_df = pd.read_csv(f"{DATA_DIR}/energy_use_data_11-29-2021.csv")
energy_df.head()
land_df = pd.read_csv(f"{DATA_DIR}/land_cover_data_11-30-2021.csv")
land_df.head()
df_temperature.head()
waste_df = pd.read_csv(f"{DATA_DIR}/waste_disposal_data_11-29-2021.csv")
waste_df.head()
fires_df = pd.read_csv(f"{DATA_DIR}/fires_data_11-29-2021.csv")
fires_df.head()
# Mean "Value" for every (Year, Months) pair across all areas.
temp_change = df_temperature.groupby(["Year", "Months"])["Value"].mean().reset_index()

plt.figure(figsize=(15, 10))
# "husl" produces as many evenly spaced hues as requested. The qualitative
# "Set1" palette only has 9 colours and repeats them when asked for more,
# which makes distinct Months levels indistinguishable in the legend.
month_palette = sns.color_palette("husl", n_colors=temp_change["Months"].nunique())
ax = sns.scatterplot(x='Year',
                     y='Value',
                     hue='Months',
                     legend='full',
                     data=temp_change,
                     palette=month_palette)
# Black line: the highest of the per-Months means within each year.
max_value_per_year = temp_change.groupby('Year')['Value'].max()
sns.lineplot(data=max_value_per_year,
             ax=ax,
             color='black')
plt.ylabel("Temperature", fontsize=16)
plt.xlabel("Year", fontsize=16)
plt.title("The trend for temperature change annually over Months")
# Shade the 2015-2020 window discussed in the write-up.
plt.axvspan(2015, 2020, alpha=0.15)
plt.show()
def _plot_yearly_mean(source_df, ylabel, highlight):
    """Plot the per-year mean of "Value" as points joined by a black line.

    source_df: frame with "Year" and "Value" columns.
    ylabel: y-axis label.
    highlight: (start_year, end_year) window to shade on the x-axis.

    Returns the aggregated frame (one row per year, columns "Year"/"Value").
    """
    yearly = source_df.groupby(["Year"])["Value"].mean().reset_index()
    plt.figure(figsize=(15, 10))
    # No hue variable is used, so no palette/legend arguments are passed
    # (seaborn ignores a palette when hue is not assigned).
    ax = sns.scatterplot(x='Year', y='Value', data=yearly)
    # The frame already holds one value per year, so the line simply joins
    # those yearly means; no further aggregation is needed.
    sns.lineplot(data=yearly.set_index('Year')['Value'],
                 ax=ax,
                 color='black')
    plt.ylabel(ylabel, fontsize=16)
    plt.xlabel("Year", fontsize=16)
    start_year, end_year = highlight
    plt.axvspan(start_year, end_year, alpha=0.15)
    plt.show()
    return yearly


# One figure per table; the shaded windows are the periods called out in the
# insight text that follows.
land_cover = _plot_yearly_mean(land_df, "Land Cover", (2004, 2006))
energy_use = _plot_yearly_mean(energy_df, "Energy Use", (1985, 1989))
waste_use = _plot_yearly_mean(waste_df, "Waste Use", (1990, 1993))
fires_use = _plot_yearly_mean(fires_df, "Fires Use", (1999, 2003))
Correlations between the hottest changes and other phenomena (like land coverage, energy use, waste use, CO2 emissions, and fires)
Insight
Aggregating the mean per year shows a correlation among temperature, energy use, land cover, waste use, and fires. All country–Value indicator combinations (the Value feature of each table) show an increase, but there are subtle differences:
- In land cover, there was a significant increase in 2004-2005, followed by a slight increase from 2011 to 2017.
- In energy use, there was a significant increase in 1985-1989, followed by a slight increase in 2019-2020.
- In waste use, there was a significant drop in 1990-1993, followed by a significant increase from 1994 to 2020.
- In fires, there was a significant increase in 1999-2003, followed by a slight decrease from 2003 to 2020.
Almost everywhere, the end-of-year figures show a correlation: the yearly increase in temperature affects the yearly use of waste, energy, deforestation, and land cover.
# Preview the temperature table, then summarise "Value" per "Months" label
# with its sum, mean and maximum.
df_temperature.head()
value_by_month = df_temperature.groupby("Months")["Value"]
value_by_month.agg(["sum", "mean", "max"])
# One small panel per "Months" label, each plotting that label's "Value"
# column against the frame's row index.
plt.figure(figsize=(18, 12))
# Group by the column name, not a one-element list: with a list key, pandas
# >= 2.0 yields 1-tuples such as ('April',) and the panel titles show the
# tuple instead of the label.
month_groups = df_temperature.groupby('Months')
n_cols = 3
# Keep the original 6x3 grid, but grow it if there are more than 18 groups
# so plt.subplot never receives an out-of-range index.
n_rows = max(6, -(-month_groups.ngroups // n_cols))
for i, (month, month_df) in enumerate(month_groups):
    ax = plt.subplot(n_rows, n_cols, i + 1, ymargin=0.5)
    ax.plot(month_df.Value)
    ax.set_title(month)
plt.tight_layout(h_pad=3.0)
plt.suptitle('Seasonal Temperature Change', y=1.03)
plt.show()
# Sovereign countries of South America. Peru was missing from the original
# list, while Aruba and Trinidad and Tobago (Caribbean) were included.
# NOTE(review): names must match the "Area" labels used in the dataset;
# 'Peru' is assumed to be spelled this way — TODO confirm.
south_america_countries = [
    'Argentina',
    'Bolivia (Plurinational State of)',
    'Brazil',
    'Chile',
    'Colombia',
    'Ecuador',
    'Guyana',
    'Paraguay',
    'Peru',
    'Suriname',
    'Uruguay',
    'Venezuela (Bolivarian Republic of)',
]
temperature_sa = df_temperature[df_temperature["Area"].isin(south_america_countries)]
temperature_sa.head()

# Highest and lowest "Value" per country, side by side.
temperature_sa.groupby(["Area"])["Value"].agg(["max", "min"]).plot(kind="bar", figsize=(12, 8))
plt.ylabel("Temperature")
# Render the figure explicitly, like every other chart in this file.
plt.show()
# Collapse the South America subset to the mean "Value" per (Year, Months).
temperature_sa = temperature_sa.groupby(["Year", "Months"])["Value"].mean().reset_index()

plt.figure(figsize=(15, 10))
# "husl" produces as many evenly spaced hues as requested. The qualitative
# "Set1" palette only has 9 colours and repeats them when asked for more,
# which makes distinct Months levels indistinguishable in the legend.
month_palette = sns.color_palette("husl", n_colors=temperature_sa["Months"].nunique())
ax = sns.scatterplot(x='Year',
                     y='Value',
                     legend='full',
                     hue='Months',
                     data=temperature_sa,
                     palette=month_palette)
# Black line: the highest of the per-Months means within each year.
max_value_per_year = temperature_sa.groupby('Year')['Value'].max()
sns.lineplot(data=max_value_per_year,
             ax=ax,
             color='black')
plt.ylabel("Temperature Change", fontsize=16)
plt.xlabel("Year", fontsize=16)
# Shade the 2013-2016 window discussed in the conclusion.
plt.axvspan(2013, 2016, alpha=0.15)
plt.show()
- Ultimately, there is an upward trend in the annual temperature change in South America, with a peak around 2013-2016.
To join this competition, see Z-Unlocked_Challenge1. There is a chance to visit Barcelona for a Kaggle competition.