import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from IPython.display import display
import itertools
%matplotlib inline

# Apply seaborn's theme and set matplotlib's `figure.figsize` rc parameter
# to (12, 8) so that figures created afterwards use that size.
sns.set(rc={'figure.figsize':(12,8)})


            
              # Load the dataset, then preview its first rows and its column summary.
df = pd.read_csv('all_data.csv')
display(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96 entries, 0 to 95
Data columns (total 4 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Country                           96 non-null     object 
 1   Year                              96 non-null     int64  
 2   Life expectancy at birth (years)  96 non-null     float64
 3   GDP                               96 non-null     float64
dtypes: float64(2), int64(1), object(1)
memory usage: 3.1+ KB
None


            
              # Report how many distinct values the 'Country' column holds.
# nunique(dropna=False) counts exactly the entries that unique() would return.
print(f"There are {df['Country'].nunique(dropna=False)} countries")

There are 6 countries


            
              # A single scatterplot call with hue='Country' colours the points per country
# and builds the legend, so no explicit per-country loop is needed.
# The title is set on the Axes that scatterplot returns.
sns.scatterplot(
    data=df, x='Year', y='GDP', hue='Country'
).set_title('GDP over Time by Country')

Text(0.5, 1.0, 'GDP over Time by Country')


            
              # Correlogram: pairwise plots of the numeric columns, coloured by country,
# drawn with kind='reg' (regression-style panels).
sns.pairplot(data=df, hue='Country', kind='reg')
plt.show()


            
              # One GDP-over-time scatter panel per country, arranged on a two-column grid.
n_cols = 2
countries = df['Country'].unique()
# Ceiling division sizes the grid from the data (6 countries -> 3 rows) instead
# of hard-coding 3x2, which raised IndexError as soon as a 7th country appeared.
# max(1, ...) keeps plt.subplots() valid even when there are no countries.
n_rows = max(1, -(-len(countries) // n_cols))
# squeeze=False guarantees a 2-D array of Axes even for a single row.
fig, axes = plt.subplots(n_rows, n_cols, sharex=True, squeeze=False)
axes = list(axes.flat)  # flat, row-major list of Axes (same order as before)
for ax, country in zip(axes, countries):
    sns.scatterplot(x='Year', y='GDP', data=df[df['Country'] == country], ax=ax)
    ax.set_title(f'{country} GDP over time')
plt.show()
# No plt.clf() after show(): it acted on a brand-new empty figure, which the
# notebook then rendered as "<Figure size 864x576 with 0 Axes>".

<Figure size 864x576 with 0 Axes>


            
              # One life-expectancy-over-time scatter panel per country on a two-column grid.
n_cols = 2
countries = df['Country'].unique()
# Ceiling division sizes the grid from the data (6 countries -> 3 rows) instead
# of hard-coding 3x2, which raised IndexError as soon as a 7th country appeared.
# max(1, ...) keeps plt.subplots() valid even when there are no countries.
n_rows = max(1, -(-len(countries) // n_cols))
# squeeze=False guarantees a 2-D array of Axes even for a single row.
fig, axes = plt.subplots(n_rows, n_cols, sharex=True, squeeze=False)
axes = list(axes.flat)  # flat, row-major list of Axes (same order as before)
for ax, country in zip(axes, countries):
    sns.scatterplot(
        x='Year',
        y='Life expectancy at birth (years)',
        data=df[df['Country'] == country],
        ax=ax,
    )
    ax.set_title(f'{country} Life expectancy over time')
plt.show()
# No plt.clf() after show(): it acted on a brand-new empty figure, which the
# notebook then rendered as "<Figure size 864x576 with 0 Axes>".

<Figure size 864x576 with 0 Axes>


            
              # Violin plot of life expectancy, one violin per country.
sns.violinplot(data=df, x='Country', y='Life expectancy at birth (years)')

<AxesSubplot:xlabel='Country', ylabel='Life expectancy at birth (years)'>


            
              # Load the World Bank GDP export into df2.
from io import StringIO

# A context manager closes the file handle deterministically; the previous bare
# open(...) call never closed it.
with open('API_NY.GDP.MKTP.CD_DS2_en_csv_v2_4251000/API_NY.GDP.MKTP.CD_DS2_en_csv_v2_4251000.csv', 'r', encoding='utf-8') as raw_file:
    # Drop the first four lines of the file so that parsing starts at the
    # header row ('Country Name', 'Country Code', ...).
    csv = ''.join(raw_file.readlines()[4:])
csvFile = StringIO(csv)
# iloc[:, :-1] discards the last parsed column.
# NOTE(review): presumably an empty column caused by a trailing comma on each
# row of the export -- confirm against the file.
df2 = pd.read_csv(csvFile).iloc[:, :-1]
display(df2.head())


            
              # Reshape the wide World Bank table (one column per year) into long form.
years = [str(year) for year in range(1960, 2022)]  # "1960" .. "2021"
c = df2.columns.difference(years, sort=False).tolist()  # the non-year columns
# set_index(c) moves the non-year columns into the row index; stack() then moves
# the year column labels into a new, unnamed innermost index level.
# reset_index() names an unnamed level after its position, i.e.
# f'level_{len(c)}'. Deriving that name from len(c), rather than hard-coding
# 'level_4', keeps the rename working if the number of non-year columns changes.
display(df2.set_index(c).stack().to_frame().reset_index().rename(columns={
    f'level_{len(c)}': 'Year',
    0: 'GDP'
}))


            
              years = [str(year) for year in range(1960, 2022)] # year labels as strings: "1960" ... "2021" (the stop value 2022 is excluded by range())


            
              # Columns of df2 that are not in `years`; sort=False asks pandas not to sort the result.
              c = df2.columns.difference(years, sort=False).tolist()
print(df2.columns)  # print the full column index for reference

Index(['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code',
       '1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968',
       '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977',
       '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986',
       '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995',
       '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
       '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021'],
      dtype='object')

Index(['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code',
       '1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968',
       '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977',
       '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986',
       '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995',
       '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
       '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021'],
      dtype='object')


            
              # Step 1: move the non-year columns listed in `c` into the row index.
              display(df2.set_index(c).head())


            
              # Step 2: stack() pivots the remaining (year) column labels into an innermost
              # index level, producing a Series with one value per row; show the first 10 entries.
              display(df2.set_index(c).stack().head(10))

Country Name  Country Code  Indicator Name     Indicator Code      
Aruba         ABW           GDP (current US$)  NY.GDP.MKTP.CD  1986    4.055866e+08
                                                               1987    4.877095e+08
                                                               1988    5.966480e+08
                                                               1989    6.955307e+08
                                                               1990    7.648045e+08
                                                               1991    8.720670e+08
                                                               1992    9.586592e+08
                                                               1993    1.083240e+09
                                                               1994    1.245810e+09
                                                               1995    1.320670e+09
dtype: float64


            
              # Step 3: to_frame() converts the stacked Series into a one-column DataFrame.
              display(df2.set_index(c).stack().to_frame())


            
              # Step 4: reset_index() turns the index levels back into ordinary columns.
              display(df2.set_index(c).stack().to_frame().reset_index())

	Country	Year	Life expectancy at birth (years)	GDP
0	Chile	2000	77.3	7.786093e+10
1	Chile	2001	77.3	7.097992e+10
2	Chile	2002	77.8	6.973681e+10
3	Chile	2003	77.9	7.564346e+10
4	Chile	2004	78.0	9.921039e+10

	Country Name	Country Code	Indicator Name	Indicator Code	1960	1961	1962	1963	1964	1965	...	2012	2013	2014	2015	2016	2017	2018	2019	2020	2021
0	Aruba	ABW	GDP (current US$)	NY.GDP.MKTP.CD	NaN	NaN	NaN	NaN	NaN	NaN	...	2.615084e+09	2.727933e+09	2.791061e+09	2.963128e+09	2.983799e+09	3.092179e+09	3.202235e+09	3.310056e+09	2.496648e+09	NaN
1	Africa Eastern and Southern	AFE	GDP (current US$)	NY.GDP.MKTP.CD	2.129059e+10	2.180847e+10	2.370702e+10	2.821004e+10	2.611879e+10	2.968217e+10	...	9.730435e+11	9.839370e+11	1.003679e+12	9.242525e+11	8.823551e+11	1.020647e+12	9.910223e+11	9.975340e+11	9.216459e+11	1.082096e+12
2	Afghanistan	AFG	GDP (current US$)	NY.GDP.MKTP.CD	5.377778e+08	5.488889e+08	5.466667e+08	7.511112e+08	8.000000e+08	1.006667e+09	...	1.990732e+10	2.014640e+10	2.049713e+10	1.913421e+10	1.811656e+10	1.875347e+10	1.805323e+10	1.879945e+10	2.011614e+10	NaN
3	Africa Western and Central	AFW	GDP (current US$)	NY.GDP.MKTP.CD	1.040414e+10	1.112789e+10	1.194319e+10	1.267633e+10	1.383837e+10	1.486223e+10	...	7.275704e+11	8.207927e+11	8.649905e+11	7.607345e+11	6.905464e+11	6.837487e+11	7.416899e+11	7.945430e+11	7.844457e+11	8.358084e+11
4	Angola	AGO	GDP (current US$)	NY.GDP.MKTP.CD	NaN	NaN	NaN	NaN	NaN	NaN	...	1.249982e+11	1.334016e+11	1.372444e+11	8.721929e+10	4.984049e+10	6.897276e+10	7.779294e+10	6.930910e+10	5.361907e+10	7.254699e+10

				1960	1961	1962	1963	1964	1965	1966	1967	1968	1969	...	2012	2013	2014	2015	2016	2017	2018	2019	2020	2021
Country Name	Country Code	Indicator Name	Indicator Code
Aruba	ABW	GDP (current US$)	NY.GDP.MKTP.CD	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	2.615084e+09	2.727933e+09	2.791061e+09	2.963128e+09	2.983799e+09	3.092179e+09	3.202235e+09	3.310056e+09	2.496648e+09	NaN
Africa Eastern and Southern	AFE	GDP (current US$)	NY.GDP.MKTP.CD	2.129059e+10	2.180847e+10	2.370702e+10	2.821004e+10	2.611879e+10	2.968217e+10	3.223912e+10	3.351455e+10	3.652148e+10	4.182834e+10	...	9.730435e+11	9.839370e+11	1.003679e+12	9.242525e+11	8.823551e+11	1.020647e+12	9.910223e+11	9.975340e+11	9.216459e+11	1.082096e+12
Afghanistan	AFG	GDP (current US$)	NY.GDP.MKTP.CD	5.377778e+08	5.488889e+08	5.466667e+08	7.511112e+08	8.000000e+08	1.006667e+09	1.400000e+09	1.673333e+09	1.373333e+09	1.408889e+09	...	1.990732e+10	2.014640e+10	2.049713e+10	1.913421e+10	1.811656e+10	1.875347e+10	1.805323e+10	1.879945e+10	2.011614e+10	NaN
Africa Western and Central	AFW	GDP (current US$)	NY.GDP.MKTP.CD	1.040414e+10	1.112789e+10	1.194319e+10	1.267633e+10	1.383837e+10	1.486223e+10	1.583259e+10	1.442604e+10	1.488035e+10	1.688209e+10	...	7.275704e+11	8.207927e+11	8.649905e+11	7.607345e+11	6.905464e+11	6.837487e+11	7.416899e+11	7.945430e+11	7.844457e+11	8.358084e+11
Angola	AGO	GDP (current US$)	NY.GDP.MKTP.CD	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	1.249982e+11	1.334016e+11	1.372444e+11	8.721929e+10	4.984049e+10	6.897276e+10	7.779294e+10	6.930910e+10	5.361907e+10	7.254699e+10

Correlogram¶

Code explanation¶

What do the results show?¶

World Bank Data¶

A guide to cleaning data¶

Pandas¶

Pure explicit python¶

Correlogram¶

Code explanation¶

What do the results show?¶

World Bank Data¶

A guide to cleaning data¶

Pandas¶

Pure explicit python¶

World Bank Data¶