Bangladesh 2020 population pyramids by upazila

BGD_pyr.png

Population Pyramids of Georgia in ggplot2

You can download session 9 files for constructing the population pyramids of Georgia here: RLadies Tbilisi 

rm(list=ls(all=TRUE))
cat("\014")
mypath <- "/Users/GozerTheGozerian/Keymasters Folder/"
setwd(paste(mypath)) #set your working directory
##  [1] "Index"
##  [2] "Variant"
##  [3] "Major.area..region..country.or.area.."
##  [4] "sex"
##  [5] "Notes"
##  [6] "Country.code"
##  [7] "Reference.date..as.of.1.July."
##  [8] "X0.4"
##  [9] "X05.Sep"
## [10] "Oct.14"
## [11] "X15.19"
[...]
## [23] "X75.79"
## [24] "X80."
## [25] "X80.84"
## [26] "X85.89"
## [27] "X90.94"
## [28] "X95.99"
## [29] "X100."
head(pyr)
##   Index   Variant Major.area..region..country.or.area..  sex Notes
## 1     1 Estimates                                 WORLD both
## 2     2 Estimates                                 WORLD both
## 3     3 Estimates                                 WORLD both
## 4     4 Estimates                                 WORLD both
## 5     5 Estimates                                 WORLD both
## 6     6 Estimates                                 WORLD both
##   Country.code Reference.date..as.of.1.July.   X0.4 X05.Sep Oct.14 X15.19
## 1          900                          1950 337432  269550 260286 238628
## 2          900                          1955 402845  315055 263266 254815
## 3          900                          1960 430565  380319 309276 257899
## 4          900                          1965 477798  409020 372817 303891
## 5          900                          1970 522641  458298 403911 367789
## 6          900                          1975 543225  503753 452706 398384
##   X20.24 X25.29 X30.34 X35.39 X40.44 X45.49 X50.54 X55.59 X60.64 X65.69
## 1 221781 194424 166937 162917 147483 127415 107608  88601  73422  55106
## 2 231892 214878 187941 160385 155546 138743 119084  97441  76843  59322
## 3 248413 225957 208747 181632 153398 147699 130210 108435  85064  62665
## 4 251897 242692 219978 202499 174884 145701 138601 119505  95085  70256
## 5 297557 246921 237657 214330 196585 168438 137799 128954 107201  81023
## 6 361883 293531 243384 233137 209181 190299 161058 128755 116704  92385
##   X70.74 X75.79  X80. X80.84 X85.89 X90.94 X95.99 X100.
## 1  37360  21997 14202     NA     NA     NA     NA    NA
## 2  40346  23755 16158     NA     NA     NA     NA    NA
## 3  44018  25986 18061     NA     NA     NA     NA    NA
## 4  47382  29457 21032     NA     NA     NA     NA    NA
## 5  55168  32876 25340     NA     NA     NA     NA    NA
## 6  64337  38934 29743     NA     NA     NA     NA    NA
pyr <- read.csv("Session_2_POPULATION_BY_AGE_BOTH_SEXES.csv", header=T)
names(pyr)
##  [1] "Index"
##  [2] "Variant"
##  [3] "Major.area..region..country.or.area.."
##  [4] "sex"
##  [5] "Notes"
##  [6] "Country.code"
##  [7] "Reference.date..as.of.1.July."
##  [8] "X0.4"
##  [9] "X05.Sep"
## [10] "Oct.14"
## [11] "X15.19"
[...]
## [22] "X70.74"
## [23] "X75.79"
## [24] "X80."
## [25] "X80.84"
## [26] "X85.89"
## [27] "X90.94"
## [28] "X95.99"
## [29] "X100."
#make a new variable with names of all variables:
#make a new variable with names of all variables:
vars <- names(pyr)
#and change those variables names that start with an X
age <- c(paste(seq(0, 75, by=5), "-", seq(4, 79, by=5)), "80+", paste(seq(80, 95, by=5), "-", seq(84, 99, by=5)), "100+")
age
##  [1] "0 - 4"   "5 - 9"   "10 - 14" "15 - 19" "20 - 24" "25 - 29" "30 - 34"
##  [8] "35 - 39" "40 - 44" "45 - 49" "50 - 54" "55 - 59" "60 - 64" "65 - 69"
## [15] "70 - 74" "75 - 79" "80+"     "80 - 84" "85 - 89" "90 - 94" "95 - 99"
## [22] "100+"
names(pyr) <- c(vars[1], vars[2], "Major.Area", "sex", vars[5], vars[6], "year", age)
names(pyr)[1:15]
##  [1] "Index"        "Variant"      "Major.Area"   "sex"
##  [5] "Notes"        "Country.code" "year"         "0 - 4"
##  [9] "5 - 9"        "10 - 14"      "15 - 19"      "20 - 24"
## [13] "25 - 29"      "30 - 34"      "35 - 39"
library(tidyr)
# transform the data from wide to long format
pyr <- gather(pyr, "age.group", "value", 8:29)
head(pyr)
##   Index   Variant Major.Area  sex Notes Country.code year age.group  value
## 1     1 Estimates      WORLD both                900 1950     0 - 4 337432
## 2     2 Estimates      WORLD both                900 1955     0 - 4 402845
## 3     3 Estimates      WORLD both                900 1960     0 - 4 430565
## 4     4 Estimates      WORLD both                900 1965     0 - 4 477798
## 5     5 Estimates      WORLD both                900 1970     0 - 4 522641
## 6     6 Estimates      WORLD both                900 1975     0 - 4 543225
#replace all NA with 0
library(dplyr)
is.na(pyr$value) <- 0
pyr.g <- pyr %>%
 filter(Major.Area=="Georgia"&sex!="both") # exclude "both"

#create an order vector to sort data
o <- seq(1,22, by=1) # 22 is the number of age groups length(unique(pyr$age.group))
oo <- rep(o,28) # 28 number of years
order <- as.vector(sort(oo, decreasing=F))
pyr.g$order <- order
breaks <- pyr.g$age.group
library(ggplot2)
###
# get rid of the 80+ abridged age group
pyr.g1 <- pyr.g[-c(which(pyr.g$age.group=="80+")),]
### simple pyramid plot
p <- ggplot(pyr.g1, aes(x=age.group, y=value, fill=factor(sex)))+
geom_bar(data=pyr.g1 %>%
filter(sex=="female"&year=="2015"),
aes(x=reorder(age.group, order), y=value), stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="male"&year=="2015"),
aes(x=reorder(age.group, order), y=-value), stat="identity")+ #negative value for males not to overlap; reorder values of age group by order; "identity" is only for bar charts
coord_flip()+ #bending function: flip the coordinates
labs(x = "", y = "")+
scale_fill_manual(values = c(female = "red", male = "blue"), name="")+
scale_x_discrete(breaks=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+"),labels=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+" ))+ #not to show all the age groups all the time
scale_y_continuous(breaks=seq(-200,200,25),labels=abs(seq(-200,200,25)))+ #tell R t paste absolute numbers of values not to have negative values on graph
theme_bw()+
theme(axis.text.x = element_text(size=10, color="black"), # size of x axis text
axis.text.y = element_text(size=10, color="black"))

pyr1

#############################################
### STEP 2: add lines/bars to compare other years
#############################################
p+
geom_line(data=pyr.g1 %>%
filter(sex=="male"&year=="1975"),
aes(x=reorder(age.group, order), y=-value), colour="lightblue", group=1)+
geom_line(data=pyr.g1 %>%
filter(sex=="female"&year=="1975"),
aes(x=reorder(age.group, order), y=value), colour="pink", group=1)

pyr2

# bars: since in ggplot the last plot is
#the one that appears on top (hiding everything underneath),
#we can add alpha=0.5 to add some transparence, 1 being the
#full color
p+
geom_bar(data=pyr.g1 %>%
filter(sex=="male"&year=="1975"),
aes(x=reorder(age.group, order), y=-value), fill="lightblue", alpha=.5,stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="female"&year=="1975"),
aes(x=reorder(age.group, order), y=value), fill="pink", alpha=.5, stat="identity")

pyr3

#######################################################################
# STEP 3: add different legends for the two years: now we only have one for the sex, as the fill factors for all 4 geom_bar(s) is the same
#
ggplot(pyr.g1, aes(x=age.group, y=value, fill=factor(sex), col=factor(year)))+ # add different colors for the two years 1975 and 2015 by adding col=factor(year)
# this part stays the same
geom_bar(data=pyr.g1 %>%
filter(sex=="female"&year=="2015"),
aes(x=reorder(age.group, order), y=value), stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="male"&year=="2015"),
aes(x=reorder(age.group, order), y=-value), stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="male"&year=="1975"),
aes(x=reorder(age.group, order),y=-value), alpha=.5,stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="female"&year=="1975"),
aes(x=reorder(age.group, order), y=value), alpha=.5, stat="identity")+
coord_flip()+
labs(x = "", y = "")+
scale_x_discrete(breaks=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)) , "100+"),labels=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+" ))+
scale_y_continuous(breaks=seq(-200,200,25),labels=abs(seq(-200,200,25)))+
theme_bw()+
theme(axis.text.x = element_text(size=10, color="black"),
axis.text.y = element_text(size=10, color="black"))+
# add the legends with scale_fill_manual which controls the filling colors for sex and scale_color_manual which controls the border color that distinguisces the two years
scale_fill_manual(values = c(female = "red", male = "blue"), name="")+
scale_color_manual(values=c("1975"="black", "2015"="grey"), name="" )+
# and I want the year legend squares to look empty
guides(colour = guide_legend(override.aes = list(alpha = 0))) #makes the squares for the years legend empty of any color

pyr4

################################################################
## STEP 4: one pyramid plot for each year in one page with facet_wrap
##
ggplot(pyr.g1, aes(x=age.group, y=value, fill=factor(sex)))+
geom_bar(data=pyr.g1 %>%
filter(sex=="male"),
aes(x=reorder(age.group, order), y=-value), stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="female"),
aes(x=reorder(age.group, order), y=value), stat="identity")+
coord_flip()+
labs(x = "", y = "")+
scale_x_discrete(breaks=c( paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+" ))+
scale_y_continuous(breaks=seq(-300,300,100),labels=abs(seq(-300,300,100)))+
scale_fill_manual(values = c(female = "red", male = "blue"), name="")+
theme_bw()+
theme(axis.text.x = element_text(size=10, color="black"),
axis.text.y = element_text(size=10, color="black"))+
facet_wrap(~year)

Untitled

pyr.ar <- pyr %>%
filter(Major.Area=="Armenia"&sex!="both") # exclude "both"
pyr.az <- pyr %>%
filter(Major.Area=="Azerbaijan"&sex!="both") # exclude "both"
pyr.ar$order <- order
pyr.az$order <- order
pyr.c <- rbind(pyr.g, pyr.ar, pyr.az)
pyr.c1 <- pyr.c[-c(which(pyr.c$age.group=="80+")),] 

ggplot(pyr.c1,
aes(x=age.group, y=value,
fill=factor(Major.Area)))+
 geom_bar(data=pyr.c1 %>%
filter(sex=="female"&year=="2015"),
aes(x=reorder(age.group, order), y=value), stat="identity")+
geom_bar(data=pyr.c1 %>%
filter(sex=="male"&year=="2015"),
aes(x=reorder(age.group, order), y=-value),
stat="identity")+
coord_flip()+
 labs(x = "", y = "")+
scale_x_discrete(breaks=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)) , "100+"),
labels=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+" ))+
 scale_y_continuous(breaks=seq(-400,400,200),
labels=abs(seq(-400,400,200)))+
 theme_bw()+
scale_fill_manual(values = c(Armenia = "red",
Georgia="green", Azerbaijan = "blue"), name="")+
theme(axis.text.x = element_text(size=10, color="black"),
axis.text.y = element_text(size=10, color="black"),
legend.position="none")+
facet_wrap(~Major.Area)
#facet_wrap(~Major.Area, scales="free_x")

Untitled3

And with scales=”free_x”

Untitled1


BAR CHART + LINE: a ggplot balance plot (1)

You can download session 9 files here (R-Ladies Tbilisi) and specify your working directory with setwd(“/Users/mydomain/myforlder/)

BAR CHART + LINE:

###Graph 1: Total services trade, by value

 require(ggplot2)
require(dplyr)
mypath <- "/Users/StayPuftMarshmallowMan/Shandor Folder/"
setwd(paste(mypath))
mydt <- read.csv("Georgia_Data_UN.csv", header=T)

head(mydt)
##                                            variable     type year   value
## 1 GDP: Gross domestic product (million current US$) economic 2014 16530.0
## 2 GDP: Gross domestic product (million current US$) economic 2010 11638.0
## 3 GDP: Gross domestic product (million current US$) economic 2005  6411.0
## 4    GDP growth rate (annual %, const. 2005 prices) economic 2014     4.8
## 5    GDP growth rate (annual %, const. 2005 prices) economic 2010     6.2
## 6    GDP growth rate (annual %, const. 2005 prices) economic 2005     9.6
##   geo
## 1
## 2
## 3
## 4
## 5
## 6
levels(mydt$variable)
##  [1] "Agricultural production index (2004-2006=100)"
##  [2] "Balance (million US$)"
##  [3] "Balance of payments, current account (million US$)"
##  [4] "CO2 emission estimates (tons per capita)"
##  [5] "CPI: Consumer price index (2000=100)"
##  [6] "Economy: Agriculture (% of GVA)"
##  [7] "Economy: Industry (% of GVA)"
##  [8] "Economy: Services and other activity (% of GVA)"
##  [9] "Education: Government expenditure (% of GDP)"
## [10] "Education: Tertiary gross enrolment ratio (f-m per 100 pop.)"
[...]
## [48] "Unemployment (% of labour force)"
## [49] "Urban population (%)"
## [50] "Urban population growth rate (average annual %)"
ser.dt <- mydt %>%
filter(variable=="Total Services Trade")

Balance <- ser.dt%>%
group_by(year)%>%
summarise(value=-diff(value))

Balance <- cbind(variable=c(rep("Total Services Trade", 13)),
type= c(rep("Balance", 13)), Balance, geo=c(rep("NA", 13)))

mydata <- rbind(ser.dt, Balance)

subset with the pipe operator %>%

base <- mydata %>%
filter(type != "Balance") %>%
mutate(
value = ifelse(type == "Exports", value, -value)
)
balance <- mydata %>%
filter(type == "Balance")

ggplot(balance, aes(x = year, y = value)) +
geom_bar(data = base, aes(fill = type), stat = "identity") +
geom_point(aes(colour = type)) +
geom_line(aes(colour = type, group=1)) +
scale_fill_manual(values = c(Exports = "#D55E00", Imports = "#E69F00"), name="") +
scale_colour_manual(values = c(Balance = "#660000"), name="") +
labs(x = "", y = "Total Services Trade")+
theme_bw()

Presentation1