You can download session 9 files for constructing the population pyramids of Georgia here: RLadies Tbilisi
rm(list=ls(all=TRUE)) cat("\014") mypath <- "/Users/GozerTheGozerian/Keymasters Folder/" setwd(paste(mypath)) #set your working directory
## [1] "Index" ## [2] "Variant" ## [3] "Major.area..region..country.or.area.." ## [4] "sex" ## [5] "Notes" ## [6] "Country.code" ## [7] "Reference.date..as.of.1.July." ## [8] "X0.4" ## [9] "X05.Sep" ## [10] "Oct.14" ## [11] "X15.19" [...] ## [23] "X75.79" ## [24] "X80." ## [25] "X80.84" ## [26] "X85.89" ## [27] "X90.94" ## [28] "X95.99" ## [29] "X100."
head(pyr)
## Index Variant Major.area..region..country.or.area.. sex Notes ## 1 1 Estimates WORLD both ## 2 2 Estimates WORLD both ## 3 3 Estimates WORLD both ## 4 4 Estimates WORLD both ## 5 5 Estimates WORLD both ## 6 6 Estimates WORLD both ## Country.code Reference.date..as.of.1.July. X0.4 X05.Sep Oct.14 X15.19 ## 1 900 1950 337432 269550 260286 238628 ## 2 900 1955 402845 315055 263266 254815 ## 3 900 1960 430565 380319 309276 257899 ## 4 900 1965 477798 409020 372817 303891 ## 5 900 1970 522641 458298 403911 367789 ## 6 900 1975 543225 503753 452706 398384 ## X20.24 X25.29 X30.34 X35.39 X40.44 X45.49 X50.54 X55.59 X60.64 X65.69 ## 1 221781 194424 166937 162917 147483 127415 107608 88601 73422 55106 ## 2 231892 214878 187941 160385 155546 138743 119084 97441 76843 59322 ## 3 248413 225957 208747 181632 153398 147699 130210 108435 85064 62665 ## 4 251897 242692 219978 202499 174884 145701 138601 119505 95085 70256 ## 5 297557 246921 237657 214330 196585 168438 137799 128954 107201 81023 ## 6 361883 293531 243384 233137 209181 190299 161058 128755 116704 92385 ## X70.74 X75.79 X80. X80.84 X85.89 X90.94 X95.99 X100. ## 1 37360 21997 14202 NA NA NA NA NA ## 2 40346 23755 16158 NA NA NA NA NA ## 3 44018 25986 18061 NA NA NA NA NA ## 4 47382 29457 21032 NA NA NA NA NA ## 5 55168 32876 25340 NA NA NA NA NA ## 6 64337 38934 29743 NA NA NA NA NA
pyr <- read.csv("Session_2_POPULATION_BY_AGE_BOTH_SEXES.csv", header=T) names(pyr)
## [1] "Index" ## [2] "Variant" ## [3] "Major.area..region..country.or.area.." ## [4] "sex" ## [5] "Notes" ## [6] "Country.code" ## [7] "Reference.date..as.of.1.July." ## [8] "X0.4" ## [9] "X05.Sep" ## [10] "Oct.14" ## [11] "X15.19" [...] ## [22] "X70.74" ## [23] "X75.79" ## [24] "X80." ## [25] "X80.84" ## [26] "X85.89" ## [27] "X90.94" ## [28] "X95.99" ## [29] "X100."
#make a new variable with names of all variables: #make a new variable with names of all variables: vars <- names(pyr) #and change those variables names that start with an X age <- c(paste(seq(0, 75, by=5), "-", seq(4, 79, by=5)), "80+", paste(seq(80, 95, by=5), "-", seq(84, 99, by=5)), "100+") age
## [1] "0 - 4" "5 - 9" "10 - 14" "15 - 19" "20 - 24" "25 - 29" "30 - 34" ## [8] "35 - 39" "40 - 44" "45 - 49" "50 - 54" "55 - 59" "60 - 64" "65 - 69" ## [15] "70 - 74" "75 - 79" "80+" "80 - 84" "85 - 89" "90 - 94" "95 - 99" ## [22] "100+"
names(pyr) <- c(vars[1], vars[2], "Major.Area", "sex", vars[5], vars[6], "year", age) names(pyr)[1:15]
## [1] "Index" "Variant" "Major.Area" "sex" ## [5] "Notes" "Country.code" "year" "0 - 4" ## [9] "5 - 9" "10 - 14" "15 - 19" "20 - 24" ## [13] "25 - 29" "30 - 34" "35 - 39"
library(tidyr) # transform the data from wide to long format pyr <- gather(pyr, "age.group", "value", 8:29) head(pyr)
## Index Variant Major.Area sex Notes Country.code year age.group value ## 1 1 Estimates WORLD both 900 1950 0 - 4 337432 ## 2 2 Estimates WORLD both 900 1955 0 - 4 402845 ## 3 3 Estimates WORLD both 900 1960 0 - 4 430565 ## 4 4 Estimates WORLD both 900 1965 0 - 4 477798 ## 5 5 Estimates WORLD both 900 1970 0 - 4 522641 ## 6 6 Estimates WORLD both 900 1975 0 - 4 543225
#replace all NA with 0 library(dplyr) is.na(pyr$value) <- 0 pyr.g <- pyr %>% filter(Major.Area=="Georgia"&sex!="both") # exclude "both" #create an order vector to sort data o <- seq(1,22, by=1) # 22 is the number of age groups length(unique(pyr$age.group)) oo <- rep(o,28) # 28 number of years order <- as.vector(sort(oo, decreasing=F)) pyr.g$order <- order breaks <- pyr.g$age.group
library(ggplot2) ### # get rid of the 80+ abridged age group pyr.g1 <- pyr.g[-c(which(pyr.g$age.group=="80+")),] ### simple pyramid plot p <- ggplot(pyr.g1, aes(x=age.group, y=value, fill=factor(sex)))+ geom_bar(data=pyr.g1 %>% filter(sex=="female"&year=="2015"), aes(x=reorder(age.group, order), y=value), stat="identity")+ geom_bar(data=pyr.g1 %>% filter(sex=="male"&year=="2015"), aes(x=reorder(age.group, order), y=-value), stat="identity")+ #negative value for males not to overlap; reorder values of age group by order; "identity" is only for bar charts coord_flip()+ #bending function: flip the coordinates labs(x = "", y = "")+ scale_fill_manual(values = c(female = "red", male = "blue"), name="")+ scale_x_discrete(breaks=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+"),labels=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+" ))+ #not to show all the age groups all the time scale_y_continuous(breaks=seq(-200,200,25),labels=abs(seq(-200,200,25)))+ #tell R t paste absolute numbers of values not to have negative values on graph theme_bw()+ theme(axis.text.x = element_text(size=10, color="black"), # size of x axis text axis.text.y = element_text(size=10, color="black"))
############################################# ### STEP 2: add lines/bars to compare other years ############################################# p+ geom_line(data=pyr.g1 %>% filter(sex=="male"&year=="1975"), aes(x=reorder(age.group, order), y=-value), colour="lightblue", group=1)+ geom_line(data=pyr.g1 %>% filter(sex=="female"&year=="1975"), aes(x=reorder(age.group, order), y=value), colour="pink", group=1)
# bars: since in ggplot the last plot is #the one that appears on top (hiding everything underneath), #we can add alpha=0.5 to add some transparence, 1 being the #full color p+ geom_bar(data=pyr.g1 %>% filter(sex=="male"&year=="1975"), aes(x=reorder(age.group, order), y=-value), fill="lightblue", alpha=.5,stat="identity")+ geom_bar(data=pyr.g1 %>% filter(sex=="female"&year=="1975"), aes(x=reorder(age.group, order), y=value), fill="pink", alpha=.5, stat="identity")
####################################################################### # STEP 3: add different legends for the two years: now we only have one for the sex, as the fill factors for all 4 geom_bar(s) is the same # ggplot(pyr.g1, aes(x=age.group, y=value, fill=factor(sex), col=factor(year)))+ # add different colors for the two years 1975 and 2015 by adding col=factor(year) # this part stays the same geom_bar(data=pyr.g1 %>% filter(sex=="female"&year=="2015"), aes(x=reorder(age.group, order), y=value), stat="identity")+ geom_bar(data=pyr.g1 %>% filter(sex=="male"&year=="2015"), aes(x=reorder(age.group, order), y=-value), stat="identity")+ geom_bar(data=pyr.g1 %>% filter(sex=="male"&year=="1975"), aes(x=reorder(age.group, order),y=-value), alpha=.5,stat="identity")+ geom_bar(data=pyr.g1 %>% filter(sex=="female"&year=="1975"), aes(x=reorder(age.group, order), y=value), alpha=.5, stat="identity")+ coord_flip()+ labs(x = "", y = "")+ scale_x_discrete(breaks=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)) , "100+"),labels=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+" ))+ scale_y_continuous(breaks=seq(-200,200,25),labels=abs(seq(-200,200,25)))+ theme_bw()+ theme(axis.text.x = element_text(size=10, color="black"), axis.text.y = element_text(size=10, color="black"))+ # add the legends with scale_fill_manual which controls the filling colors for sex and scale_color_manual which controls the border color that distinguisces the two years scale_fill_manual(values = c(female = "red", male = "blue"), name="")+ scale_color_manual(values=c("1975"="black", "2015"="grey"), name="" )+ # and I want the year legend squares to look empty guides(colour = guide_legend(override.aes = list(alpha = 0))) #makes the squares for the years legend empty of any color
################################################################ ## STEP 4: one pyramid plot for each year in one page with facet_wrap ## ggplot(pyr.g1, aes(x=age.group, y=value, fill=factor(sex)))+ geom_bar(data=pyr.g1 %>% filter(sex=="male"), aes(x=reorder(age.group, order), y=-value), stat="identity")+ geom_bar(data=pyr.g1 %>% filter(sex=="female"), aes(x=reorder(age.group, order), y=value), stat="identity")+ coord_flip()+ labs(x = "", y = "")+ scale_x_discrete(breaks=c( paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+" ))+ scale_y_continuous(breaks=seq(-300,300,100),labels=abs(seq(-300,300,100)))+ scale_fill_manual(values = c(female = "red", male = "blue"), name="")+ theme_bw()+ theme(axis.text.x = element_text(size=10, color="black"), axis.text.y = element_text(size=10, color="black"))+ facet_wrap(~year)
pyr.ar <- pyr %>% filter(Major.Area=="Armenia"&sex!="both") # exclude "both" pyr.az <- pyr %>% filter(Major.Area=="Azerbaijan"&sex!="both") # exclude "both" pyr.ar$order <- order pyr.az$order <- order pyr.c <- rbind(pyr.g, pyr.ar, pyr.az) pyr.c1 <- pyr.c[-c(which(pyr.c$age.group=="80+")),] ggplot(pyr.c1, aes(x=age.group, y=value, fill=factor(Major.Area)))+ geom_bar(data=pyr.c1 %>% filter(sex=="female"&year=="2015"), aes(x=reorder(age.group, order), y=value), stat="identity")+ geom_bar(data=pyr.c1 %>% filter(sex=="male"&year=="2015"), aes(x=reorder(age.group, order), y=-value), stat="identity")+ coord_flip()+ labs(x = "", y = "")+ scale_x_discrete(breaks=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)) , "100+"), labels=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+" ))+ scale_y_continuous(breaks=seq(-400,400,200), labels=abs(seq(-400,400,200)))+ theme_bw()+ scale_fill_manual(values = c(Armenia = "red", Georgia="green", Azerbaijan = "blue"), name="")+ theme(axis.text.x = element_text(size=10, color="black"), axis.text.y = element_text(size=10, color="black"), legend.position="none")+ facet_wrap(~Major.Area) #facet_wrap(~Major.Area, scales="free_x")
And with scales=”free_x”