Geofacet grids: Nigeria Federal States

Geofacet grid for Nigeria’s 37 Federal States (below):

Screen Shot 2017-07-20 at 12.31.24.png

row,col,code,name
1,4,NG.KT,Katsina
1,5,NG.KN,Kano
1,2,NG.SO,Sokoto
1,3,NG.ZA,Zamfara
1,6,NG.JI,Jigawa
1,7,NG.YO,Yobe
2,2,NG.KE,Kebbi
2,3,NG.NI,Niger
2,4,NG.KD,Kaduna
2,7,NG.BO,Borno
2,6,NG.GO,Gombe
2,5,NG.BA,Bauchi
3,1,NG.OY,Oyo
3,2,NG.KW,Kwara
3,3,NG.FC,Abuja FCT
3,4,NG.NA,Nassarawa
3,6,NG.AD,Adamawa
3,5,NG.PL,Plateau
4,3,NG.EK,Ekiti
4,1,NG.OG,Ogun
4,2,NG.OS,Osun
4,4,NG.KO,Kogi
4,6,NG.TA,Taraba
4,5,NG.BE,Benue
5,3,NG.ED,Edo
5,1,NG.LA,Lagos
5,2,NG.ON,Ondo
5,6,NG.EB,Ebonyi
5,4,NG.AN,Anambra
5,5,NG.EN,Enugu
6,2,NG.DE,Delta
6,3,NG.IM,Imo
6,4,NG.AB,Abia
6,5,NG.CR,Cross River
7,3,NG.BY,Bayelsa
7,4,NG.RI,Rivers
7,5,NG.AK,Akwa Ibom

A nice example of hafen/geofacet from Washington Post to ggplot2

I recently came across the hafen/geofacet package and was thinking of blogging an example. Then I came across a perfect one, thanks to kanishkamisra, who worked on the dataset & code and made them available via github here!

 

usa_vs_state1

BAR CHART: a ggplot balance plot (2)

Merchandise trade balance plot in ggplot2

BAR CHART+LINE

Graph 2: Merchandise trade balance

You can find the data for this plot here or alternatively here is the dput data for balance:

structure(list(variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Merchandize Trade Balance", class = "factor"),
type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = "Balance", class = "factor"), year = c(2013L,
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2013L), value = c(-0.5, -1.5, -0.1, -0.4, -0.2,
0, 0.1, -0.1, -0.6, -0.2, -0.2, -1.3, 0), geo = structure(c(2L,
4L, 7L, 9L, 1L, 6L, 12L, 5L, 3L, 11L, 10L, 13L, 8L), .Label = c("CIS",
"Dev. Asia Pacific", "Eastern Asia", "Europe", "Latin Am. And Carr.",
"North Africa", "North America", "Oceania", "South Eastern Europe",
"South-Eastern Asia", "Southern Asia", "Sub-Saharan Africa",
"Western Asia"), class = "factor")), .Names = c("variable",
"type", "year", "value", "geo"), class = "data.frame", row.names = c(NA,
-13L))
library(dplyr)   # data manipulation verbs (filter, mutate) and the %>% pipe
library(ggplot2) # plotting

# Keep only the merchandise trade rows of the full data set.
mer.bal <- mydt %>%
  filter(variable == "Merchandize Trade Balance")

# Non-balance rows: "Exports" values are kept as they are, every other
# remaining type is negated so the two sets of bars point in opposite
# directions.
base <- mer.bal %>%
  filter(type != "Balance") %>%
  mutate(value = ifelse(type == "Exports", value, -value))

# Balance rows, drawn as a thin bar over the export/import bars.
balance <- mer.bal %>%
  filter(type == "Balance")

# Three bar layers (exports, imports, balance) sharing one x/y mapping; the
# fill and outline scales use the same fixed colour per series.
ggplot(balance, aes(x = geo, y = value, fill = factor(type))) +
  geom_bar(
    data = filter(base, type == "Exports"),
    mapping = aes(colour = type),
    stat = "identity"
  ) +
  geom_bar(
    data = filter(base, type == "Imports"),
    mapping = aes(colour = type),
    stat = "identity"
  ) +
  geom_bar(
    data = balance,
    mapping = aes(colour = type),
    stat = "identity",
    width = .2
  ) +
  ggtitle(expression(atop("Merchandise trade balance", atop(italic("(Bln US$ by MDG Regions in 2013)"), "")))) +
  labs(x = "", y = "") +
  coord_flip() +
  scale_fill_manual(
    name = "",
    values = c(Exports = "#0072B2", Imports = "#56B4E9", Balance = "red")
  ) +
  scale_colour_manual(
    name = "",
    values = c(Exports = "#0072B2", Imports = "#56B4E9", Balance = "red")
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 8, color = "black"),
    axis.text.y = element_text(size = 8, color = "black"),
    legend.text = element_text(size = 10),
    plot.title = element_text(
      size = 20, face = "bold", colour = "black", vjust = -1
    )
  )

graph3

BAR CHART + LINE: a ggplot balance plot (1)

You can download session 9 files here (R-Ladies Tbilisi) and specify your working directory with setwd("/Users/mydomain/myfolder/")

BAR CHART + LINE:

###Graph 1: Total services trade, by value

 library(ggplot2) # plotting
library(dplyr)    # data manipulation and the %>% pipe

# Folder that holds the session files; the CSV below is read relative to it.
mypath <- "/Users/StayPuftMarshmallowMan/Shandor Folder/"
setwd(mypath)

# stringsAsFactors = TRUE keeps the text columns as factors, which is what the
# levels(mydt$variable) call further down relies on. Since R 4.0.0 read.csv()
# returns plain character columns by default, and levels() on those is NULL.
mydt <- read.csv(
  "Georgia_Data_UN.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)

head(mydt)
##                                            variable     type year   value
## 1 GDP: Gross domestic product (million current US$) economic 2014 16530.0
## 2 GDP: Gross domestic product (million current US$) economic 2010 11638.0
## 3 GDP: Gross domestic product (million current US$) economic 2005  6411.0
## 4    GDP growth rate (annual %, const. 2005 prices) economic 2014     4.8
## 5    GDP growth rate (annual %, const. 2005 prices) economic 2010     6.2
## 6    GDP growth rate (annual %, const. 2005 prices) economic 2005     9.6
##   geo
## 1
## 2
## 3
## 4
## 5
## 6
levels(mydt$variable)
##  [1] "Agricultural production index (2004-2006=100)"
##  [2] "Balance (million US$)"
##  [3] "Balance of payments, current account (million US$)"
##  [4] "CO2 emission estimates (tons per capita)"
##  [5] "CPI: Consumer price index (2000=100)"
##  [6] "Economy: Agriculture (% of GVA)"
##  [7] "Economy: Industry (% of GVA)"
##  [8] "Economy: Services and other activity (% of GVA)"
##  [9] "Education: Government expenditure (% of GDP)"
## [10] "Education: Tertiary gross enrolment ratio (f-m per 100 pop.)"
[...]
## [48] "Unemployment (% of labour force)"
## [49] "Urban population (%)"
## [50] "Urban population growth rate (average annual %)"
# Keep only the rows describing total trade in services.
ser.dt <- mydt %>%
  filter(variable == "Total Services Trade")

# Yearly balance = exports - imports. The two series are selected by their
# `type` label, so the result does not depend on the order in which the rows
# happen to appear within a year.
Balance <- ser.dt %>%
  group_by(year) %>%
  summarise(
    value = sum(value[type == "Exports"]) - sum(value[type == "Imports"])
  )

# Add the identifying columns so Balance has the same columns as ser.dt.
# The scalar values are recycled to however many years are present.
# geo is the literal string "NA" (not a missing value), as in the original.
Balance <- cbind(
  variable = "Total Services Trade",
  type = "Balance",
  Balance,
  geo = "NA"
)

# Exports, imports and the derived balance in a single long table.
mydata <- rbind(ser.dt, Balance)

subset with the pipe operator %>%

# Data for the bars: "Exports" values stay as they are, every other remaining
# type is negated so it is drawn below zero.
base <- mydata %>%
  filter(type != "Balance") %>%
  mutate(value = ifelse(type == "Exports", value, -value))

# Data for the line and points: the balance rows only.
balance <- mydata %>%
  filter(type == "Balance")

# Bars from `base`, with the balance overlaid as a single connected line
# (group = 1) plus one point per year.
ggplot(balance, aes(x = year, y = value)) +
  geom_bar(data = base, mapping = aes(fill = type), stat = "identity") +
  geom_point(mapping = aes(colour = type)) +
  geom_line(mapping = aes(colour = type, group = 1)) +
  scale_fill_manual(
    name = "",
    values = c(Exports = "#D55E00", Imports = "#E69F00")
  ) +
  scale_colour_manual(name = "", values = c(Balance = "#660000")) +
  labs(x = "", y = "Total Services Trade") +
  theme_bw()

Presentation1

DONUT CHART in ggplot2

 DONUT CHART

I personally don’t like pie charts that much, I prefer donut charts, they take up less space and the center can be used for extra annotations. In ggplot2 to get the “Donut” you design a bar chart (geom_bar) and then just bend it (coord_polar) at the extremities to get a donut.

To reproduce the chart below, you can download the data from the RLadies Tbilisi github webpage, Session 9 on Plotting.

Alternatively here’s the dput(-ted) data:

structure(list(X = 1:3, variable = structure(c(1L, 1L, 1L), .Label = "Export of Services", class = "factor"), type = structure(c(3L, 2L, 1L), .Label = c("Remaining", "Transportation", "Travel"), class = "factor"), year = c(2012L, 2012L, 2012L ), value = c(55.5, 33.4, 11.1), geo = c(NA, NA, NA), pos = c(27.75, 72.2, 94.45)), .Names = c("X", "variable", "type", "year", "value", "geo", "pos"), class = "data.frame", row.names = c(NA, -3L))

Exports of services by EBOPS category

#set the working directory
setwd("/Users/DrVenkman/The Gatekeepers Folder/")

library(tidyverse) # data manipulation (dplyr) and plotting (ggplot2)

# NOTE(review): the right-hand side of this assignment was lost when the post
# was extracted (only `exp.ser %` survived). `mydt` is assumed to be the full
# data set loaded beforehand -- TODO confirm.
exp.ser <- mydt %>%
  filter(variable == "Export of Services")

# Label position: the midpoint of each slice along the stacked axis,
# computed separately for every year.
exp.ser <- exp.ser %>%
  group_by(year) %>%
  mutate(pos = cumsum(value) - value / 2) %>%
  ungroup()

# A donut is a single stacked bar bent with coord_polar(); placing the bar at
# x = 2 while the x axis runs from 0.5 to 2.5 leaves the centre empty.
p <- ggplot(exp.ser, aes(x = 2, y = value, fill = type)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = value, y = pos), size = 10, fontface = "bold") +
  xlim(0.5, 2.5) +
  coord_polar(theta = "y") +
  labs(x = NULL, y = NULL, fill = "") +
  scale_fill_manual(
    values = c(
      Remaining = "blue",
      Transportation = "#E69F00",
      Travel = "#D55E00"
    ),
    name = ""
  ) +
  ggtitle("Exports of services by EBOPS category, 2013") +
  theme_bw() +
  # Strip every axis/grid decoration: they are meaningless on a donut.
  theme(
    plot.title = element_text(face = "bold", family = "sans", size = 15),
    legend.text = element_text(size = 10),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    panel.border = element_blank()
  )

p

graph2

 giphy

Violin plots in ggplot2

Use geom_violin() to quickly plot a visual summary of variables, using the Boston dataset, MASS library.

Use geom_violin() to quickly plot a visual summary of variables, using the Boston dataset from the MASS library.

1. Load the relevant libraries:

# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later with a less obvious message.
library(tidyr)        # reshaping wide -> long
library(ggplot2)      # plotting
library(RColorBrewer) # brewer.pal() palettes
library(randomcoloR)  # distinctColorPalette()
library(MASS)         # Boston data set

2. Load data and use the tidyr package to transform wide into long format:

# Load the Boston data set and stack the columns crim..medv into two columns:
# `variable` (the former column name) and `value` (its content).
data(Boston)
dt.long <- gather(
  Boston,
  key = "variable",
  value = "value",
  crim:medv
)

3. Create some color palettes:

# Fourteen colours interpolated between red and blue.
col <- colorRampPalette(colors = c("red", "blue"))(14)
# col.bp <- brewer.pal(9, "Set1") # brewer.pal only has a max of 9 colors
# Fourteen colours generated by randomcoloR, stored as a plain vector.
col.rc <- as.vector(distinctColorPalette(k = 14))

4. Plot(s):

  • With the standard colors produced by ggplot2:
# One violin with a narrow box plot inside it per variable; each variable gets
# its own panel with free axes. Fill colours are ggplot2's defaults.
ggplot(dt.long, aes(x = factor(variable), y = value)) +
  geom_violin(mapping = aes(fill = factor(variable))) +
  geom_boxplot(width = .1, colour = "black", alpha = 0.3) +
  labs(x = "", y = "") +
  theme_bw() +
  theme(legend.title = element_blank()) +
  facet_wrap(~ variable, scales = "free")

violin-ggplot-color

  • With the color palette produced by colorRampPalette:
# Same violin + box plot panels, filled with the colours stored in `col`.
ggplot(dt.long, aes(x = factor(variable), y = value)) +
  geom_violin(mapping = aes(fill = factor(variable))) +
  geom_boxplot(width = .1, colour = "black", alpha = 0.3) +
  labs(x = "", y = "") +
  scale_fill_manual(name = "", values = col) +
  theme_bw() +
  facet_wrap(~ variable, scales = "free")

violin-auto-color

  • With the color palette produced by randomcoloR library:
# Same violin + box plot panels, filled with the colours stored in `col.rc`.
ggplot(dt.long, aes(x = factor(variable), y = value)) +
  geom_violin(mapping = aes(fill = factor(variable))) +
  geom_boxplot(width = .1, colour = "black", alpha = 0.3) +
  labs(x = "", y = "") +
  scale_fill_manual(name = "", values = col.rc) +
  theme_bw() +
  facet_wrap(~ variable, scales = "free")

violin-rc-color

Plot maps with base mapping tools and ggmap in R

Plot maps with ‘base’ mapping tools in R

Understanding what kind of data you have (polygons or points?) and what you want to map is pivotal to start your mapping.

  1. First you need a shapefile of the area you want to plot, such as metropolitan France. There are various resources to get one from: DIVA-GIS and EUROSTAT are the ones I use the most. It’s always important to have a .prj file included, as your final map ‘should’ be projected. I say “should” because sometimes it is just not possible, especially if you work with historical maps.
  2. Load libraries

Load and prepare data

# Work from the folder stored in `mypath`, then read the "FRA_adm2" layer
# from the current directory.
setwd(mypath)
fr.prj <- readOGR(dsn = ".", layer = "FRA_adm2")
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "FRA_adm2"
## with 96 features
## It has 18 fields
## NOTE: rgdal::checkCRSArgs: no proj_defs.dat in PROJ.4 shared files
map(fr.prj)
rplot
## Warning in SpatialPolygons2map(database, namefield = namefield): database
## does not (uniquely) contain the field 'name'.

head(fr.prj@data)
##   ID_0 ISO NAME_0 ID_1    NAME_1  ID_2         NAME_2   VARNAME_2
## 0   76 FRA France  989    Alsace 13755       Bas-Rhin  Unterelsaá
## 1   76 FRA France  989    Alsace 13756      Haut-Rhin   Oberelsaá
## 2   76 FRA France  990 Aquitaine 13757       Dordogne        <NA>
## 3   76 FRA France  990 Aquitaine 13758        Gironde Bec-D'Ambes
## 4   76 FRA France  990 Aquitaine 13759         Landes      Landas
## 5   76 FRA France  990 Aquitaine 13760 Lot-Et-Garonne        <NA>
##   NL_NAME_2 HASC_2 CC_2      TYPE_2  ENGTYPE_2 VALIDFR_2 VALIDTO_2
## 0      <NA>  FR.BR <NA> Département Department  17900226   Unknown
## 1      <NA>  FR.HR <NA> Département Department  17900226   Unknown
## 2      <NA>  FR.DD <NA> Département Department  17900226   Unknown
## 3      <NA>  FR.GI <NA> Département Department  17900226   Unknown
## 4      <NA>  FR.LD <NA> Département Department  17900226   Unknown
## 5      <NA>  FR.LG <NA> Département Department  17900226   Unknown
##   REMARKS_2 Shape_Leng Shape_Area
## 0      <NA>   4.538735  0.5840273
## 1      <NA>   3.214178  0.4198797
## 2      <NA>   5.012795  1.0389622
## 3      <NA>   9.200047  1.1489822
## 4      <NA>   5.531231  1.0372815
## 5      <NA>   4.489830  0.6062017
# load or create data
set.seed(100)
# One random value per feature of the shapefile. The count is taken from the
# data itself instead of being hard-coded; rnorm(1:96) only worked because
# rnorm() uses the length of a vector passed as `n`.
myvar <- rnorm(nrow(fr.prj@data))
# manipulate data for the plot
# id is the row name of each feature, used later as the key to join on.
france.geodata <- data.frame(id = rownames(fr.prj@data), mapvariable = myvar)
head(france.geodata)
##   id mapvariable
## 1  0  1.12200636
## 2  1  0.05912043
## 3  2 -1.05873510
## 4  3 -1.31513865
## 5  4  0.32392954
## 6  5  0.09152878

Use ggmap

# Turn the spatial object into an ordinary data frame that ggplot can draw.
france.dataframe <- fortify(model = fr.prj)
## Regions defined for each Polygons
head(france.dataframe)
##       long      lat order  hole piece id group
## 1 7.847912 49.04728     1 FALSE     1  0   0.1
## 2 7.844539 49.04495     2 FALSE     1  0   0.1
## 3 7.852439 49.04510     3 FALSE     1  0   0.1
## 4 7.854333 49.04419     4 FALSE     1  0   0.1
## 5 7.855955 49.04431     5 FALSE     1  0   0.1
## 6 7.856299 49.03776     6 FALSE     1  0   0.1
# Merge the per-feature values with the fortified coordinates, matching the
# rows of both data frames on their shared "id" column.
france.dat <- join(
  france.geodata,
  france.dataframe,
  by = "id"
)
head(france.dat)
##   id mapvariable     long      lat order  hole piece group
## 1  0    1.122006 7.847912 49.04728     1 FALSE     1   0.1
## 2  0    1.122006 7.844539 49.04495     2 FALSE     1   0.1
## 3  0    1.122006 7.852439 49.04510     3 FALSE     1   0.1
## 4  0    1.122006 7.854333 49.04419     4 FALSE     1   0.1
## 5  0    1.122006 7.855955 49.04431     5 FALSE     1   0.1
## 6  0    1.122006 7.856299 49.03776     6 FALSE     1   0.1
# Plot 3
# Polygons filled by the mapped variable, with thin white outlines and equal
# scaling on both axes.
p <- ggplot(
  data = france.dat,
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(mapping = aes(fill = mapvariable)) +
  geom_path(colour = "white", size = 0.1) +
  coord_equal() +
  scale_fill_gradient(low = "#ffffcc", high = "#ff4444") +
  labs(title = "Our map", fill = "My variable")
# plot the map
p

image-22-02-2017-at-12-11

Use plot basic

nclassint <- 5 # number of colors to be used in the palette

# Split myvar into nclassint classes; style refers to how the breaks are
# created. NOTE: the variable name `cat` is the same as base R's cat()
# function; it is kept so existing references to it keep working.
cat <- classIntervals(myvar, nclassint, style = "jenks")
colpal <- brewer.pal(nclassint, "RdBu")
# One colour per observation, with the palette applied in reverse order.
color <- findColours(cat, rev(colpal))
bins <- cat$brks
lb <- length(bins)

plot(fr.prj, col = color, border = TRUE)
# Legend labels are "lower : upper" for each class: every break except the
# last one paired with every break except the first one.
legend(
  "bottomleft",
  fill = rev(colpal),
  legend = paste(round(bins[-lb], 1), ":", round(bins[-1], 1)),
  cex = 1,
  bg = "white"
)

image-22-02-2017-at-12-23-copy