Georgia Mapping in R

You can download session 9 files for constructing the population pyramids of Georgia here: https://github.com/rladies/meetup-presentations_tbilisi and specify your working directory with setwd(“/Users/mydomain/myfolder/”)

#set working directory
mypath<-"/Users/DrSpengler/The rectification of the Vuldrini/"
#upload shape files
georgia <- readOGR("./GEO_adm/","GEO_adm0")
## OGR data source with driver: ESRI Shapefile
## Source: "./GEO_adm/", layer: "GEO_adm0"
## with 1 features
## It has 70 fields
# plot(georgia, lwd=1.5)

georgia1 <- readOGR("./GEO_adm/","GEO_adm1")
## OGR data source with driver: ESRI Shapefile
## Source: "./GEO_adm/", layer: "GEO_adm1"
## with 12 features
## It has 16 fields
# plot(georgia1)

georgia2 <- readOGR("./GEO_adm/","GEO_adm2")
## OGR data source with driver: ESRI Shapefile
## Source: "./GEO_adm/", layer: "GEO_adm2"
## with 69 features
## It has 18 fields
# plot(georgia2)

gwat <- readOGR("./GEO_wat/" , "GEO_water_lines_dcw")
## OGR data source with driver: ESRI Shapefile
## Source: "./GEO_wat/", layer: "GEO_water_lines_dcw"
## with 559 features
## It has 5 fields
# plot(gwat)

gpop <- raster("./GEO_pop/geo_pop.grd")
# plot(gpop)

galt <- raster("./GEO_msk_alt/GEO_msk_alt.grd")
# plot(galt)
 plot(georgia, lwd=1.5) #n1

map1

 plot(georgia1, lwd=1.5) #n2

map2

 plot(georgia2, lwd=1.5) #n3

map3

 plot(georgia, lwd=1.5) #n4
 plot(gwat, lwd=1.5, col="blue", add=T) #n4

map4

 plot(gpop) #n5
 plot(georgia, lwd=1.5,  add=T) #n5

map5

 plot(galt, lwd=1.5) #n6

map6

Plot neighbouring countries

tur <- readOGR("./TUR_adm" , "TUR_adm0")
## OGR data source with driver: ESRI Shapefile
## Source: "./TUR_adm", layer: "TUR_adm0"
## with 1 features
## It has 70 fields
## Integer64 fields read as strings:  ID_0 OBJECTID_1
arm <- readOGR("./ARM_adm" , "ARM_adm0")
## OGR data source with driver: ESRI Shapefile
## Source: "./ARM_adm", layer: "ARM_adm0"
## with 1 features
## It has 70 fields
## Integer64 fields read as strings:  ID_0 OBJECTID_1
rus <- readOGR("./RUS_adm" , "RUS_adm0")
## OGR data source with driver: ESRI Shapefile
## Source: "./RUS_adm", layer: "RUS_adm0"
## with 1 features
## It has 70 fields
## Integer64 fields read as strings:  ID_0 OBJECTID_1
aze <- readOGR("./AZE_adm" , "AZE_adm0")
## OGR data source with driver: ESRI Shapefile
## Source: "./AZE_adm", layer: "AZE_adm0"
## with 1 features
## It has 70 fields
## Integer64 fields read as strings:  ID_0 OBJECTID_1

plot maps

plot(georgia, lwd=1.5, col="white", bg="lightblue")
plot(georgia1, add=T, lty=2)
plot(tur, add=T, col="white")
plot(arm, add=T, col="white")
plot(rus, add=T, col="white")
plot(aze, add=T, col="white")

map7

add labels for the countries

x.loc <- c(44.32002, 46.35746, 44.40421, 42.18156, 40.71662)
y.loc <- c(43.42472, 40.87209, 40.82228, 40.90945, 41.99276)
nb.lab <- c("Russia", "Azerbaijan", "Armenia", "Turkey", "Black Sea")
plot(georgia, lwd=1.5, col="white", bg="lightblue")
plot(georgia1, add=T, lty=2)
plot(tur, add=T, col="white")
plot(arm, add=T, col="white")
plot(rus, add=T, col="white")
plot(aze, add=T, col="white")
text(x.loc, y.loc, nb.lab)

let’s add everything (or almost everything) together

plot(gwat, col="blue")
# plot(georgia1[1,], lwd=1, col="lightblue", border="black", add=T)
plot(georgia2, lwd=0.5, border="black", lty=3, add=T)
plot(georgia1, border="black", lty=2, add=T)
plot(georgia, lwd=1.5, add=T)

map8

check georgia@data

head(georgia1)
##   ID_0 ISO  NAME_0 ID_1       NAME_1 VARNAME_1 NL_NAME_1 HASC_1 CC_1
## 0   81 GEO Georgia 1034     Abkhazia   Sokhumi      <NA>  GE.AB <NA>
## 1   81 GEO Georgia 1035       Ajaria    Batumi      <NA>  GE.AJ <NA>
## 2   81 GEO Georgia 1036        Guria  Ozurgeti      <NA>  GE.GU <NA>
## 3   81 GEO Georgia 1037      Imereti   Kutaisi      <NA>  GE.IM <NA>
## 4   81 GEO Georgia 1038      Kakheti    Telavi      <NA>  GE.KA <NA>
## 5   81 GEO Georgia 1039 Kvemo Kartli   Rustavi      <NA>  GE.KK <NA>
##                   TYPE_1           ENGTYPE_1 VALIDFR_1 VALIDTO_1 REMARKS_1
## 0 Avtonomiuri Respublika Autonomous Republic      1994   Present      <NA>
## 1 Avtonomiuri Respublika Autonomous Republic      1994   Present      <NA>
## 2                 Region              Region      1994   Present      <NA>
## 3                 Region              Region      1994   Present      <NA>
## 4                 Region              Region      1994   Present      <NA>
## 5                 Region              Region      1994   Present      <NA>
##   Shape_Leng Shape_Area
## 0   6.643211  0.9744622
## 1   3.055014  0.3074264
## 2   2.880653  0.2092665
## 3   4.214567  0.6783179
## 4   6.820519  1.2485036
## 5   5.219352  0.6807876

print labels on the map

labels for admin 2

coords2<- coordinates(georgia2[2:6,])
admin2 <- c(as.character(georgia2$NAME_2[1:5]))
admin2
## [1] "Gagra"      "Gali"       "Gudauta"    "Gulripshi"  "Ochamchire"

Upload data from World Bank

dt <- read.csv("/Users/ac1y15/Google Drive/blog/RLadies_Georgia_files/Session_3/Data_Extract_From_Subnational_Malnutrition/3f075abc-c51c-40c5-afb1-f8fbcfa30f23_Data.csv", header=T)
dt.1 <- subset(dt, dt$type==1&dt$select==1)

head(dt.1)
##            Admin.Region.Name select order
## 6                                 1     1
## 7  Georgia, Adjara Aut. Rep.      1     2
## 16            Georgia, Guria      1     3
## 26          Georgia, Imereti      1     4
## 31          Georgia, Kakheti      1     5
## 36     Georgia, Kvemo Kartli      1     6
##                         Admin.Region.Code type
## 6                                            1
## 7  GEO_Adjara_Aut._Rep._GE.AR_1297_GEO002    1
## 16            GEO_Guria_GE.GU_1298_GEO003    1
## 26          GEO_Imereti_GE.IM_1299_GEO004    1
## 31          GEO_Kakheti_GE.KA_1300_GEO005    1
## 36     GEO_Kvemo_Kartli_GE.KK_1301_GEO006    1
##                                                            Series.Name
## 6
## 7  Prevalence of overweight, weight for height (% of children under 5)
## 16 Prevalence of overweight, weight for height (% of children under 5)
## 26 Prevalence of overweight, weight for height (% of children under 5)
## 31 Prevalence of overweight, weight for height (% of children under 5)
## 36 Prevalence of overweight, weight for height (% of children under 5)
##          Series.Code YR2000 YR2005 YR2009
## 6                        NA     NA     NA
## 7  SN.SH.STA.OWGH.ZS     NA   28.1     NA
## 16 SN.SH.STA.OWGH.ZS     NA    7.9     NA
## 26 SN.SH.STA.OWGH.ZS    9.9   21.5     NA
## 31 SN.SH.STA.OWGH.ZS    7.0   19.6   13.2
## 36 SN.SH.STA.OWGH.ZS    9.5   28.2   19.1

Map the prevalence overweight w/h

library(classInt)
nclassint <- 3 #number of colors to be used in the palette
cat <- classIntervals(dt.1$YR2005, nclassint,style = "quantile") #style refers to how the breaks are created
colpal <- brewer.pal(nclassint,"Greens") #sequential
color.palette <- findColours(cat,colpal)
is.na(color.palette)
##  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE
## [12] FALSE
bins <- cat$brks
lb <- length(bins)

color.palette[c(1, 10)] <- "gray"
value.vec <- c(round(bins[-length(bins)],2))
value.vec.tail <- c(round(bins[-1],2))

Plot and SAVE map:

plot(georgia1, col=color.palette, border=T, main="Prevalence of overweight, \nweight for height (% of children under 5)")
legend("topright",fill=c("gray", "#E5F5E0", "#A1D99B", "#31A354"),legend=c("NA",paste(value.vec,":",value.vec.tail)),cex=1.1, bg="white", bty = "n")
# map.scale(41, 41, 2, "km", 2, 100)
map.scale(x=40.1, y=41.2, relwidth=0.1 , metric=T, ratio=F, cex=0.8)
SpatialPolygonsRescale(layout.north.arrow(2), offset= c(40.1, 41.6), scale = 0.5, plot.grid=F)

map12

Violin plots in ggplot2

Use geom_violin() to quickly plot a visual summary of variables, using the Boston dataset, MASS library.

Use geom_violin() to quickly plot a visual summary of variables, using the Boston dataset from the MASS library.

1. Upload the relevant libraries:

require(tidyr)
require(ggplot2)
require(RColorBrewer)
require(randomcoloR)
require(MASS)

2. Load data and use the tidyr package to transform wide into long format:

data(Boston)
dt.long <- gather(Boston, "variable",
"value", crim:medv)

3. Create some color palettes:

col <- colorRampPalette(c("red", "blue"))(14)
# col.bp <- brewer.pal(9, "Set1") # brewer.pal only has a max of 9 colors
col.rc <- as.vector(distinctColorPalette(14))

4. Plot(s):

  • With the standard colors produced by ggplot2:
ggplot(dt.long,aes(factor(variable), value))+
geom_violin(aes(fill=factor(variable)))+
geom_boxplot(alpha=0.3, color="black", width=.1)+
labs(x = "", y = "")+
theme_bw()+
theme(legend.title = element_blank())+
facet_wrap(~variable, scales="free")

violin-ggplot-color

  • With the color palette produced by colorRampPalette:
ggplot(dt.long,aes(factor(variable), value))+
geom_violin(aes(fill=factor(variable)))+
geom_boxplot(alpha=0.3, color="black", width=.1)+
labs(x = "", y = "")+
scale_fill_manual(values = col, name="")+
theme_bw()+
facet_wrap(~variable, scales="free")

violin-auto-color

  • With the color palette produced by randomcoloR library:
ggplot(dt.long,aes(factor(variable), value))+
geom_violin(aes(fill=factor(variable)))+
geom_boxplot(alpha=0.3, color="black", width=.1)+
labs(x = "", y = "")+
scale_fill_manual(values = col.rc, name="")+
theme_bw()+
facet_wrap(~variable, scales="free")

violin-rc-color

1887 crude mortality rate in Spain using classInt package

TBM_1887 jenks
Crude Mortality Rate in Spain, 1887 Census

TBM_1887 quantile TBM_1887 bclust TBM_1887 fisher

>nclassint <- 5 #number of colors to be used in the palette
>cat <- classIntervals(dt$TBM, nclassint,style = "jenks")
>colpal <- brewer.pal(nclassint,"Reds")
>color <- findColours(cat,colpal) #sequential
>bins <- cat$brks
>lb <- length(bins)
>cat

style: jenks
[20.3,25.9] (25.9,30.5] (30.5,34.4] (34.4,38.4] (38.4,58.2]
68         114         130         115          35

Save the categories into a data.frame (dat)

type first second third fourth fifth
1 quantile    91     93    92     91    95
2       sd    10    202   244      5     0
3    equal   100    246   113      2     1
4   kmeans    68    115   142    118    19
5    jenks    68    114   130    115    35
6   hclust   100    174   153     34     1
7   bclust    53    120   275     13     1
8   fisher    68    114   130    115    35

and melt it into a long format (required by ggplot):

dat1 <- melt(dat,id.vars=c("type"),value.name="n.breaks")

ggplot(dat1,aes(x=variable,y=n.breaks,fill=type))+
geom_bar(stat="identity", position=position_dodge())

Rplot

Mean Age at Childbearing in Spain 2011

TFR 2011 fixed