Kuwait population pyramids: changes between 2000-2020

Plot made in ggplot2 using geofacet, personal elaboration of Census data.

KWT_pyr2000-2020

Georgia Mapping in R

You can download session 9 files for constructing the population pyramids of Georgia here: https://github.com/rladies/meetup-presentations_tbilisi and specify your working directory with setwd(“/Users/mydomain/myfolder/”)

#set working directory
mypath<-"/Users/DrSpengler/The rectification of the Vuldrini/"
#upload shape files
georgia <- readOGR("./GEO_adm/","GEO_adm0")
## OGR data source with driver: ESRI Shapefile
## Source: "./GEO_adm/", layer: "GEO_adm0"
## with 1 features
## It has 70 fields
# plot(georgia, lwd=1.5)

georgia1 <- readOGR("./GEO_adm/","GEO_adm1")
## OGR data source with driver: ESRI Shapefile
## Source: "./GEO_adm/", layer: "GEO_adm1"
## with 12 features
## It has 16 fields
# plot(georgia1)

georgia2 <- readOGR("./GEO_adm/","GEO_adm2")
## OGR data source with driver: ESRI Shapefile
## Source: "./GEO_adm/", layer: "GEO_adm2"
## with 69 features
## It has 18 fields
# plot(georgia2)

gwat <- readOGR("./GEO_wat/" , "GEO_water_lines_dcw")
## OGR data source with driver: ESRI Shapefile
## Source: "./GEO_wat/", layer: "GEO_water_lines_dcw"
## with 559 features
## It has 5 fields
# plot(gwat)

gpop <- raster("./GEO_pop/geo_pop.grd")
# plot(gpop)

galt <- raster("./GEO_msk_alt/GEO_msk_alt.grd")
# plot(galt)
 plot(georgia, lwd=1.5) #n1

map1

 plot(georgia1, lwd=1.5) #n2

map2

 plot(georgia2, lwd=1.5) #n3

map3

 plot(georgia, lwd=1.5) #n4
 plot(gwat, lwd=1.5, col="blue", add=T) #n4

map4

 plot(gpop) #n5
 plot(georgia, lwd=1.5,  add=T) #n5

map5

 plot(galt, lwd=1.5) #n6

map6

Plot neighbouring countries

tur <- readOGR("./TUR_adm" , "TUR_adm0")
## OGR data source with driver: ESRI Shapefile
## Source: "./TUR_adm", layer: "TUR_adm0"
## with 1 features
## It has 70 fields
## Integer64 fields read as strings:  ID_0 OBJECTID_1
arm <- readOGR("./ARM_adm" , "ARM_adm0")
## OGR data source with driver: ESRI Shapefile
## Source: "./ARM_adm", layer: "ARM_adm0"
## with 1 features
## It has 70 fields
## Integer64 fields read as strings:  ID_0 OBJECTID_1
rus <- readOGR("./RUS_adm" , "RUS_adm0")
## OGR data source with driver: ESRI Shapefile
## Source: "./RUS_adm", layer: "RUS_adm0"
## with 1 features
## It has 70 fields
## Integer64 fields read as strings:  ID_0 OBJECTID_1
aze <- readOGR("./AZE_adm" , "AZE_adm0")
## OGR data source with driver: ESRI Shapefile
## Source: "./AZE_adm", layer: "AZE_adm0"
## with 1 features
## It has 70 fields
## Integer64 fields read as strings:  ID_0 OBJECTID_1

plot maps

plot(georgia, lwd=1.5, col="white", bg="lightblue")
plot(georgia1, add=T, lty=2)
plot(tur, add=T, col="white")
plot(arm, add=T, col="white")
plot(rus, add=T, col="white")
plot(aze, add=T, col="white")

map7

add labels for the countries

x.loc <- c(44.32002, 46.35746, 44.40421, 42.18156, 40.71662)
y.loc <- c(43.42472, 40.87209, 40.82228, 40.90945, 41.99276)
nb.lab <- c("Russia", "Azerbaijan", "Armenia", "Turkey", "Black Sea")
plot(georgia, lwd=1.5, col="white", bg="lightblue")
plot(georgia1, add=T, lty=2)
plot(tur, add=T, col="white")
plot(arm, add=T, col="white")
plot(rus, add=T, col="white")
plot(aze, add=T, col="white")
text(x.loc, y.loc, nb.lab)

let’s add everything (or almost everything) together

plot(gwat, col="blue")
# plot(georgia1[1,], lwd=1, col="lightblue", border="black", add=T)
plot(georgia2, lwd=0.5, border="black", lty=3, add=T)
plot(georgia1, border="black", lty=2, add=T)
plot(georgia, lwd=1.5, add=T)

map8

check georgia@data

head(georgia1)
##   ID_0 ISO  NAME_0 ID_1       NAME_1 VARNAME_1 NL_NAME_1 HASC_1 CC_1
## 0   81 GEO Georgia 1034     Abkhazia   Sokhumi      <NA>  GE.AB <NA>
## 1   81 GEO Georgia 1035       Ajaria    Batumi      <NA>  GE.AJ <NA>
## 2   81 GEO Georgia 1036        Guria  Ozurgeti      <NA>  GE.GU <NA>
## 3   81 GEO Georgia 1037      Imereti   Kutaisi      <NA>  GE.IM <NA>
## 4   81 GEO Georgia 1038      Kakheti    Telavi      <NA>  GE.KA <NA>
## 5   81 GEO Georgia 1039 Kvemo Kartli   Rustavi      <NA>  GE.KK <NA>
##                   TYPE_1           ENGTYPE_1 VALIDFR_1 VALIDTO_1 REMARKS_1
## 0 Avtonomiuri Respublika Autonomous Republic      1994   Present      <NA>
## 1 Avtonomiuri Respublika Autonomous Republic      1994   Present      <NA>
## 2                 Region              Region      1994   Present      <NA>
## 3                 Region              Region      1994   Present      <NA>
## 4                 Region              Region      1994   Present      <NA>
## 5                 Region              Region      1994   Present      <NA>
##   Shape_Leng Shape_Area
## 0   6.643211  0.9744622
## 1   3.055014  0.3074264
## 2   2.880653  0.2092665
## 3   4.214567  0.6783179
## 4   6.820519  1.2485036
## 5   5.219352  0.6807876

print labels on the map

labels for admin 2

coords2<- coordinates(georgia2[2:6,])
admin2 <- c(as.character(georgia2$NAME_2[1:5]))
admin2
## [1] "Gagra"      "Gali"       "Gudauta"    "Gulripshi"  "Ochamchire"

Upload data from World Bank

dt <- read.csv("/Users/ac1y15/Google Drive/blog/RLadies_Georgia_files/Session_3/Data_Extract_From_Subnational_Malnutrition/3f075abc-c51c-40c5-afb1-f8fbcfa30f23_Data.csv", header=T)
dt.1 <- subset(dt, dt$type==1&dt$select==1)

head(dt.1)
##            Admin.Region.Name select order
## 6                                 1     1
## 7  Georgia, Adjara Aut. Rep.      1     2
## 16            Georgia, Guria      1     3
## 26          Georgia, Imereti      1     4
## 31          Georgia, Kakheti      1     5
## 36     Georgia, Kvemo Kartli      1     6
##                         Admin.Region.Code type
## 6                                            1
## 7  GEO_Adjara_Aut._Rep._GE.AR_1297_GEO002    1
## 16            GEO_Guria_GE.GU_1298_GEO003    1
## 26          GEO_Imereti_GE.IM_1299_GEO004    1
## 31          GEO_Kakheti_GE.KA_1300_GEO005    1
## 36     GEO_Kvemo_Kartli_GE.KK_1301_GEO006    1
##                                                            Series.Name
## 6
## 7  Prevalence of overweight, weight for height (% of children under 5)
## 16 Prevalence of overweight, weight for height (% of children under 5)
## 26 Prevalence of overweight, weight for height (% of children under 5)
## 31 Prevalence of overweight, weight for height (% of children under 5)
## 36 Prevalence of overweight, weight for height (% of children under 5)
##          Series.Code YR2000 YR2005 YR2009
## 6                        NA     NA     NA
## 7  SN.SH.STA.OWGH.ZS     NA   28.1     NA
## 16 SN.SH.STA.OWGH.ZS     NA    7.9     NA
## 26 SN.SH.STA.OWGH.ZS    9.9   21.5     NA
## 31 SN.SH.STA.OWGH.ZS    7.0   19.6   13.2
## 36 SN.SH.STA.OWGH.ZS    9.5   28.2   19.1

Map the prevalence overweight w/h

library(classInt)
nclassint <- 3 #number of colors to be used in the palette
cat <- classIntervals(dt.1$YR2005, nclassint,style = "quantile") #style refers to how the breaks are created
colpal <- brewer.pal(nclassint,"Greens") #sequential
color.palette <- findColours(cat,colpal)
is.na(color.palette)
##  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE
## [12] FALSE
bins <- cat$brks
lb <- length(bins)

color.palette[c(1, 10)] <- "gray"
value.vec <- c(round(bins[-length(bins)],2))
value.vec.tail <- c(round(bins[-1],2))

Plot and SAVE map:

plot(georgia1, col=color.palette, border=T, main="Prevalence of overweight, \nweight for height (% of children under 5)")
legend("topright",fill=c("gray", "#E5F5E0", "#A1D99B", "#31A354"),legend=c("NA",paste(value.vec,":",value.vec.tail)),cex=1.1, bg="white", bty = "n")
# map.scale(41, 41, 2, "km", 2, 100)
map.scale(x=40.1, y=41.2, relwidth=0.1 , metric=T, ratio=F, cex=0.8)
SpatialPolygonsRescale(layout.north.arrow(2), offset= c(40.1, 41.6), scale = 0.5, plot.grid=F)

map12

See you at IUSSP to talk about the fantastic work we do at WorldPop! (plus Demotrends and R-Ladies)

As IUSSP is approaching, I’m looking forward to talk more about fine grid scale mapping research at WorldPop (University of Southampton) and Flowminder.

I will present my research on  the 30th of October in session 5 at 8:30am Integrating spatial and statistical methods in demographic research, meeting room 1.41 and 1.42.

Prof. Andy Tatem will host a side meeting Geospatial Demography: Combining Satellite, Survey, Census and Cellphone Data to Provide Small-area Estimates on the 29 October 2017, 8:30-16:00.

and  will be contributing to the Cape Town R-Ladies chapter Saturday 4 November (details here) with a talk on R-Ladies and data visualization in ggplot2. Come talk to us and become an R-Lady, we are looking forward to sharing our experiences.

A few Demotrend(ers) will also be presenting at IUSSP, come talk to us 🙂

See you in Cape Town!

 

Global and Local Measures of Spatial Autocorrelation

This post aims at being a summary of the available techniques to investigate spatial autocorrelation for the social sciences, rather than presenting the theory behind spatial autocorrelation. For that there are great books available on line, like Anselin’s, Le Page, and Bivand just to cite a few.

The techniques presented here work for a spatial polygons data frame. The difference between spatial points and polygons data frames is not that big, the idea is the same and most of what I am doing here can be applied to data points.

Why do we look at spatial autocorrelation at all? Spatial autocorrelation leads to biased results in regressions, this is the reason why we want to compute Moran’s I and why we include spatial autocorrelation if its measurement proves to be significative and non-random.

Spatial autocorrelation can be investigated globally or locally. “Globally”, implies that the measure you’re going to obtain refers to the dataset as a whole, whether it is a whole country, continent or region. “Locally”, means that you are taking into consideration each and every polygon and getting a measure for each one of them.

We start by uploading the data, projecting them (important when considering the distance based measures -earth is not flat, whatever they may say!), and construct neighborhood relations (in this case Queen and Rook, but could be any other). For more detail see this post on how to construct neighbor relations.

library(maptools)
library(spdep)
NC= readShapePoly(system.file("shapes/sids.shp", package="maptools")[1], IDvar="FIPSNO", proj4string=CRS("+proj=longlat +ellps=clrk66"))
nb.FOQ = poly2nb(NC, queen=TRUE, row.names=NC$FIPSNO)
nb.RK = poly2nb(NC, queen=FALSE, row.names=NC$FIPSNO)

Global measures of spatial autocorrelation

There are two main measures of global spatial autocorrelation: Moran’s I and Geary’s C. Moran’s I is the most used in my experience, but both work are perfectly acceptable.
Moran’s I ranges between -1 (strong negative spatial autocorrelation with a dispersed pattern) and 1 (strong positive spatial autocorrelation with a clustered pattern) with 0 being the absence of spatial autocorrelation.
Geary’s C ranges between 0 and 2, with positive spatial autocorrelation ranging from 0 to 1 and negative spatial autocorrelation between 1 and 2.
Of course being these inferential measures, if the p-value is non significant we cannot exclude that the patterns could be random(!)

In this case Moran’s I is positive and significant, the z-score (not provided by moran.test) is positive implying spatial clusters, so we can reject the null hypothesis.

library(spdep)
nwb <- NC$NWBIR74
moran.test(nwb, listw = nb2listw(nb.RK))
##
##  Moran I test under randomisation
##
## data:  nwb
## weights: nb2listw(nb.RK)
##
## Moran I statistic standard deviate = 3.0787, p-value = 0.001039
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance
##       0.185965551      -0.010101010       0.004055701
geary.test(nwb, listw = nb2listw(nb.RK))
##
##  Geary C test under randomisation
##
## data:  nwb
## weights: nb2listw(nb.RK)
##
## Geary C statistic standard deviate = 2.0324, p-value = 0.02106
## alternative hypothesis: Expectation greater than statistic
## sample estimates:
## Geary C statistic       Expectation          Variance
##        0.83274888        1.00000000        0.00677185

if you have polygons with no neighbors remember to specify zero.policy=NULL

moran.plot(nwb, listw = nb2listw(nb.RK))

moran plot

moran.mc(nwb, listw = nb2listw(nb.RK), nsim=100)
##
##  Monte-Carlo simulation of Moran I
##
## data:  nwb
## weights: nb2listw(nb.RK)
## number of simulations + 1: 101
##
## statistic = 0.18597, observed rank = 100, p-value = 0.009901
## alternative hypothesis: greater

permutation Moran_s I

plot(moran.mc(nwb, listw = nb2listw(nb.RK), nsim=100))


Same thing can be done for Geary’s C:

geary.mc(nwb, listw = nb2listw(nb.RK), nsim=100)
##
##  Monte-Carlo simulation of Geary C
##
## data:  nwb
## weights: nb2listw(nb.RK)
## number of simulations + 1: 101
##
## statistic = 0.83275, observed rank = 5, p-value = 0.0495
## alternative hypothesis: greater
plot(geary.mc(nwb, listw = nb2listw(nb.RK), nsim=100))

Local measures of spatial autocorrelation

Local Moran and Local G

locm <- localmoran(nwb, listw = nb2listw(nb.RK))
locG <- localG(nwb, listw = nb2listw(nb.RK))

Get the neighbor matrix into a listwise format with listw: there’s two options here, row-standardized weights matrix style = "W" creates proportional weights when polygons have an unequal number of neighbors, balancing out observations with few neighbors. Binary weights style = "B" upweight observations with many neighbors.

library(classInt)
library(dplyr)
myvar <- NC$NWBIR74
nb <- nb.RK
# Define weight style
ws <- c("W")

# Define significance for the maps
significance <- 0.05
plot.only.significant <- TRUE

# Transform the neigh mtx into a listwise object
listw <- nb2listw(nb, style=ws)

# Create the lagged variable
lagvar <- lag.listw(listw, myvar)

# get the mean of each
m.myvar <- mean(myvar)
m.lagvar <- mean(lagvar)

The next step is to derive the quadrants and set the coloring scheme. I like to color the border of each polygon with the color of their local moran score, regardless of their pvalue, and then fill only the significant ones.

n <- length(NC)
#
vec <- c(1:n)
vec <- ifelse(locm[,5] < significance, 1,0)

# Derive quadrants
q <- c(1:n) for (i in 1:n) {   if (myvar[[i]]>=m.myvar & lagvar[[i]]>=m.lagvar) q[i] <- 1
  if (myvar[[i]]<m.myvar & lagvar[[i]]<m.lagvar) q[i] <- 2
  if (myvar[[i]]<m.myvar & lagvar[[i]]>=m.lagvar) q[i] <- 3   if (myvar[[i]]>=m.myvar & lagvar[[i]]<m.lagvar) q[i] <- 4
}

# set coloring scheme
q.all <- q
colors <- c(1:n)
for (i in 1:n) {
  if (q.all[i]==1) colors[i] <- "red"
  if (q.all[i]==2) colors[i] <- "blue"
  if (q.all[i]==3) colors[i] <- "lightblue"
  if (q.all[i]==4) colors[i] <- "pink"
  if (q.all[i]==0) colors[i] <- "white"   if (q.all[i]>4) colors[i] <- "white"
}

# Mark all non-significant regions white
locm.dt <- q*vec
colors1 <- colors
for (i in 1:n)
{
  if ( !(is.na (locm.dt[i])) )  {
  if (locm.dt[i]==0) colors1[i] <- "white"
}
}
colors2 <- colors
colors2 <- paste(colors2,vec)
pos = list()
for (i in 1:n) {
  pos[[i]] <- c(which(NC$NWBIR74==colors2["blue 0"]))
}

blue0 <- which(colors2=="blue 0")
red0 <- which(colors2=="red 0")
lightblue0 <- which(colors2=="lightblue 0")
pink0 <- which(colors2=="pink 0")
lb <- 6
labels=c("High-High", "High-Low", "Low-High", "Low-Low")

# plot the map
if (plot.only.significant==TRUE) plot(NC, col=colors1,border=F) else
  plot(NC, col=colors,border=F)
plot(NC[blue0,],border="blue",lwd=0.5,add=T)
plot(NC[lightblue0,],border="lightblue",add=T,lwd=0.5)
plot(NC[red0,],border="red",add=T,lwd=0.5)
plot(NC[pink0,],border="pink",add=T,lwd=0.5)
legend("bottomleft", legend = labels, fill = c("red", "pink", "lightblue", "blue"), bty = "n")

lisa map
Local G gives back z-scores values and indicate the posibility of a local cluster of high values of the variable being analysed, very low values indicate a similar cluster of low values.

library(RColorBrewer)

nclassint <- 3
colpal <- brewer.pal(nclassint,"PiYG")
cat <- classIntervals(locG, nclassint, style = "jenks", na.ignore=T)
color.z <- findColours(cat, colpal)

plot(NC, col= color.z, border=T)

Rplot

# color only significant polygons
plot(NC, border=T)
plot(NC[which(vec==1),], col=color.z[which(vec==1)], border=T, add=T)

lisa map2

A nice example of hafen/geofacet from Washington Post to ggplot2

I’ve recently came across the hafen/geofacet function and was pondering to blog an example. Then, I came across a perfect example, thanks to  kanishkamisra for working on the dataset & code and making it available via github here!

 

usa_vs_state1

High Resolution Mapping of Fertility and Mortality from Household Survey Data in Low Income Settings – PAA presentation

I will present at PAA my WorldPop mapping of Demographic indicators in low-income settings at PAA in Chicago.  “Advances in Mathematical, Spatial, and Small-Area Demography”, Thursday, April 27, 2017: 10:15 AM – 11:45 AM, Hilton, Joliet Room.

Find color breaks for mapping (fast)

I’ve stumbled upon a little trick to compute jenks breaks faster than with the classInt package, just be sure to use n+1 instead of n as the breaks are computed a little bit differently. That is to say, if you want 5 breaks, n=6, no biggie there.

For more on the Bayesian Analysis of Macroevolutionary Mixtures see BAMMtools library

install.packages("BAMMtools")
library(BAMMtools)
system.time(getJenksBreaks(mydata$myvar, 6))
> user system elapsed
> 0.970 0.001 0.971

On the other hand this takes way more time with large datasets
library(classInt)
system.time(classIntervals(mydata$myvar, n=5, style="jenks"))
> Timing stopped at: 1081.894 1.345 1083.511