
Spatial Statistics in R

# The number of points to create

n <- 200

# Set the range

xmin <- 0

xmax <- 1

ymin <- 0

ymax <- 2

# Sample from a Uniform distribution

x <- runif(n, xmin, xmax)

y <- runif(n, ymin, ymax)

# See pre-defined variables

ls.str()

# Plot points and a rectangle

mapxy <- function(a = NA) {
  plot(x, y, asp = a)
  rect(xmin, ymin, xmax, ymax)
}

mapxy(1)

# Load the spatstat package

library(spatstat)

# Create this many points, in a circle of this radius


n_points <- 300

radius <- 10

# Generate uniform random numbers up to radius-squared

r_squared <- runif(n_points, 0, radius^2)

angle <- runif(n_points, 0, 2*pi)

# Take the square root of the values to get a uniform spatial distribution

x <- sqrt(r_squared) * cos(angle)

y <- sqrt(r_squared) * sin(angle)

plot(disc(radius)); points(x, y)
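# A quick sketch (not part of the original exercise) of why the square
# root matters: sampling the radius directly concentrates points near
# the center, because the area of an annulus grows with r.
r_naive <- runif(n_points, 0, radius)
plot(disc(radius)); points(r_naive * cos(angle), r_naive * sin(angle))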

# Some variables have been pre-defined

ls.str()

# Set coordinates and window

ppxy <- ppp(x = x, y = y, window = disc(radius))

# Test the point pattern

qt <- quadrat.test(ppxy)

# Inspect the results

plot(qt)
print(qt)
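# The test above compares quadrat counts to their Poisson expectation
# with a chi-squared statistic. The quadrat grid can also be set
# explicitly; a small sketch reusing the same pattern:
quadrat.test(ppxy, nx = 4, ny = 4)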

# Create a disc of radius 10

disc10 <- disc(10)

# Compute the rate as count divided by area

lambda <- 500 / area(disc10)

# Create a point pattern object

ppois <- rpoispp(lambda = lambda, win = disc10)

# Plot the Poisson point pattern

plot(ppois)
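# Note that rpoispp() also randomizes the total count, so the realized
# number of points varies around lambda * area(disc10) = 500. A quick
# check:
npoints(ppois)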

# Create a disc of radius 10

disc10 <- disc(10)

# Generate clustered points from a Thomas process


set.seed(123)

p_cluster <- rThomas(kappa = 0.35, scale = 1, mu = 3, win = disc10)

plot(p_cluster)

# Run a quadrat test

quadrat.test(p_cluster, alternative = "clustered")

# Regular points from a Strauss process

set.seed(123)

p_regular <- rStrauss(beta = 2.9, gamma = 0.025, R = .5, W = disc10)
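# For reference, from the spatstat documentation: beta sets the overall
# intensity, R is the interaction radius, and gamma in [0, 1] controls
# the inhibition -- gamma = 1 recovers a Poisson process, while gamma
# near 0 forbids pairs closer than R.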

plot(p_regular)

# Run a quadrat test

quadrat.test(p_regular, alternative = "regular")

# Point patterns are pre-defined

p_poisson; p_regular
# Calc nearest-neighbor distances for Poisson point data

nnd_poisson <- nndist(p_poisson)

# Draw a histogram of nearest-neighbor distances

hist(nnd_poisson)

# Estimate G(r)

G_poisson <- Gest(p_poisson)

# Plot G(r) vs. r

plot(G_poisson)

# Repeat for regular point data

nnd_regular <- nndist(p_regular)

hist(nnd_regular)

G_regular <- Gest(p_regular)

plot(G_regular)
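# A hedged extension (not in the original exercise): simulation
# envelopes for G(r) under complete spatial randomness. For a regular
# pattern, G should dip below the envelope at short distances, since
# nearby pairs are inhibited.
G_regular_env <- envelope(p_regular, Gest, nsim = 99)
plot(G_regular_env)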
# Point patterns are pre-defined

p_poisson; p_cluster; p_regular

# Estimate the K-function for the Poisson points

K_poisson <- Kest(p_poisson, correction = "border")

# The default plot shows quadratic growth

plot(K_poisson, . ~ r)

# Subtract pi * r^2 from the Y-axis and plot

plot(K_poisson, . - pi * r ^ 2 ~ r)

# Compute envelopes of K under random locations

K_cluster_env <- envelope(p_cluster, Kest, correction = "border")

# Insert the full formula to plot K minus pi * r^2

plot(K_cluster_env, . - pi * r ^ 2 ~ r)

# Repeat for regular data

K_regular_env <- envelope(p_regular, Kest, correction = "border")

plot(K_regular_env, . - pi * r ^ 2 ~ r)
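# An alternative reading (a sketch, not from the original exercise)
# using the variance-stabilized L-function, Lest(), for which the
# Poisson benchmark L(r) = r is a straight line.
L_regular_env <- envelope(p_regular, Lest, correction = "border")
plot(L_regular_env, . - r ~ r)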
# Load the spatstat package

library(spatstat)

# Get some summary information on the dataset

summary(preston_crime)

# Get a table of marks

table(marks(preston_crime))

# Define a function to create a map

preston_map <- function(cols = c("green", "red"), cex = c(1, 1), pch = c(1, 1)) {
  plotRGB(preston_osm)  # plotRGB() is from the raster package
  plot(preston_crime, cols = cols, pch = pch, cex = cex, add = TRUE, show.window = TRUE)
}

# Draw the map with colors, sizes and plot characters
preston_map(
  cols = c("black", "red"),
  cex = c(0.5, 1),
  pch = c(19, 19)
)
# preston_crime has been pre-defined

preston_crime

# Use the split function to show the two point patterns

crime_splits <- split(preston_crime)

# Plot the split crime

plot(crime_splits)

# Compute the densities of both sets of points

crime_densities <- density(crime_splits)

# Calc the violent density divided by the sum of both

frac_violent_crime_density <- crime_densities[["Violent crime"]] /

(crime_densities[["Non-violent crime"]] + crime_densities[["Violent crime"]])

# Plot the density of the fraction of violent crime

plot(frac_violent_crime_density)
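# Optionally overlay contour lines to read values off the map;
# contour() has a method for spatstat pixel images like this one.
contour(frac_violent_crime_density, add = TRUE)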
# spseg(), plotcv() and plotmc() below come from the spatialkernel package
library(spatialkernel)

# Scan from 500m to 1000m in steps of 50m
bw_choice <- spseg(
  preston_crime,
  h = seq(500, 1000, by = 50),
  opt = 1)

head(bw_choice)

# Plot the results and highlight the best bandwidth

plotcv(bw_choice); abline(v = bw_choice$hcv, lty = 2, col = "red")


# Print the best bandwidth

print(bw_choice$hcv)

# Set the correct bandwidth and run for 10 simulations only

seg10 <- spseg(
  pts = preston_crime,
  h = bw_choice$hcv,
  opt = 3,
  ntest = 10,
  proc = FALSE)

# Plot the segregation map for violent crime

plotmc(seg10, "Violent crime")

# Plot seg, the result of running 1000 simulations

plotmc(seg, "Violent crime")


# Inspect the structure of the spatial segregation object

str(seg)

# Get the number of columns in the data so we can rearrange to a grid

ncol <- length(seg$gridx)

# Rearrange the probability column into a grid

prob_violent <- list(x = seg$gridx,
                     y = seg$gridy,
                     z = matrix(seg$p[, "Violent crime"],
                                ncol = ncol))

image(prob_violent)


# Rearrange the p-values, but choose a p-value threshold

p_value <- list(x = seg$gridx,
                y = seg$gridy,
                z = matrix(seg$stpvalue[, "Violent crime"] < 0.05,
                           ncol = ncol))

image(p_value)
# Get a quick summary of the dataset

summary(sasq)

# Plot unmarked points

plot(unmark(sasq))

# Plot the points using a circle sized by date

plot(sasq, which.marks = "date")

Spatial pattern of bigfoot sightings


Does the pattern of sightings look regular, homogeneous, or clustered in the
study region? The sasq object has been loaded for you in case you need to plot
it again.

These terms were defined in Simulating clustered and inhibitory patterns.
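# One way to check this numerically rather than by eye (a sketch,
# assuming sasq is a spatstat ppp object): a quadrat test of the
# unmarked pattern against complete spatial randomness.
quadrat.test(unmark(sasq))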

# Show the available marks

names(marks(sasq))

# Histogram the dates of the sightings, grouped by year

hist(marks(sasq)$date, "years", freq = TRUE)

# Plot and tabulate the calendar month of all the sightings

plot(table(marks(sasq)$month))

# Split on the month mark

sasq_by_month <- split(sasq, "month", un = TRUE)

# Plot monthly maps

plot(sasq_by_month)

# Plot smoothed versions of the above maps.

plot(density(sasq_by_month))
# Use the cartogram and rgeos packages

library(cartogram)

library(rgeos)

# Make a scatterplot of electorate vs borough area


names(london_ref)

plot(london_ref$Electorate, gArea(london_ref, byid = TRUE))

# Make a cartogram, scaling the area to the electorate

carto_ref <- cartogram(london_ref, "Electorate")

plot(carto_ref)

# Check the linearity of the electorate-area plot

plot(carto_ref$Electorate, gArea(carto_ref, byid = TRUE))

# Make a fairer map of the Remain percentage

spplot(carto_ref, "Pct_Remain")
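# A version note (worth verifying for your installed version): newer
# releases of the cartogram package deprecate cartogram() in favor of
# cartogram_cont(), which takes the same arguments, e.g.
# cartogram_cont(london_ref, "Electorate").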
# Use the spdep package

library(spdep)

# Make neighbor list

borough_nb <- poly2nb(london_ref)

# Get center points of each borough

borough_centers <- coordinates(london_ref)

# Show the connections

plot(london_ref); plot(borough_nb, borough_centers, add = TRUE)

# Map the total pop'n

spplot(london_ref, zcol = "TOTAL_POP")


# Run a Moran I test on total pop'n

moran.test(
  london_ref$TOTAL_POP,
  nb2listw(borough_nb)
)

# Map % Remain

spplot(london_ref, zcol = "Pct_Remain")

# Run a Moran I MC test on % Remain

moran.mc(
  london_ref$Pct_Remain,
  nb2listw(borough_nb),
  nsim = 999
)
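# For intuition, a hand-rolled Moran's I under the same row-standardized
# weights (a sketch, not part of the exercise); it should match the
# statistic reported above.
w <- listw2mat(nb2listw(borough_nb))
z <- london_ref$Pct_Remain - mean(london_ref$Pct_Remain)
(length(z) / sum(w)) * sum(w * outer(z, z)) / sum(z^2)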
# Get a summary of the data set

summary(london)

# Map the OBServed number of flu reports

spplot(london, "Flu_OBS")
# Compute the overall incidence of flu
r <- sum(london$Flu_OBS) / sum(london$TOTAL_POP)

# Calculate the expected number for each borough

london$Flu_EXP <- london$TOTAL_POP * r

# Calculate the ratio of OBServed to EXPected

london$Flu_SMR <- london$Flu_OBS / london$Flu_EXP

# Map the SMR

spplot(london, "Flu_SMR")
# For the binomial statistics function

library(epitools)

# Get CI from binomial distribution

flu_ci <- binom.exact(london$Flu_OBS, london$TOTAL_POP)

# Add borough names

flu_ci$NAME <- london$NAME

# Calculate London rate, then compute SMR

r <- sum(london$Flu_OBS) / sum(london$TOTAL_POP)

flu_ci$SMR <- flu_ci$proportion / r

# Subset the high SMR data

flu_high <- flu_ci[flu_ci$SMR > 1, ]

# Plot estimates with CIs

library(ggplot2)

ggplot(flu_high, aes(x = NAME, y = proportion / r,
                     ymin = lower / r, ymax = upper / r)) +
  geom_pointrange() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))


# Probability of a binomial exceeding a multiple of the expected count
binom.exceed <- function(observed, population, expected, e) {
  1 - pbinom(e * expected, population, prob = observed / population)
}

# Compute P(rate > 2)

london$Flu_gt_2 <- binom.exceed(
  observed = london$Flu_OBS,
  population = london$TOTAL_POP,
  expected = london$Flu_EXP,
  e = 2)

# Use a 50-color palette that only starts changing at around 0.8
pal <- c(
  rep("#B0D0B0", 40),
  colorRampPalette(c("#B0D0B0", "orange"))(5),
  colorRampPalette(c("orange", "red"))(5)
)

# Plot the P(rate > 2) map

spplot(london, "Flu_gt_2", col.regions = pal, at = seq(0, 1, len = 50))

# Fit a poisson GLM.

model_flu <- glm(
  Flu_OBS ~ HealthDeprivation,
  offset = log(TOTAL_POP),
  data = london,
  family = poisson)

# Is HealthDeprivation significant?

summary(model_flu)
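# Because the model uses a log link with a log-population offset, the
# coefficients are on the log-rate scale; exponentiating them gives
# rate ratios per unit of HealthDeprivation.
exp(coef(model_flu))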
# Put residuals into the spatial data.

london$Flu_Resid <- residuals(model_flu)

# Map the residuals using spplot

spplot(london, "Flu_Resid")

# Compute the neighborhood structure.

library(spdep)

borough_nb <- poly2nb(london)

# Test spatial correlation of the residuals.

moran.mc(london$Flu_Resid, listw = nb2listw(borough_nb), nsim = 999)

# Use R2BayesX

library(R2BayesX)

# Fit a GLM

model_flu <- glm(Flu_OBS ~ HealthDeprivation, offset = log(TOTAL_POP),
                 data = london, family = poisson)

# Summarize it

summary(model_flu)
# Calculate coeff confidence intervals

confint(model_flu)

# Fit a Bayesian GLM

bayes_flu <- bayesx(Flu_OBS ~ HealthDeprivation, offset = log(london$TOTAL_POP),
                    family = "poisson", data = data.frame(london),
                    control = bayesx.control(seed = 17610407))

# Summarize it

summary(bayes_flu)

# Look at the samples from the Bayesian model

plot(samples(bayes_flu))

# Compute adjacency objects

borough_nb <- poly2nb(london)

borough_gra <- nb2gra(borough_nb)

# Fit spatial model

flu_spatial <- bayesx(
  Flu_OBS ~ HealthDeprivation + sx(i, bs = "spatial", map = borough_gra),
  offset = log(london$TOTAL_POP),
  family = "poisson", data = data.frame(london),
  control = bayesx.control(seed = 17610407)
)

# Summarize the model

summary(flu_spatial)


# Map the fitted spatial term only

london$spatial <- fitted(flu_spatial, term = "sx(i):mrf")[, "Mean"]

spplot(london, zcol = "spatial")

# Map the residuals

london$spatial_resid <- residuals(flu_spatial)[, "mu"]

spplot(london, zcol = "spatial_resid")

# Test residuals for spatial correlation

moran.mc(london$spatial_resid, nb2listw(borough_nb), 999)


# ca_geo has been pre-defined

str(ca_geo, 1)

# See what measurements are at each location

names(ca_geo)

# Get a summary of the acidity (pH) values

summary(ca_geo$pH)

# Look at the distribution

hist(ca_geo$pH)

# Make a vector that is TRUE for the missing data

miss <- is.na(ca_geo$pH)

table(miss)

# Plot a map of acidity

spplot(ca_geo[!miss, ], "pH")
# ca_geo has been pre-defined

str(ca_geo, 1)

# Are they called lat-long, up-down, or what?

coordnames(ca_geo)

# Complete the formula

m_trend <- lm(pH ~ x + y, as.data.frame(ca_geo))

# Check the coefficients

summary(m_trend)
