
Spatial Statistics in R

# The number of points to create

n <- 200

# Set the range

xmin <- 0

xmax <- 1

ymin <- 0

ymax <- 2

# Sample from a Uniform distribution

x <- runif(n, xmin, xmax)

y <- runif(n, ymin, ymax)

# See pre-defined variables

ls.str()

# Plot points and a rectangle

mapxy <- function(a = NA) {
  plot(x, y, asp = a)
  rect(xmin, ymin, xmax, ymax)
}

mapxy(1)

# Load the spatstat package

library(spatstat)

# Create this many points, in a circle of this radius


n_points <- 300

radius <- 10

# Generate uniform random numbers up to radius-squared

r_squared <- runif(n_points, 0, radius^2)

angle <- runif(n_points, 0, 2*pi)

# Take the square root of the values to get a uniform spatial distribution

x <- sqrt(r_squared) * cos(angle)

y <- sqrt(r_squared) * sin(angle)

plot(disc(radius)); points(x, y)
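# A quick sketch (not part of the original exercise) of why the square
# root matters: sampling the radius directly concentrates points near
# the center, because the area of an annulus grows with r.
r_naive <- runif(n_points, 0, radius)
plot(disc(radius)); points(r_naive * cos(angle), r_naive * sin(angle))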

# Some variables have been pre-defined

ls.str()

# Set coordinates and window

ppxy <- ppp(x = x, y = y, window = disc(radius))

# Test the point pattern

qt <- quadrat.test(ppxy)

# Inspect the results

plot(qt)
print(qt)
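# The test above compares quadrat counts to their Poisson expectation
# with a chi-squared statistic. The quadrat grid can also be set
# explicitly; a small sketch reusing the same pattern:
quadrat.test(ppxy, nx = 4, ny = 4)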

# Create a disc of radius 10

disc10 <- disc(10)

# Compute the rate as count divided by area

lambda <- 500 / area(disc10)

# Create a point pattern object

ppois <- rpoispp(lambda = lambda, win = disc10)

# Plot the Poisson point pattern

plot(ppois)
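# Note that rpoispp() also randomizes the total count, so the realized
# number of points varies around lambda * area(disc10) = 500. A quick
# check:
npoints(ppois)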

# Create a disc of radius 10

disc10 <- disc(10)

# Generate clustered points from a Thomas process


set.seed(123)

p_cluster <- rThomas(kappa = 0.35, scale = 1, mu = 3, win = disc10)

plot(p_cluster)

# Run a quadrat test

quadrat.test(p_cluster, alternative = "clustered")

# Regular points from a Strauss process

set.seed(123)

p_regular <- rStrauss(beta = 2.9, gamma = 0.025, R = .5, W = disc10)
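# For reference, from the spatstat documentation: beta sets the overall
# intensity, R is the interaction radius, and gamma in [0, 1] controls
# the inhibition -- gamma = 1 recovers a Poisson process, while gamma
# near 0 forbids pairs closer than R.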

plot(p_regular)

# Run a quadrat test

quadrat.test(p_regular, alternative = "regular")

# Point patterns are pre-defined

p_poisson; p_regular
# Calc nearest-neighbor distances for Poisson point data

nnd_poisson <- nndist(p_poisson)

# Draw a histogram of nearest-neighbor distances

hist(nnd_poisson)

# Estimate G(r)

G_poisson <- Gest(p_poisson)

# Plot G(r) vs. r

plot(G_poisson)

# Repeat for regular point data

nnd_regular <- nndist(p_regular)

hist(nnd_regular)

G_regular <- Gest(p_regular)

plot(G_regular)
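# A hedged extension (not in the original exercise): simulation
# envelopes for G(r) under complete spatial randomness. For a regular
# pattern, G should dip below the envelope at short distances, since
# nearby pairs are inhibited.
G_regular_env <- envelope(p_regular, Gest, nsim = 99)
plot(G_regular_env)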
# Point patterns are pre-defined

p_poisson; p_cluster; p_regular

# Estimate the K-function for the Poisson points

K_poisson <- Kest(p_poisson, correction = "border")

# The default plot shows quadratic growth

plot(K_poisson, . ~ r)

# Subtract pi * r^2 from the Y-axis and plot

plot(K_poisson, . - pi * r ^ 2 ~ r)

# Compute envelopes of K under random locations

K_cluster_env <- envelope(p_cluster, Kest, correction = "border")

# Insert the full formula to plot K minus pi * r^2

plot(K_cluster_env, . - pi * r ^ 2 ~ r)

# Repeat for regular data

K_regular_env <- envelope(p_regular, Kest, correction = "border")

plot(K_regular_env, . - pi * r ^ 2 ~ r)
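# An alternative reading (a sketch, not from the original exercise)
# using the variance-stabilized L-function, Lest(), for which the
# Poisson benchmark L(r) = r is a straight line.
L_regular_env <- envelope(p_regular, Lest, correction = "border")
plot(L_regular_env, . - r ~ r)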
# Load the spatstat package

library(spatstat)

# Get some summary information on the dataset

summary(preston_crime)

# Get a table of marks

table(marks(preston_crime))

# Define a function to create a map

preston_map <- function(cols = c("green", "red"), cex = c(1, 1), pch = c(1, 1)) {
  plotRGB(preston_osm)  # plotRGB() is from the raster package
  plot(preston_crime, cols = cols, pch = pch, cex = cex, add = TRUE, show.window = TRUE)
}

# Draw the map with colors, sizes and plot characters
preston_map(
  cols = c("black", "red"),
  cex = c(0.5, 1),
  pch = c(19, 19)
)
# preston_crime has been pre-defined

preston_crime

# Use the split function to show the two point patterns

crime_splits <- split(preston_crime)

# Plot the split crime

plot(crime_splits)

# Compute the densities of both sets of points

crime_densities <- density(crime_splits)

# Calc the violent density divided by the sum of both

frac_violent_crime_density <- crime_densities[["Violent crime"]] /

(crime_densities[["Non-violent crime"]] + crime_densities[["Violent crime"]])

# Plot the density of the fraction of violent crime

plot(frac_violent_crime_density)
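# Optionally overlay contour lines to read values off the map;
# contour() has a method for spatstat pixel images like this one.
contour(frac_violent_crime_density, add = TRUE)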
# spseg(), plotcv() and plotmc() below come from the spatialkernel package
library(spatialkernel)

# Scan from 500m to 1000m in steps of 50m
bw_choice <- spseg(
  preston_crime,
  h = seq(500, 1000, by = 50),
  opt = 1)

head(bw_choice)

# Plot the results and highlight the best bandwidth

plotcv(bw_choice); abline(v = bw_choice$hcv, lty = 2, col = "red")


# Print the best bandwidth

print(bw_choice$hcv)

# Set the correct bandwidth and run for 10 simulations only

seg10 <- spseg(
  pts = preston_crime,
  h = bw_choice$hcv,
  opt = 3,
  ntest = 10,
  proc = FALSE)

# Plot the segregation map for violent crime

plotmc(seg10, "Violent crime")

# Plot seg, the result of running 1000 simulations

plotmc(seg, "Violent crime")


# Inspect the structure of the spatial segregation object

str(seg)

# Get the number of columns in the data so we can rearrange to a grid

ncol <- length(seg$gridx)

# Rearrange the probability column into a grid

prob_violent <- list(x = seg$gridx,
                     y = seg$gridy,
                     z = matrix(seg$p[, "Violent crime"],
                                ncol = ncol))

image(prob_violent)


# Rearrange the p-values, but choose a p-value threshold

p_value <- list(x = seg$gridx,
                y = seg$gridy,
                z = matrix(seg$stpvalue[, "Violent crime"] < 0.05,
                           ncol = ncol))

image(p_value)
# Get a quick summary of the dataset

summary(sasq)

# Plot unmarked points

plot(unmark(sasq))

# Plot the points using a circle sized by date

plot(sasq, which.marks = "date")

Spatial pattern of bigfoot sightings


Does the pattern of sightings look regular, homogeneous, or clustered in the
study region? The sasq object has been loaded for you in case you need to plot
it again.

These terms were defined in Simulating clustered and inhibitory patterns.
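# One way to check this numerically rather than by eye (a sketch,
# assuming sasq is a spatstat ppp object): a quadrat test of the
# unmarked pattern against complete spatial randomness.
quadrat.test(unmark(sasq))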

# Show the available marks

names(marks(sasq))

# Histogram the dates of the sightings, grouped by year

hist(marks(sasq)$date, "years", freq = TRUE)

# Plot and tabulate the calendar month of all the sightings

plot(table(marks(sasq)$month))

# Split on the month mark

sasq_by_month <- split(sasq, "month", un = TRUE)

# Plot monthly maps

plot(sasq_by_month)

# Plot smoothed versions of the above maps.

plot(density(sasq_by_month))
# Use the cartogram and rgeos packages

library(cartogram)

library(rgeos)

# Make a scatterplot of electorate vs borough area


names(london_ref)

plot(london_ref$Electorate, gArea(london_ref, byid = TRUE))

# Make a cartogram, scaling the area to the electorate

carto_ref <- cartogram(london_ref, "Electorate")

plot(carto_ref)

# Check the linearity of the electorate-area plot

plot(carto_ref$Electorate, gArea(carto_ref, byid = TRUE))

# Make a fairer map of the Remain percentage

spplot(carto_ref, "Pct_Remain")
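# A version note (worth verifying for your installed version): newer
# releases of the cartogram package deprecate cartogram() in favor of
# cartogram_cont(), which takes the same arguments, e.g.
# cartogram_cont(london_ref, "Electorate").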
# Use the spdep package

library(spdep)

# Make neighbor list

borough_nb <- poly2nb(london_ref)

# Get center points of each borough

borough_centers <- coordinates(london_ref)

# Show the connections

plot(london_ref); plot(borough_nb, borough_centers, add = TRUE)

# Map the total pop'n

spplot(london_ref, zcol = "TOTAL_POP")


# Run a Moran I test on total pop'n

moran.test(
  london_ref$TOTAL_POP,
  nb2listw(borough_nb)
)

# Map % Remain

spplot(london_ref, zcol = "Pct_Remain")

# Run a Moran I MC test on % Remain

moran.mc(
  london_ref$Pct_Remain,
  nb2listw(borough_nb),
  nsim = 999
)
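# For intuition, a hand-rolled Moran's I under the same row-standardized
# weights (a sketch, not part of the exercise); it should match the
# statistic reported above.
w <- listw2mat(nb2listw(borough_nb))
z <- london_ref$Pct_Remain - mean(london_ref$Pct_Remain)
(length(z) / sum(w)) * sum(w * outer(z, z)) / sum(z^2)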
# Get a summary of the data set

summary(london)

# Map the OBServed number of flu reports

spplot(london, "Flu_OBS")
# Compute the overall incidence of flu
r <- sum(london$Flu_OBS) / sum(london$TOTAL_POP)

# Calculate the expected number for each borough

london$Flu_EXP <- london$TOTAL_POP * r

# Calculate the ratio of OBServed to EXPected

london$Flu_SMR <- london$Flu_OBS / london$Flu_EXP

# Map the SMR

spplot(london, "Flu_SMR")
# For the binomial statistics function

library(epitools)

# Get CI from binomial distribution

flu_ci <- binom.exact(london$Flu_OBS, london$TOTAL_POP)

# Add borough names

flu_ci$NAME <- london$NAME

# Calculate London rate, then compute SMR

r <- sum(london$Flu_OBS) / sum(london$TOTAL_POP)

flu_ci$SMR <- flu_ci$proportion / r

# Subset the high SMR data

flu_high <- flu_ci[flu_ci$SMR > 1, ]

# Plot estimates with CIs

library(ggplot2)

ggplot(flu_high, aes(x = NAME, y = proportion / r,
                     ymin = lower / r, ymax = upper / r)) +
  geom_pointrange() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))


# Probability of a binomial exceeding a multiple of the expected count
binom.exceed <- function(observed, population, expected, e) {
  1 - pbinom(e * expected, population, prob = observed / population)
}

# Compute P(rate > 2)

london$Flu_gt_2 <- binom.exceed(
  observed = london$Flu_OBS,
  population = london$TOTAL_POP,
  expected = london$Flu_EXP,
  e = 2)

# Use a 50-color palette that only starts changing at around 0.8
pal <- c(
  rep("#B0D0B0", 40),
  colorRampPalette(c("#B0D0B0", "orange"))(5),
  colorRampPalette(c("orange", "red"))(5)
)

# Plot the P(rate > 2) map

spplot(london, "Flu_gt_2", col.regions = pal, at = seq(0, 1, len = 50))

# Fit a poisson GLM.

model_flu <- glm(
  Flu_OBS ~ HealthDeprivation,
  offset = log(TOTAL_POP),
  data = london,
  family = poisson)

# Is HealthDeprivation significant?

summary(model_flu)
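# Because the model uses a log link with a log-population offset, the
# coefficients are on the log-rate scale; exponentiating them gives
# rate ratios per unit of HealthDeprivation.
exp(coef(model_flu))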
# Put residuals into the spatial data.

london$Flu_Resid <- residuals(model_flu)

# Map the residuals using spplot

spplot(london, "Flu_Resid")

# Compute the neighborhood structure.

library(spdep)

borough_nb <- poly2nb(london)

# Test spatial correlation of the residuals.

moran.mc(london$Flu_Resid, listw = nb2listw(borough_nb), nsim = 999)

# Use R2BayesX

library(R2BayesX)

# Fit a GLM

model_flu <- glm(Flu_OBS ~ HealthDeprivation, offset = log(TOTAL_POP),
                 data = london, family = poisson)

# Summarize it

summary(model_flu)
# Calculate coeff confidence intervals

confint(model_flu)

# Fit a Bayesian GLM

bayes_flu <- bayesx(Flu_OBS ~ HealthDeprivation, offset = log(london$TOTAL_POP),
                    family = "poisson", data = data.frame(london),
                    control = bayesx.control(seed = 17610407))

# Summarize it

summary(bayes_flu)

# Look at the samples from the Bayesian model

plot(samples(bayes_flu))

# Compute adjacency objects

borough_nb <- poly2nb(london)

borough_gra <- nb2gra(borough_nb)

# Fit spatial model

flu_spatial <- bayesx(
  Flu_OBS ~ HealthDeprivation + sx(i, bs = "spatial", map = borough_gra),
  offset = log(london$TOTAL_POP),
  family = "poisson", data = data.frame(london),
  control = bayesx.control(seed = 17610407)
)

# Summarize the model

summary(flu_spatial)


# Map the fitted spatial term only

london$spatial <- fitted(flu_spatial, term = "sx(i):mrf")[, "Mean"]

spplot(london, zcol = "spatial")

# Map the residuals

london$spatial_resid <- residuals(flu_spatial)[, "mu"]

spplot(london, zcol = "spatial_resid")

# Test residuals for spatial correlation

moran.mc(london$spatial_resid, nb2listw(borough_nb), 999)


# ca_geo has been pre-defined

str(ca_geo, 1)

# See what measurements are at each location

names(ca_geo)

# Get a summary of the acidity (pH) values

summary(ca_geo$pH)

# Look at the distribution

hist(ca_geo$pH)

# Make a vector that is TRUE for the missing data

miss <- is.na(ca_geo$pH)

table(miss)

# Plot a map of acidity

spplot(ca_geo[!miss, ], "pH")
# ca_geo has been pre-defined

str(ca_geo, 1)

# Are they called lat-long, up-down, or what?

coordnames(ca_geo)

# Complete the formula

m_trend <- lm(pH ~ x + y, as.data.frame(ca_geo))

# Check the coefficients

summary(m_trend)
