## ---- Define CRAN Packages ----
list_of_packages <- c(
# Core ENM tools
"terra", "dismo", "ENMeval", "ENMTools", "biomod2",
# Spatial / Mapping
"sf", "rnaturalearth", "ggspatial", "leaflet",
"fields", "rangeBuilder",
# Visualization
"ggplot2", "viridis", "gridExtra",
# Data manipulation
"dplyr", "tidyr", "stringr", "gtools",
# Phylogenetics
"ape", "phytools",
# Biodiversity data
"ridigbio", "gatoRs",
# Other utilities
"multcompView", "usdm", "predicts", "rJava"
)
## ---- Install Missing CRAN Packages ----
# Identify any packages from the list not currently installed.
new.packages <- list_of_packages[!(list_of_packages %in% installed.packages()[,"Package"])]
if (length(new.packages)) {
cat("\nInstalling missing CRAN packages:\n")
print(new.packages)
install.packages(new.packages)
} else {
cat("\nAll required CRAN packages already installed.\n")
}
## ---- Load All CRAN Packages ----
# Try loading all required CRAN packages.
loaded <- sapply(list_of_packages, require, character.only = TRUE)
if (any(!loaded)) {
cat("\nWARNING: These CRAN packages failed to load:\n")
print(list_of_packages[!loaded])
} else {
cat("\nAll CRAN packages loaded successfully.\n")
}
## ---- Install GitHub Packages ----
# devtools is required for installing from GitHub.
if (!require(devtools)) {
install.packages("devtools")
library(devtools)
}
# List of GitHub repositories for required packages.
github_repos <- c(
"vqv/ggbiplot"
)
# Check whether these GitHub packages are installed, and install if not.
for (repo in github_repos) {
pkg <- tail(strsplit(repo, "/")[[1]], 1)
if (!pkg %in% installed.packages()[,"Package"]) {
cat(paste("\nInstalling GitHub package:", repo, "\n"))
devtools::install_github(repo, upgrade = "never")
} else {
cat(paste("\nGitHub package already installed:", pkg, "\n"))
}
}
# Try loading GitHub packages.
github_packages <- c("ggbiplot")
github_loaded <- sapply(github_packages, require, character.only = TRUE)
if (any(!github_loaded)) {
cat("\nWARNING: These GitHub packages failed to load:\n")
print(github_packages[!github_loaded])
} else {
cat("\nAll GitHub packages loaded successfully.\n")
}
library(ridigbio) # Interface to iDigBio API
library(gatoRs) # Unified taxonomic/occurrence tools
library(leaflet) # Interactive mapping
Search the iDigBio API for occurrence record by taxonomic designations.
# Search for specific species (Galax urceolata)
iDigBio_GU <- idig_search_records(rq = list(scientificname = "Galax urceolata"))
# Search for all Diapensiaceae occurrences (limit to 1000 for example)
iDigBio_GU_family <- idig_search_records(rq = list(family = "Diapensiaceae"), limit = 1000)
Search the API with a geographic bounding box (e.g. Eastern USA extent)
# format a request in the structure of a list
rq_input <- list(scientificname = list(type = "exists"),
family = "Diapensiaceae",
geopoint = list(type = "geo_bounding_box",
top_left = list(lon = -98.16,
lat = 48.92),
bottom_right = list(lon = -64.02,
lat = 23.06)
)
)
# input request into idigbio search fxn
iDigBio_GU_family_USA <- idig_search_records(rq_input, limit = 1000)
Save results as a Comma Separated File (.csv)
write.csv(iDigBio_GU,
file = "data/01_download/iDigBio_GU_2025_06_27.csv",
row.names = FALSE)
write.csv(iDigBio_GU_family,
file = "data/01_download/iDigBio_GU_family_2025_06_27.csv",
row.names = FALSE)
Create a synonym vector for species to search by.
Shortia_galacifolia <- c("Shortia galacifolia",
"Sherwoodia galacifolia")
Galax_urceolata <- c("Galax urceolata",
"Galax aphylla")
Pyxidanthera_barbulata <- c("Pyxidanthera barbulata",
"Pyxidanthera barbulata var. barbulata")
Pyxidanthera_brevifolia <- c("Pyxidanthera brevifolia",
"Pyxidanthera barbulata var. brevifolia")
Use gatoRs to download species occurrence data for each name vector, searching both iDigBio & GBIF. Allows us to retrieve all versions of a name as these data aggregators have their own taxonomic backbone for organizing occurrence data downloads.
gators_download(synonyms.list = Shortia_galacifolia,
write.file = TRUE,
filename = "./data/01_download/raw/Shortia_galacifolia_raw_2025_06_27.csv")
gators_download(synonyms.list = Galax_urceolata,
write.file = TRUE,
filename = "./data/01_download/raw/Galax_urceolata_raw_2025_06_27.csv")
gators_download(synonyms.list = Pyxidanthera_barbulata,
write.file = TRUE,
filename = "./data/01_download/raw/Pyxidanthera_barbulata_raw_2025_06_27.csv")
gators_download(synonyms.list = Pyxidanthera_brevifolia,
write.file = TRUE,
filename = "./data/01_download/raw/Pyxidanthera_brevifolia_raw_2025_06_27.csv")
Read in Shortia galacifolia’s data we downloaded with gatoRs above.
rawdf <- read.csv("data/01_download/raw/Shortia_galacifolia_raw_2025_06_27.csv")
View number of columns/fields in the dataset, also return the number of rows (individual occurrence records)
names(rawdf) # returns field headers
## [1] "scientificName" "genus"
## [3] "specificEpithet" "infraspecificEpithet"
## [5] "ID" "occurrenceID"
## [7] "basisOfRecord" "eventDate"
## [9] "year" "month"
## [11] "day" "institutionCode"
## [13] "recordedBy" "country"
## [15] "county" "stateProvince"
## [17] "locality" "latitude"
## [19] "longitude" "coordinateUncertaintyInMeters"
## [21] "informationWithheld" "habitat"
## [23] "aggregator"
nrow(rawdf) # returns the number of rows = occurrence records
## [1] 1534
Use Leaflet to visualize records interactively - The error message here indicates many points do not have long/lat values
leaflet(rawdf) %>%
addTiles() %>%
addMarkers(label = paste0(rawdf$longitude,
", ",
rawdf$latitude))
library(gatoRs) # Taxonomic and geographic cleaning
library(fields) # For calculating spatial distances
library(sf) # For spatial operations and visualization
library(ggplot2) # For plotting
library(ggspatial) # Scale bars and north arrows
library(leaflet) # For interactive mapping
Read in raw species-specific data downloaded in 01 Data Downloads. Check how many occurrence records we start with in the raw data.
rawdf <- read.csv("data/01_download/raw/Shortia_galacifolia_raw_2025_06_27.csv")
nrow(rawdf) # Starting number of records
## [1] 1534
Our initial download includes iDigBio & GBIF’s synonym lists in
addition to the name vectors we searched. Let’s inspect and clean to our
taxonomic standard. Note that taxa_clean()
contains the
argument taxa.filter which allow the options exact
,
fuzzy
or interactive
to allow us to tighten or
loosen the requirements for name matching. Read more by running
?taxa_clean()
.
# Inspect unique names
unique(rawdf$scientificName)
## [1] "Shortia galacifolia Torr. & A.Gray"
## [2] "Sherwoodia galacifolia (Torr. & A.Gray) House"
## [3] "Shortia galacifolia"
## [4] "Shortia galacifolia Torr. & A. Gray"
## [5] "Shortia galacifolia var. galacifolia"
## [6] "Shortia galacifolia Torrey & A. Gray"
## [7] "Shortia galacifolia var. brevistyla"
## [8] "Shortia galacifolia var. brevistyla P. A. Davies"
## [9] "Sherwoodia galacifolia"
# Define our synonym list
search <- c("Shortia galacifolia", "Sherwoodia galacifolia")
# Clean names with fuzzy matching
df <- taxa_clean(df = rawdf,
synonyms.list = search,
taxa.filter = "fuzzy", # fuzzy allows some leeway over exact matching
accepted.name = "Shortia galacifolia")
## Current scientific names:
## [1] "Shortia galacifolia Torr. & A.Gray"
## [2] "Sherwoodia galacifolia (Torr. & A.Gray) House"
## [3] "Shortia galacifolia"
## [4] "Shortia galacifolia Torr. & A. Gray"
## [5] "Shortia galacifolia var. galacifolia"
## [6] "Shortia galacifolia Torrey & A. Gray"
## [7] "Shortia galacifolia var. brevistyla"
## [8] "Shortia galacifolia var. brevistyla P. A. Davies"
## [9] "Sherwoodia galacifolia"
## User selected a(n) fuzzy match.
nrow(df) # Updated count
## [1] 1534
Here we remove any records with missing coordinates, impossible coordinates, coordinates at (0,0), and any that are flagged as skewed. We also round the provided latitude and longitude values to a specified number of decimal places.
# Remove invalid/skewed records and round coordinates
df <- basic_locality_clean(df = df,
remove.zero = TRUE,
precision = TRUE,
digits = 2,
remove.skewed = TRUE)
nrow(df)
## [1] 77
Remove coordinates in cultivated zones, botanical gardens, and
outside our desired range. Here we set interactive = FALSE
,
however you can visualize the outliers by setting
interactive = TRUE
.
df <- process_flagged(df,
interactive = FALSE,
scientific.name = "accepted_name")
## Reading layer `ne_50m_land' from data source
## `/scratch/local/8316817/Rtmp2XxVuo/ne_50m_land.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1420 features and 3 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -89.99893 xmax: 180 ymax: 83.59961
## Geodetic CRS: WGS 84
nrow(df)
## [1] 69
Here we identify and remove both (1) specimen duplicates and (2)
aggregator duplicates based on each specimens coordinates, occurrenceID,
and eventDate. To leverage all date information available, set
remove.unparseable = FALSE
to manually populate the year,
month, and day columns. Here, we also confirm all ID (UUID and key) are
unique to remove any within-aggregator duplicates that may accumulate
due to processing errors.
# Remove duplicate specimens and aggregator artifacts
df <- remove_duplicates(df,
remove.unparseable = TRUE)
nrow(df)
## [1] 56
Maxent will only retain one point per pixel. To make the ecological niche analysis comparable, we will retain only one point per pixel. Note: Default is a raster with 30 arc sec resolution.
# Retain one point per raster pixel (30 arc-sec default)
df <- one_point_per_pixel(df)
nrow(df)
## [1] 27
Oversampled regions/habitats can bias our model. To reduce bias, we
can thin occurrence data.
Step 1: What should your minimum distance be?
First, lets assess the current minimum distance among your occurrence records. Here, we calculate the minimum nearest neighbor distance in km.
# Calculate minimum nearest-neighbor distance
nnDm <- rdist.earth(as.matrix(df[, c("longitude", "latitude")]),
miles = FALSE)
nnDmin <- do.call(rbind,
lapply(1:5, function(i) sort(nnDm[, i])[2]))
min(nnDmin) # e.g., 2.22 km
## [1] 2.226477
Here the current minimum distance is 2.22 km. Based on literature, we
find a 2 meters (or 0.002 km) distance was enough to collect unique
genets, so we do not need to thin our points.
Step 2: Thin occurrence records using spThin through gatoRs
However when you do need to thin your records, this is a great function in gatoRs to do so!
# thin if you need
df <- thin_points(df,
distance = 0.002, # in km
reps = 100)
Make points spatial
df_fixed <- st_as_sf(df,
coords = c("longitude", "latitude"),
crs = 4326)
Set up base maps
USA <- borders(database = "usa",
colour = "gray80",
fill = "gray80")
state <- borders(database = "state",
colour = "black",
fill = NA)
Plot: Use ggplot2 & ggspatial to plot occurrence records. We are
using two base maps, USA and State. The geom_sf()
fxn adds
our cleaned occurrence points as blue based on their lon & lat
values. We can also fix coordinate spatial view through
coord_sf()
. Finally can add a scale and arrow.
simple_map <- ggplot() +
USA +
state +
geom_sf(data = df_fixed,
color = "blue") +
coord_sf(xlim = c(min(df$longitude) - 3,
max(df$longitude) + 3),
ylim = c(min(df$latitude) - 3,
max(df$latitude) + 3)) +
xlab("Longitude") +
ylab("Latitude") +
annotation_scale(plot_unit = "km") +
annotation_north_arrow(location = "tl",
height = unit(1, "cm"),
width = unit(1, "cm"))
simple_map
Another fun way to visualize occurrence data is using leaflet’s interactive maps.
leaflet(df_fixed) %>%
addMarkers(label = paste0(df$longitude, ", ", df$latitude)) %>%
addTiles()
write.csv(df, "data/02_cleaning/Shortia_galacifolia_2025_06_27_cleaned.csv", row.names = FALSE)
Use a for-loop to iterate through all of the species.
files <- list.files("data/01_download/raw", full.names = TRUE)[1:3]
synonymns <- list(
Galax_urceolata = c("Galax urceolata",
"Galax urceolata (Poir.) Brummitt",
"Galax urceolata (Poiret) Brummitt",
"Galax urceolaa",
"Galax aphylla L.",
"Galax aphylla"),
Pyxidanthera_barbulata = c("Pyxidanthera barbulata",
"Pyxidanthera barbulata Michx.",
"Pyxidanthera barbulata var. barbulata",
"Pyxidenthera barbulata"),
Pyxidanthera_brevifolia = c("Pyxidanthera brevifolia",
"Pyxidanthera brevifolia Wells",
"Pyxidanthera barbulata var. brevifolia (Wells) H.E.Ahles",
"Pyxidanthera barbulata var. brevifolia")
)
for (i in 1:3) { # for loop just allows us to iterate through each species
df <- read.csv(files[3])
search <- synonymns[[3]]
df <- taxa_clean(df,
synonyms.list = search,
taxa.filter = "fuzzy",
accepted.name = search[1])
df <- basic_locality_clean(df,
remove.zero = TRUE,
precision = TRUE,
digits = 2,
remove.skewed = TRUE)
df <- process_flagged(df,
interactive = FALSE,
scientific.name = "accepted_name") # removing all points for brevifolia
df <- remove_duplicates(df,
remove.unparseable = TRUE)
df <- one_point_per_pixel(df)
df <- thin_points(df,
distance = 0.002,
reps = 100)
outfile <- paste0("data/02_cleaning/", gsub(" ", "_", search[1]), "_2025_06_27_cleaned.csv")
write.csv(df,
outfile,
row.names = FALSE)
rm(df, search, outfile)
}
alldf <- list.files("data/02_cleaning",
pattern = "*.csv",
full.names = TRUE)
alldf <- lapply(alldf, read.csv)
alldf <- do.call(rbind, alldf)
write.csv(alldf, "data/02_cleaning/maxent_ready/diapensiaceae_maxentready_2025_06_27.csv", row.names = FALSE)
alldf_fixed <- st_as_sf(alldf, coords = c("longitude", "latitude"), crs = 4326)
all_map <- ggplot() +
USA + state +
geom_sf(data = alldf_fixed, aes(col = factor(accepted_name))) +
coord_sf(xlim = c(min(alldf$longitude) - 3, max(alldf$longitude) + 3),
ylim = c(min(alldf$latitude) - 3, max(alldf$latitude) + 3)) +
xlab("Longitude") + ylab("Latitude") +
labs(color = "Scientific name") +
annotation_scale(plot_unit = "km") +
annotation_north_arrow(location = "tl", height = unit(1, "cm"), width = unit(1, "cm"))
all_map
Some records do not contain lat/lon info, but do have useful
information in the locality field that can give us a reasonable estimate
of location + some error. Here we’ll use the gatoRs fxn
need_to_georeference()
to isolate these records for batch
processing on the georeferencing platform GeoLocate.
Read in raw data and check what needs to be georeferenced
rawdf <- read.csv("data/01_download/raw/Shortia_galacifolia_raw_2025_06_27.csv")
rawdf_GeoRef <- need_to_georeference(rawdf)
Format the columns/fields for GeoLocate’s standards, then write out for batch processing.
# Format columns for GeoLocate submission
rawdf_GeoRef <- rawdf_GeoRef %>%
dplyr::select("locality string" = locality,
country,
state = stateProvince,
county,
latitude,
longitude,
ID,
name = scientificName,
basis = basisOfRecord)
# Add required empty columns
rawdf_GeoRef$'correction status' <- ""
rawdf_GeoRef$precision <- ""
rawdf_GeoRef$'error polygon' <- ""
rawdf_GeoRef$'multiple results' <- ""
# Reorder columns
rawdf_GeoRef2 <- rawdf_GeoRef[, c("locality string", "country",
"state", "county", "latitude",
"longitude","correction status",
"precision", "error polygon",
"multiple results","ID",
"name", "basis")]
# write out csv
write.csv(rawdf_GeoRef2,
"data/03_georeferencing/Shortia_galacifolia_Needing_GeoRef_2025_06_27.csv",
row.names = FALSE)
library(gtools) # For mixedsort of file names
library(terra) # For raster and spatial extraction
library(dplyr) # For data wrangling
library(tidyr) # For handling NAs and reshaping
library(ggplot2) # For plotting
library(ggbiplot) # For PCA biplot visualization
library(multcompView) # For compact letter display (Tukey HSD)
library(gridExtra) # For combining plots
# Read in cleaned occurrence data with geographic coordinates and species ID
alldf <- read.csv("data/02_cleaning/maxent_ready/diapensiaceae_maxentready_2025_06_27.csv")
Raster files provided by WorldClim
# List and sort all .tif files in the WorldClim directory
list <- list.files("data/04_climate_processing/BioClim/",
full.names = TRUE)
list <- gtools::mixedsort(sort(list))
# Stack the raster files
envtStack <- terra::rast(list)
Extract climatic variable from raster cell where occurrence pts sit
# Extract values of environmental rasters at occurrence points
ptExtracted <- terra::extract(envtStack,
alldf[, c("longitude", "latitude")])
Organize these data to have species information
# Combine with species names and coordinates
ptExtracteddf <- ptExtracted %>%
dplyr::mutate(name = as.character(alldf$accepted_name),
x = alldf$longitude,
y = alldf$latitude)
Drop any points that does not contain climatic data
# Remove any rows with missing climate values
ptExtracteddf <- ptExtracteddf %>% tidyr::drop_na()
Subset the climatic variables, assumed to be columns 2 to 21.
data.bioclim <- ptExtracteddf[, 2:21]
data.species <- ptExtracteddf[, "name"]
Scale and run a Principal Component Analysis (PCA) on climatic
variables. A PCA reduces the dimensionality of the climatic data to make
it easier to identify broad patterns and which variables are most
influential. For our data, it shows how species cluster &
differentiate according to the climatic variables associated with their
spatial presence.
When you use the command prcomp
your loading variables
show up as rotational variables.
pca_result <- prcomp(data.bioclim, scale. = TRUE)
Extract variable loadings: saving a matrix of ceofficients (called loadings) for each of the original climatic variable on each PC (principal component). These loadings show how much each original variable contributes to each PC. Variables with larger (pos or neg) loadings are the biggest drivers of variation in the data.
loadings <- pca_result$rotation
Calculate relative contributions of each variable to each PC
loadings_relative <- sweep(abs(loadings), 2, colSums(abs(loadings)), "/") * 100
Set up a nice custom theme for plotting aesthetics
# Customize theme
custom_theme <- theme(panel.background = element_blank(),
panel.border = element_rect(fill = NA),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_blank(),
axis.ticks = element_line(colour = "black"),
plot.margin = unit(c(1, 1, 1, 1), "line"),
axis.text = element_text(size = 12),
legend.text = element_text(size = 12),
legend.title = element_text(size = 12),
text = element_text(size = 12))
# Define color palette for species
pal <- c("#D43F3AFF", "#EEA236FF", "#5CB85CFF", "#46B8DAFF")
Create a species biplot. obs.scale
indicates the scale
factor to apply to observation, var.scale
indicates the
scale factor to apply to variables, ellipse
as
TRUE
draws a normal data ellipse for each group, and
circle
as TRUE
draws a correlation circle.
This is all basically to show how our data clusters across the top 2 PCs per species!
# PCA Biplot
pca_plot <- ggbiplot(pca_result,
obs.scale = 1,
var.scale = 1,
groups = data.species,
ellipse = TRUE,
circle = TRUE) +
scale_color_manual(name = '',
values = pal) +
theme(legend.direction = 'vertical',
legend.position = 'bottom',
legend.text = element_text(size = 12, face = "italic")) +
custom_theme
print(pca_plot)
Pull out the bioclim layers that comprise PC1 and PC2
# Top 2 contributing variables to PC1 and PC2
top_PC1_vars <- rownames(loadings)[order(abs(loadings[, "PC1"]),
decreasing = TRUE)][1:2]
top_PC2_vars <- rownames(loadings)[order(abs(loadings[, "PC2"]),
decreasing = TRUE)][1:2]
selected_vars <- unique(c(top_PC1_vars, top_PC2_vars)) # remove duplicates
We can run an ANOVA (Analysis of Variance) and a Tukey HSD (a
comparison test) Post-hoc to test whether there are statistically
significant differences in the average values of specific climatic
variables between the species.
Broken down, the ANOVA evaluates the difference of the mean value for
a climatic variable across all species, and tests for a significant
difference.
The Tukey HSD test follows up comparing all pairs of species to see
which species pairs differ from eachother.
Here, we’ll use a for-loop to move through each extracted (via species occurrence points) bioclim layer for PC1 & PC2 and conduct the ANOVA and Tukey HSD. Plotted as a boxplot with real data to show relative density.
## ANOVA + Tukey HSD + Plotting for Top Variables ----
plotlist <- list()
for (i in seq_along(selected_vars)) {
varname <- selected_vars[i]
# Run ANOVA
model <- aov(as.formula(paste0(varname, " ~ name")),
data = ptExtracteddf)
tukey <- TukeyHSD(model)
pvals <- tukey$`name`[, 4]
# Convert p-values to significance letters
letters_out <- multcompLetters(pvals)
groups_df <- data.frame(name = names(letters_out$Letters),
groups = letters_out$Letters)
# Set y position for labels
max_y <- max(ptExtracteddf[[varname]],
na.rm = TRUE)
groups_df$y_position <- max_y * 1.1
# Create plot
plotlist[[i]] <- ggplot(ptExtracteddf, aes(x = name,
y = .data[[varname]])) +
geom_jitter(aes(color = name),
width = 0.3,
alpha = 0.5,
size = 1) +
geom_boxplot(fill = NA,
color = "black",
size = 0.6) +
geom_text(data = groups_df,
aes(x = name,
y = y_position,
label = groups),
inherit.aes = FALSE,
size = 4) +
scale_fill_manual(values = pal) +
scale_color_manual(values = pal) +
ggtitle(paste0(varname)) +
ylab(varname) +
theme_classic() +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45,
hjust = 1,
face = "italic")) +
ylim(0, max_y * 1.3)
}
# Combine plots
gridExtra::grid.arrange(grobs = plotlist, ncol = 2)
library(terra) # For raster handling
library(sf) # For vector spatial operations
library(ggplot2) # For plotting
library(dplyr) # For data manipulation
library(usdm) # For VIF calculations
library(stringr) # For string operations
library(gridExtra) # For plotting
Load cleaned occurrence data & convert it to spatial data.
# Read in cleaned occurrence data with geographic coordinates and species ID
alldf <- read.csv("data/02_cleaning/maxent_ready/diapensiaceae_maxentready_2025_06_27.csv")
# Convert occurrence data to an sf object
alldfsp <- st_as_sf(alldf,
coords = c("longitude", "latitude"),
crs = 4326)
Load and stack climatic variable raster data
# List and sort all .tif files in the BioClim directory
biolist <- list.files("data/04_climate_processing/BioClim/",
pattern = "\\.tif$",
full.names = TRUE)
# Stack the raster files into a SpatRaster
biostack <- terra::rast(biolist)
We need to define the accessible space per species, done commonly
using an alpha hull approach. Once we have defined each species’ AA, we
can crop & mask climatic layers to these.
To do this, we’ll run a for-loop that processes each species’s AA
using the package rangeBuilder’s getDynamicAlphaHull()
,
crops + masks the climatic raster layers to these AA shapes, then
deposits them in a directory called Cropped.
# Define base output directory for cropped rasters
dir <- "data/04_climate_processing/Cropped"
# Loop through each unique species in the occurrence data
for (species in unique(alldf$accepted_name)) {
# Print current species for progress tracking
message("Processing species: ", species)
# Subset occurrence data for this species
species_df <- alldf %>%
filter(accepted_name == species)
# Convert species occurrences to sf
species_sf <- st_as_sf(species_df,
coords = c("longitude", "latitude"),
crs = 4326)
# Run alpha hull via rangeBuilder
hull <- rangeBuilder::getDynamicAlphaHull(x = species_df,
coordHeaders = c("longitude",
"latitude"),
fraction = 1, # min. fraction of
## records we want included
partCount = 1, # number of polygons
initialAlpha = 20, # initial alpha size,
## 20m, scales up from here.
clipToCoast = "terrestrial",
verbose = FALSE
)
# Convert hull to sf
hull_sf <- st_as_sf(hull[[1]])
# Transform hull and points to equal-area projection
proj <- "+proj=cea +lat_ts=0 +lon_0=0"
hull_sf_proj <- st_transform(hull_sf,
crs = proj)
species_sf_proj <- st_transform(species_sf,
crs = proj)
# Calculate nearest-neighbor distances
dist_matrix <- st_distance(species_sf_proj)
nearest_neighbor_dists <- apply(dist_matrix, 2, function(x) sort(x)[2])
buffer_distance <- quantile(nearest_neighbor_dists,
probs = 0.80,
na.rm = TRUE)
# Buffer the hull
buffer_proj <- st_buffer(hull_sf_proj,
dist = buffer_distance)
# Union polygons in case of multiple pieces
buffer_proj <- st_union(buffer_proj)
# Transform both hull and buffered hull back to WGS84 for plotting
hull_sf_wgs84 <- st_transform(hull_sf_proj,
crs = 4326)
buffer_wgs84 <- st_transform(buffer_proj,
crs = 4326)
# plot hull before buffering
p_before <- ggplot() +
geom_sf(data = hull_sf_wgs84,
fill = "gray70",
alpha = 0.5,
color = NA) +
geom_sf(data = species_sf,
color = "black",
size = 0.5) +
theme_minimal() +
ggtitle(paste("Alpha Hull (No Buffer) -", species))
# plot after buffering alpha hull
p_after <- ggplot() +
geom_sf(data = buffer_wgs84,
fill = "gray70",
alpha = 0.5,
color = NA) +
geom_sf(data = species_sf,
color = "red",
size = 0.5) +
theme_minimal() +
ggtitle(paste("Buffered Hull -", species))
gridExtra::grid.arrange(p_before, p_after, ncol = 2)
# Convert buffered area to terra SpatVector
buffer_vect <- terra::vect(buffer_wgs84)
# Define output directory for this species
spec <- gsub(" ", "_", species)
species_dir <- file.path(dir, spec)
dir.create(species_dir, showWarnings = FALSE)
# Loop through each raster layer and crop/mask to buffered area
for (i in seq_along(biolist)) {
rast <- biostack[[i]]
# Crop raster to extent
rast_crop <- crop(rast, buffer_vect)
# Mask raster to buffered hull shape
rast_mask <- mask(rast_crop, buffer_vect)
# Define output file name
layer_name <- names(rast)
outfile <- file.path(species_dir, paste0(layer_name, ".tif"))
# Write cropped raster
writeRaster(rast_mask,outfile,overwrite = TRUE)
message(" Cropped and saved: ", outfile)
}
}
Note that for defining AA, you can customize arguments to make more sense for your species of interest. Consider partCount for example, you can increase it to allow for individual polygons of AA per point cluster if you believe that populations are truly separated by some geographic boundary.
Variable Inflation Factor (VIF) can detect for multicollinearity in a
set of multiple regression variables. We can compute this using usdm’s
vifstep()
to test which variables should be excluded due to
collinearity.
# Loop through each species
for (species in unique(alldf$accepted_name)) {
spec <- gsub(" ", "_", species)
spec_dir <- file.path(dir, spec)
# Read in species cropped rasters and stack them
cropped_files <- list.files(spec_dir,
pattern = "\\.tif$",
full.names = TRUE)
species_stack <- terra::rast(cropped_files)
# Calculate VIF and exclude collinear variables
vif_result <- usdm::vifstep(species_stack, th = 10)
message("VIF calculated for species: ", species)
reduced_stack <- exclude(species_stack, vif_result)
# Create a folder for VIF-selected layers
vif_dir <- file.path(spec_dir, "VIF")
dir.create(vif_dir, showWarnings = FALSE)
# Save each retained raster layer
for (layer_name in names(reduced_stack)) {
r_layer <- reduced_stack[[layer_name]]
out_file <- file.path(vif_dir, paste0(layer_name, ".tif"))
terra::writeRaster(r_layer,
out_file,
overwrite = TRUE)
message("Saved layer: ", out_file)
}
}
We also check the overall correlation between climatic variables using a matrix.
# List species folders in the cropped directory
species_dirs <- list.dirs(dir, recursive = FALSE, full.names = TRUE)
# Loop through each species folder
for (species_dir in species_dirs) {
# Extract species name from folder path
species_name <- basename(species_dir)
# List all cropped raster files for this species
cropped_files <- list.files(species_dir,
pattern = "\\.tif$",
full.names = TRUE)
# Stack the rasters for this species
cropped_stack <- terra::rast(cropped_files)
# Calculate correlation matrix
corr <- terra::layerCor(cropped_stack, fun = "cor", na.rm = TRUE)
# Extract absolute Pearson correlation values
cor_abs <- abs(corr$correlation)
# Define output CSV path
corr_outfile <- file.path(species_dir,paste0("correlationBioclim_", species_name, ".csv"))
# Write correlation matrix to CSV
write.csv(cor_abs,corr_outfile,row.names = TRUE)
}
library(terra) # For raster data
library(ENMeval) # For ENM evaluation and tuning
library(predicts) # For niche modeling tools
library(dplyr) # For data manipulation
library(ggplot2) # Optional for additional plots
library(sf) # For vector spatial operations
library(rnaturalearth) # For downloading shapefiles of countries and states
library(ggspatial) # Scale bars and north arrows
Read in cleaned occurrence data, then subset to a sample species to demo modeling.
# Load occurrence data
alldf <- read.csv("./data/02_cleaning/maxent_ready/diapensiaceae_maxentready_2025_06_27.csv")
# Example species: Galax urceolata
Galax_urceolata <- alldf %>%
filter(accepted_name == "Galax urceolata")
# List environmental layers
vif_list <- list.files("./data/04_climate_processing/Cropped/Galax_urceolata/VIF/",
full.names = TRUE,
recursive = FALSE)
# Load as raster stack
vifStack <- terra::rast(vif_list)
ENMeval will generate multiple models and test them per the specified method. Two important variables here are the regularization multiplier (RM) value and the feature class (FC). FC will allow for different shapes in response curves (linear, hinge, quadratic, product, and threshold) to be used in the model and RM will influence how many parameters are included in the model.
eval <- ENMevaluate(occs = Galax_urceolata[, c("longitude", "latitude")],
envs = vifStack,
tune.args = list(fc = c("L", "Q"), rm = 1:2),
partitions = "block",
n.bg = 10000,
parallel = FALSE,
algorithm = 'maxent.jar')
## Package ecospat is not installed, so Continuous Boyce Index (CBI) cannot be calculated.
## *** Running initial checks... ***
## * Randomly sampling 10000 background points ...
## * Removed 1 occurrence points with NA predictor variable values.
## * Clamping predictor variable rasters...
## * Model evaluations with spatial block (4-fold) cross validation and lat_lon orientation...
##
## *** Running ENMeval v2.0.5 with maxent.jar v3.4.3 using the predicts package v0.1.19 ***
## | | | 0% | |================== | 25% | |=================================== | 50% | |==================================================== | 75% | |======================================================================| 100%
## Making model prediction rasters...
## ENMevaluate completed in 1 minutes 17.8 seconds.
# Save ENMeval object
save(eval, file = "./data/05_ENMs/Galax_urceolata_ENM_eval.RData")
# Extract predictions from ENMeval object
maps <- eval@predictions
# Plot first 6 prediction rasters
terra::plot(maps,
nc = 2,
main = names(maps))
Compares how similar the niche recovery is between the models produced by ENMeval.
# Calculate Schoener's D among all models
mod_overlap <- calc.niche.overlap(maps,
overlapStat = "D")
## | | | 0% | |======================= | 33% | |=============================================== | 67% | |======================================================================| 100%
print(mod_overlap)
## fc.L_rm.1 fc.Q_rm.1 fc.L_rm.2 fc.Q_rm.2
## fc.L_rm.1 NA NA NA NA
## fc.Q_rm.1 0.9019650 NA NA NA
## fc.L_rm.2 0.9867187 0.8949457 NA NA
## fc.Q_rm.2 0.9091335 0.9835308 0.9032003 NA
# Retrieve summary table
results <- eval.results(eval)
head(results)
## fc rm tune.args auc.train cbi.train auc.diff.avg auc.diff.sd auc.val.avg
## 1 L 1 fc.L_rm.1 0.8749852 NA 0.06271563 0.03333057 0.8554646
## 2 Q 1 fc.Q_rm.1 0.8697287 NA 0.08614076 0.03787514 0.8403033
## 3 L 2 fc.L_rm.2 0.8755327 NA 0.05888770 0.03229978 0.8584835
## 4 Q 2 fc.Q_rm.2 0.8691261 NA 0.07905733 0.03167739 0.8444271
## auc.val.sd cbi.val.avg cbi.val.sd or.10p.avg or.10p.sd or.mtp.avg
## 1 0.05953220 NA NA 0.1303279 0.07682796 0.000000000
## 2 0.07614258 NA NA 0.1446721 0.07991278 0.000000000
## 3 0.05696870 NA NA 0.1262295 0.07104245 0.001229508
## 4 0.07022422 NA NA 0.1372951 0.07074393 0.000000000
## or.mtp.sd AICc delta.AICc w.AIC ncoef
## 1 0.000000000 60548.43 0.00000 9.999570e-01 8
## 2 0.000000000 60568.54 20.11032 4.296153e-05 8
## 3 0.002459016 60591.95 43.51702 3.551249e-10 8
## 4 0.000000000 60639.36 90.92352 1.803802e-20 7
## Plot model tuning results
evalplot.stats(
e = eval,
stats = c("or.10p", "auc.val"),
color = "fc",
x.var = "rm")
We want to select the model that has the smallest AICc score, has the lowest omission (model that drops the least number of species occurrence points in the reconstructed niche), and maximum AUC (Area Under Curve) value.
opt.seq <- results %>%
filter(!is.na(AICc)) %>% # Exclude models with NA AICc
filter(AICc == min(AICc)) %>% # Minimum AICc
filter(or.10p.avg != 0) %>% # Exclude zero omission
filter(or.10p.avg == min(or.10p.avg)) %>% # Minimum omission
filter(auc.val.avg == max(auc.val.avg)) # Maximum AUC
opt.seq
## fc rm tune.args auc.train cbi.train auc.diff.avg auc.diff.sd auc.val.avg
## 1 L 1 fc.L_rm.1 0.8749852 NA 0.06271563 0.03333057 0.8554646
## auc.val.sd cbi.val.avg cbi.val.sd or.10p.avg or.10p.sd or.mtp.avg or.mtp.sd
## 1 0.0595322 NA NA 0.1303279 0.07682796 0 0
## AICc delta.AICc w.AIC ncoef
## 1 60548.43 0 0.999957 8
# Save optimal model summary
write.table(opt.seq,
file = "./data/05_ENMs/Galax_urceolata_OptModel.txt",
sep = "\t",
row.names = FALSE)
# Retrieve the optimal model from ENMeval object
opt.mod <- eval.models(eval)[[opt.seq$tune.args]]
# Save optimal model as RData
save(opt.mod, file = "./data/05_ENMs/Galax_urceolata_opt_mod.RData")
# Plot variables and their contribution to the model
png("./data/05_ENMs/Galax_urceolata_Variable_Contribution.png",
width = 1200,
height = 800,
res = 150)
# Plot variable contributions
dismo::plot(opt.mod, main = "Variable Contribution - Optimal Model")
dev.off()
## png
## 2
How does the species niche respond to the variable elevation:
# Plot response curves for optimal model
predicts::partialResponse(eval@models[[opt.seq$tune.args]], var = "elev")
Build niche map that shows most suitable vs least suitable pixels according to our best model.
opt.pred <- eval.predictions(eval)[[as.character(opt.seq$tune.args)]]
r_df <- as.data.frame(opt.pred, xy = TRUE)
colnames(r_df) <- c("x", "y", "suitability")
# Occurrences sf
occ_sf <- st_as_sf(eval@occs, coords = c("longitude", "latitude"), crs = 4326)
# get bounding box for plotting from occurences
bbox_vals <- sf::st_bbox(occ_sf)
# USA states
usa <- ne_states(country = "United States of America", returnclass = "sf")
# Plot
p <- ggplot() +
geom_tile(data = r_df,
aes(x = x,
y = y,
fill = suitability)) +
scale_fill_viridis_c(name = "Suitability") +
geom_sf(data = occ_sf,
color = "red",
size = 0.1) +
geom_sf(data = usa,
fill = NA, color = "white", linewidth = 0.5) +
coord_sf(xlim = c(bbox_vals["xmin"] - 2, bbox_vals["xmax"] + 2),
ylim = c(bbox_vals["ymin"] - 2, bbox_vals["ymax"] + 2),
expand = FALSE) +
labs(title = "Predicted Suitability (Optimal ENMeval Model)",
x = "Longitude",
y = "Latitude")+
theme_bw()+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "grey80",
color = NA),
plot.background = element_rect(fill = "white",
color = NA)) +
annotation_north_arrow(location = "tl",
height = unit(1, "cm"),
width = unit(1, "cm"))
print(p)
# Save Outputs
ggsave(filename = "./data/05_ENMs/Galax_urceolata_Optimal_Model_Map.png",
plot = p,
width = 8,
height = 5,
dpi = 300)
terra::writeRaster(x = opt.pred,
filename = "./data/05_ENMs/Galax_urceolata_ENM_optModel.asc",
overwrite = TRUE,
NAflag = -9999)
# List all unique species
species_list <- unique(alldf$accepted_name)
# Exclude Galax urceolata (already processed)
species_list <- setdiff(species_list, "Galax urceolata")
# Loop over remaining species
for (sp in species_list) {
cat("Processing species:", sp, "\n")
## Subset occurrence data
sp_df <- alldf %>%
filter(accepted_name == sp)
## List VIF-selected rasters for this species
sp_vif_path <- paste0("./data/04_climate_processing/Cropped/",
gsub(" ", "_", sp),
"/VIF/")
vif_list <- list.files(sp_vif_path,
full.names = TRUE,
recursive = FALSE)
## Load rasters
vifStack <- terra::rast(vif_list)
## Run ENMevaluate
eval <- ENMevaluate(occs = sp_df[, c("longitude", "latitude")],
envs = vifStack,
tune.args = list(fc = c("L", "Q"), rm = 1:2),
partitions = "block",
n.bg = 10000,
parallel = FALSE,
algorithm = 'maxent.jar')
## Save ENMeval object
save(eval, file = paste0("./data/05_ENMs/", gsub(" ", "_", sp), "_ENM_eval.RData"))
## Identify optimal model
opt.seq <- results %>%
filter(!is.na(AICc)) %>%
filter(AICc == min(AICc)) %>%
filter(or.10p.avg != 0) %>%
filter(or.10p.avg == min(or.10p.avg)) %>%
filter(auc.val.avg == max(auc.val.avg))
## Save optimal model summary
write.table(opt.seq,
file = paste0("./data/05_ENMs/", gsub(" ", "_", sp), "_OptModel.txt"),
sep = "\t",
row.names = FALSE)
## Plot variable contributions
opt.mod <- eval.models(eval)[[opt.seq$tune.args]]
# Save optimal model as RData
save(opt.mod, file = paste0("data/05_ENMs/", gsub(" ", "_", sp), "_opt_mod.RData"))
png(filename = paste0("data/05_ENMs/", gsub(" ", "_", sp), "_Variable_Contribution.png"),
width = 1200,
height = 800,
res = 150)
dismo::plot(opt.mod, main = paste("Variable Contribution -", sp))
dev.off()
## Plot optimal prediction raster
opt.pred <- eval.predictions(eval)[[as.character(opt.seq$tune.args)]]
r_df <- as.data.frame(opt.pred, xy = TRUE)
colnames(r_df) <- c("x", "y", "suitability")
occ_sf <- st_as_sf(eval@occs, coords = c("longitude", "latitude"), crs = 4326)
usa <- ne_states(country = "United States of America", returnclass = "sf")
bbox_vals <- sf::st_bbox(occ_sf)
p <- ggplot() +
geom_tile(data = r_df, aes(x = x, y = y, fill = suitability)) +
scale_fill_viridis_c(name = "Suitability") +
geom_sf(data = occ_sf, color = "red", size = 0.1) +
geom_sf(data = usa, fill = NA, color = "white", linewidth = 0.5) +
coord_sf(
xlim = c(bbox_vals["xmin"] - 2, bbox_vals["xmax"] + 2),
ylim = c(bbox_vals["ymin"] - 2, bbox_vals["ymax"] + 2),
expand = FALSE) +
labs(
title = paste("Predicted Suitability -", sp),
x = "Longitude",
y = "Latitude"
) +
theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "grey80", color = NA),
plot.background = element_rect(fill = "white", color = NA)
) +
annotation_north_arrow(
location = "tl",
height = unit(1, "cm"),
width = unit(1, "cm")
)
print(p)
## Save raster
writeRaster(
x = opt.pred,
filename = paste0("./data/05_ENMs/", gsub(" ", "_", sp), "_ENM_optModel.asc"),
overwrite = TRUE,
NAflag = -9999
)
ggsave(
filename = paste0("./data/05_ENMs/", gsub(" ", "_", sp), "_Optimal_Model_Map.png"),
plot = p,
width = 8,
height = 5,
dpi = 300
)
}
## Processing species: Pyxidanthera barbulata
## Package ecospat is not installed, so Continuous Boyce Index (CBI) cannot be calculated.
## *** Running initial checks... ***
## * Randomly sampling 10000 background points ...
## * Removed 2 occurrence points with NA predictor variable values.
## * Clamping predictor variable rasters...
## * Model evaluations with spatial block (4-fold) cross validation and lat_lon orientation...
##
## *** Running ENMeval v2.0.5 with maxent.jar v3.4.3 using the predicts package v0.1.19 ***
## | | | 0% | |================== | 25% | |=================================== | 50% | |==================================================== | 75% | |======================================================================| 100%
## Making model prediction rasters...
## ENMevaluate completed in 0 minutes 58.8 seconds.
## Processing species: Pyxidanthera brevifolia
## Package ecospat is not installed, so Continuous Boyce Index (CBI) cannot be calculated.
## *** Running initial checks... ***
## * Randomly sampling 10000 background points ...
## * Clamping predictor variable rasters...
## * Model evaluations with spatial block (4-fold) cross validation and lat_lon orientation...
##
## *** Running ENMeval v2.0.5 with maxent.jar v3.4.3 using the predicts package v0.1.19 ***
## | | | 0% | |================== | 25% | |=================================== | 50% | |==================================================== | 75% | |======================================================================| 100%
## Making model prediction rasters...
## ENMevaluate completed in 0 minutes 13.4 seconds.
## Processing species: Shortia galacifolia
## Package ecospat is not installed, so Continuous Boyce Index (CBI) cannot be calculated.
## *** Running initial checks... ***
## * Randomly sampling 10000 background points ...
## * Clamping predictor variable rasters...
## * Model evaluations with spatial block (4-fold) cross validation and lat_lon orientation...
##
## *** Running ENMeval v2.0.5 with maxent.jar v3.4.3 using the predicts package v0.1.19 ***
## | | | 0% | |================== | 25% | |=================================== | 50% | |==================================================== | 75% | |======================================================================| 100%
## Making model prediction rasters...
## ENMevaluate completed in 0 minutes 16 seconds.
library(ENMeval)
library(ggplot2)
library(dplyr)
Load Occurrence Data
# Load all occurrence records
alldf <- read.csv("data/02_cleaning/maxent_ready/diapensiaceae_maxentready_2025_06_27.csv")
# Load optimal feature class (fc) and regularization multiplier (rm)
opt.seq <- read.delim("data/05_ENMs/Galax_urceolata_OptModel.txt")
# Extract parameters
fc <- opt.seq$fc
rm <- opt.seq$rm
# Load saved ENMeval object for Galax urceolata
load("data/05_ENMs/Galax_urceolata_ENM_eval.RData")
# Extract comparison results between empirical and null models
null_comparison_results <- null.emp.results(spec.mod.null)
# Save results as CSV
write.csv(
null_comparison_results,
file = "data/05_ENMs/Galax_urceolata_null_comparison_results.csv",
row.names = FALSE
)
# Generate histogram plots for AUC and OR.10p statistics
spec.null <- evalplot.nulls(
spec.mod.null,
stats = c("or.10p", "auc.val"),
plot.type = "histogram"
)
# Display plot in R console
plot(spec.null)
# Save plot to PNG
ggsave(
filename = "Galax_urceolata_null_histogram.png",
plot = spec.null,
path = "data/05_ENMs/",
height = 12,
width = 13
)
Increase Java memory limit to avoid OutOfMemoryError. Some ENM packages, especially dismo and older ENMeval workflows, may call Java routines that require more RAM. This sets the Java heap space to 8GB.
options(java.parameters = "-Xmx30g")
library(terra) # For reading and manipulating raster
## data (modern replacement for raster package)
library(dismo) # For predicting from ENMs and
## compatibility with Maxent models
library(ENMeval) # For calculating niche overlap statistics
library(ENMTools) # For calculating niche breadth (Levins' B2)
library(ggplot2) # For creating publication-quality plots
library(rnaturalearth) # For downloading shapefiles of countries and states
library(viridis) # For colorblind-friendly color scales
library(ggspatial) # For adding north arrows to plots
library(dplyr) # For data manipulation
library(ape) # For reading and processing phylogenetic trees
library(biomod2) # For range size change calculations
## between binary rasters
Load in climate files for the Eastern Temperate Forest Region
# List all climate raster files (.asc format) for the Eastern Temperate Forests (ETF) region.
# These will be used as environmental predictors for projecting the ENMs.
clim_files <- list.files("./data/04_climate_processing/CurrentEasternTemperateForests",
pattern = "\\.asc$",
full.names = TRUE)
# Read all climate rasters into a single SpatRaster object for efficient processing.
clim_stack <- terra::rast(clim_files)
# Plot a quick preview of the first four layers to check that rasters loaded correctly.
plot(clim_stack[[1:4]])
For a set of species, project where in the Eastern Temperate Forest Region they have suitable niche space given our model.
# Define a vector of the species you want to process.
# Each species has an ENM already built and saved to disk.
species_list <- c("Galax_urceolata",
"Pyxidanthera_barbulata",
"Pyxidanthera_brevifolia",
"Shortia_galacifolia")
# Load the USA state boundaries as an sf object.
# This will be used as a basemap for plotting projections.
usa <- rnaturalearth::ne_states(country = "United States of America",
returnclass = "sf")
# Loop through each species to project their ENM onto the current climate layers.
for (species in species_list) {
cat("\nProjecting current climate for:", species, "\n")
# Load the optimal ENM for this species.
# The object `opt.mod` will be loaded into the workspace.
mod_file <- paste0("data/05_ENMs/", species, "_opt_mod.RData")
load(mod_file) # loads object: opt.mod
# Identify which layers were used to build the ENM.
# This ensures that your projection uses the same predictor variables
# as the model was trained on.
vif_dir <- paste0("data/04_climate_processing/Cropped/", species, "/VIF/")
species_layers <- list.files(vif_dir,
full.names = TRUE)
# Read in those specific rasters
spec_stack <- terra::rast(species_layers)
layer_names <- names(spec_stack)
# Subset the full ETF raster stack to only the layers used in the ENM.
ETF_Rasters <- terra::subset(clim_stack,
layer_names)
#if error, try: ETF_Rasters <- raster::stack(ETF_Rasters)
# Project the ENM onto the current ETF climate layers.
proj_file <- paste0("data/06_ENM_processing/",
species,
"_EFT_Projection.asc")
p <- dismo::predict(opt.mod,
ETF_Rasters,
filename = proj_file,
overwrite = TRUE,
NAflag = -9999)
# Convert the raster to a data frame for plotting in ggplot.
p_df <- as.data.frame(p, xy = TRUE)
colnames(p_df)[3] <- "Habitat_Suitability"
# Get the bounding box coordinates of the raster for plotting.
bbox_vals <- terra::ext(p)
# Create the habitat suitability map.
p_plot <- ggplot() +
geom_sf(data = usa,
fill = "grey70") + # basemap
geom_tile(data = p_df, aes(x = x,
y = y,
fill = Habitat_Suitability)) +
geom_sf(data = usa,
fill = NA,
color = "white",
linewidth = 0.5) +
coord_sf(xlim = c(bbox_vals$xmin - 2,
bbox_vals$xmax + 2),
ylim = c(bbox_vals$ymin - 2,
bbox_vals$ymax + 2),
expand = FALSE) +
scale_fill_viridis_c(name = "Suitability") +
labs(title = paste0(gsub("_", " ", species),
" - Eastern Temperate Forest Habitat Suitability"),
x = "Longitude",
y = "Latitude") +
theme_bw() +
theme(panel.grid = element_blank(),
axis.text = element_text(size = 8),
axis.title = element_text(size = 8),
plot.title = element_text(size = 10)) +
annotation_north_arrow(location = "tl",
height = unit(1, "cm"),
width = unit(1, "cm"))
# Save the plot as a PNG file.
png_file <- paste0("data/06_ENM_processing/",
species,
"_ETF_Projection_Plot.png")
ggsave(filename = png_file,
plot = p_plot,
width = 7,
height = 5,
dpi = 300)
}
For each species, calculate niche breadth across the ETF region. Niche breadth (Levins’ B2) indicates whether a species is a generalist (high value) or specialist (low value).
for (species in species_list) {
cat("\nCalculating niche breadth for:", species, "\n")
proj_file <- paste0("data/06_ENM_processing/", species, "_EFT_Projection.asc")
proj_raster <- terra::rast(proj_file)
# Compute Levins' B2 niche breadth
breadth <- ENMTools::raster.breadth(proj_raster)
cat("Niche breadth:", round(breadth$B2, 3), "\n")
}
We can stack all of the species niche models to evaluate how much they overlap
# Stack all species projections into a single SpatRaster object.
# This allows calculation of pairwise niche overlap (Schoener's D).
enm_stack <- c(terra::rast("data/06_ENM_processing/Galax_urceolata_EFT_Projection.asc"),
terra::rast("data/06_ENM_processing/Pyxidanthera_barbulata_EFT_Projection.asc"),
terra::rast("data/06_ENM_processing/Pyxidanthera_brevifolia_EFT_Projection.asc"),
terra::rast("data/06_ENM_processing/Shortia_galacifolia_EFT_Projection.asc")
)
# Assign species names as layer names in the raster stack.
names(enm_stack) <- c("Galax urceolata",
"Pyxidanthera barbulata",
"Pyxidanthera brevifolia",
"Shortia galacifolia")
enm_stack_raster <- stack(enm_stack)
# Calculate pairwise Schoener's D overlap between species.
# Values range from 0 (no overlap) to 1 (identical niches).
overlap_matrix <- calc.niche.overlap(enm_stack_raster,
overlapStat = "D")
print(overlap_matrix)
# Read the phylogenetic tree of the species.
tree <- ape::read.tree("data/06_ENM_processing/diapensiaceae_subset.tre")
# Drop the outgroup so only focal species remain.
tree <- ape::drop.tip(tree, "Cyrilla_racemiflora")
# Replace underscores with spaces to match overlap matrix species names.
tree$tip.label <- gsub("_", " ", tree$tip.label)
# Calculate pairwise phylogenetic distances between species.
phylo_dist <- cophenetic(tree)
# Ensure the phylogenetic distance matrix matches the species order in the overlap matrix.
species_names <- rownames(overlap_matrix)
species_names <- gsub("\\.", " ", species_names)
phylo_dist <- phylo_dist[species_names, species_names]
# Initialize vectors to store pairwise values.
pair_names <- NULL
phylo_vals <- NULL
overlap_vals <- NULL
# Loop through the lower triangle of the matrices to extract pairwise comparisons.
for (i in 2:length(species_names)) {
for (j in 1:(i - 1)) {
pair_names <- c(pair_names,
paste(species_names[i],
species_names[j], sep = " - "))
phylo_vals <- c(phylo_vals, phylo_dist[i, j])
overlap_vals <- c(overlap_vals,
overlap_matrix[i, j])
}
}
# Fit a linear regression between phylogenetic distance and niche overlap.
lm_res <- lm(overlap_vals ~ phylo_vals)
s <- summary(lm_res)
print(s)
# Format regression statistics for display on the plot.
label_text <- paste0("Intercept = ", format(round(coef(lm_res)[1], 3)), "\n",
"Slope = ", format(round(coef(lm_res)[2], 5)), "\n",
"P-value = ", format(signif(s$coefficients[2, 4], 3)), "\n",
"R² = ", format(round(s$r.squared, 3)))
# Plot the relationship between phylogenetic distance and niche overlap.
plot(phylo_vals, overlap_vals,
xlab = "Phylogenetic Distance",
ylab = "Niche Overlap (Schoener's D)",
pch = 19)
abline(lm_res, col = "red", lwd = 2)
text(phylo_vals - 3,
overlap_vals - 0.02,
labels = 1:length(pair_names),
pos = 3,
cex = 0.8)
text(x = max(phylo_vals) * 0.7,
y = max(overlap_vals) * 0.9,
labels = label_text,
col = "red",
cex = 0.9, pos = 4)
legend("topleft",
legend = paste(1:length(pair_names), pair_names, sep = ": "),
bty = "n", cex = 0.7)
For niche Age Overlap Correlations (AOCs), a high intercept and negative slope indicate niche conservatism, meaning closely related species tend to occupy similar ecological niches. A low intercept combined with a positive slope suggests niche divergence, where closely related species occupy different niches.The results here are pretty meaningless since we’re looking at very few, distantly related species, but it serves the purpose of the demo. A linear regression indicated no significant relationship between phylogenetic distance and niche overlap among the four species, suggesting minimal phylogenetic signal in ecological niche similarity.
# List future climate layers under the SSP370 scenario for 2081-2100.
future_files <- list.files("data/04_climate_processing/ACCESS-CM2_2081-2100_ssp370/",
pattern = "\\.asc$",
full.names = TRUE)
# Add the current elevation layer to the future stack, since elevation remains constant.
elev_file <- list.files("data/04_climate_processing/CurrentEasternTemperateForests",
pattern = "elev\\.asc$",
full.names = TRUE)
futurestack <- terra::rast(c(future_files, elev_file))
# Project each species onto future ETF climate conditions.
for (species in species_list) {
cat("\nProjecting future climate for:", species, "\n")
load(paste0("data/05_ENMs/", species, "_opt_mod.RData"))
vif_dir <- paste0("data/04_climate_processing/Cropped/", species, "/VIF/")
species_layers <- list.files(vif_dir, full.names = TRUE)
spec_stack <- terra::rast(species_layers)
layer_names <- names(spec_stack)
FutureETF_Rasters <- terra::subset(futurestack, layer_names)
#if error, try: FutureETF_Rasters <- raster::stack(FutureETF_Rasters)
future_file <- paste0("data/06_ENM_processing/", species, "_Future_ETF_Projection.asc")
p_future <- dismo::predict(opt.mod, FutureETF_Rasters, filename = future_file, overwrite = TRUE, NAflag = -9999)
save(p_future, file = sub("\\.asc$", ".RData", future_file))
# Plot the future habitat suitability map.
p_future_df <- as.data.frame(p_future, xy = TRUE)
colnames(p_future_df)[3] <- "Habitat_Suitability"
bbox_vals <- terra::ext(p_future)
p_plot <- ggplot() +
geom_sf(data = usa, fill = "grey70") +
geom_tile(data = p_future_df,
aes(x = x, y = y, fill = Habitat_Suitability)) +
geom_sf(data = usa, fill = NA, color = "white", linewidth = 0.5) +
coord_sf(xlim = c(bbox_vals$xmin - 2, bbox_vals$xmax + 2),
ylim = c(bbox_vals$ymin - 2, bbox_vals$ymax + 2),
expand = FALSE) +
scale_fill_viridis_c(name = "Future Suitability") +
labs(title = paste0(gsub("_", " ", species), " - Future Habitat Suitability"),
x = "Longitude",
y = "Latitude") +
theme_bw() +
theme(panel.grid = element_blank(),
axis.text = element_text(size = 8),
axis.title = element_text(size = 8),
plot.title = element_text(size = 10)) +
annotation_north_arrow(location = "tl",
height = unit(1, "cm"),
width = unit(1, "cm"))
png_file <- paste0("data/06_ENM_processing/", species, "_Future_ETF_Projection.png")
ggsave(filename = png_file, plot = p_plot, width = 7, height = 5, dpi = 300)
}
# Initialize a list to hold change maps for each species.
all_dfs <- list()
for (species in species_list) {
cat("\nComparing projections for:", species, "\n")
# Load current and future suitability rasters.
current <- terra::rast(paste0("data/06_ENM_processing/", species, "_EFT_Projection.asc"))
future <- terra::rast(paste0("data/06_ENM_processing/", species, "_Future_ETF_Projection.asc"))
# Threshold each raster at 0.7 to produce binary presence/absence maps.
current_binary <- current >= 0.7
future_binary <- future >= 0.7
# Compute range changes between current and future projections.
RangeSizeDiff <- BIOMOD_RangeSize(as(current_binary, "Raster"),
as(future_binary, "Raster"))
diff_raster <- RangeSizeDiff$Diff.By.Pixel
df <- as.data.frame(diff_raster, xy = TRUE)
colnames(df) <- c("x", "y", "change")
# Convert numeric change codes to categorical labels for plotting.
df$fill <- factor(df$change,
levels = c(-2, -1, 0, 1),
labels = c("lost", "unchanged", "not occupied", "occupied in future"))
# Add species name for faceting plots.
df$species <- gsub("_", " ", species)
all_dfs[[species]] <- df
}
# Combine all species into one data frame for faceted plotting.
combined_df <- bind_rows(all_dfs)
# Determine overall plotting extent.
combined_bbox <- combined_df %>%
summarise(xmin = min(x, na.rm = TRUE),
xmax = max(x, na.rm = TRUE),
ymin = min(y, na.rm = TRUE),
ymax = max(y, na.rm = TRUE))
# Plot all species' range shifts together.
p_all <- ggplot(combined_df) +
geom_sf(data = usa, fill = "grey70") +
geom_tile(aes(x = x, y = y, fill = fill)) +
geom_sf(data = usa, fill = NA, color = "white", linewidth = 0.5) +
coord_sf(xlim = c(combined_bbox$xmin - 2, combined_bbox$xmax + 2),
ylim = c(combined_bbox$ymin - 2, combined_bbox$ymax + 2),
expand = FALSE) +
scale_fill_viridis_d(name = "Pixel Change", na.value = "white") +
facet_wrap(~ species) +
labs(title = "Current vs Future Habitat Suitability Change",
x = "Longitude",
y = "Latitude") +
theme_bw() +
theme(panel.grid = element_blank(),
axis.text = element_text(size = 8),
axis.title = element_text(size = 8),
plot.title = element_text(size = 10),
strip.text = element_text(size = 9)) +
annotation_north_arrow(
location = "tl",
height = unit(1, "cm"),
width = unit(1, "cm"))
ggsave("data/06_ENM_processing/all_species_range_change_map.png",
plot = p_all,
width = 10,
height = 8,
dpi = 300)