This projekt deals with species distribution modelling of butterfly species in Pakistan. In the following script, distribution data of butterfly species as well as environmental data from http://chelsa-climate.org/ database is prepared to perform “boosted regression tree”(BRT) modelling process. BRT is an ensemble of methods for fitting statistical models (Elith et al. 2008). Aim of this work is to use the created model in order to predict on potential distribution areas of a particular buttefly species.
benefits of BRT-modelling:
Befor we start, i recommend you to watch the following video. It gives further information and clarification concerning BRT. It illustrates how to use BRT and also the mathmatics behind it.
library(vembedr)
## Warning: package 'vembedr' was built under R version 3.6.2
embed_url("https://www.youtube.com/watch?v=3CC4N4z3GJc")
First of all, you will need the following librarys for the process:
library(dismo)
library(raster)
library(gbm)
library(maptools)
library(rgdal)
library(shapefiles)
After loading in the librarys, we can start with the data preparation now. ***
# distribution-data
distribution_shape <- read.shp("C:/Users/ich/Desktop/UNI/räumliche_Vorhersage/distribution_Pakistan_all_test.shp")
distribution_dataframe <- as.data.frame(distribution_shape)
occurrence_csv <- paste0(system.file(package="dismo"), "C:/Users/ich/Desktop/UNI/räumliche_Vorhersage/distribution_shape_csv.csv")
head(occurrence_csv)
## [1] "C:/Users/ich/Documents/R/win-library/3.6/dismoC:/Users/ich/Desktop/UNI/räumliche_Vorhersage/distribution_shape_csv.csv"
occurrence <- read.csv("C:/Users/ich/Desktop/UNI/räumliche_Vorhersage/distribution_shape_csv.csv")
head(occurrence)
## X shp.record shp.x shp.y shp.shape.type header.file.code
## 1 1 1 72.07099 35.99492 1 9994
## 2 2 2 71.99009 35.15735 1 9994
## 3 3 3 73.39397 34.45065 1 9994
## 4 4 4 73.16466 33.82338 1 9994
## 5 5 5 73.16823 34.13390 1 9994
## 6 6 6 73.41688 34.00422 1 9994
## header.file.length header.file.version header.shape.type header.xmin
## 1 82230 1000 1 62.23056
## 2 82230 1000 1 62.23056
## 3 82230 1000 1 62.23056
## 4 82230 1000 1 62.23056
## 5 82230 1000 1 62.23056
## 6 82230 1000 1 62.23056
## header.ymin header.xmax header.ymax header.zmin header.zmax header.mmin
## 1 24.13072 77.77783 37.11705 0 0 0
## 2 24.13072 77.77783 37.11705 0 0 0
## 3 24.13072 77.77783 37.11705 0 0 0
## 4 24.13072 77.77783 37.11705 0 0 0
## 5 24.13072 77.77783 37.11705 0 0 0
## 6 24.13072 77.77783 37.11705 0 0 0
## header.mmax
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
occurrence_lonlat <- occurrence [,3:4]
head(occurrence_lonlat)
## shp.x shp.y
## 1 72.07099 35.99492
## 2 71.99009 35.15735
## 3 73.39397 34.45065
## 4 73.16466 33.82338
## 5 73.16823 34.13390
## 6 73.41688 34.00422
dataframe_occurrence_record_lonlat <- occurrence [,2:4]
head(dataframe_occurrence_record_lonlat)
## shp.record shp.x shp.y
## 1 1 72.07099 35.99492
## 2 2 71.99009 35.15735
## 3 3 73.39397 34.45065
## 4 4 73.16466 33.82338
## 5 5 73.16823 34.13390
## 6 6 73.41688 34.00422
write.csv(dataframe_occurrence_record_lonlat, "C:/Users/ich/Desktop/UNI/räumliche_Vorhersage/dataframe_occurrence_record_lonlat.csv")
head(dataframe_occurrence_record_lonlat)
## shp.record shp.x shp.y
## 1 1 72.07099 35.99492
## 2 2 71.99009 35.15735
## 3 3 73.39397 34.45065
## 4 4 73.16466 33.82338
## 5 5 73.16823 34.13390
## 6 6 73.41688 34.00422
Now we have built a dataset of the distribution data containing different types of data, showing the lat/lon values of all species sightings.
We go on with cutting out the species and subspecies names of our data. ***
# read in species_csv
species_subspecies <- read.csv("C:/Users/ich/Desktop/UNI/räumliche_Vorhersage/Pakistan_alles/distribution_Pakistan_all_species.csv")
species_subspecies_cut <- species_subspecies [,0:2]
head(species_subspecies_cut)
## species.C.80 subspecies.C.80
## 1 Actinor_radians
## 2 Actinor_radians
## 3 Actinor_radians
## 4 Aeromachus_stigmatus
## 5 Aeromachus_stigmatus
## 6 Aeromachus_stigmatus
Now we can combine the two generated dataframes to one:
species_distribution_combined <- data.frame(dataframe_occurrence_record_lonlat, species_subspecies_cut)
head(species_distribution_combined)
## shp.record shp.x shp.y species.C.80 subspecies.C.80
## 1 1 72.07099 35.99492 Actinor_radians
## 2 2 71.99009 35.15735 Actinor_radians
## 3 3 73.39397 34.45065 Actinor_radians
## 4 4 73.16466 33.82338 Aeromachus_stigmatus
## 5 5 73.16823 34.13390 Aeromachus_stigmatus
## 6 6 73.41688 34.00422 Aeromachus_stigmatus
# subset for species: Pyrgus_cashmirensis
subset_Pyrgus_cashmirensis <- subset(species_distribution_combined, species.C.80=="Pyrgus_cashmirensis")
subset_Pyrgus_cashmirensis
## shp.record shp.x shp.y species.C.80 subspecies.C.80
## 363 363 76.34609 35.53195 Pyrgus_cashmirensis pseudoalpinus
## 364 364 75.50427 35.33850 Pyrgus_cashmirensis pseudoalpinus
## 365 365 75.60242 35.95936 Pyrgus_cashmirensis pseudoalpinus
## 366 366 75.31322 35.00677 Pyrgus_cashmirensis pseudoalpinus
## 367 367 75.22376 35.24662 Pyrgus_cashmirensis pseudoalpinus
## 368 368 74.88911 35.27707 Pyrgus_cashmirensis pseudoalpinus
## 369 369 74.64545 35.03342 Pyrgus_cashmirensis pseudoalpinus
## 370 370 74.78403 36.11664 Pyrgus_cashmirensis pseudoalpinus
## 371 371 74.89111 36.29153 Pyrgus_cashmirensis pseudoalpinus
## 372 372 74.59487 36.24870 Pyrgus_cashmirensis pseudoalpinus
## 373 373 75.16594 36.87331 Pyrgus_cashmirensis pseudoalpinus
## 374 374 74.62342 36.82334 Pyrgus_cashmirensis pseudoalpinus
## 375 375 74.27721 36.12735 Pyrgus_cashmirensis pseudoalpinus
## 376 376 74.11302 36.27012 Pyrgus_cashmirensis pseudoalpinus
## 377 377 73.81678 36.45929 Pyrgus_cashmirensis pseudoalpinus
## 378 378 73.79090 36.14564 Pyrgus_cashmirensis pseudoalpinus
## 379 379 73.96044 35.92971 Pyrgus_cashmirensis pseudoalpinus
## 380 380 73.24153 36.80245 Pyrgus_cashmirensis pseudoalpinus
## 381 381 73.14444 36.33322 Pyrgus_cashmirensis pseudoalpinus
## 382 382 72.70274 36.60831 Pyrgus_cashmirensis pseudoalpinus
## 383 383 72.48888 36.51182 Pyrgus_cashmirensis pseudoalpinus
## 384 384 72.34802 36.63651 Pyrgus_cashmirensis pseudoalpinus
## 385 385 72.31571 36.32925 Pyrgus_cashmirensis pseudoalpinus
## 386 386 72.06908 36.23717 Pyrgus_cashmirensis pseudoalpinus
## 387 387 71.88414 36.24764 Pyrgus_cashmirensis pseudoalpinus
## 388 388 73.04208 36.08512 Pyrgus_cashmirensis pseudoalpinus
## 389 389 72.80182 36.10568 Pyrgus_cashmirensis pseudoalpinus
## 390 390 72.62492 36.11361 Pyrgus_cashmirensis pseudoalpinus
## 391 391 72.63135 35.95371 Pyrgus_cashmirensis pseudoalpinus
## 392 392 72.29680 35.96861 Pyrgus_cashmirensis pseudoalpinus
## 393 393 71.83670 35.97789 Pyrgus_cashmirensis pseudoalpinus
## 394 394 72.05561 35.75802 Pyrgus_cashmirensis pseudoalpinus
## 395 395 71.91070 35.60360 Pyrgus_cashmirensis pseudoalpinus
## 396 396 71.73938 35.65428 Pyrgus_cashmirensis pseudoalpinus
## 397 397 70.74320 33.87831 Pyrgus_cashmirensis cashmirensis
## 398 398 71.43325 34.53719 Pyrgus_cashmirensis cashmirensis
## 399 399 76.32010 34.95386 Pyrgus_cashmirensis cashmirensis
## 400 400 75.82137 34.89866 Pyrgus_cashmirensis cashmirensis
## 401 401 74.00003 35.16516 Pyrgus_cashmirensis cashmirensis
## 402 402 73.80492 35.13423 Pyrgus_cashmirensis cashmirensis
## 403 403 73.73651 34.75946 Pyrgus_cashmirensis cashmirensis
## 404 404 73.61873 34.90044 Pyrgus_cashmirensis cashmirensis
## 405 405 73.39967 34.77456 Pyrgus_cashmirensis cashmirensis
subset_Pyrgus_cashmirensis_lat_lon <- subset_Pyrgus_cashmirensis[,2:3]
subset_Pyrgus_cashmirensis_lat_lon
## shp.x shp.y
## 363 76.34609 35.53195
## 364 75.50427 35.33850
## 365 75.60242 35.95936
## 366 75.31322 35.00677
## 367 75.22376 35.24662
## 368 74.88911 35.27707
## 369 74.64545 35.03342
## 370 74.78403 36.11664
## 371 74.89111 36.29153
## 372 74.59487 36.24870
## 373 75.16594 36.87331
## 374 74.62342 36.82334
## 375 74.27721 36.12735
## 376 74.11302 36.27012
## 377 73.81678 36.45929
## 378 73.79090 36.14564
## 379 73.96044 35.92971
## 380 73.24153 36.80245
## 381 73.14444 36.33322
## 382 72.70274 36.60831
## 383 72.48888 36.51182
## 384 72.34802 36.63651
## 385 72.31571 36.32925
## 386 72.06908 36.23717
## 387 71.88414 36.24764
## 388 73.04208 36.08512
## 389 72.80182 36.10568
## 390 72.62492 36.11361
## 391 72.63135 35.95371
## 392 72.29680 35.96861
## 393 71.83670 35.97789
## 394 72.05561 35.75802
## 395 71.91070 35.60360
## 396 71.73938 35.65428
## 397 70.74320 33.87831
## 398 71.43325 34.53719
## 399 76.32010 34.95386
## 400 75.82137 34.89866
## 401 74.00003 35.16516
## 402 73.80492 35.13423
## 403 73.73651 34.75946
## 404 73.61873 34.90044
## 405 73.39967 34.77456
#final presence data ***
p_a <- 1
final_presence <- cbind(subset_Pyrgus_cashmirensis_lat_lon, p_a)
final_presence
## shp.x shp.y p_a
## 363 76.34609 35.53195 1
## 364 75.50427 35.33850 1
## 365 75.60242 35.95936 1
## 366 75.31322 35.00677 1
## 367 75.22376 35.24662 1
## 368 74.88911 35.27707 1
## 369 74.64545 35.03342 1
## 370 74.78403 36.11664 1
## 371 74.89111 36.29153 1
## 372 74.59487 36.24870 1
## 373 75.16594 36.87331 1
## 374 74.62342 36.82334 1
## 375 74.27721 36.12735 1
## 376 74.11302 36.27012 1
## 377 73.81678 36.45929 1
## 378 73.79090 36.14564 1
## 379 73.96044 35.92971 1
## 380 73.24153 36.80245 1
## 381 73.14444 36.33322 1
## 382 72.70274 36.60831 1
## 383 72.48888 36.51182 1
## 384 72.34802 36.63651 1
## 385 72.31571 36.32925 1
## 386 72.06908 36.23717 1
## 387 71.88414 36.24764 1
## 388 73.04208 36.08512 1
## 389 72.80182 36.10568 1
## 390 72.62492 36.11361 1
## 391 72.63135 35.95371 1
## 392 72.29680 35.96861 1
## 393 71.83670 35.97789 1
## 394 72.05561 35.75802 1
## 395 71.91070 35.60360 1
## 396 71.73938 35.65428 1
## 397 70.74320 33.87831 1
## 398 71.43325 34.53719 1
## 399 76.32010 34.95386 1
## 400 75.82137 34.89866 1
## 401 74.00003 35.16516 1
## 402 73.80492 35.13423 1
## 403 73.73651 34.75946 1
## 404 73.61873 34.90044 1
## 405 73.39967 34.77456 1
Now we have the final presence dataset for the species Pyrgus cashmirensis. Later we will combine it with a generated set of pseudo-absence data and the predictor varriables of the environmental layers.
In order to convert our environmental data into the desired dataframe for model procession, have a look at the following procedure: # Preparation of environmental data: chelsa-dataframe
## chelsa data preparation ####
#merge
merged_chelsa_annual_mean_temp_path <- "C:/Users/ich/Desktop/UNI/räumliche_Vorhersage/Pakistan_alles/chelsa/annual_mean_temperature/"
merged_chelsa_annual_mean_temp_path
## [1] "C:/Users/ich/Desktop/UNI/räumliche_Vorhersage/Pakistan_alles/chelsa/annual_mean_temperature/"
merged_chelsa_annual_mean_temp_files <- list.files("C:/Users/ich/Desktop/UNI/räumliche_Vorhersage/Pakistan_alles/chelsa/annual_mean_temperature/")
merged_chelsa_annual_mean_temp_files
## [1] "CHELSA_temp10_01_1979-2013_V1.2_land.tif"
## [2] "CHELSA_temp10_02_1979-2013_V1.2_land.tif"
## [3] "CHELSA_temp10_03_1979-2013_V1.2_land.tif"
## [4] "CHELSA_temp10_04_1979-2013_V1.2_land.tif"
## [5] "CHELSA_temp10_05_1979-2013_V1.2_land.tif"
## [6] "CHELSA_temp10_06_1979-2013_V1.2_land.tif"
## [7] "CHELSA_temp10_07_1979-2013_V1.2_land.tif"
## [8] "CHELSA_temp10_08_1979-2013_V1.2_land (1).tif"
## [9] "CHELSA_temp10_09_1979-2013_V1.2_land.tif"
## [10] "CHELSA_temp10_10_1979-2013_V1.2_land.tif"
## [11] "CHELSA_temp10_11_1979-2013_V1.2_land.tif"
## [12] "CHELSA_temp10_12_1979-2013_V1.2_land.tif"
## [13] "merged"
raster_list <- list(merged_chelsa_annual_mean_temp_files)
raster_list
## [[1]]
## [1] "CHELSA_temp10_01_1979-2013_V1.2_land.tif"
## [2] "CHELSA_temp10_02_1979-2013_V1.2_land.tif"
## [3] "CHELSA_temp10_03_1979-2013_V1.2_land.tif"
## [4] "CHELSA_temp10_04_1979-2013_V1.2_land.tif"
## [5] "CHELSA_temp10_05_1979-2013_V1.2_land.tif"
## [6] "CHELSA_temp10_06_1979-2013_V1.2_land.tif"
## [7] "CHELSA_temp10_07_1979-2013_V1.2_land.tif"
## [8] "CHELSA_temp10_08_1979-2013_V1.2_land (1).tif"
## [9] "CHELSA_temp10_09_1979-2013_V1.2_land.tif"
## [10] "CHELSA_temp10_10_1979-2013_V1.2_land.tif"
## [11] "CHELSA_temp10_11_1979-2013_V1.2_land.tif"
## [12] "CHELSA_temp10_12_1979-2013_V1.2_land.tif"
## [13] "merged"
for(i in 1:12)
{
tilesloop <- paste0(merged_chelsa_annual_mean_temp_path, merged_chelsa_annual_mean_temp_files[i])
tilesloop
raster_list[[i]] <- raster(tilesloop)
rasterliste <- raster_list[[i]]
rasterliste
}
raster_list$filename <- "C:/Users/ich/Desktop/UNI/räumliche_Vorhersage/Pakistan_alles/chelsa/annual_mean_temperature/merged/merged_chelsa_annual_mean_temp.tif"
exporttif <- raster_list$filename
exporttif
## [1] "C:/Users/ich/Desktop/UNI/räumliche_Vorhersage/Pakistan_alles/chelsa/annual_mean_temperature/merged/merged_chelsa_annual_mean_temp.tif"
raster_list$overwrite <- TRUE
all_merged_chelsa_annual_mean_temp_raster <- do.call(raster::merge, raster_list, quote=TRUE)
all_merged_chelsa_annual_mean_temp_raster
## class : RasterLayer
## dimensions : 20880, 43200, 902016000 (nrow, ncol, ncell)
## resolution : 0.008333333, 0.008333333 (x, y)
## extent : -180.0001, 179.9999, -90.00014, 83.99986 (xmin, xmax, ymin, ymax)
## crs : +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## source : C:/Users/ich/Desktop/UNI/räumliche_Vorhersage/Pakistan_alles/chelsa/annual_mean_temperature/merged/merged_chelsa_annual_mean_temp.tif
## names : merged_chelsa_annual_mean_temp
## values : -457, 345 (min, max)
plot(all_merged_chelsa_annual_mean_temp_raster, main="all_merged chelsa annual mean temperature")
chelsa_annual_global_prec <-
raster("C:/Users/ich/Desktop/UNI/räumliche_Vorhersage/Pakistan_alles/chelsa/annual_global_prec/CHELSA_bio10_12.tif")
chelsa_prec_warmest_quarter <-
raster("C:/Users/ich/Desktop/UNI/räumliche_Vorhersage/Pakistan_alles/chelsa/prec_warmest_quarter/CHELSA_bio10_18.tif")
chelsa_warmest_quarter_mean_temp <-
raster("C:/Users/ich/Desktop/UNI/räumliche_Vorhersage/Pakistan_alles/chelsa/warmest_quarter_mean_t_global/CHELSA_bio10_10.tif")
plot(all_merged_chelsa_annual_mean_temp_raster, main="all_merged chelsa annual mean temperature")
plot(chelsa_annual_global_prec, main="chelsa annual global prec")