Last active
February 18, 2020 03:47
-
-
Save willpearse/1bdfaff2eb8a93080f159d7a77993d96 to your computer and use it in GitHub Desktop.
MADcomm broken (fixable?) functions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #' @export | |
| .reed.2017a <- function(...) { | |
| data <-read.csv("http://pasta.lternet.edu/package/data/eml/knb-lter-sbc/17/30/a7899f2e57ea29a240be2c00cce7a0d4", as.is=TRUE) | |
| names(data) <- tolower(names(data)) | |
| data$count[data$count < 0] <- 0 | |
| data$taxon_species[data$taxon_species == -99999] <- NA | |
| data$taxon_genus[data$taxon_genus == -99999] <- NA | |
| data$species <- with(data, paste(taxon_genus, taxon_species, sep="_")) | |
| data$site <- with(data, paste(site, transect, sep="_")) | |
| data$site_year <- with(data, paste(site, year, sep="_")) | |
| data <- with(data, tapply(count, list(site_year, species), sum, na.rm = TRUE)) | |
| data[is.na(data)] <- 0 | |
| temp <- strsplit(rownames(data), "_") | |
| year <- matrix(unlist(temp), ncol=3, byrow=TRUE)[,3] | |
| name <- matrix(unlist(temp), ncol=3, byrow=TRUE)[,1] | |
| return(.matrix.melt(data, | |
| data.frame(units="#"), | |
| data.frame(id=rownames(data), year, name, lat=NA, long=NA, address=NA, area=NA), | |
| data.frame(species=colnames(data), taxonomy=NA))) | |
| } | |
| #' @export | |
| .reed.2017b <- function(...) { | |
| data <-read.csv("https://pasta.lternet.edu/package/data/eml/knb-lter-sbc/19/23/5daf0da45925ba9014872c6bc9f6c8bb") | |
| names(data) <- tolower(names(data)) | |
| data$count[data$count < 0] <- 0 | |
| data$taxon_species[data$taxon_species == -99999] <- NA | |
| data$species <- with(data, paste(taxon_genus, taxon_species, sep="_")) | |
| data$site <- with(data, paste(site, transect, sep="_")) | |
| data$site_year <- with(data, paste(site, year, sep="_")) | |
| data <- with(data, tapply(count, list(site_year, species), sum, na.rm = TRUE)) | |
| data[is.na(data)] <- 0 | |
| temp <- strsplit(rownames(data), "_") | |
| year <- matrix(unlist(temp), ncol=3, byrow=TRUE)[,3] | |
| name <- matrix(unlist(temp), ncol=3, byrow=TRUE)[,1] | |
| return(.matrix.melt(data, | |
| data.frame(units="#"), | |
| data.frame(id=rownames(data), year, name, lat=NA, long=NA, address=NA, area=NA), | |
| data.frame(species=colnames(data), taxonomy=NA))) | |
| } | |
| #' @export | |
| .rodriguezBuritica.2013 <- function(...){ | |
| data <- read.csv(suppdata("E094-083","SMCover.csv",from = "esa_archives")) | |
| species.data <- read.csv(suppdata("E094-083","Species.csv",from = "esa_archives")) | |
| species.data$ReportedName <- sub(" ", "_", species.data$ReportedName) | |
| species.data$AcceptedName <- sub(" ", "_", species.data$AcceptedName) | |
| data$species <- species.data$AcceptedName[match(data$Code, species.data$Code)] | |
| data$plot_year <- with(data, paste(Plot, Year, sep = "_")) | |
| transformed.data <- with(data, tapply(Cover, list(plot_year, species), sum, na.rm=TRUE)) | |
| transformed.data[is.na(transformed.data)] <- 0 | |
| temp <- strsplit(rownames(transformed.data), "_") | |
| year <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,2] | |
| name <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,1] | |
| return(.matrix.melt(transformed.data, | |
| data.frame(units="#"), | |
| data.frame(id=rownames(transformed.data), year, name, lat=NA, long=NA, address=NA, area=NA), | |
| data.frame(species=colnames(transformed.data), taxonomy=NA))) | |
| } | |
| #' @export | |
| .ross.2014 <- function(...){ | |
| data <- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-and.3136.5&entityid=88e40dc185bd3f00e7464398b61f40fc", header = TRUE) | |
| species <- data$SCI_NAME | |
| data$id <- rep(paste(data$BIOGEOGRAPHY,data$DATE)) | |
| site.metadata <- data[!duplicated(data$id),] | |
| site.metadata <- with(site.metadata, | |
| data.frame(id=id, year=DATE, name=BIOGEOGRAPHY, lat=NA,long=NA, address=NA,area=NA) | |
| ) | |
| site <- rep(paste(data$BIOGEOGRAPHY,data$DATE), 856) | |
| abundance <- as.vector(data$INDIVIDUALS) | |
| abundance[is.na(abundance)] <- 0 | |
| return(.df.melt(species, site, abundance, | |
| study.metadata=data.frame(units="#"), | |
| site.metadata, | |
| species.metadata=data.frame(species=unique(species), taxonomy=NA) | |
| )) | |
| } | |
| #' @export | |
| .truxa.2015 <- function(...){ | |
| data <- as.data.frame(read_xlsx(suppdata("10.5061/dryad.fg8f6/1", "Appendix_3.xlsx"), skip=1)) #use skip to skip any rows that you don't want/aren't useful | |
| comm <- data[,-1:-3] #get rid of columns you don't want | |
| rownames(comm) <- data$Species #name the rows what you want | |
| comm <- t(comm) #t=transpose, flip the rows and columns | |
| return(.matrix.melt(comm, | |
| data.frame(units="#"), | |
| data.frame(id=rownames(comm),year="2006-2008", | |
| name=c("Danube non-flooded", "Danude flooded", "Leitha non-flooded", "Leitha flooded", "Morava non-flooded", "Morava flooded"), | |
| lat=c("16\u00BA41'24", "16\u00BA42'20", "16\u00BA51'32", "16\u00BA53'26", "16\u00BA53'22"), | |
| long=c("48\u00BA08'41", "48\u00BA07'53", "48\u00BA00'19", "48\u00BA03'28", "48\u00BA17'00", "48\u00BA17'96"), | |
| address="Eastern Austria",area="na"), | |
| data.frame(species=colnames(comm),taxonomy="Lepidoptera"))) | |
| } | |
| #' @export | |
| .schmitt.2012 <- function(...){ | |
| addr <- "https://pasta.lternet.edu/package/data/eml/knb-lter-sbc/46/3/4ded739e78e50552837cf100f251f7ab" | |
| addr <- sub("^https","http",addr) | |
| data <-read.csv(addr,header=F, skip=1, sep=",", quote='"', | |
| col.names=c("YEAR", "MONTH", "DATE", "SITE", "DEPTH", "REP", | |
| "SP_CODE", "COUNT", "COMMENTS", "Common_Name", | |
| "taxon_GROUP", "SURVEY", "taxon_PHYLUM", | |
| "taxon_CLASS", "taxon_ORDER", "taxon_FAMILY", | |
| "taxon_GENUS", "taxon_SPECIES"), check.names=TRUE) | |
| data$species <- with(data, paste(taxon_GENUS, taxon_SPECIES, sep="_")) | |
| data$site.year.depth <- with(data, paste(SITE, YEAR, DEPTH, sep="_")) | |
| site.id <- unique(data$site.year.depth) | |
| year <- data$YEAR[!duplicated(data$site.year.depth)] | |
| name <- data$SITE[!duplicated(data$site.year.depth)] | |
| return(.df.melt(data$species, data$site.year.depth, data$COUNT, | |
| data.frame(units="#"), | |
| data.frame(id=site.id, year, name, lat=NA, long=NA, address="Santa Cruz Island, CA, USA", area=NA), | |
| data.frame(species=unique(data$species), taxonomy="Pycnopodia"))) | |
| } | |
| #' @export | |
| .sandau.2017 <- function(...){ | |
| tmp.file <- tempfile() | |
| download.file("https://www.datadryad.org/bitstream/handle/10255/dryad.129944/BB_all_4_SimilMatrices_Dryad.xlsx?sequence=1", tmp.file) | |
| data <- read.xls(tmp.file, sheet=2) | |
| lookup <- read.xls(suppdata("10.5061/dryad.44bm6", "BB_all_4_SimilMatrices_Dryad.xlsx"), sheet=1, skip=5, header=FALSE, as.is=TRUE)[-1:-8,] | |
| lookup[,2] <- .sanitize.text(lookup[,2]) | |
| lookup[,2] <- sapply(strsplit(lookup[,2], " "), function(x) paste(x[1:2],collapse="_")) | |
| lookup <- setNames(lookup[,2], lookup[,1]) | |
| names(data)[names(data) %in% names(lookup)] <- lookup[names(data)[names(data) %in% names(lookup)]] | |
| site_year <- with(data, paste(data$PlotID, Year, sep="_")) | |
| data <- cbind(site_year, data) | |
| comm.mat <-data[-1:-11] | |
| #This sets the row names to the unique plot_year identifier | |
| rownames(comm.mat) <-data[,1] | |
| site.metadata <- data[!duplicated(data$site_year),] | |
| return(.matrix.melt(comm.mat, | |
| data.frame(units="%", treatment=""), | |
| data.frame(id=site.metadata$site_year, name=site.metadata$PlotID, year=site.metadata$Year, lat=NA, long=NA, address="Grandcour", treatment=site.metadata$Treat, area="20 x 20 m"), | |
| data.frame(species=unique(lookup, taxonomy="Plantae")))) | |
| } | |
| #' @export | |
| .russo.2015 <- function(...){ | |
| species <- read.xls(suppdata("10.5061/dryad.6cr82", "DataforDryad_netmaludome.xlsx"), header=FALSE, as.is=TRUE, nrow=2)[2:1,] | |
| species <- unname(apply(as.matrix(species), 2, paste, collapse="_"))[-1] | |
| data <- read.xls(suppdata("10.5061/dryad.6cr82", "DataforDryad_netmaludome.xlsx"), as.is=TRUE, skip=3) | |
| comm <- as.matrix(data[,-1]) | |
| colnames(comm) <- species; rownames(comm) <- data[,1] | |
| return(.matrix.melt(comm, | |
| data.frame(units="#"), | |
| data.frame(id=rownames(comm), name=colnames(comm), year="2008-2013", lat=NA, long=NA, address=NA, area="New York state, USA"), | |
| data.frame(species=colnames(comm), taxonomy=NA) | |
| )) | |
| } | |
| # YEAR UNKNOWN | |
| #' @export | |
| .mcknight.2000 <- function(...){ | |
| data <- read.csv(file="https://pasta.lternet.edu/package/data/eml/knb-lter-mcm/12/3/7f8537c0f0f80a255551ad61d9d512dc",header=TRUE) | |
| species <- unique(data$Species) | |
| data$id <- rep(paste(data$Location,data$Date)) | |
| site.metadata <- data[!duplicated(data$id),] | |
| site.metadata <- with(site.metadata, | |
| data.frame(id=id, year=Date, name=Location, lat=NA,long=NA, address="antarctica",area=NA) | |
| ) | |
| site <- rep(paste(data$Location,data$Date), 27) | |
| abundance <- as.vector(data[,10]) | |
| abundance[is.na(abundance)] <- 0 | |
| return(.df.melt(species, site, abundance, | |
| study.metadata=data.frame(units="#"), | |
| site.metadata, | |
| species.metadata=data.frame(species=unique(species), taxonomy=NA) | |
| )) | |
| } | |
| #' @export | |
| .mcmahon.2017 <- function(...){ | |
| abun <- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.349.2&entityid=da11cbc268d91fef78c78bd2813adbf6", header = TRUE) | |
| site_meta1 <- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.349.2&entityid=a508f609c7d45f1c10604a4722acfd04", header = TRUE) | |
| site_meta2 <- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.349.2&entityid=d35b86dbfcf7bf6eab90a2fd5539809c", header = TRUE) | |
| org_meta <- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.349.2&entityid=5c558e387eadadf707a3f84742b0d3e1", header = TRUE) | |
| colnames(abun)[1] <- "OTU" | |
| data_meta1 <- merge(abun, site_meta1, by = "Sample_Name") | |
| site_data <- merge(data_meta1, site_meta2, by = "Sample_Name") | |
| data <- merge(org_meta, site_data, by = "OTU") | |
| comm <- with(data, tapply(value, list(paste(Lake,Collection_Date,sep="_", OTU), length))) | |
| site.names <- sapply(strsplit(rownames(comm), "_"), function(x) x[1]) | |
| years <- sapply(strsplit(rownames(comm), "_"), function(x) x[2]) | |
| comm[is.na(comm)] <- 0 | |
| unique <- data[!duplicated(data$OTU),] | |
| colnames(unique)[1] <- 'Species' | |
| unique <- unique[,-9:-20] | |
| return(.matrix.melt(comm, | |
| data.frame(units="p/a"), | |
| data.frame(id=rownames(comm),years,site.names,lat=NA,long=NA,address="North of Minocqua, Wisconsin USA",area="Depth"), | |
| data.frame(species=colnames(comm),taxonomy=unique, ))) | |
| } | |
| #' @export | |
| .miller.2013 <- function(...){ | |
| data<- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-and.2739.7&entityid=1743caa458ea7bb640833d884576f51c", header = TRUE) | |
| species <- data$ENTITY | |
| data$id <- rep(paste(data$TRAPID,data$YEAR)) | |
| site.metadata <- data[!duplicated(data$id),] | |
| site.metadata <- with(site.metadata, | |
| data.frame(id=id, year=YEAR, name=TRAPID, lat=NA,long=NA, address="Willamette National Forest Oregon USA",area=NA) | |
| ) | |
| site <- rep(paste(data$TRAPID,data$YEAR), 17663) | |
| abundance <- as.vector(data$NO_INDIV) | |
| abundance[is.na(abundance)] <- 0 | |
| return(.df.melt(species, site, abundance, | |
| study.metadata=data.frame(units="#"), | |
| site.metadata, | |
| species.metadata=data.frame(species=unique(species), taxonomy=NA) | |
| )) | |
| } | |
| #' @export | |
| .myster.2010 <- function(...){ | |
| addr <- "https://pasta.lternet.edu/package/data/eml/knb-lter-luq/100/246250/f718e683c7c425207c7d1f7adeddf85f" | |
| addr <- sub("^https","http", addr) | |
| data <-read.csv(addr, header=F, skip=1, sep=",", col.names=c("date", "plot", "species", "percent.cover"), check.names=TRUE) | |
| data$date <- format(as.Date(data$date, format="%d/%m/%Y"),"%Y") | |
| data$plot.year <- with(data, paste(plot, date, sep="_")) | |
| site.id <- unique(data$plot.year) | |
| year <- data$date[!duplicated(data$plot.year)] | |
| name <- data$plot[!duplicated(data$plot.year)] | |
| return(.df.melt(data$species, data$plot.year, data$percent.cover, | |
| data.frame(units="area"), | |
| data.frame(id=site.id, year, name, lat="-65.8257", long="18.3382", address="Luquillo Experimental Forest, Puerto Rico, USA", area="2mX5m"), | |
| data.frame(species=unique(data$species), taxonomy="Plantae"))) | |
| } | |
| #' @export | |
| .nichols.2006 <- function(...) { | |
| addr <- "https://pasta.lternet.edu/package/data/eml/knb-lter-ntl/61/3/wgnhs_macrophyte_aquaplt2" | |
| addr <- sub("^https","http",addr) | |
| abundanceData <-read.csv(addr, header=F, skip=1, sep=",", quote='"', | |
| col.names=c("mwbc", "lake_unique", "lakename", | |
| "county", "county_id", "month", "year4", | |
| "spcode", "aqstano", "visual_abundance"), | |
| check.names=TRUE) | |
| specAddr <- "https://pasta.lternet.edu/package/data/eml/knb-lter-ntl/61/3/wgnhs_macrophyte_pltname" | |
| specAddr <- sub("^https","http",specAddr) | |
| specData <-read.csv(specAddr, header=F, skip=1, sep=",", quote='"', | |
| col.names=c("spcode", "spec_no", "scientific_name", | |
| "common_name", "lifeform", "spec_category", | |
| "genus"), check.names=TRUE) | |
| abundanceData$site.year <- with(abundanceData, paste(lakename, year4, sep=">")) | |
| abundanceData$species <- specData$scientific_name[match(abundanceData$spcode, specData$spcode)] | |
| data <- with(abundanceData, tapply(visual_abundance, list(site.year, species), sum, na.rm = TRUE)) | |
| data[is.na(data)] <- 0 | |
| temp <- unlist(strsplit(rownames(data), ">", fixed=T)) | |
| name <- temp[seq(1,length(temp), 2)] | |
| year <- temp[seq(2,length(temp), 2)] | |
| return(.matrix.melt(data, | |
| data.frame(units="#"), | |
| data.frame(id=rownames(data), year, name, lat=NA, long=NA, address=NA, area=NA), | |
| data.frame(species=colnames(data), taxonomy=NA))) | |
| } | |
| #' @export | |
| .lorite.2017<-function(...){ | |
| expdata<-read.delim("https://doi.org/10.1371/journal.pone.0182414.s003", nrows=410) | |
| lookup <- read.delim("https://doi.org/10.1371/journal.pone.0182414.s003", skip=414, nrows=34,as.is = TRUE,header = FALSE) | |
| lookup<-lookup[,1:2] | |
| expdata$new.site<-paste(expdata$Site,expdata$transect,expdata$quadrat,sep="_") | |
| names(expdata)[7:40]<-lookup[,2] | |
| comm<-cbind(id=expdata[,41],expdata[,7:40]) | |
| #needs meta data, loc: scattered through paper/tables but existant. | |
| return(.matrix.melt(comm, | |
| data.frame(units="percent"), | |
| data.frame(id=comm$id,year=NA), | |
| data.frame(species=lookup[,2],taxonomy=NA) | |
| )) | |
| } | |
| #' @export | |
| .kaspari.2016 <- function(...) { | |
| addr <- "https://pasta.lternet.edu/package/data/eml/msb-tempbiodev/1111170/1/cfd3a55deef52e3a93469057053f5404" | |
| addr <- sub("^https", "http", addr) | |
| data <-read.csv(addr, header=F, skip=1, sep=",", | |
| col.names=c("location", "distance", "direction", | |
| "plotcode", "taxon", "abundance"), | |
| check.names=TRUE) | |
| return(.df.melt(data$taxon, data$plotcode, data$abundance, | |
| data.frame(units="#"), | |
| data.frame(id=unique(data$plotcode), year="2016", name=unique(data$plotcode), lat=NA, long=NA, address=NA, area=NA), | |
| data.frame(species=unique(data$taxon), taxonomy="Arthropoda"))) | |
| } | |
| #' @export | |
| .johnson.2017 <- function(...){ | |
| datam<-read.csv(suppdata("10.5061/dryad.cb13r","Species_x_SiteMatrix.csv"), as.is=TRUE) | |
| sitedataA<-read.csv(suppdata("10.5061/dryad.cb13r","RawSoilData.csv"),as.is = TRUE) | |
| sitedataB<-read.csv(suppdata("10.5061/dryad.cb13r","VacantLot_DemolitionDate.csv"),as.is = TRUE) | |
| sppdata<-read.csv(suppdata("10.5061/dryad.cb13r","Species_x_TraitsMatrix.csv"),as.is = TRUE) | |
| comm<-datam[,-(1:2)] | |
| sitedataB <- rbind(sitedataB, sitedataB) | |
| sitedataB$new.code <- paste(sitedataB$Code, rep(c("BF","RG"), each=nrow(sitedataB)/2), sep=".") | |
| sitedataA$new.code <- paste(sitedataA$LotID, rep(c("BF","RG"), each=nrow(sitedataA)/2), sep=".") | |
| sitedata<-merge(sitedataA,sitedataB,by="new.code",all.x=TRUE,all.y = TRUE) | |
| names(sitedata)[c(1,27)] <- c("id","address") | |
| sitedata$lat <- NA;sitedata$long <-NA; sitedata$area <- NA | |
| sitedata$year <- "2012-2013" | |
| sitedata$name <- sitedata$id | |
| names(sppdata)[1:2] <- c("species","taxonomy") | |
| return(.matrix.melt(comm, | |
| data.frame(units="percent"), | |
| sitedata, | |
| sppdata) | |
| ) | |
| } | |
| #' @export | |
| .hollibaugh.2017 <- function(...){ | |
| data <- read.csv(file = "https://pasta.lternet.edu/package/data/eml/knb-lter-pal/114/2/3ab81d869107c4b3a7f0fb76fed55ed4", header = TRUE) | |
| names(data)[7:8] <- c("latitude","longitude") | |
| taxon <- rep(c("Eub","AOB","Archaea","Cren","AOA", "AOB","Eub","AOB","Archaea","Cren","AOA","AOB"), nrow(data)) | |
| data$id <- paste(data$Station,data$Datetime.GMT) | |
| site.metadata <- data[!duplicated(data$id),] | |
| site.metadata <- with(site.metadata, | |
| data.frame(id=id, year=Datetime.GMT, name=Station, lat=latitude, long=longitude, address=NA, area=NA) | |
| ) | |
| site <- rep(paste(data$Station,data$Datetime.GMT), 12) | |
| abundance <- unname(unlist(data[,10:21])) | |
| return(.df.melt(taxon, site , abundance, | |
| study.metadata=data.frame(units="#"), | |
| site.metadata, | |
| species.metadata=data.frame(species=unique(taxon), taxonomy=NA))) | |
| } | |
| #' @export | |
| .harrower.2017<-function(...){ | |
| birddata<-read.csv(suppdata("10.5061/dryad.365dr", "bird_data.csv"),as.is = TRUE) | |
| envdata<-read.csv(suppdata("10.5061/dryad.365dr","envr_data.csv"),as.is=TRUE) | |
| envdata$name<-paste(envdata$block,envdata$transect,sep="_") | |
| birddata$id<-paste(birddata$block,birddata$transect,birddata$year,sep="_") | |
| birddata$name<-paste(birddata$block,birddata$transect,sep="_") | |
| birddata$lat<-"50o39'59\" N" | |
| birddata$long<-"120o19'09\" W" | |
| birddata$address<- "Lac du Bois Provincial Park near Kamloops, British Columbia, Canada" | |
| birddata$area<-"20ha" | |
| birddata$binom<-paste(birddata$genus,birddata$species,sep=".") | |
| comm <- with(birddata, tapply(binom, list(binom, site), length)) | |
| comm[is.na(comm)] <- 0 | |
| comm<-t(comm) | |
| birdsub<-birddata[!duplicated(birddata$site),] | |
| envsub<-envdata[,c(3,8)] | |
| envtest<-merge(birdsub,envsub,by="name") | |
| envtest<-envtest[,-c(6:11,17)] | |
| return(.matrix.melt(comm, | |
| data.frame(units="#"), | |
| envtest, | |
| data.frame(species=birddata$binom, taxonomy=NA) | |
| ) | |
| ) | |
| } | |
| #' @export | |
| .franklin.2018 <- function(...) { | |
| data <- read.xls("CopyofWESTCOSPPCOVER.xlsx", as.is=TRUE) | |
| ground_data <- read.xls("WEST CO GROUND COVER.xlsx") | |
| data$R4_SPP <- NULL | |
| colnames(data) <- colnames(ground_data) | |
| combined.data <- rbind(data, ground_data) | |
| combined.data$year <- NA | |
| for(i in seq_len(nrow(combined.data))){ | |
| t <- as.numeric(regexpr("[0-9]{4}", combined.data$SITE_ID[i]))[1] | |
| combined.data$year[i] <- substr(combined.data$SITE_ID[i], t, t+4) | |
| } | |
| metadata <- read.xls("WEST CO SAGEBRUSH PLOTS.xlsx", as.is=TRUE) | |
| combined.data$SITE_ID <- gsub(" ", "", combined.data$SITE_ID) | |
| combined.data$lat <- metadata$LATITUDE[match(combined.data$SITE_ID, metadata$SITE_ID)] | |
| combined.data$long <- metadata$LONGITUDE[match(combined.data$SITE_ID, metadata$SITE_ID)] | |
| combined.data$elevation.ft <- metadata$Elev..ft.[match(combined.data$SITE_ID, metadata$SITE_ID)] | |
| combined.data$aspect <- metadata$Aspect[match(combined.data$SITE_ID, metadata$SITE_ID)] | |
| combined.data$pct.slope <- metadata$Pct_Slope[match(combined.data$SITE_ID, metadata$SITE_ID)] | |
| combined.data$project <- metadata$PROJECT[match(combined.data$SITE_ID, metadata$SITE_ID)] | |
| combined.data$COVER_PERCENT <- as.numeric(combined.data$COVER_PERCENT) | |
| combined.data$COVER_PERCENT[is.na(combined.data$COVER_PERCENT)] <- 0 | |
| return(.df.melt(combined.data$NAME, | |
| combined.data$SITE_ID, | |
| combined.data$COVER_PERCENT, | |
| data.frame(units="area"), | |
| data.frame(id=unique(combined.data$SITE_ID), | |
| year=combined.data$year[!duplicated(combined.data$SITE_ID)], | |
| name=unique(combined.data$SITE_ID), | |
| lat=combined.data$lat[!duplicated(combined.data$SITE_ID)], | |
| long=combined.data$long[!duplicated(combined.data$SITE_ID)], | |
| address=NA, | |
| area="0.1ha", | |
| elevation.ft=combined.data$elevation.ft[!duplicated(combined.data$SITE_ID)], | |
| aspect=combined.data$aspect[!duplicated(combined.data$SITE_ID)], | |
| pct.slope=combined.data$pct.slope[!duplicated(combined.data$SITE_ID)], | |
| project=combined.data$project[!duplicated(combined.data$SITE_ID)]), | |
| data.frame(species=unique(combined.data$NAME), | |
| taxonomy=NA, | |
| other="Plant study; Percent cover of species and ground"))) | |
| } | |
| #' @export | |
| .ellison.2017 <- function(...){ | |
| data <- read.csv(file="https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-hfr.97.23&entityid=a840ed1f4c891cd7e6abe660aecb797a", header=TRUE) | |
| species <- data$species | |
| data$id <- rep(paste(data$plot,data$date)) | |
| site.metadata <- data[!duplicated(data$id),] | |
| site.metadata <- with(site.metadata, | |
| data.frame(id=id, year=date, name=plot, lat=NA,long=NA, address="North of West Point, New York, USA",area=NA) | |
| ) | |
| site <- rep(paste(data$plot,data$date), 3120) | |
| abundance <- as.vector(data$no.ants) | |
| abundance[is.na(abundance)] <- 0 | |
| return(.df.melt(species, site, abundance, | |
| study.metadata=data.frame(units="#"), | |
| site.metadata, | |
| species.metadata=data.frame(species=unique(species), taxonomy=NA) | |
| )) | |
| } | |
| #' @export | |
| .collins.2018 <- function(...) { | |
| # The species in this dataset are not named; Generic identifiers are given (e.g. 'sp1') | |
| # Species codes were added due to people not wanting scott to publish their data. | |
| data <- read.csv("https://pasta.lternet.edu/package/data/eml/edi/15/5/f69c8fe563067164191d61b6e33eff03", as.is=TRUE) | |
| names(data) <- tolower(names(data)) | |
| metadata <- read.csv("https://pasta.lternet.edu/package/data/eml/edi/15/5/8284876afe3a1cb0a919d37e1164357f", as.is=TRUE) | |
| names(metadata) <- tolower(names(metadata)) | |
| data$site_year <- with(data, paste(data$sitesubplot, experiment_year, sep="_")) | |
| data$latitude <- metadata$lat[match(data$site_project_comm, metadata$site_project_comm)] | |
| data$longitude <- metadata$long[match(data$site_project_comm, metadata$site_project_comm)] | |
| data$address <- metadata$location[match(data$site_project_comm, metadata$site_project_comm)] | |
| data$area <- metadata$plot_size[match(data$site_project_comm, metadata$site_project_comm)] | |
| return(.df.melt(data$species, | |
| data$site_year, | |
| data$relcover, | |
| data.frame(units="%"), | |
| data.frame(id=unique(data$site_year), | |
| year=data$experiment_year[!duplicated(data$site_year)], | |
| name=data$sitesubplot[!duplicated(data$site_year)], | |
| lat=data$latitude[!duplicated(data$site_year)], | |
| long=data$longitude[!duplicated(data$site_year)], | |
| address=data$address[!duplicated(data$site_year)], | |
| area=data$area[!duplicated(data$site_year)]), | |
| data.frame(species=unique(data$species), taxonomy="Plantae"))) | |
| } | |
| #' @export | |
| .coblentz.2015 <- function(...){ | |
| # This won't work on Windows OS. I might be wrong but I think that it has | |
| # something to do with the spaces in the file name. | |
| data <- read.xls(suppdata("10.5061/dryad.j2c13", "Invert Community Data 2012 RAW.xlsx"), stringsAsFactors=FALSE) | |
| colnames(data) <- with(data, paste(colnames(data), data[3,], sep="_")) | |
| data <- data[-1:-3,] | |
| species <- data[,1] | |
| data <- data[,-1] | |
| data <- sapply(data, as.numeric) | |
| rownames(data) <- species | |
| return(.matrix.melt(data)) | |
| } | |
| #' @export | |
| .chamailleJammes.2016 <- function(...){ | |
| data <- read.csv(suppdata("10.1371/journal.pone.0153639", 1), stringsAsFactors=FALSE) | |
| year <- (1992:2005)[-6] # Study excluded the year of 1997 | |
| data <- aggregate(. ~ WATERHOLE, data = data, FUN=sum) | |
| species <- colnames(data) | |
| data <- reshape(data, varying = list(names(data)[2:ncol(data)]), v.names = "Count", | |
| idvar = "WATERHOLE", times = c("ELEPHANT", "GIRAFFE", "IMPALA","KUDU", | |
| "ROAN", "SABLE", "WILDEBEEST", "ZEBRA"), timevar = "species", direction = "long") | |
| rownames(data) <- NULL | |
| id <- unique(data$WATERHOLE) | |
| year <- rep(year, each=length(id)) | |
| temp <- paste(data$WATERHOLE, year, sep="_") | |
| return(.df.melt(data$species, | |
| data$WATERHOLE, | |
| data$Count, | |
| data.frame(units="#"), | |
| data.frame(id=, lat="18", long="26", address="Hwange National Park, Zimbabwe, Africa", area=NA), | |
| data.frame(species=unique(data$species, taxonomy="Mammalia")))) | |
| } | |
| .brant.2018 <- function(...){ | |
| tmp.file <- tempfile() | |
| download.file("https://zenodo.org/record/1198846/files/template_MosquitoDataBrant77.xlsx", tmp.file) | |
| DailyHLC <- read.xls(tmp.file, sheet=4, as.is=TRUE, skip=9) | |
| lookup <- read.xls(tmp.file, sheet=3, as.is=TRUE) | |
| lookup[,2] <- .sanitize.text(lookup[,2]) | |
| #lookup[,2] <- sapply(strsplit(lookup[,2], " "), function(x) paste(x[1:2],collapse="_")) | |
| lookup <- setNames(lookup[,2], lookup[,1]) | |
| names(DailyHLC) <- gsub("_count", "", names(DailyHLC), fixed=TRUE) | |
| names(lookup) <- gsub(".", "_", names(lookup), fixed=TRUE) | |
| names(DailyHLC)[names(DailyHLC) %in% names(lookup)] <- lookup[names(DailyHLC)[names(DailyHLC) %in% names(lookup)]] | |
| DailyHLC$site_year <- with(DailyHLC, paste(field_name, Location, Date, sep="_")) | |
| #community matrix | |
| comm <- as.matrix(DailyHLC[,c(-1:-7,-ncol(DailyHLC))]) | |
| rownames(comm) <- DailyHLC$site_year | |
| site.metadata <- DailyHLC[,1:7] | |
| species.meta <- data.frame(species=colnames(comm), taxonomy="Insecta") | |
| return(.matrix.melt(comm, | |
| data.frame(units="#"), | |
| data.frame(id=DailyHLC$site_year, name=site.metadata$Location, year=site.metadata$Date, lat="4.6353 to 4.9654", long="116.9542 to 117.8004", address="SAFE project, Borneo", area="attracted to humans"), | |
| species.meta)) | |
| } | |
| .lightfoot.2016 <- function(...) { | |
| data <- read.table("http://sev.lternet.edu/sites/default/files/data/sev-106/sev106_hopperdynamics_20150826.txt", header=T, sep=",") | |
| data$month.year <- format(as.Date(data$DATE, format="%m/%d/%Y"),"%m/%Y") | |
| spec_codes <- c("ACPI","AGDE","AMCO","ARCO","ARPS","AUEL","AUFE","BOAR", | |
| "BRMA","CIPA","COCR","COOC","COTE","DABI","ERSI","HATR", | |
| "HERU","HEVI","HICA","LAAZ","LEWH","MEAR","MEAZ","MEBO", | |
| "MEGL","MELA","MEOC","METE","OPOB","PAPA","PHQU","PHRO", | |
| "PSDE","PSTE","SCNI","SYMO","TRCA","TRFO","TRKI","TRPA", | |
| "TRPI","XACO","XAMO") | |
| species <- c("Acantherus piperatus","Ageneotettix deorum", | |
| "Amphitornus coloradus","Arphia conspersa", | |
| "Arphia pseudonietana","Aulocara elliotti", | |
| "Aulocara femoratum","Bootettix argentatus", | |
| "Brachystola magna","Cibolacris parviceps", | |
| "Cordillacris crenulata","Cordillacris occipitalis", | |
| "Conozoa texana","Dactylotum bicolor", | |
| "Eritettix simplex","Hadtrotettix trifasciatus", | |
| "Heliaula rufa","Hesperotettix viridis", | |
| "Hippopedon capito","Lactista azteca","Leprus wheeleri", | |
| "Melanoplus aridus","Melanoplus arizonae", | |
| "Melanoplus bowditchi","Melanoplus gladstoni", | |
| "Melanoplus lakinus","Melanoplus occidentalis", | |
| "Mermeria texana","Opeia obscura","Paropomala pallida", | |
| "Phlibostroma quadrimaculatum","Phrynotettix robustus", | |
| "Psoloessa delicatula","Psoloessa texana", | |
| "Schistocerca nitens","Syrbula montezuma", | |
| "Trimerotropis californicus","Tropidolophus formosus", | |
| "Trachyrhachis kiowa","Trimerotropis pallidipennis", | |
| "Trimerotropis pistrinaria","Xanthippus corallipes", | |
| "Xanthippus montanus") | |
| metadata <- data.frame(spec_codes, species) | |
| data$SPECIES <- metadata$species[match(data$SPECIES, metadata$spec_codes)] | |
| data <- with(data, tapply(CNT, list(site_year, SPECIES), sum, na.rm=TRUE)) | |
| data$site_year <- with(data, paste(SITE, year, sep="_")) | |
| temp <- strsplit(rownames(data), "_") | |
| year <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,2] | |
| year <- format(as.Date(data$DATE, format="%m/%Y"),"%Y") | |
| name <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,1] | |
| #needs some "burned" info?... | |
| return(.df.melt(data$species, data$SITE, data$CNT, | |
| data.frame(units="#"), | |
| data.frame(id=unique(data$plot_year), year, name, lat=NA, long=NA, address="Sevilleta National Wildlife Refuge, New Mexico", area=NA), | |
| data.frame(species=unique(data$species), taxonomy="Orthoptera"))) | |
| } | |
| .fia.2018 <- function(...){ | |
| .get.fia <- function(state, var, select){ | |
| t.zip <- tempfile() | |
| download.file(paste0("https://apps.fs.usda.gov/fia/datamart/CSV/",state,"_",var,".zip"), t.zip) | |
| unzip(t.zip) | |
| data <- fread(paste0(state,"_",var,".csv"), select=select) | |
| unlink(paste0(state,"_",var,".csv")) | |
| return(data) | |
| } | |
| states <- c("AK","AL","AZ","AR","CA","CO","CT","DE","FL","GA","HI","IA","ID","IL","IN","KS","KY","LA","ME","MD", | |
| "MA","MI","MN","MS","MO","MT","NC","NE","NH","NV","NM","NJ","NY","ND","OH","OK","OR","PA","RI", | |
| "SC","SD","TN","TX","UT","VA","VT","WA","WI","WV","WY","VI","PR") | |
| data <- vector("list", length(states)) | |
| for(i in seq_along(states)){ | |
| #Download/read in data | |
| tree <- .get.fia(states[i], "TREE", c("CN","PLT_CN","PLOT","SPCD","DIA","INVYR")) | |
| cond <- .get.fia(states[i], "COND", c("PLT_CN","PLOT","STDAGE","FORTYPCD","CONDID")) | |
| plot <- .get.fia(states[i], "PLOT", c("PLOT","LAT","LON","ELEV", "CN")) | |
| #Subset everything, remove sites with multiple/ambiguous codings, merge | |
| tree <- tree[tree$DIA > 1.96,] | |
| cond <- cond[cond$PLT_CN %in% as.integer64(names(Filter(function(x) x==1, table(cond$PLT_CN)))),] | |
| data[[i]] <- merge(tree, merge(cond, plot, by.x="PLT_CN", by.y="CN"), by.x="PLT_CN", by.y="PLT_CN") | |
| data[[i]]$state <- states[i] | |
| } | |
| data <- rbindlist(data) | |
| t <- setNames(seq_along(unique(data$PLT_CN)), unique(data$PLT_CN)) | |
| data$site.id <- paste0(data$state, "_", t[as.character(data$PLT_CN)]) | |
| uniq.site <- as.data.frame(unique(data[, 15:16])) | |
| sample.sites <- as.data.frame(uniq.site %>% group_by(state) %>% sample_n(size = 30)) | |
| data <- merge(sample.sites, data, by="site.id") | |
| data$site.id <- paste0(data$site.id, "_", data$INVYR) | |
| fia.spp <- read.csv("FIA_SppList.csv") #currently in raw_data folder | |
| fia.spp <- data.table(fia.spp$SPCD, paste0(fia.spp$GENUS, "_", fia.spp$SPECIES)) | |
| data <- merge(data, fia.spp, by.x="SPCD", by.y="V1") | |
| data <- data.frame(data$V2, data$site.id, data$LAT, data$LON, data$ELEV, | |
| data$STDAGE, data$FORTYPCD, data$CONDID, data$DIA) | |
| names(data) <- c("species", "site.id", "lat", "long", "elev", "stdage", "forestclass", "condclass", "diameter") | |
| comm <- t(as.matrix(with(data, table(species,site.id)))) | |
| dia <- aggregate(diameter~species, data, mean) | |
| dia.count <- aggregate(diameter~species, data, length) | |
| dia$diameter.n <- dia.count$diameter | |
| site.df <- data[!duplicated(data$site.id),] | |
| site.df <- site.df[,2:8] | |
| sites <- rownames(comm) | |
| site.df <- site.df[match(sites, site.df$site.id), ] | |
| return(.matrix.melt(comm, | |
| data.frame(units="#"), | |
| data.frame(id=site.df$site.id, name=NA, year=NA, lat=site.df$lat, | |
| long=site.df$long, address=NA, area=NA, | |
| elevation=site.df$elev, class=site.df$forestclass), | |
| data.frame(species=dia$species, taxonomy=NA, diameter=dia$diameter))) | |
| } | |
| .tomasovych.2010a <- function(...){ | |
| species <- read.xls(suppdata("10.5061/dryad.1225", "abundances-S California 1975.xls"), skip=1, header=TRUE) | |
| species.clean <- species[,-1] | |
| comm <- t(as.matrix(species.clean)) | |
| rownames(comm) <- species$X | |
| rownames(comm) | |
| } | |
| .mendonca.2018 <- function(...){ | |
| # need to fix the years | |
| tmp <- tempfile() | |
| download.file("https://esajournals.onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1002%2Fecy.2367&attachmentId=2208200269", tmp) | |
| data <- read.csv(.unzip("CERRADO_SM_Capture.csv", tmp), as.is=TRUE, fileEncoding = "Latin1") | |
| data <- data[!is.na(data$Individuals_captured),] | |
| data$Year_finsh <- as.numeric(data$Year_finish) | |
| data <- data[!is.na(data$Year_finish),] | |
| ids <- paste(data$id, data$Year_finish) | |
| #ids <- ids[-c(1513:1536)] | |
| # lat/long data | |
| tmp2 <- tempfile() | |
| download.file("https://esajournals.onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1002%2Fecy.2367&attachmentId=2208200269", tmp2) | |
| ll_data <- read.csv(.unzip("CERRADO_SM_Study_Site.csv", tmp), as.is=TRUE, fileEncoding = "Latin1") | |
| ll_data <- ll_data[,c(1,7,8)] | |
| ll_data$id <- unique(ids) | |
| names(ll_data) <- c("id", "lat", "long") | |
| ll_data$year <- ll_data$id; ll_data$name <- ll_data$id | |
| ll_data$address <- "Cerrado ecosystem: Brazil, Boliva, Paraguay"; ll_data$area <- "live_trap" | |
| return(.df.melt(data$Actual_species_name, | |
| ids, | |
| data$Individuals_captured, | |
| data.frame(units = "#"), | |
| ll_data, | |
| data.frame(species = unique(data$Actual_species_name), taxonomy = "Animalia") | |
| ) | |
| ) | |
| } | |
| # Error in data.frame(id = rownames(data), year = years, name = | |
| # names, lat = NA, : arguments imply differing number of rows: 20, | |
| # 24, 1 | |
| .sepulveda.2016 <- function(...){ | |
| tmp <- tempfile() | |
| download.file("http://journals.plos.org/plosone/article/file?type=supplementary&id=info:doi/10.1371/journal.pone.0157910.s001", tmp) | |
| data <- read.xls(tmp, 1, skip=1, fileEncoding="Latin1") | |
| data <- data[1:20,] | |
| years <- colnames(data)[2:25] | |
| names(data) <- c("species", paste(rep(c("Cocholgue", "Hualpen", "Llico", "Mehuin", "La Mision", "Maicolpue"),each=4), names(data)[2:25], sep="_")) | |
| d2 <- t(data) | |
| names <- rep(c("Cocholgue", "Hualpen", "Llico", "Mehuin", "La Mision", "Maicolpue"),each=4) | |
| return(.matrix.melt(data, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = years, name= names, lat= NA, long= NA, address="Southwestern Chilean coast", area = NA), | |
| data.frame(species=colnames(data), taxonomy = NA) | |
| ) | |
| ) | |
| } | |
| # Metadata woes | |
| .bried.2017 <- function(...){ | |
| tmp <- tempfile() | |
| download.file("https://datadryad.org/bitstream/handle/10255/dryad.151171/Dryad.data.xlsx?sequence=1", tmp) | |
| data <- read.xls(tmp, 1) | |
| n <- paste(data$Latitude, data$Longitude, sep = "_") | |
| comm <- data[,-c(1:4)] | |
| comm$Region <- n | |
| return(.matrix.melt(comm, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = 2017, name = , lat= , long = , address = "", area = NA), | |
| data.frame(species = colnames(data), taxonomy = "Insecta") | |
| ) | |
| ) | |
| } | |
| # datasets on chiclids - each function downloads a community | |
| # dataset for a different region | |
| # Kigoma town | |
| .britton.2017.a <- function(...){ | |
| tmp <- tempfile() | |
| download.file("https://datadryad.org/bitstream/handle/10255/dryad.148126/BrittonEtAl2017_KigomaTown.csv?sequence=3", tmp) | |
| data <- read.csv(tmp, skip=1) | |
| data <- data[-c(1,2),] | |
| names(data)[1] <- "species" | |
| comm <- t(data) | |
| return(.matrix.melt(comm, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
| data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
| )) | |
| } | |
| # Kigoma deforested | |
| .britton.2017.b <- function(...){ | |
| tmp <- tempfile() | |
| download.file("https://datadryad.org/bitstream/handle/10255/dryad.148127/BrittonEtAl2017_KigomaDeforested.csv?sequence=3", tmp) | |
| data <- read.csv(tmp, skip=1) | |
| data <- data[-c(1,2),] | |
| names(data)[1] <- "species" | |
| comm <- t(data) | |
| return(.matrix.melt(comm, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
| data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
| )) | |
| } | |
| # Kalilani village | |
| .britton.2017.c <- function(...){ | |
| tmp <- tempfile() | |
| download.file("https://datadryad.org/bitstream/handle/10255/dryad.148128/BrittonEtAl2017_KalilaniVillage.csv?sequence=1", tmp) | |
| data <- read.csv(tmp, skip=1) | |
| data <- data[-c(1,2),] | |
| names(data)[1] <- "species" | |
| comm <- t(data) | |
| return(.matrix.melt(comm, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
| data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
| )) | |
| } | |
| # Jakobsen's beach | |
| .britton.2017.d <- function(...){ | |
| tmp <- tempfile() | |
| download.file("https://datadryad.org/bitstream/handle/10255/dryad.148129/BrittonEtAl2017_Jakobsen%27sBeach.csv?sequence=3", tmp) | |
| data <- read.csv(tmp, skip=1) | |
| data <- data[-c(1,2),] | |
| names(data)[1] <- "species" | |
| comm <- t(data) | |
| return(.matrix.melt(comm, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
| data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
| )) | |
| } | |
| # Gombe stream | |
| .britton.2017.e <- function(...){ | |
| tmp <- tempfile() | |
| download.file("https://datadryad.org/bitstream/handle/10255/dryad.148130/BrittonEtAl2017_GombeNP.csv?sequence=1", tmp) | |
| data <- read.csv(tmp, skip=1) | |
| data <- data[-c(1,2),] | |
| names(data)[1] <- "species" | |
| comm <- t(data) | |
| return(.matrix.melt(comm, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
| data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
| )) | |
| } | |
| # Mahale mountain 1 | |
| .britton.2017.f <- function(...){ | |
| tmp <- tempfile() | |
| download.file("https://datadryad.org/bitstream/handle/10255/dryad.148131/BrittonEtAl2017_MahaleNPS1.csv?sequence=1", tmp) | |
| data <- read.csv(tmp, skip=1) | |
| data <- data[-c(1,2),] | |
| names(data)[1] <- "species" | |
| comm <- t(data) | |
| return(.matrix.melt(comm, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
| data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
| )) | |
| } | |
| # Mahale mountain 2 | |
| .britton.2017.g <- function(...){ | |
| tmp <- tempfile() | |
| download.file("https://datadryad.org/bitstream/handle/10255/dryad.148132/BrittonEtAl2017_MahaleNPS2.csv?sequence=3", tmp) | |
| data <- read.csv(tmp, skip=1) | |
| data <- data[-c(1,2),] | |
| names(data)[1] <- "species" | |
| comm <- t(data) | |
| return(.matrix.melt(comm, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
| data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
| )) | |
| } | |
| .drew.2015<-function(...){ | |
| expdata<-read.csv("https://datadryad.org/bitstream/handle/10255/dryad.93108/Supplemental%201.csv?sequence=1",as.is = TRUE) | |
| expdata$binom<-paste(expdata$Genus,expdata$species,sep=".") | |
| comm<-t(expdata[,4:6]) | |
| colnames(comm)<-expdata$binom | |
| #meta data is basically not a thing, so this may need to be scrapped after all | |
| return(.matrix.melt(comm, | |
| data.frame(units="p/a"), | |
| sitedata, | |
| data.frame(speccies=expdata$binom,taxonomy=NA) | |
| ) | |
| ) | |
| } | |
| .osuri.2016<-function(...){ | |
| expdata<-read.csv("https://datadryad.org/bitstream/handle/10255/dryad.109139/Osuri_Sanakran_2016_JAE_plot_data.csv?sequence=2",as.is = TRUE) | |
| comm <- with(expdata, tapply(species, list(species, site.name), length)) | |
| comm[is.na(comm)] <- 0 | |
| comm<-t(comm) | |
| #meta data is limited, what could be easily found is in the expdata data frame | |
| return(.matrix.melt(comm, | |
| data.frame(units="#"), | |
| sitedata, | |
| data.frame(species=expdata$species,taxonomy=NA) | |
| ) | |
| ) | |
| } | |
| .helmus.2013 <- function(...){ | |
| library(pez) # This isn't how we declare packages in 'real' | |
| # packages for the time being this is sufficient | |
| data(laja) | |
| return(.matrix.melt(invert.sites)) | |
| } | |
| .jain.2017 <- function(...){ | |
| species <- read.xls(suppdata("10.5061/dryad.177q4", "Jain_etal_2016_Butterfly%20abundance%20across%20sites_22Dec2016.xlsx"), skip=5, header=TRUE, as.is=TRUE) | |
| species.clean <- species[,c(-1:-15,-38)] | |
| comm <- t(as.matrix(species.clean)) | |
| colnames(comm) <- species$Scientific.name | |
| return(.matrix.melt(comm, | |
| data.frame(units="#", treatment=NA), | |
| data.frame(id=rownames(comm), year=site.metadata$Date, name=site.metadata$SiteCombo, lat=NA, long=NA, address = "British Columbia", area=site.metadata$HaSurveyed), | |
| data.frame(species=colnames(comm), taxonomy=NA))) | |
| } | |
| .lightfoot.2016 <- function(...) { | |
| data <- read.table("http://sev.lternet.edu/sites/default/files/data/sev-106/sev106_hopperdynamics_20150826.txt", header=T, sep=",") | |
| data$month.year <- format(as.Date(data$DATE, format="%m/%d/%Y"),"%m/%Y") | |
| spec_codes <- c("ACPI","AGDE","AMCO","ARCO","ARPS","AUEL","AUFE","BOAR", | |
| "BRMA","CIPA","COCR","COOC","COTE","DABI","ERSI","HATR", | |
| "HERU","HEVI","HICA","LAAZ","LEWH","MEAR","MEAZ","MEBO", | |
| "MEGL","MELA","MEOC","METE","OPOB","PAPA","PHQU","PHRO", | |
| "PSDE","PSTE","SCNI","SYMO","TRCA","TRFO","TRKI","TRPA", | |
| "TRPI","XACO","XAMO") | |
| species <- c("Acantherus piperatus","Ageneotettix deorum", | |
| "Amphitornus coloradus","Arphia conspersa", | |
| "Arphia pseudonietana","Aulocara elliotti", | |
| "Aulocara femoratum","Bootettix argentatus", | |
| "Brachystola magna","Cibolacris parviceps", | |
| "Cordillacris crenulata","Cordillacris occipitalis", | |
| "Conozoa texana","Dactylotum bicolor", | |
| "Eritettix simplex","Hadtrotettix trifasciatus", | |
| "Heliaula rufa","Hesperotettix viridis", | |
| "Hippopedon capito","Lactista azteca","Leprus wheeleri", | |
| "Melanoplus aridus","Melanoplus arizonae", | |
| "Melanoplus bowditchi","Melanoplus gladstoni", | |
| "Melanoplus lakinus","Melanoplus occidentalis", | |
| "Mermeria texana","Opeia obscura","Paropomala pallida", | |
| "Phlibostroma quadrimaculatum","Phrynotettix robustus", | |
| "Psoloessa delicatula","Psoloessa texana", | |
| "Schistocerca nitens","Syrbula montezuma", | |
| "Trimerotropis californicus","Tropidolophus formosus", | |
| "Trachyrhachis kiowa","Trimerotropis pallidipennis", | |
| "Trimerotropis pistrinaria","Xanthippus corallipes", | |
| "Xanthippus montanus") | |
| metadata <- data.frame(spec_codes, species) | |
| data$SPECIES <- metadata$species[match(data$SPECIES, metadata$spec_codes)] | |
| data <- with(data, tapply(CNT, list(site_year, SPECIES), sum, na.rm=TRUE)) | |
| data$site_year <- with(data, paste(SITE, year, sep="_")) | |
| temp <- strsplit(rownames(data), "_") | |
| year <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,2] | |
| year <- format(as.Date(data$DATE, format="%m/%Y"),"%Y") | |
| name <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,1] | |
| #needs some "burned" info?... | |
| return(.df.melt(data$species, data$SITE, data$CNT, | |
| data.frame(units="#"), | |
| data.frame(id=unique(data$plot_year), year, name, lat=NA, long=NA, address="Sevilleta National Wildlife Refuge, New Mexico", area=NA), | |
| data.frame(species=unique(data$species), taxonomy="Orthoptera"))) | |
| } | |
| .fia.2018 <- function(...){ | |
| .get.fia <- function(state, var, select){ | |
| t.zip <- tempfile() | |
| download.file(paste0("https://apps.fs.usda.gov/fia/datamart/CSV/",state,"_",var,".zip"), t.zip) | |
| unzip(t.zip) | |
| data <- fread(paste0(state,"_",var,".csv"), select=select) | |
| unlink(paste0(state,"_",var,".csv")) | |
| return(data) | |
| } | |
| states <- c("AK","AL","AZ","AR","CA","CO","CT","DE","FL","GA","HI","IA","ID","IL","IN","KS","KY","LA","ME","MD", | |
| "MA","MI","MN","MS","MO","MT","NC","NE","NH","NV","NM","NJ","NY","ND","OH","OK","OR","PA","RI", | |
| "SC","SD","TN","TX","UT","VA","VT","WA","WI","WV","WY","VI","PR") | |
| data <- vector("list", length(states)) | |
| for(i in seq_along(states)){ | |
| #Download/read in data | |
| tree <- .get.fia(states[i], "TREE", c("CN","PLT_CN","PLOT","SPCD","DIA","INVYR")) | |
| cond <- .get.fia(states[i], "COND", c("PLT_CN","PLOT","STDAGE","FORTYPCD","CONDID")) | |
| plot <- .get.fia(states[i], "PLOT", c("PLOT","LAT","LON","ELEV", "CN")) | |
| #Subset everything, remove sites with multiple/ambiguous codings, merge | |
| tree <- tree[tree$DIA > 1.96,] | |
| cond <- cond[cond$PLT_CN %in% as.integer64(names(Filter(function(x) x==1, table(cond$PLT_CN)))),] | |
| data[[i]] <- merge(tree, merge(cond, plot, by.x="PLT_CN", by.y="CN"), by.x="PLT_CN", by.y="PLT_CN") | |
| data[[i]]$state <- states[i] | |
| } | |
| data <- rbindlist(data) | |
| t <- setNames(seq_along(unique(data$PLT_CN)), unique(data$PLT_CN)) | |
| data$site.id <- paste0(data$state, "_", t[as.character(data$PLT_CN)]) | |
| uniq.site <- as.data.frame(unique(data[, 15:16])) | |
| sample.sites <- as.data.frame(uniq.site %>% group_by(state) %>% sample_n(size = 30)) | |
| data <- merge(sample.sites, data, by="site.id") | |
| data$site.id <- paste0(data$site.id, "_", data$INVYR) | |
| fia.spp <- read.csv("FIA_SppList.csv") #currently in raw_data folder | |
| fia.spp <- data.table(fia.spp$SPCD, paste0(fia.spp$GENUS, "_", fia.spp$SPECIES)) | |
| data <- merge(data, fia.spp, by.x="SPCD", by.y="V1") | |
| data <- data.frame(data$V2, data$site.id, data$LAT, data$LON, data$ELEV, | |
| data$STDAGE, data$FORTYPCD, data$CONDID, data$DIA) | |
| names(data) <- c("species", "site.id", "lat", "long", "elev", "stdage", "forestclass", "condclass", "diameter") | |
| comm <- t(as.matrix(with(data, table(species,site.id)))) | |
| dia <- aggregate(diameter~species, data, mean) | |
| dia.count <- aggregate(diameter~species, data, length) | |
| dia$diameter.n <- dia.count$diameter | |
| site.df <- data[!duplicated(data$site.id),] | |
| site.df <- site.df[,2:8] | |
| sites <- rownames(comm) | |
| site.df <- site.df[match(sites, site.df$site.id), ] | |
| return(.matrix.melt(comm, | |
| data.frame(units="#"), | |
| data.frame(id=site.df$site.id, name=NA, year=NA, lat=site.df$lat, | |
| long=site.df$long, address=NA, area=NA, | |
| elevation=site.df$elev, class=site.df$forestclass), | |
| data.frame(species=dia$species, taxonomy=NA, diameter=dia$diameter))) | |
| } | |
| .tomasovych.2010a <- function(...){ | |
| species <- read.xls(suppdata("10.5061/dryad.1225", "abundances-S California 1975.xls"), skip=1, header=TRUE) | |
| species.clean <- species[,-1] | |
| comm <- t(as.matrix(species.clean)) | |
| rownames(comm) <- species$X | |
| rownames(comm) | |
| } | |
| .mendonca.2018 <- function(...){ | |
| # need to fix the years | |
| tmp <- tempfile() | |
| download.file("https://esajournals.onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1002%2Fecy.2367&attachmentId=2208200269", tmp) | |
| data <- read.csv(.unzip("CERRADO_SM_Capture.csv", tmp), as.is=TRUE, fileEncoding = "Latin1") | |
| data <- data[!is.na(data$Individuals_captured),] | |
| data$Year_finsh <- as.numeric(data$Year_finish) | |
| data <- data[!is.na(data$Year_finish),] | |
| ids <- paste(data$id, data$Year_finish) | |
| #ids <- ids[-c(1513:1536)] | |
| # lat/long data | |
| tmp2 <- tempfile() | |
| download.file("https://esajournals.onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1002%2Fecy.2367&attachmentId=2208200269", tmp2) | |
| ll_data <- read.csv(.unzip("CERRADO_SM_Study_Site.csv", tmp), as.is=TRUE, fileEncoding = "Latin1") | |
| ll_data <- ll_data[,c(1,7,8)] | |
| ll_data$id <- unique(ids) | |
| names(ll_data) <- c("id", "lat", "long") | |
| ll_data$year <- ll_data$id; ll_data$name <- ll_data$id | |
| ll_data$address <- "Cerrado ecosystem: Brazil, Boliva, Paraguay"; ll_data$area <- "live_trap" | |
| return(.df.melt(data$Actual_species_name, | |
| ids, | |
| data$Individuals_captured, | |
| data.frame(units = "#"), | |
| ll_data, | |
| data.frame(species = unique(data$Actual_species_name), taxonomy = "Animalia") | |
| ) | |
| ) | |
| } | |
| # Error in data.frame(id = rownames(data), year = years, name = | |
| # names, lat = NA, : arguments imply differing number of rows: 20, | |
| # 24, 1 | |
| .sepulveda.2016 <- function(...){ | |
| tmp <- tempfile() | |
| download.file("http://journals.plos.org/plosone/article/file?type=supplementary&id=info:doi/10.1371/journal.pone.0157910.s001", tmp) | |
| data <- read.xls(tmp, 1, skip=1, fileEncoding="Latin1") | |
| data <- data[1:20,] | |
| years <- colnames(data)[2:25] | |
| names(data) <- c("species", paste(rep(c("Cocholgue", "Hualpen", "Llico", "Mehuin", "La Mision", "Maicolpue"),each=4), names(data)[2:25], sep="_")) | |
| d2 <- t(data) | |
| names <- rep(c("Cocholgue", "Hualpen", "Llico", "Mehuin", "La Mision", "Maicolpue"),each=4) | |
| return(.matrix.melt(data, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = years, name= names, lat= NA, long= NA, address="Southwestern Chilean coast", area = NA), | |
| data.frame(species=colnames(data), taxonomy = NA) | |
| ) | |
| ) | |
| } | |
| # Metadata woes | |
| .bried.2017 <- function(...){ | |
| tmp <- tempfile() | |
| download.file("https://datadryad.org/bitstream/handle/10255/dryad.151171/Dryad.data.xlsx?sequence=1", tmp) | |
| data <- read.xls(tmp, 1) | |
| n <- paste(data$Latitude, data$Longitude, sep = "_") | |
| comm <- data[,-c(1:4)] | |
| comm$Region <- n | |
| return(.matrix.melt(comm, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = 2017, name = , lat= , long = , address = "", area = NA), | |
| data.frame(species = colnames(data), taxonomy = "Insecta") | |
| ) | |
| ) | |
| } | |
| # datasets on chiclids - each function downloads a community | |
| # dataset for a different region | |
| # Kigoma town | |
| .britton.2017.a <- function(...){ | |
| tmp <- tempfile() | |
| download.file("https://datadryad.org/bitstream/handle/10255/dryad.148126/BrittonEtAl2017_KigomaTown.csv?sequence=3", tmp) | |
| data <- read.csv(tmp, skip=1) | |
| data <- data[-c(1,2),] | |
| names(data)[1] <- "species" | |
| comm <- t(data) | |
| return(.matrix.melt(comm, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
| data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
| )) | |
| } | |
| # Kigoma deforested | |
| .britton.2017.b <- function(...){ | |
| tmp <- tempfile() | |
| download.file("https://datadryad.org/bitstream/handle/10255/dryad.148127/BrittonEtAl2017_KigomaDeforested.csv?sequence=3", tmp) | |
| data <- read.csv(tmp, skip=1) | |
| data <- data[-c(1,2),] | |
| names(data)[1] <- "species" | |
| comm <- t(data) | |
| return(.matrix.melt(comm, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
| data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
| )) | |
| } | |
| # Kalilani village | |
| .britton.2017.c <- function(...){ | |
| tmp <- tempfile() | |
| download.file("https://datadryad.org/bitstream/handle/10255/dryad.148128/BrittonEtAl2017_KalilaniVillage.csv?sequence=1", tmp) | |
| data <- read.csv(tmp, skip=1) | |
| data <- data[-c(1,2),] | |
| names(data)[1] <- "species" | |
| comm <- t(data) | |
| return(.matrix.melt(comm, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
| data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
| )) | |
| } | |
| # Jakobsen's beach | |
| .britton.2017.d <- function(...){ | |
| tmp <- tempfile() | |
| download.file("https://datadryad.org/bitstream/handle/10255/dryad.148129/BrittonEtAl2017_Jakobsen%27sBeach.csv?sequence=3", tmp) | |
| data <- read.csv(tmp, skip=1) | |
| data <- data[-c(1,2),] | |
| names(data)[1] <- "species" | |
| comm <- t(data) | |
| return(.matrix.melt(comm, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
| data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
| )) | |
| } | |
| # Gombe stream | |
| .britton.2017.e <- function(...){ | |
| tmp <- tempfile() | |
| download.file("https://datadryad.org/bitstream/handle/10255/dryad.148130/BrittonEtAl2017_GombeNP.csv?sequence=1", tmp) | |
| data <- read.csv(tmp, skip=1) | |
| data <- data[-c(1,2),] | |
| names(data)[1] <- "species" | |
| comm <- t(data) | |
| return(.matrix.melt(comm, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
| data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
| )) | |
| } | |
| # Mahale mountain 1 | |
| .britton.2017.f <- function(...){ | |
| tmp <- tempfile() | |
| download.file("https://datadryad.org/bitstream/handle/10255/dryad.148131/BrittonEtAl2017_MahaleNPS1.csv?sequence=1", tmp) | |
| data <- read.csv(tmp, skip=1) | |
| data <- data[-c(1,2),] | |
| names(data)[1] <- "species" | |
| comm <- t(data) | |
| return(.matrix.melt(comm, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
| data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
| )) | |
| } | |
| # Mahale mountain 2 | |
| .britton.2017.g <- function(...){ | |
| tmp <- tempfile() | |
| download.file("https://datadryad.org/bitstream/handle/10255/dryad.148132/BrittonEtAl2017_MahaleNPS2.csv?sequence=3", tmp) | |
| data <- read.csv(tmp, skip=1) | |
| data <- data[-c(1,2),] | |
| names(data)[1] <- "species" | |
| comm <- t(data) | |
| return(.matrix.melt(comm, | |
| data.frame(units = "#"), | |
| data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
| data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
| )) | |
| } | |
| .drew.2015<-function(...){ | |
| expdata<-read.csv("https://datadryad.org/bitstream/handle/10255/dryad.93108/Supplemental%201.csv?sequence=1",as.is = TRUE) | |
| expdata$binom<-paste(expdata$Genus,expdata$species,sep=".") | |
| comm<-t(expdata[,4:6]) | |
| colnames(comm)<-expdata$binom | |
| #meta data is basically not a thing, so this may need to be scrapped after all | |
| return(.matrix.melt(comm, | |
| data.frame(units="p/a"), | |
| sitedata, | |
| data.frame(speccies=expdata$binom,taxonomy=NA) | |
| ) | |
| ) | |
| } | |
| .osuri.2016<-function(...){ | |
| expdata<-read.csv("https://datadryad.org/bitstream/handle/10255/dryad.109139/Osuri_Sanakran_2016_JAE_plot_data.csv?sequence=2",as.is = TRUE) | |
| comm <- with(expdata, tapply(species, list(species, site.name), length)) | |
| comm[is.na(comm)] <- 0 | |
| comm<-t(comm) | |
| #meta data is limited, what could be easily found is in the expdata data frame | |
| return(.matrix.melt(comm, | |
| data.frame(units="#"), | |
| sitedata, | |
| data.frame(species=expdata$species,taxonomy=NA) | |
| ) | |
| ) | |
| } | |
| .helmus.2013 <- function(...){ | |
| library(pez) # This isn't how we declare packages in 'real' | |
| # packages for the time being this is sufficient | |
| data(laja) | |
| return(.matrix.melt(invert.sites)) | |
| } | |
| .jain.2017 <- function(...){ | |
| species <- read.xls(suppdata("10.5061/dryad.177q4", "Jain_etal_2016_Butterfly%20abundance%20across%20sites_22Dec2016.xlsx"), skip=5, header=TRUE, as.is=TRUE) | |
| species.clean <- species[,c(-1:-15,-38)] | |
| comm <- t(as.matrix(species.clean)) | |
| colnames(comm) <- species$Scientific.name | |
| return(.matrix.melt(comm, | |
| data.frame(units="#", treatment=NA), | |
| data.frame(id=rownames(comm), year=site.metadata$Date, name=site.metadata$SiteCombo, lat=NA, long=NA, address = "British Columbia", area=site.metadata$HaSurveyed), | |
| data.frame(species=colnames(comm), taxonomy=NA))) | |
| } | |
| ## Error in data.frame(id = id, year = YEAR, name = Waterbody_Name, lat = lat, : arguments imply differing number of rows: 7556, 20027, 1 | |
| ## does not yet work | |
| .rypel.2018 <- function(...){ | |
| tmp.file <- tempfile() | |
| download.file("https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.356.3&entityid=829ef0e4eea5e6392b19e595aa775832", tmp.file) | |
| abun <- read.csv(tmp.file, header=TRUE) | |
| taxon_inf <- read.csv(file="https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.356.3&entityid=490295acdaf716c90b58a5a089ab9847",header=TRUE) | |
| location <- read.csv(file="https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.356.3&entityid=3c23c7e39d30f047fe6b229d85df2a88",header=TRUE) | |
| abun <- merge(abun, taxon_inf, by = "taxon_id") | |
| data <- merge(abun, location, by = "WBIC") | |
| species <- data$taxon_name | |
| lat <- data$Latitude | |
| long <- data$Longitude | |
| data$id <- rep(paste(data$Waterbody_Name,data$YEAR)) | |
| site.metadata <- data[!duplicated(data$id),] | |
| site.metadata <- with(site.metadata, | |
| data.frame(id=id, year=YEAR, name=Waterbody_Name, lat=lat,long=long, address="Wisconsin USA",area=NA) | |
| ) | |
| site <- rep(paste(data$Waterbody_Name,data$Year), 7556) | |
| data$site.id <- paste(data$Waterbody_Name,data$Year) | |
| comm <- with(data, tapply(N, list(site.id, taxon_name), sum)) | |
| comm[is.na(comm)] <- 0 | |
| return(.df.melt(species, site, comm, | |
| study.metadata=data.frame(units="#"), | |
| site.metadata, | |
| species.metadata=data.frame(species=unique(species), taxonomy=NA) | |
| )) | |
| } | |
| ################################ | |
| # ARGON FUNCTIONS ############## | |
| # - WORKING BUT NOT DATA RELEASE | |
| ################################ | |
| if(FALSE){ | |
| #' @export | |
| .branstetter.2018 <- function(...) { | |
| data <- read.csv("TableA3.csv") | |
| metadata <- read.csv("TableA2.csv") | |
| rownames(data) <- data[,1] | |
| data[,1] <- NULL | |
| colnames(data) <- gsub(".", "-", colnames(data), fixed=TRUE) | |
| data <- t(data) | |
| rownames(data) <- paste(rownames(data), year, sep="_") | |
| metadata$year <- format(as.Date(metadata$datecollected, format="%d-%b-%Y"),"%Y") | |
| year <- metadata$year[!duplicated(metadata$site)] | |
| name <- unique(metadata$site) | |
| lat <- metadata$latitude[!duplicated(metadata$site)] | |
| long <- metadata$longitude[!duplicated(metadata$site)] | |
| return(.matrix.melt(data, | |
| data.frame(units="#"), | |
| data.frame(id=rownames(data), year, name, lat, long, | |
| address=NA, area=NA), | |
| data.frame(species=colnames(data), taxonomy="Hymenoptera"))) | |
| } | |
| .cobb.2016 <- function(...) { | |
| data <- read.xls("COMPLETE Dataset as of 4_recovery2.xlsx") | |
| data$name <- with(data, paste("study.area", Study.Area, "site", Site, sep="_")) | |
| data$month.year <- with(data, paste(Month, Year, sep="-")) | |
| data$site.year <- with(data, paste(name, month.year, sep="_")) | |
| metadata <- data[,c(1:11, 148, 149, 150)] | |
| metadata$Longitude <- gsub("\342\200\223", "-", metadata$Longitude) | |
| data[,1] <- data$site.year | |
| data[,c(2:11, 148, 149, 150)] <- NULL | |
| data <- aggregate(.~Sample.., data=data, FUN=sum) | |
| rownames(data) <- data[,1] | |
| data[,1] <- NULL | |
| rownames <- rownames(data) | |
| data <- apply(data, 2, as.numeric) | |
| rownames(data) <- rownames | |
| name <- metadata$name[match(rownames(data), metadata$site.year)] | |
| year <- metadata$Year[match(rownames(data), metadata$site.year)] | |
| lat <- metadata$Latitude[match(rownames(data), metadata$site.year)] | |
| long <- metadata$Longitude[match(rownames(data), metadata$site.year)] | |
| veg.type <- metadata$Veg.type[match(rownames(data), metadata$site.year)] | |
| burned <- metadata$Burn[match(rownames(data), metadata$site.year)] | |
| monsoon <- metadata$Monsoon[match(rownames(data), metadata$site.year)] | |
| return(.matrix.melt(data, | |
| data.frame(units="STD.#"), | |
| data.frame(id=rownames(data), | |
| year, | |
| name, | |
| lat, | |
| long, | |
| address=NA, | |
| area=NA, | |
| veg.type, | |
| burned, | |
| monsoon), | |
| data.frame(species=colnames(data), taxonomy="Arthropoda"))) | |
| } | |
| .mooney.2018 <- function(...) { | |
| #will need to loop through and do this for each year (sheet) in the dataset. | |
| data <- read.xls("Insect Abundance Population Summaries.xlsx", sheet="#") | |
| data <- data[which(data$Response == "Total"),] | |
| #remove all rows that contain only NA values | |
| data <- data[ ,!apply(data, 2, function(x) all(is.na(x)))] | |
| data <- melt(data, id=c("Population", "Response")) | |
| } | |
| .dyer.2017 <- function(...) { | |
| # Location is not always GPS coordinates in this dataset. Some are descriptions or titles of the locations. | |
| # Some of the values in data are blank. These do not mean that the value is zero but that the data is not complete. (Lee has the code to complete it). | |
| data <- read.xls("SWRS_plots_updated_nov_3_2017.xlsx") | |
| data<-data[,1:24] | |
| data$year <- format(as.Date(data$Date..D.M.Y., format="%Y-%m-%d"),"%Y") | |
| data$plot.year <- with(data, paste(X.number, year, sep=".")) | |
| return(.df.melt(data$plant.sp, | |
| data$plot.year, | |
| data$Leaf.area..cm.2., | |
| data.frame(units="area"), | |
| data.frame(id=unique(data$plot.year), | |
| year=data$year[!duplicated(data$plot.year)], | |
| name=data$X.number[!duplicated(data$plot.year)], | |
| lat=NA, | |
| long=NA, | |
| address=NA, | |
| area="cm2"), | |
| data.frame(species=unique(data$plant.sp), taxonomy="Plantae"))) | |
| } | |
| } | |
| #' @export | |
| .fia.2018 <- function(...){ | |
| .get.fia <- function(state, var, select){ | |
| t.zip <- tempfile() | |
| download.file(paste0("https://apps.fs.usda.gov/fia/datamart/CSV/",state,"_",var,".zip"), t.zip) | |
| unzip(t.zip) | |
| data <- fread(paste0(state,"_",var,".csv"), select=select) | |
| unlink(paste0(state,"_",var,".csv")) | |
| return(data) | |
| } | |
| states <- c("AL", "AK") #c("AL","AK","AZ","CA","CO","FL","GA","HI","KS","MD","MA","MI","NH","NM","ND","OK","TN","TX","UT","VA","WA","WI","WY") | |
| data <- vector("list", length(states)) | |
| for(i in seq_along(states)){ | |
| #Download/read in data | |
| tree <- .get.fia(states[i], "TREE", c("CN","PLT_CN","PLOT","SPCD","DIA","INVYR")) | |
| cond <- .get.fia(states[i], "COND", c("PLT_CN","PLOT","STDAGE","FORTYPCD","CONDID")) | |
| plot <- .get.fia(states[i], "PLOT", c("PLOT","LAT","LON","ELEV", "CN")) | |
| #Subset everything, remove sites with multiple/ambiguous codings, merge | |
| tree <- tree[tree$DIA > 1.96,] | |
| cond <- cond[cond$PLT_CN %in% as.integer64(names(Filter(function(x) x==1, table(cond$PLT_CN)))),] | |
| data[[i]] <- merge(tree, merge(cond, plot, by.x="PLT_CN", by.y="CN"), by.x="PLT_CN", by.y="PLT_CN") | |
| data[[i]]$state <- states[i] | |
| } | |
| data <- rbindlist(data) | |
| t <- setNames(seq_along(unique(data$PLT_CN)), unique(data$PLT_CN)) | |
| data$site.id <- paste0(data$state, "_", t[as.character(data$PLT_CN)]) | |
| data$site.id <- paste0(data$site.id, "_", data$INVYR) | |
| rndata <- with(data, ave(data, state, FUN=function(x) {sample.int(length(x))})) | |
| fia.spp <- read.csv("FIA_SppList.csv") #currently in the pglmm raw data folder | |
| fia.spp <- data.table(fia.spp$SPCD, paste0(fia.spp$GENUS, "_", fia.spp$SPECIES)) | |
| data <- merge(data, fia.spp, by.x="SPCD", by.y="V1") | |
| data <- data.frame(data$V2, data$site.id, data$LAT, data$LON, data$ELEV, | |
| data$STDAGE, data$FORTYPCD, data$CONDID, data$DIA) | |
| names(data) <- c("species", "site.id", "lat", "long", "elev", "stdage", "forestclass", "condclass", "diameter") | |
| comm <- t(as.matrix(with(data, table(species,site.id)))) | |
| # To get mean diameter of each species at each site: | |
| dia <- aggregate(diameter~species, data, mean) | |
| # To get count of diameters : | |
| dia.count <- aggregate(diameter~species, data, length) | |
| # data frame with diameter mean and count per species-site combination | |
| dia$diameter.n <- dia.count$diameter | |
| site.df <- data[!duplicated(data$site.id),] | |
| site.df <- site.df[,2:8] | |
| #site.df$site.id <- as.character(site.df$site.id); dia$species <- as.character(dia$species) | |
| sites <- rownames(comm) | |
| site.df <- site.df[match(sites, site.df$site.id), ] | |
| return(.matrix.melt(comm, | |
| data.frame(units="#"), | |
| data.frame(id=site.df$site.id, name=NA, year=NA, lat=site.df$lat, | |
| long=site.df$long, address=NA, area=NA, | |
| elevation=site.df$elev, class=site.df$forestclass), | |
| data.frame(species=dia$species, taxonomy=NA))) | |
| } | |
| #' @export | |
| .heidi.2018 <- function(...) { | |
| data <- read.xls("Heidi_Species_Cover_2017_Final_121817.xlsx", sheet=2, stringsAsFactors=FALSE) | |
| metadata <- read.xls("SiteSpeciesList_argon.xlsx", fileEncoding="latin1", stringsAsFactors=FALSE) | |
| data$geo <- metadata$Lat[match(data$Site, metadata$Site.Name)] | |
| data$lat <- NA | |
| data$long <- NA | |
| temp <- strsplit(data$geo, split=",") | |
| data$lat[1:471] <- matrix(unlist(temp[1:471]), ncol=2, byrow=TRUE)[,1] | |
| data$long[1:471] <- matrix(unlist(temp[1:471]), ncol=2, byrow=TRUE)[,2] | |
| data$Date <- format(as.Date(data$Date, format="%Y-%m-%d"),"%Y") | |
| data$site_plot <- with(data, paste(Site, Plot, Date, sep="_")) | |
| site.id <- unique(data$site_plot) | |
| year <- data$Date[!duplicated(data$site_plot)] | |
| name <- data$Site[!duplicated(data$site_plot)] | |
| return(.df.melt(data$Species.Ground.Cover, | |
| data$site_plot, | |
| data$Count, | |
| data.frame(units="#"), | |
| data.frame(id=unique(data$site_plot), year, name, lat=data$lat[!duplicated(data$site_plot)], long=data$long[!duplicated(data$site_plot)], address=NA, area=NA), | |
| data.frame(species=unique(data$Species.Ground.Cover), taxonomy=NA))) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment