gloomer = function(ps = data, taxa_level = taxa_level, NArm = "TRUE"){ rank.names = c('Kingdom','Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species')
ps = subset_taxa(ps, !Family %in% c("uncultured", "NA", "uncategorized", "unassigend", "", " ")) if(taxa_level == "Species") {
ps = subset_taxa(ps, !Genus %in% NA) tax_table(ps)[, taxa_level] <- ifelse(is.na(tax_table(ps)[, taxa_level]), paste0("unknown"), paste(tax_table(ps)[, taxa_level])) physeq = tax_glom(physeq = ps, taxrank = taxa_level, NArm = NArm) taxdat = tax_table(physeq)[, seq_along(rank.names[1:which(rank.names == taxa_level)])]
taxdat = taxdat[complete.cases(taxdat),] %>% as.data.frame otudat = otu_table(physeq)
taxdat[,6] = ifelse(taxdat[,6] %in% c("uncategorized", NA, "uncultured", "unassigend", "", " "), paste0("[", taxdat[,length(rank.names[1:which(rank.names=="Genus")])-1], "]", "_", taxdat[,6]), taxdat[,6]) spec1 = taxdat[, taxa_level] %>% as.vector spec2 = taxdat[, taxa_level] %>% as.vector
uni = matrix(NA, ncol = length(spec2), nrow = length(spec1)) for(i in seq_along(spec1)){ for(j in seq_along(spec2)){ uni[i, j] = ifelse(spec1[i] == spec2[j] , "TRUE", "FALSE") } }
rownames(uni) <-spec1 colnames(uni) <- spec2 uni[upper.tri(uni, diag = TRUE)] = 0
duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE")
if(dim(duplis)[[1]] > 0) { duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE") %>% dplyr::select(1) %>% unique() %>% unlist %>% as.vector taxdat = taxdat %>% mutate( uni= ifelse(taxdat[, taxa_level] %in% duplis, paste0("[", taxdat[,length(rank.names[1:which(rank.names==taxa_level)])-1], "]", "_", taxdat[,taxa_level]), taxdat[,taxa_level]))
dupies <- taxdat[duplicated(taxdat[,"uni"]), "uni"] if(length(dupies)>0) { taxdat = taxdat %>% data.frame %>% mutate( uni2= ifelse(taxdat[, "uni"] %in% dupies, paste0("[", taxdat[,length(rank.names[1:which(rank.names==taxa_level)])-2], "]", "_", taxdat[,"uni"]), taxdat[,"uni"])) taxdat[, taxa_level] = taxdat[, "uni2"] taxdat[, "uni"] <- NULL taxdat[, "uni2"] <- NULL taxdat <- as(taxdat, "matrix") rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level] rownames(taxdat) <- taxdat[, taxa_level] taxdat <- tax_table(taxdat) taxa_names(physeq) <- taxa_names(taxdat) tax_table(physeq) <- taxdat otu_table(physeq) <- otudat } else { taxdat[, taxa_level] = taxdat[, "uni"] taxdat[, "uni"] <- NULL taxdat <- as(taxdat, "matrix") rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level] rownames(taxdat) <- taxdat[, taxa_level] taxdat <- tax_table(taxdat) taxa_names(physeq) <- taxa_names(taxdat) tax_table(physeq) <- taxdat otu_table(physeq) <- otudat } } else { taxdat <- as.matrix(taxdat) taxdat <- tax_table(taxdat) rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level] rownames(taxdat) <- taxdat[, taxa_level] taxdat <- tax_table(taxdat) taxa_names(physeq) <- taxa_names(taxdat) tax_table(physeq) <- taxdat otu_table(physeq) <- otudat }
} else if (taxa_level == "Genus") { physeq = tax_glom(physeq = ps, taxrank = taxa_level, NArm = NArm) taxdat = tax_table(physeq)[, seq_along(rank.names[1:which(rank.names == taxa_level)])] taxdat = taxdat[complete.cases(taxdat),] %>% as.data.frame otudat = otu_table(physeq)
taxdat[,6] = ifelse(taxdat[,6] %in% c("uncategorized", NA, "uncultured", "unassigend", "", " "), paste0("[", taxdat[,length(rank.names[1:which(rank.names==taxa_level)])-1], "]", "_", taxdat[,taxa_level]), taxdat[,taxa_level]) gen1 = taxdat[, taxa_level] %>% as.vector gen2 = taxdat[, taxa_level] %>% as.vector
uni = matrix(NA, ncol = length(gen2), nrow = length(gen1)) for(i in seq_along(gen1)){ for(j in seq_along(gen2)){ uni[i, j] = ifelse(gen1[i] == gen2[j] , "TRUE", "FALSE") } }
rownames(uni) <-gen1 colnames(uni) <- gen2 uni[upper.tri(uni, diag = TRUE)] = 0
duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE")
if(dim(duplis)[[1]] > 0){ duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE") %>% dplyr::select(1)%>% unique() %>% unlist %>% as.vector taxdat = taxdat %>% mutate( uni= ifelse(taxdat[, taxa_level] %in% duplis, paste0("[", taxdat[,length(rank.names[1:which(rank.names==taxa_level)])-1], "]", "_", taxdat[,taxa_level]), taxdat[,taxa_level])) taxdat[, taxa_level] = taxdat[, "uni"] taxdat[, "uni"] <- NULL
taxdat <- as(taxdat, "matrix") rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level] rownames(taxdat) <- taxdat[taxdat[,taxa_level] %in% rownames(otudat), taxa_level] taxdat <- as.matrix(taxdat) taxdat <- tax_table(taxdat) taxa_names(physeq) <- taxa_names(taxdat) tax_table(physeq) <- taxdat otu_table(physeq) <- otudat } else {
taxdat <- as.matrix(taxdat) taxdat <- tax_table(taxdat) rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level] rownames(taxdat) <- taxdat[, taxa_level] taxdat <- tax_table(taxdat) taxa_names(physeq) <- taxa_names(taxdat) tax_table(physeq) <- taxdat otu_table(physeq) <- otudat } } else { physeq = tax_glom(physeq = ps, taxrank = taxa_level, NArm = TRUE) taxdat = tax_table(physeq)[, seq_along(rank.names[1:which(rank.names == taxa_level)])] taxdat = taxdat[complete.cases(taxdat),] %>% as.data.frame otudat = otu_table(physeq) spec1 = taxdat[, taxa_level] %>% as.vector spec2 = taxdat[, taxa_level] %>% as.vector
uni = matrix(NA, ncol = length(spec2), nrow = length(spec1)) for(i in seq_along(spec1)){ for(j in seq_along(spec2)){ uni[i, j] = ifelse(spec1[i] == spec2[j] , "TRUE", "FALSE") } }
rownames(uni) <-spec1 colnames(uni) <- spec2 uni[upper.tri(uni, diag = TRUE)] = 0
duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE")
if(dim(duplis)[[1]] > 0){ duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE") %>% dplyr::select(1)%>% unique() %>% unlist %>% as.vector taxdat = taxdat %>% mutate( uni= ifelse(taxdat[, taxa_level] %in% duplis, paste(taxdat[,length(rank.names[1:which(rank.names==taxa_level)])-1], "_", taxdat[,taxa_level]), taxdat[,taxa_level])) taxdat[, taxa_level] = taxdat[, "uni"] taxdat[, "uni"] <- NULL taxdat <- as.matrix(taxdat) rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level] rownames(taxdat) <- taxdat[, taxa_level] taxdat <- tax_table(taxdat) taxa_names(physeq) <- taxa_names(taxdat) tax_table(physeq) <- taxdat otu_table(physeq) <- otudat } else {
taxdat <- as.matrix(taxdat) taxdat <- tax_table(taxdat) rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level] rownames(taxdat) <- taxdat[, taxa_level] taxdat <- tax_table(taxdat) taxa_names(physeq) <- taxa_names(taxdat) tax_table(physeq) <- taxdat otu_table(physeq) <- otudat }
} return(physeq) }
|