The next Helmholtz Hacky Hour will take place on Wednesday, December 9, 2020 from 2PM to 3PM! Topic: Sustainable Programming! more...

Commit 36b31899 authored by Florian Centler

initial commit

parent 8e51bef1
# Combine per-read taxonomic and functional (eggNOG) annotations and
# cross-tabulate a taxonomic rank against a functional level.
#
# Inputs (tab-separated, no header line):
#   taxFile  - column 1: read id, column 2: lineage string in which every rank
#              is introduced by a prefix (d__, p__, c__, o__, f__, g__, s__),
#              fields are separated by ";" and the field right after a rank
#              name is stored as that rank's "<Rank>Percent" value.
#   funcFile - column 1: read id, column 2: ";"-separated functional hierarchy
#              (field 1 presumably the "eggNOG" root label - confirm against
#              the input data; field 2 = Level1, field 3 = Level2, ...).
# Outputs:
#   datatxt.txt                      - merged per-read table
#   myTable_sampleID_taxa-vs-fct.txt - Species x Level1 contingency table
#
# Assuming unique info for each tax and func! Check this in input data!
# (Read ids become row names, so read.table() stops on duplicated ids.)
taxFile <- "SampleID-taxa.txt"
funcFile <- "SampleID-fct.txt"

# Return the first ";"-delimited field that remains after removing
# `leadPattern` from the start of each lineage string. If `leadPattern` does
# not match, the first field of the untouched string is left over; it still
# carries a rank prefix ("__"), which is how missing entries are detected and
# set to NA.
extractTaxField <- function(lineage, leadPattern) {
  field <- sub(";.*", "", sub(leadPattern, "", lineage))
  field[grepl("__", field, fixed = TRUE)] <- NA
  field
}

# Return field number `level + 1` of each ";"-separated functional string.
# Strings with too few fields fall back to their first field; that placeholder
# ("eggNOG", " eggNOG") and empty fields are reported as NA.
extractFuncLevel <- function(hierarchy, level) {
  skipPattern <- paste0("^", paste(rep("[^;]*;", level), collapse = ""))
  field <- sub(";.*", "", sub(skipPattern, "", hierarchy))
  field[field %in% c("", "eggNOG", " eggNOG")] <- NA
  field
}

# ---- taxonomic info ----------------------------------------------------------
# Get read ids as row headers, taxonomic info as column V2
taxData <- read.table(file = taxFile, sep = "\t", row.names = 1,
                      quote = "\"", stringsAsFactors = FALSE)

# Rank column name -> prefix used in the lineage string
taxRanks <- c(Kingdom = "d__", Phylum = "p__", Class = "c__", Order = "o__",
              Family = "f__", Genus = "g__", Species = "s__")

# Parse taxonomic info and put in columns (NA where the rank is missing)
for (rank in names(taxRanks)) {
  taxData[[rank]] <- extractTaxField(taxData$V2,
                                     paste0("^.*", taxRanks[[rank]]))
}
taxData$taxInfo <- TRUE # mark read to have taxonomic info

# Record percentage info: the field following each rank name
for (rank in names(taxRanks)) {
  taxData[[paste0(rank, "Percent")]] <-
    extractTaxField(taxData$V2, paste0("^.*", taxRanks[[rank]], "[^;]+;"))
}

# drop parsed taxonomic input string
taxData$V2 <- NULL

# ---- functional info ---------------------------------------------------------
# Read ids must be the row names here as well: the merge below joins on
# row names, and default row names (1..n) would never match the read ids.
funcData <- read.table(file = funcFile, sep = "\t", row.names = 1,
                       quote = "\"", stringsAsFactors = FALSE)

# Functional levels to extract; extend (e.g. 1:6) if deeper levels are needed.
# Level1 is required by the contingency table at the end of the script.
funcLevels <- 1:2
for (level in funcLevels) {
  funcData[[paste0("Level", level)]] <- extractFuncLevel(funcData$V2, level)
}
funcData$funcInfo <- TRUE # mark read to have functional info

# drop parsed functional input string
funcData$V2 <- NULL

# ---- combine -----------------------------------------------------------------
# if reads with only taxonomic or functional info are of interest:
#data <- merge(taxData, funcData, by = "row.names", all=TRUE)
# if only reads are of interest which have both taxonomic info and functional info use:
data <- merge(taxData, funcData, by = "row.names")
write.table(data, "datatxt.txt", sep = "\t")

# lets get a table combining the levels of interest
# Select the taxa and eggnog level of interest (here exemplary: Species and EggNOG Level1)
myTable <- table(data$Species, data$Level1, useNA = "always")
#rownames(myTable)[nrow(myTable)] <- "unclassified"
#colnames(myTable)[ncol(myTable)] <- "unclassified"
write.table(myTable, "myTable_sampleID_taxa-vs-fct.txt", sep = "\t")
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment