Analysis of mOTUs Output in R

RStudio

To work in R, we are going to use the RStudio interface.

The panels are:

top-left: script editor
top-right: variable list
bottom-left: console
bottom-right: file browser / plots / package list / help

First, set the correct working directory

# Make the course folder the working directory so that the relative file
# names passed to read.table() further down are resolved against it.
setwd(file.path("S:", "biolcourse-34"))

What is the effect of running mOTUs with different parameters?

Sort mOTUs from most to least abundant and remove those that never appear

# Read the merged mOTUs table: tab-separated, first column used as row names,
# the first two lines skipped and the following line taken as the header.
# comment.char = "" is spelled out in full (the shorthand `comment=` relied on
# partial argument matching) and disables comment handling so "#" is kept as
# data. TRUE/FALSE replace T/F, which are ordinary variables that can be
# reassigned.
data <- read.table(
  "M04.5-V3-stool-metaG.merged.motus",
  sep = "\t",
  stringsAsFactors = FALSE,
  quote = "\"",
  row.names = 1,
  comment.char = "",
  skip = 2,
  header = TRUE
)

# Order rows by their total across all columns, largest first.
# drop = FALSE keeps a data frame even if the table has a single column, so the
# rowSums() call below still works.
data.sorted <- data[order(rowSums(data), decreasing = TRUE), , drop = FALSE]

# Discard rows whose total is zero.
data.filtered <- data.sorted[rowSums(data.sorted) > 0, , drop = FALSE]

Plot the data from the three different sets of parameters

# Draw every column of data.filtered as a solid line, scaled by 100 to give
# percentages, against row rank on a logarithmic y axis.
abundance.pct <- 100 * data.filtered
matplot(
  abundance.pct,
  type = "l",
  lty = 1,
  log = "y",
  ylim = c(1e-4, 1e2),
  ylab = "Relative Abundance (%)"
)

# One legend entry per parameter set, filled with colour indices 1 to 3.
parameter.sets <- c("Default", "High Precision", "High Sensitivity")
legend("topright", legend = parameter.sets, fill = seq_along(parameter.sets))
grid()

Compare the default and high-sensitivity data sets

# Scatter column 1 of data.filtered against column 2, both as percentages on
# log-log axes. The 1e-4 offset keeps zero values drawable on a log scale.
# NOTE(review): the y axis is labelled "High Sensitivity", but the legend of the
# preceding line plot lists "High Precision" as the second series - confirm
# which parameter set column 2 actually holds.
first.pct <- 100 * data.filtered[, 1] + 1e-4
second.pct <- 100 * data.filtered[, 2] + 1e-4
pct.limits <- c(1e-4, 1e2)
plot(
  first.pct,
  second.pct,
  log = "xy",
  xlim = pct.limits,
  ylim = pct.limits,
  xlab = "Default",
  ylab = "High Sensitivity",
  pch = 20,
  col = rgb(1, 0, 0, 0.25)
)
grid()
# Reference line with intercept 0 and slope 1.
abline(a = 0, b = 1)

How does metagenomic data compare with metatranscriptomic data?

Import the data, remove rows with no abundance

# Read the merged profile table: tab-separated, first column used as row names,
# the first two lines skipped and the following line taken as the header.
# comment.char = "" is spelled out in full (the shorthand `comment=` relied on
# partial argument matching) and disables comment handling so "#" is kept as
# data. TRUE/FALSE replace T/F, which are ordinary variables that can be
# reassigned.
all.data <- read.table(
  "default.merge.motus",
  sep = "\t",
  stringsAsFactors = FALSE,
  quote = "\"",
  row.names = 1,
  comment.char = "",
  skip = 2,
  header = TRUE
)

# Keep only rows with a non-zero total. drop = FALSE keeps a data frame even
# when the table has a single column.
valid.data <- all.data[rowSums(all.data) > 0, , drop = FALSE]

Shortcuts for different sample types

# Column positions for the two sample types: every odd-numbered column goes to
# metaG and every even-numbered column to metaT.
# NOTE(review): presumably the columns alternate metaG/metaT per sample -
# verify against the header of the input table.
n.columns <- ncol(valid.data)
metaG <- seq(from = 1, to = n.columns, by = 2)
metaT <- seq(from = 2, to = n.columns, by = 2)

Plot metagenomic (G) against metatranscriptomic (T) abundances

# Flatten the metaG and metaT columns into two long vectors and scatter one
# against the other on log-log axes. The 1e-5 offset keeps zero values
# drawable on a log scale.
metaG.values <- 1e-5 + unlist(valid.data[, metaG])
metaT.values <- 1e-5 + unlist(valid.data[, metaT])
plot(
  metaG.values,
  metaT.values,
  log = "xy",
  xlab = "metaG",
  ylab = "metaT",
  pch = 20,
  col = rgb(1, 0, 0, 0.1)
)
# Reference line with intercept 0 and slope 1.
abline(a = 0, b = 1)
grid()

Calculate a distance matrix between samples, perform ordination and hierarchical clustering

Load libraries

library(vegan)
library(dendextend)

Import sample metadata

# Read the tab-separated sample metadata; the first line is the header and the
# first column supplies the row names used for look-ups later on.
# TRUE replaces T, which is an ordinary variable that can be reassigned.
metadata <- read.table(
  "../data/motus_metadata.tsv",
  sep = "\t",
  row.names = 1,
  header = TRUE
)

Color samples by individual

# Install a 22-colour palette so that integer colour indices used by the plots
# below map onto these colours.
individual.palette <- c(
  '#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231', '#911eb4',
  '#46f0f0', '#f032e6', '#bcf60c', '#fabebe', '#008080', '#e6beff',
  '#9a6324', '#fffac8', '#800000', '#aaffc3', '#808000', '#ffd8b1',
  '#000075', '#808080', '#ffffff', '#000000'
)
palette(individual.palette)

# The first five characters of each column name are used as the individual ID;
# each distinct ID is then turned into a numeric code usable as a colour index.
sample.individuals <- substr(colnames(valid.data), start = 1, stop = 5)
sample.colors <- as.numeric(factor(sample.individuals))

Calculate distance matrix

# Euclidean distances between samples on log-transformed values. The table is
# transposed so that samples (columns of valid.data) become rows, and 1e-6 is
# added first so that zeros have a finite logarithm.
log.abundance <- log(valid.data + 1e-6)
dm <- vegdist(t(log.abundance), method = "euclidean")

Perform MDS

# Run monoMDS on the sample distance matrix with k = 4 and pc switched on.
# TRUE replaces T, which is an ordinary variable that can be reassigned.
mds <- monoMDS(dm, k = 4, pc = TRUE)

Will samples cluster by individual? By family?

Create ordination plot

# Ordination scatter of the MDS points, one dot per sample, coloured by the
# individual's colour code.
plot(
  mds$points,
  pch = 20,
  col = sample.colors
)

# unique() keeps first-appearance order for both vectors, so each label lines
# up with its own colour code.
legend.labels <- unique(sample.individuals)
legend.fills <- unique(sample.colors)
legend("topleft", legend = legend.labels, fill = legend.fills, cex = 0.5)

Repeat, but consider family

# Look up the family of every sample through its individual ID.
sample.family <- metadata[sample.individuals, "FAMNO"]

# Convert the family values to consecutive colour codes (1, 2, ...) instead of
# passing the raw metadata to `col`. Raw values are only safe when they happen
# to be small positive integers: 0 draws in the background colour, character
# values are rejected as invalid colour names, and large IDs wrap around the
# palette. This matches the as.numeric(as.factor()) conversion used for the
# dendrogram label colours.
family.levels <- as.factor(sample.family)
plot(mds$points, pch = 20, col = as.numeric(family.levels))
legend(
  "topleft",
  legend = levels(family.levels),
  fill = seq_along(levels(family.levels))
)

Repeat with the metadata of your choice

# Look up the diabetes status of every sample through its individual ID.
sample.diabetes <- metadata[sample.individuals, "DIABETESTY1"]

# Convert the status values to consecutive colour codes (1, 2, ...) instead of
# passing the raw metadata to `col`. Raw values are only safe when they happen
# to be small positive integers: 0 or FALSE draws in the background colour and
# character values are rejected as invalid colour names. This matches the
# as.numeric(as.factor()) conversion used for the dendrogram label colours.
diabetes.levels <- as.factor(sample.diabetes)
plot(mds$points, pch = 20, col = as.numeric(diabetes.levels))
legend(
  "topleft",
  legend = levels(diabetes.levels),
  fill = seq_along(levels(diabetes.levels))
)

Hierarchically cluster samples

# Cluster the samples hierarchically from the distance matrix (hclust with its
# default settings) and convert the result to a dendrogram object for plotting.
sample.tree <- hclust(dm)
hc <- as.dendrogram(sample.tree)

Will samples cluster by individual or by diabetes status?

Plot with individual information

# Print the colour code assigned to each sample.
sample.colors

# labels_colors<- assigns colours in the order the leaves appear in the
# dendrogram, not in the column order of valid.data. sample.colors is in column
# order, so it is permuted with order.dendrogram() first; otherwise leaves are
# painted with another sample's colour.
labels_colors(hc) <- sample.colors[order.dendrogram(hc)]

# Enlarge the bottom margin (first element of `mar`) to leave room for the
# sample labels; the setting stays in effect for the later dendrogram plots.
par(mar = 0.1 + c(12, 4, 4, 1))
plot(hc)

Plot diabetes status

# Turn each sample's diabetes status into a numeric colour code, in the column
# order of valid.data.
diabetes.colors <- as.numeric(as.factor(metadata[sample.individuals, "DIABETESTY1"]))

# labels_colors<- assigns colours in dendrogram leaf order, so the per-sample
# codes are permuted with order.dendrogram() to land on the right leaves.
labels_colors(hc) <- diabetes.colors[order.dendrogram(hc)]
plot(hc)

Plot some metadata of your choice

# Turn each sample's SEX value into a numeric colour code, in the column order
# of valid.data.
meta.colors <- as.numeric(as.factor(metadata[sample.individuals, "SEX"]))

# labels_colors<- assigns colours in dendrogram leaf order, so the per-sample
# codes are permuted with order.dendrogram() to land on the right leaves.
labels_colors(hc) <- meta.colors[order.dendrogram(hc)]
plot(hc)