Analysis of mOTUs Output in R#

R Studio#

To work in R, we are going to use the RStudio interface, which is divided into four panels:

- top-left: script editor
- top-right: variable list
- bottom-left: console
- bottom-right: file browser / plots / package list / help

First, set the correct working directory#

# Make the course folder the working directory so that the relative file
# paths used by the later read.table() calls resolve. Change the path if your
# copy of the course data lives somewhere else.
setwd("S:/biolcourse-34")

What is the effect of running mOTUs with different parameters?#

Sort mOTUs from most to least abundant and remove those that never appear#

# Read the merged mOTUs profile: tab-separated, first column used as row
# names, two leading lines skipped before the header row.
# comment.char = "" stops read.table from treating '#' as a comment marker.
data <- read.table(
  "M04.5-V3-stool-metaG.merged.motus",
  sep = "\t",
  stringsAsFactors = FALSE,
  quote = "\"",
  row.names = 1,
  comment.char = "",
  skip = 2,
  header = TRUE
)

# Order the rows by their total across all columns, largest first.
# drop = FALSE keeps a data frame even if the table has a single column.
data.sorted <- data[order(rowSums(data), decreasing = TRUE), , drop = FALSE]

# Discard rows whose total is zero (mOTUs that never appear).
data.filtered <- data.sorted[rowSums(data.sorted) > 0, , drop = FALSE]

Plot the data from the three different sets of parameters#

# One solid line per column of data.filtered, values scaled to percentages,
# drawn against the row index on a log-scaled y axis.
matplot(
  100 * data.filtered,
  type = "l",
  lty = 1,
  log = "y",
  ylim = c(1e-4, 1e2),
  ylab = "Relative Abundance (%)"
)

# Legend boxes use colours 1:3, matching matplot's default line colours.
legend(
  "topright",
  fill = 1:3,
  legend = c("Default", "High Precision", "High Sensitivity")
)

grid()

Compare the default and high sensitivity data sets#

# Scatter column 1 against column 2 as percentages on log-log axes.
# The 1e-4 offset keeps zero abundances drawable on a log scale.
# NOTE(review): the legend of the previous plot lists the columns as
# Default, High Precision, High Sensitivity, which would make column 2
# "High Precision" rather than "High Sensitivity" -- confirm the column
# order of the input file and adjust the index or the label accordingly.
axis.range <- c(1e-4, 1e2)
plot(
  100 * data.filtered[, 1] + 1e-4,
  100 * data.filtered[, 2] + 1e-4,
  log = "xy",
  xlim = axis.range,
  ylim = axis.range,
  xlab = "Default",
  ylab = "High Sensitivity",
  pch = 20,
  col = rgb(1, 0, 0, 0.25)
)
grid()

# Identity line (y = x) for reference.
abline(0, 1)

How does metagenomic data compare with metatranscriptomic data?#

Import the data, remove rows with no abundance#

# Read the merged profile of all samples: tab-separated, first column used as
# row names, two leading lines skipped before the header row.
# comment.char = "" stops read.table from treating '#' as a comment marker.
all.data <- read.table(
  "default.merge.motus",
  sep = "\t",
  stringsAsFactors = FALSE,
  quote = "\"",
  row.names = 1,
  comment.char = "",
  skip = 2,
  header = TRUE
)

# Keep only rows with a non-zero total across samples.
# drop = FALSE keeps a data frame even if the table has a single column.
valid.data <- all.data[rowSums(all.data) > 0, , drop = FALSE]

Shortcuts for different sample types#

# Column indices for the two data types: odd-numbered columns go to metaG,
# even-numbered columns to metaT.
# NOTE(review): assumes the columns alternate metaG, metaT per sample --
# confirm against the header of the input file.
n.cols <- ncol(valid.data)
metaG <- seq(from = 1, to = n.cols, by = 2)
metaT <- seq(from = 2, to = n.cols, by = 2)

Plot G against T#

# Flatten the metaG and metaT columns into two long vectors; the 1e-5 offset
# keeps zeros drawable on the log-log axes.
g.values <- 1e-5 + unlist(valid.data[, metaG])
t.values <- 1e-5 + unlist(valid.data[, metaT])

plot(
  g.values,
  t.values,
  log = "xy",
  xlab = "metaG",
  ylab = "metaT",
  pch = 20,
  col = rgb(1, 0, 0, 0.1)
)

# Identity line (y = x) for reference, then the background grid.
abline(0, 1)
grid()

Calculate a distance matrix between samples, perform ordination and hierarchically cluster#

Load libraries#

library(vegan)
library(dendextend)

Import sample metadata#

# Load the sample metadata: tab-separated with a header row; the first column
# becomes the row names, which the later lookups by individual rely on.
metadata <- read.table(
  "../data/motus_metadata.tsv",
  sep = "\t",
  row.names = 1,
  header = TRUE
)

Color samples by individual#

# Install a 22-colour palette so that numeric colour codes map to these
# colours in the plots that follow.
palette(c(
  '#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231', '#911eb4',
  '#46f0f0', '#f032e6', '#bcf60c', '#fabebe', '#008080', '#e6beff',
  '#9a6324', '#fffac8', '#800000', '#aaffc3', '#808000', '#ffd8b1',
  '#000075', '#808080', '#ffffff', '#000000'
))

# The first five characters of each column name are used as the individual's
# identifier.
sample.individuals <- substr(colnames(valid.data), start = 1, stop = 5)

# One numeric colour code per sample: the factor level of its individual.
sample.colors <- as.numeric(as.factor(sample.individuals))

Calculate distance matrix#

# Log-transform the abundances (1e-6 is added first so zeros do not become
# -Inf), transpose so that samples are rows, and compute Euclidean distances
# between samples.
log.abundance <- log(valid.data + 1e-6)
dm <- vegdist(t(log.abundance), method = "euclidean")

Perform MDS#

# Non-metric multidimensional scaling of the distance matrix into k = 4
# dimensions; pc = TRUE rotates the final configuration to principal
# components.
mds <- monoMDS(dm, pc = TRUE, k = 4)

Will individuals cluster, will families?#

Create ordination plot#

# Ordination plot of the MDS coordinates, one point per sample, coloured by
# individual.
plot(mds[["points"]], col = sample.colors, pch = 20)

# unique() keeps first-appearance order in both vectors, so each label is
# paired with its own colour.
legend(
  "topleft",
  legend = unique(sample.individuals),
  fill = unique(sample.colors),
  cex = 0.5
)

Repeat, but consider family#

# Look up the family number of every sample via its individual identifier.
sample.family <- metadata[sample.individuals, "FAMNO"]

# Map the family values to palette indices through a factor, as is done for
# the individuals. Raw metadata values are not safe colour codes: character
# values are read as colour names and a value of 0 draws in the background
# colour.
family.factor <- as.factor(sample.family)
family.colors <- as.numeric(family.factor)

plot(mds$points, pch = 20, col = family.colors)

# Level i of the factor is drawn with palette colour i.
legend(
  "topleft",
  legend = levels(family.factor),
  fill = seq_along(levels(family.factor))
)

Repeat with the metadata of your choice#

# Look up the diabetes status of every sample via its individual identifier.
sample.diabetes <- metadata[sample.individuals, "DIABETESTY1"]

# Map the status values to palette indices through a factor. Raw metadata
# values are not safe colour codes: character values are read as colour names
# and a value of 0 draws in the background colour.
diabetes.factor <- as.factor(sample.diabetes)
diabetes.colors <- as.numeric(diabetes.factor)

plot(mds$points, pch = 20, col = diabetes.colors)

# Level i of the factor is drawn with palette colour i.
legend(
  "topleft",
  legend = levels(diabetes.factor),
  fill = seq_along(levels(diabetes.factor))
)

Hierarchically cluster samples#

# Hierarchically cluster the samples from the distance matrix (hclust
# defaults), then convert the result to a dendrogram object so that its
# labels can be coloured.
sample.tree <- hclust(dm)
hc <- as.dendrogram(sample.tree)

Will samples cluster by individual, by diabetes status?#

Plot with individual information#

# Print the colour codes for inspection.
sample.colors

# labels_colors<- assigns colours to the leaves in their left-to-right order
# in the dendrogram, not in the original sample order, so permute the colour
# vector with order.dendrogram() first.
labels_colors(hc) <- sample.colors[order.dendrogram(hc)]

# Enlarge the bottom margin so the sample labels fit.
par(mar = 0.1 + c(12, 4, 4, 1))
plot(hc)

Plot diabetes status#

# One colour code per sample, taken from the factor level of its diabetes
# status.
diabetes.colors <- as.numeric(as.factor(metadata[sample.individuals, "DIABETESTY1"]))

# labels_colors<- assigns colours in dendrogram leaf order, so reorder the
# per-sample colours with order.dendrogram() before assigning.
labels_colors(hc) <- diabetes.colors[order.dendrogram(hc)]
plot(hc)

Plot some metadata of your choice#

# One colour code per sample, taken from the factor level of the chosen
# metadata column (here "SEX").
meta.colors <- as.numeric(as.factor(metadata[sample.individuals, "SEX"]))

# labels_colors<- assigns colours in dendrogram leaf order, so reorder the
# per-sample colours with order.dendrogram() before assigning.
labels_colors(hc) <- meta.colors[order.dendrogram(hc)]
plot(hc)

Analysis of mOTUs Output in R

Contents

Analysis of mOTUs Output in R#

R Studio#

First, set the correct working directory#

What is the effect of running mOTUs with different parameters?#

Sort mOTUs from most to least abundant and remove those that never appear#

Plot the data from the three different sets of parameters#

Compare the default and high sensitivity data sets#

How does metagenomic data compare with metatranscriptomic data?#

Import the data, remove rows with no abundance#

Shortcuts for different sample types#

Plot G against T#

Calculate a distance matrix between samples, perform ordination and hierarchically cluster#

Load libraries#

Import sample metadata#

Color samples by individual#

Calculate distance matrix#

Perform MDS#

Will individuals cluster, will families?#

Create ordination plot#

Repeat, but consider family#

Repeat with the metadata of your choice#

Hierarchically cluster samples#

Will samples cluster by individual, by diabetes status?#

Plot with individual information#

Plot diabetes status#

Plot some metadata of your choice#