## ---- echo=TRUE-------------------------------------------------- library(seqc) SampleA <- ILM_aceview_gene_BGI[,grepl("A_",colnames(ILM_aceview_gene_BGI))] rownames(SampleA) <- ILM_aceview_gene_BGI[,2] SampleB <- ILM_aceview_gene_BGI[,grepl("B_",colnames(ILM_aceview_gene_BGI))] rownames(SampleB) <- ILM_aceview_gene_BGI[,2] ## ---- echo=TRUE-------------------------------------------------- set.seed(12345) SampleA3 <- SampleA[,sample(1:80,6)] SampleB3 <- SampleB[,sample(1:80,8)] ## ---- echo=TRUE-------------------------------------------------- datamatrix <- cbind(SampleA3,SampleB3) ## ---- echo=TRUE-------------------------------------------------- #6 samples for condition 1 and 8 samples for condition 2. design <- c(rep(1,6),rep(2,8)) design <- model.matrix(~ 0+factor(design)) colnames(design) <- c("group1", "group2") rownames(design) <- colnames(datamatrix) ## ---- echo=TRUE-------------------------------------------------- library(Linnorm) DEG_Results <- Linnorm.limma(datamatrix,design) #The DEG_Results matrix contains your DEG analysis results. ## ---- echo=TRUE-------------------------------------------------- library(Linnorm) BothResults <- Linnorm.limma(datamatrix,design,output="Both") #To separate results into two matrices for analysis: DEG_Results <- BothResults$DEResults #The DEG_Results matrix now contains DEG analysis results. TransformedMatrix <- BothResults$TMatrix #The TransformedMatrix matrix now contains a Linnorm Transformed dataset. ## ---------------------------------------------------------------- write.table(DEG_Results, "DEG_Results.txt", quote=FALSE, sep="\t", col.names=TRUE, row.names=TRUE) ## ---- echo=TRUE-------------------------------------------------- Genes15 <- DEG_Results[order(DEG_Results[,"adj.P.Val"]),][1:15,] #Users can print the gene list by the following command: #print(Genes15) ## ----kable, echo=FALSE------------------------------------------- library(knitr) kable(Genes15, digits=4) ## ---- echo=TRUE, fig.height=6, fig.width=6----------------------- #Remove Genes which fold changes are INF. You can skip this if there is no INF #values in the fold change column. NoINF <- DEG_Results[which(!is.infinite(DEG_Results[,1])),] #Record significant genes for coloring SignificantGenes <- NoINF[NoINF[,5] <= 0.05,1] #Draw volcano plot with Log q values. #Green dots are non-significant, red dots are significant. plot(x=NoINF[,1], y=NoINF[,5], col=ifelse(NoINF[,1] %in% SignificantGenes, "red", "green"),log="y", ylim = rev(range(NoINF[,5])), main="Volcano Plot", xlab="log2 Fold Change", ylab="q values", cex = 0.5) ## ---- echo=TRUE-------------------------------------------------- library(seqc) SampleA <- ILM_aceview_gene_BGI[,grepl("A_",colnames(ILM_aceview_gene_BGI))] rownames(SampleA) <- ILM_aceview_gene_BGI[,2] #We are only transforming part of the matrix for an example. SampleA <- SampleA[,1:10] ## ---- echo=TRUE-------------------------------------------------- library(Linnorm) Transformed <- Linnorm(SampleA) ## ---- echo=TRUE-------------------------------------------------- #You can use this file with Excel. write.table(Transformed, "Transformed_Matrix.txt", quote=FALSE, sep="\t", col.names=TRUE, row.names=TRUE) ## ---- echo=TRUE-------------------------------------------------- Transformed <- Transformed[rowSums(Transformed == 0) <= length(Transformed[1,])/4,] ## ---- echo=TRUE-------------------------------------------------- #Here, we randomly choose 100 genes for an example. #However, users can provide a customized gene list here. set.seed(12345) Transformed <- Transformed[sample(1:length(Transformed[,1]),100),] #Calculate correlation. PCC <- cor(t(Transformed)) ## ---- echo=TRUE, fig.height=8, fig.width=8----------------------- #Please install these libraries if you need to. library(RColorBrewer) library(gplots) heatmap.2(as.matrix(PCC), Rowv=TRUE, Colv=TRUE, dendrogram='none', symbreaks=TRUE , trace="none", xlab = 'Genes', ylab = "Genes", density.info='none' ,key.ylab=NA, col = colorRampPalette(c("blue", "white", "yellow"))(n = 1000), lmat=rbind(c(4, 3), c(2, 1)),cexRow=0.3,cexCol=0.3,margins = c(8, 8)) ## ---- echo=TRUE-------------------------------------------------- library(seqc) SampleA <- ILM_aceview_gene_BGI[,grepl("A_",colnames(ILM_aceview_gene_BGI))] rownames(SampleA) <- ILM_aceview_gene_BGI[,2] SampleB <- ILM_aceview_gene_BGI[,grepl("B_",colnames(ILM_aceview_gene_BGI))] rownames(SampleB) <- ILM_aceview_gene_BGI[,2] ## ---- echo=TRUE-------------------------------------------------- #Again, we are only using part of the matrices for an example. datamatrix <- cbind(SampleA[,1:5],SampleB[,1:5]) ## ---- echo=TRUE-------------------------------------------------- library(Linnorm) LNormData <- Linnorm(datamatrix) ## ---- echo=TRUE-------------------------------------------------- #Let LNormData be a matrix of Linnorm Transformed dataset. Newdata <- exp(LNormData) ## ---- echo=TRUE-------------------------------------------------- #Now, we can calculate fold changes between sample set 1 and sample set 2. #Index of sample set 1 from LNormData and Newdata: set1 <- 1:5 #Index of sample set 2 from LNormData and Newdata: set2 <- 6:10 #Define a function that calculates log 2 fold change: log2fc <- function(x) { return(log(mean(x[set1])/mean(x[set2]),2)) } #Calculate log 2 fold change of each gene in the dataset: foldchanges <- unlist(apply(Newdata,1,log2fc)) #Put resulting data into a matrix FCMatrix <- matrix(nrow=length(foldchanges),ncol=1) rownames(FCMatrix) <- rownames(LNormData) colnames(FCMatrix) <- c("Log 2 Fold Change") FCMatrix[,1] <- foldchanges #Now FCMatrix contains fold change results. ## ---- echo=TRUE, fig.height=6, fig.width=6----------------------- Density <- density(foldchanges) plot(Density$x,Density$y,type="n",xlab="Log 2 Fold Change", ylab="Probability Density",) lines(Density$x,Density$y, lwd=1.5, col="blue") title("Probability Density of Fold Change.\nSEQC Sample A vs Sample B") legend("topright",legend=paste("mean = ", round(mean(foldchanges),2), "\nStdev = ", round(sd(foldchanges),2))) ## ---- echo=TRUE-------------------------------------------------- library(seqc) SampleA <- ILM_aceview_gene_BGI[,grepl("A_",colnames(ILM_aceview_gene_BGI))] rownames(SampleA) <- ILM_aceview_gene_BGI[,2] #We are only using part of the matrix for an example. #However, users are encouraged to use the whole matrix. SampleA <- SampleA[,1:10] ## ---- echo=TRUE-------------------------------------------------- library(Linnorm) #This will generate two sets of RNA-seq data with 3 replicates each. #It will have 20000 genes totally with 5000 genes being differentially #expressed. It has the Poisson distribution. SimulatedData <- RnaXSim(SampleA) ## ---- echo=TRUE-------------------------------------------------- #Index of differentially expressed genes. DE_Index <- SimulatedData[[2]] #Expression Matrix ExpMatrix <- SimulatedData[[1]] #Sample Set 1 Sample1 <- ExpMatrix[,1:3] #Sample Set 2 Sample2 <- ExpMatrix[,4:6] ## ---- echo=TRUE-------------------------------------------------- library(seqc) SampleA <- ILM_aceview_gene_BGI[,grepl("A_",colnames(ILM_aceview_gene_BGI))] rownames(SampleA) <- ILM_aceview_gene_BGI[,2] #We are only using part of the matrix for an example. #However, users are encouraged to use the whole matrix. SampleA <- SampleA[,1:10] ## ---- echo=TRUE-------------------------------------------------- #Number of replicates for each sample set. NumReplicates <- 5 #Number of Genes in the Samples. NumGenes <- 5000 #Number of Differentially Expressed Genes. NumDiffGenes <- 1000 #Distribution. Put "NB" for Negative Binomial, "Gamma" for Gamma, #"Poisson" for Poisson and "LogNorm" for Log Normal distribution. Distribution <- "Gamma" ## ---- echo=TRUE-------------------------------------------------- library(Linnorm) SimulatedData <- RnaXSim(SampleA, distribution=Distribution, NumRep=NumReplicates, NumDiff = NumDiffGenes, NumFea = NumGenes) ## ---- echo=TRUE-------------------------------------------------- #Index of differentially expressed genes. DE_Index <- SimulatedData[[2]] #Expression Matrix ExpMatrix <- SimulatedData[[1]] #Sample Set 1 Sample1 <- ExpMatrix[,1:3] #Sample Set 2 Sample2 <- ExpMatrix[,4:6] ## ---- echo=TRUE-------------------------------------------------- #Obtain a transformed dataset for an example. library(seqc) SampleA <- ILM_aceview_gene_BGI[,grepl("A_",colnames(ILM_aceview_gene_BGI))] rownames(SampleA) <- ILM_aceview_gene_BGI[,2] SampleA <- SampleA[,1:4] library(Linnorm) #LNormData is a matrix of Linnorm Transformed dataset. LNormData <- Linnorm(SampleA) #To convert Linnorm Transformed dataset into CPM or TPM: XPMdata <- exp(LNormData) - 1 for (i in seq_along(XPMdata[1,])) { XPMdata[,i] <- (XPMdata[,i]/sum(XPMdata[,i])) * 1000000 } #Now, XPMdata contains a CPM dataset if the original data is raw count or CPM. #It contains a TPM dataset if the original data is RPKM, FPKM or TPM.