## ---- echo=TRUE--------------------------------------------------
library(seqc)

SampleA <- ILM_aceview_gene_BGI[,grepl("A_",colnames(ILM_aceview_gene_BGI))]
rownames(SampleA) <- ILM_aceview_gene_BGI[,2]

SampleB <- ILM_aceview_gene_BGI[,grepl("B_",colnames(ILM_aceview_gene_BGI))]
rownames(SampleB) <- ILM_aceview_gene_BGI[,2]

## ---- echo=TRUE--------------------------------------------------
set.seed(12345)
SampleA3 <- SampleA[,sample(1:80,6)]
SampleB3 <- SampleB[,sample(1:80,8)]


## ---- echo=TRUE--------------------------------------------------
datamatrix <- cbind(SampleA3,SampleB3)

## ---- echo=TRUE--------------------------------------------------
#6 samples for condition 1 and 8 samples for condition 2.
design <- c(rep(1,6),rep(2,8))
design <- model.matrix(~ 0+factor(design))
colnames(design) <- c("group1", "group2")
rownames(design) <- colnames(datamatrix)

## ---- echo=TRUE--------------------------------------------------
library(Linnorm)
DEG_Results <- Linnorm.limma(datamatrix,design)
#The DEG_Results matrix contains your DEG analysis results.

## ---- echo=TRUE--------------------------------------------------
library(Linnorm)
BothResults <- Linnorm.limma(datamatrix,design,output="Both")

#To separate results into two matrices for analysis:

DEG_Results <- BothResults$DEResults
#The DEG_Results matrix now contains DEG analysis results.

TransformedMatrix <- BothResults$TMatrix
#The TransformedMatrix matrix now contains a Linnorm Transformed dataset.

## ----------------------------------------------------------------
write.table(DEG_Results, "DEG_Results.txt", quote=FALSE, sep="\t", col.names=TRUE, 
row.names=TRUE)

## ---- echo=TRUE--------------------------------------------------
Genes15 <- DEG_Results[order(DEG_Results[,"adj.P.Val"]),][1:15,]
#Users can print the gene list by the following command:
#print(Genes15)

## ----kable, echo=FALSE-------------------------------------------
library(knitr)
kable(Genes15, digits=4)

## ---- echo=TRUE, fig.height=6, fig.width=6-----------------------
#Remove Genes which fold changes are INF. You can skip this if there is no INF 
#values in the fold change column.
NoINF <- DEG_Results[which(!is.infinite(DEG_Results[,1])),]

#Record significant genes for coloring
SignificantGenes <- NoINF[NoINF[,5] <= 0.05,1]

#Draw volcano plot with Log q values. 
#Green dots are non-significant, red dots are significant.
plot(x=NoINF[,1], y=NoINF[,5], col=ifelse(NoINF[,1] %in% SignificantGenes, 
"red", "green"),log="y", ylim = rev(range(NoINF[,5])), main="Volcano Plot", 
xlab="log2 Fold Change", ylab="q values", cex = 0.5)

## ---- echo=TRUE--------------------------------------------------
library(seqc)
SampleA <- ILM_aceview_gene_BGI[,grepl("A_",colnames(ILM_aceview_gene_BGI))]
rownames(SampleA) <- ILM_aceview_gene_BGI[,2]
#We are only transforming part of the matrix for an example.
SampleA <- SampleA[,1:10]

## ---- echo=TRUE--------------------------------------------------
library(Linnorm)
Transformed <- Linnorm(SampleA)

## ---- echo=TRUE--------------------------------------------------
#You can use this file with Excel.
write.table(Transformed, "Transformed_Matrix.txt", quote=FALSE, sep="\t", 
col.names=TRUE, row.names=TRUE)

## ---- echo=TRUE--------------------------------------------------
Transformed <- Transformed[rowSums(Transformed == 0) <= length(Transformed[1,])/4,]

## ---- echo=TRUE--------------------------------------------------
#Here, we randomly choose 100 genes for an example. 
#However, users can provide a customized gene list here.
set.seed(12345)
Transformed <- Transformed[sample(1:length(Transformed[,1]),100),]

#Calculate correlation.
PCC <- cor(t(Transformed))

## ---- echo=TRUE, fig.height=8, fig.width=8-----------------------
#Please install these libraries if you need to.
library(RColorBrewer)
library(gplots)
heatmap.2(as.matrix(PCC), Rowv=TRUE, Colv=TRUE, dendrogram='none',
symbreaks=TRUE , trace="none", xlab = 'Genes', ylab = "Genes", 
density.info='none' ,key.ylab=NA, col = colorRampPalette(c("blue",  "white", 
"yellow"))(n = 1000), lmat=rbind(c(4, 3), c(2, 
1)),cexRow=0.3,cexCol=0.3,margins = c(8, 8))

## ---- echo=TRUE--------------------------------------------------
library(seqc)

SampleA <- ILM_aceview_gene_BGI[,grepl("A_",colnames(ILM_aceview_gene_BGI))]
rownames(SampleA) <- ILM_aceview_gene_BGI[,2]

SampleB <- ILM_aceview_gene_BGI[,grepl("B_",colnames(ILM_aceview_gene_BGI))]
rownames(SampleB) <- ILM_aceview_gene_BGI[,2]

## ---- echo=TRUE--------------------------------------------------
#Again, we are only using part of the matrices for an example.
datamatrix <- cbind(SampleA[,1:5],SampleB[,1:5])

## ---- echo=TRUE--------------------------------------------------
library(Linnorm)
LNormData <- Linnorm(datamatrix)

## ---- echo=TRUE--------------------------------------------------
#Let LNormData be a matrix of Linnorm Transformed dataset. 
Newdata <- exp(LNormData)

## ---- echo=TRUE--------------------------------------------------
#Now, we can calculate fold changes between sample set 1 and sample set 2.
#Index of sample set 1 from LNormData and Newdata:
set1 <- 1:5
#Index of sample set 2 from LNormData and Newdata:
set2 <- 6:10

#Define a function that calculates log 2 fold change:
log2fc <- function(x) {
return(log(mean(x[set1])/mean(x[set2]),2))
}

#Calculate log 2 fold change of each gene in the dataset:
foldchanges <- unlist(apply(Newdata,1,log2fc))

#Put resulting data into a matrix
FCMatrix <- matrix(nrow=length(foldchanges),ncol=1)
rownames(FCMatrix) <- rownames(LNormData)
colnames(FCMatrix) <- c("Log 2 Fold Change")
FCMatrix[,1] <- foldchanges

#Now FCMatrix contains fold change results.

## ---- echo=TRUE, fig.height=6, fig.width=6-----------------------
Density <- density(foldchanges)
plot(Density$x,Density$y,type="n",xlab="Log 2 Fold Change", ylab="Probability Density",)
lines(Density$x,Density$y, lwd=1.5, col="blue")
title("Probability Density of Fold Change.\nSEQC Sample A vs Sample B")
legend("topright",legend=paste("mean = ", round(mean(foldchanges),2),
"\nStdev = ", round(sd(foldchanges),2)))

## ---- echo=TRUE--------------------------------------------------
library(seqc)

SampleA <- ILM_aceview_gene_BGI[,grepl("A_",colnames(ILM_aceview_gene_BGI))]
rownames(SampleA) <- ILM_aceview_gene_BGI[,2]
#We are only using part of the matrix for an example.
#However, users are encouraged to use the whole matrix.
SampleA <- SampleA[,1:10]

## ---- echo=TRUE--------------------------------------------------
library(Linnorm)
#This will generate two sets of RNA-seq data with 3 replicates each. 
#It will have 20000 genes totally with 5000 genes being differentially 
#expressed. It has the Poisson distribution.
SimulatedData <- RnaXSim(SampleA)

## ---- echo=TRUE--------------------------------------------------
#Index of differentially expressed genes.
DE_Index <- SimulatedData[[2]]

#Expression Matrix
ExpMatrix <- SimulatedData[[1]]

#Sample Set 1
Sample1 <- ExpMatrix[,1:3]

#Sample Set 2
Sample2 <- ExpMatrix[,4:6]

## ---- echo=TRUE--------------------------------------------------
library(seqc)

SampleA <- ILM_aceview_gene_BGI[,grepl("A_",colnames(ILM_aceview_gene_BGI))]
rownames(SampleA) <- ILM_aceview_gene_BGI[,2]

#We are only using part of the matrix for an example.
#However, users are encouraged to use the whole matrix.
SampleA <- SampleA[,1:10]

## ---- echo=TRUE--------------------------------------------------
#Number of replicates for each sample set.
NumReplicates <- 5
#Number of Genes in the Samples.
NumGenes <- 5000
#Number of Differentially Expressed Genes.
NumDiffGenes <- 1000
#Distribution. Put "NB" for Negative Binomial, "Gamma" for Gamma,
#"Poisson" for Poisson and "LogNorm" for Log Normal distribution.
Distribution <- "Gamma"

## ---- echo=TRUE--------------------------------------------------
library(Linnorm)
SimulatedData <- RnaXSim(SampleA, distribution=Distribution,
NumRep=NumReplicates, NumDiff = NumDiffGenes, NumFea = NumGenes)

## ---- echo=TRUE--------------------------------------------------
#Index of differentially expressed genes.
DE_Index <- SimulatedData[[2]]

#Expression Matrix
ExpMatrix <- SimulatedData[[1]]

#Sample Set 1
Sample1 <- ExpMatrix[,1:3]

#Sample Set 2
Sample2 <- ExpMatrix[,4:6]

## ---- echo=TRUE--------------------------------------------------
#Obtain a transformed dataset for an example.
library(seqc)
SampleA <- ILM_aceview_gene_BGI[,grepl("A_",colnames(ILM_aceview_gene_BGI))]
rownames(SampleA) <- ILM_aceview_gene_BGI[,2]
SampleA <- SampleA[,1:4]
library(Linnorm)
#LNormData is a matrix of Linnorm Transformed dataset.
LNormData <- Linnorm(SampleA)

#To convert Linnorm Transformed dataset into CPM or TPM:
XPMdata <- exp(LNormData) - 1
for (i in seq_along(XPMdata[1,])) {
	XPMdata[,i] <- (XPMdata[,i]/sum(XPMdata[,i])) * 1000000
}
#Now, XPMdata contains a CPM dataset if the original data is raw count or CPM. 
#It contains a TPM dataset if the original data is RPKM, FPKM or TPM.