Commit 5a14d622 authored by fc-ibb105's avatar fc-ibb105
Browse files

modified RNA-seq.R in R

parent 1844388c
Loading
Loading
Loading
Loading
+128 −104
Original line number Diff line number Diff line
c.rnaseq <- function(
c.rnaseq.old <- function(
					ctl_count_file,
					obs_count_file,
					organism = "human",
@@ -84,12 +84,134 @@ c.rnaseq <- function(
	.[,TSS_end := TSS_start + 1]
}

c.rnaseq <- function(
				dge_list = NULL,
				count_files,
				group,
				compare = NULL,
				organism = "human",
				gtf_dt = NULL,
				count_type = "star",
				result = "DE"
){
	group_uniq <- unique(group)

	if(is.null(compare)){
		if(length(group_uniq) == 2){
			compare = paste0(group_uniq[2],"-",group_uniq[1])
		}else if(length(group_uniq) > 2){
			compare = c.comb_str(group_uniq,rep = 2,inter = "inter",sep = "-")
		}
		
	}

	if(is.null(dge_list)){
		ct <- tolower(count_type)
		if(ct == "star"){
			cols <- c(1, 2)
			header_str <- FALSE
			skip_rows <- 4
		}else if(ct == "featurecounts"){
			cols <- c(1, 7)
			header_str <- FALSE
			skip_rows <- 1
		}else if(ct == "salmon"){
			cols <- c(1,5)
			header_str <- TRUE
			skip_rows <- 0
		}

		x <-	readDGE(
					count_files,
					columns = cols,
					header = header_str,
					skip = skip_rows
				)
	}else if(class(dge_list) == "DGEList"){
		x <- dge_list
	}

	x$samples$group <- group

	if(is.null(gtf_dt)){
		if(count_type %in% c("star","featurecounts")){
			gtf_dt <-	c.gtf(organism,"gene") %>% 
						as.data.table() %>% 
						.[,.(gene_id,gene_name,type = gene_type,chr = seqnames,start,end,strand)] |>
						setkey(gene_id,gene_name)
		}else if(count_type == "salmon"){
			gtf_dt <-	c.gtf(organism,"transcript") %>% 
						as.data.table() %>% 
						.[,.(gene_id = transcript_id,gene_name = transcript_name,type = transcript_type,chr = seqnames,start,end,strand)] |>
						setkey(gene_id,gene_name)
		}
	}

	## it will take few minutes to load the gtf_file
	## filter the unneeded rows
	
	x$genes <-	gtf_dt

	#get the correspondece between gene_id (Ensembl ID) and gene_name

	x <-	x[
				filterByExpr(x, group = group), 
				keep.lib.sizes = F
			] |>
			calcNormFactors(method = "TMM")

	re <- toupper(result)
	if(re == "DE"){
		design <- model.matrix(~0+group)
		colnames(design) <- gsub("group", "", colnames(design))

		DE <-	x |>
				voomWithQualityWeights(design, plot = F) |>
				lmFit(design) |>
				contrasts.fit(
					contrasts = makeContrasts(
									contrasts = compare,
									levels = colnames(design)
								)
				) |>
				eBayes() |>
				topTable(n = Inf) |>
				as.data.table() %>%
				.[,log10P := -log10(adj.P.Val)]

		if(length(compare) == 1){
			DE |>
			setnames("logFC","log2FC") %>% 
			.[,compare := gsub("-",".",compare)] %>% 
			.[]
		}else{
			melt(
				DE,
				c("gene_id","gene_name","type","chr","start","end","strand","AveExpr","F","P.Value","adj.P.Val","log10P"),
				value.name = "log2FC",
				variable.name = "compare"
			)
		}
	}else if(re == "PCA"){
		x |>
		cpm(log = T) |>
		limma::plotMDS()
	}else if(re == "PCA_DATA"){
		x |>
		cpm(log = T) |>
		limma::plotMDS(plot = F)
	}
}


de.dt <-	function(
				.data,
				fold_change = 2, 
				p_value = 0.05,
				hyperbola = FALSE
){
	common_cols <- c("gene_id","gene_name","type","chr","start","end","strand","AveExpr","t","P.Value","adj.P.Val","B","log10P","t","F")

	if(isFALSE(hyperbola)){
		as.data.table(
			.data
@@ -161,7 +283,7 @@ de.gene <- function(
c.volcano <-	function(
					.data,
					top_gene_number = 10,
					fold_change = 1.5,
					fold_change = 2,
					p_value = 0.05,
					special_gene = NULL,
					special_label = "special",
@@ -663,105 +785,7 @@ rnaseqqc <- function(
	}
}

c.rnaseq_multi <- function(
				dge_list = NULL,
				count_files,
				group,
				compare,
				organism = "human",
				gtf_dt = NULL,
				count_type = "star",
				result = "DE"
){
	if(is.null(dge_list)){
		ct <- tolower(count_type)
		if(ct == "star"){
			cols <- c(1, 2)
			header_str <- FALSE
			skip_rows <- 4
		}else if(ct == "featurecounts"){
			cols <- c(1, 7)
			header_str <- FALSE
			skip_rows <- 1
		}else if(ct == "salmon"){
			cols <- c(1,5)
			header_str <- TRUE
			skip_rows <- 0
		}

		x <-	readDGE(
					count_files,
					columns = cols,
					header = header_str,
					skip = skip_rows
				)
	}else if(class(dge_list) == "DGEList"){
		x <- dge_list
	}

	x$samples$group <- group

	if(is.null(gtf_dt)){
		if(count_type %in% c("star","featurecounts")){
			gtf_dt <-	c.gtf(organism,"gene") %>% 
						as.data.table() %>% 
						.[,.(gene_id,gene_name,type = gene_type,chr = seqnames,start,end,strand)] |>
						setkey(gene_id,gene_name)
		}else if(count_type == "salmon"){
			gtf_dt <-	c.gtf(organism,"transcript") %>% 
						as.data.table() %>% 
						.[,.(gene_id = transcript_id,gene_name = transcript_name,type = transcript_type,chr = seqnames,start,end,strand)] |>
						setkey(gene_id,gene_name)
		}
	}

	## it will take few minutes to load the gtf_file
	## filter the unneeded rows
	
	x$genes <-	gtf_dt

	#get the correspondece between gene_id (Ensembl ID) and gene_name

	x <-	x[
				filterByExpr(x, group = group), 
				keep.lib.sizes = F
			] |>
			calcNormFactors(method = "TMM")

	re <- toupper(result)
	if(re == "DE"){
		design <- model.matrix(~0+group)
		colnames(design) <- gsub("group", "", colnames(design))

		DE <-	x |>
				voomWithQualityWeights(design, plot = F) |>
				lmFit(design) |>
				contrasts.fit(
					contrasts = makeContrasts(
									contrasts = compare,
									levels = colnames(design)
								)
				) |>
				eBayes() |>
				topTable(n = Inf) |>
				as.data.table() %>%
				.[,log10P := -log10(adj.P.Val)]

		if("logFC" %in% colnames(DE)){
			setnames(DE,"logFC","log2FC")
		}

		DE
	}else if(re == "PCA"){
		x |>
		cpm(log = T) |>
		limma::plotMDS()
	}else if(re == "PCA_DATA"){
		x |>
		cpm(log = T) |>
		limma::plotMDS(plot = F)
	}
}

## the input data for venn_plot should come from rnaseq123_multi()
venn_plot <-	function(