Commit 7c8b1038 authored by Christoph's avatar Christoph
Browse files

gene names

parent a999cda2
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -29,6 +29,8 @@ We provide a script to convert zUMIs output into loom file automatically based o
zUMIs will try to do this automatically; otherwise, convert the zUMIs output to a loom file by simply running `Rscript rds2loom.R myRun.yaml`.

## Changelog
22 Apr 2020: zUMIs2.7.3: zUMIs will try to parse a geneID to gene name mapping file from the user provided GTF annotations.

27 Mar 2020: zUMIs2.7.2: New barcode handling functionalities: When using the intersection of automatic BC detection and a BC whitelist, and the full barcode is composed of several barcode pieces (eg. RT barcode + illumina barcode), the whitelist can now also correspond to just one of the barcode pieces (eg. RT barcode only whitelist). Furthermore, some scRNA-seq protocols may have several cell barcodes that belong to the same cell (eg. SPLiT-seq with oligo-dT/random-hex round 1 barcode; i7 barcode mix in 10x Genomics). zUMIs now supports internally combining the counts via the `barcode_sharing:` option. Please look at the [wiki for further details](https://github.com/sdparekh/zUMIs/wiki/Barcodes#barcode-sharing-feature) and at [examples for some protocols](https://github.com/sdparekh/zUMIs/wiki/Protocol-specific-setup).
 
16 Mar 2020: zUMIs2.7.1: Smart-seq3 data can be run with the proper consideration of strand information. When setting `strand: 1`, UMI reads will use this strand while non-UMI reads will stay unstranded.
+10 −0
Original line number Diff line number Diff line
@@ -495,6 +495,16 @@ fixMissingOptions <- function(config){
    config$counting_opts$write_ham <- FALSE
  }
  
  if(is.null(config$num_threads)){
    config$num_threads <- 8
  }
  
  if(is.null(config$mem_limit)){
    config$mem_limit <- 100
  }else if(config$mem_limit == 0){
    config$mem_limit <- 100
  }

  if(is.null(config$counting_opts$downsampling)){
    config$counting_opts$downsampling <- "0"
  }
+16 −0
Original line number Diff line number Diff line
@@ -107,3 +107,19 @@ suppressWarnings(suppressMessages(require(AnnotationDbi)))

  return(len_dt)
}
# Build a gene_id -> gene_name mapping from the "gene" records of a GTF file.
#
# Arguments:
#   gtf      Path to a tab-separated GTF annotation file; it is read with
#            fread() without a header, so the feature type is column V3 and
#            the attribute string is column V9.
#   threads  Retained so existing callers keep working. The attribute parsing
#            is vectorised and no longer forks one worker task per gene.
#
# Returns:
#   A data.table with the character columns gene_id and gene_name, holding one
#   row per "gene" record in which both attributes were found. Records lacking
#   either attribute are dropped; a file without usable gene records yields a
#   zero-row table with the same two columns.
.get_gene_names <- function(gtf, threads){
  gtf.dt <- fread(gtf, sep = "\t", header = FALSE)
  gtf_info <- as.character(gtf.dt[V3 == "gene"]$V9)

  # Pull the value of one attribute key out of every attribute string at once.
  # The key has to start the string or follow a ';' so that e.g. "gene_id" is
  # not picked up from inside "havana_gene_id". Surrounding double quotes and
  # the terminating ';' are not part of the captured value, and quoted values
  # may contain spaces.
  extract_attribute <- function(attrs, key){
    pattern <- paste0("^(?:.*;)?\\s*", key, "\\s+\"?([^\";]*)\"?\\s*(?:;.*)?$")
    values <- rep(NA_character_, length(attrs))
    found <- grepl(pattern, attrs, perl = TRUE)
    values[found] <- trimws(sub(pattern, "\\1", attrs[found], perl = TRUE))
    values
  }

  info_parsed <- data.table(
    gene_id = extract_attribute(gtf_info, "gene_id"),
    gene_name = extract_attribute(gtf_info, "gene_name")
  )
  return( info_parsed[! (is.na(gene_name) | is.na(gene_id))] )
}
 No newline at end of file
+5 −0
Original line number Diff line number Diff line
@@ -50,7 +50,12 @@ bccount<-splitRG(bccount=bccount, mem= opt$mem_limit)
##############################################################
##### featureCounts

## gene annotation
saf<-.makeSAF(paste0(opt$out_dir,"/",opt$project,".final_annot.gtf"))
## Writing the gene_id -> gene_name table is best-effort: a failure while
## parsing the GTF or writing the file must not stop the run, but the reason
## is reported instead of being discarded silently.
tryCatch({
  gene_name_mapping <- .get_gene_names(gtf = paste0(opt$out_dir,"/",opt$project,".final_annot.gtf"), threads = opt$num_threads)
  data.table::fwrite(gene_name_mapping, file = paste0(opt$out_dir,"/zUMIs_output/expression/",opt$project,".gene_names.txt"), sep ="\t", quote = FALSE)
}, error = function(e){
  message("Skipping gene name mapping output: ", conditionMessage(e))
})
##

abamfile<-paste0(opt$out_dir,"/",opt$project,".filtered.tagged.Aligned.out.bam")
outbamfile <-paste0(opt$out_dir,"/",opt$project,".filtered.Aligned.GeneTagged.bam")

+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@
# Pipeline to run UMI-seq analysis from fastq to read count tables.
# Authors: Swati Parekh, Christoph Ziegenhain, Beate Vieth & Ines Hellmann
# Contact: sparekh@age.mpg.de or christoph.ziegenhain@ki.se
vers=2.7.2b
vers=2.7.3
# Fetch the version string published on the master branch. The result is empty
# when the lookup fails (e.g. no network access).
currentv=$(curl -s https://raw.githubusercontent.com/sdparekh/zUMIs/master/zUMIs-master.sh | grep '^vers=' | cut -f2 -d "=")
# Only announce an update when a remote version was actually retrieved and it
# differs from the local one; an empty lookup result is not a newer release.
if [ -n "$currentv" ] && [ "$currentv" != "$vers" ]; then echo -e "------------- \n\n Good news! A newer version of zUMIs is available at https://github.com/sdparekh/zUMIs \n\n-------------"; fi