Commit 15f6112f authored by ziegenhain@bio.lmu.de's avatar ziegenhain@bio.lmu.de
Browse files

Small optimizations & improve verbose

parent a5109444
Loading
Loading
Loading
Loading
+18 −20
Original line number Diff line number Diff line
@@ -29,7 +29,7 @@ splitRG<-function(bccount,mem){
.rmUB<-function(b){ gsub("UB:Z:","",b)}
.rmXT<-function(b){ gsub("XT:Z:","",b)}

.ham_mat <- function(umistrings) {
ham_mat <- function(umistrings) {
  X<- matrix(unlist(strsplit(umistrings, "")),ncol = length(umistrings))
  #function below thanks to Johann de Jong
  #https://goo.gl/u8RBBZ
@@ -114,7 +114,7 @@ hammingFilter<-function(umiseq, edit=1, gbcid=NULL ){
  }else{ return(NULL) }
}
.makewide <- function(longdf,type){
  print("I am making a sparseMatrix!!")
  #print("I am making a sparseMatrix!!")
  ge<-as.factor(longdf$GE)
  xc<-as.factor(longdf$RG)
  widedf <- Matrix::sparseMatrix(i=as.integer(ge),
@@ -187,5 +187,3 @@ convert2countM<-function(alldt,what){
  }
  return(fmat)
}
  
  
+22 −17
Original line number Diff line number Diff line
suppressMessages(require(dplyr))
suppressMessages(require(GenomicRanges))
suppressMessages(require(GenomicFeatures))
suppressMessages(require(GenomicAlignments))
suppressMessages(require(AnnotationDbi))
suppressWarnings(suppressMessages(require(GenomicRanges)))
suppressWarnings(suppressMessages(require(GenomicFeatures)))
suppressWarnings(suppressMessages(require(GenomicAlignments)))
suppressWarnings(suppressMessages(require(AnnotationDbi)))

checkRsubreadVersion<- function(){
  if(length(grep("Rsubread",installed.packages()))==0){
@@ -18,17 +18,20 @@ checkRsubreadVersion<- function(){
}

.makeSAF<-function(gtf){
  txdb <- GenomicFeatures::makeTxDbFromGFF(file=gtf, format="gtf")
  print("Loading reference annotation from:")
  print(gtf)
  txdb <- suppressWarnings(suppressMessages(GenomicFeatures::makeTxDbFromGFF(file=gtf, format="gtf")))

  ## Make Gene-range GR-object
  se <- AnnotationDbi::select(txdb, keys(txdb, "GENEID"),
  se <- suppressMessages(
    AnnotationDbi::select(txdb, keys(txdb, "GENEID"),
                              columns=c("GENEID","TXCHROM","TXSTART","TXEND","TXSTRAND"),
                              keytype="GENEID") %>%
    dplyr::group_by(GENEID,TXCHROM,TXSTRAND)  %>%
    dplyr::mutate( txstart =ifelse(TXSTART<TXEND,min(TXSTART),min(TXEND)),
                   txend  =ifelse(TXSTART<TXEND,max(TXEND),min(TXSTART) ) ) %>%
    dplyr::select(GENEID,TXCHROM,TXSTRAND,txstart,txend)  %>% unique()
  
    )

  gr.gene<-GenomicRanges::GRanges(seqnames = se$TXCHROM,
                                  ranges =  IRanges(start= se$txstart,
@@ -61,12 +64,14 @@ checkRsubreadVersion<- function(){
  intron.saf<-dplyr::left_join(intron.saf,unique(exon.saf[,c("GeneID","Strand")]),by=c("GeneID"))

  saf <- list(introns=intron.saf,exons=exon.saf)
  print("Annotation loaded!")
#  safout <- paste(out,"/zUMIs_output/expression/",sn,".annotationsSAF.rds",sep="")
#  saveRDS(saf, file=safout)
  rm(se,gr.gene,intron,exon,intron.exon.red,intron.exon.dis,intron.only,ol.ex,ol.in,intron.saf,exon.saf)
  return(saf)
}
.runFeatureCount<-function(abamfile,RG,saf,strand,type,primaryOnly){
  print(paste0("Assigning reads to features (",type,")"))
  fc.stat<-Rsubread::featureCounts(files=abamfile,
                                   annot.ext=saf,
                                   isGTFAnnotationFile=F,
@@ -79,6 +84,6 @@ checkRsubreadVersion<- function(){
  fn<-paste0(abamfile,".featureCounts.bam")
  nfn<-paste0(abamfile,".",type,".featureCounts.bam")
  system(paste("mv",fn,nfn))
  
  invisible(suppressWarnings(suppressMessages(gc(verbose=F))))
  return(nfn)
}
+6 −6
Original line number Diff line number Diff line
#!/usr/bin/env Rscript
library(methods)
library(data.table)
library(yaml)
library(ggplot2)
suppressMessages(require(methods))
suppressMessages(require(data.table))
suppressMessages(require(yaml))
suppressMessages(require(ggplot2))

##########################
myYaml<-commandArgs(trailingOnly = T)
@@ -74,7 +74,7 @@ if( opt$counting_opts$introns ){
}

########################## assign reads to UB & GENE

print("Crunching reads to generate expression tables...")
for(i in unique(bccount$chunkID)){
     print( paste( "Working on barcode chunk", i, "out of",length(unique(bccount$chunkID)) ))
     print( paste( "Processing",length(bccount[chunkID==i]$XC), "barcodes in this chunk..." ))
@@ -96,7 +96,7 @@ for(i in unique(bccount$chunkID)){
       allC<-bindList(alldt=allC,newdt=tmp)
    }
}

print("Converting expression tables into sparseMatrix format...")
final<-list( umicount  = convert2countM(alldt=allC,what="umicount"),
             readcount = convert2countM(allC,"readcount"))