Commit 7b3969bb authored by ziegenhain@bio.lmu.de
Browse files

Performance updates & prevent stats crash

parent 5030eb44
Loading
Loading
Loading
Loading
+5 −10
Original line number Diff line number Diff line
@@ -53,17 +53,12 @@ reads2genes <- function(featfiles,chunks,rgfile,cores){
  samcommand<-paste("cat freadHeader; samtools view -x NH -x AS -x nM -x HI -x IH -x NM -x uT -x MD -x jM -x jI -x XN -x XS -R",rgfile,"-@",cores)

   if(length(featfiles)==1){
          reads<-data.table::fread(paste(samcommand,featfiles), na.strings=c(""),
                             select=c(12,13,14),header=T,fill=T,colClasses = "character")[
                            , c("RG","UB","GE"):=list(.rmRG(V12),.rmUB(V13),.rmXT(V14))
                          ][,c("V12","V13","V14"):=NULL]
          reads<-data.table::fread(paste(samcommand,featfiles[1],"| cut -f12,13,14 | sed 's/RG:Z://' | sed 's/UB:Z://' | sed 's/XT:Z://' "), na.strings=c(""),
                                   select=c(1,2,3),header=T,fill=T,colClasses = "character" , col.names = c("RG","UB","GE") )
  }else{
    reads<-data.table::fread(paste(samcommand,featfiles[1]), na.strings=c(""),
                             select=c(12,13,14),header=T,fill=T,colClasses = "character" )[
                               , c("RG","UB","GE"):=list(.rmRG(V12),.rmUB(V13),.rmXT(V14))
                               ][,c("V12","V13","V14"):=NULL][
                                 ,"tmp":=fread(paste(samcommand,featfiles[2]),select=14,header=T,fill=T,na.strings=c(""),colClasses = "character")
                               ][ ,"GEin":=.rmXT(tmp) ][ ,tmp:=NULL
    reads<-data.table::fread(paste(samcommand,featfiles[1],"| cut -f12,13,14 | sed 's/RG:Z://' | sed 's/UB:Z://' | sed 's/XT:Z://' "), na.strings=c(""),
                             select=c(1,2,3),header=T,fill=T,colClasses = "character" , col.names = c("RG","UB","GE") )[
                                 ,"GEin":=fread(paste(samcommand,featfiles[2],"| cut -f13,14 | sed 's/XT:Z://'"),select=2,header=T,fill=T,na.strings=c(""),colClasses = "character")
                                  ][ ,"ftype":="NA"
                                  ][is.na(GEin)==F,ftype:="intron"
                                  ][is.na(GE)==F,  ftype:="exon"
+25 −30
Original line number Diff line number Diff line
@@ -11,13 +11,9 @@ sumstatBAM <- function(featfiles,cores,outdir,user_seq,bc,outfile){
  write(headerXX,paste(outdir,"freadHeader",sep="/"))
  samcommand<-paste("cat freadHeader; samtools view -x NH -x AS -x nM -x HI -x IH -x NM -x uT -x MD -x jM -x jI -x XN -x UB -@",cores)
  #issue with BC matching
  mapCount<-data.table::fread(paste(samcommand,featfiles[1]), na.strings=c(""),
                             select=c(12,13,14),header=T,fill=T,colClasses = "character" )[
                               , c("RG","XS","GE"):=list(.rmRG(V12),.rmXS(V13),.rmXT(V14))
                               ][,c("V12","V13","V14"):=NULL
                               ][,"tmp":=fread(paste(samcommand,featfiles[2]),select=14,header=T,fill=T,na.strings=c(""),colClasses = "character")
                               ][ ,"GEin":=.rmXT(tmp) 
                               ][ ,tmp:=NULL 
  mapCount<-data.table::fread(paste(samcommand,featfiles[1],"| cut -f12,13,14 | sed 's/RG:Z://' | sed 's/UB:Z://' | sed 's/XT:Z://' "), na.strings=c(""),
                             select=c(1,2,3),header=T,fill=T,colClasses = "character" , col.names = c("RG","UB","GE") )[
                              ,"GEin":=fread(paste(samcommand,featfiles[2],"| cut -f13,14 | sed 's/XT:Z://'"),select=2,header=T,fill=T,na.strings=c(""),colClasses = "character")
                               ][ ,"ftype":="NA"
                               ][is.na(GEin)==F,ftype:="Intron"
                               ][is.na(GE)==F  ,ftype:="Exon"
@@ -129,4 +125,3 @@ totReadBoxplot<-function(typeCount,fillcol){
              axis.title.x=element_blank())
  return(box)
}
  
 No newline at end of file
+1 −1
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@ library(cowplot)
##########################
myYaml<-commandArgs(trailingOnly = T)
opt   <-read_yaml(myYaml)

setwd(opt$out_dir)
featColors<-c("#1A5084", "#914614" ,"#118730","grey33","tan1","gold1","grey73","firebrick3")
names(featColors)<-c("Exon","Intron+Exon","Intron","Unmapped","Ambiguity","Intergenic","Unused BC","User")
#####################################
+1 −4
Original line number Diff line number Diff line
@@ -81,7 +81,4 @@ use_SLURM: no
which_Stage: Filtering

# Below, fqfilter will add a read_layout flag defining SE (single-end) or PE (paired-end)
read_layout:
read_layout:
read_layout:
read_layout: