Commit 7f41b80a authored by nchernia's avatar nchernia
Browse files

Juicer 1.5.6 update.

* Adding in new flags -b ligation and -t threads
* Changed juicer_arrowhead, juicer_hiccups, and juicer_postprocessing to not fail immediately when bed files
not found
* Corrected scripts so bc is not required
* Proper unzipping logic in SLURM and UGER
* Will always split if chunk size sent in
* Bug fixes in terms of looking for endings in splits directory
* Exits changed to 1
* Debugging changed to better determine exactly where script failed
* Touch files added to deal with jobs killed by the cluster
* Exits changed to 1
* Warning for genomePath
* Header added to hic file containing all the information about versions of software used to create that file
parent 32336457
Loading
Loading
Loading
Loading

CPU/common/diploid.pl

0 → 100755
+385 −0
Original line number Diff line number Diff line
#!/usr/bin/perl
##########
#The MIT License (MIT)
#
# Copyright (c) 2015 Aiden Lab
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#  THE SOFTWARE.
##########
# Perl script to calculate diploid reads on the infile.
# The infile should be in the merged_nodups form: no duplicates, >= 14 fields,
# laid out as:
#
# str1 chr1 pos1 frag1 str2 chr2 pos2 frag2 mapq1 cigar1 seq1 mapq2 cigar2 seq2
#
# The script also requires two versions of a SNP file:
# - The "chr_pos" site file lists on each line the sorted locations of the SNPs
# - The "paternal_maternal" file lists each SNP as chr:pos paternal_SNP maternal_SNP
#
# These files can be created using the script vcftotxt.awk from a phased VCF file
#
# Usage:	diploid.pl -s [chr_pos site file] -o [paternal_maternal SNP file] [infile or stream]
# Juicer version 1.5
use File::Basename;
use POSIX;
use List::Util qw[min max];
use Getopt::Std;
use vars qw/ $opt_s $opt_l $opt_d $opt_o $opt_h /;

# Check arguments
getopts('s:o:hl');

my $site_file;
my $phased_file;
my $star=0;

if ($opt_h) {
  print STDERR "Usage: diploid.pl <infile>\n";
  print STDERR " <infile>: file in intermediate format to calculate statistics on, can be stream\n";
  exit;
}
if ($opt_s) {  
  $site_file = $opt_s;
}

if ($opt_o) {
  $phased_file = $opt_o;
}

if ($opt_l) {
  $star=1;
}

if (scalar(@ARGV)==0) {
  print STDERR "No input file specified, reading from input stream\n";
}

# Global variables for calculating statistics
my %chromosomes;
my %maternal_snps;
my %paternal_snps;

# read in SNP site file and store as multidimensional array
open FILE, $site_file or die $!;
while (<FILE>) {
	my @locs = split;
  my $key = shift(@locs);
	my $ref = \@locs;
	$chromosomes{$key} = $ref;
}
close FILE;

# read in SNP definition file and store in hashtable
open FILE, $phased_file or die $!;
while (<FILE>) {
  my @locs = split;
  my $key = $locs[0];
  $paternal_snps{$key} = $locs[1];
  $maternal_snps{$key} = $locs[2];
}
close FILE;

# read in infile and find SNPs
my $checkedfile=0;
while (<>) {
  my @record = split;
  my $oldrecord = join(' ',@record);

  # holds the read assignments
  my @read1 = ();
  my @read2 = ();

  # holds the SNP assignments (nucleotides)
  my @snp1 = ();
  my @snp2 = ();

  # holds the SNP positions
  my @snppos1 = ();
  my @snppos2 = ();

  my $orgpos1 = -100;
  my $orgpos2 = -100;

  if ($checkedfile == 0) {
    # check that the file has the format expected
    # right now just check the sequence strings are in proper place
    if ($record[10] !~ /\A[acgtn]+\z/i || $record[13] !~ /\A[acgtn]+\z/i) {
      print STDERR "Expected DNA strings in fields 11 and 14, instead see " . $record[10] . " and " . $record[13] . "\n";
      print STDERR "Exiting.";
      exit(1);
    }
    else {
      $checkedfile = 1;
    }
  }

  # set "original" position for both reads. then we can process cigar/sequence string forward.
  # ignore mitochrondria
  # First read:

  # count Ms,Ds,Ns,Xs,=s for sequence length 
  my $seqlength1=0; 
  my $currstr=$record[9];

  my $where = $currstr =~ /[0-9]+[M|D|N|X|=|S|H|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
  while ($where > 0) {
    $seqlength1 += substr($currstr, ($where)-1, $RLENGTH - 1) + 0;
    $currstr = substr($currstr, ($where + $RLENGTH)-1);
    $where = $currstr =~ /[0-9]+[M|D|N|X|=|S|H|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
  }

  if (($record[0] == 0 && $record[1] ne "MT") || $star == 1) {
    $orgpos1 = $record[2];
  }
  elsif ($record[1] ne "MT") {
    # reverse strand, find original position
    $orgpos1 = $record[2] - $seqlength1 + 1;
  }  

    # count Ms,Ds,Ns,Xs,=s for sequence length 
  my $seqlength2=0; 
  my $currstr=$record[12];
  my $where = $currstr =~ /[0-9]+[M|D|N|X|=|S|H|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
  while ($where > 0) {
    $seqlength2 += substr($currstr, ($where)-1, $RLENGTH - 1) + 0;
    $currstr = substr($currstr, ($where + $RLENGTH)-1);
    $where = $currstr =~ /[0-9]+[M|D|N|X|=|S|H|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
  }

  # Second read:
  if (($record[4] == 0 && $record[5] ne "MT") || $star == 1) {
    $orgpos2 = $record[6];
  }
  elsif ($record[5] ne "MT") {
    # reverse strand, find original position
    $orgpos2 = $record[6] - $seqlength2 + 1;
  }

  # find first read position in the SNP site array
  my $ind1 = &bsearch($orgpos1,$chromosomes{$record[1]});
  my $orgpos11 = $orgpos1;
  my $orgpos22 = $orgpos2;
  # first read might land on SNP: if on forward strand, position + length overlaps, if on
  # reverse strand, position - length overlaps.
  # ind1 is first hit; keep incrementing it until no longer on read.
  # shouldn't matter reverse or forward strand at this point, just use orgpos and cigar and
  # count forward
  while ($orgpos11 < $chromosomes{$record[1]}->[$ind1] && $orgpos11+$seqlength1 >= $chromosomes{$record[1]}->[$ind1]) {
    # need to count forward from orgpos1.
    my $currstr=$record[9];
    my $where = $currstr =~ /[0-9]+[M|D|S|I|H|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
    my $seqstr=$record[10];

    $orgpos1=$orgpos11;
    while ($where > 0) {
      my $char = substr($currstr, ($where)-1 + $RLENGTH - 1,1);
      my $len  = substr($currstr, ($where)-1, $RLENGTH - 1) + 0;
      my $ind;

      # match. check if match spans SNP, if so, take it. if not, advance cigar string
      # and sequence string and keep parsing cigar
      if ($char eq "M") {
        if ($orgpos1 + $len > $chromosomes{$record[1]}->[$ind1]) {
          # matching spans SNP
          $ind = $chromosomes{$record[1]}->[$ind1] - $orgpos1;
          push @snp1,substr $seqstr, $ind, 1; 
          push @snppos1,$chromosomes{$record[1]}->[$ind1];
          $where = 0;
        }
        else {
          # advance cigar and sequence and keep going.
          $seqstr = substr($seqstr, $len);
          $orgpos1 = $orgpos1 + $len;
          $currstr = substr($currstr, ($where + $RLENGTH)-1);
          $where = $currstr =~ /[0-9]+[M|D|S|I|H|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
        }
      }
      elsif ($char eq "D" || $char eq "N") {
        # delete. if deletion spans SNP, we don't have it in our read.
        if ($orgpos1 + $len > $chromosomes{$record[1]}->[$ind1]) {
          $where = 0;
        }
        else {
          # doesn't span SNP, update genomic position and continue.
          $orgpos1 = $orgpos1 + $len;
          $currstr = substr($currstr, ($where + $RLENGTH)-1);
          $where = $currstr =~ /[0-9]+[M|D|S|I|H|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
        }
      }
      elsif ($char eq "I") {
        # insertion, advance sequence string, doesn't affect genomic position
        $currstr = substr($currstr, ($where + $RLENGTH)-1);
        $where = $currstr =~ /[0-9]+[M|D|S|I|H|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
        $seqstr = substr($seqstr, $len);
      }
      elsif ($char eq "S" || $char eq "H") {
        if ($orgpos1 + $len > $chromosomes{$record[1]}->[$ind1]) {
          # skip spans SNP
          $where = 0;
        }
        else {
          # skip does not span SNP, advance cigar and sequence and keep going.
          # hard clipped bases do not appear in seqstr
          if ($char eq "S") {
            $seqstr = substr($seqstr, $len);
          }
          $orgpos1 = $orgpos1 + $len;
          $currstr = substr($currstr, ($where + $RLENGTH)-1);
          $where = $currstr =~ /[0-9]+[M|D|S|I|H|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
        }
      }
    }
    $ind1++;
  }
  if (scalar @snp1 > 0) {
    for my $i (0 .. $#snp1) {
      my $key = $record[1] . ":" . $snppos1[$i];
      if ($snp1[$i] eq $paternal_snps{$key}) {
        $read1[$i] = "paternal";
      }
      elsif ($snp1[$i] eq $maternal_snps{$key}) {
        $read1[$i] = "maternal";
      }
      else {
        $read1[$i] = "mismatch";
      }
    }
  }

  # find read 2 position in SNP array
  my $ind1 = &bsearch($orgpos2,$chromosomes{$record[5]});

  # check that SNP falls in read.  
  while ($orgpos22 < $chromosomes{$record[5]}->[$ind1] && $orgpos22+$seqlength2 >= $chromosomes{$record[5]}->[$ind1]) {
    # need to count forward from orgpos2.

    my $currstr=$record[12];
    my $where = $currstr =~ /[0-9]+[M|D|S|I|H|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
    my $seqstr=$record[13];
    $orgpos2=$orgpos22;

    while ($where > 0) {
      my $char = substr($currstr, ($where)-1 + $RLENGTH - 1,1);
      my $len  = substr($currstr, ($where)-1, $RLENGTH - 1) + 0;

      my $ind;
      # match. check if match spans SNP, if so, take it. if not, advance cigar string
      # and sequence string and keep parsing cigar
      if ($char eq "M") {
	      if ($orgpos2 + $len > $chromosomes{$record[5]}->[$ind1]) {
          # matching spans SNP
          $ind = $chromosomes{$record[5]}->[$ind1] - $orgpos2;
          push @snp2,substr $seqstr, $ind, 1;
          push @snppos2,$chromosomes{$record[5]}->[$ind1];
          $where = 0;
	      }
	      else {
          # matching does not span SNP, advance cigar and sequence and keep going.
          $seqstr = substr($seqstr, $len);
          $orgpos2 = $orgpos2 + $len;
          $currstr = substr($currstr, ($where + $RLENGTH)-1);
          $where = $currstr =~ /[0-9]+[M|D|S|I|H|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
	      }
      }
      elsif ($char eq "D" || $char eq "N") {
	      # delete. if deletion spans SNP, we don't have it in our read.
	      if ($orgpos2 + $len > $chromosomes{$record[5]}->[$ind1]) {
          $where = 0;
	      }
	      else {
          # doesn't span SNP, update genomic position and continue.
          $orgpos2 = $orgpos2 + $len;
          $currstr = substr($currstr, ($where + $RLENGTH)-1);
          $where = $currstr =~ /[0-9]+[M|D|S|I|H|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
	      }
      }
      elsif ($char eq "I") {
	      # insertion or skip, advance sequence string, doesn't affect genomic position
	      $currstr = substr($currstr, ($where + $RLENGTH)-1);
	      $where = $currstr =~ /[0-9]+[M|D|S|I|H|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
        $seqstr = substr($seqstr, $len);
      }
      elsif ($char eq "S" || $char eq "H") {
	      if ($orgpos2 + $len > $chromosomes{$record[5]}->[$ind1]) {
          # skip spans SNP
          $where = 0;
	      }
	      else {
          # skip does not span SNP, advance cigar and sequence and keep going.
          # hard clipped bases do not appear in seqstr
          if ($char eq "S") {
            $seqstr = substr($seqstr, $len);
          }
          $orgpos2 = $orgpos2 + $len;
          $currstr = substr($currstr, ($where + $RLENGTH)-1);
          $where = $currstr =~ /[0-9]+[M|D|S|I|H|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
	      }
      }
    }
    $ind1++;
  }
  if (scalar @snp2 > 0) {
    for my $i (0 .. $#snp2) {
      my $key = $record[5] . ":" . $snppos2[$i];
      if ($snp2[$i] eq $paternal_snps{$key}) {
        $read2[$i] = "paternal";
      }
      elsif ($snp2[$i] eq $maternal_snps{$key}) {
        $read2[$i] = "maternal";
      }
      else {
        $read2[$i] = "mismatch";
      }
    }
  }

  if (scalar @read1 > 0 || scalar @read2 > 0) {
    print STDOUT $record[0] . " " . $record[1] . " " . $record[2] . " " . $record[3] . " " . $record[4] . " " . $record[5] . " " . $record[6] . " " . $record[7] . " " . $record[10] . " "  . $record[13] . " " . $record[14] . " " . "SNP1";
    for my $i (0 .. $#snp1) {
      print STDOUT ":" . $snp1[$i] . ":" . $snppos1[$i] . ":" . $read1[$i];
    }
    print STDOUT " SNP2";
    for my $i (0 .. $#snp2) {
      print STDOUT ":" . $snp2[$i] . ":" . $snppos2[$i] . ":" . $read2[$i];
    }
    print STDOUT "\n";
  }
}

# Binary search, array passed by reference
# search array of integers a for given integer x
# return index where found or upper index if not found
sub bsearch {
	my ($x, $a) = @_;		 # search for x in array a
	my ($l, $u) = (0, @$a - 1);	 # lower, upper end of search interval
	my $i;	      		 # index of probe
	while ($l <= $u) {
		$i = int(($l + $u)/2);
		if ($a->[$i] < $x) {
	    $l = $i+1;
		}
		elsif ($a->[$i] > $x) {
	    $u = $i-1;
		}
		else {
	    return $i; # found
		}
	}
	return $l;				 # not found, return upper
}

CPU/common/diploid.sh

0 → 100755
+16 −0
Original line number Diff line number Diff line
#!/bin/bash

stem=/aidenlab/work/neva/patski_

for i in chrX chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chrY
  do
  awk -v chr=$i '$2==chr && $6==chr && $9 >= 10 && $12 >= 10' merged_nodups.txt | ${juiceDir}/scripts/diploid.pl -s ${stem}${i}_chr_pos.txt -o ${stem}${i}_paternal_maternal.txt > diploid_${i}.txt
done
cat diploid_chr1.txt diploid_chr10.txt diploid_chr11.txt diploid_chr12.txt diploid_chr13.txt diploid_chr14.txt diploid_chr15.txt diploid_chr16.txt diploid_chr17.txt diploid_chr18.txt diploid_chr19.txt diploid_chr2.txt diploid_chr3.txt diploid_chr4.txt diploid_chr5.txt diploid_chr6.txt diploid_chr7.txt diploid_chr8.txt diploid_chr9.txt diploid_chrX.txt diploid_chrY.txt > diploid.txt

awk -f ${juiceDir}/scripts/diploid_split.awk diploid.txt 
sort -k2,2d -m maternal.txt maternal_both.txt | awk '{split($11,a,"/"); print a[1], $1,$2,$3,$4,$5,$6,$7,$8,"100","100"}' > mat.txt 
${juiceDir}/scripts/juicer_tools pre mat.txt maternal.hic mm10 
sort -k2,2d -m paternal.txt paternal_both.txt | awk '{split($11,a,"/"); print a[1], $1,$2,$3,$4,$5,$6,$7,$8,"100","100"}' > pat.txt 
/aidenlab/juicebox pre pat.txt paternal.hic mm10
+62 −0
Original line number Diff line number Diff line
#!/usr/bin/awk -f    
##########
#The MIT License (MIT)
#
# Copyright (c) 2015 Aiden Lab
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#  THE SOFTWARE.
##########
# Splits the diploid output file into maternal, paternal, maternal_both,
# paternal_both, diff, and mismatch reads
# maternal_both and paternal_both have both read ends assigned
# diff has paternal assigned to one read end and maternal assigned to the other
# mismatch has at least one SNP that doesn't match either the reference or the
# alternate
# Juicer version 1.5
NF==13{
    if ($12 ~ /mismatch/ || $13 ~ /mismatch/) {
        print >> stem"mismatch.txt";
    }
    else if (($12 ~ /paternal/ && $12 ~ /maternal/) || ($13 ~ /paternal/ && $13 ~ /maternal/)) {
        print >> stem"mismatch.txt";
    }
    else if ($12 ~ /paternal/ && $13 ~ /paternal/) {
        print >> stem"paternal_both.txt";
    }
    else if ($12 ~ /maternal/ && $13 ~ /maternal/) {
        print >> stem"maternal_both.txt";
    }
    else if (($12 ~ /maternal/ && $13 ~ /paternal/) || ($12 ~ /paternal/ && $13 ~ /maternal/)) {
        print >> stem"diff.txt";
    }
    else {
        if ($12 ~ /maternal/ || $13 ~ /maternal/) {
            print >> stem"maternal.txt";
        }
        else if ($12 ~ /paternal/ || $13 ~ /paternal/) {
            print >> stem"paternal.txt";
        }
        else {
            print >> stem"problem.txt";
        }
    }
}
NF!=13 {
    print >> stem"problem.txt";
}

CPU/common/mega.sh

0 → 100755
+71 −0
Original line number Diff line number Diff line
#!/bin/bash
# Makes a mega map from the files given on the command line, presumes hg19 
# MboI
# Note: this is an older version and should be updated

tmpDir="/aidenlab/neva"
juiceDir="/aidenlab"
genomeID="hg19"
site="MboI"

# check usage
if [ $# -lt 2 ]
    then
    echo "Usage: `basename $0` <file1> <file2> ... <fileN>"
    echo " Combines the files on command line into mega map"
    exit 1
fi

# check that files exist, add to string
str=""
for var in "$@"
do
  if [ ! -f $var ]
      then
      echo "File $var does not exist"
      exit 1
  else
      str+="$var "
  fi
done

sort -T ${tmpDir} -k2,2d -k6,6d -m ${str} > merged_nodups.txt
if [ $? -eq 0 ]; then
    echo "merged_nodups successfully created"
else
    echo "merged_nodups failed, exiting"
    exit 1
fi
${juiceDir}/scripts/statistics.pl -q 1 -ointer.txt -s ${juiceDir}/restriction_sites/${genomeID}_${site}.txt merged_nodups.txt
if [ $? -eq 0 ]; then
    echo "inter.txt successfully created"
else
    echo "inter.txt failed, exiting"
    exit 1
fi
${juiceDir}/scripts/statistics.pl -q 30 -ointer_30.txt -s ${juiceDir}/restriction_sites/${genomeID}_${site}.txt merged_nodups.txt
if [ $? -eq 0 ]; then
    echo "inter_30.txt successfully created"
else
    echo "inter_30.txt failed, exiting"
    exit 1
fi
${juiceDir}/juicer_tools pre -f /aidenlab/restriction_sites/${genomeID}_${site}.txt -s inter.txt -g inter_hists.m -q 1 merged_nodups.txt inter.hic ${genomeID}
if [ $? -eq 0 ]; then
    echo "inter.hic successfully created"
else
    echo "inter.hic failed, exiting"
    exit 1
fi
/aidenlab/juicebox pre -f /aidenlab/restriction_sites/${genomeID}_${site}.txt -s inter_30.txt -q 30 -g inter_30_hists.m merged_nodups.txt inter_30.hic ${genomeID}
if [ $? -eq 0 ]; then
    echo "inter_30.txt successfully created"
else
    echo "inter_30.txt failed, exiting"
    exit 1
fi




+23 −9
Original line number Diff line number Diff line
@@ -59,7 +59,7 @@
#             match any files with the read1str.   
set -e
shopt -s extglob
juicer_version="1.5" 
juicer_version="1.5.6" 
### LOAD BWA AND SAMTOOLS

# fastq files should look like filename_R1.fastq and filename_R2.fastq 
@@ -164,7 +164,7 @@ then
    case $genomeID in
	mm9) refSeq="${juiceDir}/references/Mus_musculus_assembly9_norandom.fasta";;
	mm10) refSeq="${juiceDir}/references/Mus_musculus_assembly10.fasta";;
	hg38) refSeq="${juiceDir}/references/hg38.fa";;
	hg38) refSeq="${juiceDir}/references/Homo_sapiens_assembly38.fasta";;
	hg19) refSeq="${juiceDir}/references/Homo_sapiens_assembly19.fasta";;
	
	*)  echo "$usageHelp"
@@ -299,7 +299,6 @@ fi
## Arguments have been checked and directories created. Now begins
## the real work of the pipeline


testname=$(ls -l ${fastqdir} | awk 'NR==1{print $9}')
if [ "${testname: -3}" == ".gz" ]
then
@@ -309,9 +308,23 @@ else
    read1=${splitdir}"/*${read1str}*.fastq"
fi

date 
echo "Juicer version:$juicer_version"
echo "$0 $@" 
headfile=${outputdir}/header
date > $headfile
# Experiment description
if [ -n "${about}" ]
then
    echo -ne 'Experiment description: ${about}; ' >> $headfile
else
    echo -ne 'Experiment description: ' >> $headfile
fi

# Get version numbers of all software
echo -ne "Juicer version $juicer_version;" >> $headfile
bwa 2>&1 | awk '\$1=="Version:"{printf(" BWA %s; ", \$2)}' >> $headfile 
echo -ne "$threads threads; " >> $headfile
java -version 2>&1 | awk 'NR==1{printf("%s; ", \$0);}' >> $headfile 
${juiceDir}/scripts/juicer_tools -V 2>&1 | awk '\$1=="Juicer" && \$2=="Tools"{printf("%s; ", \$0);}' >> $headfile
echo "$0 $@" >> $headfile

## ALIGN FASTQ AS SINGLE END, SORT BY READNAME, HANDLE CHIMERIC READS
 
@@ -488,7 +501,8 @@ then
    touch ${outputdir}/optdups.txt
    touch ${outputdir}/merged_nodups.txt
    awk -f ${juiceDir}/scripts/common/dups.awk -v name=${outputdir}/ ${outputdir}/merged_sort.txt
    mv ${outputdir}/optdups.txt ${outputdir}/opt_dups.txt # for consistency with cluster naming in split_rmdups
    # for consistency with cluster naming in split_rmdups
    mv ${outputdir}/optdups.txt ${outputdir}/opt_dups.txt 
fi
if [ -z "$genomePath" ]
then
@@ -504,7 +518,7 @@ then
    then        
        export _JAVA_OPTIONS=-Xmx16384m
        export LC_ALL=en_US.UTF-8 
        echo -e "Experiment description: $about" > $outputdir/inter.txt
	tail -n1 $headfile | awk '{printf"%-1000s\n", $0}' > $outputdir/inter.txt;
        ${juiceDir}/scripts/common/statistics.pl -s $site_file -l $ligation -o $outputdir/stats_dups.txt $outputdir/dups.txt
        cat $splitdir/*.res.txt | awk -f ${juiceDir}/scripts/common/stats_sub.awk >> $outputdir/inter.txt
        java -cp ${juiceDir}/scripts/common/ LibraryComplexity $outputdir inter.txt >> $outputdir/inter.txt
@@ -518,7 +532,7 @@ then
        else 
            ${juiceDir}/scripts/common/juicer_tools pre -f $site_file -s $outputdir/inter.txt -g $outputdir/inter_hists.m -q 1 $outputdir/merged_nodups.txt $outputdir/inter.hic $genomePath 
        fi 
        echo -e "Experiment description: $about" > $outputdir/inter_30.txt 
	tail -n1 $headfile | awk '{printf"%-1000s\n", $0}' > $outputdir/inter_30.txt;
        cat $splitdir/*.res.txt | awk -f ${juiceDir}/scripts/common/stats_sub.awk >> $outputdir/inter_30.txt
        java -cp ${juiceDir}/scripts/common/ LibraryComplexity $outputdir inter_30.txt >> $outputdir/inter_30.txt
        ${juiceDir}/scripts/common/statistics.pl -s $site_file -l $ligation -o $outputdir/inter_30.txt -q 30 $outputdir/merged_nodups.txt
Loading