Juicer 1.5.6 update. (7f41b80a) · Commits · Chaos / chaos.juicer

CPU/common/diploid.pl

0 → 100755

+385 −0

Original line number	Diff line number	Diff line
		#!/usr/bin/perl
		##########
		#The MIT License (MIT)
		#
		# Copyright (c) 2015 Aiden Lab
		#
		# Permission is hereby granted, free of charge, to any person obtaining a copy
		# of this software and associated documentation files (the "Software"), to deal
		# in the Software without restriction, including without limitation the rights
		# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
		# copies of the Software, and to permit persons to whom the Software is
		# furnished to do so, subject to the following conditions:
		#
		# The above copyright notice and this permission notice shall be included in
		# all copies or substantial portions of the Software.
		#
		# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
		# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
		# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
		# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
		# THE SOFTWARE.
		##########
		# Perl script to calculate diploid reads on the infile.
		# The infile should be in the merged_nodups form: no duplicates, >= 14 fields,
		# laid out as:
		#
		# str1 chr1 pos1 frag1 str2 chr2 pos2 frag2 mapq1 cigar1 seq1 mapq2 cigar2 seq2
		#
		# The script also requires two versions of a SNP file:
		# - The "chr_pos" site file lists on each line the sorted locations of the SNPs
		# - The "paternal_maternal" file lists each SNP as chr:pos paternal_SNP maternal_SNP
		#
		# These files can be created using the script vcftotxt.awk from a phased VCF file
		#
		# Usage: diploid.pl -s [chr_pos site file] -o [paternal_maternal SNP file] [infile or stream]
		# Juicer version 1.5
		use File::Basename;
		use POSIX;
		use List::Util qw[min max];
		use Getopt::Std;
		use vars qw/ $opt_s $opt_l $opt_d $opt_o $opt_h /;

		# Check arguments
		getopts('s:o:hl');

		my $site_file;
		my $phased_file;
		my $star=0;

		if ($opt_h) {
		print STDERR "Usage: diploid.pl <infile>\n";
		print STDERR " <infile>: file in intermediate format to calculate statistics on, can be stream\n";
		exit;
		}
		if ($opt_s) {
		$site_file = $opt_s;
		}

		if ($opt_o) {
		$phased_file = $opt_o;
		}

		if ($opt_l) {
		$star=1;
		}

		if (scalar(@ARGV)==0) {
		print STDERR "No input file specified, reading from input stream\n";
		}

		# Global variables for calculating statistics
		my %chromosomes;
		my %maternal_snps;
		my %paternal_snps;

		# read in SNP site file and store as multidimensional array
		open FILE, $site_file or die $!;
		while (<FILE>) {
		my @locs = split;
		my $key = shift(@locs);
		my $ref = \@locs;
		$chromosomes{$key} = $ref;
		}
		close FILE;

		# read in SNP definition file and store in hashtable
		open FILE, $phased_file or die $!;
		while (<FILE>) {
		my @locs = split;
		my $key = $locs[0];
		$paternal_snps{$key} = $locs[1];
		$maternal_snps{$key} = $locs[2];
		}
		close FILE;

		# read in infile and find SNPs
		my $checkedfile=0;
		while (<>) {
		my @record = split;
		my $oldrecord = join(' ',@record);

		# holds the read assignments
		my @read1 = ();
		my @read2 = ();

		# holds the SNP assignments (nucleotides)
		my @snp1 = ();
		my @snp2 = ();

		# holds the SNP positions
		my @snppos1 = ();
		my @snppos2 = ();

		my $orgpos1 = -100;
		my $orgpos2 = -100;

		if ($checkedfile == 0) {
		# check that the file has the format expected
		# right now just check the sequence strings are in proper place
		if ($record[10] !~ /\A[acgtn]+\z/i \|\| $record[13] !~ /\A[acgtn]+\z/i) {
		print STDERR "Expected DNA strings in fields 11 and 14, instead see " . $record[10] . " and " . $record[13] . "\n";
		print STDERR "Exiting.";
		exit(1);
		}
		else {
		$checkedfile = 1;
		}
		}

		# set "original" position for both reads. then we can process cigar/sequence string forward.
		# ignore mitochrondria
		# First read:

		# count Ms,Ds,Ns,Xs,=s for sequence length
		my $seqlength1=0;
		my $currstr=$record[9];

		my $where = $currstr =~ /[0-9]+[M\|D\|N\|X\|=\|S\|H\|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
		while ($where > 0) {
		$seqlength1 += substr($currstr, ($where)-1, $RLENGTH - 1) + 0;
		$currstr = substr($currstr, ($where + $RLENGTH)-1);
		$where = $currstr =~ /[0-9]+[M\|D\|N\|X\|=\|S\|H\|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
		}

		if (($record[0] == 0 && $record[1] ne "MT") \|\| $star == 1) {
		$orgpos1 = $record[2];
		}
		elsif ($record[1] ne "MT") {
		# reverse strand, find original position
		$orgpos1 = $record[2] - $seqlength1 + 1;
		}

		# count Ms,Ds,Ns,Xs,=s for sequence length
		my $seqlength2=0;
		my $currstr=$record[12];
		my $where = $currstr =~ /[0-9]+[M\|D\|N\|X\|=\|S\|H\|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
		while ($where > 0) {
		$seqlength2 += substr($currstr, ($where)-1, $RLENGTH - 1) + 0;
		$currstr = substr($currstr, ($where + $RLENGTH)-1);
		$where = $currstr =~ /[0-9]+[M\|D\|N\|X\|=\|S\|H\|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
		}

		# Second read:
		if (($record[4] == 0 && $record[5] ne "MT") \|\| $star == 1) {
		$orgpos2 = $record[6];
		}
		elsif ($record[5] ne "MT") {
		# reverse strand, find original position
		$orgpos2 = $record[6] - $seqlength2 + 1;
		}

		# find first read position in the SNP site array
		my $ind1 = &bsearch($orgpos1,$chromosomes{$record[1]});
		my $orgpos11 = $orgpos1;
		my $orgpos22 = $orgpos2;
		# first read might land on SNP: if on forward strand, position + length overlaps, if on
		# reverse strand, position - length overlaps.
		# ind1 is first hit; keep incrementing it until no longer on read.
		# shouldn't matter reverse or forward strand at this point, just use orgpos and cigar and
		# count forward
		while ($orgpos11 < $chromosomes{$record[1]}->[$ind1] && $orgpos11+$seqlength1 >= $chromosomes{$record[1]}->[$ind1]) {
		# need to count forward from orgpos1.
		my $currstr=$record[9];
		my $where = $currstr =~ /[0-9]+[M\|D\|S\|I\|H\|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
		my $seqstr=$record[10];

		$orgpos1=$orgpos11;
		while ($where > 0) {
		my $char = substr($currstr, ($where)-1 + $RLENGTH - 1,1);
		my $len = substr($currstr, ($where)-1, $RLENGTH - 1) + 0;
		my $ind;

		# match. check if match spans SNP, if so, take it. if not, advance cigar string
		# and sequence string and keep parsing cigar
		if ($char eq "M") {
		if ($orgpos1 + $len > $chromosomes{$record[1]}->[$ind1]) {
		# matching spans SNP
		$ind = $chromosomes{$record[1]}->[$ind1] - $orgpos1;
		push @snp1,substr $seqstr, $ind, 1;
		push @snppos1,$chromosomes{$record[1]}->[$ind1];
		$where = 0;
		}
		else {
		# advance cigar and sequence and keep going.
		$seqstr = substr($seqstr, $len);
		$orgpos1 = $orgpos1 + $len;
		$currstr = substr($currstr, ($where + $RLENGTH)-1);
		$where = $currstr =~ /[0-9]+[M\|D\|S\|I\|H\|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
		}
		}
		elsif ($char eq "D" \|\| $char eq "N") {
		# delete. if deletion spans SNP, we don't have it in our read.
		if ($orgpos1 + $len > $chromosomes{$record[1]}->[$ind1]) {
		$where = 0;
		}
		else {
		# doesn't span SNP, update genomic position and continue.
		$orgpos1 = $orgpos1 + $len;
		$currstr = substr($currstr, ($where + $RLENGTH)-1);
		$where = $currstr =~ /[0-9]+[M\|D\|S\|I\|H\|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
		}
		}
		elsif ($char eq "I") {
		# insertion, advance sequence string, doesn't affect genomic position
		$currstr = substr($currstr, ($where + $RLENGTH)-1);
		$where = $currstr =~ /[0-9]+[M\|D\|S\|I\|H\|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
		$seqstr = substr($seqstr, $len);
		}
		elsif ($char eq "S" \|\| $char eq "H") {
		if ($orgpos1 + $len > $chromosomes{$record[1]}->[$ind1]) {
		# skip spans SNP
		$where = 0;
		}
		else {
		# skip does not span SNP, advance cigar and sequence and keep going.
		# hard clipped bases do not appear in seqstr
		if ($char eq "S") {
		$seqstr = substr($seqstr, $len);
		}
		$orgpos1 = $orgpos1 + $len;
		$currstr = substr($currstr, ($where + $RLENGTH)-1);
		$where = $currstr =~ /[0-9]+[M\|D\|S\|I\|H\|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
		}
		}
		}
		$ind1++;
		}
		if (scalar @snp1 > 0) {
		for my $i (0 .. $#snp1) {
		my $key = $record[1] . ":" . $snppos1[$i];
		if ($snp1[$i] eq $paternal_snps{$key}) {
		$read1[$i] = "paternal";
		}
		elsif ($snp1[$i] eq $maternal_snps{$key}) {
		$read1[$i] = "maternal";
		}
		else {
		$read1[$i] = "mismatch";
		}
		}
		}

		# find read 2 position in SNP array
		my $ind1 = &bsearch($orgpos2,$chromosomes{$record[5]});

		# check that SNP falls in read.
		while ($orgpos22 < $chromosomes{$record[5]}->[$ind1] && $orgpos22+$seqlength2 >= $chromosomes{$record[5]}->[$ind1]) {
		# need to count forward from orgpos2.

		my $currstr=$record[12];
		my $where = $currstr =~ /[0-9]+[M\|D\|S\|I\|H\|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
		my $seqstr=$record[13];
		$orgpos2=$orgpos22;

		while ($where > 0) {
		my $char = substr($currstr, ($where)-1 + $RLENGTH - 1,1);
		my $len = substr($currstr, ($where)-1, $RLENGTH - 1) + 0;

		my $ind;
		# match. check if match spans SNP, if so, take it. if not, advance cigar string
		# and sequence string and keep parsing cigar
		if ($char eq "M") {
		if ($orgpos2 + $len > $chromosomes{$record[5]}->[$ind1]) {
		# matching spans SNP
		$ind = $chromosomes{$record[5]}->[$ind1] - $orgpos2;
		push @snp2,substr $seqstr, $ind, 1;
		push @snppos2,$chromosomes{$record[5]}->[$ind1];
		$where = 0;
		}
		else {
		# matching does not span SNP, advance cigar and sequence and keep going.
		$seqstr = substr($seqstr, $len);
		$orgpos2 = $orgpos2 + $len;
		$currstr = substr($currstr, ($where + $RLENGTH)-1);
		$where = $currstr =~ /[0-9]+[M\|D\|S\|I\|H\|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
		}
		}
		elsif ($char eq "D" \|\| $char eq "N") {
		# delete. if deletion spans SNP, we don't have it in our read.
		if ($orgpos2 + $len > $chromosomes{$record[5]}->[$ind1]) {
		$where = 0;
		}
		else {
		# doesn't span SNP, update genomic position and continue.
		$orgpos2 = $orgpos2 + $len;
		$currstr = substr($currstr, ($where + $RLENGTH)-1);
		$where = $currstr =~ /[0-9]+[M\|D\|S\|I\|H\|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
		}
		}
		elsif ($char eq "I") {
		# insertion or skip, advance sequence string, doesn't affect genomic position
		$currstr = substr($currstr, ($where + $RLENGTH)-1);
		$where = $currstr =~ /[0-9]+[M\|D\|S\|I\|H\|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
		$seqstr = substr($seqstr, $len);
		}
		elsif ($char eq "S" \|\| $char eq "H") {
		if ($orgpos2 + $len > $chromosomes{$record[5]}->[$ind1]) {
		# skip spans SNP
		$where = 0;
		}
		else {
		# skip does not span SNP, advance cigar and sequence and keep going.
		# hard clipped bases do not appear in seqstr
		if ($char eq "S") {
		$seqstr = substr($seqstr, $len);
		}
		$orgpos2 = $orgpos2 + $len;
		$currstr = substr($currstr, ($where + $RLENGTH)-1);
		$where = $currstr =~ /[0-9]+[M\|D\|S\|I\|H\|N]/ ? scalar($RLENGTH = length($&), $RSTART = length($`)+1) : 0;
		}
		}
		}
		$ind1++;
		}
		if (scalar @snp2 > 0) {
		for my $i (0 .. $#snp2) {
		my $key = $record[5] . ":" . $snppos2[$i];
		if ($snp2[$i] eq $paternal_snps{$key}) {
		$read2[$i] = "paternal";
		}
		elsif ($snp2[$i] eq $maternal_snps{$key}) {
		$read2[$i] = "maternal";
		}
		else {
		$read2[$i] = "mismatch";
		}
		}
		}

		if (scalar @read1 > 0 \|\| scalar @read2 > 0) {
		print STDOUT $record[0] . " " . $record[1] . " " . $record[2] . " " . $record[3] . " " . $record[4] . " " . $record[5] . " " . $record[6] . " " . $record[7] . " " . $record[10] . " " . $record[13] . " " . $record[14] . " " . "SNP1";
		for my $i (0 .. $#snp1) {
		print STDOUT ":" . $snp1[$i] . ":" . $snppos1[$i] . ":" . $read1[$i];
		}
		print STDOUT " SNP2";
		for my $i (0 .. $#snp2) {
		print STDOUT ":" . $snp2[$i] . ":" . $snppos2[$i] . ":" . $read2[$i];
		}
		print STDOUT "\n";
		}
		}

		# Binary search, array passed by reference
		# search array of integers a for given integer x
		# return index where found or upper index if not found
		sub bsearch {
		my ($x, $a) = @_; # search for x in array a
		my ($l, $u) = (0, @$a - 1); # lower, upper end of search interval
		my $i; # index of probe
		while ($l <= $u) {
		$i = int(($l + $u)/2);
		if ($a->[$i] < $x) {
		$l = $i+1;
		}
		elsif ($a->[$i] > $x) {
		$u = $i-1;
		}
		else {
		return $i; # found
		}
		}
		return $l; # not found, return upper
		}

CPU/common/diploid.sh

0 → 100755

+16 −0

Original line number	Diff line number	Diff line
		#!/bin/bash

		stem=/aidenlab/work/neva/patski_

		for i in chrX chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chrY
		do
		awk -v chr=$i '$2==chr && $6==chr && $9 >= 10 && $12 >= 10' merged_nodups.txt \| ${juiceDir}/scripts/diploid.pl -s ${stem}${i}_chr_pos.txt -o ${stem}${i}_paternal_maternal.txt > diploid_${i}.txt
		done
		cat diploid_chr1.txt diploid_chr10.txt diploid_chr11.txt diploid_chr12.txt diploid_chr13.txt diploid_chr14.txt diploid_chr15.txt diploid_chr16.txt diploid_chr17.txt diploid_chr18.txt diploid_chr19.txt diploid_chr2.txt diploid_chr3.txt diploid_chr4.txt diploid_chr5.txt diploid_chr6.txt diploid_chr7.txt diploid_chr8.txt diploid_chr9.txt diploid_chrX.txt diploid_chrY.txt > diploid.txt

		awk -f ${juiceDir}/scripts/diploid_split.awk diploid.txt
		sort -k2,2d -m maternal.txt maternal_both.txt \| awk '{split($11,a,"/"); print a[1], $1,$2,$3,$4,$5,$6,$7,$8,"100","100"}' > mat.txt
		${juiceDir}/scripts/juicer_tools pre mat.txt maternal.hic mm10
		sort -k2,2d -m paternal.txt paternal_both.txt \| awk '{split($11,a,"/"); print a[1], $1,$2,$3,$4,$5,$6,$7,$8,"100","100"}' > pat.txt
		/aidenlab/juicebox pre pat.txt paternal.hic mm10

CPU/common/diploid_split.awk

0 → 100644

+62 −0

Original line number	Diff line number	Diff line
		#!/usr/bin/awk -f
		##########
		#The MIT License (MIT)
		#
		# Copyright (c) 2015 Aiden Lab
		#
		# Permission is hereby granted, free of charge, to any person obtaining a copy
		# of this software and associated documentation files (the "Software"), to deal
		# in the Software without restriction, including without limitation the rights
		# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
		# copies of the Software, and to permit persons to whom the Software is
		# furnished to do so, subject to the following conditions:
		#
		# The above copyright notice and this permission notice shall be included in
		# all copies or substantial portions of the Software.
		#
		# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
		# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
		# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
		# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
		# THE SOFTWARE.
		##########
		# Splits the diploid output file into maternal, paternal, maternal_both,
		# paternal_both, diff, and mismatch reads
		# maternal_both and paternal_both have both read ends assigned
		# diff has paternal assigned to one read end and maternal assigned to the other
		# mismatch has at least one SNP that doesn't match either the reference or the
		# alternate
		# Juicer version 1.5
		NF==13{
		if ($12 ~ /mismatch/ \|\| $13 ~ /mismatch/) {
		print >> stem"mismatch.txt";
		}
		else if (($12 ~ /paternal/ && $12 ~ /maternal/) \|\| ($13 ~ /paternal/ && $13 ~ /maternal/)) {
		print >> stem"mismatch.txt";
		}
		else if ($12 ~ /paternal/ && $13 ~ /paternal/) {
		print >> stem"paternal_both.txt";
		}
		else if ($12 ~ /maternal/ && $13 ~ /maternal/) {
		print >> stem"maternal_both.txt";
		}
		else if (($12 ~ /maternal/ && $13 ~ /paternal/) \|\| ($12 ~ /paternal/ && $13 ~ /maternal/)) {
		print >> stem"diff.txt";
		}
		else {
		if ($12 ~ /maternal/ \|\| $13 ~ /maternal/) {
		print >> stem"maternal.txt";
		}
		else if ($12 ~ /paternal/ \|\| $13 ~ /paternal/) {
		print >> stem"paternal.txt";
		}
		else {
		print >> stem"problem.txt";
		}
		}
		}
		NF!=13 {
		print >> stem"problem.txt";
		}

CPU/common/mega.sh

0 → 100755

+71 −0

Original line number	Diff line number	Diff line
		#!/bin/bash
		# Makes a mega map from the files given on the command line, presumes hg19
		# MboI
		# Note: this is an older version and should be updated

		tmpDir="/aidenlab/neva"
		juiceDir="/aidenlab"
		genomeID="hg19"
		site="MboI"

		# check usage
		if [ $# -lt 2 ]
		then
		echo "Usage: `basename $0` <file1> <file2> ... <fileN>"
		echo " Combines the files on command line into mega map"
		exit 1
		fi

		# check that files exist, add to string
		str=""
		for var in "$@"
		do
		if [ ! -f $var ]
		then
		echo "File $var does not exist"
		exit 1
		else
		str+="$var "
		fi
		done

		sort -T ${tmpDir} -k2,2d -k6,6d -m ${str} > merged_nodups.txt
		if [ $? -eq 0 ]; then
		echo "merged_nodups successfully created"
		else
		echo "merged_nodups failed, exiting"
		exit 1
		fi
		${juiceDir}/scripts/statistics.pl -q 1 -ointer.txt -s ${juiceDir}/restriction_sites/${genomeID}_${site}.txt merged_nodups.txt
		if [ $? -eq 0 ]; then
		echo "inter.txt successfully created"
		else
		echo "inter.txt failed, exiting"
		exit 1
		fi
		${juiceDir}/scripts/statistics.pl -q 30 -ointer_30.txt -s ${juiceDir}/restriction_sites/${genomeID}_${site}.txt merged_nodups.txt
		if [ $? -eq 0 ]; then
		echo "inter_30.txt successfully created"
		else
		echo "inter_30.txt failed, exiting"
		exit 1
		fi
		${juiceDir}/juicer_tools pre -f /aidenlab/restriction_sites/${genomeID}_${site}.txt -s inter.txt -g inter_hists.m -q 1 merged_nodups.txt inter.hic ${genomeID}
		if [ $? -eq 0 ]; then
		echo "inter.hic successfully created"
		else
		echo "inter.hic failed, exiting"
		exit 1
		fi
		/aidenlab/juicebox pre -f /aidenlab/restriction_sites/${genomeID}_${site}.txt -s inter_30.txt -q 30 -g inter_30_hists.m merged_nodups.txt inter_30.hic ${genomeID}
		if [ $? -eq 0 ]; then
		echo "inter_30.txt successfully created"
		else
		echo "inter_30.txt failed, exiting"
		exit 1
		fi

CPU/juicer.sh

+23 −9

Original line number	Diff line number	Diff line
		@@ -59,7 +59,7 @@
		# match any files with the read1str.
		set -e
		shopt -s extglob
		juicer_version="1.5"
		juicer_version="1.5.6"
		### LOAD BWA AND SAMTOOLS

		# fastq files should look like filename_R1.fastq and filename_R2.fastq
		@@ -164,7 +164,7 @@ then
		case $genomeID in
		mm9) refSeq="${juiceDir}/references/Mus_musculus_assembly9_norandom.fasta";;
		mm10) refSeq="${juiceDir}/references/Mus_musculus_assembly10.fasta";;
		hg38) refSeq="${juiceDir}/references/hg38.fa";;
		hg38) refSeq="${juiceDir}/references/Homo_sapiens_assembly38.fasta";;
		hg19) refSeq="${juiceDir}/references/Homo_sapiens_assembly19.fasta";;

		*) echo "$usageHelp"
		@@ -299,7 +299,6 @@ fi
		## Arguments have been checked and directories created. Now begins
		## the real work of the pipeline


		testname=$(ls -l ${fastqdir} \| awk 'NR==1{print $9}')
		if [ "${testname: -3}" == ".gz" ]
		then
		@@ -309,9 +308,23 @@ else
		read1=${splitdir}"/${read1str}.fastq"
		fi

		date
		echo "Juicer version:$juicer_version"
		echo "$0 $@"
		headfile=${outputdir}/header
		date > $headfile
		# Experiment description
		if [ -n "${about}" ]
		then
		echo -ne 'Experiment description: ${about}; ' >> $headfile
		else
		echo -ne 'Experiment description: ' >> $headfile
		fi

		# Get version numbers of all software
		echo -ne "Juicer version $juicer_version;" >> $headfile
		bwa 2>&1 \| awk '\$1=="Version:"{printf(" BWA %s; ", \$2)}' >> $headfile
		echo -ne "$threads threads; " >> $headfile
		java -version 2>&1 \| awk 'NR==1{printf("%s; ", \$0);}' >> $headfile
		${juiceDir}/scripts/juicer_tools -V 2>&1 \| awk '\$1=="Juicer" && \$2=="Tools"{printf("%s; ", \$0);}' >> $headfile
		echo "$0 $@" >> $headfile

		## ALIGN FASTQ AS SINGLE END, SORT BY READNAME, HANDLE CHIMERIC READS

		@@ -488,7 +501,8 @@ then
		touch ${outputdir}/optdups.txt
		touch ${outputdir}/merged_nodups.txt
		awk -f ${juiceDir}/scripts/common/dups.awk -v name=${outputdir}/ ${outputdir}/merged_sort.txt
		mv ${outputdir}/optdups.txt ${outputdir}/opt_dups.txt # for consistency with cluster naming in split_rmdups
		# for consistency with cluster naming in split_rmdups
		mv ${outputdir}/optdups.txt ${outputdir}/opt_dups.txt
		fi
		if [ -z "$genomePath" ]
		then
		@@ -504,7 +518,7 @@ then
		then
		export _JAVA_OPTIONS=-Xmx16384m
		export LC_ALL=en_US.UTF-8
		echo -e "Experiment description: $about" > $outputdir/inter.txt
		tail -n1 $headfile \| awk '{printf"%-1000s\n", $0}' > $outputdir/inter.txt;
		${juiceDir}/scripts/common/statistics.pl -s $site_file -l $ligation -o $outputdir/stats_dups.txt $outputdir/dups.txt
		cat $splitdir/*.res.txt \| awk -f ${juiceDir}/scripts/common/stats_sub.awk >> $outputdir/inter.txt
		java -cp ${juiceDir}/scripts/common/ LibraryComplexity $outputdir inter.txt >> $outputdir/inter.txt
		@@ -518,7 +532,7 @@ then
		else
		${juiceDir}/scripts/common/juicer_tools pre -f $site_file -s $outputdir/inter.txt -g $outputdir/inter_hists.m -q 1 $outputdir/merged_nodups.txt $outputdir/inter.hic $genomePath
		fi
		echo -e "Experiment description: $about" > $outputdir/inter_30.txt
		tail -n1 $headfile \| awk '{printf"%-1000s\n", $0}' > $outputdir/inter_30.txt;
		cat $splitdir/*.res.txt \| awk -f ${juiceDir}/scripts/common/stats_sub.awk >> $outputdir/inter_30.txt
		java -cp ${juiceDir}/scripts/common/ LibraryComplexity $outputdir inter_30.txt >> $outputdir/inter_30.txt
		${juiceDir}/scripts/common/statistics.pl -s $site_file -l $ligation -o $outputdir/inter_30.txt -q 30 $outputdir/merged_nodups.txt

Admin message