# Usage: <this script> <config_file>
#
# The config file is sourced and must define the variables this script reads:
# work_dir, local_threads, pigz_cmd, awk_cmd, bwa_cmd, bwa_index,
# samtools_cmd, hic_tools, common_dir, site_file, ligation, genome_size_file.
if [ -z "${1:-}" ];then
	echo "usage: $0 <config_file>" >&2
	exit 1
fi

source "$1" || { echo "cannot load config file: $1" >&2; exit 1; }

# Without work_dir every path below would resolve relative to "/".
: "${work_dir:?work_dir must be set by the config file}"

thread_str=$local_threads

# Compress any still-uncompressed FASTQ files. On a rerun there may be none,
# in which case the glob stays literal and the compressor must not be called.
# $pigz_cmd is left unquoted on purpose so it may carry its own options.
raw_fastqs=("${work_dir}"/*.fastq)
if [ -e "${raw_fastqs[0]}" ];then
	$pigz_cmd "${raw_fastqs[@]}"
fi

fastq_suffix=fastq.gz

split_dir=${work_dir}/splits
tmp_dir=${work_dir}/HIC_tmp
out_dir=${work_dir}/result

# mkdir -p is a no-op for directories that already exist.
mkdir -p "$split_dir" "$out_dir" "$tmp_dir"


# Per-sample stage. For every ${work_dir}/<sample>_read1.fastq.gz with a
# matching <sample>_read2.fastq.gz, produce in ${split_dir}:
#   <sample>_norm.txt.res.txt  count of read pairs matching $ligation
#   <sample>_linecount.txt     number of lines in the read1 file
#   <sample>.bam               bwa mem output filtered by chimeric_sam.awk
#                              and sorted by samtools
# Each output is skipped when it already exists, so the script can be rerun.
#
# The loop iterates over a glob rather than "ls | while read" so that the
# exit below terminates the whole script instead of a pipeline subshell.
# Tool variables ($awk_cmd, $bwa_cmd, $samtools_cmd) are left unquoted on
# purpose so they may carry their own options.
for read1_file in "${work_dir}"/*_read1."${fastq_suffix}"
do
	# An unmatched glob is left as the literal pattern; nothing to do then.
	[ -e "$read1_file" ] || continue

	sample=$(basename "$read1_file" "_read1.${fastq_suffix}")
	read2_file=${work_dir}/${sample}_read2.${fastq_suffix}

	if [ ! -e "$read2_file" ];then
		echo "$read2_file doesn't exist" >&2
		exit 1
	fi

	split_prefix=${split_dir}/${sample}
	res_file=${split_prefix}_norm.txt.res.txt
	linecount_file=${split_prefix}_linecount.txt

	if [ ! -e "$res_file" ];then
		# Put read1 and read2 side by side, keep only the sequence line of
		# each 4-line FASTQ record (NR = 2, 6, 10, ...), and count the
		# lines that match the ligation pattern.
		num1=$(
			paste \
				<(gunzip -c "$read1_file") \
				<(gunzip -c "$read2_file") |
			$awk_cmd '!((NR+2)%4)' |
			grep -cE -- "$ligation"
		)

		# Written without a trailing newline, as before.
		printf '%s' "$num1" > "$res_file"
	fi

	if [ ! -e "$linecount_file" ];then
		num2=$(
			gunzip -c "$read1_file" |
			wc -l |
			$awk_cmd '{print $1}'
		)

		echo "$num2" > "$linecount_file"
	fi

	if [ ! -e "${split_prefix}.bam" ];then
		$bwa_cmd \
			mem \
				-SP5M \
				-t "$thread_str" \
				"$bwa_index" \
				"$read1_file" \
				"$read2_file" |
		$awk_cmd \
			-v stem="${split_prefix}_norm" \
			-v site_file="$site_file" \
			-f "${common_dir}/chimeric_sam.awk" |
		$samtools_cmd \
			sort \
				-t cb \
				-n \
				-@ "$thread_str" \
		> "${split_prefix}.bam"
	fi
done

# Per-replicate stage. The replicate name is the split BAM's base name with
# any "_part<N>" and "_t<N>" components removed. For each replicate, produce
# in ${out_dir}:
#   <rep>.bam              merge of ${split_dir}/<rep>*.bam
#   <rep>_dedup.bam        merged reads after dups_sam.awk
#   <rep>_dedup.txt        reads without flag 1024, through sam_to_pre.awk
#   <rep>_dedup_index.txt  output of index_by_chr.awk on the above
#   <rep>.txt              statistics text
#   <rep>_hists.m          presumably written by "$hic_tools statistics";
#                          used here as that step's completion marker
#   <rep>.hic              output of "$hic_tools pre"
# Every step is skipped when its output already exists. Several split BAMs
# can map to the same replicate; later iterations then find everything done.
#
# Tool variables ($samtools_cmd, $awk_cmd, $hic_tools) are left unquoted on
# purpose so they may carry their own options.
for i in "${split_dir}"/*.bam
do
	# An unmatched glob is left as the literal pattern; nothing to do then.
	[ -e "$i" ] || continue

	sample=$(basename "$i" .bam)
	rep=$(echo "$sample" | sed -e 's/_part[0-9]*//g' -e 's/_t[0-9]*//g')

	rep_prefix=${out_dir}/${rep}
	rep_bam=${rep_prefix}.bam
	rep_dedup_bam=${rep_prefix}_dedup.bam
	rep_dedup_txt=${rep_prefix}_dedup.txt
	rep_dedup_index_txt=${rep_prefix}_dedup_index.txt
	rep_stat_txt=${rep_prefix}.txt
	rep_stat_m=${rep_prefix}_hists.m
	rep_hic=${rep_prefix}.hic

	if [ ! -e "$rep_bam" ];then
		$samtools_cmd \
			merge \
				-c \
				-t cb \
				-n \
				-@ "$thread_str" \
				"$rep_bam" \
				"${split_dir}/${rep}"*.bam
	fi

	# The dups_sam.awk output is stored as BAM because the duplicate count
	# in the statistics step needs it. Previously rep_dedup_bam was never
	# assigned and no step wrote the intermediate SAM it was meant to be
	# built from. Assumes dups_sam.awk passes the SAM header through
	# (the input is read with -h) -- TODO confirm.
	if [ ! -e "$rep_dedup_bam" ];then
		$samtools_cmd \
			view \
				-@ "$thread_str" \
				-h "$rep_bam" |
		$awk_cmd \
			-f "${common_dir}/dups_sam.awk" |
		$samtools_cmd \
			view \
				-b \
				-@ "$thread_str" \
				- \
		> "$rep_dedup_bam"
	fi

	if [ ! -e "$rep_dedup_txt" ];then
		# -F 1024 drops reads flagged as duplicates.
		$samtools_cmd \
			view \
				-@ "$thread_str" \
				-F 1024 \
				-O sam \
				"$rep_dedup_bam" |
		$awk_cmd \
			-v mapq=1 \
			-f "${common_dir}/sam_to_pre.awk" \
		> "$rep_dedup_txt"
	fi

	if [ ! -e "$rep_dedup_index_txt" ];then
		"${common_dir}/index_by_chr.awk" \
			"$rep_dedup_txt" \
			500000 \
		> "$rep_dedup_index_txt"
	fi

#	export IBM_JAVA_OPTIONS="-Xmx60g -Xgcthreads${thread_str}"
#	export _JAVA_OPTIONS="-Xmx60g -Xms60g"

	if [ ! -e "$rep_stat_m" ];then
		# Count reads with flags 1, 64 and 1024 all set (1089) and flag 256
		# unset.
		dups=$(
			$samtools_cmd \
				view \
					-c \
					-f 1089 \
					-F 256 \
					-@ "$thread_str" \
					"$rep_dedup_bam"
		)

		# Only this replicate's per-split counts, matching the set of
		# split BAMs that were merged above.
		cat \
			"${split_dir}/${rep}"*.res.txt |
		$awk_cmd \
			-v dups="$dups" \
			-v ligation="$ligation" \
			-f "${common_dir}/stats_sub.awk" \
		> "$rep_stat_txt"

		$hic_tools \
			statistics \
				--threads "$thread_str" \
				"$site_file" \
				"$rep_stat_txt" \
				"$rep_dedup_txt" \
				"$genome_size_file"
	fi

	if [ ! -e "$rep_hic" ];then
		$hic_tools \
			pre \
				--threads "$thread_str" \
				-s "$rep_stat_txt" \
				-g "$rep_stat_m" \
				-i "$rep_dedup_index_txt" \
				-t "$tmp_dir" \
				"$rep_dedup_txt" \
				"$rep_hic" \
				"$genome_size_file"
	fi
done


# Per-library stage. The library name is a replicate name with "_rep<N>"
# removed. When a library has more than one .hic file in ${out_dir}, all of
# its split BAMs are merged and run through the same steps as the
# per-replicate stage, producing <lib>.bam, <lib>_dedup.bam,
# <lib>_dedup.txt, <lib>_dedup_index.txt, <lib>.txt, <lib>_hists.m and
# <lib>.hic in ${out_dir}. Every step is skipped when its output exists.
#
# The glob is expanded once, before the loop body runs, so .hic files
# created inside the loop are not iterated over.
# Tool variables ($samtools_cmd, $awk_cmd, $hic_tools) are left unquoted on
# purpose so they may carry their own options.
for i in "${out_dir}"/*.hic
do
	# An unmatched glob is left as the literal pattern; nothing to do then.
	[ -e "$i" ] || continue

	rep=$(basename "$i" .hic)
	lib=$(echo "$rep" | sed 's/_rep[0-9]*//g')

	# Skip files whose name carries no "_rep<N>" part (library-level files).
	[[ "$rep" != "$lib" ]] || continue

	# Count the library's .hic files numerically; the former [[ a > b ]]
	# test compared the two values as strings.
	lib_hics=("${out_dir}/${lib}"*.hic)
	rep_hic_num=${#lib_hics[@]}
	(( rep_hic_num > 1 )) || continue

	lib_prefix=${out_dir}/${lib}
	lib_bam=${lib_prefix}.bam
	lib_dedup_bam=${lib_prefix}_dedup.bam
	lib_dedup_txt=${lib_prefix}_dedup.txt
	lib_dedup_index_txt=${lib_prefix}_dedup_index.txt
	lib_stat_txt=${lib_prefix}.txt
	lib_stat_m=${lib_prefix}_hists.m
	lib_hic=${lib_prefix}.hic

	if [ ! -e "$lib_bam" ];then
		$samtools_cmd \
			merge \
				-c \
				-t cb \
				-n \
				-@ "$thread_str" \
				"$lib_bam" \
				"${split_dir}/${lib}"*.bam
	fi

	# The dups_sam.awk output is stored as BAM because the duplicate count
	# in the statistics step needs it. Previously lib_dedup_bam was never
	# assigned and no step wrote the intermediate SAM it was meant to be
	# built from. Assumes dups_sam.awk passes the SAM header through
	# (the input is read with -h) -- TODO confirm.
	if [ ! -e "$lib_dedup_bam" ];then
		$samtools_cmd \
			view \
				-@ "$thread_str" \
				-h "$lib_bam" |
		$awk_cmd \
			-f "${common_dir}/dups_sam.awk" |
		$samtools_cmd \
			view \
				-b \
				-@ "$thread_str" \
				- \
		> "$lib_dedup_bam"
	fi

	if [ ! -e "$lib_dedup_txt" ];then
		# -F 1024 drops reads flagged as duplicates.
		$samtools_cmd \
			view \
				-@ "$thread_str" \
				-F 1024 \
				-O sam \
				"$lib_dedup_bam" |
		$awk_cmd \
			-v mapq=1 \
			-f "${common_dir}/sam_to_pre.awk" \
		> "$lib_dedup_txt"
	fi

	if [ ! -e "$lib_dedup_index_txt" ];then
		"${common_dir}/index_by_chr.awk" \
			"$lib_dedup_txt" \
			500000 \
		> "$lib_dedup_index_txt"
	fi

	if [ ! -e "$lib_stat_m" ];then
		# Count reads with flags 1, 64 and 1024 all set (1089) and flag 256
		# unset.
		dups=$(
			$samtools_cmd \
				view \
					-c \
					-f 1089 \
					-F 256 \
					-@ "$thread_str" \
					"$lib_dedup_bam"
		)

		# Only this library's per-split counts, matching the set of split
		# BAMs that were merged above. The count is passed by value; it
		# used to be escaped and reached awk as the literal text "$dups".
		cat \
			"${split_dir}/${lib}"*.res.txt |
		$awk_cmd \
			-v dups="$dups" \
			-v ligation="$ligation" \
			-f "${common_dir}/stats_sub.awk" \
		> "$lib_stat_txt"

		$hic_tools \
			statistics \
				--threads "$thread_str" \
				"$site_file" \
				"$lib_stat_txt" \
				"$lib_dedup_txt" \
				"$genome_size_file"
	fi

	# Guarded like the per-replicate step; otherwise the library .hic would
	# be rebuilt once for every replicate that belongs to the library.
	if [ ! -e "$lib_hic" ];then
		$hic_tools \
			pre \
				--threads "$thread_str" \
				-s "$lib_stat_txt" \
				-g "$lib_stat_m" \
				-i "$lib_dedup_index_txt" \
				-t "$tmp_dir" \
				"$lib_dedup_txt" \
				"$lib_hic" \
				"$genome_size_file"
	fi
done
