Unverified commit 827fe444, authored by Muhammad Saad Shamim, committed by GitHub
Browse files

Merge pull request #257 from aidenlab/fix-encode-mega-stats

Fix encode mega stats
parents 5e822868 b706453b
Loading
Loading
Loading
Loading

CPU/common/makemega_addstats.awk

deleted 100755 → 0
+0 −74
Original line number Diff line number Diff line
#!/usr/bin/awk -f
##########
#The MIT License (MIT)
#
# Copyright (c) 2015 Aiden Lab
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#  THE SOFTWARE.
##########
# Helper script for taking in multiple inter.txt and producing sums
# Juicer version 2.0
# Sums the per-library statistics read from the input files and prints one
# combined report in END. Each rule below recognises a statistics line by its
# leading label word(s) and adds the count found in the field after the label.
# Percentages in the report are relative to the summed "Sequenced" count; the
# average insert size is the unweighted mean of the "Average" lines that were
# read.

# Returns the numeric value of s with any "," separators removed.
function to_num(s) {
    gsub(/,/, "", s);
    return s + 0;
}

# Returns part as a percentage of whole, or 0 when whole is 0 (for example
# when no "Sequenced" line was read), so the report never aborts with a
# division-by-zero error.
function pct(part, whole) {
    return (whole > 0) ? part * 100 / whole : 0;
}

$1=="Sequenced"                  { sequenced    += to_num($4) }
$1=="Unmapped:"                  { unmapped     += to_num($2) }
$1=="Normal"                     { normal_pair  += to_num($3) }
$1=="Chimeric" && $2=="Paired:"  { chimeric     += to_num($3) }
$2=="Ambiguous:"                 { ambiguous    += to_num($3) }
$1=="Alignable"                  { alignable    += to_num($4) }
$1=="Unique"                     { unique_reads += to_num($3) }
# NOTE(review): this reads $3 although the label is a single word; the other
# one-word label (Unmapped:) reads $2 and the report below prints the count
# directly after "Duplicates:". Confirm against the real input layout.
$1=="Duplicates:"                { duplicates   += to_num($3) }
$1=="Ligation"                   { ligations    += to_num($4) }
$1=="Single"                     { single_aln   += to_num($3) }
$1=="Average" {
    insert_sum += to_num($4);
    # Count the averages actually summed; NR counts every input line of every
    # file and is not a valid divisor for the mean.
    insert_count++;
}
END {
    mean_insert = (insert_count > 0) ? insert_sum / insert_count : 0;

    # "%%" emits a literal percent sign; a bare "%)" is not a valid conversion.
    printf("%s %'d\n", "Sequenced Read Pairs:", sequenced);
    printf(" %s %'d (%0.2f%%)\n", "Normal Paired:", normal_pair, pct(normal_pair, sequenced));
    printf(" %s %'d (%0.2f%%)\n", "Chimeric Paired:", chimeric, pct(chimeric, sequenced));
    printf(" %s %'d (%0.2f%%)\n", "Chimeric Ambiguous:", ambiguous, pct(ambiguous, sequenced));
    printf(" %s %'d (%0.2f%%)\n", "Unmapped:", unmapped, pct(unmapped, sequenced));
    printf(" %s %'d (%0.2f%%)\n", "Ligation Motif Present:", ligations, pct(ligations, sequenced));
    printf(" %s %'d (%0.2f%%)\n", "Single Alignment:", single_aln, pct(single_aln, sequenced));
    printf(" %s %0.2f\n", "Average insert size:", mean_insert);
    printf(" %s %'d (%0.2f%%)\n", "Alignable (Normal+Chimeric Paired):", alignable, pct(alignable, sequenced));
    printf("%s %'d\n", "Unique Reads:", unique_reads);
    printf("%s %'d\n", "Duplicates:", duplicates);
}
+24.4 MiB

File added.

No diff preview for this file type.

+9 −5
Original line number Diff line number Diff line
@@ -168,6 +168,9 @@ then
    merged_names30=$(find -L "${topDir}" | grep merged30.txt | tr '\n' ' ')
fi
#######################################
# Lists every path under ${topDir} (following symlinks) that contains the
# given file name, joined into one space-separated string.
# Globals:   topDir (read)
# Arguments: $1 - file name to look for, matched literally
# Outputs:   matching paths on stdout, each followed by a single space
#######################################
find_stat_files() {
    # -F: fixed-string match, so the "." in a file name is not a regex wildcard.
    find -L "${topDir}" | grep -F -- "$1" | tr '\n' ' '
}

inter_names=$(find_stat_files inter.txt)
inter_30_names=$(find_stat_files inter_30.txt)
inter_hist_names=$(find_stat_files inter_hists.m)
inter_30_hist_names=$(find_stat_files inter_30_hists.m)

## Create output directory, exit if already exists
if [[ -d "${outputDir}" ]] && [ -z $final ] && [ -z $postproc ]
@@ -191,14 +194,15 @@ fi
if [ -z $final ] && [ -z $postproc ]
then
    # Create top statistics file from all inter.txt files found under current dir
    awk -f "${juiceDir}"/scripts/common/makemega_addstats.awk "${inter_names}" > "${outputDir}"/inter.txt
    java -Xmx2g -jar "${juiceDir}"/scripts/common/merge-stats.jar "$outputDir"/inter "${inter_names}"
    java -Xmx2g -jar "${juiceDir}"/scripts/common/merge-stats.jar "$outputDir"/inter_30 "${inter_30_names}"
    java -Xmx2g -jar "${juiceDir}"/scripts/common/merge-stats.jar "$outputDir"/inter "${inter_hist_names}"
    java -Xmx2g -jar "${juiceDir}"/scripts/common/merge-stats.jar "$outputDir"/inter_30 "${inter_30_hist_names}"

    echo "(-: Finished creating top stats files."
    cp "${outputDir}"/inter.txt "${outputDir}"/inter_30.txt
    sort --parallel=40 -T "${tmpdir}" -m -k2,2d -k6,6d "${merged_names}" > "${outputDir}"/merged1.txt
    sort --parallel=40 -T "${tmpdir}" -m -k2,2d -k6,6d "${merged_names30}" > "${outputDir}"/merged30.txt
    echo "(-: Finished sorting all files into a single merge."
    "${juiceDir}"/scripts/common/juicer_tools statistics "$site_file" "$outputDir"/inter.txt "$outputDir"/merged1.txt "$genomeID"
    "${juiceDir}"/scripts/common/juicer_tools statistics "$site_file" "$outputDir"/inter_30.txt "$outputDir"/merged30.txt "$genomeID"

    mkdir "${tempdirPre}"
	  if [[ $threadsHic -gt 1 ]] && [[ ! -s "${outputDir}"/merged1_index.txt ]]