Commit 65dd75a9 authored by Shengen's avatar Shengen
Browse files

CAD data analysis scripts

parent bf1d8939
Loading
Loading
Loading
Loading

.DS_Store

0 → 100644
+6 KiB

File added.

No diff preview for this file type.

(6 KiB)

File changed.

No diff preview for this file type.

+13 −0
Original line number Diff line number Diff line
#!/bin/bash
#
# Run form_svm_input_fastas.py for a single task across all ten splits.
#
# Usage: <this script> <task> <outdir> <ref_fasta>
#   task      - task name; every file lives under <outdir>/<task>/
#   outdir    - base output directory
#   ref_fasta - path forwarded to the Python script as --ref_fasta
#
# For each subset ("test", then "train") and each split 0-9 the Python
# script is handed:
#   --peaks  <outdir>/<task>/svm.peaks.<task>.<subset>.<split>.gc.seq
#   --outf   <outdir>/<task>/svm.inputs.<task>.<subset>.<split>
# Both lists are built in the same order (test 0-9, then train 0-9).
# NOTE(review): presumably the Python script matches --peaks to --outf by
# position; confirm against form_svm_input_fastas.py.

set -euo pipefail

if (( $# < 3 )); then
  printf 'Usage: %s <task> <outdir> <ref_fasta>\n' "${0##*/}" >&2
  exit 2
fi

task=$1
outdir=$2
ref_fasta=$3

task_dir="$outdir/$task"

# Build the argument lists as arrays so paths containing whitespace or glob
# characters are passed through as single, unmodified words.
outf_args=()
peak_args=()
for subset in test train; do
  for split in {0..9}; do
    outf_args+=("$task_dir/svm.inputs.$task.$subset.$split")
    peak_args+=("$task_dir/svm.peaks.$task.$subset.$split.gc.seq")
  done
done

python form_svm_input_fastas.py \
  --outf "${outf_args[@]}" \
  --neg_pickle "$task_dir/$task.candidate.negatives.gc.p" \
  --overwrite_outf \
  --ref_fasta "$ref_fasta" \
  --peaks "${peak_args[@]}"
+24 −0
Original line number Diff line number Diff line
#!/bin/bash
#
# Get the inverse intersection of all peaks and all gc genome bins.
#
# Usage: <this script> <task> <peaks> <outdir> <genomewide_gc>
#   task          - task name; every output lives under <outdir>/<task>/
#   peaks         - peak file converted to BED by peak_to_bed.py
#   outdir        - base output directory
#   genomewide_gc - file of genome-wide GC bins, used as the bedtools -a input
#
# Files written under <outdir>/<task>/:
#   <task>.all.positives.bed        columns 1-3 of every test split
#   <task>.peaks.bed                output of peak_to_bed.py
#   <task>.all.positives.peaks.bed  concatenation of the two files above
#   <task>.candidate.negatives.tsv  GC bins overlapping none of the above

# Stop at the first failing step so a missing split file or a failed
# conversion cannot silently yield an incomplete negatives set.
set -euo pipefail

if (( $# < 4 )); then
  printf 'Usage: %s <task> <peaks> <outdir> <genomewide_gc>\n' "${0##*/}" >&2
  exit 2
fi

task=$1
peaks=$2
outdir=$3
genomewide_gc=$4

task_dir="$outdir/$task"
positives_bed="$task_dir/$task.all.positives.bed"
peaks_bed="$task_dir/$task.peaks.bed"
combined_bed="$task_dir/$task.all.positives.peaks.bed"
negatives_tsv="$task_dir/$task.candidate.negatives.tsv"

# Rebuild the positives file from scratch: a single redirect on the loop
# truncates any stale copy, then collects columns 1-3 of each test split.
for split in {0..9}; do
  cut -f 1-3 < "$task_dir/svm.peaks.$task.test.$split.gc.seq"
done > "$positives_bed"

python peak_to_bed.py "$peaks" "$peaks_bed"

# Disabled alternative that writes the same file:
#cut -f 1-3 $peaks > $outdir/$task/$task.peaks.bed

cat "$peaks_bed" "$positives_bed" > "$combined_bed"

# -v keeps only the entries of -a that have no overlap with -b.
bedtools intersect -v -a "$genomewide_gc" -b "$combined_bed" > "$negatives_tsv"
+9 −0
Original line number Diff line number Diff line
#!/bin/bash
#
# Run get_chrom_gc_region_dict.py on a task's candidate negatives.
#
# Usage: <this script> <task> <outdir>
#   task   - task name; files live under <outdir>/<task>/
#   outdir - base output directory
#
# Reads  <outdir>/<task>/<task>.candidate.negatives.tsv  (--input_bed)
# Writes <outdir>/<task>/<task>.candidate.negatives.gc.p (--outf)

set -euo pipefail

if (( $# < 2 )); then
  printf 'Usage: %s <task> <outdir>\n' "${0##*/}" >&2
  exit 2
fi

task=$1
outdir=$2

task_dir="$outdir/$task"

python get_chrom_gc_region_dict.py \
  --input_bed "$task_dir/$task.candidate.negatives.tsv" \
  --outf "$task_dir/$task.candidate.negatives.gc.p"
Loading