Commit a48e216c authored by TomKellyGenetics's avatar TomKellyGenetics
Browse files

Merge branch 'master' of github.com:minoda-lab/universc

parents 333a51c2 39c9401e
Loading
Loading
Loading
Loading
+96 −23
Original line number Diff line number Diff line
@@ -40,7 +40,9 @@ my $trimmed_folder = "02_TRIMMED";
my $multi_folder = "03_MULTIQC";
my $integrated_folder = "04_INTEGRATED";

my $trimmings = "trimmed_sequences.fas";
my $trimmings3 = "trimmed_sequences_3prime.fas";
my $trimmings5 = "trimmed_sequences_5prime.fas";

my $m_threshold = 1; #shortest contaminant to consider
my $l_threshold = 15; #shortest length of sequence to keep after trimming
my $p_threshold = 0.99; #my prior
@@ -164,11 +166,22 @@ $r2_at_file = $trimmed_folder."/at.".$r2_at_file;
my $r2_atqt_file = `basename $r2_raw_file | perl -pe chomp`;
$r2_atqt_file = $trimmed_folder."/atqt.".$r2_atqt_file;

$trimmings = $trimmed_folder."/".$trimmings;
$trimmings3 = $trimmed_folder."/".$trimmings3;
$trimmings5 = $trimmed_folder."/".$trimmings5;

my $polyA = "AAAAAAAAAAAAAAA";
my $polyT = "TTTTTTTTTTTTTTT";

my $univ_adapter = "AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT";
my $univ_adapter_rc = reverse $univ_adapter;
$univ_adapter_rc =~ tr/ATGC/TACG/;
my $se_pcr_primer1 = "AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT";
my $se_pcr_primer1_rc = reverse $se_pcr_primer1;
$se_pcr_primer1_rc =~ tr/ATGC/TACG/;
my $rna_pcr_primer = "AATGATACGGCGACCACCGAGATCTACACGTTCAGAGTTCTACAGTCCGA";
my $rna_pcr_primer_rc = reverse $rna_pcr_primer;
$rna_pcr_primer_rc =~ tr/ATGC/TACG/;

my $nextera_transposase_1 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG";
my $nextera_transposase_1_rc = reverse $nextera_transposase_1;
$nextera_transposase_1_rc =~ tr/ATGC/TACG/;
@@ -176,19 +189,29 @@ my $nextera_transposase_2 = "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG";
my $nextera_transposase_2_rc = reverse $nextera_transposase_2;
$nextera_transposase_2_rc =~ tr/ATGC/TACG/;

my $univ_adapter = "AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT";
my $univ_adapter_rc = reverse $univ_adapter;
$univ_adapter_rc =~ tr/ATGC/TACG/;
#                                             AGCAGTGGTATCAACGCAGAGTACATG
#                                              GCAGTGGTATCAACGCAGAGTACATG
my $smart_race_5prime_adapter_plus = reverse "AGCAGTGGTATCAACGCAGAGTACATG";
my $smart_race_5prime_adapter_plus_rc = reverse $smart_race_5prime_adapter_plus;
$smart_race_5prime_adapter_plus_rc =~ tr/ATGC/TACG/;

my $smarter_ii_a_oligo = "AAGCAGTGGTATCAACGCAGAGTAC";
my $smarter_ii_a_oligo_rc = reverse $smarter_ii_a_oligo;
$smarter_ii_a_oligo_rc =~ tr/ATGC/TACG/;

open (TRIMMINGS, ">", "$trimmings") or die "cannot open $trimmings\n";
open (TRIMMINGS, ">", "$trimmings3") or die "cannot open $trimmings3\n";
print TRIMMINGS ">As\n";
print TRIMMINGS "$polyA\n";
print TRIMMINGS ">Ts\n";
print TRIMMINGS "$polyT\n";
print TRIMMINGS ">IlluminaUniversalAdapter\n";
print TRIMMINGS "$univ_adapter\n";
print TRIMMINGS ">IlluminaUniversalAdapter_rc\n";
print TRIMMINGS "$univ_adapter_rc\n";
print TRIMMINGS ">IlluminaSEPCRPrimer1\n";
print TRIMMINGS "$se_pcr_primer1\n";
print TRIMMINGS ">IlluminaSEPCRPrimer1_rc\n";
print TRIMMINGS "$se_pcr_primer1_rc\n";
print TRIMMINGS ">IlluminaRNAPCRPrimer\n";
print TRIMMINGS "$rna_pcr_primer\n";
print TRIMMINGS ">IlluminaRNAPCRPrimer_rc\n";
print TRIMMINGS "$rna_pcr_primer_rc\n";
print TRIMMINGS ">NexteraTransposaseRead1\n";
print TRIMMINGS "$nextera_transposase_1\n";
print TRIMMINGS ">NexteraTransposaseRead1_rc\n";
@@ -197,15 +220,6 @@ print TRIMMINGS ">NexteraTransposaseRead2\n";
print TRIMMINGS "$nextera_transposase_2\n";
print TRIMMINGS ">NexteraTransposaseRead2_rc\n";
print TRIMMINGS "$nextera_transposase_2_rc\n";
print TRIMMINGS ">IlluminaUniversalAdapter\n";
print TRIMMINGS "$univ_adapter\n";
print TRIMMINGS ">IlluminaUniversalAdapter_rc\n";
print TRIMMINGS "$univ_adapter_rc\n";
print TRIMMINGS ">SMARTer_II_A_oligo\n";
print TRIMMINGS "$smarter_ii_a_oligo\n";
print TRIMMINGS ">SMARTer_II_A_oligo_rc\n";
print TRIMMINGS "$smarter_ii_a_oligo_rc\n";

foreach my $ind (@indices) {
	my $nextera_pcr_primer_i7 = "CAAGCAGAAGACGGCATACGAGAT".$ind."GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG";
	my $nextera_pcr_primer_i7_rc = reverse $nextera_pcr_primer_i7;
@@ -218,10 +232,69 @@ foreach my $ind (@indices) {
}
close (TRIMMINGS);

#run scythe
print " running adapter trimming with scythe\n";
system "scythe -a $trimmings -q sanger -p $p_threshold -n $m_threshold -M $l_threshold --quiet $r2_raw_file | gzip >$r2_at_file";
print LOG "\tcmd: scythe -a $trimmings -q sanger -p $p_threshold -n $m_threshold -M $l_threshold --quiet $r2_raw_file | gzip >$r2_at_file\n";
open (TRIMMINGS, ">", $trimmings5) or die "cannot open $trimmings5.\n";
print TRIMMINGS ">SMART_RACE_5prime_Adapter_plus5bp\n";
print TRIMMINGS "$smart_race_5prime_adapter_plus\n";
print TRIMMINGS ">SMART_RACE_5prime_Adapter_plus5bp_rc\n";
print TRIMMINGS "$smart_race_5prime_adapter_plus_rc\n";
close (TRIMMINGS);

#run scythe on 3 prime end
my $prime3 = "temp_3prime.fq";
print " running adapter trimming with scythe (3 prime)\n";
system "scythe -a $trimmings3 -q sanger -p $p_threshold -n $m_threshold -M $l_threshold --quiet $r2_raw_file >$prime3";
print LOG "\tcmd: scythe -a $trimmings3 -q sanger -p $p_threshold -n $m_threshold -M $l_threshold --quiet $r2_raw_file\n";

#reverse sequences for 5 prime end trimming
my $prime3_rev = "temp_3prime_rev.fq";
my $linecount = 0;
open (PRIME3, "<", $prime3) or die "cannot open $prime3\n";
open (PRIME3REV, ">", $prime3_rev) or die "cannot open $prime3_rev.\n";
while (my $line = <PRIME3>) {
	$linecount++;
	if ($linecount % 2 != 0) {
		print PRIME3REV "$line";
	}
	else {
		my $new_line = $line;
		chomp $new_line;
		$new_line = reverse $new_line;
		print PRIME3REV "$new_line\n";
	}
}
close (PRIME3);
close (PRIME3REV);

#run scythe on 5 prime end
my $prime5_rev = "temp_5prime_rev.fq";
print " running adapter trimming with scythe (5 prime)\n";
system "scythe -a $trimmings5 -q sanger -p $p_threshold -n $m_threshold -M $l_threshold --quiet $prime3_rev >$prime5_rev";
print LOG "\tcmd: scythe -a $trimmings5 -q sanger -p $p_threshold -n $m_threshold -M $l_threshold --quiet $r2_raw_file (3prime trimmed and reversed)\n";

#reverse sequences back to its original orientation
my $prime5 = "temp_5prime.fq";
$linecount = 0;
open (PRIME5REV, "<", $prime5_rev) or die "cannot open $prime5_rev\n";
open (PRIME5, ">", $prime5) or die "cannot open $prime5.\n";
while (my $line = <PRIME5REV>) {
	$linecount++;
	if ($linecount % 2 != 0) {
		print PRIME5 "$line";
	}
	else {
		my $new_line = $line;
		chomp $new_line;
		$new_line = reverse $new_line;
		print PRIME5 "$new_line\n";
	}
}
close (PRIME5REV);
close (PRIME5);
system "gzip -c $prime5 >$r2_at_file";
system "rm $prime3";
system "rm $prime3_rev";
system "rm $prime5_rev";
system "rm $prime5";

#run sickle
print " running adapter trimming with sickle\n";