Commit 60efd798 authored by Haowen Zhang's avatar Haowen Zhang
Browse files

Reformat code

parent e5c6c213
Loading
Loading
Loading
Loading
+422 −447
Original line number Diff line number Diff line
@@ -968,7 +968,6 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
              //for (auto &ci : negative_candidates2) {
              //  std::cerr << (ci.position >> 32) << " " << (uint32_t) ci.position << " " << (int)ci.count << "\n";
              //}
              //if ((positive_candidates1.size() == 0 && negative_candidates1.size() == 0) || (positive_candidates2.size() == 0 && negative_candidates2.size() == 0)) {
              if (!split_alignment_) {
                SupplementCandidates(index, repetitive_seed_length1, repetitive_seed_length2, minimizers1, minimizers2, positive_hits1, positive_hits2, positive_candidates1, positive_candidates2, positive_candidates1_buffer, positive_candidates2_buffer, negative_hits1, negative_hits2, negative_candidates1, negative_candidates2, negative_candidates1_buffer, negative_candidates2_buffer);
                current_num_candidates1 = positive_candidates1.size() + negative_candidates1.size();
@@ -1059,7 +1058,6 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
                    std::sort(negative_mappings1.begin(), negative_mappings1.end(), [](const std::pair<int,uint64_t> &a, const std::pair<int,uint64_t> &b) { return a.second < b.second; });
                    std::sort(negative_mappings2.begin(), negative_mappings2.end(), [](const std::pair<int,uint64_t> &a, const std::pair<int,uint64_t> &b) { return a.second < b.second; });
                  }
                    
                  GenerateBestMappingsForPairedEndRead(pair_index, positive_candidates1.size(), negative_candidates1.size(), repetitive_seed_length1, min_num_errors1, num_best_mappings1, second_min_num_errors1, num_second_best_mappings1, read_batch1, positive_mappings1, positive_split_sites1, negative_mappings1, negative_split_sites1, positive_candidates2.size(), negative_candidates2.size(), repetitive_seed_length2, min_num_errors2, num_best_mappings2, second_min_num_errors2, num_second_best_mappings2, read_batch2, reference, barcode_batch, positive_mappings2, positive_split_sites2, negative_mappings2, negative_split_sites2, &best_mapping_indices, &generator, &F1R2_best_mappings, &F2R1_best_mappings, &F1F2_best_mappings, &R1R2_best_mappings, &min_sum_errors, &num_best_mappings, &second_min_sum_errors, &num_second_best_mappings, &mappings_on_diff_ref_seqs);
                  if (num_best_mappings == 1) {
                    ++thread_num_uniquely_mapped_reads;
@@ -1347,7 +1345,6 @@ void Chromap<MappingRecord>::GenerateBestMappingsForPairedEndReadOnOneDirection(
        (*num_best_mappings)++;
      }
    }

    return;
  }

@@ -1466,11 +1463,8 @@ void Chromap<MappingRecord>::ProcessBestMappingsForPairedEndReadOnOneDirection(D
          split_site1 = split_sites1[i1];
          split_site2 = split_sites2[i2];
        }
				GetRefStartEndPositionForReadFromMapping(first_read_direction, mappings1[i1], effect_read1, read1_length, split_site1, 
						reference, &ref_start_position1, &ref_end_position1, &n_cigar1, &cigar1, &NM1, MD_tag1) ;
				GetRefStartEndPositionForReadFromMapping(second_read_direction, mappings2[i2], effect_read2, read2_length, 
						split_site2, reference, &ref_start_position2, &ref_end_position2, 
						&n_cigar2, &cigar2, &NM2, MD_tag2) ;
        GetRefStartEndPositionForReadFromMapping(first_read_direction, mappings1[i1], effect_read1, read1_length, split_site1, reference, &ref_start_position1, &ref_end_position1, &n_cigar1, &cigar1, &NM1, MD_tag1);
        GetRefStartEndPositionForReadFromMapping(second_read_direction, mappings2[i2], effect_read2, read2_length, split_site2, reference, &ref_start_position2, &ref_end_position2, &n_cigar2, &cigar2, &NM2, MD_tag2);
        /*if (!strcmp(read_batch1.GetSequenceNameAt(pair_index), "XXXX")) {
          printf("%d\n", best_mappings.size());
          printf("%d. %d %d\n", min_sum_errors, i1, i2);
@@ -1519,17 +1513,13 @@ void Chromap<MappingRecord>::ProcessBestMappingsForPairedEndReadOnOneDirection(D
          }

          if (rid1 < rid2 || (rid1 == rid2 && position1 < position2)) {
						EmplaceBackMappingRecord(read_id, read1_name, barcode_key, 
							rid1, rid2, position1, position2, direction, direction2, mapq, is_unique, 1,
							&((*mappings_on_diff_ref_seqs)[rid1])) ;
            EmplaceBackMappingRecord(read_id, read1_name, barcode_key, rid1, rid2, position1, position2, direction, direction2, mapq, is_unique, 1, &((*mappings_on_diff_ref_seqs)[rid1]));
          } else {
						EmplaceBackMappingRecord(read_id, read1_name, barcode_key, 
							rid2, rid1, position2, position1, direction2, direction, mapq, is_unique, 1,
							&((*mappings_on_diff_ref_seqs)[rid2])) ;
            EmplaceBackMappingRecord(read_id, read1_name, barcode_key, rid2, rid1, position2, position1, direction2, direction, mapq, is_unique, 1, &((*mappings_on_diff_ref_seqs)[rid2]));
          }
        } else if (output_mapping_in_PAF_) {
          uint32_t fragment_start_position = ref_start_position1;
					uint16_t fragment_length = ref_end_position2 - ref_start_position1 + 1;;
          uint16_t fragment_length = ref_end_position2 - ref_start_position1 + 1;
          uint16_t positive_alignment_length = ref_end_position1 - ref_start_position1 + 1;
          uint16_t negative_alignment_length = ref_end_position2 - ref_start_position2 + 1;
          if (direction == 0) {
@@ -1922,12 +1912,9 @@ void Chromap<MappingRecord>::GenerateMDTag(const char *pattern, const char *text
  }
}


// return: newly adjust reference start/end position (kPositive for start, kNegative for end)
template <typename MappingRecord>
int Chromap<MappingRecord>::AdjustGapBeginning(Direction mapping_direction, const char *ref, const char *read, int *gap_beginning,
	int read_end, int ref_start_position, int ref_end_position, int *n_cigar, uint32_t **cigar) {

int Chromap<MappingRecord>::AdjustGapBeginning(Direction mapping_direction, const char *ref, const char *read, int *gap_beginning, int read_end, int ref_start_position, int ref_end_position, int *n_cigar, uint32_t **cigar) {
  int i, j;
  if (mapping_direction == kPositive) {
    if (*gap_beginning <= 0) {
@@ -1969,7 +1956,6 @@ int Chromap<MappingRecord>::AdjustGapBeginning(Direction mapping_direction, cons
        (*cigar)[*n_cigar-1] += (j - (ref_end_position + 1)) << 4;
      }
    }

    return j - 1;  
  }
}
@@ -2022,10 +2008,7 @@ void Chromap<MappingRecord>::GetRefStartEndPositionForReadFromMapping(Direction
      ksw_semi_global3(read_length + 2 * error_threshold_, reference.GetSequenceAt(rid) + verification_window_start_position, read_length, read + gap_beginning, 5, mat, gap_open_penalties_[0], gap_extension_penalties_[0], gap_open_penalties_[1], gap_extension_penalties_[1], error_threshold_ * 2 + 1, n_cigar, cigar, &mapping_start_position, &mapping_end_position);
      //std::cerr << verification_window_start_position << " " << read_length << " " << split_site << " " << mapping_start_position << " " << mapping_end_position << "\n";
      if (gap_beginning > 0) {
				int new_ref_start_position = AdjustGapBeginning(mapping_direction, reference.GetSequenceAt(rid), 
						read, &gap_beginning, read_length - 1, 
						verification_window_start_position + mapping_start_position, 
						verification_window_start_position + mapping_end_position - 1, n_cigar, cigar);
        int new_ref_start_position = AdjustGapBeginning(mapping_direction, reference.GetSequenceAt(rid), read, &gap_beginning, read_length - 1, verification_window_start_position + mapping_start_position, verification_window_start_position + mapping_end_position - 1, n_cigar, cigar);
        mapping_start_position = new_ref_start_position - verification_window_start_position;
      }
      GenerateMDTag(reference.GetSequenceAt(rid), read + gap_beginning, verification_window_start_position + mapping_start_position, *n_cigar, *cigar, *NM, MD_tag);
@@ -2067,10 +2050,7 @@ void Chromap<MappingRecord>::GetRefStartEndPositionForReadFromMapping(Direction
      ksw_semi_global3(read_length + 2 * error_threshold_, reference.GetSequenceAt(rid) + verification_window_start_position + read_start_site, read_length, read + read_start_site, 5, mat, gap_open_penalties_[0], gap_extension_penalties_[0], gap_open_penalties_[1], gap_extension_penalties_[1], error_threshold_ * 2 + 1, n_cigar, cigar, &mapping_start_position, &mapping_end_position);
      if (gap_beginning > 0) {
        //printf("before adjust %d %d %d. %d %d\n", split_site, read_start_site, read_length, verification_window_start_position + mapping_start_position, verification_window_start_position + mapping_end_position);
				int new_ref_end_position = AdjustGapBeginning(mapping_direction, reference.GetSequenceAt(rid), 
						read + read_start_site, &gap_beginning, read_length - 1, 
						verification_window_start_position + mapping_start_position, 
						verification_window_start_position + mapping_end_position - 1, n_cigar, cigar);
        int new_ref_end_position = AdjustGapBeginning(mapping_direction, reference.GetSequenceAt(rid), read + read_start_site, &gap_beginning, read_length - 1, verification_window_start_position + mapping_start_position, verification_window_start_position + mapping_end_position - 1, n_cigar, cigar);
        // The returned position is right-closed, so need to plus one to match bed convention
        mapping_end_position = new_ref_end_position + 1 - verification_window_start_position - read_start_site; 
        read_length = split_site - gap_beginning;
@@ -2124,7 +2104,6 @@ void Chromap<MappingRecord>::ProcessBestMappingsForSingleEndRead(Direction mappi
        uint32_t ref_start_position;
        uint32_t ref_end_position;
        uint8_t direction = 1;

        uint32_t *cigar;
        int n_cigar = 0;
        int NM = 0;
@@ -2141,8 +2120,7 @@ void Chromap<MappingRecord>::ProcessBestMappingsForSingleEndRead(Direction mappi
          split_site = split_sites[mi];
        }
        //printf("%d %d\n", split_site, read_length);
	GetRefStartEndPositionForReadFromMapping(mapping_direction, mappings[mi], effect_read, read_length, split_site, 
		reference, &ref_start_position, &ref_end_position, &n_cigar, &cigar, &NM, MD_tag) ;
        GetRefStartEndPositionForReadFromMapping(mapping_direction, mappings[mi], effect_read, read_length, split_site, reference, &ref_start_position, &ref_end_position, &n_cigar, &cigar, &NM, MD_tag);
        mapq = GetMAPQForSingleEndRead(error_threshold_, num_candidates, repetitive_seed_length, ref_end_position - ref_start_position + 1, min_num_errors, num_best_mappings, second_min_num_errors, num_second_best_mappings);

        if (output_mapping_in_SAM_) {
@@ -2707,7 +2685,6 @@ void Chromap<MappingRecord>::VerifyCandidatesOnOneDirection(Direction candidate_
    
    if (split_alignment_) {
      int mapping_length_threshold = 30;

      if (candidate_direction == kPositive) {
        num_errors = BandedAlignPatternToTextWithDropOff(reference.GetSequenceAt(rid) + position - error_threshold_, read, read_length, &mapping_end_position);
        if (mapping_end_position < 0 && allow_gap_beginning > 0) {
@@ -2717,8 +2694,7 @@ void Chromap<MappingRecord>::VerifyCandidatesOnOneDirection(Direction candidate_
          if (num_errors > error_threshold_ || mapping_end_position < 0) {
            num_errors = backup_num_errors;
            mapping_end_position = backup_mapping_end_position;
	  }
	  else {
          } else {
            gap_beginning = allow_gap_beginning;
            mapping_end_position += gap_beginning; // realign the mapping end position as it is the alignment from the whole read
            // I use this adjustment since "position" is based on the whole read,
@@ -2734,8 +2710,7 @@ void Chromap<MappingRecord>::VerifyCandidatesOnOneDirection(Direction candidate_
          if (num_errors > error_threshold_ || mapping_end_position < 0) {
            num_errors = backup_num_errors;
            mapping_end_position = backup_mapping_end_position;
	  }
	  else {
          } else {
            gap_beginning = allow_gap_beginning;
            mapping_end_position += gap_beginning; 
          }
@@ -2748,7 +2723,7 @@ void Chromap<MappingRecord>::VerifyCandidatesOnOneDirection(Direction candidate_
      //    num_errors = -(mapping_end_position - error_threshold_);
      //  }
      //} else {
      if (mapping_end_position - error_threshold_ - num_errors >= mapping_length_threshold) {
      if (mapping_end_position - error_threshold_ - num_errors >= mapping_length_threshold) { // TODO(Haowen): check if this needs adjustment
        num_errors = -(mapping_end_position - error_threshold_ - num_errors);
      } else {
      	num_errors = error_threshold_ + 1;
@@ -2791,9 +2766,10 @@ void Chromap<MappingRecord>::VerifyCandidatesOnOneDirection(Direction candidate_
        /*if (mapping_end_position - error_threshold_ < 0 || mapping_end_position - error_threshold_ > 200 || mapping_end_position - error_threshold_ < 20) {
          printf("ERROR! %d %d %d %d %d\n", mapping_end_position, error_threshold_, read_length,(int)candidates[ci].position, gap_beginning);
          }*/
	if (mapping_end_position - error_threshold_ > (int)read_length)
        if (mapping_end_position - error_threshold_ > (int)read_length) {
          //printf("??? %d %d\n", mapping_end_position, gap_beginning);
          mapping_end_position = read_length + error_threshold_;
        }
        split_sites->emplace_back((gap_beginning<<16)| (mapping_end_position - error_threshold_));
      }
    }
@@ -2970,7 +2946,7 @@ int Chromap<MappingRecord>::BandedAlignPatternToTextWithDropOff(const char *patt
  uint32_t HP = 0;
  int num_errors_at_band_start_position = 0;
  int i = 0;
  int fail_beginning = 0; // the alignment failed at the beginning part
  bool fail_beginning = 0; // the alignment failed at the beginning part
  for (; i < read_length; i++) {
    uint8_t pattern_base = SequenceBatch::CharToUint8(pattern[i + 2 * error_threshold_]);
    Peq[pattern_base] = Peq[pattern_base] | highest_bit_in_band_mask;
@@ -2984,18 +2960,17 @@ int Chromap<MappingRecord>::BandedAlignPatternToTextWithDropOff(const char *patt
    num_errors_at_band_start_position += 1 - (D0 & lowest_bit_in_band_mask);
    if (num_errors_at_band_start_position > 2 * error_threshold_) {
      //return error_threshold_ + 1;
      // the min error in this band could be still less than the error_threshold, and could 
      // but this should be fine since it does not affect the 5' end of the read.
      if (i < 4 * error_threshold_ && i < read_length / 2) {
        fail_beginning = 1;
      }
      break;
    }
    for (int ai = 0; ai < 5; ai++) {
      Peq[ai] >>= 1;
    }
  }

  // the min error in this band could be still less than the error_threshold, and could 
  // but this should be fine since it does not affect the 5' end of the read.
  if (i < 4 * error_threshold_ && i < read_length / 2) {
    fail_beginning = true;
  }
  /*char tmp[255] ;
  strncpy(tmp, pattern, read_length + 2 * error_threshold_);
  printf("%s\n%s\n", tmp, text);
@@ -3036,7 +3011,7 @@ int Chromap<MappingRecord>::BandedAlignPatternToTextWithDropOffFrom3End(const ch
  uint32_t HP = 0;
  int num_errors_at_band_start_position = 0;
  int i = 0;
  int fail_beginning = 0; // the alignment failed at the beginning part
  bool fail_beginning = false; // the alignment failed at the beginning part
  for (; i < read_length; i++) {
    uint8_t pattern_base = SequenceBatch::CharToUint8(pattern[read_length - 1 - i]);
    Peq[pattern_base] = Peq[pattern_base] | highest_bit_in_band_mask;
@@ -3050,15 +3025,15 @@ int Chromap<MappingRecord>::BandedAlignPatternToTextWithDropOffFrom3End(const ch
    num_errors_at_band_start_position += 1 - (D0 & lowest_bit_in_band_mask);
    if (num_errors_at_band_start_position > 2 * error_threshold_) {
      //return error_threshold_ + 1;
      if (i < 4 * error_threshold_ && i < read_length / 2) {
        fail_beginning = 1;
      }
      break;
    }
    for (int ai = 0; ai < 5; ai++) {
      Peq[ai] >>= 1;
    }
  }
  if (i < 4 * error_threshold_ && i < read_length / 2) {
    fail_beginning = true;
  }
  //printf("li %d: %d %d %d\n", fail_beginning, i, error_threshold_, read_length);
  int band_start_position = i;
  int min_num_errors = num_errors_at_band_start_position;
+47 −47

File changed.

Contains only whitespace changes.