Commit c3f8e555 authored by Haowen Zhang's avatar Haowen Zhang
Browse files

Fix cigar and position bugs

parent 9858315d
Loading
Loading
Loading
Loading
+34 −24
Original line number Original line Diff line number Diff line
@@ -1805,22 +1805,27 @@ void Chromap<MappingRecord>::ProcessBestMappingsForSingleEndRead(Direction mappi
  uint32_t read_length = read_batch.GetSequenceLengthAt(read_index);
  uint32_t read_length = read_batch.GetSequenceLengthAt(read_index);
  const std::string &negative_read = read_batch.GetNegativeSequenceAt(read_index);
  const std::string &negative_read = read_batch.GetNegativeSequenceAt(read_index);
  uint8_t is_unique = num_best_mappings == 1 ? 1 : 0;
  uint8_t is_unique = num_best_mappings == 1 ? 1 : 0;
  //std::cerr << split_sites.size() << " " << mappings.size() << "\n";
  for (uint32_t mi = 0; mi < mappings.size(); ++mi) {
  for (uint32_t mi = 0; mi < mappings.size(); ++mi) {
    if (mappings[mi].first == min_num_errors) {
    if (mappings[mi].first == min_num_errors) {
      if (*best_mapping_index == best_mapping_indices[*num_best_mappings_reported]) {
      if (*best_mapping_index == best_mapping_indices[*num_best_mappings_reported]) {
        read_length = read_batch.GetSequenceLengthAt(read_index);
        read_length = read_batch.GetSequenceLengthAt(read_index);
        uint32_t rid = mappings[mi].second >> 32;
        uint32_t rid = mappings[mi].second >> 32;
        uint32_t position = mappings[mi].second;
        uint32_t position = mappings[mi].second;
        int split_site = mapping_direction == kPositive ? 0 : read_length - 1;
        int split_site = mapping_direction == kPositive ? 0 : read_length;
        if (split_alignment_) {
        if (split_alignment_) {
          split_site =  split_sites[mi] - error_threshold_;
          split_site = split_sites[mi];
          read_length = split_site;
          read_length = split_site;
        }
        }
        uint32_t verification_window_start_position = position + 1 > (read_length + error_threshold_) ? position + 1 - read_length - error_threshold_ : 0;
        uint32_t verification_window_start_position = position + 1 > (read_length + error_threshold_) ? position + 1 - read_length - error_threshold_ : 0;
        if (position >= reference.GetSequenceLengthAt(rid)) {
        if (position >= reference.GetSequenceLengthAt(rid)) {
          verification_window_start_position = reference.GetSequenceLengthAt(rid) - error_threshold_ - read_length; 
          verification_window_start_position = reference.GetSequenceLengthAt(rid) - error_threshold_ - read_length; 
        }
        }
        if (split_alignment_) {
          if (split_sites[mi] < (int)read_batch.GetSequenceLengthAt(read_index)) {
            split_site -= 3 * error_threshold_;
          } 
          read_length = split_site;
        }
        uint32_t barcode_key = 0;
        uint32_t barcode_key = 0;
        if (!is_bulk_data_) {
        if (!is_bulk_data_) {
          barcode_key = barcode_batch.GenerateSeedFromSequenceAt(read_index, 0, barcode_batch.GetSequenceLengthAt(read_index));
          barcode_key = barcode_batch.GenerateSeedFromSequenceAt(read_index, 0, barcode_batch.GetSequenceLengthAt(read_index));
@@ -1837,11 +1842,12 @@ void Chromap<MappingRecord>::ProcessBestMappingsForSingleEndRead(Direction mappi
            uint32_t *cigar;
            uint32_t *cigar;
            int mapping_end_position;
            int mapping_end_position;
            ksw_semi_global3(read_length + 2 * error_threshold_, reference.GetSequenceAt(rid) + verification_window_start_position, read_length, read, 5, mat, gap_open_penalties_[0], gap_extension_penalties_[0], gap_open_penalties_[1], gap_extension_penalties_[1], error_threshold_ * 2 + 1, &n_cigar, &cigar, &mapping_start_position, &mapping_end_position);
            ksw_semi_global3(read_length + 2 * error_threshold_, reference.GetSequenceAt(rid) + verification_window_start_position, read_length, read, 5, mat, gap_open_penalties_[0], gap_extension_penalties_[0], gap_open_penalties_[1], gap_extension_penalties_[1], error_threshold_ * 2 + 1, &n_cigar, &cigar, &mapping_start_position, &mapping_end_position);
            //std::cerr << verification_window_start_position << " " << read_length << " " << split_site << " " << mapping_start_position << " " << mapping_end_position << "\n";
            int NM = 0;
            int NM = 0;
            std::string MD_tag = "";
            std::string MD_tag = "";
            GenerateMDTag(reference.GetSequenceAt(rid), read, verification_window_start_position + mapping_start_position + 1, n_cigar, cigar, NM, MD_tag);
            GenerateMDTag(reference.GetSequenceAt(rid), read, verification_window_start_position + mapping_start_position, n_cigar, cigar, NM, MD_tag);
            mapq = GetMAPQForSingleEndRead(error_threshold_, 0, 0, mapping_end_position - mapping_start_position + 1, min_num_errors, num_best_mappings, second_min_num_errors, num_second_best_mappings);
            mapq = GetMAPQForSingleEndRead(error_threshold_, 0, 0, mapping_end_position - mapping_start_position, min_num_errors, num_best_mappings, second_min_num_errors, num_second_best_mappings);
            EmplaceBackMappingRecord(read_id, read_name, 1, verification_window_start_position + mapping_start_position, rid, flag, 1, is_unique, mapq, NM, n_cigar, cigar, MD_tag, &((*mappings_on_diff_ref_seqs)[rid]));
            EmplaceBackMappingRecord(read_id, read_name, 1, verification_window_start_position + mapping_start_position, rid, flag, 0, is_unique, mapq, NM, n_cigar, cigar, MD_tag, &((*mappings_on_diff_ref_seqs)[rid]));
          } else {
          } else {
            //int n_cigar = 0;
            //int n_cigar = 0;
            //uint32_t *cigar;
            //uint32_t *cigar;
@@ -1863,17 +1869,17 @@ void Chromap<MappingRecord>::ProcessBestMappingsForSingleEndRead(Direction mappi
          }
          }
        } else {
        } else {
          uint8_t direction = 0;
          uint8_t direction = 0;
          int read_start_site = read_batch.GetSequenceLengthAt(read_index) - 1 - split_site;
          int read_start_site = read_batch.GetSequenceLengthAt(read_index) - split_site;
          if (output_mapping_in_SAM_) {
          if (output_mapping_in_SAM_) {
            int n_cigar = 0;
            int n_cigar = 0;
            uint32_t *cigar;
            uint32_t *cigar;
            int mapping_end_position;
            int mapping_end_position;
            ksw_semi_global3(read_length + 2 * error_threshold_, reference.GetSequenceAt(rid) + verification_window_start_position, read_length, negative_read.data() + read_start_site, 5, mat, gap_open_penalties_[0], gap_extension_penalties_[0], gap_open_penalties_[1], gap_extension_penalties_[1], error_threshold_ * 2 + 1, &n_cigar, &cigar, &mapping_start_position, &mapping_end_position);
            ksw_semi_global3(read_length + 2 * error_threshold_, reference.GetSequenceAt(rid) + verification_window_start_position + read_start_site, read_length, negative_read.data() + read_start_site, 5, mat, gap_open_penalties_[0], gap_extension_penalties_[0], gap_open_penalties_[1], gap_extension_penalties_[1], error_threshold_ * 2 + 1, &n_cigar, &cigar, &mapping_start_position, &mapping_end_position);
            int NM = 0;
            int NM = 0;
            std::string MD_tag = "";
            std::string MD_tag = "";
            GenerateMDTag(reference.GetSequenceAt(rid), negative_read.data() + read_start_site, verification_window_start_position + mapping_start_position + 1, n_cigar, cigar, NM, MD_tag);
            GenerateMDTag(reference.GetSequenceAt(rid), negative_read.data() + read_start_site, verification_window_start_position + read_start_site + mapping_start_position, n_cigar, cigar, NM, MD_tag);
            mapq = GetMAPQForSingleEndRead(error_threshold_, 0, 0, mapping_end_position - mapping_start_position + 1, min_num_errors, num_best_mappings, second_min_num_errors, num_second_best_mappings);
            mapq = GetMAPQForSingleEndRead(error_threshold_, 0, 0, mapping_end_position - mapping_start_position, min_num_errors, num_best_mappings, second_min_num_errors, num_second_best_mappings);
            EmplaceBackMappingRecord(read_id, read_name, 1, verification_window_start_position + mapping_end_position, rid, flag, 0, is_unique, mapq, NM, n_cigar, cigar, MD_tag, &((*mappings_on_diff_ref_seqs)[rid]));
            EmplaceBackMappingRecord(read_id, read_name, 1, verification_window_start_position + read_start_site + mapping_end_position, rid, flag, 1, is_unique, mapq, NM, n_cigar, cigar, MD_tag, &((*mappings_on_diff_ref_seqs)[rid]));
          } else {
          } else {
            //int n_cigar = 0;
            //int n_cigar = 0;
            //uint32_t *cigar;
            //uint32_t *cigar;
@@ -2446,14 +2452,18 @@ void Chromap<MappingRecord>::VerifyCandidatesOnOneDirection(Direction candidate_
      } else {
      } else {
        num_errors = BandedAlignPatternToTextWithDropOffFrom3End(reference.GetSequenceAt(rid) + position - error_threshold_, negative_read.data(), read_length, &mapping_end_position);
        num_errors = BandedAlignPatternToTextWithDropOffFrom3End(reference.GetSequenceAt(rid) + position - error_threshold_, negative_read.data(), read_length, &mapping_end_position);
      }
      }
      if (num_errors > error_threshold_) {
      //std::cerr << "ne1: " << num_errors << " " << mapping_end_position << "\n";
      //if (num_errors > 2 * error_threshold_) {
      //  if (mapping_end_position - error_threshold_ - num_errors >= mapping_length_threshold) {
      //    mapping_end_position -= num_errors;
      //    num_errors = -(mapping_end_position - error_threshold_);
      //  }
      //} else {
      if (mapping_end_position - error_threshold_ - num_errors >= mapping_length_threshold) {
      if (mapping_end_position - error_threshold_ - num_errors >= mapping_length_threshold) {
          mapping_end_position -= num_errors;
          num_errors = -(mapping_end_position - error_threshold_);
        }
      } else {
        num_errors = -(mapping_end_position - error_threshold_ - num_errors);
        num_errors = -(mapping_end_position - error_threshold_ - num_errors);
      }
      }
      //}
      //std::cerr << "ne2: " << num_errors << " " << mapping_end_position << "\n";
    } else {
    } else {
      if (candidate_direction == kPositive) {
      if (candidate_direction == kPositive) {
        num_errors = BandedAlignPatternToText(reference.GetSequenceAt(rid) + position - error_threshold_, read, read_length, &mapping_end_position);
        num_errors = BandedAlignPatternToText(reference.GetSequenceAt(rid) + position - error_threshold_, read, read_length, &mapping_end_position);
@@ -2478,14 +2488,14 @@ void Chromap<MappingRecord>::VerifyCandidatesOnOneDirection(Direction candidate_
      if (candidate_direction == kPositive) {
      if (candidate_direction == kPositive) {
        mappings->emplace_back(num_errors, candidates[ci].position - error_threshold_ + mapping_end_position);
        mappings->emplace_back(num_errors, candidates[ci].position - error_threshold_ + mapping_end_position);
      } else {
      } else {
        if (split_alignment_) {
        //if (split_alignment_) {
          mappings->emplace_back(num_errors, candidates[ci].position - read_length + 1 - error_threshold_ + read_length + error_threshold_); 
        //  mappings->emplace_back(num_errors, candidates[ci].position - read_length + 1 - error_threshold_ + read_length + error_threshold_); 
        } else {
        //} else {
          mappings->emplace_back(num_errors, candidates[ci].position - read_length + 1 - error_threshold_ + mapping_end_position); 
          mappings->emplace_back(num_errors, candidates[ci].position - read_length + 1 - error_threshold_ + mapping_end_position); 
        }
        //}
      }
      }
      if (split_alignment_) {
      if (split_alignment_) {
        split_sites->emplace_back(mapping_end_position);
        split_sites->emplace_back(mapping_end_position - error_threshold_);
      }
      }
    }
    }
  }
  }
@@ -2671,7 +2681,7 @@ int Chromap<MappingRecord>::BandedAlignPatternToTextWithDropOff(const char *patt
    VN = X & HP;
    VN = X & HP;
    VP = HN | ~(X | HP);
    VP = HN | ~(X | HP);
    num_errors_at_band_start_position += 1 - (D0 & lowest_bit_in_band_mask);
    num_errors_at_band_start_position += 1 - (D0 & lowest_bit_in_band_mask);
    if (num_errors_at_band_start_position > 3 * error_threshold_) {
    if (num_errors_at_band_start_position > 2 * error_threshold_) {
      //return error_threshold_ + 1;
      //return error_threshold_ + 1;
      break;
      break;
    }
    }
@@ -2721,7 +2731,7 @@ int Chromap<MappingRecord>::BandedAlignPatternToTextWithDropOffFrom3End(const ch
    VN = X & HP;
    VN = X & HP;
    VP = HN | ~(X | HP);
    VP = HN | ~(X | HP);
    num_errors_at_band_start_position += 1 - (D0 & lowest_bit_in_band_mask);
    num_errors_at_band_start_position += 1 - (D0 & lowest_bit_in_band_mask);
    if (num_errors_at_band_start_position > 3 * error_threshold_) {
    if (num_errors_at_band_start_position > 2 * error_threshold_) {
      //return error_threshold_ + 1;
      //return error_threshold_ + 1;
      break;
      break;
    }
    }
+1 −1
Original line number Original line Diff line number Diff line
@@ -614,7 +614,7 @@ int ksw_semi_global3(int qlen, const char *query, int tlen, const char *target,
		}
		}
		if (i >= 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, i + 1);
		if (i >= 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, i + 1);
    if (mapping_start_position) {
    if (mapping_start_position) {
      *mapping_start_position = k;
      *mapping_start_position = k + 1;
    }
    }
		//if (k >= 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, k + 1);
		//if (k >= 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, k + 1);
		for (i = 0; i < n_cigar>>1; ++i) // reverse CIGAR
		for (i = 0; i < n_cigar>>1; ++i) // reverse CIGAR
+2 −2
Original line number Original line Diff line number Diff line
@@ -255,7 +255,7 @@ struct SAMMapping {
    for (int ci = 0; ci < n_cigar; ++ci) {
    for (int ci = 0; ci < n_cigar; ++ci) {
      uint32_t op = bam_cigar_op(cigar[ci]);
      uint32_t op = bam_cigar_op(cigar[ci]);
      uint32_t op_length = bam_cigar_oplen(cigar[ci]);
      uint32_t op_length = bam_cigar_oplen(cigar[ci]);
      if ((bam_cigar_type(op) & 0x10) > 0) {
      if ((bam_cigar_type(op) & 0x2) > 0) {
        alignment_length += op_length;
        alignment_length += op_length;
      }
      }
    }
    }
@@ -263,7 +263,7 @@ struct SAMMapping {
  }
  }
  uint32_t GetStartPosition() const { // inclusive
  uint32_t GetStartPosition() const { // inclusive
    if (IsPositive()) {
    if (IsPositive()) {
      return pos;
      return pos + 1;
    } else {
    } else {
      return pos + 1 - GetAlignmentLength();
      return pos + 1 - GetAlignmentLength();
    }
    }