Commit 914ab5de authored by Haowen Zhang's avatar Haowen Zhang
Browse files

Refactor and fix a bug in early stop

parent 560a488f
Loading
Loading
Loading
Loading
+40 −33
Original line number Diff line number Diff line
@@ -1091,12 +1091,12 @@ void Chromap<PairedEndMappingWithBarcode>::EmplaceBackMappingRecord(uint32_t rea
}

template<typename MappingRecord>
void Chromap<MappingRecord>::EmplaceBackMappingRecord(uint32_t read_id, const char *read1_name, const char *read2_name, uint16_t read1_length, uint16_t read2_length, uint32_t barcode, uint32_t fragment_start_position, uint16_t fragment_length, uint8_t mapq, uint8_t direction, uint8_t is_unique, uint8_t num_dups, uint16_t positive_alignment_length, uint16_t negative_alignment_length, std::vector<MappingRecord> *mappings_on_diff_ref_seqs) {
void Chromap<MappingRecord>::EmplaceBackMappingRecord(uint32_t read_id, const char *read1_name, const char *read2_name, uint16_t read1_length, uint16_t read2_length, uint32_t barcode, uint32_t fragment_start_position, uint16_t fragment_length, uint8_t mapq1, uint8_t mapq2, uint8_t direction, uint8_t is_unique, uint8_t num_dups, uint16_t positive_alignment_length, uint16_t negative_alignment_length, std::vector<MappingRecord> *mappings_on_diff_ref_seqs) {
}

template<>
void Chromap<PairedPAFMapping>::EmplaceBackMappingRecord(uint32_t read_id, const char *read1_name, const char *read2_name, uint16_t read1_length, uint16_t read2_length, uint32_t barcode, uint32_t fragment_start_position, uint16_t fragment_length, uint8_t mapq, uint8_t direction, uint8_t is_unique, uint8_t num_dups, uint16_t positive_alignment_length, uint16_t negative_alignment_length, std::vector<PairedPAFMapping> *mappings_on_diff_ref_seqs) {
  mappings_on_diff_ref_seqs->emplace_back(PairedPAFMapping{read_id, std::string(read1_name), std::string(read2_name), read1_length, read2_length, fragment_start_position, fragment_length, positive_alignment_length, negative_alignment_length, mapq, direction, is_unique, num_dups});
void Chromap<PairedPAFMapping>::EmplaceBackMappingRecord(uint32_t read_id, const char *read1_name, const char *read2_name, uint16_t read1_length, uint16_t read2_length, uint32_t barcode, uint32_t fragment_start_position, uint16_t fragment_length, uint8_t mapq1, uint8_t mapq2, uint8_t direction, uint8_t is_unique, uint8_t num_dups, uint16_t positive_alignment_length, uint16_t negative_alignment_length, std::vector<PairedPAFMapping> *mappings_on_diff_ref_seqs) {
  mappings_on_diff_ref_seqs->emplace_back(PairedPAFMapping{read_id, std::string(read1_name), std::string(read2_name), read1_length, read2_length, fragment_start_position, fragment_length, positive_alignment_length, negative_alignment_length, mapq1 < mapq2 ? mapq1 : mapq2, mapq1, mapq2, direction, is_unique, num_dups});
}

template <typename MappingRecord>
@@ -1139,7 +1139,9 @@ void Chromap<MappingRecord>::ProcessBestMappingsForPairedEndReadOnOneDirection(D
          uint16_t fragment_length = position2 - fragment_start_position + 1;
          uint16_t positive_alignment_length = position1 + 1 - fragment_start_position;
          uint16_t negative_alignment_length = position2 + 1 - (verification_window_start_position2 + mapping_start_position2);
          mapq = GetMAPQForPairedEndRead(num_candidates1, num_candidates2, repetitive_seed_length1, repetitive_seed_length2, positive_alignment_length, negative_alignment_length, min_sum_errors, num_best_mappings, second_min_sum_errors, num_second_best_mappings, min_num_errors1, min_num_errors2, num_best_mappings1, num_best_mappings2, second_min_num_errors1, second_min_num_errors2, num_second_best_mappings1, num_second_best_mappings2);
          uint8_t mapq1 = 0;
          uint8_t mapq2 = 0;
          mapq = GetMAPQForPairedEndRead(num_candidates1, num_candidates2, repetitive_seed_length1, repetitive_seed_length2, positive_alignment_length, negative_alignment_length, min_sum_errors, num_best_mappings, second_min_sum_errors, num_second_best_mappings, min_num_errors1, min_num_errors2, num_best_mappings1, num_best_mappings2, second_min_num_errors1, second_min_num_errors2, num_second_best_mappings1, num_second_best_mappings2, mapq1, mapq2);
          uint8_t direction = 1;
          //mapq |= (uint8_t)1;
          uint32_t barcode_key = 0;
@@ -1147,7 +1149,7 @@ void Chromap<MappingRecord>::ProcessBestMappingsForPairedEndReadOnOneDirection(D
            barcode_key = barcode_batch.GenerateSeedFromSequenceAt(pair_index, 0, barcode_batch.GetSequenceLengthAt(pair_index));
          }
          if (output_mapping_in_PAF_) {
            EmplaceBackMappingRecord(read_id, read_batch1.GetSequenceNameAt(pair_index), read_batch2.GetSequenceNameAt(pair_index), (uint16_t)read_batch1.GetSequenceLengthAt(pair_index), (uint16_t)read_batch2.GetSequenceLengthAt(pair_index), barcode_key, fragment_start_position, fragment_length, mapq, direction, is_unique, 1, positive_alignment_length, negative_alignment_length, &((*mappings_on_diff_ref_seqs)[rid1]));
            EmplaceBackMappingRecord(read_id, read_batch1.GetSequenceNameAt(pair_index), read_batch2.GetSequenceNameAt(pair_index), (uint16_t)read_batch1.GetSequenceLengthAt(pair_index), (uint16_t)read_batch2.GetSequenceLengthAt(pair_index), barcode_key, fragment_start_position, fragment_length, mapq1, mapq2, direction, is_unique, 1, positive_alignment_length, negative_alignment_length, &((*mappings_on_diff_ref_seqs)[rid1]));
          } else {
            EmplaceBackMappingRecord(read_id, barcode_key, fragment_start_position, fragment_length, mapq, direction, is_unique, 1, positive_alignment_length, negative_alignment_length, &((*mappings_on_diff_ref_seqs)[rid1]));
          }
@@ -1158,14 +1160,16 @@ void Chromap<MappingRecord>::ProcessBestMappingsForPairedEndReadOnOneDirection(D
          uint16_t fragment_length = position1 - fragment_start_position + 1;
          uint16_t positive_alignment_length = position2 + 1 - fragment_start_position;
          uint16_t negative_alignment_length = position1 + 1 - (verification_window_start_position1 + mapping_start_position1);
          mapq = GetMAPQForPairedEndRead(num_candidates2, num_candidates1, repetitive_seed_length2, repetitive_seed_length1, positive_alignment_length, negative_alignment_length, min_sum_errors, num_best_mappings, second_min_sum_errors, num_second_best_mappings, min_num_errors2, min_num_errors1, num_best_mappings2, num_best_mappings1, second_min_num_errors2, second_min_num_errors1, num_second_best_mappings2, num_second_best_mappings1);
          uint8_t mapq1 = 0;
          uint8_t mapq2 = 0;
          mapq = GetMAPQForPairedEndRead(num_candidates2, num_candidates1, repetitive_seed_length2, repetitive_seed_length1, positive_alignment_length, negative_alignment_length, min_sum_errors, num_best_mappings, second_min_sum_errors, num_second_best_mappings, min_num_errors2, min_num_errors1, num_best_mappings2, num_best_mappings1, second_min_num_errors2, second_min_num_errors1, num_second_best_mappings2, num_second_best_mappings1, mapq2, mapq1);
          uint8_t direction = 0;
          uint32_t barcode_key = 0;
          if (!is_bulk_data_) {
            barcode_key = barcode_batch.GenerateSeedFromSequenceAt(pair_index, 0, barcode_batch.GetSequenceLengthAt(pair_index));
          }
          if (output_mapping_in_PAF_) {
            EmplaceBackMappingRecord(read_id, read_batch1.GetSequenceNameAt(pair_index), read_batch2.GetSequenceNameAt(pair_index), (uint16_t)read_batch1.GetSequenceLengthAt(pair_index), (uint16_t)read_batch2.GetSequenceLengthAt(pair_index), barcode_key, fragment_start_position, fragment_length, mapq, direction, is_unique, 1, positive_alignment_length, negative_alignment_length, &((*mappings_on_diff_ref_seqs)[rid1]));
            EmplaceBackMappingRecord(read_id, read_batch1.GetSequenceNameAt(pair_index), read_batch2.GetSequenceNameAt(pair_index), (uint16_t)read_batch1.GetSequenceLengthAt(pair_index), (uint16_t)read_batch2.GetSequenceLengthAt(pair_index), barcode_key, fragment_start_position, fragment_length, mapq1, mapq2, direction, is_unique, 1, positive_alignment_length, negative_alignment_length, &((*mappings_on_diff_ref_seqs)[rid1]));
          } else {
            EmplaceBackMappingRecord(read_id, barcode_key, fragment_start_position, fragment_length, mapq, direction, is_unique, 1, positive_alignment_length, negative_alignment_length, &((*mappings_on_diff_ref_seqs)[rid1]));
          }
@@ -1944,7 +1948,7 @@ void Chromap<MappingRecord>::VerifyCandidatesOnOneDirectionUsingSIMD(Direction c
      }
      valid_candidate_index = 0;
      // Check whether we should stop early. Assuming the candidates are sorted 
      if (GetMAPQForSingleEndRead(error_threshold_, num_candidates, 0, read_length + error_threshold_, *min_num_errors, *num_best_mappings, *second_min_num_errors, *second_min_num_errors) == 0)
      if (GetMAPQForSingleEndRead(error_threshold_, num_candidates, 0, read_length + error_threshold_, *min_num_errors, *num_best_mappings, *second_min_num_errors, *num_second_best_mappings) == 0)
        break;
    }
    ++candidate_index;
@@ -2572,6 +2576,7 @@ uint8_t Chromap<MappingRecord>::GetMAPQForSingleEndRead(int error_threshold, int
    double tmp = alignment_length < mapq_coef_length ? 1.0 : mapq_coef_fraction / log(alignment_length);
    tmp *= alignment_identity * alignment_identity;
    mapq = 5 * 6.02 * (second_min_num_errors - min_num_errors) * tmp * tmp + 0.499;
    //mapq = 30 - 34.0 / error_threshold + 34.0 / error_threshold * (second_min_num_errors - min_num_errors) * tmp * tmp + 0.499;
  }
  if (num_second_best_mappings > 0) {
    mapq -= (int)(4.343 * log(num_second_best_mappings + 1) + 0.499);
@@ -2599,11 +2604,11 @@ uint8_t Chromap<MappingRecord>::GetMAPQForSingleEndRead(int error_threshold, int
#define raw_mapq(diff, a) ((int)(5 * 6.02 * (diff) / (a) + .499))

template <typename MappingRecord>
uint8_t Chromap<MappingRecord>::GetMAPQForPairedEndRead(int num_positive_candidates, int num_negative_candidates, uint32_t repetitive_seed_length1, uint32_t repetitive_seed_length2, uint16_t positive_alignment_length, uint16_t negative_alignment_length, int min_sum_errors, int num_best_mappings, int second_min_sum_errors, int num_second_best_mappings, int min_num_errors1, int min_num_errors2, int num_best_mappings1, int num_best_mappings2, int second_min_num_errors1, int second_min_num_errors2, int num_second_best_mappings1, int num_second_best_mappings2) {
uint8_t Chromap<MappingRecord>::GetMAPQForPairedEndRead(int num_positive_candidates, int num_negative_candidates, uint32_t repetitive_seed_length1, uint32_t repetitive_seed_length2, uint16_t positive_alignment_length, uint16_t negative_alignment_length, int min_sum_errors, int num_best_mappings, int second_min_sum_errors, int num_second_best_mappings, int min_num_errors1, int min_num_errors2, int num_best_mappings1, int num_best_mappings2, int second_min_num_errors1, int second_min_num_errors2, int num_second_best_mappings1, int num_second_best_mappings2, uint8_t &mapq1, uint8_t &mapq2) {
  //std::cerr << " rl1:" << (int)repetitive_seed_length1 << " rl2:" << (int)repetitive_seed_length2 << " pal:" << (int)positive_alignment_length << " nal:" << (int)negative_alignment_length << " me:" << min_sum_errors << " #bm:" << num_best_mappings << " sme:" << second_min_sum_errors << " #sbm:" << num_second_best_mappings << " me1:" << min_num_errors1 << " me2:" << min_num_errors2 << " #bm1:" << num_best_mappings1 << " #bm2:" << num_best_mappings2 << " sme1:" << second_min_num_errors1 << " sme2:" << second_min_num_errors2 << " #sbm1:" << num_second_best_mappings1 << " #sbm2:" << num_second_best_mappings2 << "\n";
  uint8_t mapq_pe = GetMAPQForSingleEndRead(2 * error_threshold_, num_positive_candidates + num_negative_candidates, repetitive_seed_length1 + repetitive_seed_length2, positive_alignment_length + negative_alignment_length, min_sum_errors, num_best_mappings, second_min_sum_errors, num_second_best_mappings);
  uint8_t mapq1 = GetMAPQForSingleEndRead(error_threshold_, num_positive_candidates, repetitive_seed_length1, positive_alignment_length, min_num_errors1, num_best_mappings1, second_min_num_errors1, num_second_best_mappings1);
  uint8_t mapq2 = GetMAPQForSingleEndRead(error_threshold_, num_negative_candidates, repetitive_seed_length2, negative_alignment_length, min_num_errors2, num_best_mappings2, second_min_num_errors2, num_second_best_mappings2);
  mapq1 = GetMAPQForSingleEndRead(error_threshold_, num_positive_candidates, repetitive_seed_length1, positive_alignment_length, min_num_errors1, num_best_mappings1, second_min_num_errors1, num_second_best_mappings1);
  mapq2 = GetMAPQForSingleEndRead(error_threshold_, num_negative_candidates, repetitive_seed_length2, negative_alignment_length, min_num_errors2, num_best_mappings2, second_min_num_errors2, num_second_best_mappings2);
  //std::cerr << " 1:" << (int)mapq1 << " 2:" << (int)mapq2 << "\n";
  mapq1 = mapq1 > mapq_pe ? mapq1 : mapq_pe < mapq1 + 40? mapq_pe : mapq1 + 40;
  mapq2 = mapq2 > mapq_pe ? mapq2 : mapq_pe < mapq2 + 40? mapq_pe : mapq2 + 40;
@@ -2617,6 +2622,8 @@ uint8_t Chromap<MappingRecord>::GetMAPQForPairedEndRead(int num_positive_candida
  //int mapq_coef = 60;
  //uint8_t max_mapq1 = (int)(mapq_coef * ((double)(second_min_num_errors1 - min_num_errors1) / second_min_num_errors1) + .499);
  //uint8_t max_mapq2 = (int)(mapq_coef * ((double)(second_min_num_errors2 - min_num_errors2) / second_min_num_errors2) + .499);
  //uint8_t max_mapq1 = 30 - 34.0 / error_threshold_ + 34.0 / error_threshold_ * (second_min_num_errors1 - min_num_errors1);
  //uint8_t max_mapq2 = 30 - 34.0 / error_threshold_ + 34.0 / error_threshold_ * (second_min_num_errors2 - min_num_errors2);
  //mapq1 = mapq1 < max_mapq1 ? mapq1 : max_mapq1;
  //mapq2 = mapq2 < max_mapq2 ? mapq2 : max_mapq2;
  mapq1 = mapq1 < raw_mapq(second_min_num_errors1 - min_num_errors1, 1) ? mapq1 : raw_mapq(second_min_num_errors1 - min_num_errors1, 1);
+2 −2
Original line number Diff line number Diff line
@@ -123,7 +123,7 @@ class Chromap {
  void ProcessBestMappingsForPairedEndReadOnOneDirection(Direction first_read_direction, uint32_t pair_index, uint8_t mapq, int num_candidates1, uint32_t repetitive_seed_length1, int min_num_errors1, int num_best_mappings1, int second_min_num_errors1, int num_second_best_mappings1, const SequenceBatch &read_batch1, const std::vector<std::pair<int, uint64_t> > &mappings1, int num_candidates2, uint32_t repetitive_seed_length2, int min_num_errors2, int num_best_mappings2, int second_min_num_errors2, int num_second_best_mappings2, const SequenceBatch &read_batch2, const SequenceBatch &reference, const SequenceBatch &barcode_batch, const std::vector<int> &best_mapping_indices, const std::vector<std::pair<int, uint64_t> > &mappings2, const std::vector<std::pair<uint32_t, uint32_t> > &best_mappings, int min_sum_errors, int num_best_mappings, int second_min_sum_errors, int num_second_best_mappings, int *best_mapping_index, int *num_best_mappings_reported, std::vector<std::vector<MappingRecord> > *mappings_on_diff_ref_seqs);
  void GenerateBestMappingsForPairedEndRead(uint32_t pair_index, int num_positive_candidates1, int num_negative_candidates1, uint32_t repetitive_seed_length1, int min_num_errors1, int num_best_mappings1, int second_min_num_errors1, int num_second_best_mappings1, const SequenceBatch &read_batch1, const std::vector<std::pair<int, uint64_t> > &positive_mappings1, const std::vector<std::pair<int, uint64_t> > &negative_mappings1, int num_positive_candidates2, int num_negative_candidates2, uint32_t repetitive_seed_length2, int min_num_errors2, int num_best_mappings2, int second_min_num_errors2, int num_second_best_mappings2, const SequenceBatch &read_batch2, const SequenceBatch &reference, const SequenceBatch &barcode_batch, const std::vector<std::pair<int, uint64_t> > &positive_mappings2, const std::vector<std::pair<int, uint64_t> > &negative_mappings2, std::vector<int> *best_mapping_indices, std::mt19937 *generator, std::vector<std::pair<uint32_t, uint32_t> > *F1R2_best_mappings, std::vector<std::pair<uint32_t, uint32_t> > *F2R1_best_mappings, int *min_sum_errors, int *num_best_mappings, int *second_min_sum_errors, int *num_second_best_mappings, std::vector<std::vector<MappingRecord> > *mappings_on_diff_ref_seqs);
  void EmplaceBackMappingRecord(uint32_t read_id, uint32_t barcode, uint32_t fragment_start_position, uint16_t fragment_length, uint8_t mapq, uint8_t direction, uint8_t is_unique, uint8_t num_dups, uint16_t positive_alignment_length, uint16_t negative_alignment_length, std::vector<MappingRecord> *mappings_on_diff_ref_seqs);
  void EmplaceBackMappingRecord(uint32_t read_id, const char *read1_name, const char *read2_name, uint16_t read1_length, uint16_t read2_length, uint32_t barcode, uint32_t fragment_start_position, uint16_t fragment_length, uint8_t mapq, uint8_t direction, uint8_t is_unique, uint8_t num_dups, uint16_t positive_alignment_length, uint16_t negative_alignment_length, std::vector<MappingRecord> *mappings_on_diff_ref_seqs);
  void EmplaceBackMappingRecord(uint32_t read_id, const char *read1_name, const char *read2_name, uint16_t read1_length, uint16_t read2_length, uint32_t barcode, uint32_t fragment_start_position, uint16_t fragment_length, uint8_t mapq1, uint8_t mapq2, uint8_t direction, uint8_t is_unique, uint8_t num_dups, uint16_t positive_alignment_length, uint16_t negative_alignment_length, std::vector<MappingRecord> *mappings_on_diff_ref_seqs);
  void ApplyTn5ShiftOnPairedEndMapping(uint32_t num_reference_sequences, std::vector<std::vector<MappingRecord> > *mappings);

  // For single-end read mapping
@@ -151,7 +151,7 @@ class Chromap {
  void OutputMappingStatistics();
  void OutputMappingStatistics(uint32_t num_reference_sequences, const std::vector<std::vector<MappingRecord> > &uni_mappings, const std::vector<std::vector<MappingRecord> > &multi_mappings);
  uint8_t GetMAPQForSingleEndRead(int error_threshold, int num_candidates, uint32_t repetitive_seed_length, uint16_t alignment_length, int min_num_errors, int num_best_mappings, int second_min_num_errors, int num_second_best_mappings);
  uint8_t GetMAPQForPairedEndRead(int num_positive_candidates, int num_negative_candidates, uint32_t repetitive_seed_length1, uint32_t repetitive_seed_length2, uint16_t positive_alignment_length, uint16_t negative_alignment_length, int min_sum_errors, int num_best_mappings, int second_min_sum_errors, int num_second_best_mappings, int min_num_errors1, int min_num_errors2, int num_best_mappings1, int num_best_mappings2, int second_min_num_errors1, int second_min_num_errors2, int num_second_best_mappings1, int num_second_best_mappings2);
  uint8_t GetMAPQForPairedEndRead(int num_positive_candidates, int num_negative_candidates, uint32_t repetitive_seed_length1, uint32_t repetitive_seed_length2, uint16_t positive_alignment_length, uint16_t negative_alignment_length, int min_sum_errors, int num_best_mappings, int second_min_sum_errors, int num_second_best_mappings, int min_num_errors1, int min_num_errors2, int num_best_mappings1, int num_best_mappings2, int second_min_num_errors1, int second_min_num_errors2, int num_second_best_mappings1, int num_second_best_mappings2, uint8_t &mapq1, uint8_t &mapq2);
  void SortOutputMappings(uint32_t num_reference_sequences, std::vector<std::vector<MappingRecord> > *mappings);
  void BuildAugmentedTree(uint32_t ref_id);
  uint32_t GetNumOverlappedMappings(uint32_t ref_id, const MappingRecord &mapping);
+62 −52
Original line number Diff line number Diff line
@@ -69,44 +69,53 @@ void Index::GenerateMinimizerSketch(const SequenceBatch &sequence_batch, uint32_
      unambiguous_length = 0;
    }
    buffer[position_in_buffer] = current_seed; // need to do this here as appropriate position_in_buffer and buf[position_in_buffer] are needed below
    if (unambiguous_length == window_size_ + kmer_size_ - 1 && min_seed.first != UINT64_MAX) { // special case for the first window - because identical k-mers are not stored yet
      //
      //bool found_first_min_in_first_loop = false;
      //bool found_first_min_in_second_loop = false; // For debug
      //int first_min_position_in_buffer = 0;
    //if (unambiguous_length == window_size_ + kmer_size_ - 1 && min_seed.first != UINT64_MAX) { // special case for the first window - because identical k-mers are not stored yet
    //  //
    //  //bool found_first_min_in_first_loop = false;
    //  //bool found_first_min_in_second_loop = false; // For debug
    //  //int first_min_position_in_buffer = 0;
    //  //for (int j = position_in_buffer + 1; j < window_size_; ++j)
    //  //  if (min_seed.first == buffer[j].first) {
    //  //    found_first_min_in_first_loop = true;
    //  //    first_min_position_in_buffer = j;
    //  //    break;
    //  //  }
    //  //if (!found_first_min_in_first_loop) {
    //  //  for (int j = 0; j < position_in_buffer; ++j)
    //  //    if (min_seed.first == buffer[j].first) {
    //  //      found_first_min_in_second_loop = true;
    //  //      first_min_position_in_buffer = j;
    //  //      break;
    //  //    }
    //  //}
    //  //assert(found_first_min_in_first_loop || found_first_min_in_second_loop);
    //  //bool found_pre_seed = false;
    //  //if (found_first_min_in_first_loop) {
    //  //  for (int j = position_in_buffer + 1; j < first_min_position_in_buffer; ++j)
    //  //    if (buffer[j].first < pre_seed.first) {
    //  //      pre_seed = buffer[j];
    //  //      found_pre_seed = true;
    //  //    }
    //  //}
    //  //if (found_first_min_in_second_loop) {
    //  //  for (int j = 0; j < first_min_position_in_buffer; ++j)
    //  //    if (buffer[j].first < pre_seed.first) {
    //  //      pre_seed = buffer[j];
    //  //      found_pre_seed = true;
    //  //    }
    //  //}
    //  //if (found_pre_seed)
    //  //  minimizers->push_back(pre_seed);
    //  //
    //  for (int j = position_in_buffer + 1; j < window_size_; ++j)
      //  if (min_seed.first == buffer[j].first) {
      //    found_first_min_in_first_loop = true;
      //    first_min_position_in_buffer = j;
      //    break;
      //  }
      //if (!found_first_min_in_first_loop) {
    //    if (min_seed.first == buffer[j].first && buffer[j].second != min_seed.second) 
    //      minimizers->push_back(buffer[j]);
    //  for (int j = 0; j < position_in_buffer; ++j)
      //    if (min_seed.first == buffer[j].first) {
      //      found_first_min_in_second_loop = true;
      //      first_min_position_in_buffer = j;
      //      break;
      //    }
      //}
      //assert(found_first_min_in_first_loop || found_first_min_in_second_loop);
      //bool found_pre_seed = false;
      //if (found_first_min_in_first_loop) {
      //  for (int j = position_in_buffer + 1; j < first_min_position_in_buffer; ++j)
      //    if (buffer[j].first < pre_seed.first) {
      //      pre_seed = buffer[j];
      //      found_pre_seed = true;
      //    }
    //    if (min_seed.first == buffer[j].first && buffer[j].second != min_seed.second) 
    //      minimizers->push_back(buffer[j]);
    //}
      //if (found_first_min_in_second_loop) {
      //  for (int j = 0; j < first_min_position_in_buffer; ++j)
      //    if (buffer[j].first < pre_seed.first) {
      //      pre_seed = buffer[j];
      //      found_pre_seed = true;
      //    }
      //}
      //if (found_pre_seed)
      //  minimizers->push_back(pre_seed);
      //

    if (unambiguous_length == window_size_ + kmer_size_ - 1 && min_seed.first != UINT64_MAX && min_seed.first < current_seed.first) { // special case for the first window - because identical k-mers are not stored yet
      for (int j = position_in_buffer + 1; j < window_size_; ++j)
        if (min_seed.first == buffer[j].first && buffer[j].second != min_seed.second) 
          minimizers->push_back(buffer[j]);
@@ -114,6 +123,7 @@ void Index::GenerateMinimizerSketch(const SequenceBatch &sequence_batch, uint32_
        if (min_seed.first == buffer[j].first && buffer[j].second != min_seed.second) 
          minimizers->push_back(buffer[j]);
    }

    if (current_seed.first <= min_seed.first) { // a new minimum; then write the old min
      if (unambiguous_length >= window_size_ + kmer_size_ && min_seed.first != UINT64_MAX) {
        minimizers->push_back(min_seed);
+3 −2
Original line number Diff line number Diff line
@@ -43,11 +43,12 @@ struct PairedPAFMapping {
  uint16_t fragment_length;
  uint16_t positive_alignment_length;
  uint16_t negative_alignment_length;
  uint8_t mapq : 6, direction : 1, is_unique : 1;
  uint8_t mapq;
  uint16_t mapq1 : 6, mapq2 : 6, direction : 1, is_unique : 1, reserved : 2;
  uint8_t num_dups;
  //uint8_t mapq; // least significant bit saves the direction of mapping
  bool operator<(const PairedPAFMapping& m) const {
    return std::tie(fragment_start_position, fragment_length, mapq, direction, is_unique, read_id, positive_alignment_length, negative_alignment_length) < std::tie(m.fragment_start_position, m.fragment_length, m.mapq, m.direction, m.is_unique, m.read_id, m.positive_alignment_length, m.negative_alignment_length);
    return std::tie(fragment_start_position, fragment_length, mapq1, mapq2, direction, is_unique, read_id, positive_alignment_length, negative_alignment_length) < std::tie(m.fragment_start_position, m.fragment_length, m.mapq1, m.mapq2, m.direction, m.is_unique, m.read_id, m.positive_alignment_length, m.negative_alignment_length);
  }
  bool operator==(const PairedPAFMapping& m) const {
    return std::tie(fragment_start_position, fragment_length) == std::tie(m.fragment_start_position, m.fragment_length);