Commit c5a8ca7e authored by Haowen Zhang's avatar Haowen Zhang
Browse files

Func to get a mapping supported by all minimizers.

parent 7f283b6f
Loading
Loading
Loading
Loading
+121 −85
Original line number Diff line number Diff line
@@ -61,6 +61,10 @@ class MappingGenerator {
      std::vector<std::vector<MappingRecord>> &mappings_on_diff_ref_seqs);

 private:
  bool DirectlyGenerateOneNonSplitMappingSupportedByAllMinimizers(
      const SequenceBatch &read_batch, uint32_t read_index,
      const SequenceBatch &reference, MappingMetadata &mapping_metadata);

  void VerifyCandidatesOnOneDirectionUsingSIMD(
      Direction candidate_direction, uint32_t read_index,
      const SequenceBatch &read_batch, const SequenceBatch &reference,
@@ -132,17 +136,6 @@ template <typename MappingRecord>
void MappingGenerator<MappingRecord>::VerifyCandidates(
    const SequenceBatch &read_batch, uint32_t read_index,
    const SequenceBatch &reference, MappingMetadata &mapping_metadata) {
  const std::vector<std::pair<uint64_t, uint64_t>> &minimizers =
      mapping_metadata.minimizers_;
  const std::vector<Candidate> &positive_candidates =
      mapping_metadata.positive_candidates_;
  const std::vector<Candidate> &negative_candidates =
      mapping_metadata.negative_candidates_;
  std::vector<std::pair<int, uint64_t>> &positive_mappings =
      mapping_metadata.positive_mappings_;
  std::vector<std::pair<int, uint64_t>> &negative_mappings =
      mapping_metadata.negative_mappings_;

  int &min_num_errors = mapping_metadata.min_num_errors_;
  int &num_best_mappings = mapping_metadata.num_best_mappings_;
  int &second_min_num_errors = mapping_metadata.second_min_num_errors_;
@@ -155,9 +148,82 @@ void MappingGenerator<MappingRecord>::VerifyCandidates(

  // Directly obtain the non-split mapping in ideal case and return without
  // running actual verification.
  const bool is_mapping_generated =
      DirectlyGenerateOneNonSplitMappingSupportedByAllMinimizers(
          read_batch, read_index, reference, mapping_metadata);
  if (is_mapping_generated) {
    return;
  }

  // Use more sophicated approach to obtain the mapping.
  // Sort the candidates by their count in descending order.
  // TODO: check if this sorting is necessary.
  mapping_metadata.SortCandidates();

  // For split alignments, SIMD cannot be used.
  if (mapping_parameters_.split_alignment) {
    VerifyCandidatesOnOneDirection(kPositive, read_index, read_batch, reference,
                                   mapping_metadata);

    VerifyCandidatesOnOneDirection(kNegative, read_index, read_batch, reference,
                                   mapping_metadata);
    return;
  }

  const size_t num_positive_candidates =
      mapping_metadata.positive_candidates_.size();
  const size_t num_negative_candidates =
      mapping_metadata.negative_candidates_.size();

  // For non-split alignments, use SIMD when possible.
  if (num_positive_candidates < (size_t)NUM_VPU_LANES_) {
    VerifyCandidatesOnOneDirection(kPositive, read_index, read_batch, reference,
                                   mapping_metadata);
  } else {
    VerifyCandidatesOnOneDirectionUsingSIMD(kPositive, read_index, read_batch,
                                            reference, mapping_metadata);
  }

  if (num_negative_candidates < (size_t)NUM_VPU_LANES_) {
    VerifyCandidatesOnOneDirection(kNegative, read_index, read_batch, reference,
                                   mapping_metadata);
  } else {
    VerifyCandidatesOnOneDirectionUsingSIMD(kNegative, read_index, read_batch,
                                            reference, mapping_metadata);
  }
}

// Return true when there is one non-split mapping generated and the mapping is
// supported by all the minimizers.
template <typename MappingRecord>
bool MappingGenerator<MappingRecord>::
    DirectlyGenerateOneNonSplitMappingSupportedByAllMinimizers(
        const SequenceBatch &read_batch, uint32_t read_index,
        const SequenceBatch &reference, MappingMetadata &mapping_metadata) {
  if (mapping_parameters_.split_alignment) {
    return false;
  }

  const std::vector<Candidate> &positive_candidates =
      mapping_metadata.positive_candidates_;
  const std::vector<Candidate> &negative_candidates =
      mapping_metadata.negative_candidates_;

  const bool has_one_candidate =
      (positive_candidates.size() + negative_candidates.size() == 1);
  if (!mapping_parameters_.split_alignment && has_one_candidate) {

  if (!has_one_candidate) {
    return false;
  }

  const std::vector<std::pair<uint64_t, uint64_t>> &minimizers =
      mapping_metadata.minimizers_;

  std::vector<std::pair<int, uint64_t>> &positive_mappings =
      mapping_metadata.positive_mappings_;
  std::vector<std::pair<int, uint64_t>> &negative_mappings =
      mapping_metadata.negative_mappings_;

  uint32_t num_all_minimizer_candidates = 0;
  uint32_t all_minimizer_candidate_index = 0;
  Direction all_minimizer_candidate_direction = kPositive;
@@ -187,10 +253,13 @@ void MappingGenerator<MappingRecord>::VerifyCandidates(
    }
  }

    if (num_all_minimizer_candidates == 1) {
      num_best_mappings = 1;
      num_second_best_mappings = 0;
      min_num_errors = 0;
  if (num_all_minimizer_candidates != 1) {
    return false;
  }

  mapping_metadata.min_num_errors_ = 0;
  mapping_metadata.num_best_mappings_ = 1;
  mapping_metadata.num_second_best_mappings_ = 0;

  uint32_t rid = 0;
  uint32_t position = 0;
@@ -201,8 +270,8 @@ void MappingGenerator<MappingRecord>::VerifyCandidates(
    position = positive_candidates[all_minimizer_candidate_index].position;
  } else {
    rid = negative_candidates[all_minimizer_candidate_index].position >> 32;
        position = (uint32_t)negative_candidates[all_minimizer_candidate_index]
                       .position -
    position =
        (uint32_t)negative_candidates[all_minimizer_candidate_index].position -
        read_length + 1;
  }

@@ -223,43 +292,10 @@ void MappingGenerator<MappingRecord>::VerifyCandidates(
      negative_mappings.emplace_back(
          0, negative_candidates[all_minimizer_candidate_index].position);
    }

        return;
      }
    }
    return true;
  }

  // Use more sophicated approach to obtain the mapping.
  // Sort the candidates by their count in descending order.
  // TODO: check if this sorting is necessary.
  mapping_metadata.SortCandidates();

  // For split alignments, SIMD cannot be used.
  if (mapping_parameters_.split_alignment) {
    VerifyCandidatesOnOneDirection(kPositive, read_index, read_batch, reference,
                                   mapping_metadata);

    VerifyCandidatesOnOneDirection(kNegative, read_index, read_batch, reference,
                                   mapping_metadata);
    return;
  }

  // For non-split alignments, use SIMD when possible.
  if (positive_candidates.size() < (size_t)NUM_VPU_LANES_) {
    VerifyCandidatesOnOneDirection(kPositive, read_index, read_batch, reference,
                                   mapping_metadata);
  } else {
    VerifyCandidatesOnOneDirectionUsingSIMD(kPositive, read_index, read_batch,
                                            reference, mapping_metadata);
  }

  if (negative_candidates.size() < (size_t)NUM_VPU_LANES_) {
    VerifyCandidatesOnOneDirection(kNegative, read_index, read_batch, reference,
                                   mapping_metadata);
  } else {
    VerifyCandidatesOnOneDirectionUsingSIMD(kNegative, read_index, read_batch,
                                            reference, mapping_metadata);
  }
  return false;
}

template <typename MappingRecord>