Commit 66adea22 authored by Haowen Zhang's avatar Haowen Zhang
Browse files

Merge close candidates

parent 4c63cd30
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -429,9 +429,9 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
        positive_candidates2.clear();
        negative_candidates1.clear();
        negative_candidates2.clear();
        index.GenerateCandidates(minimizers1, &positive_hits1, &negative_hits1, &positive_candidates1, &negative_candidates1);
        index.GenerateCandidates(error_threshold_, minimizers1, &positive_hits1, &negative_hits1, &positive_candidates1, &negative_candidates1);
        uint32_t current_num_candidates1 = positive_candidates1.size() + negative_candidates1.size();
        index.GenerateCandidates(minimizers2, &positive_hits2, &negative_hits2, &positive_candidates2, &negative_candidates2);
        index.GenerateCandidates(error_threshold_, minimizers2, &positive_hits2, &negative_hits2, &positive_candidates2, &negative_candidates2);
        uint32_t current_num_candidates2 = positive_candidates2.size() + negative_candidates2.size();
        if (current_num_candidates1 > 0 && current_num_candidates2 > 0) {
          positive_candidates1.swap(positive_hits1);
@@ -949,7 +949,7 @@ void Chromap<MappingRecord>::MapSingleEndReads() {
        negative_hits.clear();
        positive_candidates.clear();
        negative_candidates.clear();
        index.GenerateCandidates(minimizers, &positive_hits, &negative_hits, &positive_candidates, &negative_candidates);
        index.GenerateCandidates(error_threshold_, minimizers, &positive_hits, &negative_hits, &positive_candidates, &negative_candidates);
        uint32_t current_num_candidates = positive_candidates.size() + negative_candidates.size(); 
        //std::cerr << "Generated candidates!\n";
        if (current_num_candidates > 0) {
+7 −7
Original line number Diff line number Diff line
@@ -261,7 +261,7 @@ void Index::Load() {
  std::cerr << "Loaded index successfully in "<< Chromap<>::GetRealTime() - real_start_time << "s.\n";
}

void Index::GenerateCandidatesOnOneDirection(std::vector<uint64_t> *hits, std::vector<uint64_t> *candidates) {
void Index::GenerateCandidatesOnOneDirection(int error_threshold, std::vector<uint64_t> *hits, std::vector<uint64_t> *candidates) {
  hits->emplace_back(UINT64_MAX);
  if (hits->size() > 0) {
    std::sort(hits->begin(), hits->end());
@@ -272,7 +272,7 @@ void Index::GenerateCandidatesOnOneDirection(std::vector<uint64_t> *hits, std::v
    for (uint32_t pi = 1; pi < hits->size(); ++pi) {
      uint32_t current_reference_id = (*hits)[pi] >> 32;
      uint32_t current_reference_position = (*hits)[pi];
      if (current_reference_id != previous_reference_id || current_reference_position - previous_reference_position > 0) {
      if (current_reference_id != previous_reference_id || current_reference_position > previous_reference_position + error_threshold) {
        if (count >= min_num_seeds_required_for_mapping_) {
          candidates->push_back(previous_hit);
        }
@@ -338,18 +338,18 @@ void Index::CollectCandiates(int max_seed_frequency, const std::vector<std::pair
  }
}

void Index::GenerateCandidates(const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, std::vector<uint64_t> *positive_hits, std::vector<uint64_t> *negative_hits, std::vector<uint64_t> *positive_candidates, std::vector<uint64_t> *negative_candidates) {
void Index::GenerateCandidates(int error_threshold, const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, std::vector<uint64_t> *positive_hits, std::vector<uint64_t> *negative_hits, std::vector<uint64_t> *positive_candidates, std::vector<uint64_t> *negative_candidates) {
  CollectCandiates(max_seed_frequencies_[0], minimizers, positive_hits, negative_hits);
  // Now I can generate primer chain in candidates
  // Let me use sort for now, but I can use merge later.
  GenerateCandidatesOnOneDirection(positive_hits, positive_candidates);
  GenerateCandidatesOnOneDirection(negative_hits, negative_candidates);
  GenerateCandidatesOnOneDirection(error_threshold, positive_hits, positive_candidates);
  GenerateCandidatesOnOneDirection(error_threshold, negative_hits, negative_candidates);
  if (positive_candidates->size() + negative_candidates->size() == 0) {
    positive_hits->clear();
    negative_hits->clear();
    CollectCandiates(max_seed_frequencies_[1], minimizers, positive_hits, negative_hits);
    GenerateCandidatesOnOneDirection(positive_hits, positive_candidates);
    GenerateCandidatesOnOneDirection(negative_hits, negative_candidates);
    GenerateCandidatesOnOneDirection(error_threshold, positive_hits, positive_candidates);
    GenerateCandidatesOnOneDirection(error_threshold, negative_hits, negative_candidates);
    // TODO: if necessary, we can further improve the rescue. But the code below is not thread safe. We can think about this later
//    if (positive_candidates->size() + negative_candidates->size() == 0) {
//      --min_num_seeds_required_for_mapping_;
+2 −2
Original line number Diff line number Diff line
@@ -44,8 +44,8 @@ class Index {
  void Construct(uint32_t num_sequences, const SequenceBatch &reference);
  void Save();
  void Load();
  void GenerateCandidatesOnOneDirection(std::vector<uint64_t> *hits, std::vector<uint64_t> *candidates);
  void GenerateCandidates(const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, std::vector<uint64_t> *positive_hits, std::vector<uint64_t> *negative_hits, std::vector<uint64_t> *positive_candidates, std::vector<uint64_t> *negative_candidates);
  void GenerateCandidatesOnOneDirection(int error_threshold, std::vector<uint64_t> *hits, std::vector<uint64_t> *candidates);
  void GenerateCandidates(int error_threshold, const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, std::vector<uint64_t> *positive_hits, std::vector<uint64_t> *negative_hits, std::vector<uint64_t> *positive_candidates, std::vector<uint64_t> *negative_candidates);
  void CollectCandiates(int max_seed_frequency, const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, std::vector<uint64_t> *positive_hits, std::vector<uint64_t> *negative_hits);
  inline static uint64_t Hash64(uint64_t key, const uint64_t mask) {
    key = (~key + (key << 21)) & mask; // key = (key << 21) - key - 1;