Commit bfae352b authored by Haowen Zhang's avatar Haowen Zhang
Browse files

Merge branch 'li_dev'

Conflicts:
	src/index.cc
parents ab48f22d 26d51a3c
Loading
Loading
Loading
Loading
+8 −8
Original line number Diff line number Diff line
@@ -563,28 +563,28 @@ void Chromap<MappingRecord>::SupplementCandidates(const Index &index, uint32_t r
    uint32_t mm_count = minimizers->size();
    bool augment_flag = true;
    uint32_t candidate_num = positive_candidates->size();
    if (positive_candidates->size() >= (uint32_t)max_seed_frequencies_[0]) {
      augment_flag = false;
    } else {
    //if (positive_candidates->size() >= (uint32_t)max_seed_frequencies_[0]) {
    //  augment_flag = false;
    //} else {
      for (uint32_t i = 0; i < candidate_num; ++i) {
        if (positive_candidates->at(i).count >= mm_count / 2) {
          augment_flag = false;
          break;
        }
      }
    }
    //}
    candidate_num = negative_candidates->size();
    if (augment_flag) {
      if (negative_candidates->size() >= (uint32_t)max_seed_frequencies_[0]) {
        augment_flag = false;
      } else {
      //if (negative_candidates->size() >= (uint32_t)max_seed_frequencies_[0]) {
      //  augment_flag = false;
      //} else {
        for (uint32_t i = 0; i < candidate_num; ++i) {
          if (negative_candidates->at(i).count >= mm_count / 2) {
            augment_flag = false;
            break;
          }
        }
      }
      //}
    }
    if (augment_flag) {
      positive_hits->clear();
+69 −48
Original line number Diff line number Diff line
@@ -492,14 +492,35 @@ void Index::GenerateCandidatesFromRepetitiveReadWithMateInfo(int error_threshold
      ++best_candidate_num;
    }
  }
  //if (best_candidate_num != 1 || max_count < min_num_seeds_required_for_mapping_) 
  if (best_candidate_num >= 500) 
  if (best_candidate_num >= 500)//|| max_count < min_num_seeds_required_for_mapping_) 
    return;

  std::vector<std::pair<uint64_t, uint64_t> > boundaries;
  boundaries.reserve(500);
  for (uint32_t ci = 0; ci < mate_candidates_size; ++ci) {
    if (mate_candidates->at(ci).count != max_count) {
      continue;
    if (mate_candidates->at(ci).count == max_count) {
      std::pair<uint64_t, uint64_t> r;
      r.first = (mate_candidates->at(ci).position < range) ? 0 : (mate_candidates->at(ci).position - range);
      r.second = mate_candidates->at(ci).position + range;
      boundaries.push_back(r);
    }
  }
  
  // Merge adjacent boundary point. assume the candidates are sorted by coordinate so boundaries are also sorted.
  uint32_t boundary_size = 1;
  uint32_t raw_boundary_size = boundaries.size();
  if (raw_boundary_size == 0)
    return;
  for (uint32_t bi = 1; bi < raw_boundary_size; ++bi) {
    if (boundaries[boundary_size - 1].second < boundaries[bi].first) {
      boundaries[boundary_size] = boundaries[bi];
      ++boundary_size;
    } else {
      boundaries[boundary_size - 1].second = boundaries[bi].second;
    }
  }
  boundaries.resize(boundary_size);

  *repetitive_seed_length = 0;
  for (uint32_t mi = 0; mi < num_minimizers; ++mi) {
    khiter_t khash_iterator = kh_get(k64, lookup_table_, minimizers[mi].first << 1);
@@ -530,14 +551,15 @@ void Index::GenerateCandidatesFromRepetitiveReadWithMateInfo(int error_threshold
    } else {
      uint32_t offset = value >> 32;
      uint32_t num_occurrences = value;
      for (uint32_t bi = 0; bi < boundary_size; ++bi) {
        // use binary search to locate the coordinate near mate position
        int32_t l = 0, m = 0, r = num_occurrences - 1;
        uint64_t boundary = (mate_candidates->at(ci).position < range) ? 0 : (mate_candidates->at(ci).position - range) ;
        uint64_t boundary = boundaries[bi].first;
        while (l <= r) {
          m = (l + r) / 2;
          uint64_t value = (occurrence_table_[offset + m])>>1;
          //std::cerr << "l: " << l << ", r: " << r << ", m: " << m << ", val: " << (value >> 32) << ", " << (uint32_t)value << ", bd: " << (boundary >> 32) << ", " << (uint32_t)(boundary) << "\n";
          //if (value <= boundary) {
          //if (value <= boundary) 
          if (value < boundary) {
            l = m + 1;
          } else if (value > boundary) {
@@ -549,7 +571,7 @@ void Index::GenerateCandidatesFromRepetitiveReadWithMateInfo(int error_threshold
        //printf("%s: %d %d: %d %d\n", __func__, m, num_occurrences, (int)(boundary>>32), (int)boundary) ;
        for (uint32_t oi = m; oi < num_occurrences; ++oi) {
          uint64_t value = occurrence_table_[offset + oi];
          if ((value >> 1) > mate_candidates->at(ci).position + range)
          if ((value >> 1) > boundaries[bi].second)
            break;
          uint64_t reference_id = value >> 33;
          uint32_t reference_position = value >> 1;
@@ -565,7 +587,7 @@ void Index::GenerateCandidatesFromRepetitiveReadWithMateInfo(int error_threshold
            hits->push_back(candidate);
          }
        }

      } // for bi
      if (num_occurrences >= (uint32_t)max_seed_frequencies_[0]) {
        if (previous_repetitive_seed_position > read_position) { // first minimizer
          *repetitive_seed_length += kmer_size_;
@@ -578,9 +600,8 @@ void Index::GenerateCandidatesFromRepetitiveReadWithMateInfo(int error_threshold
        }
        previous_repetitive_seed_position = read_position;
      }
    } // for if-else on occurence
  } // for mi
    }
  }

  std::sort(hits->begin(), hits->end());
  //for (uint32_t i = 0 ; i < hits->size(); ++i)
@@ -630,7 +651,7 @@ void Index::GenerateCandidates(int error_threshold, const std::vector<std::pair<
  //printf("p+n: %d. %d %d\n", positive_hits->size() + negative_hits->size(), repetitive_seed_count, minimizers.size()) ;

  int num_required_seeds = minimizers.size() - repetitive_seed_count;
  num_required_seeds = num_required_seeds >= 1 ? num_required_seeds : 1; 
  num_required_seeds = num_required_seeds > 1 ? num_required_seeds : 1; 
  num_required_seeds = num_required_seeds > min_num_seeds_required_for_mapping_ ? min_num_seeds_required_for_mapping_ : num_required_seeds;
  GenerateCandidatesOnOneDirection(error_threshold, num_required_seeds, positive_hits, positive_candidates);
  GenerateCandidatesOnOneDirection(error_threshold, num_required_seeds, negative_hits, negative_candidates);