Commit df39e2a9 authored by Haowen Zhang
Browse files

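Tune parameters (OpenMP taskloop scheduling, default read batch size, initial barcode sample size, and repetitive-read candidate cutoffs)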

parent ccd90a8d
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -753,8 +753,9 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
          {
            num_loaded_pairs_for_loading = LoadPairedEndReadsWithBarcodes(&read_batch1_for_loading, &read_batch2_for_loading, &barcode_batch_for_loading);
          } // end of openmp loading task
          int grain_size = 10000;
#pragma omp taskloop grainsize(grain_size) //num_tasks(num_threads_* 50)
          //int grain_size = 5000;
//#pragma omp taskloop grainsize(grain_size) //num_tasks(num_threads_* 50)
#pragma omp taskloop num_tasks(num_threads_* num_threads_)
          for (uint32_t pair_index = 0; pair_index < num_loaded_pairs; ++pair_index) {
            read_batch1.PrepareNegativeSequenceAt(pair_index);
            read_batch2.PrepareNegativeSequenceAt(pair_index);
+2 −2
Original line number Diff line number Diff line
@@ -218,7 +218,7 @@ class Chromap {
  bool output_mapping_in_BED_;
  bool output_mapping_in_TagAlign_;
  bool output_mapping_in_PAF_;
  uint32_t read_batch_size_ = 1000000; // default batch size, # reads for single-end reads, # read pairs for paired-end reads
  uint32_t read_batch_size_ = 500000; // default batch size, # reads for single-end reads, # read pairs for paired-end reads
  bool low_memory_mode_;
  bool cell_by_bin_;
  int bin_size_;
@@ -258,7 +258,7 @@ class Chromap {
  uint64_t num_reads_ = 0;
  uint64_t num_duplicated_reads_ = 0; // # identical reads
  // For barcode stats
  uint64_t initial_num_sample_barcodes_ = 100000000;
  uint64_t initial_num_sample_barcodes_ = 50000000;
  uint64_t num_sample_barcodes_ = 0;
  uint64_t num_barcode_in_whitelist_ = 0;
  uint64_t num_corrected_barcode_ = 0;
+14 −11
Original line number Diff line number Diff line
@@ -492,12 +492,11 @@ void Index::GenerateCandidatesFromRepetitiveReadWithMateInfo(int error_threshold
      ++best_candidate_num;
    }
  }
  if (best_candidate_num >= 500 
    || (max_count < min_num_seeds_required_for_mapping_ && best_candidate_num >= 5)
    || (max_count == min_num_seeds_required_for_mapping_ && best_candidate_num >=50)) 
  if (best_candidate_num >= 300 || (max_count <= min_num_seeds_required_for_mapping_ && best_candidate_num >= 200)) {
    return;
  }
  std::vector<std::pair<uint64_t, uint64_t> > boundaries;
  boundaries.reserve(500);
  boundaries.reserve(300);
  for (uint32_t ci = 0; ci < mate_candidates_size; ++ci) {
    if (mate_candidates->at(ci).count == max_count) {
      std::pair<uint64_t, uint64_t> r;
@@ -552,7 +551,7 @@ void Index::GenerateCandidatesFromRepetitiveReadWithMateInfo(int error_threshold
    } else {
      uint32_t offset = value >> 32;
      uint32_t num_occurrences = value;
      uint32_t prev_l = 0;
      int32_t prev_l = 0;
      for (uint32_t bi = 0; bi < boundary_size; ++bi) {
        // use binary search to locate the coordinate near mate position
        int32_t l = prev_l, m = 0, r = num_occurrences - 1;
@@ -619,14 +618,17 @@ void Index::GenerateCandidates(int error_threshold, const std::vector<std::pair<
  int repetitive_seed_count = CollectCandidates(max_seed_frequencies_[0], max_seed_frequencies_[0], minimizers, repetitive_seed_length, positive_hits, negative_hits, false);
  //std::cerr << "rep seed: " << repetitive_seed_count << "\n";
  //if ((repetitive_seed_count > (int)minimizers.size() / 2 && minimizers.size() >= 10)) {
  bool checkmore = false;
  bool use_high_frequency_minimizers = false;
  if (positive_hits->size() + negative_hits->size() == 0) {
    positive_hits->clear();
    negative_hits->clear();
    *repetitive_seed_length = 0;
    repetitive_seed_count = CollectCandidates(max_seed_frequencies_[1], max_seed_frequencies_[0], minimizers, repetitive_seed_length, positive_hits, negative_hits, true);
    //recollect = false;
    checkmore = true;
    use_high_frequency_minimizers = true;
    if (positive_hits->size() == 0 || negative_hits->size() == 0) {
      use_high_frequency_minimizers = false;
    }
  }
  //if ((positive_candidates->size() == 0 || negative_candidates->size() == 0) && recollect) {
  ////if (positive_candidates->size() + negative_candidates->size() == 0 && recollect) {
@@ -658,8 +660,9 @@ void Index::GenerateCandidates(int error_threshold, const std::vector<std::pair<
  int num_required_seeds = minimizers.size() - repetitive_seed_count;
  num_required_seeds = num_required_seeds > 1 ? num_required_seeds : 1; 
  num_required_seeds = num_required_seeds > min_num_seeds_required_for_mapping_ ? min_num_seeds_required_for_mapping_ : num_required_seeds;
  if (checkmore)
  if (use_high_frequency_minimizers) {
  	num_required_seeds = min_num_seeds_required_for_mapping_;
  }
  GenerateCandidatesOnOneDirection(error_threshold, num_required_seeds, positive_hits, positive_candidates);
  GenerateCandidatesOnOneDirection(error_threshold, num_required_seeds, negative_hits, negative_candidates);
  //fprintf(stderr, "p+n: %d\n", positive_candidates->size() + negative_candidates->size()) ;