Commit da883d90 authored by Haowen Zhang's avatar Haowen Zhang
Browse files

Code refactor and same results

parent 5c533895
Loading
Loading
Loading
Loading
+190 −187
Original line number Diff line number Diff line
@@ -15,14 +15,6 @@
#include "mmcache.hpp"

namespace chromap {
struct _mm_history {
	bool skip;
	std::vector<std::pair<uint64_t, uint64_t> > minimizers;
	std::vector<Candidate> positive_candidates;
	std::vector<Candidate> negative_candidates;
  uint32_t repetitive_seed_length;
};

template <typename MappingRecord>
void Chromap<MappingRecord>::TrimAdapterForPairedEndRead(uint32_t pair_index, SequenceBatch *read_batch1, SequenceBatch *read_batch2) {
  const char *read1 = read_batch1->GetSequenceAt(pair_index);
@@ -528,33 +520,118 @@ void Chromap<MappingRecord>::UpdateBarcodeAbundance(uint32_t num_loaded_barcodes
  std::cerr << "Update barcode abundance using " << num_sample_barcodes_ << " in "<< Chromap<>::GetRealTime() - real_start_time << "s.\n";
}

template <typename MappingRecord>
void Chromap<MappingRecord>::SupplementCandidates(const Index &index, uint32_t repetitive_seed_length1, uint32_t repetitive_seed_length2, std::vector<std::pair<uint64_t, uint64_t> > &minimizers1, std::vector<std::pair<uint64_t, uint64_t> > &minimizers2, std::vector<uint64_t> &positive_hits1, std::vector<uint64_t> &positive_hits2, std::vector<Candidate> &positive_candidates1, std::vector<Candidate> &positive_candidates2, std::vector<Candidate> &positive_candidates1_buffer, std::vector<Candidate> &positive_candidates2_buffer, std::vector<uint64_t> &negative_hits1, std::vector<uint64_t> &negative_hits2, std::vector<Candidate> &negative_candidates1, std::vector<Candidate> &negative_candidates2, std::vector<Candidate> &negative_candidates1_buffer, std::vector<Candidate> &negative_candidates2_buffer) {
  std::vector<Candidate> augment_positive_candidates1;
  std::vector<Candidate> augment_positive_candidates2;
  std::vector<Candidate> augment_negative_candidates1;
  std::vector<Candidate> augment_negative_candidates2;
  for (int mate = 0; mate <= 1; ++mate) {
    std::vector<std::pair<uint64_t, uint64_t> > *minimizers;
    std::vector<uint64_t> *positive_hits;
    std::vector<uint64_t> *negative_hits;
    std::vector<Candidate> *positive_candidates;
    std::vector<Candidate> *negative_candidates;
    std::vector<Candidate> *mate_positive_candidates;
    std::vector<Candidate> *mate_negative_candidates;
    std::vector<Candidate> *augment_positive_candidates;
    std::vector<Candidate> *augment_negative_candidates;
    uint32_t *repetitive_seed_length ;
    if (mate == 0) {
      minimizers = &minimizers1;
      positive_hits = &positive_hits1;
      negative_hits = &negative_hits1;
      positive_candidates = &positive_candidates1;
      negative_candidates = &negative_candidates1;
      mate_positive_candidates = &positive_candidates2;
      mate_negative_candidates = &negative_candidates2;
      augment_positive_candidates = &augment_positive_candidates1;
      augment_negative_candidates = &augment_negative_candidates1;
      repetitive_seed_length = &repetitive_seed_length1;
    } else {
      minimizers = &minimizers2;
      positive_hits = &positive_hits2;
      negative_hits = &negative_hits2;
      positive_candidates = &positive_candidates2;
      negative_candidates = &negative_candidates2;
      mate_positive_candidates = &positive_candidates1;
      mate_negative_candidates = &negative_candidates1;
      augment_positive_candidates = &augment_positive_candidates2;
      augment_negative_candidates = &augment_negative_candidates2;
      repetitive_seed_length = &repetitive_seed_length2;
    }
    uint32_t mm_count = minimizers->size();
    bool augment_flag = true;
    uint32_t candidate_num = positive_candidates->size();
    for (uint32_t i = 0; i < candidate_num; ++i) {
      if (positive_candidates->at(i).count >= mm_count / 2)
        augment_flag = false;
    }
    candidate_num = negative_candidates->size();
    if (augment_flag) {
      for (uint32_t i = 0; i < candidate_num; ++i) {
        if (negative_candidates->at(i).count >= mm_count / 2)
          augment_flag = false;
      }
    }
    if (augment_flag) {
      positive_hits->clear();
      negative_hits->clear();
      if (mate_positive_candidates->size() > 0) {
        index.GenerateCandidatesFromRepetitiveReadWithMateInfo(error_threshold_, *minimizers, repetitive_seed_length, negative_hits, augment_negative_candidates, mate_positive_candidates, -1, 2 * max_insert_size_);
      }
      if (mate_negative_candidates->size() > 0) {
        index.GenerateCandidatesFromRepetitiveReadWithMateInfo(error_threshold_, *minimizers, repetitive_seed_length, positive_hits, augment_positive_candidates, mate_negative_candidates, 1, 2 * max_insert_size_);
      }
    }
  }
  if (augment_positive_candidates1.size() > 0) {
    MergeCandidates(positive_candidates1, augment_positive_candidates1, positive_candidates1_buffer);
  }
  if (augment_negative_candidates1.size() > 0) {
    MergeCandidates(negative_candidates1, augment_negative_candidates1, negative_candidates1_buffer);
  }
  if (augment_positive_candidates2.size() > 0) {
    MergeCandidates(positive_candidates2, augment_positive_candidates2, positive_candidates2_buffer);
  }
  if (augment_negative_candidates2.size() > 0) {
    MergeCandidates(negative_candidates2, augment_negative_candidates2, negative_candidates2_buffer);
  }
}

template <typename MappingRecord>
void Chromap<MappingRecord>::MapPairedEndReads() {
  double real_start_time = Chromap<>::GetRealTime();
  // Load reference
  SequenceBatch reference;
  reference.InitializeLoading(reference_file_path_);
  uint32_t num_reference_sequences = reference.LoadAllSequences();
  // Load index
  Index index(min_num_seeds_required_for_mapping_, max_seed_frequencies_, index_file_path_);
  index.Load();
  kmer_size_ = index.GetKmerSize();
  window_size_ = index.GetWindowSize();
  //index.Statistics(num_sequences, reference);
  // Initialize read batches
  SequenceBatch read_batch1(read_batch_size_);
  SequenceBatch read_batch2(read_batch_size_);
  SequenceBatch barcode_batch(read_batch_size_);
  SequenceBatch read_batch1_for_loading(read_batch_size_);
  SequenceBatch read_batch2_for_loading(read_batch_size_);
  SequenceBatch barcode_batch_for_loading(read_batch_size_);
  // Initialize cache
  mm_cache mm_to_candidates_cache(2000003);
  mm_to_candidates_cache.SetKmerLength(kmer_size_);
  struct _mm_history *mm_history1 = new struct _mm_history[read_batch_size_];
  struct _mm_history *mm_history2 = new struct _mm_history[read_batch_size_];
  // Initialize mapping container
  mappings_on_diff_ref_seqs_.reserve(num_reference_sequences);
  deduped_mappings_on_diff_ref_seqs_.reserve(num_reference_sequences);
  for (uint32_t i = 0; i < num_reference_sequences; ++i) {
    mappings_on_diff_ref_seqs_.emplace_back(std::vector<MappingRecord>());
    deduped_mappings_on_diff_ref_seqs_.emplace_back(std::vector<MappingRecord>());
  }
  // Initialize output tools
  if (output_mapping_in_BED_) {
    output_tools_ = std::unique_ptr<BEDPEOutputTools<MappingRecord> >(new BEDPEOutputTools<MappingRecord>);
  } else if (output_mapping_in_TagAlign_) {
@@ -565,6 +642,7 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
  output_tools_->InitializeMappingOutput(mapping_output_file_path_);
  uint32_t num_mappings_in_mem = 0;
  uint64_t max_num_mappings_in_mem = 1 * ((uint64_t)1 << 30) / sizeof(MappingRecord);
  // Preprocess barcodes for single cell data
  if (!is_bulk_data_) {
    if (!barcode_whitelist_file_path_.empty()) {
      LoadBarcodeWhitelist();
@@ -694,20 +772,16 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
              negative_candidates2_buffer.clear();
              uint32_t repetitive_seed_length1 = 0;
              uint32_t repetitive_seed_length2 = 0;
#ifdef LI_DEBUG
	      printf("Process read 1\n");
#endif
              if (mm_to_candidates_cache.Query(minimizers1, positive_candidates1, negative_candidates1, repetitive_seed_length1, read_batch1.GetSequenceLengthAt(pair_index)) == -1)
              // Generate candidates
              if (mm_to_candidates_cache.Query(minimizers1, positive_candidates1, negative_candidates1, repetitive_seed_length1, read_batch1.GetSequenceLengthAt(pair_index)) == -1) {
                index.GenerateCandidates(error_threshold_, minimizers1, &repetitive_seed_length1, &positive_hits1, &negative_hits1, &positive_candidates1, &negative_candidates1);
              }
              uint32_t current_num_candidates1 = positive_candidates1.size() + negative_candidates1.size();
#ifdef LI_DEBUG
	      printf("Process read 2\n");
#endif
              if (mm_to_candidates_cache.Query(minimizers2, positive_candidates2, negative_candidates2, repetitive_seed_length2, read_batch2.GetSequenceLengthAt(pair_index)) == -1)
              if (mm_to_candidates_cache.Query(minimizers2, positive_candidates2, negative_candidates2, repetitive_seed_length2, read_batch2.GetSequenceLengthAt(pair_index)) == -1) {
                index.GenerateCandidates(error_threshold_, minimizers2, &repetitive_seed_length2, &positive_hits2, &negative_hits2, &positive_candidates2, &negative_candidates2);
              }
              uint32_t current_num_candidates2 = positive_candidates2.size() + negative_candidates2.size();
              if (pair_index < num_loaded_pairs / 2 
	         && (pair_index < num_loaded_pairs / num_threads_ || num_reads_ < 2 * 5000000)) {
              if (pair_index < num_loaded_pairs / 2 && (pair_index < num_loaded_pairs / num_threads_ || num_reads_ < 2 * 5000000)) {
                mm_history1[pair_index].minimizers = minimizers1;
                mm_history1[pair_index].positive_candidates = positive_candidates1;
                mm_history1[pair_index].negative_candidates = negative_candidates1;
@@ -718,84 +792,9 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
                mm_history2[pair_index].repetitive_seed_length = repetitive_seed_length2;
              }
              // Test whether we need to augment the candidate list with mate information.
	      std::vector<Candidate> augment_positive_candidates1;
	      std::vector<Candidate> augment_positive_candidates2;
	      std::vector<Candidate> augment_negative_candidates1;
	      std::vector<Candidate> augment_negative_candidates2;
	      for (int mate = 0; mate <= 1; ++mate) {
		      std::vector<std::pair<uint64_t, uint64_t> > *minimizers;
		      std::vector<uint64_t> *positive_hits;
		      std::vector<uint64_t> *negative_hits;
		      std::vector<Candidate> *positive_candidates;
		      std::vector<Candidate> *negative_candidates;
		      std::vector<Candidate> *mate_positive_candidates;
		      std::vector<Candidate> *mate_negative_candidates;
		      std::vector<Candidate> *augment_positive_candidates;
		      std::vector<Candidate> *augment_negative_candidates;
		      uint32_t *repetitive_seed_length ;

		      if (mate == 0) {
			      minimizers = &minimizers1;
			      positive_hits = &positive_hits1;
			      negative_hits = &negative_hits1;
			      positive_candidates = &positive_candidates1;
			      negative_candidates = &negative_candidates1;
			      mate_positive_candidates = &positive_candidates2;
			      mate_negative_candidates = &negative_candidates2;
			      augment_positive_candidates = &augment_positive_candidates1;
			      augment_negative_candidates = &augment_negative_candidates1;
			      repetitive_seed_length = &repetitive_seed_length1;
		      } else {
			      minimizers = &minimizers2;
			      positive_hits = &positive_hits2;
			      negative_hits = &negative_hits2;
			      positive_candidates = &positive_candidates2;
			      negative_candidates = &negative_candidates2;
			      mate_positive_candidates = &positive_candidates1;
			      mate_negative_candidates = &negative_candidates1;
			      augment_positive_candidates = &augment_positive_candidates2;
			      augment_negative_candidates = &augment_negative_candidates2;
			      repetitive_seed_length = &repetitive_seed_length2;
		      }
			
		      uint32_t mm_count = minimizers->size();
		      bool augment_flag = true;
		      uint32_t candidate_num = positive_candidates->size();
		      for (uint32_t i = 0; i < candidate_num; ++i) {
			      if (positive_candidates->at(i).count >= mm_count / 2)
			      	augment_flag = false;
		      }
		      candidate_num = negative_candidates->size();
		      if (augment_flag) {
			      for (uint32_t i = 0; i < candidate_num; ++i) {
				      if (negative_candidates->at(i).count >= mm_count / 2)
					      augment_flag = false;
			      }
		      }

		      if (augment_flag) {
			      positive_hits->clear();
			      negative_hits->clear();
			      if (mate_positive_candidates->size() > 0) 
				      index.GenerateCandidatesFromRepetitiveReadWithMateInfo(error_threshold_, *minimizers, repetitive_seed_length, negative_hits, augment_negative_candidates, mate_positive_candidates, -1, 2 * max_insert_size_) ;
			      if (mate_negative_candidates->size() > 0)
				      index.GenerateCandidatesFromRepetitiveReadWithMateInfo(error_threshold_, *minimizers, repetitive_seed_length, positive_hits, augment_positive_candidates, mate_negative_candidates, 1, 2 * max_insert_size_) ;
		      }
	      }
	      //std::cerr<<augment_positive_candidates1.size()<<" "<<augment_negative_candidates1.size()<<" "
	      //		<<augment_positive_candidates2.size()<<" "<<augment_negative_candidates2.size()<<"\n";
	      if (augment_positive_candidates1.size() > 0) 
	      	MergeCandidates(positive_candidates1, augment_positive_candidates1, positive_candidates1_buffer);
	      if (augment_negative_candidates1.size() > 0) 
	      	MergeCandidates(negative_candidates1, augment_negative_candidates1, negative_candidates1_buffer);
	      if (augment_positive_candidates2.size() > 0) 
	      	MergeCandidates(positive_candidates2, augment_positive_candidates2, positive_candidates2_buffer);
	      if (augment_negative_candidates2.size() > 0) 
	      	MergeCandidates(negative_candidates2, augment_negative_candidates2, negative_candidates2_buffer);
	      
              SupplementCandidates(index, repetitive_seed_length1, repetitive_seed_length2, minimizers1, minimizers2, positive_hits1, positive_hits2, positive_candidates1, positive_candidates2, positive_candidates1_buffer, positive_candidates2_buffer, negative_hits1, negative_hits2, negative_candidates1, negative_candidates2, negative_candidates1_buffer, negative_candidates2_buffer);
              current_num_candidates1 = positive_candidates1.size() + negative_candidates1.size();
              current_num_candidates2 = positive_candidates2.size() + negative_candidates2.size();

              if (current_num_candidates1 > 0 && current_num_candidates2 > 0) {
                positive_candidates1.swap(positive_candidates1_buffer);
                negative_candidates1.swap(negative_candidates1_buffer);
@@ -805,16 +804,13 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
                positive_candidates2.clear();
                negative_candidates1.clear();
                negative_candidates2.clear();
                //std::cerr << "#pc1: " << positive_candidates1_buffer.size() << ", #nc1: " << negative_candidates1_buffer.size() << ", #pc2: " << positive_candidates2_buffer.size() << ", #nc2: " << negative_candidates2_buffer.size() << "\n";
                // Paired-end filter
                ReduceCandidatesForPairedEndRead(positive_candidates1_buffer, negative_candidates1_buffer, positive_candidates2_buffer, negative_candidates2_buffer, &positive_candidates1, &negative_candidates1, &positive_candidates2, &negative_candidates2);
                //std::cerr << "After pe filter, #pc1: " << positive_candidates1.size() << ", #nc1: " << negative_candidates1.size() << ", #pc2: " << positive_candidates2.size() << ", #nc2: " << negative_candidates2.size() << "\n";
                current_num_candidates1 = positive_candidates1.size() + negative_candidates1.size();
                current_num_candidates2 = positive_candidates2.size() + negative_candidates2.size();

                // Verify candidates
                if (current_num_candidates1 > 0 && current_num_candidates2 > 0) {

                  thread_num_candidates += positive_candidates1.size() + positive_candidates2.size() + negative_candidates1.size() + negative_candidates2.size();

                  positive_mappings1.clear();
                  positive_mappings2.clear();
                  negative_mappings1.clear();
@@ -864,21 +860,28 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
          //    }
          //  }
          //}
          // Update cache
          for (uint32_t pair_index = 0; pair_index < num_loaded_pairs / 2; ++pair_index) {
            if (num_reads_ >= 2 * 5000000 && pair_index >= num_loaded_pairs / num_threads_)
            if (num_reads_ >= 2 * 5000000 && pair_index >= num_loaded_pairs / num_threads_) {
              break;
            }
            mm_to_candidates_cache.Update(mm_history1[pair_index].minimizers, mm_history1[pair_index].positive_candidates, mm_history1[pair_index].negative_candidates, mm_history1[pair_index].repetitive_seed_length);
            mm_to_candidates_cache.Update(mm_history2[pair_index].minimizers, mm_history2[pair_index].positive_candidates, mm_history2[pair_index].negative_candidates, mm_history2[pair_index].repetitive_seed_length);
            if (mm_history1[pair_index].positive_candidates.size() < mm_history1[pair_index].positive_candidates.capacity() / 2)
            if (mm_history1[pair_index].positive_candidates.size() < mm_history1[pair_index].positive_candidates.capacity() / 2) {
              std::vector<Candidate>().swap(mm_history1[pair_index].positive_candidates);
            if (mm_history1[pair_index].negative_candidates.size() < mm_history1[pair_index].negative_candidates.capacity() / 2)
            }
            if (mm_history1[pair_index].negative_candidates.size() < mm_history1[pair_index].negative_candidates.capacity() / 2) {
              std::vector<Candidate>().swap(mm_history1[pair_index].negative_candidates);
            if (mm_history2[pair_index].positive_candidates.size() < mm_history2[pair_index].positive_candidates.capacity() / 2)
            }
            if (mm_history2[pair_index].positive_candidates.size() < mm_history2[pair_index].positive_candidates.capacity() / 2) {
              std::vector<Candidate>().swap(mm_history2[pair_index].positive_candidates);
            if (mm_history2[pair_index].negative_candidates.size() < mm_history2[pair_index].negative_candidates.capacity() / 2)
            }
            if (mm_history2[pair_index].negative_candidates.size() < mm_history2[pair_index].negative_candidates.capacity() / 2) {
              std::vector<Candidate>().swap(mm_history2[pair_index].negative_candidates);
            }
          }
#pragma omp taskwait
          // Swap to next batch
          num_loaded_pairs = num_loaded_pairs_for_loading;
          read_batch1_for_loading.SwapSequenceBatch(read_batch1);
          read_batch2_for_loading.SwapSequenceBatch(read_batch2);
@@ -886,6 +889,7 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
          mappings_on_diff_ref_seqs_for_diff_threads.swap(mappings_on_diff_ref_seqs_for_diff_threads_for_saving);
#pragma omp task
          {
            // Handle output
            num_mappings_in_mem += MoveMappingsInBuffersToMappingContainer(num_reference_sequences, &mappings_on_diff_ref_seqs_for_diff_threads_for_saving);
            if (low_memory_mode_ && num_mappings_in_mem > max_num_mappings_in_mem) {
              TempMappingFileHandle<MappingRecord> temp_mapping_file_handle;
@@ -1996,8 +2000,7 @@ void Chromap<MappingRecord>::AllocateMultiMappings(uint32_t num_reference_sequen
}

template <typename MappingRecord>
void Chromap<MappingRecord>::MergeCandidates(std::vector<Candidate> &c1, const std::vector<Candidate> &c2, std::vector<Candidate> &buffer)
{
void Chromap<MappingRecord>::MergeCandidates(std::vector<Candidate> &c1, const std::vector<Candidate> &c2, std::vector<Candidate> &buffer) {
  if (c1.size() == 0) {
    c1 = c2;
    return ;
+9 −0
Original line number Diff line number Diff line
@@ -28,6 +28,14 @@ struct StackCell {
  StackCell(int k_, size_t x_, int w_) : x(x_), k(k_), w(w_) {};
};

struct _mm_history {
	bool skip;
	std::vector<std::pair<uint64_t, uint64_t> > minimizers;
	std::vector<Candidate> positive_candidates;
	std::vector<Candidate> negative_candidates;
  uint32_t repetitive_seed_length;
};

struct Peak {
  uint32_t start_position;
  uint16_t length;
@@ -144,6 +152,7 @@ class Chromap {
  void BandedAlign8PatternsToText(const char **patterns, const char *text, int read_length, int16_t *mapping_edit_distances, int16_t *mapping_end_positions);
  void BandedTraceback(int min_num_errors, const char *pattern, const char *text, const int read_length, int *mapping_start_position);
  void MergeCandidates(std::vector<Candidate> &c1, const std::vector<Candidate> &c2, std::vector<Candidate> &buffer);
  void SupplementCandidates(const Index &index, uint32_t repetitive_seed_length1, uint32_t repetitive_seed_length2, std::vector<std::pair<uint64_t, uint64_t> > &minimizers1, std::vector<std::pair<uint64_t, uint64_t> > &minimizers2, std::vector<uint64_t> &positive_hits1, std::vector<uint64_t> &positive_hits2, std::vector<Candidate> &positive_candidates1, std::vector<Candidate> &positive_candidates2, std::vector<Candidate> &positive_candidates1_buffer, std::vector<Candidate> &positive_candidates2_buffer, std::vector<uint64_t> &negative_hits1, std::vector<uint64_t> &negative_hits2, std::vector<Candidate> &negative_candidates1, std::vector<Candidate> &negative_candidates2, std::vector<Candidate> &negative_candidates1_buffer, std::vector<Candidate> &negative_candidates2_buffer);
  void VerifyCandidatesOnOneDirectionUsingSIMD(Direction candidate_direction, const SequenceBatch &read_batch, uint32_t read_index, const SequenceBatch &reference, const std::vector<Candidate> &candidates, std::vector<std::pair<int, uint64_t> > *mappings, int *min_num_errors, int *num_best_mappings, int *second_min_num_errors, int *num_second_best_mappings);
  void VerifyCandidatesOnOneDirection(Direction candidate_direction, const SequenceBatch &read_batch, uint32_t read_index, const SequenceBatch &reference, const std::vector<Candidate> &candidates, std::vector<std::pair<int, uint64_t> > *mappings, int *min_num_errors, int *num_best_mappings, int *second_min_num_errors, int *num_second_best_mappings);
  void VerifyCandidates(const SequenceBatch &read_batch, uint32_t read_index, const SequenceBatch &reference, const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, const std::vector<Candidate> &positive_candidates, const std::vector<Candidate> &negative_candidates, std::vector<std::pair<int, uint64_t> > *positive_mappings, std::vector<std::pair<int, uint64_t> > *negative_mappings, int *min_num_errors, int *num_best_mappings, int *second_min_num_errors, int *num_second_best_mappings);
+87 −96

File changed.

Preview size limit exceeded, changes collapsed.

+12 −4
Original line number Diff line number Diff line
@@ -26,6 +26,14 @@ struct Candidate {
  }
};

struct mmHit {
  uint32_t mi;
  uint64_t position;
  bool operator<(const mmHit &h)  const {
    return position > h.position; // the inversed direction is to make a min-heap
  }
};

class Index {
 public:
  Index(int min_num_seeds_required_for_mapping, const std::vector<int> &max_seed_frequencies, const std::string &index_file_path) : min_num_seeds_required_for_mapping_(min_num_seeds_required_for_mapping), max_seed_frequencies_(max_seed_frequencies), index_file_path_(index_file_path) { // for read mapping
@@ -65,10 +73,10 @@ class Index {
  void Construct(uint32_t num_sequences, const SequenceBatch &reference);
  void Save();
  void Load();
  void GenerateCandidatesOnOneDirection(int error_threshold, int num_seeds_required, std::vector<uint64_t> *hits, std::vector<Candidate> *candidates);
  void GenerateCandidates(int error_threshold, const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, uint32_t *repetitive_seed_length, std::vector<uint64_t> *positive_hits, std::vector<uint64_t> *negative_hits, std::vector<Candidate> *positive_candidates, std::vector<Candidate> *negative_candidates);
  void GenerateCandidatesFromRepetitiveReadWithMateInfo(int error_threshold, const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, uint32_t *repetitive_seed_length, std::vector<uint64_t> *hits, std::vector<Candidate> *candidates, std::vector<Candidate> *mate_candidates, int direction, unsigned int range);
  int CollectCandidates(int max_seed_frequency, int repetitive_seed_frequency, const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, uint32_t *repetitive_seed_length, std::vector<uint64_t> *positive_hits, std::vector<uint64_t> *negative_hits, bool use_heap);
  void GenerateCandidatesOnOneDirection(int error_threshold, int num_seeds_required, std::vector<uint64_t> *hits, std::vector<Candidate> *candidates) const;
  void GenerateCandidates(int error_threshold, const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, uint32_t *repetitive_seed_length, std::vector<uint64_t> *positive_hits, std::vector<uint64_t> *negative_hits, std::vector<Candidate> *positive_candidates, std::vector<Candidate> *negative_candidates) const;
  void GenerateCandidatesFromRepetitiveReadWithMateInfo(int error_threshold, const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, uint32_t *repetitive_seed_length, std::vector<uint64_t> *hits, std::vector<Candidate> *candidates, std::vector<Candidate> *mate_candidates, int direction, unsigned int range) const;
  int CollectCandidates(int max_seed_frequency, int repetitive_seed_frequency, const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, uint32_t *repetitive_seed_length, std::vector<uint64_t> *positive_hits, std::vector<uint64_t> *negative_hits, bool use_heap) const;
  inline static uint64_t Hash64(uint64_t key, const uint64_t mask) {
    key = (~key + (key << 21)) & mask; // key = (key << 21) - key - 1;
    key = key ^ key >> 24;
+2 −1
Original line number Diff line number Diff line
@@ -19,13 +19,14 @@ class SequenceBatch {
    sequence_batch_.reserve(max_num_sequences_);
    for (uint32_t i = 0; i < max_num_sequences_; ++i) {
      sequence_batch_.emplace_back((kseq_t*)calloc(1, sizeof(kseq_t)));
      sequence_batch_.back()->f = NULL;
    }
    negative_sequence_batch_.assign(max_num_sequences_, "");
  }
  ~SequenceBatch(){
    if (sequence_batch_.size() > 0) {
      for (uint32_t i = 0; i < sequence_batch_.size(); ++i) {
        free(sequence_batch_[i]);
        kseq_destroy(sequence_batch_[i]);
      }
    }
  }