Commit 567fc3ed authored by Haowen Zhang
Browse files

Merge branch 'refactor'

Conflicts:
	src/chromap.cc
	src/chromap.h
	src/mmcache.hpp
parents 860b7044 42a1dbb4
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
cpp_source=sequence_batch.cc index.cc ksw.cc chromap.cc
cpp_source=sequence_batch.cc index.cc ksw.cc output_tools.cc chromap.cc
src_dir=src
objs_dir=objs
objs+=$(patsubst %.cc,$(objs_dir)/%.o,$(cpp_source))
+3846 −2105

File changed.

Preview size limit exceeded, changes collapsed.

+486 −102

File changed.

Preview size limit exceeded, changes collapsed.

+325 −177

File changed.

Preview size limit exceeded, changes collapsed.

+57 −34
Original line number Diff line number Diff line
#ifndef INDEX_H_
#define INDEX_H_

#include <queue>
#include <string>
#include <vector>
#include <queue>

#include "khash.h"
#include "sequence_batch.h"
@@ -35,16 +35,27 @@ struct mmHit {
  uint32_t mi;
  uint64_t position;
  // Ordering used when mmHit objects are compared with operator<.
  // Reversed on purpose: returns true when this hit's position is GREATER than
  // h.position. Per the inline note, this makes a heap that is ordered by
  // operator< behave as a min-heap on position. The `mi` field does not take
  // part in the comparison.
  // NOTE(review): the two return statements below are the same expression
  // (before / after reformatting, as shown by this diff view); if both are
  // kept, the second one is unreachable.
  bool operator<(const mmHit &h) const {
    return position > h.position; // the inversed direction is to make a min-heap
    return position >
           h.position;  // the inversed direction is to make a min-heap
  }
};

class Index {
 public:
  // Constructor used for read mapping.
  // Stores the minimum number of seeds required for mapping, a copy of the
  // max_seed_frequencies vector and a copy of the index file path, then
  // allocates an empty k64 hash table with kh_init for lookup_table_.
  // kmer_size_, window_size_ and num_threads_ are not set in this initializer
  // list.
  // NOTE(review): the first line below is the old single-line signature and
  // the following lines are its reformatted replacement from the same diff;
  // only one of the two belongs in the real header.
  Index(int min_num_seeds_required_for_mapping, const std::vector<int> &max_seed_frequencies, const std::string &index_file_path) : min_num_seeds_required_for_mapping_(min_num_seeds_required_for_mapping), max_seed_frequencies_(max_seed_frequencies), index_file_path_(index_file_path) { // for read mapping
  Index(int min_num_seeds_required_for_mapping,
        const std::vector<int> &max_seed_frequencies,
        const std::string &index_file_path)
      : min_num_seeds_required_for_mapping_(min_num_seeds_required_for_mapping),
        max_seed_frequencies_(max_seed_frequencies),
        index_file_path_(index_file_path) {  // for read mapping
    lookup_table_ = kh_init(k64);
  }
  // Constructor used for index construction.
  // Stores the k-mer size, the window size, the thread count and a copy of the
  // index file path, then allocates an empty k64 hash table with kh_init for
  // lookup_table_. min_num_seeds_required_for_mapping_ and
  // max_seed_frequencies_ are not set in this initializer list.
  // NOTE(review): the first line below is the old single-line signature and
  // the following lines are its reformatted replacement from the same diff;
  // only one of the two belongs in the real header.
  Index(int kmer_size, int window_size, int num_threads, const std::string &index_file_path) : kmer_size_(kmer_size), window_size_(window_size), num_threads_(num_threads), index_file_path_(index_file_path) { // for index construction
  Index(int kmer_size, int window_size, int num_threads,
        const std::string &index_file_path)
      : kmer_size_(kmer_size),
        window_size_(window_size),
        num_threads_(num_threads),
        index_file_path_(index_file_path) {  // for index construction
    lookup_table_ = kh_init(k64);
  }
  ~Index() {
@@ -57,31 +68,43 @@ class Index {
    lookup_table_ = NULL;
    std::vector<uint64_t>().swap(occurrence_table_);
  }
  // Read-only accessors (multi-line form, as they appear before the
  // reformatting shown in this diff).

  // Returns a pointer-to-const to the k64 hash table held in lookup_table_.
  khash_t(k64) const * GetLookupTable() const {
    return lookup_table_;
  }
  // Returns the stored k-mer size (kmer_size_).
  int GetKmerSize() const {
    return kmer_size_;
  }
  // Returns the stored window size (window_size_).
  int GetWindowSize() const {
    return window_size_;
  }
  // Returns kh_size(lookup_table_), i.e. khash's element count for the table.
  uint32_t GetLookupTableSize() const {
    return kh_size(lookup_table_);
  }
  // Read-only accessors (single-line form, as they appear after the
  // reformatting shown in this diff).

  // Returns a pointer-to-const to the k64 hash table held in lookup_table_.
  khash_t(k64) const *GetLookupTable() const { return lookup_table_; }
  // Returns the stored k-mer size (kmer_size_).
  int GetKmerSize() const { return kmer_size_; }
  // Returns the stored window size (window_size_).
  int GetWindowSize() const { return window_size_; }
  // Returns kh_size(lookup_table_), i.e. khash's element count for the table.
  uint32_t GetLookupTableSize() const { return kh_size(lookup_table_); }
  // Returns a const reference to occurrence_table_ (no copy is made). The
  // reference refers to a member of this Index, so it must not be used after
  // the Index has been destroyed.
  std::vector<uint64_t> const &GetOccurrenceTable() const {
    return occurrence_table_;
  }
  void Statistics(uint32_t num_sequences, const SequenceBatch &reference);
  void CheckIndex(uint32_t num_sequences, const SequenceBatch &reference);
  void GenerateMinimizerSketch(const SequenceBatch &sequence_batch, uint32_t sequence_index, std::vector<std::pair<uint64_t, uint64_t> > *minimizers);
  void GenerateMinimizerSketch(
      const SequenceBatch &sequence_batch, uint32_t sequence_index,
      std::vector<std::pair<uint64_t, uint64_t> > *minimizers);
  void Construct(uint32_t num_sequences, const SequenceBatch &reference);
  void Save();
  void Load();
  void GenerateCandidatesOnOneDirection(int error_threshold, int num_seeds_required, uint32_t num_minimizers, std::vector<uint64_t> *hits, std::vector<Candidate> *candidates) const;
  void GenerateCandidates(int error_threshold, const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, uint32_t *repetitive_seed_length, std::vector<uint64_t> *positive_hits, std::vector<uint64_t> *negative_hits, std::vector<Candidate> *positive_candidates, std::vector<Candidate> *negative_candidates) const;
	int GenerateCandidatesFromRepetitiveReadWithMateInfo(int error_threshold, const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, uint32_t *repetitive_seed_length, std::vector<uint64_t> *hits, std::vector<Candidate> *candidates, std::vector<Candidate> *mate_candidates, Direction direction, uint32_t range) const;
  int CollectCandidates(int max_seed_frequency, int repetitive_seed_frequency, const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, uint32_t *repetitive_seed_length, std::vector<uint64_t> *positive_hits, std::vector<uint64_t> *negative_hits, bool use_heap) const;
  void GenerateCandidatesOnOneDirection(
      int error_threshold, int num_seeds_required, uint32_t num_minimizers,
      std::vector<uint64_t> *hits, std::vector<Candidate> *candidates) const;
  void GenerateCandidates(
      int error_threshold,
      const std::vector<std::pair<uint64_t, uint64_t> > &minimizers,
      uint32_t *repetitive_seed_length, std::vector<uint64_t> *positive_hits,
      std::vector<uint64_t> *negative_hits,
      std::vector<Candidate> *positive_candidates,
      std::vector<Candidate> *negative_candidates) const;
  int GenerateCandidatesFromRepetitiveReadWithMateInfo(
      int error_threshold,
      const std::vector<std::pair<uint64_t, uint64_t> > &minimizers,
      uint32_t *repetitive_seed_length, std::vector<uint64_t> *hits,
      std::vector<Candidate> *candidates,
      std::vector<Candidate> *mate_candidates, Direction direction,
      uint32_t range) const;
  int CollectCandidates(
      int max_seed_frequency, int repetitive_seed_frequency,
      const std::vector<std::pair<uint64_t, uint64_t> > &minimizers,
      uint32_t *repetitive_seed_length, std::vector<uint64_t> *positive_hits,
      std::vector<uint64_t> *negative_hits, bool use_heap) const;
  inline static uint64_t Hash64(uint64_t key, const uint64_t mask) {
    key = (~key + (key << 21)) & mask;  // key = (key << 21) - key - 1;
    key = key ^ key >> 24;
Loading