Commit 22f52653 authored by Haowen Zhang
Browse files

Implement a minimizer class and its generator.

Replace Direction with Strand.
parent fc69224c
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -2,7 +2,7 @@ CXX=g++
CXXFLAGS=-std=c++11 -Wall -O3 -fopenmp -msse4.1
LDFLAGS=-lm -lz

cpp_source=sequence_batch.cc index.cc candidate_processor.cc alignment.cc feature_barcode_matrix.cc ksw.cc draft_mapping_generator.cc mapping_generator.cc mapping_writer.cc chromap.cc chromap_driver.cc
cpp_source=sequence_batch.cc index.cc minimizer_generator.cc candidate_processor.cc alignment.cc feature_barcode_matrix.cc ksw.cc draft_mapping_generator.cc mapping_generator.cc mapping_writer.cc chromap.cc chromap_driver.cc
src_dir=src
objs_dir=objs
objs+=$(patsubst %.cc,$(objs_dir)/%.o,$(cpp_source))
+2 −2
Original line number Diff line number Diff line
@@ -21,12 +21,12 @@ int GetLongestMatchLength(const char *pattern, const char *text,
  return max_match;
}

int AdjustGapBeginning(Direction mapping_direction, const char *ref,
int AdjustGapBeginning(const Strand mapping_strand, const char *ref,
                       const char *read, int *gap_beginning, int read_end,
                       int ref_start_position, int ref_end_position,
                       int *n_cigar, uint32_t **cigar) {
  int i, j;
  if (mapping_direction == kPositive) {
  if (mapping_strand == kPositive) {
    if (*gap_beginning <= 0) {
      return ref_start_position;
    }
+1 −1
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@ int GetLongestMatchLength(const char *pattern, const char *text,

// Return newly adjusted reference start/end position for kPositive/kNegative
// mappings.
int AdjustGapBeginning(Direction mapping_direction, const char *ref,
int AdjustGapBeginning(const Strand mapping_strand, const char *ref,
                       const char *read, int *gap_beginning, int read_end,
                       int ref_start_position, int ref_end_position,
                       int *n_cigar, uint32_t **cigar);
+15 −17
Original line number Diff line number Diff line
@@ -12,8 +12,7 @@ namespace chromap {
void CandidateProcessor::GenerateCandidates(
    int error_threshold, const Index &index,
    MappingMetadata &mapping_metadata) const {
  const std::vector<std::pair<uint64_t, uint64_t>> &minimizers =
      mapping_metadata.minimizers_;
  const std::vector<Minimizer> &minimizers = mapping_metadata.minimizers_;
  std::vector<uint64_t> &positive_hits = mapping_metadata.positive_hits_;
  std::vector<uint64_t> &negative_hits = mapping_metadata.negative_hits_;
  std::vector<Candidate> &positive_candidates =
@@ -51,11 +50,11 @@ void CandidateProcessor::GenerateCandidates(
  }

  // std::cerr << "Normal positive gen on one dir\n";
  GenerateCandidatesOnOneDirection(error_threshold, num_required_seeds,
  GenerateCandidatesOnOneStrand(error_threshold, num_required_seeds,
                                minimizers.size(), positive_hits,
                                positive_candidates);
  // std::cerr << "Normal negative gen on one dir\n";
  GenerateCandidatesOnOneDirection(error_threshold, num_required_seeds,
  GenerateCandidatesOnOneStrand(error_threshold, num_required_seeds,
                                minimizers.size(), negative_hits,
                                negative_candidates);
  // fprintf(stderr, "p+n: %d\n", positive_candidates->size() +
@@ -75,7 +74,7 @@ int CandidateProcessor::SupplementCandidates(
  int ret = 0;

  for (int mate = 0; mate <= 1; ++mate) {
    std::vector<std::pair<uint64_t, uint64_t>> *minimizers;
    std::vector<Minimizer> *minimizers;
    std::vector<uint64_t> *positive_hits;
    std::vector<uint64_t> *negative_hits;
    std::vector<Candidate> *positive_candidates;
@@ -256,22 +255,21 @@ void CandidateProcessor::ReduceCandidatesForPairedEndRead(

int CandidateProcessor::GenerateCandidatesFromRepetitiveReadWithMateInfo(
    int error_threshold, const Index &index,
    const std::vector<std::pair<uint64_t, uint64_t>> &minimizers,
    uint32_t &repetitive_seed_length, std::vector<uint64_t> &hits,
    std::vector<Candidate> &candidates,
    const std::vector<Candidate> &mate_candidates, const Direction direction,
    const std::vector<Minimizer> &minimizers, uint32_t &repetitive_seed_length,
    std::vector<uint64_t> &hits, std::vector<Candidate> &candidates,
    const std::vector<Candidate> &mate_candidates, const Strand strand,
    uint32_t search_range) const {
  int max_seed_count = index.CollectSeedHitsFromRepetitiveReadWithMateInfo(
      error_threshold, minimizers, repetitive_seed_length, hits,
      mate_candidates, direction, search_range,
      mate_candidates, strand, search_range,
      min_num_seeds_required_for_mapping_, max_seed_frequencies_[0]);

  GenerateCandidatesOnOneDirection(error_threshold, /*num_seeds_required=*/1,
  GenerateCandidatesOnOneStrand(error_threshold, /*num_seeds_required=*/1,
                                minimizers.size(), hits, candidates);
  return max_seed_count;
}

void CandidateProcessor::GenerateCandidatesOnOneDirection(
void CandidateProcessor::GenerateCandidatesOnOneStrand(
    int error_threshold, int num_seeds_required, uint32_t num_minimizers,
    std::vector<uint64_t> &hits, std::vector<Candidate> &candidates) const {
  hits.emplace_back(UINT64_MAX);
+8 −5
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
#include "mapping_metadata.h"
#include "paired_end_mapping_metadata.h"
#include "sequence_batch.h"
#include "utils.h"

namespace chromap {

@@ -39,16 +40,18 @@ class CandidateProcessor {
      PairedEndMappingMetadata &paired_end_mapping_metadata) const;

 private:
  void GenerateCandidatesOnOneDirection(
      int error_threshold, int num_seeds_required, uint32_t num_minimizers,
      std::vector<uint64_t> &hits, std::vector<Candidate> &candidates) const;
  void GenerateCandidatesOnOneStrand(int error_threshold,
                                     int num_seeds_required,
                                     uint32_t num_minimizers,
                                     std::vector<uint64_t> &hits,
                                     std::vector<Candidate> &candidates) const;

  int GenerateCandidatesFromRepetitiveReadWithMateInfo(
      int error_threshold, const Index &index,
      const std::vector<std::pair<uint64_t, uint64_t> > &minimizers,
      const std::vector<Minimizer> &minimizers,
      uint32_t &repetitive_seed_length, std::vector<uint64_t> &hits,
      std::vector<Candidate> &candidates,
      const std::vector<Candidate> &mate_candidates, const Direction direction,
      const std::vector<Candidate> &mate_candidates, const Strand strand,
      uint32_t search_range) const;

  void MergeCandidates(int error_threshold, std::vector<Candidate> &c1,
Loading