Unverified Commit 1fdd749e authored by Li Song's avatar Li Song Committed by GitHub
Browse files

Merge branch 'master' into li_dev3

parents 60fbe5f7 db21c154
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ on:
    branches: [ master ]

env:
  DEVELOPER_DIR: /Applications/Xcode_12.4.app/Contents/Developer
  DEVELOPER_DIR: /Applications/Xcode.app/Contents/Developer

jobs:
  ubuntu:
@@ -36,16 +36,16 @@ jobs:
    - uses: actions/checkout@v2
    - name: cache-openmp
      id: cache-openmp
      uses: actions/cache@v2.1.7
      uses: actions/cache@v3
      with:
        path: openmp-install
        key: openmp-macos-install-20201213
        key: openmp-macos-install
    - name: build-openmp
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-11.0.0/openmp-11.0.0.src.tar.xz
        tar -xf openmp-11.0.0.src.tar.xz
        cd openmp-11.0.0.src
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-14.0.0/openmp-14.0.0.src.tar.xz
        tar -xf openmp-14.0.0.src.tar.xz
        cd openmp-14.0.0.src
        sed -i'' -e '/.size __kmp_unnamed_critical_addr/d' runtime/src/z_Linux_asm.S
        sed -i'' -e 's/__kmp_unnamed_critical_addr/___kmp_unnamed_critical_addr/g' runtime/src/z_Linux_asm.S
        mkdir -p build && cd build
+2 −2
Original line number Diff line number Diff line
@@ -201,8 +201,8 @@ Index file.
.BI -1 \ FILE
Single-end read files or paired-end read files 1. Chromap supports multiple
input files concatenated by ",". For example, setting this option to 
"read11.fq,read12.fq,read13.fq" will make all three files as input and map them 
in this order. Similarly,
"Library1_R1.fastq.gz,Library2_R1.fastq.gz,Library3_R1.fastq.gz" will make 
all three files as input and map them in this order. Similarly,
.BR -2
and
.BR -b
+48 −0
Original line number Diff line number Diff line
#ifndef CANDIDATE_POSITION_GENERATING_CONFIG_H_
#define CANDIDATE_POSITION_GENERATING_CONFIG_H_

// uint32_t is used throughout this header; include its definition directly so
// the header does not depend on the includer's include order.
#include <cstdint>

namespace chromap {

// Immutable set of parameters used when generating candidate positions.
// All values are fixed at construction. With them, callers can check whether a
// seed is frequent or repetitive and whether heap merge should be used.
class CandidatePositionGeneratingConfig {
 public:
  // All three parameters must be supplied explicitly.
  CandidatePositionGeneratingConfig() = delete;

  // max_seed_frequency: threshold tested by IsFrequentSeed().
  // repetitive_seed_frequency: threshold tested by IsRepetitiveSeed().
  // use_heap_merge: value reported by UseHeapMerge().
  CandidatePositionGeneratingConfig(uint32_t max_seed_frequency,
                                    uint32_t repetitive_seed_frequency,
                                    bool use_heap_merge)
      : max_seed_frequency_(max_seed_frequency),
        repetitive_seed_frequency_(repetitive_seed_frequency),
        use_heap_merge_(use_heap_merge) {}

  ~CandidatePositionGeneratingConfig() = default;

  // Returns true when seed_frequency is greater than or equal to the max seed
  // frequency, i.e. the threshold itself already counts as frequent.
  bool IsFrequentSeed(uint32_t seed_frequency) const {
    return seed_frequency >= max_seed_frequency_;
  }

  // Returns true when seed_frequency is greater than or equal to the
  // repetitive seed frequency threshold.
  bool IsRepetitiveSeed(uint32_t seed_frequency) const {
    return seed_frequency >= repetitive_seed_frequency_;
  }

  // Returns the heap merge flag given at construction.
  bool UseHeapMerge() const { return use_heap_merge_; }

  // Returns the max seed frequency given at construction.
  uint32_t GetMaxSeedFrequency() const { return max_seed_frequency_; }

 private:
  // Only seeds with frequency less than this threshold will be used.
  const uint32_t max_seed_frequency_;

  // Seeds with frequency greater than or equal to this threshold will be
  // considered as repetitive seeds.
  const uint32_t repetitive_seed_frequency_;

  // When the number of candidate positions is really large, use heap merge to
  // merge sorted candidate lists.
  const bool use_heap_merge_;
};

}  // namespace chromap

#endif  // CANDIDATE_POSITION_GENERATING_CONFIG_H_
+35 −24
Original line number Diff line number Diff line
@@ -21,19 +21,28 @@ void CandidateProcessor::GenerateCandidates(
      mapping_metadata.negative_candidates_;
  uint32_t &repetitive_seed_length = mapping_metadata.repetitive_seed_length_;

  const CandidatePositionGeneratingConfig first_round_generating_config(
      /*max_seed_frequency=*/max_seed_frequencies_[0],
      /*repetitive_seed_frequency=*/max_seed_frequencies_[0],
      /*use_heap_merge=*/false);

  repetitive_seed_length = 0;
  int repetitive_seed_count = index.CollectSeedHits(
      max_seed_frequencies_[0], max_seed_frequencies_[0], minimizers,
      repetitive_seed_length, positive_hits, negative_hits, false);
  int repetitive_seed_count = index.GenerateCandidatePositions(
      first_round_generating_config, mapping_metadata);

  bool use_high_frequency_minimizers = false;
  if (positive_hits.size() + negative_hits.size() == 0) {
    positive_hits.clear();
    negative_hits.clear();
    repetitive_seed_length = 0;
    repetitive_seed_count = index.CollectSeedHits(
        max_seed_frequencies_[1], max_seed_frequencies_[0], minimizers,
        repetitive_seed_length, positive_hits, negative_hits, true);

    const CandidatePositionGeneratingConfig second_round_generating_config(
        /*max_seed_frequency=*/max_seed_frequencies_[1],
        /*repetitive_seed_frequency=*/max_seed_frequencies_[0],
        /*use_heap_merge=*/true);

    repetitive_seed_count = index.GenerateCandidatePositions(
        second_round_generating_config, mapping_metadata);
    use_high_frequency_minimizers = true;
    if (positive_hits.size() == 0 || negative_hits.size() == 0) {
      use_high_frequency_minimizers = false;
@@ -153,18 +162,18 @@ int CandidateProcessor::SupplementCandidates(
      int negative_rescue_result = 0;
      if (mate_positive_candidates->size() > 0) {
        positive_rescue_result =
            GenerateCandidatesFromRepetitiveReadWithMateInfo(
                error_threshold, index, *minimizers, *repetitive_seed_length,
                *negative_hits, *augment_negative_candidates,
                *mate_positive_candidates, kNegative, search_range);
            GenerateCandidatesFromRepetitiveReadWithMateInfoOnOneStrand(
                kNegative, search_range, error_threshold, index, *minimizers,
                *mate_positive_candidates, *repetitive_seed_length,
                *negative_hits, *augment_negative_candidates);
      }

      if (mate_negative_candidates->size() > 0) {
        negative_rescue_result =
            GenerateCandidatesFromRepetitiveReadWithMateInfo(
                error_threshold, index, *minimizers, *repetitive_seed_length,
                *positive_hits, *augment_positive_candidates,
                *mate_negative_candidates, kPositive, search_range);
            GenerateCandidatesFromRepetitiveReadWithMateInfoOnOneStrand(
                kPositive, search_range, error_threshold, index, *minimizers,
                *mate_negative_candidates, *repetitive_seed_length,
                *positive_hits, *augment_positive_candidates);
      }

      // If one of the strand did not supplement due to too many best candidate,
@@ -253,16 +262,18 @@ void CandidateProcessor::ReduceCandidatesForPairedEndRead(
      filtered_negative_candidates1, filtered_positive_candidates2);
}

int CandidateProcessor::GenerateCandidatesFromRepetitiveReadWithMateInfo(
    int error_threshold, const Index &index,
    const std::vector<Minimizer> &minimizers, uint32_t &repetitive_seed_length,
    std::vector<uint64_t> &hits, std::vector<Candidate> &candidates,
    const std::vector<Candidate> &mate_candidates, const Strand strand,
    uint32_t search_range) const {
  int max_seed_count = index.CollectSeedHitsFromRepetitiveReadWithMateInfo(
      error_threshold, minimizers, repetitive_seed_length, hits,
      mate_candidates, strand, search_range,
      min_num_seeds_required_for_mapping_, max_seed_frequencies_[0]);
int CandidateProcessor::
    GenerateCandidatesFromRepetitiveReadWithMateInfoOnOneStrand(
        const Strand strand, uint32_t search_range, int error_threshold,
        const Index &index, const std::vector<Minimizer> &minimizers,
        const std::vector<Candidate> &mate_candidates,
        uint32_t &repetitive_seed_length, std::vector<uint64_t> &hits,
        std::vector<Candidate> &candidates) const {
  int max_seed_count =
      index.GenerateCandidatePositionsFromRepetitiveReadWithMateInfoOnOneStrand(
          strand, search_range, min_num_seeds_required_for_mapping_,
          max_seed_frequencies_[0], error_threshold, minimizers,
          mate_candidates, repetitive_seed_length, hits);

  GenerateCandidatesOnOneStrand(error_threshold, /*num_seeds_required=*/1,
                                minimizers.size(), hits, candidates);
+5 −6
Original line number Diff line number Diff line
@@ -46,13 +46,12 @@ class CandidateProcessor {
                                     std::vector<uint64_t> &hits,
                                     std::vector<Candidate> &candidates) const;

  int GenerateCandidatesFromRepetitiveReadWithMateInfo(
      int error_threshold, const Index &index,
      const std::vector<Minimizer> &minimizers,
  int GenerateCandidatesFromRepetitiveReadWithMateInfoOnOneStrand(
      const Strand strand, uint32_t search_range, int error_threshold,
      const Index &index, const std::vector<Minimizer> &minimizers,
      const std::vector<Candidate> &mate_candidates,
      uint32_t &repetitive_seed_length, std::vector<uint64_t> &hits,
      std::vector<Candidate> &candidates,
      const std::vector<Candidate> &mate_candidates, const Strand strand,
      uint32_t search_range) const;
      std::vector<Candidate> &candidates) const;

  void MergeCandidates(int error_threshold, std::vector<Candidate> &c1,
                       std::vector<Candidate> &c2,
Loading