Commit adba6823 authored by Swift Genomics's avatar Swift Genomics Committed by swiftgenomics
Browse files

Move funcs to index utils.

parent 212145ea
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -29,7 +29,7 @@
#include "temp_mapping.h"
#include "utils.h"

// Version string macro. The two consecutive definitions shown here differ only
// in the revision suffix (r446 and r447).
#define CHROMAP_VERSION "0.2.3-r446"
#define CHROMAP_VERSION "0.2.3-r447"

namespace chromap {

+18 −57
Original line number Diff line number Diff line
@@ -6,7 +6,6 @@
#include <iostream>

#include "minimizer_generator.h"
#include "minimizer_utils.h"

namespace chromap {

@@ -246,62 +245,6 @@ void Index::CheckIndex(uint32_t num_sequences,
  }
}

void Index::HeapMergeCandidatePositionLists(
    const std::vector<std::vector<uint64_t>> sorted_candidate_position_lists,
    std::vector<uint64_t> &candidate_positions) const {
  std::priority_queue<CandidatePositionWithListIndex> heap;
  std::vector<uint32_t> candidate_position_list_indices(
      sorted_candidate_position_lists.size(), 0);

  for (uint32_t li = 0; li < sorted_candidate_position_lists.size(); ++li) {
    if (sorted_candidate_position_lists[li].size() == 0) {
      continue;
    }
    heap.emplace(li, sorted_candidate_position_lists[li][0]);
  }

  while (!heap.empty()) {
    const CandidatePositionWithListIndex min_candidate_position = heap.top();
    heap.pop();
    candidate_positions.push_back(min_candidate_position.position);
    ++candidate_position_list_indices[min_candidate_position.list_index];

    const uint32_t min_candidate_position_list_index =
        candidate_position_list_indices[min_candidate_position.list_index];
    const std::vector<uint64_t> &min_sorted_candidate_position_list =
        sorted_candidate_position_lists[min_candidate_position.list_index];
    if (min_candidate_position_list_index <
        min_sorted_candidate_position_list.size()) {
      heap.emplace(min_candidate_position.list_index,
                   min_sorted_candidate_position_list
                       [min_candidate_position_list_index]);
    }
  }
}

// Turns one (reference seed hit, read seed hit) pair into a candidate
// position: the reference sequence index of the reference hit combined with
// the mapping start position derived from the two hit positions.
uint64_t Index::GenerateCandidatePositionForSingleSeedHit(
    uint64_t reference_seed_hit, uint64_t read_seed_hit) const {
  const uint32_t ref_pos = GenerateSequencePosition(reference_seed_hit);
  const uint32_t read_pos = GenerateSequencePosition(read_seed_hit);

  // The reference is not visible at this point, so the seed hit is not
  // validated here. Validation is deferred until the candidates are checked.
  uint32_t start_pos;
  if (AreTwoHitsOnTheSameStrand(reference_seed_hit, read_seed_hit)) {
    start_pos = ref_pos - read_pos;
  } else {
    start_pos = ref_pos + read_pos - kmer_size_ + 1;
  }

  const uint64_t ref_id = GenerateSequenceIndex(reference_seed_hit);
  return GenerateCandidatePosition(ref_id, start_pos);
}

int Index::GenerateCandidatePositions(
    const CandidatePositionGeneratingConfig &generating_config,
    MappingMetadata &mapping_metadata) const {
@@ -668,4 +611,22 @@ int Index::GenerateCandidatePositionsFromRepetitiveReadWithMateInfoOnOneStrand(
  return max_minimizer_count;
}

// Builds the candidate position for a single pair of seed hits. The mapping
// start is (reference position - read position) when both hits lie on the same
// strand, and (reference position + read position - k-mer size + 1) otherwise.
uint64_t Index::GenerateCandidatePositionForSingleSeedHit(
    uint64_t reference_seed_hit, uint64_t read_seed_hit) const {
  const uint32_t reference_offset =
      GenerateSequencePosition(reference_seed_hit);
  const uint32_t read_offset = GenerateSequencePosition(read_seed_hit);
  const bool on_same_strand =
      AreTwoHitsOnTheSameStrand(reference_seed_hit, read_seed_hit);

  // The reference cannot be inspected here, so this candidate position is not
  // validated now; that happens later, when the candidates are checked.
  uint32_t mapping_start = reference_offset - read_offset;
  if (!on_same_strand) {
    mapping_start = reference_offset + read_offset - kmer_size_ + 1;
  }

  const uint64_t sequence_id = GenerateSequenceIndex(reference_seed_hit);
  const uint64_t result = GenerateCandidatePosition(sequence_id, mapping_start);
  return result;
}

}  // namespace chromap
+0 −5
Original line number Diff line number Diff line
@@ -81,14 +81,9 @@ class Index {
  uint32_t GetLookupTableSize() const { return kh_size(lookup_table_); }

 private:
  // Merges the already-sorted lists in sorted_candidate_position_lists using a
  // heap and appends the merged positions, in ascending order, to
  // candidate_positions.
  // NOTE(review): the lists are taken by value, so every call copies them.
  void HeapMergeCandidatePositionLists(
      const std::vector<std::vector<uint64_t>> sorted_candidate_position_lists,
      std::vector<uint64_t> &candidate_positions) const;

  // Computes the candidate position implied by one pair of seed hits: the
  // sequence index of reference_seed_hit combined with the mapping start
  // position derived from the positions of the two hits. The hit itself is not
  // validated here.
  uint64_t GenerateCandidatePositionForSingleSeedHit(
      uint64_t reference_seed_hit, uint64_t read_seed_hit) const;

 protected:
  int kmer_size_ = 0;
  int window_size_ = 0;
  // Number of threads to build the index, which is not used right now.
+47 −0
Original line number Diff line number Diff line
@@ -46,6 +46,53 @@ inline static uint64_t GenerateCandidatePositionFromOccurrenceTableEntry(
  return entry >> 1;
}

// Heap entry used by HeapMergeCandidatePositionLists: one candidate position
// together with the index of the sorted list it was taken from.
struct CandidatePositionWithListIndex {
  uint32_t list_index;
  uint64_t position;

  CandidatePositionWithListIndex(uint32_t list_index, uint64_t position)
      : list_index(list_index), position(position) {}

  bool operator<(const CandidatePositionWithListIndex &h) const {
    // The comparison is inverted on purpose: std::priority_queue keeps the
    // largest element on top, so reversing the order yields a min-heap on
    // position.
    return position > h.position;
  }
};

// Merges several individually sorted candidate position lists into one sorted
// sequence using a heap (k-way merge).
//
// sorted_candidate_position_lists: the input lists, each sorted in ascending
//   order; empty lists are allowed and ignored. Taken by const reference so
//   the lists are not deep-copied on every call.
// candidate_positions: output; the merged positions are appended in ascending
//   order and existing contents are preserved. It must not be one of the
//   input lists, since the inputs are read while the output grows.
inline static void HeapMergeCandidatePositionLists(
    const std::vector<std::vector<uint64_t>> &sorted_candidate_position_lists,
    std::vector<uint64_t> &candidate_positions) {
  std::priority_queue<CandidatePositionWithListIndex> heap;
  // For each list, the offset of the most recently consumed element.
  std::vector<uint32_t> candidate_position_list_indices(
      sorted_candidate_position_lists.size(), 0);

  // Start with the smallest element of every non-empty list.
  for (uint32_t li = 0; li < sorted_candidate_position_lists.size(); ++li) {
    if (sorted_candidate_position_lists[li].empty()) {
      continue;
    }
    heap.emplace(li, sorted_candidate_position_lists[li].front());
  }

  while (!heap.empty()) {
    const CandidatePositionWithListIndex min_candidate_position = heap.top();
    heap.pop();
    candidate_positions.push_back(min_candidate_position.position);

    // Advance within the list that supplied the minimum and, if it still has
    // elements left, push its next one so the heap keeps one entry per
    // unfinished list.
    const std::vector<uint64_t> &min_sorted_candidate_position_list =
        sorted_candidate_position_lists[min_candidate_position.list_index];
    const uint32_t next_index =
        ++candidate_position_list_indices[min_candidate_position.list_index];
    if (next_index < min_sorted_candidate_position_list.size()) {
      heap.emplace(min_candidate_position.list_index,
                   min_sorted_candidate_position_list[next_index]);
    }
  }
}

}  // namespace chromap

#endif  // INDEX_UTILS_H_
+0 −14
Original line number Diff line number Diff line
@@ -42,20 +42,6 @@ struct _mm_history {
  uint32_t repetitive_seed_length;
};

// Heap entry used by Index when merging sorted candidate position lists: a
// candidate position paired with the index of the list it came from.
struct CandidatePositionWithListIndex {
  uint32_t list_index;
  uint64_t position;

  CandidatePositionWithListIndex(uint32_t source_list_index,
                                 uint64_t candidate_position)
      : list_index{source_list_index}, position{candidate_position} {}

  // Ordering is deliberately reversed (an entry with a larger position compares
  // as "less") so that a std::priority_queue of these acts as a min-heap.
  bool operator<(const CandidatePositionWithListIndex &other) const {
    return other.position < position;
  }
};

// Hash table type instantiations (macros presumably from khash.h — confirm):
// k128 maps 64-bit integer keys to uint128_t values, k64_seq maps 64-bit
// integer keys to uint64_t values, and k32_set is a set of 32-bit integer keys.
KHASH_MAP_INIT_INT64(k128, uint128_t);
KHASH_MAP_INIT_INT64(k64_seq, uint64_t);
KHASH_SET_INIT_INT(k32_set);