Commit c7ba742a authored by Haowen Zhang
Browse files

Make the char/int conversion functions free functions.

Also format the code.
parent 2169e0f5
Loading
Loading
Loading
Loading
+44 −62
Original line number Diff line number Diff line
@@ -9,8 +9,7 @@ int GetLongestMatchLength(const char *pattern, const char *text,
  int max_match = 0;
  int tmp = 0;
  for (int i = 0; i < read_length; ++i) {
    if (SequenceBatch::CharToUint8(pattern[i]) ==
        SequenceBatch::CharToUint8(text[i])) {
    if (CharToUint8(pattern[i]) == CharToUint8(text[i])) {
      ++tmp;
    } else if (tmp > max_match) {
      max_match = tmp;
@@ -148,7 +147,7 @@ int BandedAlignPatternToText(int error_threshold, const char *pattern,
                             int *mapping_end_position) {
  uint32_t Peq[5] = {0, 0, 0, 0, 0};
  for (int i = 0; i < 2 * error_threshold; i++) {
    uint8_t base = SequenceBatch::CharToUint8(pattern[i]);
    uint8_t base = CharToUint8(pattern[i]);
    Peq[base] = Peq[base] | (1 << i);
  }
  uint32_t highest_bit_in_band_mask = 1 << (2 * error_threshold);
@@ -161,10 +160,9 @@ int BandedAlignPatternToText(int error_threshold, const char *pattern,
  uint32_t HP = 0;
  int num_errors_at_band_start_position = 0;
  for (int i = 0; i < read_length; i++) {
    uint8_t pattern_base =
        SequenceBatch::CharToUint8(pattern[i + 2 * error_threshold]);
    uint8_t pattern_base = CharToUint8(pattern[i + 2 * error_threshold]);
    Peq[pattern_base] = Peq[pattern_base] | highest_bit_in_band_mask;
    X = Peq[SequenceBatch::CharToUint8(text[i])] | VN;
    X = Peq[CharToUint8(text[i])] | VN;
    D0 = ((VP + (X & VP)) ^ VP) | X;
    HN = VP & D0;
    HP = VN | ~(VP | D0);
@@ -207,7 +205,7 @@ int BandedAlignPatternToTextWithDropOff(int error_threshold,
                                        int *read_mapping_length) {
  uint32_t Peq[5] = {0, 0, 0, 0, 0};
  for (int i = 0; i < 2 * error_threshold; i++) {
    uint8_t base = SequenceBatch::CharToUint8(pattern[i]);
    uint8_t base = CharToUint8(pattern[i]);
    Peq[base] = Peq[base] | (1 << i);
  }
  uint32_t highest_bit_in_band_mask = 1 << (2 * error_threshold);
@@ -225,10 +223,9 @@ int BandedAlignPatternToTextWithDropOff(int error_threshold,
  int fail_beginning = 0;  // the alignment failed at the beginning part
  int prev_num_errors_at_band_start_position = 0;
  for (; i < read_length; i++) {
    uint8_t pattern_base =
        SequenceBatch::CharToUint8(pattern[i + 2 * error_threshold]);
    uint8_t pattern_base = CharToUint8(pattern[i + 2 * error_threshold]);
    Peq[pattern_base] = Peq[pattern_base] | highest_bit_in_band_mask;
    X = Peq[SequenceBatch::CharToUint8(text[i])] | VN;
    X = Peq[CharToUint8(text[i])] | VN;
    D0 = ((VP + (X & VP)) ^ VP) | X;
    HN = VP & D0;
    HP = VN | ~(VP | D0);
@@ -297,8 +294,8 @@ int BandedAlignPatternToTextWithDropOffFrom3End(int error_threshold,
                                                int *read_mapping_length) {
  uint32_t Peq[5] = {0, 0, 0, 0, 0};
  for (int i = 0; i < 2 * error_threshold; i++) {
    uint8_t base = SequenceBatch::CharToUint8(
        pattern[read_length + 2 * error_threshold - 1 - i]);
    uint8_t base =
        CharToUint8(pattern[read_length + 2 * error_threshold - 1 - i]);
    Peq[base] = Peq[base] | (1 << i);
  }
  uint32_t highest_bit_in_band_mask = 1 << (2 * error_threshold);
@@ -318,10 +315,9 @@ int BandedAlignPatternToTextWithDropOffFrom3End(int error_threshold,
  for (; i < read_length; i++) {
    // printf("%c %c %d\n", pattern[read_length - 1 - i], pattern[read_length -
    // 1 - i + error_threshold], text[read_length - 1 - i]);
    uint8_t pattern_base =
        SequenceBatch::CharToUint8(pattern[read_length - 1 - i]);
    uint8_t pattern_base = CharToUint8(pattern[read_length - 1 - i]);
    Peq[pattern_base] = Peq[pattern_base] | highest_bit_in_band_mask;
    X = Peq[SequenceBatch::CharToUint8(text[read_length - 1 - i])] | VN;
    X = Peq[CharToUint8(text[read_length - 1 - i])] | VN;
    D0 = ((VP + (X & VP)) ^ VP) | X;
    HN = VP & D0;
    HP = VN | ~(VP | D0);
@@ -407,10 +403,10 @@ void BandedAlign4PatternsToText(int error_threshold, const char **patterns,
    Peq[ai] = _mm_setzero_si128();
  }
  for (int i = 0; i < 2 * error_threshold; i++) {
    uint8_t base0 = SequenceBatch::CharToUint8(reference_sequence0[i]);
    uint8_t base1 = SequenceBatch::CharToUint8(reference_sequence1[i]);
    uint8_t base2 = SequenceBatch::CharToUint8(reference_sequence2[i]);
    uint8_t base3 = SequenceBatch::CharToUint8(reference_sequence3[i]);
    uint8_t base0 = CharToUint8(reference_sequence0[i]);
    uint8_t base1 = CharToUint8(reference_sequence1[i]);
    uint8_t base2 = CharToUint8(reference_sequence2[i]);
    uint8_t base3 = CharToUint8(reference_sequence3[i]);
    Peq[base0] = _mm_or_si128(highest_bit_in_band_mask_vpu0, Peq[base0]);
    Peq[base1] = _mm_or_si128(highest_bit_in_band_mask_vpu1, Peq[base1]);
    Peq[base2] = _mm_or_si128(highest_bit_in_band_mask_vpu2, Peq[base2]);
@@ -432,19 +428,15 @@ void BandedAlign4PatternsToText(int error_threshold, const char **patterns,
  __m128i num_errors_at_band_start_position_vpu = _mm_setzero_si128();
  __m128i early_stop_threshold_vpu = _mm_set1_epi32(error_threshold * 3);
  for (int i = 0; i < read_length; i++) {
    uint8_t base0 = SequenceBatch::CharToUint8(
        reference_sequence0[i + 2 * error_threshold]);
    uint8_t base1 = SequenceBatch::CharToUint8(
        reference_sequence1[i + 2 * error_threshold]);
    uint8_t base2 = SequenceBatch::CharToUint8(
        reference_sequence2[i + 2 * error_threshold]);
    uint8_t base3 = SequenceBatch::CharToUint8(
        reference_sequence3[i + 2 * error_threshold]);
    uint8_t base0 = CharToUint8(reference_sequence0[i + 2 * error_threshold]);
    uint8_t base1 = CharToUint8(reference_sequence1[i + 2 * error_threshold]);
    uint8_t base2 = CharToUint8(reference_sequence2[i + 2 * error_threshold]);
    uint8_t base3 = CharToUint8(reference_sequence3[i + 2 * error_threshold]);
    Peq[base0] = _mm_or_si128(highest_bit_in_band_mask_vpu0, Peq[base0]);
    Peq[base1] = _mm_or_si128(highest_bit_in_band_mask_vpu1, Peq[base1]);
    Peq[base2] = _mm_or_si128(highest_bit_in_band_mask_vpu2, Peq[base2]);
    Peq[base3] = _mm_or_si128(highest_bit_in_band_mask_vpu3, Peq[base3]);
    X = _mm_or_si128(Peq[SequenceBatch::CharToUint8(text[i])], VN);
    X = _mm_or_si128(Peq[CharToUint8(text[i])], VN);
    D0 = _mm_and_si128(X, VP);
    D0 = _mm_add_epi32(D0, VP);
    D0 = _mm_xor_si128(D0, VP);
@@ -548,14 +540,14 @@ void BandedAlign8PatternsToText(int error_threshold, const char **patterns,
    Peq[ai] = _mm_setzero_si128();
  }
  for (int i = 0; i < 2 * error_threshold; i++) {
    uint8_t base0 = SequenceBatch::CharToUint8(reference_sequence0[i]);
    uint8_t base1 = SequenceBatch::CharToUint8(reference_sequence1[i]);
    uint8_t base2 = SequenceBatch::CharToUint8(reference_sequence2[i]);
    uint8_t base3 = SequenceBatch::CharToUint8(reference_sequence3[i]);
    uint8_t base4 = SequenceBatch::CharToUint8(reference_sequence4[i]);
    uint8_t base5 = SequenceBatch::CharToUint8(reference_sequence5[i]);
    uint8_t base6 = SequenceBatch::CharToUint8(reference_sequence6[i]);
    uint8_t base7 = SequenceBatch::CharToUint8(reference_sequence7[i]);
    uint8_t base0 = CharToUint8(reference_sequence0[i]);
    uint8_t base1 = CharToUint8(reference_sequence1[i]);
    uint8_t base2 = CharToUint8(reference_sequence2[i]);
    uint8_t base3 = CharToUint8(reference_sequence3[i]);
    uint8_t base4 = CharToUint8(reference_sequence4[i]);
    uint8_t base5 = CharToUint8(reference_sequence5[i]);
    uint8_t base6 = CharToUint8(reference_sequence6[i]);
    uint8_t base7 = CharToUint8(reference_sequence7[i]);
    Peq[base0] = _mm_or_si128(highest_bit_in_band_mask_vpu0, Peq[base0]);
    Peq[base1] = _mm_or_si128(highest_bit_in_band_mask_vpu1, Peq[base1]);
    Peq[base2] = _mm_or_si128(highest_bit_in_band_mask_vpu2, Peq[base2]);
@@ -581,22 +573,14 @@ void BandedAlign8PatternsToText(int error_threshold, const char **patterns,
  __m128i num_errors_at_band_start_position_vpu = _mm_setzero_si128();
  __m128i early_stop_threshold_vpu = _mm_set1_epi16(error_threshold * 3);
  for (int i = 0; i < read_length; i++) {
    uint8_t base0 = SequenceBatch::CharToUint8(
        reference_sequence0[i + 2 * error_threshold]);
    uint8_t base1 = SequenceBatch::CharToUint8(
        reference_sequence1[i + 2 * error_threshold]);
    uint8_t base2 = SequenceBatch::CharToUint8(
        reference_sequence2[i + 2 * error_threshold]);
    uint8_t base3 = SequenceBatch::CharToUint8(
        reference_sequence3[i + 2 * error_threshold]);
    uint8_t base4 = SequenceBatch::CharToUint8(
        reference_sequence4[i + 2 * error_threshold]);
    uint8_t base5 = SequenceBatch::CharToUint8(
        reference_sequence5[i + 2 * error_threshold]);
    uint8_t base6 = SequenceBatch::CharToUint8(
        reference_sequence6[i + 2 * error_threshold]);
    uint8_t base7 = SequenceBatch::CharToUint8(
        reference_sequence7[i + 2 * error_threshold]);
    uint8_t base0 = CharToUint8(reference_sequence0[i + 2 * error_threshold]);
    uint8_t base1 = CharToUint8(reference_sequence1[i + 2 * error_threshold]);
    uint8_t base2 = CharToUint8(reference_sequence2[i + 2 * error_threshold]);
    uint8_t base3 = CharToUint8(reference_sequence3[i + 2 * error_threshold]);
    uint8_t base4 = CharToUint8(reference_sequence4[i + 2 * error_threshold]);
    uint8_t base5 = CharToUint8(reference_sequence5[i + 2 * error_threshold]);
    uint8_t base6 = CharToUint8(reference_sequence6[i + 2 * error_threshold]);
    uint8_t base7 = CharToUint8(reference_sequence7[i + 2 * error_threshold]);
    Peq[base0] = _mm_or_si128(highest_bit_in_band_mask_vpu0, Peq[base0]);
    Peq[base1] = _mm_or_si128(highest_bit_in_band_mask_vpu1, Peq[base1]);
    Peq[base2] = _mm_or_si128(highest_bit_in_band_mask_vpu2, Peq[base2]);
@@ -605,7 +589,7 @@ void BandedAlign8PatternsToText(int error_threshold, const char **patterns,
    Peq[base5] = _mm_or_si128(highest_bit_in_band_mask_vpu5, Peq[base5]);
    Peq[base6] = _mm_or_si128(highest_bit_in_band_mask_vpu6, Peq[base6]);
    Peq[base7] = _mm_or_si128(highest_bit_in_band_mask_vpu7, Peq[base7]);
    X = _mm_or_si128(Peq[SequenceBatch::CharToUint8(text[i])], VN);
    X = _mm_or_si128(Peq[CharToUint8(text[i])], VN);
    D0 = _mm_and_si128(X, VP);
    D0 = _mm_add_epi16(D0, VP);
    D0 = _mm_xor_si128(D0, VP);
@@ -694,8 +678,8 @@ void BandedTraceback(int error_threshold, int min_num_errors,
  // if not then there are gaps so that we have to traceback with edit distance.
  uint32_t Peq[5] = {0, 0, 0, 0, 0};
  for (int i = 0; i < 2 * error_threshold; i++) {
    uint8_t base = SequenceBatch::CharToUint8(
        pattern[read_length - 1 + 2 * error_threshold - i]);
    uint8_t base =
        CharToUint8(pattern[read_length - 1 + 2 * error_threshold - i]);
    Peq[base] = Peq[base] | (1 << i);
  }
  uint32_t highest_bit_in_band_mask = 1 << (2 * error_threshold);
@@ -708,10 +692,9 @@ void BandedTraceback(int error_threshold, int min_num_errors,
  uint32_t HP = 0;
  int num_errors_at_band_start_position = 0;
  for (int i = 0; i < read_length; i++) {
    uint8_t pattern_base =
        SequenceBatch::CharToUint8(pattern[read_length - 1 - i]);
    uint8_t pattern_base = CharToUint8(pattern[read_length - 1 - i]);
    Peq[pattern_base] = Peq[pattern_base] | highest_bit_in_band_mask;
    X = Peq[SequenceBatch::CharToUint8(text[read_length - 1 - i])] | VN;
    X = Peq[CharToUint8(text[read_length - 1 - i])] | VN;
    D0 = ((VP + (X & VP)) ^ VP) | X;
    HN = VP & D0;
    HP = VN | ~(VP | D0);
@@ -759,7 +742,7 @@ void BandedTracebackToEnd(int error_threshold, int min_num_errors,
  // if not then there are gaps so that we have to traceback with edit distance.
  uint32_t Peq[5] = {0, 0, 0, 0, 0};
  for (int i = 0; i < 2 * error_threshold; i++) {
    uint8_t base = SequenceBatch::CharToUint8(pattern[i]);
    uint8_t base = CharToUint8(pattern[i]);
    Peq[base] = Peq[base] | (1 << i);
  }
  uint32_t highest_bit_in_band_mask = 1 << (2 * error_threshold);
@@ -774,10 +757,9 @@ void BandedTracebackToEnd(int error_threshold, int min_num_errors,
  for (int i = 0; i < read_length; i++) {
    // printf("=>%d %d %c %c\n", i, num_errors_at_band_start_position, pattern[i
    // + 2 * error_threshold], text[i]) ;
    uint8_t pattern_base =
        SequenceBatch::CharToUint8(pattern[i + 2 * error_threshold]);
    uint8_t pattern_base = CharToUint8(pattern[i + 2 * error_threshold]);
    Peq[pattern_base] = Peq[pattern_base] | highest_bit_in_band_mask;
    X = Peq[SequenceBatch::CharToUint8(text[i])] | VN;
    X = Peq[CharToUint8(text[i])] | VN;
    D0 = ((VP + (X & VP)) ^ VP) | X;
    HN = VP & D0;
    HP = VN | ~(VP | D0);
+2 −2
Original line number Diff line number Diff line
@@ -91,8 +91,8 @@ class BarcodeTranslator {
    sequence.reserve(seed_length);
    uint64_t mask_ = 3;
    for (uint32_t i = 0; i < seed_length; ++i) {
      sequence.push_back(SequenceBatch::Uint8ToChar(
          (seed >> ((seed_length - 1 - i) * 2)) & mask_));
      sequence.push_back(
          Uint8ToChar((seed >> ((seed_length - 1 - i) * 2)) & mask_));
    }
    return sequence;
  }
+15 −16
Original line number Diff line number Diff line
@@ -526,9 +526,9 @@ bool Chromap::CorrectBarcodeAt(uint32_t barcode_index,
          adjusted_qual = adjusted_qual < 3 ? 3 : adjusted_qual;
          double score =
              pow(10.0, ((-adjusted_qual) / 10.0)) * barcode_abundance;
          corrected_barcodes_with_quals.emplace_back(BarcodeWithQual{
              barcode_length - 1 - i,
              SequenceBatch::Uint8ToChar(base_to_change1), 0, 0, score});
          corrected_barcodes_with_quals.emplace_back(
              BarcodeWithQual{barcode_length - 1 - i,
                              Uint8ToChar(base_to_change1), 0, 0, score});
          // std::cerr << "1score: " << score << " pos1: " << barcode_length - 1
          // - i << " b1: " << base_to_change1 << " pos2: " << 0 << " b2: " <<
          // (char)0 << "\n";
@@ -579,10 +579,9 @@ bool Chromap::CorrectBarcodeAt(uint32_t barcode_index,
                double score =
                    pow(10.0, ((-adjusted_qual) / 10.0)) * barcode_abundance;
                corrected_barcodes_with_quals.emplace_back(BarcodeWithQual{
                    barcode_length - 1 - i,
                    SequenceBatch::Uint8ToChar(base_to_change1),
                    barcode_length - 1 - j,
                    SequenceBatch::Uint8ToChar(base_to_change2), score});
                    barcode_length - 1 - i, Uint8ToChar(base_to_change1),
                    barcode_length - 1 - j, Uint8ToChar(base_to_change2),
                    score});
                // std::cerr << "2score: " << score << " pos1: " <<
                // barcode_length - 1 - i << " b1: " << base_to_change1 << "
                // pos2: " << barcode_length - 1 -j << " b2: " <<
@@ -724,14 +723,14 @@ void Chromap::ParseReadFormat(const std::string &read_format) {
      ;
    bool parse_success = true;
    if (read_format[i] == 'r' && read_format[i + 1] == '1') {
      parse_success = read1_format_.ParseEffectiveRange(
                read_format.c_str() + i, j - i);
      parse_success =
          read1_format_.ParseEffectiveRange(read_format.c_str() + i, j - i);
    } else if (read_format[i] == 'r' && read_format[i + 1] == '2') {
      parse_success = read2_format_.ParseEffectiveRange(
                read_format.c_str() + i, j - i);
      parse_success =
          read2_format_.ParseEffectiveRange(read_format.c_str() + i, j - i);
    } else if (read_format[i] == 'b' && read_format[i + 1] == 'c') {
      parse_success = barcode_format_.ParseEffectiveRange(
                read_format.c_str() + i, j - i);
      parse_success =
          barcode_format_.ParseEffectiveRange(read_format.c_str() + i, j - i);
    } else {
      parse_success = false;
    }
+7 −5
Original line number Diff line number Diff line
@@ -312,7 +312,8 @@ void Chromap::MapSingleEndReads() {
                read_batch_for_loading, barcode_batch_for_loading);
          }  // end of openmp loading task
          uint32_t history_update_threshold =
            mm_to_candidates_cache.GetUpdateThreshold(num_loaded_reads, num_reads_, false);
              mm_to_candidates_cache.GetUpdateThreshold(num_loaded_reads,
                                                        num_reads_, false);
          // int grain_size = 10000;
//#pragma omp taskloop grainsize(grain_size) //num_tasks(num_threads_* 50)
#pragma omp taskloop num_tasks( \
@@ -702,7 +703,8 @@ void Chromap::MapPairedEndReads() {

          int grain_size = 5000;
          uint32_t history_update_threshold =
            mm_to_candidates_cache.GetUpdateThreshold(num_loaded_pairs, num_reads_, true);
              mm_to_candidates_cache.GetUpdateThreshold(num_loaded_pairs,
                                                        num_reads_, true);
#pragma omp taskloop grainsize(grain_size)
          for (uint32_t pair_index = 0; pair_index < num_loaded_pairs;
               ++pair_index) {
+1 −1
Original line number Diff line number Diff line
@@ -622,7 +622,7 @@ void Index::GenerateMinimizerSketch(
  int min_position = 0;

  for (uint32_t position = 0; position < sequence_length; ++position) {
    uint8_t current_base = SequenceBatch::CharToUint8(sequence[position]);
    uint8_t current_base = CharToUint8(sequence[position]);
    std::pair<uint64_t, uint64_t> current_seed = {UINT64_MAX, UINT64_MAX};
    if (current_base < 4) {
      // Not an ambiguous base.
Loading