Commit 9ecc860e authored by Li's avatar Li
Browse files

Allow shifted reads with the same minimizers in the cache. Handle the paired-end case.

parent 9b5a3126
Loading
Loading
Loading
Loading
+40 −12
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@ class mm_cache
private:
	int cache_size ;
	struct _mm_cache_entry *cache ;
	int kmer_length ;
	
	// 0: not match. -1: opposite order. 1: same order
	int IsMinimizersMatchCache(const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, const struct _mm_cache_entry &cache)
@@ -39,7 +40,7 @@ private:
		{
			for (i = 0 ; i < size - 1 ; ++i)	
			{
				if (cache.offsets[i] != ((int)(minimizers[i + 1].second)>>1) - ((int)(minimizers[i].second)>>1))
				if (cache.offsets[i] != ((int)minimizers[i + 1].second>>1) - ((int)minimizers[i].second>>1))
					break ;
			}
			if (i >= size - 1)
@@ -58,7 +59,7 @@ private:
		{
			for (i = 0, j = size - 1; i < size - 1; ++i, --j)
			{
				if (cache.offsets[i] != ((int)(minimizers[j].second)>>1) - ((int)(minimizers[j - 1].second)>>1))
				if (cache.offsets[i] != ((int)minimizers[j].second>>1) - ((int)minimizers[j - 1].second>>1))
					break ;
			}

@@ -82,6 +83,11 @@ public:
		delete[] cache ;
	}
	
	// Store the k-mer length in this cache object. Query reads the stored
	// value when it computes the shift for a hit in the opposite direction.
	void SetKmerLength(int kl) { kmer_length = kl ; }

	// Return the hash entry index. -1 if failed.
	int Query(const std::vector<std::pair<uint64_t, uint64_t> > &minimizers, 
			std::vector<struct _candidate> &pos_candidates, std::vector<struct _candidate> &neg_candidates, 
@@ -98,19 +104,29 @@ public:
		{
			pos_candidates = cache[hidx].positive_candidates ;
			neg_candidates = cache[hidx].negative_candidates ;
			int size = pos_candidates.size() ;
			int shift = (int)minimizers[0].second>>1 ;
			for (i = 0 ; i < size ; ++i)
				pos_candidates[i].refPos -= shift ;
			size = neg_candidates.size() ;
			for (i = 0 ; i < size ; ++i)
				neg_candidates[i].refPos += shift ;
			return hidx ;
		}
		else if (direction == -1)
		else if (direction == -1) // The "read" is on the other direction of the cached "read"
		{
			int size = cache[hidx].negative_candidates.size() ;
			// Start position of the last minimizer should equal the first minimizer's end position in rc "read".
			int shift = read_len - ((int)minimizers[msize - 1].second>>1) - 1 + kmer_length - 1 ; 
			
			pos_candidates = cache[hidx].negative_candidates ;
			for (i = 0 ; i < size ; ++i)
				pos_candidates[i].refPos = cache[hidx].negative_candidates[i].refPos - read_len + 1 ;
				pos_candidates[i].refPos = cache[hidx].negative_candidates[i].refPos + shift - read_len + 1 ;

			size = cache[hidx].positive_candidates.size() ;
			neg_candidates = cache[hidx].positive_candidates ;
			for (i = 0 ; i < size ; ++i)
				neg_candidates[i].refPos = cache[hidx].positive_candidates[i].refPos + read_len - 1 ;
				neg_candidates[i].refPos = cache[hidx].positive_candidates[i].refPos - shift + read_len - 1 ;
			return hidx ;
		}
		else
@@ -139,17 +155,29 @@ public:
		if (cache[hidx].weight <= 0)
		{
			cache[hidx].weight = 1 ;
			int size = minimizers.size() ;
			cache[hidx].minimizers.resize(size) ;
			cache[hidx].offsets.resize(size - 1) ;
			for (i = 0 ; i < size ; ++i)
			cache[hidx].minimizers.resize(msize) ;
			if (msize == 0)
				return ;

			cache[hidx].offsets.resize(msize - 1) ;
			for (i = 0 ; i < msize ; ++i)
				cache[hidx].minimizers[i] = minimizers[i].first ;
			for (i = 0 ; i < size - 1; ++i)
			for (i = 0 ; i < msize - 1; ++i)
			{
				cache[hidx].offsets[i] = ((int)(minimizers[i + 1].second)>>1) - ((int)(minimizers[i].second)>>1) ;
				cache[hidx].offsets[i] = ((int)minimizers[i + 1].second>>1) - ((int)minimizers[i].second>>1) ;
			}
			cache[hidx].positive_candidates = pos_candidates ;
			cache[hidx].negative_candidates = neg_candidates ;

			// adjust the candidate position.
			int size = cache[hidx].positive_candidates.size() ;
			int shift = (int)minimizers[0].second>>1;
			for (i = 0 ; i < size ; ++i)
				cache[hidx].positive_candidates[i].refPos += shift ;
			size = cache[hidx].negative_candidates.size() ;
			for (i = 0 ; i < size ; ++i)
				cache[hidx].negative_candidates[i].refPos -= shift ;
			
		}
	}