Unverified Commit 94c6d2d5 authored by Steve Plimpton's avatar Steve Plimpton Committed by GitHub
Browse files

Merge pull request #918 from stanmoore1/kk_atomics

Fix performance regression in KOKKOS package
parents 585a164e 5a4a7ceb
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -70,6 +70,20 @@ __inline__ __device__
unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
{
  // Unsigned 32-bit overload: forwards directly to the CUDA atomicSub builtin.
  // atomicSub takes a non-volatile pointer, so the volatile qualifier is
  // stripped before the call; the builtin's result is returned unchanged.
  unsigned int * const target = const_cast<unsigned int*>(dest);
  return atomicSub(target, val);
}

// Signed 64-bit overload of atomic_fetch_sub.
// Implemented as an atomic add of the negated operand, delegating to the
// atomic_fetch_add overload that accepts (volatile int64_t*, int64_t).
// Returns the result of that call as a full int64_t; the previous
// `unsigned int` return type truncated it to 32 bits and dropped the sign.
// NOTE: negating val overflows when val == INT64_MIN.
__inline__ __device__
int64_t atomic_fetch_sub( volatile int64_t * const dest , const int64_t val )
{ return atomic_fetch_add(dest,-val); }

// Single-precision overload of atomic_fetch_sub.
// CUDA has no floating-point atomicSub, so the subtraction is expressed as
// atomicAdd of the negated operand. atomicAdd takes a non-volatile pointer,
// hence the cast. The value handed back by atomicAdd is returned as a float;
// the previous `unsigned int` return type converted it to an integer, which
// discarded the fractional part and was undefined for negative values.
__inline__ __device__
float atomic_fetch_sub( volatile float * const dest , const float val )
{ return atomicAdd((float*)dest,-val); }

// Double-precision overload of atomic_fetch_sub.
// Only compiled for compute capability 6.0 and newer, where the hardware
// atomicAdd(double*) builtin is available; on older architectures this
// overload is absent and callers resolve to another atomic_fetch_sub overload.
// The subtraction is expressed as atomicAdd of the negated operand, and the
// builtin's result is returned as a double (the previous `unsigned int`
// return type converted it to an integer, corrupting the value).
#if ( 600 <= __CUDA_ARCH__ )
__inline__ __device__
double atomic_fetch_sub( volatile double * const dest , const double val )
{ return atomicAdd((double*)dest,-val); }
#endif

template < typename T >
__inline__ __device__
T atomic_fetch_sub( volatile T * const dest ,