Commit 5a4a7ceb authored by Stan Moore's avatar Stan Moore
Browse files

Fix performance regression in KOKKOS package

parent 307e4714
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -70,6 +70,20 @@ __inline__ __device__
unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
{ return atomicSub((unsigned int*)dest,val); }

__inline__ __device__
unsigned int atomic_fetch_sub( volatile int64_t * const dest , const int64_t val )
{ return atomic_fetch_add(dest,-val); }

__inline__ __device__
unsigned int atomic_fetch_sub( volatile float * const dest , const float val )
{ return atomicAdd((float*)dest,-val); }

#if ( 600 <= __CUDA_ARCH__ )
__inline__ __device__
unsigned int atomic_fetch_sub( volatile double * const dest , const double val )
{ return atomicAdd((double*)dest,-val); }
#endif

template < typename T >
__inline__ __device__
T atomic_fetch_sub( volatile T * const dest ,