Unverified Commit f04d97e6 authored by Axel Kohlmeyer's avatar Axel Kohlmeyer Committed by GitHub
Browse files

Merge pull request #1312 from stanmoore1/kk_update

Update Kokkos library to v2.8.00
parents b871b4e1 77347663
Loading
Loading
Loading
Loading
+21 −0
Original line number Diff line number Diff line
# Change Log

## [2.8.00](https://github.com/kokkos/kokkos/tree/2.8.00) (2019-02-05)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.7.24...2.8.00)

**Implemented enhancements:**

- Capability, Tests: C++14 support and testing [\#1914](https://github.com/kokkos/kokkos/issues/1914)
- Capability: Add environment variables for all command line arguments [\#1798](https://github.com/kokkos/kokkos/issues/1798)
- Capability: --kokkos-ndevices not working for Slurm [\#1920](https://github.com/kokkos/kokkos/issues/1920)
- View: Undefined behavior when deep copying from and to an empty unmanaged view [\#1967](https://github.com/kokkos/kokkos/issues/1967)
- BuildSystem: nvcc\_wrapper should stop immediately if nvcc is not in PATH [\#1861](https://github.com/kokkos/kokkos/issues/1861)

**Fixed bugs:**

- Cuda: Fix Volta Issues 1: Non-deterministic behavior on Volta, runs fine on Pascal [\#1949](https://github.com/kokkos/kokkos/issues/1949)
- Cuda: Fix Volta Issues 2: CUDA Team Scan gives wrong values on Volta with -G compile flag [\#1942](https://github.com/kokkos/kokkos/issues/1942)
- Cuda: illegal warp sync in parallel\_reduce by functor on Turing 75 [\#1958](https://github.com/kokkos/kokkos/issues/1958)
- Threads: Pthreads backend does not handle RangePolicy with offset correctly [\#1976](https://github.com/kokkos/kokkos/issues/1976)
- Atomics: atomic\_fetch\_oper has no case for Kokkos::complex\<double\> or other 16-byte types [\#1951](https://github.com/kokkos/kokkos/issues/1951)
- MDRangePolicy: Fix zero-length range [\#1948](https://github.com/kokkos/kokkos/issues/1948)
- TeamThreadRange: TeamThreadRange MaxLoc reduce doesn't compile [\#1909](https://github.com/kokkos/kokkos/issues/1909)

## [2.7.24](https://github.com/kokkos/kokkos/tree/2.7.24) (2018-11-04)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.7.00...2.7.24)

+29 −8
Original line number Diff line number Diff line
@@ -6,16 +6,16 @@ ifndef KOKKOS_PATH
endif
CXXFLAGS=$(CCFLAGS)

# Options: Cuda,ROCm,OpenMP,Pthread,Qthreads,Serial
# Options: Cuda,ROCm,OpenMP,Pthreads,Qthreads,Serial
KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Pthread"
#KOKKOS_DEVICES ?= "Pthreads"
# Options: 
# Intel:    KNC,KNL,SNB,HSW,BDW,SKX
# NVIDIA:   Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72
# NVIDIA:   Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75
# ARM:      ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2
# IBM:      BGQ,Power7,Power8,Power9
# AMD-GPUS: Kaveri,Carrizo,Fiji,Vega
# AMD-CPUS: AMDAVX,Ryzen,Epyc
# AMD-CPUS: AMDAVX,Ryzen,EPYC
KOKKOS_ARCH ?= ""
# Options: yes,no
KOKKOS_DEBUG ?= "no"
@@ -224,7 +224,7 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
else
  ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
     KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11
     #KOKKOS_INTERNAL_CXX14_FLAG := -std=c++14
     KOKKOS_INTERNAL_CXX14_FLAG := -std=c++14
     KOKKOS_INTERNAL_CXX1Y_FLAG := -std=c++1y
     #KOKKOS_INTERNAL_CXX17_FLAG := -std=c++17
     #KOKKOS_INTERNAL_CXX1Z_FLAG := -std=c++1Z
@@ -276,6 +276,7 @@ KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pas
KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal60)
KOKKOS_INTERNAL_USE_ARCH_VOLTA70 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta70)
KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta72)
KOKKOS_INTERNAL_USE_ARCH_TURING75 := $(call kokkos_has_string,$(KOKKOS_ARCH),Turing75)
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30)  \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32)  \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35)  \
@@ -284,6 +285,7 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLE
                                              + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60)  \
					      + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
					      + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
					      + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
@@ -300,6 +302,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
                                                + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60)  \
						+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
						+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
						+ $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \
                                                + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
                                                + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
                                                + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
@@ -331,7 +334,7 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
# AMD based.
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Ryzen)
KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),Epyc)
KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),EPYC)
KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(call kokkos_has_string,$(KOKKOS_ARCH),Kaveri)
KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(call kokkos_has_string,$(KOKKOS_ARCH),Carrizo)
KOKKOS_INTERNAL_USE_ARCH_FIJI := $(call kokkos_has_string,$(KOKKOS_ARCH),Fiji)
@@ -341,12 +344,12 @@ KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(call kokkos_has_string,$(KOKKOS_ARCH),gfx90
# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_SSE42      := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
KOKKOS_INTERNAL_USE_ARCH_AVX        := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
KOKKOS_INTERNAL_USE_ARCH_AVX2       := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW))
KOKKOS_INTERNAL_USE_ARCH_AVX2       := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_EPYC))
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC  := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX))

# Decide what ISA level we are able to support.
KOKKOS_INTERNAL_USE_ISA_X86_64    := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX))
KOKKOS_INTERNAL_USE_ISA_X86_64    := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_EPYC))
KOKKOS_INTERNAL_USE_ISA_KNC       := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9))
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7))
@@ -658,6 +661,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
  endif
endif

# AMD EPYC selected: hand the two architecture #define lines to
# kokkos_append_header, then add matching compile and link flags.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_EPYC), 1)
  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AMD_EPYC")
  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AMD_AVX2")

  # Every compiler except Intel receives the znver1 arch/tune flags;
  # the Intel compiler receives plain -mavx2 instead.
  ifneq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_LDFLAGS += -march=znver1 -mtune=znver1
    KOKKOS_CXXFLAGS += -march=znver1 -mtune=znver1
  else
    KOKKOS_LDFLAGS += -mavx2
    KOKKOS_CXXFLAGS += -mavx2
  endif
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80")
  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX")
@@ -950,6 +966,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA72")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72
  endif
  # Turing75 selected: pass the generic and the CC-specific #define lines to
  # kokkos_append_header, then complete the CUDA architecture flag.
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1)
    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_TURING")
    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_TURING75")
    # "=sm_75" is glued directly onto the existing flag text (no space), so
    # the variable ends up holding a single "<flag>=sm_75" word.
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75
  endif

  ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
    KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
+3 −1
Original line number Diff line number Diff line
@@ -73,6 +73,8 @@ For specifics see the LICENSE file contained in the repository or distribution.
  * NVCC 7.5 for CUDA (with gcc 4.8.4)
  * NVCC 8.0.44 for CUDA (with gcc 5.3.0)
  * NVCC 9.1 for CUDA (with gcc 6.1.0)
  * NVCC 9.2 for CUDA (with gcc 7.2.0)
  * NVCC 10.0 for CUDA (with gcc 7.4.0)

### Primary tested compilers on Power 8 are:
  * GCC 6.4.0 (OpenMP,Serial)
+10 −0
Original line number Diff line number Diff line
@@ -308,6 +308,16 @@ do
  shift
done

# Check that nvcc can be found in PATH and stop immediately if it cannot.
# The check is skipped when host_only is 1.
if [ $host_only -ne 1 ]; then
  var=$(which nvcc )
  if [ $? -gt 0 ]; then
    echo "Could not find nvcc in PATH"
    # Exit with an explicit failure status. Using "$?" here would report the
    # status of the echo above (0), so callers would see success even though
    # nvcc is missing.
    exit 1
  fi
fi


# Only print host compiler version
if [ $get_host_version -eq 1 ]; then
  $host_compiler --version
+1 −0
Original line number Diff line number Diff line
@@ -104,6 +104,7 @@ list(APPEND KOKKOS_ARCH_LIST
     Pascal61        # (GPU) NVIDIA Pascal generation CC 6.1
     Volta70         # (GPU) NVIDIA Volta generation CC 7.0
     Volta72         # (GPU) NVIDIA Volta generation CC 7.2
     Turing75         # (GPU) NVIDIA Turing generation CC 7.5
    )

# List of possible device architectures.
Loading