Commit b6c93f96 authored by Steve Plimpton's avatar Steve Plimpton Committed by GitHub
Browse files

Merge pull request #629 from stanmoore1/kokkos_update

Update Kokkos library to v2.04.00
parents 7b7a5076 090c792d
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
# Change Log

## [2.04.00](https://github.com/kokkos/kokkos/tree/2.04.00) (2017-08-16)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.03.13...2.04.00)

**Implemented enhancements:**

- Added ROCm backend to support AMD GPUs
- Kokkos::complex\<T\> behaves slightly differently from std::complex\<T\> [\#1011](https://github.com/kokkos/kokkos/issues/1011)
- Kokkos::Experimental::Crs constructor arguments were in the wrong order [\#992](https://github.com/kokkos/kokkos/issues/992)
- Work graph construction ease-of-use (one lambda for count and fill) [\#991](https://github.com/kokkos/kokkos/issues/991)
- when\_all returns pointer of futures (improved interface) [\#990](https://github.com/kokkos/kokkos/issues/990)
- Allow assignment of LayoutLeft to LayoutRight or vice versa for rank-0 Views [\#594](https://github.com/kokkos/kokkos/issues/594)
- Changed the meaning of Kokkos\_ENABLE\_CXX11\_DISPATCH\_LAMBDA [\#1035](https://github.com/kokkos/kokkos/issues/1035)

**Fixed bugs:**

- memory pool default constructor does not properly set member variables. [\#1007](https://github.com/kokkos/kokkos/issues/1007)

## [2.03.13](https://github.com/kokkos/kokkos/tree/2.03.13) (2017-07-27)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.03.05...2.03.13)

+95 −6
Original line number Diff line number Diff line
@@ -4,10 +4,16 @@
KOKKOS_PATH=../../lib/kokkos
CXXFLAGS=$(CCFLAGS)

# Options: Cuda,OpenMP,Pthreads,Qthreads,Serial
# Options: Cuda,ROCm,OpenMP,Pthreads,Qthreads,Serial
KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Pthreads"
# Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,Power9,KNL,BDW,SKX
# Options: 
# Intel:    KNC,KNL,SNB,HSW,BDW,SKX
# NVIDIA:   Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61
# ARM:      ARMv80,ARMv81,ARMv8-ThunderX
# IBM:      BGQ,Power7,Power8,Power9
# AMD-GPUS: Kaveri,Carrizo,Fiji,Vega,gfx901
# AMD-CPUS: AMDAVX,Ryzen,Epyc
KOKKOS_ARCH ?= ""
# Options: yes,no
KOKKOS_DEBUG ?= "no"
@@ -43,8 +49,8 @@ KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | gr
KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l))


# Check for Kokkos Host Execution Spaces one of which must be on.
KOKKOS_INTERNAL_USE_OPENMPTARGET := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMPTarget | wc -l))
KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(subst OpenMPTarget,,$(KOKKOS_DEVICES)) | grep OpenMP | wc -l))
KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l))
KOKKOS_INTERNAL_USE_QTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthreads | wc -l))
@@ -60,6 +66,8 @@ endif

# Check for other Execution Spaces.
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
KOKKOS_INTERNAL_USE_ROCM := $(strip $(shell echo $(KOKKOS_DEVICES) | grep ROCm | wc -l))
KOKKOS_INTERNAL_USE_OPENMPTARGET := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMPTarget | wc -l))

ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
@@ -87,6 +95,7 @@ ifneq ($(MPICH_CXX),)
endif
KOKKOS_INTERNAL_COMPILER_CLANG       := $(strip $(shell $(CXX) --version       2>&1 | grep clang               | wc -l))
KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(strip $(shell $(CXX) --version       2>&1 | grep "apple-darwin"      | wc -l))
KOKKOS_INTERNAL_COMPILER_HCC         := $(strip $(shell $(CXX) --version       2>&1 | grep HCC                 | wc -l))

ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
  KOKKOS_INTERNAL_COMPILER_CLANG = 1
@@ -99,6 +108,10 @@ endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
  KOKKOS_INTERNAL_COMPILER_CLANG = 0
endif
# AMD HCC's --version output matches both the "clang" and the "HCC" greps
# above, so both detection flags end up set. Clear the generic clang flag here
# so that the compiler is treated as HCC only.
# (The variable name must be spelled exactly KOKKOS_INTERNAL_COMPILER_CLANG;
# a misspelling silently creates an unrelated variable and leaves clang on.)
ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
  KOKKOS_INTERNAL_COMPILER_CLANG = 0
endif

ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
  KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell clang --version | grep version | cut -d ' ' -f3 | tr -d '.')
@@ -182,12 +195,16 @@ else
  else
    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
      KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11
    else
      ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
        KOKKOS_INTERNAL_CXX11_FLAG := 
      else
        KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
        KOKKOS_INTERNAL_CXX1Z_FLAG := --std=c++1z
      endif
    endif
  endif
endif

# Check for Kokkos Architecture settings.

@@ -259,6 +276,13 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_

# AMD based.
# Each flag below is the number of lines of `echo $(KOKKOS_ARCH)` that contain
# the given architecture name, i.e. 1 when the name occurs anywhere in
# KOKKOS_ARCH and 0 otherwise. The match is a case-sensitive substring match
# (plain grep), not a whole-word comparison.
# CPU architectures:
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(strip $(shell echo $(KOKKOS_ARCH) | grep Ryzen | wc -l))
KOKKOS_INTERNAL_USE_ARCH_EPYC := $(strip $(shell echo $(KOKKOS_ARCH) | grep Epyc | wc -l))
# GPU architectures (consumed by the ROCm architecture-flag section):
KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kaveri | wc -l))
KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(strip $(shell echo $(KOKKOS_ARCH) | grep Carrizo | wc -l))
KOKKOS_INTERNAL_USE_ARCH_FIJI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Fiji | wc -l))
KOKKOS_INTERNAL_USE_ARCH_VEGA := $(strip $(shell echo $(KOKKOS_ARCH) | grep Vega | wc -l))
KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(strip $(shell echo $(KOKKOS_ARCH) | grep gfx901 | wc -l))

# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_SSE42      := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM) | bc ))
@@ -271,6 +295,7 @@ KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_US
KOKKOS_INTERNAL_USE_ISA_X86_64    := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM)+$(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
KOKKOS_INTERNAL_USE_ISA_KNC       := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc ))
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER7) | bc ))

# Decide whether we can support transactional memory
KOKKOS_INTERNAL_USE_TM            := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
@@ -319,6 +344,10 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp )
endif

# ROCm requested in KOKKOS_DEVICES: record it in the generated config header.
ifeq (1,$(KOKKOS_INTERNAL_USE_ROCM))
  tmp := $(shell echo '\#define KOKKOS_ENABLE_ROCM 1' >> KokkosCore_config.tmp)
endif

# OpenMPTarget requested in KOKKOS_DEVICES: record it in the generated config header.
ifeq (1,$(KOKKOS_INTERNAL_USE_OPENMPTARGET))
  tmp := $(shell echo '\#define KOKKOS_ENABLE_OPENMPTARGET 1' >> KokkosCore_config.tmp)
endif
@@ -363,6 +392,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
  tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
endif

# Big-endian POWER ISA selected: emit KOKKOS_USE_ISA_POWERPCBE into the config
# header, wrapped in an #ifndef __CUDA_ARCH__ guard so the define is not
# visible when __CUDA_ARCH__ is defined.
ifeq (1,$(KOKKOS_INTERNAL_USE_ISA_POWERPCBE))
  tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
  tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCBE" >> KokkosCore_config.tmp )
  tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
endif

tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp)
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
@@ -561,6 +596,18 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
  endif
endif

# POWER7 selected in KOKKOS_ARCH: record it in the config header and add the
# matching CPU tuning flags.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
  tmp := $(shell echo "\#define KOKKOS_ARCH_POWER7 1" >> KokkosCore_config.tmp )

  # PGI receives no POWER7-specific flags. Any other compiler is assumed to
  # accept the GNU-style -mcpu/-mtune options, at both compile and link time.
  ifneq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
    KOKKOS_CXXFLAGS += -mcpu=power7 -mtune=power7
    KOKKOS_LDFLAGS  += -mcpu=power7 -mtune=power7
  endif
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
  tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp )

@@ -742,7 +789,49 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  endif
endif

# Figure out the architecture flag for ROCm.
# This whole section only applies when ROCm was requested in KOKKOS_DEVICES.
# For each AMD GPU architecture found in KOKKOS_ARCH it:
#   - appends "#define KOKKOS_ARCH_ROCM <gfx number>" and a per-architecture
#     "#define KOKKOS_ARCH_<NAME> 1" to KokkosCore_config.tmp, and
#   - sets KOKKOS_INTERNAL_ROCM_ARCH_FLAG to the matching --amdgpu-target.
# The checks are independent (not an else-chain): if KOKKOS_ARCH names several
# GPU architectures, the defines for all of them are written and the flag from
# the last matching block below wins, because := overwrites the earlier value.
# If none matches, KOKKOS_INTERNAL_ROCM_ARCH_FLAG is left unset by this section.
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
  # Let's start with adding architecture defines
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KAVERI), 1)
    tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 701" >> KokkosCore_config.tmp )
    tmp := $(shell echo "\#define KOKKOS_ARCH_KAVERI 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx701 
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_CARRIZO), 1)
    tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 801" >> KokkosCore_config.tmp )
    tmp := $(shell echo "\#define KOKKOS_ARCH_CARRIZO 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx801 
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_FIJI), 1)
    tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 803" >> KokkosCore_config.tmp )
    tmp := $(shell echo "\#define KOKKOS_ARCH_FIJI 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx803
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA), 1)
    tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 900" >> KokkosCore_config.tmp )
    tmp := $(shell echo "\#define KOKKOS_ARCH_VEGA 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx900 
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_GFX901), 1)
    tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 901" >> KokkosCore_config.tmp )
    tmp := $(shell echo "\#define KOKKOS_ARCH_GFX901 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx901 
  endif
 
  
  # Locate the HCC installation prefix from the full path of $(CXX): the
  # substitution reference removes a trailing "/bin/clang++". Users can bypass
  # this by setting ROCM_HCC_PATH themselves (?= keeps an existing value).
  # NOTE(review): when $(CXX) does not resolve to a path ending in
  # /bin/clang++, the substitution leaves the path unchanged, so the
  # hcc-config lookups below would presumably fail - confirm whether other
  # compiler driver names need to be handled.
  KOKKOS_INTERNAL_HCC_PATH := $(shell which $(CXX))
  ROCM_HCC_PATH ?= $(KOKKOS_INTERNAL_HCC_PATH:/bin/clang++=)

  # Ask hcc-config for the compile and link flags, and additionally link
  # against hc_am and libm. The --amdgpu-target flag chosen above is added to
  # the link flags only.
  KOKKOS_CXXFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --cxxflags) 
  KOKKOS_LDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm 
  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_ROCM_ARCH_FLAG)

  # Add every ROCm backend source and header found under core/src/ROCm.
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.cpp)
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.hpp)
endif

KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1)

ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
  KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l))
else
+11 −0
Original line number Diff line number Diff line
@@ -42,6 +42,17 @@ Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_C
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
endif

# Object rules for the ROCm backend; only defined when ROCm is enabled.
# Each object depends on $(KOKKOS_CPP_DEPENDS) plus its own source file and is
# compiled into the current directory (-c with no -o). The source path is
# spelled out in the recipe because the source is the last prerequisite, so
# $< would not refer to it whenever $(KOKKOS_CPP_DEPENDS) is non-empty.
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
Kokkos_ROCm_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Exec.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Exec.cpp
Kokkos_ROCm_Space.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Space.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Space.cpp
Kokkos_ROCm_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Task.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Task.cpp
Kokkos_ROCm_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
endif

ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
+22 −0
Original line number Diff line number Diff line
@@ -80,6 +80,9 @@ Other compilers working:
  X86:
   Cygwin 2.1.0 64bit with gcc 4.9.3

Limited testing of the following compilers on POWER7+ systems:
  GCC 4.8.5 (on RHEL7.1 POWER7+)

Known non-working combinations:
  Power8:
   Pthreads backend
@@ -171,3 +174,22 @@ Contributions to Kokkos are welcome. In order to do so, please open an issue
where a feature request or bug can be discussed. Then issue a pull request
with your contribution. Pull requests must be issued against the develop branch. 

===========================================================================
====Citing Kokkos==========================================================
===========================================================================

If you publish work which mentions Kokkos, please cite the following paper:

@article{CarterEdwards20143202,
title = "Kokkos: Enabling manycore performance portability through polymorphic memory access patterns ",
journal = "Journal of Parallel and Distributed Computing ",
volume = "74",
number = "12",
pages = "3202 - 3216",
year = "2014",
note = "Domain-Specific Languages and High-Level Frameworks for High-Performance Computing ",
issn = "0743-7315",
doi = "https://doi.org/10.1016/j.jpdc.2014.07.003",
url = "http://www.sciencedirect.com/science/article/pii/S0743731514001257",
author = "H. Carter Edwards and Christian R. Trott and Daniel Sunderland"
}
+140 −0
Original line number Diff line number Diff line
Summary:

- Step 1: Testing Kokkos itself using test_all_sandia

- Step 2: Testing of Kokkos integrated into Trilinos (config/trilinos-integration/*.sh)

- Step 3: Locally update CHANGELOG, merge into master, edit config/master_history.txt

- Step 4: Locally snapshot new master into corresponding Trilinos branch (develop or temporary), push with checkin-test-sems.sh

- Step 5: Push local Kokkos master to GitHub (need Owner approval)

Steps 1, 2, and 4 include testing that may fail. These failures must be fixed either by pull requests to Kokkos develop, or by creating a new Trilinos branch for parts of Trilinos that must be updated. This is what usually takes the most time.


// -------------------------------------------------------------------------------- //


Step 1: The following should be repeated on enough machines to cover all
supported compilers. Those machines are:

    kokkos-dev
    ??? <- TODO: identify other machines

  1.1. Clone kokkos develop branch (or just switch to it)

         git clone -b develop git@github.com:kokkos/kokkos.git
         cd kokkos

  1.2. Create a testing directory

         mkdir testing
         cd testing

  1.3. Run the test_all_sandia script with no options to test all compilers

         nohup ../config/test_all_sandia &
         tail -f nohup.out                   # to watch progress

// -------------------------------------------------------------------------------- //

Step 2:
  2.1. Build and test Trilinos with 4 different configurations; Run scripts for white and shepard that are provided in kokkos/config/trilinos-integration. These scripts load their own modules/environment, so don't require preparation. You can run all four at the same time, use separate directories for each.

         mkdir serial
         cd serial
         nohup KOKKOS_PATH/config/trilinos-integration/shepard_jenkins_run_script_serial_intel &

  2.2. Compare the compile errors and test failures between updated and pristine versions. There may be compile failures that happen in both, tests that fail in both, and there may be tests that only fail sometimes (thus, rerun tests manually as needed).

// -------------------------------------------------------------------------------- //

Step 3: This step should be run on kokkos-dev

  3.1. If you don't have a GitHub token already, generate one for yourself (this will give you TOKEN):

       https://github.com/settings/tokens

  3.2. Get a clean copy of the Kokkos develop branch

       git clone -b develop git@github.com:kokkos/kokkos.git
       cd kokkos

  3.3. Generate the initial changelog. Use the most recent tag as OLDTAG (`git tag -l` can show you all tags). The NEWTAG is the new version number, e.g. "2.04.00". RUN THIS OUTSIDE THE KOKKOS SOURCE TREE!

       module load ruby/2.3.1/gcc/5.3.0
       github_changelog_generator kokkos/kokkos --token TOKEN --no-pull-requests --include-labels 'InDevelop' --enhancement-labels 'enhancement,Feature Request' --future-release 'NEWTAG' --between-tags 'NEWTAG,OLDTAG'
       cat CHANGELOG.md

  3.4. Manually cleanup and commit the change log. Pushing to develop requires Owner permission.
       (Copy the new section from the generated CHANGELOG.md to KOKKOS_PATH/CHANGELOG.md)
       (Make desired changes to CHANGELOG.md to enhance clarity (remove issues not noteworthy))
       (Commit and push the CHANGELOG.md to develop)

  3.5. Merge develop into master. DO NOT FAST-FORWARD THE MERGE!!!!

       (From kokkos directory):
       git checkout master
       git merge --no-ff origin/develop

  3.6. Update the tag in kokkos/config/master_history.txt

       Tag description: MajorNumber.MinorNumber.WeeksSinceMinorNumberUpdate
       Tag field widths: #.#.##
       date description: month:day:year
       date field widths: ##:##:####
       master description: SHA1 of previous master commit (use `git log`?)
       develop description: SHA1 of merged develop branch
       SHA1 field width: ######## (8 chars)

       # Append to config/master_history.txt:

       tag:  2.03.13    date: 07:27:2017    master: da314444    develop: 29ccb58a
       
       git commit --amend -a


  3.7. Create the new tag:

       git tag -a #.#.##

         (type the following into the tag message (same fields as in step 3.6))
         tag: #.#.##
         date: mm/dd/yyyy
         master: sha1
         develop: sha1

  3.8. DO NOT PUSH YET !!!


// -------------------------------------------------------------------------------- //

Step 4: This step can be done on any SEMS machine (e.g. kokkos-dev). Actually, the checkin step requires lots of disk space and RAM. Use ceerws1113 if you have access to it.

  4.1 Clone the Trilinos corresponding branch (or just switch to it)

        git clone -b develop git@github.com:trilinos/Trilinos.git
        TRILINOS_PATH=$PWD/Trilinos

  4.2 Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both Trilinos and Kokkos be clean - no untracked or modified files. Run the following outside of the Kokkos and Trilinos source trees.

        module load sems-python/2.7.9
        python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages

  4.3. Run checkin-test to push to trilinos using the CI build modules (gcc/4.9.3)

       cd TRILINOS_PATH
       mkdir CHECKIN
       cd CHECKIN
       nohup ../cmake/std/sems/checkin-test-sems.sh --do-all --push &

  4.4. If there are failures, fix and backtrack. Otherwise, go to next step

// -------------------------------------------------------------------------------- //

Step 5: Push Kokkos master to GitHub (requires Owner permission).
      
       cd KOKKOS_PATH
       git push --follow-tags origin master 
Loading