#!/bin/bash
# Compiles an LLVM bitcode file to GCN ISA, and store as HSACO
# $1 = input ll name
# $2 = HSACO code object file name
# $3 = (optional) --amdgpu-target=(GPU family name)
#                          selects AMDGPU target

# Configuration knobs. Each KM* variable may be preset in the environment;
# the ${VAR:=default} expansion assigns the default only when the variable is
# unset or empty.

# enable bash debugging (command tracing) when KMDBSCRIPT is "1"
: "${KMDBSCRIPT:=0}"
if [[ "$KMDBSCRIPT" == "1" ]]; then
  set -x
fi

# directory where files are dumped
: "${KMDUMPDIR:=.}"

# dump the LLVM bitcode
: "${KMDUMPLLVM:=0}"

# dump the isa
: "${KMDUMPISA:=0}"

# pass extra options to OPT
# KMOPTOPT can be used to pass last-minute options to opt in the backend
# if not set, then "-O3" would be passed to opt
: "${KMOPTOPT:=-O3}"

# pass extra options to LLC
# KMOPTLLC can be used to pass last-minute options to llc in the backend
# if not set, then "-O2" will be passed to llc
: "${KMOPTLLC:=-O2}"

# enable LLVM hijacking
: "${KMHACKLLVM:=0}"

# enable ThinLTO
: "${KMTHINLTO:=0}"

# flag for early finalization (always starts at "0", not taken from the
# environment)
AMDGPU_OBJ_CODEGEN="0"

# flag for function calls enabled (always starts at "0", not taken from the
# environment)
AMDGPU_FUNC_CALLS="0"

# check command line arguments: the input and output file names are mandatory
if [[ "$#" -lt 2 ]]; then
  echo "Usage: $0 input_LLVM output_hsaco_kernel [--amdgpu-target=(GPU family name)]" >&2
  echo "  --amdgpu-target=(GPU family name)" >&2
  echo "           selects AMDGPU target" >&2
  exit 1
fi

# The input must be an existing regular file. "$1" is quoted so that an empty
# name or a name containing whitespace is still tested as a single path
# instead of silently bypassing the check.
if [[ ! -f "$1" ]]; then
  echo "input LLVM IR $1 is not valid" >&2
  exit 1
fi

# Paths of the LLVM tools and the plugin library directory, all derived from
# the directory this script was invoked from. "$0" is quoted so that an
# install path containing whitespace is handed to dirname as one argument.
BINDIR=$(dirname -- "$0")
AS="$BINDIR/llvm-as"
OPT="$BINDIR/opt"
LLC="$BINDIR/llc"
LINK="$BINDIR/llvm-link"
LIB="$BINDIR/../lib"
LLD="$BINDIR/ld.lld"


################
# Determine the ROCm device libs path
################

# Candidate directories are given relative to the resolved (symlink-free)
# location of this script and are probed in order; the first one that
# contains ocml.amdgcn.bc becomes ROCM_LIB.
HCC_BIN_PATH=$(dirname -- "$(readlink -f -- "${BASH_SOURCE[0]}")")
ROCM_DEVICE_LIBS_SEARCH_PATHS="$HCC_BIN_PATH/../../rocdl/lib;$HCC_BIN_PATH/../rocdl/lib;"
ROCM_LIB=""

# Split the ';'-separated list into an array so that each candidate stays one
# word even when it contains whitespace or glob characters.
IFS=';' read -r -a ROCM_SEARCH_DIRS <<< "$ROCM_DEVICE_LIBS_SEARCH_PATHS"
for SEARCH_PATH in "${ROCM_SEARCH_DIRS[@]}"; do
  # skip empty entries such as the one after a trailing ';'
  [[ -n "$SEARCH_PATH" ]] || continue
  if [[ -f "$SEARCH_PATH/ocml.amdgcn.bc" ]]; then
    ROCM_LIB="$(readlink -f -- "$SEARCH_PATH")"
    break
  fi
done

# ROCM_LIB is still empty when no candidate matched, so also report where the
# search looked; the diagnostic goes to stderr like the other argument errors.
if [[ ! -f "$ROCM_LIB/ocml.amdgcn.bc" ]]; then
  echo "ROCm Device Libs file ocml.amdgcn.bc is missing from $ROCM_LIB (searched: $ROCM_DEVICE_LIBS_SEARCH_PATHS)" >&2
  exit 1
fi

################
# Command-line options
################

#######################################
# Scan every command-line word for the options this script understands.
# Words that match no option (including the input and output file names)
# are ignored.
# Globals written:
#   AMDGPU_TARGET, AMDGPU_OBJ_CODEGEN, KMTHINLTO, AMDGPU_FUNC_CALLS,
#   HCC_EXTRA_LIBRARIES, KMDUMPISA, KMDUMPLLVM, KMDUMPDIR
# Arguments:
#   the script's positional parameters, passed as "$@" so that each argument
#   stays a single word (no re-splitting on whitespace, no glob expansion)
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --amdgpu-target=*)
        AMDGPU_TARGET="${arg#*=}"
        ;;
      --early-finalize)
        # early finalization also switches ThinLTO off
        AMDGPU_OBJ_CODEGEN="1"
        KMTHINLTO="0"
        ;;
      --amdgpu-func-calls)
        AMDGPU_FUNC_CALLS="1"
        ;;
      --hcc-extra-libs=*)
        # may be given several times; the values accumulate, space-separated
        HCC_EXTRA_LIBRARIES="$HCC_EXTRA_LIBRARIES ${arg#*=}"
        ;;
      --dump-isa)
        KMDUMPISA=1
        ;;
      --dump-llvm)
        KMDUMPLLVM=1
        ;;
      --dump-dir=*)
        KMDUMPDIR="${arg#*=}"
        ;;
    esac
  done
}
parse_args "$@"

# hijack LLVM #1: when hijacking is enabled and ./hack.input.ll exists,
# assemble it with llvm-as and overwrite the input file ($1) with the result
if [[ "$KMHACKLLVM" == "1" ]]; then
  if [[ -e ./hack.input.ll ]]; then
    echo "Use ./hack.input.ll to hijack $1"
    "$AS" ./hack.input.ll
    cp ./hack.input.bc "$1"
  fi
fi

# dump the (possibly hijacked) input bitcode into the dump directory, like
# every other dump produced by this script
if [[ "$KMDUMPLLVM" == "1" ]]; then
  cp "$1" "${KMDUMPDIR}/dump.input.bc"
fi

HCC_EXTRA_ARCH_FILE=""

#######################################
# Select the ROCm-Device-Libs ISA version library and the optional extra
# per-architecture library for one AMDGPU target.
# Globals:
#   ROCM_LIB                      (read)
#   HCC_EXTRA_LIBRARIES_GFX<NNN>  (read, selected by target number)
#   OCLC_ISA_VERSION_LIB          (written for a supported target)
#   HCC_EXTRA_ARCH_FILE           (written for a supported target)
# Arguments:
#   $1 - AMDGPU target name such as gfx900; may be empty because the
#        --amdgpu-target option is optional
# Returns:
#   0 always; an empty or unsupported target leaves both globals untouched
#######################################
select_target_libs() {
  local target=$1
  local isa_number extra_var
  case "$target" in
    gfx700|gfx701|gfx801|gfx802|gfx803|gfx900|gfx901|gfx906)
      isa_number="${target#gfx}"
      extra_var="HCC_EXTRA_LIBRARIES_GFX${isa_number}"
      OCLC_ISA_VERSION_LIB="$ROCM_LIB/oclc_isa_version_${isa_number}.amdgcn.bc"
      # indirect expansion: value of the variable whose name is in extra_var
      HCC_EXTRA_ARCH_FILE="${!extra_var}"
      ;;
    *)
      # no ISA version library is selected for any other target
      ;;
  esac
}

# select appropriate ROCm-Device-Libs per AMDGPU_TARGET
select_target_libs "$AMDGPU_TARGET"

# space-separated list of bitcode libraries handed to llvm-link
HCC_BC_LIBS="$ROCM_LIB/hc.amdgcn.bc $ROCM_LIB/hip.amdgcn.bc $ROCM_LIB/opencl.amdgcn.bc $ROCM_LIB/ocml.amdgcn.bc $ROCM_LIB/ockl.amdgcn.bc $OCLC_ISA_VERSION_LIB $ROCM_LIB/oclc_finite_only_off.amdgcn.bc $ROCM_LIB/oclc_daz_opt_off.amdgcn.bc $ROCM_LIB/oclc_correctly_rounded_sqrt_on.amdgcn.bc $ROCM_LIB/oclc_unsafe_math_off.amdgcn.bc"

# the wavefront-size library is added only when it is present in ROCM_LIB
if [[ -f "$ROCM_LIB/oclc_wavefrontsize64_on.amdgcn.bc" ]]; then
   HCC_BC_LIBS="$HCC_BC_LIBS $ROCM_LIB/oclc_wavefrontsize64_on.amdgcn.bc"
fi

# include libraries specified through the HCC_EXTRA_LIBRARIES environment variable
HCC_BC_LIBS="$HCC_BC_LIBS  $HCC_EXTRA_LIBRARIES $HCC_EXTRA_ARCH_FILE"

# Link the input bitcode ($1) with the device libraries into $2.linked.bc.
# $HCC_BC_LIBS is intentionally left unquoted: it is a space-separated list
# that has to split into one word per library.
"$LINK" -suppress-warnings -o "$2.linked.bc" "$1" $HCC_BC_LIBS

# error handling for llvm-link
RETVAL=$?
if (( RETVAL != 0 )); then
  echo "Generating AMD GCN kernel failed in llvm-link with ROCm-Device-Libs for target: $AMDGPU_TARGET" >&2
  exit "$RETVAL"
fi

# keep a copy of the linked bitcode when LLVM dumps are requested
if [[ "$KMDUMPLLVM" == "1" ]]; then
  cp "$2.linked.bc" "${KMDUMPDIR}/dump.linked.bc"
fi

# Invoke HCC-specific opt passes: load the two HCC pass plugins from $LIB and
# run them, together with the listed cleanup passes, on $2.linked.bc,
# writing $2.selected.bc. All paths are quoted so a name with whitespace does
# not split (an unquoted redirect target would be an "ambiguous redirect").
"$OPT" -load "$LIB/LLVMSelectAcceleratorCode.so" \
  -load "$LIB/LLVMPromotePointerKernArgsToGlobal.so" \
  -select-accelerator-code "-sac-enable-function-calls=$AMDGPU_FUNC_CALLS" \
  -promote-pointer-kernargs-to-global \
  -dce -globaldce -always-inline -infer-address-spaces \
  < "$2.linked.bc" -o "$2.selected.bc"

# error handling for HCC-specific opt passes
RETVAL=$?
if (( RETVAL != 0 )); then
  echo "Generating AMD GCN kernel failed in HCC-specific opt passes for target: $AMDGPU_TARGET" >&2
  exit "$RETVAL"
fi

# Optimize $2.selected.bc for the selected target, writing $2.opt.bc.
# Optimization notes:
#  -disable-simplify-libcalls:  prevents transforming loops into library calls such as memset, memcopy on GPU
# $KMOPTOPT is intentionally unquoted so that it can carry several options.
"$OPT" -mtriple amdgcn-amd-amdhsa "-mcpu=$AMDGPU_TARGET" \
  -amdgpu-internalize-symbols -disable-simplify-libcalls $KMOPTOPT -verify \
  < "$2.selected.bc" -o "$2.opt.bc"

# error handling for opt
RETVAL=$?
if (( RETVAL != 0 )); then
  echo "Generating AMD GCN kernel failed in opt for target: $AMDGPU_TARGET" >&2
  exit "$RETVAL"
fi

# hijack LLVM #2: when hijacking is enabled and ./hack-<target>.opt.ll
# exists, assemble it with llvm-as and overwrite $2.opt.bc with the result
if [[ "$KMHACKLLVM" == "1" ]]; then
  if [[ -e "./hack-$AMDGPU_TARGET.opt.ll" ]]; then
    echo "Use ./hack-$AMDGPU_TARGET.opt.ll to hijack $2.opt.bc"
    "$AS" "./hack-$AMDGPU_TARGET.opt.ll"
    cp "./hack-$AMDGPU_TARGET.opt.bc" "$2.opt.bc"
  fi
fi

# copy the selected and optimized bitcode into the dump directory on request
if [[ "$KMDUMPLLVM" == "1" ]]; then
  cp "$2.selected.bc" "${KMDUMPDIR}/dump-$AMDGPU_TARGET.selected.bc"
  cp "$2.opt.bc" "${KMDUMPDIR}/dump-$AMDGPU_TARGET.opt.bc"
fi


# Disable code object v3, generate code object v2 for now
CODE_OBJECT_FORMAT="-mattr=-code-object-v3"

# With ThinLTO the object file produced by llc is written straight to $2;
# otherwise it goes to the intermediate $2.isabin that ld.lld consumes later.
if [[ "$KMTHINLTO" == "1" ]]; then
  LLC_OUTPUT="$2"
else
  LLC_OUTPUT="$2.isabin"
fi

# $KMOPTLLC is intentionally unquoted so that it can carry several options.
"$LLC" -mtriple amdgcn-amd-amdhsa "-mcpu=$AMDGPU_TARGET" "$CODE_OBJECT_FORMAT" \
  $KMOPTLLC "-amdgpu-function-calls=$AMDGPU_FUNC_CALLS" -filetype=obj \
  -o "$LLC_OUTPUT" "$2.opt.bc"

# error handling for llc
RETVAL=$?
if (( RETVAL != 0 )); then
  echo "Generating AMD GCN kernel failed in llc for target: $AMDGPU_TARGET" >&2
  exit "$RETVAL"
fi

# When ISA dumps are requested, copy the object file produced by llc into the
# dump directory and run llc a second time with -filetype=asm to obtain a
# textual listing, which is then moved into the dump directory as well.
if [[ "$KMDUMPISA" == "1" ]]; then
  # the object file lives at $2 under ThinLTO and at $2.isabin otherwise
  if [[ "$KMTHINLTO" == "1" ]]; then
    cp "$2" "${KMDUMPDIR}/dump-$AMDGPU_TARGET.isabin"
  else
    cp "$2.isabin" "${KMDUMPDIR}/dump-$AMDGPU_TARGET.isabin"
  fi
  # $KMOPTLLC is intentionally unquoted so that it can carry several options.
  "$LLC" -mtriple amdgcn-amd-amdhsa "-mcpu=$AMDGPU_TARGET" "$CODE_OBJECT_FORMAT" \
     $KMOPTLLC "-amdgpu-function-calls=$AMDGPU_FUNC_CALLS" -filetype=asm -o "$2.isa" "$2.opt.bc"
  mv "$2.isa" "${KMDUMPDIR}/dump-$AMDGPU_TARGET.isa"
fi

# ThinLTO does not perform LLD here inside clamp-device.in.
# It will perform LLD in clamp-link.in after all parallel kernels
# running clamp-device are finished for each target, linking the
# isabin files into a single hsaco. Default path has single kernel.
if [[ "$KMTHINLTO" != "1" ]]; then
  # link the intermediate object into the final code object at $2
  "$LLD" -shared "$2.isabin" -o "$2"
  # error handling for ld.lld
  RETVAL=$?
  if (( RETVAL != 0 )); then
    echo "Generating AMD GCN kernel failed in ld.lld for target: $AMDGPU_TARGET" >&2
    exit "$RETVAL"
  fi

  if [[ "$KMDUMPISA" == "1" ]]; then
    cp "$2" "${KMDUMPDIR}/dump-$AMDGPU_TARGET.hsaco"
  fi
  # the intermediate object is no longer needed once it has been linked
  rm -f -- "$2.isabin"
fi

#######################################
# Remove the intermediate files this script creates next to the output.
# $1.selected.bc is included: the HCC-specific opt stage writes it and
# nothing else deletes it. The output file itself is left alone.
# Arguments:
#   $1 - output (HSACO) file name whose suffixed temp files are removed
#######################################
remove_temp_files() {
  local out=$1
  rm -f -- "$out.promote.bc" "$out.linked.bc" "$out.selected.bc" \
    "$out.opt.bc" "$out.isa"
}

# remove temp file
remove_temp_files "$2"
