HIP: Heterogenous-computing Interface for Portability
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Groups Pages
HIP Bugs

HIP is more restrictive in enforcing restrictions

The language specification for HIP and CUDA forbid calling a __device__ function in a __host__ context. In practice, you may observe differences in the strictness of this restriction, with HIP exhibiting a tighter adherence to the specification and thus less tolerant of infringing code. The solution is to ensure that all functions which are called in a __device__ context are correctly annotated to reflect it.

The following is an example of codes using the specification, ``` #include <hip/hip_runtime.h> #include <type_traits> #include <random> #include "test_common.h"

static std::random_device dev; static std::mt19937 rng(dev());

template <typename T, typename M> host device inline constexpr int count() { return sizeof(T) / sizeof(M); }

inline float getRandomFloat(float min = 10, float max = 100) { std::uniform_real_distribution<float> gen(min, max); return gen(rng); }

template <typename T, typename B> void fillMatrix(T* a, int size) { for (int i = 0; i < size; i++) { T t; t.x = getRandomFloat(); if constexpr (count<T, B>() >= 2) t.y = getRandomFloat(); if constexpr (count<T, B>() >= 3) t.z = getRandomFloat(); if constexpr (count<T, B>() >= 4) t.w = getRandomFloat();

a[i] = t; } }

// Test operations template <typename T, typename B> host device void testOperations(T& a, T& b) { a.x += b.x; a.x++; b.x++; if constexpr (count<T, B>() >= 2) { a.y = b.x; a.x = b.y; } if constexpr (count<T, B>() >= 3) { if (a.x > 0) b.x /= a.x; a.x *= b.z; a.y–; } if constexpr (count<T, B>() >= 4) { b.w = a.x; a.w += (-b.y); } }

template <typename T, typename B> global void testOperationsGPU(T* d_a, T* d_b, int size) { int id = threadIdx.x; if (id > size) return; T &a = d_a[id]; T &b = d_b[id];

testOperations<T, B>(a, b); }

template <typename t>=""> void dcopy(T* a, T* b, int size) { for (int i = 0; i < size; i++) { a[i] = b[i]; } }

template <typename t>=""> bool isEqual(T* a, T* b, int size) { for (int i = 0; i < size; i++) { if (a[i] != b[i]) { return false; } } return true; }

// Main function that tests type // T = what you want to test // D = pack of 1 i.e. float1 int1 template <typename T, typename D> void testType(int msize) { T *fa, *fb, *fc, *h_fa, *h_fb; fa = new T[msize]; fb = new T[msize]; fc = new T[msize]; h_fa = new T[msize]; h_fb = new T[msize];

T *d_fa, *d_fb;

constexpr int c = count<T, D>();

if (c <= 0 || c >= 5) { failed("Invalid Size\n"); }

fillMatrix<T, D>(fa, msize); dcopy(fb, fa, msize); dcopy(h_fa, fa, msize); dcopy(h_fb, fa, msize); for (int i = 0; i < msize; i++) testOperations<T, D>(h_fa[i], h_fb[i]);

hipMalloc(&d_fa, sizeof(T) * msize); hipMalloc(&d_fb, sizeof(T) * msize);

hipMemcpy(d_fa, fa, sizeof(T) * msize, hipMemcpyHostToDevice); hipMemcpy(d_fb, fb, sizeof(T) * msize, hipMemcpyHostToDevice);

auto kernel = testOperationsGPU<T, D>; hipLaunchKernelGGL(kernel, 1, msize, 0, 0, d_fa, d_fb, msize);

hipMemcpy(fc, d_fa, sizeof(T) * msize, hipMemcpyDeviceToHost);

bool pass = true; if (!isEqual<T>(h_fa, fc, msize)) { pass = false; }

delete[] fa; delete[] fb; delete[] fc; delete[] h_fa; delete[] h_fb; hipFree(d_fa); hipFree(d_fb);

if (!pass) { failed("Failed"); } }

int main() { const int msize = 100; // double testType<double1, double1>(msize); testType<double2, double1>(msize); testType<double3, double1>(msize); testType<double4, double1>(msize);

// floats testType<float1, float1>(msize); testType<float2, float1>(msize); testType<float3, float1>(msize); testType<float4, float1>(msize); ... passed(); } ``` For more details for the complete program, please refer to HIP test application at the link, https://github.com/ROCm-Developer-Tools/HIP/blob/main/tests/src/deviceLib/hip_floatnTM.cpp