HIP: Heterogeneous-computing Interface for Portability
HIP Bugs

HIP is stricter in enforcing __host__/__device__ restrictions

The language specifications for HIP and CUDA forbid calling a __device__ function in a __host__ context. In practice, you may observe differences in the strictness of this restriction, with HIP adhering more tightly to the specification and thus being less tolerant of infringing code. The solution is to ensure that all functions which are called in a __device__ context are correctly annotated to reflect it. An interesting case where these differences emerge is shown below. It relies on the common C++ Member Detector idiom (as it would be implemented pre-C++11):

1 {c++}
2 #include <cassert>
3 #include <type_traits>
4 
5 struct aye { bool a[1]; };
6 struct nay { bool a[2]; };
7 
8 // Dual restriction is necessary in HIP if the detector is to work for
9 // __device__ contexts as well as __host__ ones. NVCC is less strict.
10 template<typename T>
11 __host__ __device__
12 const T& cref_t();
13 
14 template<typename T>
15 struct Has_call_operator {
16  // Dual restriction is necessary in HIP if the detector is to work for
17  // __device__ contexts as well as __host__ ones. NVCC is less strict.
18  template<typename C>
19  __host__ __device__
20  static
21  aye test(
22  C const *,
23  typename std::enable_if<
24  (sizeof(cref_t<C>().operator()()) > 0)>::type* = nullptr);
25  static
26  nay test(...);
27 
28  enum { value = sizeof(test(static_cast<T*>(0))) == sizeof(aye) };
29 };
30 
31 template<typename T, typename U, bool callable = has_call_operator<U>::value>
32 struct Wrapper {
33  template<typename V>
34  V f() const { return T{1}; }
35 };
36 
37 
38 template<typename T, typename U>
39 struct Wrapper<T, U, true> {
40  template<typename V>
41  V f() const { return T{10}; }
42 };
43 
44 // This specialisation will yield a compile-time error, if selected.
45 template<typename T, typename U>
46 struct Wrapper<T, U, false> {};
47 
48 template<typename T>
49 struct Functor;
50 
51 template<> struct Functor<float> {
52  __device__
53  float operator()() const { return 42.0f; }
54 };
55 
56 __device__
57 void this_will_not_compile_if_detector_is_not_marked_device()
58 {
59  float f = Wrapper<float, Functor<float>>().f<float>();
60 }
61 
62 __host__
63 void this_will_not_compile_if_detector_is_marked_device_only()
64 {
65  float f = Wrapper<float, Functor<float>>().f<float>();
66 }