29 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H
30 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H
32 #include <hip/hcc_detail/hip_common.h>
46 #if __HCC_OR_HIP_CLANG__
49 #if !defined(__align__)
50 #define __align__(x) __attribute__((aligned(x)))
54 #define CUDA_SUCCESS hipSuccess
56 #include <hip/hip_runtime_api.h>
57 #endif // __HCC_OR_HIP_CLANG__
61 #ifdef HIP_ENABLE_PRINTF
62 #define HCC_ENABLE_ACCELERATOR_PRINTF 1
68 #include "grid_launch.h"
69 #include "hc_printf.hpp"
73 #if GENERIC_GRID_LAUNCH == 0
74 #define hipLaunchParm grid_launch_parm
77 struct Empty_launch_parm {};
79 #define hipLaunchParm hip_impl::Empty_launch_parm
80 #endif // GENERIC_GRID_LAUNCH
82 #if defined(GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20) || GENERIC_GRID_LAUNCH == 1
83 #else // Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0.
84 #error(HCC must support GRID_LAUNCH_20)
85 #endif // GRID_LAUNCH_VERSION
89 #if GENERIC_GRID_LAUNCH == 1 && defined __HCC__
90 #include "grid_launch_GGL.hpp"
91 #endif // GENERIC_GRID_LAUNCH
95 #if __HCC_OR_HIP_CLANG__
96 extern int HIP_TRACE_API;
99 #include <hip/hcc_detail/hip_ldg.h>
101 #include <hip/hcc_detail/hip_atomic.h>
103 #include <hip/hcc_detail/device_functions.h>
104 #include <hip/hcc_detail/surface_functions.h>
105 #include <hip/hcc_detail/texture_functions.h>
107 #include <hip/hcc_detail/math_functions.h>
110 #if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__)
111 #define __HCC_ACCELERATOR__ __KALMAR_ACCELERATOR__
115 #if __HIP_DEVICE_COMPILE__ == 1
117 #define assert(COND) \
127 #if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__
131 #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1)
132 #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1)
133 #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1)
134 #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1)
135 #define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1)
138 #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1)
139 #define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0)
142 #define __HIP_ARCH_HAS_DOUBLES__ (1)
145 #define __HIP_ARCH_HAS_WARP_VOTE__ (1)
146 #define __HIP_ARCH_HAS_WARP_BALLOT__ (1)
147 #define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1)
148 #define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
151 #define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (1)
152 #define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
155 #define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
156 #define __HIP_ARCH_HAS_3DGRID__ (1)
157 #define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
162 #define launch_bounds_impl0(requiredMaxThreadsPerBlock) \
163 __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock)))
164 #define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) \
165 __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock), \
166 amdgpu_waves_per_eu(minBlocksPerMultiprocessor)))
167 #define select_impl_(_1, _2, impl_, ...) impl_
168 #define __launch_bounds__(...) \
169 select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__)
172 #if defined(__cplusplus)
174 #elif defined(__STDC_VERSION__)
178 __host__ inline void* __get_dynamicgroupbaseptr() {
return nullptr; }
180 #if __HIP_ARCH_GFX701__ == 0
182 __device__
unsigned __hip_ds_bpermute(
int index,
unsigned src);
183 __device__
float __hip_ds_bpermutef(
int index,
float src);
184 __device__
unsigned __hip_ds_permute(
int index,
unsigned src);
185 __device__
float __hip_ds_permutef(
int index,
float src);
187 template <
int pattern>
188 __device__
unsigned __hip_ds_swizzle_N(
unsigned int src);
189 template <
int pattern>
190 __device__
float __hip_ds_swizzlef_N(
float src);
192 template <
int dpp_ctrl,
int row_mask,
int bank_mask,
bool bound_ctrl>
193 __device__
int __hip_move_dpp_N(
int src);
195 #endif //__HIP_ARCH_GFX803__ == 1
197 #endif // __HCC_OR_HIP_CLANG__
202 typename std::common_type<decltype(hc_get_group_id), decltype(hc_get_group_size),
203 decltype(hc_get_num_groups), decltype(hc_get_workitem_id)>::type f>
205 using R = decltype(f(0));
208 __device__
operator R()
const {
return f(0); }
209 __device__ uint32_t operator=(R _) {
return f(0); }
212 __device__
operator R()
const {
return f(1); }
213 __device__ uint32_t operator=(R _) {
return f(1); }
216 __device__
operator R()
const {
return f(2); }
217 __device__ uint32_t operator=(R _) {
return f(2); }
221 static constexpr X x{};
222 static constexpr Y y{};
223 static constexpr Z z{};
231 #define hipThreadIdx_x (hc_get_workitem_id(0))
232 #define hipThreadIdx_y (hc_get_workitem_id(1))
233 #define hipThreadIdx_z (hc_get_workitem_id(2))
235 #define hipBlockIdx_x (hc_get_group_id(0))
236 #define hipBlockIdx_y (hc_get_group_id(1))
237 #define hipBlockIdx_z (hc_get_group_id(2))
239 #define hipBlockDim_x (hc_get_group_size(0))
240 #define hipBlockDim_y (hc_get_group_size(1))
241 #define hipBlockDim_z (hc_get_group_size(2))
243 #define hipGridDim_x (hc_get_num_groups(0))
244 #define hipGridDim_y (hc_get_num_groups(1))
245 #define hipGridDim_z (hc_get_num_groups(2))
247 #endif // defined __HCC__
248 #if __HCC_OR_HIP_CLANG__
249 extern "C" __device__
void* __hip_malloc(
size_t);
250 extern "C" __device__
void* __hip_free(
void* ptr);
252 static inline __device__
void* malloc(
size_t size) {
return __hip_malloc(size); }
253 static inline __device__
void* free(
void* ptr) {
return __hip_free(ptr); }
255 #if defined(__HCC_ACCELERATOR__) && defined(HC_FEATURE_PRINTF)
256 template <
typename... All>
257 static inline __device__
void printf(
const char* format, All... all) {
258 hc::printf(format, all...);
260 #elif defined(__HCC_ACCELERATOR__) || __HIP__
261 template <
typename... All>
262 static inline __device__
void printf(
const char* format, All... all) {}
265 #endif //__HCC_OR_HIP_CLANG__
269 #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
271 #define HIP_KERNEL_NAME(...) (__VA_ARGS__)
272 #define HIP_SYMBOL(X) #X
274 #if defined __HCC_CPP__
285 #if GENERIC_GRID_LAUNCH == 0
289 #define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
291 grid_launch_parm lp; \
292 lp.dynamic_group_mem_bytes = _groupMemBytes; \
293 hipStream_t trueStream = \
294 (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp, #_kernelName)); \
295 _kernelName(lp, ##__VA_ARGS__); \
296 ihipPostLaunchKernel(#_kernelName, trueStream, lp); \
298 #endif // GENERIC_GRID_LAUNCH
300 #elif defined(__HCC_C__)
327 #elif defined(__clang__) && defined(__HIP__)
329 #define HIP_KERNEL_NAME(...) __VA_ARGS__
330 #define HIP_SYMBOL(X) #X
332 typedef int hipLaunchParm;
334 #define hipLaunchKernel(kernelName, numblocks, numthreads, memperblock, streamId, ...) \
336 kernelName<<<(numblocks), (numthreads), (memperblock), (streamId)>>>(hipLaunchParm{}, ##__VA_ARGS__); \
339 #define hipLaunchKernelGGL(kernelName, numblocks, numthreads, memperblock, streamId, ...) \
341 kernelName<<<(numblocks), (numthreads), (memperblock), (streamId)>>>(__VA_ARGS__); \
344 #include <hip/hip_runtime_api.h>
346 #pragma push_macro("__DEVICE__")
347 #define __DEVICE__ static __device__ __forceinline__
349 extern "C" __device__
size_t __ockl_get_local_id(uint);
350 __DEVICE__ uint __hip_get_thread_idx_x() {
return __ockl_get_local_id(0); }
351 __DEVICE__ uint __hip_get_thread_idx_y() {
return __ockl_get_local_id(1); }
352 __DEVICE__ uint __hip_get_thread_idx_z() {
return __ockl_get_local_id(2); }
354 extern "C" __device__
size_t __ockl_get_group_id(uint);
355 __DEVICE__ uint __hip_get_block_idx_x() {
return __ockl_get_group_id(0); }
356 __DEVICE__ uint __hip_get_block_idx_y() {
return __ockl_get_group_id(1); }
357 __DEVICE__ uint __hip_get_block_idx_z() {
return __ockl_get_group_id(2); }
359 extern "C" __device__
size_t __ockl_get_local_size(uint);
360 __DEVICE__ uint __hip_get_block_dim_x() {
return __ockl_get_local_size(0); }
361 __DEVICE__ uint __hip_get_block_dim_y() {
return __ockl_get_local_size(1); }
362 __DEVICE__ uint __hip_get_block_dim_z() {
return __ockl_get_local_size(2); }
364 extern "C" __device__
size_t __ockl_get_num_groups(uint);
365 __DEVICE__ uint __hip_get_grid_dim_x() {
return __ockl_get_num_groups(0); }
366 __DEVICE__ uint __hip_get_grid_dim_y() {
return __ockl_get_num_groups(1); }
367 __DEVICE__ uint __hip_get_grid_dim_z() {
return __ockl_get_num_groups(2); }
369 #define __HIP_DEVICE_BUILTIN(DIMENSION, FUNCTION) \
370 __declspec(property(get = __get_##DIMENSION)) uint DIMENSION; \
371 __DEVICE__ uint __get_##DIMENSION(void) { \
375 struct __hip_builtin_threadIdx_t {
376 __HIP_DEVICE_BUILTIN(x,__hip_get_thread_idx_x());
377 __HIP_DEVICE_BUILTIN(y,__hip_get_thread_idx_y());
378 __HIP_DEVICE_BUILTIN(z,__hip_get_thread_idx_z());
381 struct __hip_builtin_blockIdx_t {
382 __HIP_DEVICE_BUILTIN(x,__hip_get_block_idx_x());
383 __HIP_DEVICE_BUILTIN(y,__hip_get_block_idx_y());
384 __HIP_DEVICE_BUILTIN(z,__hip_get_block_idx_z());
387 struct __hip_builtin_blockDim_t {
388 __HIP_DEVICE_BUILTIN(x,__hip_get_block_dim_x());
389 __HIP_DEVICE_BUILTIN(y,__hip_get_block_dim_y());
390 __HIP_DEVICE_BUILTIN(z,__hip_get_block_dim_z());
393 struct __hip_builtin_gridDim_t {
394 __HIP_DEVICE_BUILTIN(x,__hip_get_grid_dim_x());
395 __HIP_DEVICE_BUILTIN(y,__hip_get_grid_dim_y());
396 __HIP_DEVICE_BUILTIN(z,__hip_get_grid_dim_z());
399 #undef __HIP_DEVICE_BUILTIN
400 #pragma pop_macro("__DEVICE__")
402 extern const __device__
__attribute__((weak)) __hip_builtin_threadIdx_t threadIdx;
403 extern const __device__
__attribute__((weak)) __hip_builtin_blockIdx_t blockIdx;
404 extern const __device__
__attribute__((weak)) __hip_builtin_blockDim_t blockDim;
405 extern const __device__
__attribute__((weak)) __hip_builtin_gridDim_t gridDim;
408 #define hipThreadIdx_x threadIdx.x
409 #define hipThreadIdx_y threadIdx.y
410 #define hipThreadIdx_z threadIdx.z
412 #define hipBlockIdx_x blockIdx.x
413 #define hipBlockIdx_y blockIdx.y
414 #define hipBlockIdx_z blockIdx.z
416 #define hipBlockDim_x blockDim.x
417 #define hipBlockDim_y blockDim.y
418 #define hipBlockDim_z blockDim.z
420 #define hipGridDim_x gridDim.x
421 #define hipGridDim_y gridDim.y
422 #define hipGridDim_z gridDim.z
424 #include <hip/hcc_detail/math_functions.h>
426 #if __HIP_HCC_COMPAT_MODE__
428 #pragma push_macro("__DEFINE_HCC_FUNC")
429 #define __DEFINE_HCC_FUNC(hc_fun,hip_var) \
430 inline __device__ __attribute__((always_inline)) uint hc_get_##hc_fun(uint i) { \
439 __DEFINE_HCC_FUNC(workitem_id, threadIdx)
440 __DEFINE_HCC_FUNC(group_id, blockIdx)
441 __DEFINE_HCC_FUNC(group_size, blockDim)
442 __DEFINE_HCC_FUNC(num_groups, gridDim)
443 #pragma pop_macro("__DEFINE_HCC_FUNC")
445 extern "C" __device__
__attribute__((
const)) size_t __ockl_get_global_id(uint);
447 hc_get_workitem_absolute_id(
int dim)
449 return (uint)__ockl_get_global_id(dim);
455 #pragma push_macro("__CUDA__")
457 #include <__clang_cuda_math_forward_declares.h>
458 #include <__clang_cuda_complex_builtins.h>
459 #include <cuda_wrappers/algorithm>
460 #include <cuda_wrappers/complex>
461 #include <cuda_wrappers/new>
463 #pragma pop_macro("__CUDA__")
466 uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
467 uint32_t localWorkSizeX, uint32_t localWorkSizeY,
468 uint32_t localWorkSizeZ,
size_t sharedMemBytes,
469 hipStream_t hStream,
void** kernelParams,
void** extra,
474 hipError_t hipHccModuleLaunchKernel(
hipFunction_t f, uint32_t globalWorkSizeX,
475 uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
476 uint32_t localWorkSizeX, uint32_t localWorkSizeY,
477 uint32_t localWorkSizeZ,
size_t sharedMemBytes,
478 hipStream_t hStream,
void** kernelParams,
void** extra,
483 #endif // defined(__clang__) && defined(__HIP__)
485 #include <hip/hcc_detail/hip_memory.h>
487 #endif // HIP_HCC_DETAIL_RUNTIME_H
hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ, size_t sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra, hipEvent_t startEvent=nullptr, hipEvent_t stopEvent=nullptr, uint32_t flags=0)
launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelp...
Definition: hip_module.cpp:293
Definition: hip_module.cpp:106
Definition: hip_runtime_api.h:269
#define __host__
Definition: host_defines.h:41
Definition: grid_launch.h:31
Definition: hip_hcc_internal.h:703
Definition: hip_hcc_internal.h:524
Definition: hip_runtime.h:204
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Copies the size of symbol symbolName to size.
Definition: hip_fp16_math_fwd.h:53