HIP: Heterogeneous-computing Interface for Portability
hip_runtime_api.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
30 #ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_API_H
31 #define HIP_INCLUDE_HIP_HIP_RUNTIME_API_H
32 
33 
34 #include <string.h> // for getDeviceProp
35 #include <hip/hip_version.h>
36 #include <hip/hip_common.h>
37 
38 enum {  // Unnamed enum: enumerators without an initializer count up from HIP_SUCCESS.
39  HIP_SUCCESS = 0,
40  HIP_ERROR_INVALID_VALUE,  // = 1
41  HIP_ERROR_NOT_INITIALIZED,  // = 2 (hipErrorNotInitialized in hipError_t is 3, not 2)
42  HIP_ERROR_LAUNCH_OUT_OF_RESOURCES  // = 3 (hipErrorLaunchOutOfResources in hipError_t is 701)
43 };
44 
45 typedef struct {
46  // 32-bit Atomics
47  unsigned hasGlobalInt32Atomics : 1;
48  unsigned hasGlobalFloatAtomicExch : 1;
49  unsigned hasSharedInt32Atomics : 1;
50  unsigned hasSharedFloatAtomicExch : 1;
51  unsigned hasFloatAtomicAdd : 1;
52 
53  // 64-bit Atomics
54  unsigned hasGlobalInt64Atomics : 1;
55  unsigned hasSharedInt64Atomics : 1;
56 
57  // Doubles
58  unsigned hasDoubles : 1;
59 
60  // Warp cross-lane operations
61  unsigned hasWarpVote : 1;
62  unsigned hasWarpBallot : 1;
63  unsigned hasWarpShuffle : 1;
64  unsigned hasFunnelShift : 1;
65 
66  // Sync
67  unsigned hasThreadFenceSystem : 1;
68  unsigned hasSyncThreadsExt : 1;
69 
70  // Misc
71  unsigned hasSurfaceFuncs : 1;
72  unsigned has3dGrid : 1;
73  unsigned hasDynamicParallelism : 1;
75 
76 
77 //---
78 // Common headers for both NVCC and HCC paths:
79 
84 typedef struct hipDeviceProp_t {
85  char name[256];
86  size_t totalGlobalMem;
89  int warpSize;
91  int maxThreadsDim[3];
92  int maxGridSize[3];
93  int clockRate;
96  size_t totalConstMem;
97  int major;
98  int minor;
101  int multiProcessorCount;
108  hipDeviceArch_t arch;
112  int pciBusID;
117  int gcnArch;
118  char gcnArchName[256];
123  int maxTexture2D[2];
124  int maxTexture3D[3];
125  unsigned int* hdpMemFlushCntl;
126  unsigned int* hdpRegFlushCntl;
127  size_t memPitch;
132  int tccDriver;
134  int cooperativeMultiDeviceUnmatchedGridDim;
136  int cooperativeMultiDeviceUnmatchedBlockDim;
138  int cooperativeMultiDeviceUnmatchedSharedMem;
140  int isLargeBar;
147  int pageableMemoryAccessUsesHostPageTables;
150 
151 
155 typedef enum hipMemoryType {  // No explicit initializers: as listed here, values run 0..3 in declaration order.
156  hipMemoryTypeHost,  // = 0
157  hipMemoryTypeDevice,  // = 1
158  hipMemoryTypeArray,  // = 2
160  hipMemoryTypeUnified  // = 3
162 }hipMemoryType;
163 
164 
168 typedef struct hipPointerAttribute_t {
169  enum hipMemoryType memoryType;
170  int device;
171  void* devicePointer;
172  void* hostPointer;
173  int isManaged;
174  unsigned allocationFlags; /* flags specified when memory was allocated*/
175  /* peers? */
177 
178 
179 // hack to get these to show up in Doxygen:
186 // Ignoring error-code return values from hip APIs is discouraged. On C++17,
187 // we can make that yield a warning
188 #if __cplusplus >= 201703L  // in plain C __cplusplus is undefined and evaluates as 0 here
189 #define __HIP_NODISCARD [[nodiscard]]
190 #else  // C, or C++ older than C++17: the macro expands to nothing
191 #define __HIP_NODISCARD
192 #endif
193 
194 /*
195  * @brief hipError_t
196  * @enum
197  * @ingroup Enumerations
198  */
199 // Developer note - when updating these, update the hipErrorName and hipErrorString functions in
200 // NVCC and HCC paths. Also update the hipCUDAErrorTohipError function in the NVCC path.
201 
202 typedef enum __HIP_NODISCARD hipError_t {  // __HIP_NODISCARD is [[nodiscard]] on C++17 and later, otherwise empty
203  hipSuccess = 0,
204  hipErrorInvalidValue = 1,
205  hipErrorOutOfMemory = 2,
207  // Deprecated
208  hipErrorMemoryAllocation = 2,  // alias: same value as hipErrorOutOfMemory
209  hipErrorNotInitialized = 3,
210  // Deprecated
211  hipErrorInitializationError = 3,  // alias: same value as hipErrorNotInitialized
212  hipErrorDeinitialized = 4,
213  hipErrorProfilerDisabled = 5,
214  hipErrorProfilerNotInitialized = 6,
215  hipErrorProfilerAlreadyStarted = 7,
216  hipErrorProfilerAlreadyStopped = 8,
217  hipErrorInvalidConfiguration = 9,
218  hipErrorInvalidSymbol = 13,
219  hipErrorInvalidDevicePointer = 17,
220  hipErrorInvalidMemcpyDirection = 21,
221  hipErrorInsufficientDriver = 35,
222  hipErrorMissingConfiguration = 52,
223  hipErrorPriorLaunchFailure = 53,
224  hipErrorInvalidDeviceFunction = 98,
225  hipErrorNoDevice = 100,
226  hipErrorInvalidDevice = 101,
227  hipErrorInvalidImage = 200,
228  hipErrorInvalidContext = 201,
229  hipErrorContextAlreadyCurrent = 202,
230  hipErrorMapFailed = 205,
231  // Deprecated
232  hipErrorMapBufferObjectFailed = 205,  // alias: same value as hipErrorMapFailed
233  hipErrorUnmapFailed = 206,
234  hipErrorArrayIsMapped = 207,
235  hipErrorAlreadyMapped = 208,
236  hipErrorNoBinaryForGpu = 209,
237  hipErrorAlreadyAcquired = 210,
238  hipErrorNotMapped = 211,
239  hipErrorNotMappedAsArray = 212,
240  hipErrorNotMappedAsPointer = 213,
241  hipErrorECCNotCorrectable = 214,
242  hipErrorUnsupportedLimit = 215,
243  hipErrorContextAlreadyInUse = 216,
244  hipErrorPeerAccessUnsupported = 217,
245  hipErrorInvalidKernelFile = 218,
246  hipErrorInvalidGraphicsContext = 219,
247  hipErrorInvalidSource = 300,
248  hipErrorFileNotFound = 301,
249  hipErrorSharedObjectSymbolNotFound = 302,
250  hipErrorSharedObjectInitFailed = 303,
251  hipErrorOperatingSystem = 304,
252  hipErrorInvalidHandle = 400,
253  // Deprecated
254  hipErrorInvalidResourceHandle = 400,  // alias: same value as hipErrorInvalidHandle
255  hipErrorNotFound = 500,
256  hipErrorNotReady = 600,
257  hipErrorIllegalAddress = 700,
261  hipErrorLaunchOutOfResources = 701,
262  hipErrorLaunchTimeOut = 702,
263  hipErrorPeerAccessAlreadyEnabled =
264  704,
265  hipErrorPeerAccessNotEnabled =
266  705,
267  hipErrorSetOnActiveProcess = 708,
268  hipErrorAssert = 710,
269  hipErrorHostMemoryAlreadyRegistered =
270  712,
271  hipErrorHostMemoryNotRegistered =
272  713,
273  hipErrorLaunchFailure =
274  719,
275  hipErrorCooperativeLaunchTooLarge =
276  720,
277  hipErrorNotSupported = 801,
280  hipErrorUnknown = 999, ///< Unknown error.
281  // HSA Runtime Error Codes start here.
282  hipErrorRuntimeMemory = 1052,
283  hipErrorRuntimeOther = 1053,
285  hipErrorTbd  // no initializer: takes 1054 (hipErrorRuntimeOther + 1)
287 } hipError_t;
288 
289 #undef __HIP_NODISCARD
290 
291 /*
292  * @brief hipDeviceAttribute_t
293  * @enum
294  * @ingroup Enumerations
295  */
296 typedef enum hipDeviceAttribute_t {
339 
342 
349 
369 
370 enum hipComputeMode {  // Tag-only enum (no typedef): C code names the type as `enum hipComputeMode`.
371  hipComputeModeDefault = 0,
372  hipComputeModeExclusive = 1,
373  hipComputeModeProhibited = 2,
374  hipComputeModeExclusiveProcess = 3
375 };
376 
381 #if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)  // only the HCC platform macro is defined
383 #elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)  // only the NVCC platform macro is defined
384 #include "hip/nvcc_detail/hip_runtime_api.h"
385 #else  // neither or both platform macros are defined: fail the build
386 #error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
387 #endif
388 
389 
401 #if defined(__cplusplus) && !defined(__HIP_DISABLE_CPP_FUNCTIONS__)  // C++ only; defining __HIP_DISABLE_CPP_FUNCTIONS__ omits these helpers
402 template <class T>  // Typed wrapper: accepts any T** so the caller does not have to cast to void**.
403 static inline hipError_t hipMalloc(T** devPtr, size_t size) {
404  return hipMalloc((void**)devPtr, size);  // forwards to a hipMalloc taking void** (presumably the C API declared outside this listing)
405 }
406 
407 // Provide an override to automatically typecast the pointer type from void**, and also provide a
408 // default for the flags.
409 template <class T>
410 static inline hipError_t hipHostMalloc(T** ptr, size_t size,
411  unsigned int flags = hipHostMallocDefault) {
412  return hipHostMalloc((void**)ptr, size, flags);
413 }
414 
415 template <class T>  // Same pattern as the hipHostMalloc wrapper: typed pointer in, flags default to hipMemAttachGlobal.
416 static inline hipError_t hipMallocManaged(T** devPtr, size_t size,
417  unsigned int flags = hipMemAttachGlobal) {
418  return hipMallocManaged((void**)devPtr, size, flags);
419 }
420 #endif
421 
422 #endif
Definition: hip_runtime_api.h:364
Device supports allocating managed memory on this system.
Definition: hip_runtime_api.h:359
size_t totalConstMem
Size of constant memory region (in bytes).
Definition: hip_runtime_api.h:96
int asicRevision
Revision of the GPU in this device.
Definition: hip_runtime_api.h:142
Maximum x-dimension of a block.
Definition: hip_runtime_api.h:298
Maximum x-dimension of a grid.
Definition: hip_runtime_api.h:301
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:343
int concurrentManagedAccess
Device can coherently access managed memory concurrently with the CPU.
Definition: hip_runtime_api.h:145
Global memory bus width in bits.
Definition: hip_runtime_api.h:314
Definition: hip_runtime_api.h:360
unsigned int * hdpRegFlushCntl
Address of the HDP_REG_COHERENCY_FLUSH_CNTL register.
Definition: hip_runtime_api.h:126
size_t texturePitchAlignment
Pitch alignment requirement for texture references bound to pitched memory.
Definition: hip_runtime_api.h:129
int canMapHostMemory
Check whether HIP can map host memory.
Definition: hip_runtime_api.h:116
Definition: hip_runtime_api.h:308
#define hipMemAttachGlobal
Memory can be accessed by any stream on any device.
Definition: hip_runtime_api.h:191
Revision of the GPU in this device.
Definition: hip_runtime_api.h:358
int regsPerBlock
Registers per block.
Definition: hip_runtime_api.h:88
Definition: hip_runtime_api.h:317
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:346
#define hipHostMallocDefault
Flags that can be used with hipHostMalloc.
Definition: hip_runtime_api.h:175
Maximum dimension width of 3D images in image elements.
Definition: hip_runtime_api.h:336
iGPU
Definition: hip_runtime_api.h:330
int isMultiGpuBoard
1 if device is on a multi-GPU board, 0 if not.
Definition: hip_runtime_api.h:115
Peak clock frequency in kilohertz.
Definition: hip_runtime_api.h:312
int maxTexture1D
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:122
Definition: hip_runtime_api.h:168
int clockRate
Max clock frequency of the multiprocessors in kHz.
Definition: hip_runtime_api.h:93
Maximum z-dimension of a grid.
Definition: hip_runtime_api.h:303
Minor compute capability version number.
Definition: hip_runtime_api.h:322
Definition: hip_runtime_api.h:304
int pciBusID
PCI Bus ID.
Definition: hip_runtime_api.h:112
Maximum y-dimension of a grid.
Definition: hip_runtime_api.h:302
Multiple GPU devices.
Definition: hip_runtime_api.h:329
int tccDriver
1 if the device is a Tesla device using the TCC driver, 0 otherwise.
Definition: hip_runtime_api.h:132
int maxThreadsPerBlock
Max work items per work group or workgroup max size.
Definition: hip_runtime_api.h:90
Maximum y-dimension of a block.
Definition: hip_runtime_api.h:299
hipError_t hipHostMalloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory.
Definition: hip_memory.cpp:762
size_t sharedMemPerBlock
Size of shared memory region (in bytes).
Definition: hip_runtime_api.h:87
Definition: hip_runtime_api.h:362
Support cooperative launch on multiple devices.
Definition: hip_runtime_api.h:332
int maxThreadsPerMultiProcessor
Maximum resident threads per multi-processor.
Definition: hip_runtime_api.h:105
size_t memPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:127
int l2CacheSize
L2 cache size.
Definition: hip_runtime_api.h:104
hipDeviceAttribute_t
Definition: hip_runtime_api.h:296
size_t textureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:128
Major compute capability version number.
Definition: hip_runtime_api.h:321
Maximum number of threads per block.
Definition: hip_runtime_api.h:297
int gcnArch
AMD GCN Arch Value. Eg: 803, 701.
Definition: hip_runtime_api.h:117
Maximum dimensions depth of 3D images in image elements.
Definition: hip_runtime_api.h:338
int kernelExecTimeoutEnabled
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:130
int computeMode
Compute mode.
Definition: hip_runtime_api.h:106
Maximum z-dimension of a block.
Definition: hip_runtime_api.h:300
PCI Bus ID.
Definition: hip_runtime_api.h:325
unsigned int * hdpMemFlushCntl
Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register.
Definition: hip_runtime_api.h:125
Maximum dimension height of 2D images in image elements.
Definition: hip_runtime_api.h:335
Warp size in threads.
Definition: hip_runtime_api.h:307
int major
Definition: hip_runtime_api.h:97
Maximum dimensions height of 3D images in image elements.
Definition: hip_runtime_api.h:337
Peak memory clock frequency in kilohertz.
Definition: hip_runtime_api.h:313
Definition: hip_runtime_api.h:319
Address of the HDP_REG_COHERENCY_FLUSH_CNTL register.
Definition: hip_runtime_api.h:341
int clockInstructionRate
Definition: hip_runtime_api.h:107
int cooperativeMultiDeviceLaunch
HIP device supports cooperative launch on multiple devices.
Definition: hip_runtime_api.h:121
Constant memory size in bytes.
Definition: hip_runtime_api.h:306
Pitch alignment requirement for 2D texture references bound to pitched memory.
Definition: hip_runtime_api.h:345
int warpSize
Warp size.
Definition: hip_runtime_api.h:89
int concurrentKernels
Device can possibly execute multiple kernels concurrently.
Definition: hip_runtime_api.h:110
Alignment requirement for textures.
Definition: hip_runtime_api.h:344
size_t totalGlobalMem
Size of global memory region (in bytes).
Definition: hip_runtime_api.h:86
hipError_t hipMalloc(void **ptr, size_t size)
Allocate memory on the default accelerator.
Definition: hip_memory.cpp:695
Compute mode that device is currently in.
Definition: hip_runtime_api.h:316
PCI Device ID.
Definition: hip_runtime_api.h:326
Device can map host memory into device address space.
Definition: hip_runtime_api.h:347
Number of multiprocessors on the device.
Definition: hip_runtime_api.h:315
int integrated
APU vs dGPU.
Definition: hip_runtime_api.h:119
int memoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:95
Definition: hip_runtime_api.h:84
size_t maxSharedMemoryPerMultiProcessor
Maximum Shared Memory Per Multiprocessor.
Definition: hip_runtime_api.h:114
int cooperativeLaunch
HIP device supports cooperative launch.
Definition: hip_runtime_api.h:120
Support cooperative launch.
Definition: hip_runtime_api.h:331
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:333
int pciDeviceID
PCI Device ID.
Definition: hip_runtime_api.h:113
int pciDomainID
PCI Domain ID.
Definition: hip_runtime_api.h:111
int ECCEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:131
int managedMemory
Device supports allocating managed memory on this system.
Definition: hip_runtime_api.h:143
int directManagedMemAccessFromHost
Host can directly access managed memory on the device without migration.
Definition: hip_runtime_api.h:144
Definition: hip_runtime_api.h:45
Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language ...
int memoryClockRate
Max global memory clock frequency in kHz.
Definition: hip_runtime_api.h:94
Maximum dimension width of 2D images in image elements.
Definition: hip_runtime_api.h:334
hipError_t hipMallocManaged(void **dev_ptr, size_t size, unsigned int flags __dparm(hipMemAttachGlobal))
Allocates memory that will be automatically managed by AMD HMM.
int pageableMemoryAccess
Definition: hip_runtime_api.h:146
Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register.
Definition: hip_runtime_api.h:340
Device has ECC support enabled.
Definition: hip_runtime_api.h:348
int cooperativeMultiDeviceUnmatchedFunc
Definition: hip_runtime_api.h:133
Definition: hip_runtime_api.h:323