HIP: Heterogeneous-computing Interface for Portability
hip_runtime_api.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
30 #ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_API_H
31 #define HIP_INCLUDE_HIP_HIP_RUNTIME_API_H
32 
33 
34 #include <string.h> // for getDeviceProp
35 #include <hip/hip_version.h>
36 #include <hip/hip_common.h>
37 
/*
 * Anonymous enumeration of four generic status constants.
 * HIP_SUCCESS is pinned to 0; the remaining enumerators have no initializer
 * and therefore take the successive values 1, 2 and 3.
 *
 * NOTE(review): these constants are distinct from the hipError_t codes declared
 * in this same header and the numeric values do not line up (for example
 * HIP_ERROR_NOT_INITIALIZED is 2 here while hipErrorNotInitialized is 3, and
 * HIP_ERROR_LAUNCH_OUT_OF_RESOURCES is 3 here while hipErrorLaunchOutOfResources
 * is 701) - do not compare values across the two sets.
 */
38 enum {
39  HIP_SUCCESS = 0,
40  HIP_ERROR_INVALID_VALUE,           /* implicit value 1 */
41  HIP_ERROR_NOT_INITIALIZED,         /* implicit value 2 */
42  HIP_ERROR_LAUNCH_OUT_OF_RESOURCES  /* implicit value 3 */
43 };
44 
45 typedef struct {
46  // 32-bit Atomics
47  unsigned hasGlobalInt32Atomics : 1;
48  unsigned hasGlobalFloatAtomicExch : 1;
49  unsigned hasSharedInt32Atomics : 1;
50  unsigned hasSharedFloatAtomicExch : 1;
51  unsigned hasFloatAtomicAdd : 1;
52 
53  // 64-bit Atomics
54  unsigned hasGlobalInt64Atomics : 1;
55  unsigned hasSharedInt64Atomics : 1;
56 
57  // Doubles
58  unsigned hasDoubles : 1;
59 
60  // Warp cross-lane operations
61  unsigned hasWarpVote : 1;
62  unsigned hasWarpBallot : 1;
63  unsigned hasWarpShuffle : 1;
64  unsigned hasFunnelShift : 1;
65 
66  // Sync
67  unsigned hasThreadFenceSystem : 1;
68  unsigned hasSyncThreadsExt : 1;
69 
70  // Misc
71  unsigned hasSurfaceFuncs : 1;
72  unsigned has3dGrid : 1;
73  unsigned hasDynamicParallelism : 1;
75 
76 
77 //---
78 // Common headers for both NVCC and HCC paths:
79 
84 typedef struct hipDeviceProp_t {
85  char name[256];
86  size_t totalGlobalMem;
89  int warpSize;
91  int maxThreadsDim[3];
92  int maxGridSize[3];
93  int clockRate;
96  size_t totalConstMem;
97  int major;
98  int minor;
101  int multiProcessorCount;
108  hipDeviceArch_t arch;
112  int pciBusID;
117  int gcnArch;
118  char gcnArchName[256];
124  int maxTexture2D[2];
125  int maxTexture3D[3];
126  unsigned int* hdpMemFlushCntl;
127  unsigned int* hdpRegFlushCntl;
128  size_t memPitch;
133  int tccDriver;
135  int cooperativeMultiDeviceUnmatchedGridDim;
137  int cooperativeMultiDeviceUnmatchedBlockDim;
139  int cooperativeMultiDeviceUnmatchedSharedMem;
141  int isLargeBar;
148  int pageableMemoryAccessUsesHostPageTables;
151 
152 
/*
 * Memory-kind tag; used as the `memoryType` field of hipPointerAttribute_t.
 * No enumerator has an initializer, so the four enumerators shown take the
 * values 0 through 3 in declaration order.
 * NOTE(review): the precise meaning of each kind is not stated in this listing;
 * the names suggest host, device, array and unified memory - confirm against
 * the runtime documentation.
 */
156 typedef enum hipMemoryType {
157  hipMemoryTypeHost,
158  hipMemoryTypeDevice,
159  hipMemoryTypeArray,
161  hipMemoryTypeUnified
163 }hipMemoryType;
164 
165 
169 typedef struct hipPointerAttribute_t {
170  enum hipMemoryType memoryType;
171  int device;
172  void* devicePointer;
173  void* hostPointer;
174  int isManaged;
175  unsigned allocationFlags; /* flags specified when memory was allocated*/
176  /* peers? */
178 
179 
180 // hack to get these to show up in Doxygen:
187 // Ignoring error-code return values from hip APIs is discouraged. When compiled as
188 // C++17 or newer, __HIP_NODISCARD expands to [[nodiscard]] so that doing so can
// yield a compiler warning; in every other case (older C++, or C, where
// __cplusplus is not defined and evaluates as 0 in #if) it expands to nothing.
// The macro is only a local helper: it is #undef'd again right after the
// hipError_t definition that uses it.
189 #if __cplusplus >= 201703L
190 #define __HIP_NODISCARD [[nodiscard]]
191 #else
192 #define __HIP_NODISCARD
193 #endif
194 
195 /*
196  * @brief hipError_t
197  * @enum
198  * @ingroup Enumerations
199  */
200 // Developer note - when updating these, update the hipErrorName and hipErrorString functions in
201 // the NVCC and HCC paths. Also update the hipCUDAErrorTohipError function in the NVCC path.
202 
/*
 * hipError_t - status/error code enumeration; hipSuccess is 0.
 *
 * __HIP_NODISCARD sits between `enum` and the tag; it expands to [[nodiscard]]
 * under C++17 or newer and to nothing otherwise, so the declaration is also
 * valid when that attribute is unavailable.
 *
 * Values are assigned explicitly and are not contiguous. Several enumerators
 * are aliases that share a value with the enumerator listed just before them
 * (each is marked "Deprecated" below):
 *   hipErrorMemoryAllocation      == hipErrorOutOfMemory    (2)
 *   hipErrorInitializationError   == hipErrorNotInitialized (3)
 *   hipErrorMapBufferObjectFailed == hipErrorMapFailed      (205)
 *   hipErrorInvalidResourceHandle == hipErrorInvalidHandle  (400)
 * Because the values are identical, a switch statement cannot carry a case
 * label for both names of an alias pair.
 */
203 typedef enum __HIP_NODISCARD hipError_t {
204  hipSuccess = 0,
205  hipErrorInvalidValue = 1,
206  hipErrorOutOfMemory = 2,
208  // Deprecated
209  hipErrorMemoryAllocation = 2,
210  hipErrorNotInitialized = 3,
211  // Deprecated
212  hipErrorInitializationError = 3,
213  hipErrorDeinitialized = 4,
214  hipErrorProfilerDisabled = 5,
215  hipErrorProfilerNotInitialized = 6,
216  hipErrorProfilerAlreadyStarted = 7,
217  hipErrorProfilerAlreadyStopped = 8,
218  hipErrorInvalidConfiguration = 9,
219  hipErrorInvalidPitchValue = 12,
220  hipErrorInvalidSymbol = 13,
221  hipErrorInvalidDevicePointer = 17,
222  hipErrorInvalidMemcpyDirection = 21,
223  hipErrorInsufficientDriver = 35,
224  hipErrorMissingConfiguration = 52,
225  hipErrorPriorLaunchFailure = 53,
226  hipErrorInvalidDeviceFunction = 98,
227  hipErrorNoDevice = 100,
228  hipErrorInvalidDevice = 101,
229  hipErrorInvalidImage = 200,
230  hipErrorInvalidContext = 201,
231  hipErrorContextAlreadyCurrent = 202,
232  hipErrorMapFailed = 205,
233  // Deprecated
234  hipErrorMapBufferObjectFailed = 205,
235  hipErrorUnmapFailed = 206,
236  hipErrorArrayIsMapped = 207,
237  hipErrorAlreadyMapped = 208,
238  hipErrorNoBinaryForGpu = 209,
239  hipErrorAlreadyAcquired = 210,
240  hipErrorNotMapped = 211,
241  hipErrorNotMappedAsArray = 212,
242  hipErrorNotMappedAsPointer = 213,
243  hipErrorECCNotCorrectable = 214,
244  hipErrorUnsupportedLimit = 215,
245  hipErrorContextAlreadyInUse = 216,
246  hipErrorPeerAccessUnsupported = 217,
247  hipErrorInvalidKernelFile = 218,
248  hipErrorInvalidGraphicsContext = 219,
249  hipErrorInvalidSource = 300,
250  hipErrorFileNotFound = 301,
251  hipErrorSharedObjectSymbolNotFound = 302,
252  hipErrorSharedObjectInitFailed = 303,
253  hipErrorOperatingSystem = 304,
254  hipErrorInvalidHandle = 400,
255  // Deprecated
256  hipErrorInvalidResourceHandle = 400,
257  hipErrorNotFound = 500,
258  hipErrorNotReady = 600,
259  hipErrorIllegalAddress = 700,
263  hipErrorLaunchOutOfResources = 701,
264  hipErrorLaunchTimeOut = 702,
265  hipErrorPeerAccessAlreadyEnabled =
266  704,
267  hipErrorPeerAccessNotEnabled =
268  705,
269  hipErrorSetOnActiveProcess = 708,
270  hipErrorContextIsDestroyed = 709,
271  hipErrorAssert = 710,
272  hipErrorHostMemoryAlreadyRegistered =
273  712,
274  hipErrorHostMemoryNotRegistered =
275  713,
276  hipErrorLaunchFailure =
277  719,
278  hipErrorCooperativeLaunchTooLarge =
279  720,
280  hipErrorNotSupported = 801,
283  hipErrorStreamCaptureUnsupported = 900,
284  hipErrorStreamCaptureInvalidated = 901,
286  hipErrorStreamCaptureMerge = 902,
288  hipErrorStreamCaptureUnmatched = 903,
290  hipErrorStreamCaptureUnjoined = 904,
291  hipErrorStreamCaptureIsolation = 905,
293  hipErrorStreamCaptureImplicit = 906,
297  hipErrorCapturedEvent = 907,
300  hipErrorStreamCaptureWrongThread = 908,
302  hipErrorUnknown = 999, ///< Unknown error.
306  // HSA Runtime Error Codes start here.
307  hipErrorRuntimeMemory = 1052,
308  hipErrorRuntimeOther = 1053,
310  hipErrorTbd  /* no initializer: takes hipErrorRuntimeOther + 1, i.e. 1054 */
312 } hipError_t;
313 
314 #undef __HIP_NODISCARD
315 
316 /*
317  * @brief hipDeviceAttribute_t
318  * @enum
319  * @ingroup Enumerations
320  */
321 typedef enum hipDeviceAttribute_t {
364 
367 
374 
394 
397 
/*
 * Compute-mode constants with explicitly assigned values 0 through 3.
 * The enum is declared without a typedef, so C code must spell the type as
 * `enum hipComputeMode`.
 * NOTE(review): the semantics of each mode are not described in this listing -
 * confirm against the runtime documentation before relying on them.
 */
398 enum hipComputeMode {
399  hipComputeModeDefault = 0,
400  hipComputeModeExclusive = 1,
401  hipComputeModeProhibited = 2,
402  hipComputeModeExclusiveProcess = 3
403 };
404 
/*
 * Backend selection. Exactly one platform family must be chosen:
 *  - first branch:  an AMD-side macro (__HIP_PLATFORM_HCC__ or
 *    __HIP_PLATFORM_AMD__) is defined and no NVIDIA-side macro is;
 *  - second branch: the mirror case; pulls in the NVIDIA implementation header;
 *  - otherwise (neither family, or both at once): compilation stops via #error.
 * NOTE(review): this listing shows no body for the first branch (the embedded
 * numbering jumps from 409 to 411) - confirm the AMD-side include against the
 * actual header.
 */
409 #if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__))
411 #elif !(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && (defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__))
412 #include "hip/nvidia_detail/hip_runtime_api.h"
413 #else
414 #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
415 #endif
416 
417 
429 #if defined(__cplusplus) && !defined(__HIP_DISABLE_CPP_FUNCTIONS__)
// Typed-pointer convenience overload of hipMalloc (C++ only).
// Accepts a T** so callers need not cast to void** themselves; the pointer is
// cast to void** and forwarded together with `size`, and the callee's
// hipError_t is returned unchanged.
// NOTE(review): the inner call is expected to bind to a non-template
// hipMalloc(void**, size_t) declared elsewhere (not visible in this listing).
// If no such declaration is in scope, the call would select this template again
// with T = void and recurse - confirm the declaration order.
430 template <class T>
431 static inline hipError_t hipMalloc(T** devPtr, size_t size) {
432  return hipMalloc((void**)devPtr, size);
433 }
434 
435 // Provide an overload to automatically typecast the pointer type to void**, and also provide a
436 // default for the flags.
// `flags` defaults to hipHostMallocDefault when omitted. The pointer is cast to
// void** and forwarded with `size` and `flags`; the callee's hipError_t is
// returned unchanged.
// NOTE(review): relies on a non-template hipHostMalloc(void**, size_t, unsigned int)
// being declared elsewhere (not visible in this listing) - confirm.
437 template <class T>
438 static inline hipError_t hipHostMalloc(T** ptr, size_t size,
439  unsigned int flags = hipHostMallocDefault) {
440  return hipHostMalloc((void**)ptr, size, flags);
441 }
442 
// Typed-pointer convenience overload of hipMallocManaged (C++ only).
// `flags` defaults to hipMemAttachGlobal when omitted. The pointer is cast to
// void** and forwarded with `size` and `flags`; the callee's hipError_t is
// returned unchanged.
// NOTE(review): relies on a non-template hipMallocManaged(void**, size_t, unsigned int)
// being declared elsewhere (not visible in this listing) - confirm.
443 template <class T>
444 static inline hipError_t hipMallocManaged(T** devPtr, size_t size,
445  unsigned int flags = hipMemAttachGlobal) {
446  return hipMallocManaged((void**)devPtr, size, flags);
447 }
448 #endif
449 
450 #endif
Definition: hip_runtime_api.h:389
Device supports allocating managed memory on this system.
Definition: hip_runtime_api.h:384
size_t totalConstMem
Size of constant memory region (in bytes).
Definition: hip_runtime_api.h:96
hipError_t hipMallocManaged(void **dev_ptr, size_t size, unsigned int flags __dparm(hipMemAttachGlobal))
Allocates memory that will be automatically managed by AMD HMM.
int asicRevision
Revision of the GPU in this device.
Definition: hip_runtime_api.h:143
Maximum x-dimension of a block.
Definition: hip_runtime_api.h:323
Maximum x-dimension of a grid.
Definition: hip_runtime_api.h:326
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:368
int concurrentManagedAccess
Device can coherently access managed memory concurrently with the CPU.
Definition: hip_runtime_api.h:146
Global memory bus width in bits.
Definition: hip_runtime_api.h:339
Definition: hip_runtime_api.h:385
unsigned int * hdpRegFlushCntl
Address of HDP_REG_COHERENCY_FLUSH_CNTL register.
Definition: hip_runtime_api.h:127
size_t texturePitchAlignment
Pitch alignment requirement for texture references bound to pitched memory.
Definition: hip_runtime_api.h:130
int canMapHostMemory
Check whether HIP can map host memory.
Definition: hip_runtime_api.h:116
Definition: hip_runtime_api.h:333
#define hipMemAttachGlobal
Memory can be accessed by any stream on any device.
Definition: hip_runtime_api.h:172
Revision of the GPU in this device.
Definition: hip_runtime_api.h:383
int regsPerBlock
Registers per block.
Definition: hip_runtime_api.h:88
Definition: hip_runtime_api.h:342
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:371
#define hipHostMallocDefault
Flags that can be used with hipHostMalloc.
Definition: hip_runtime_api.h:156
Maximum dimension width of 3D images in image elements.
Definition: hip_runtime_api.h:361
iGPU
Definition: hip_runtime_api.h:355
int isMultiGpuBoard
1 if device is on a multi-GPU board, 0 if not.
Definition: hip_runtime_api.h:115
Peak clock frequency in kilohertz.
Definition: hip_runtime_api.h:337
int maxTexture1D
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:123
Definition: hip_runtime_api.h:169
int clockRate
Maximum clock frequency of the multiprocessors, in kHz.
Definition: hip_runtime_api.h:93
Maximum z-dimension of a grid.
Definition: hip_runtime_api.h:328
Minor compute capability version number.
Definition: hip_runtime_api.h:347
Definition: hip_runtime_api.h:329
int pciBusID
PCI Bus ID.
Definition: hip_runtime_api.h:112
Maximum y-dimension of a grid.
Definition: hip_runtime_api.h:327
Multiple GPU devices.
Definition: hip_runtime_api.h:354
int tccDriver
1 if the device is a Tesla device using the TCC driver, 0 otherwise.
Definition: hip_runtime_api.h:133
int maxThreadsPerBlock
Max work items per work group or workgroup max size.
Definition: hip_runtime_api.h:90
int maxTexture1DLinear
Maximum size for 1D textures bound to linear memory.
Definition: hip_runtime_api.h:122
Maximum y-dimension of a block.
Definition: hip_runtime_api.h:324
hipError_t hipHostMalloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory.
size_t sharedMemPerBlock
Size of shared memory region (in bytes).
Definition: hip_runtime_api.h:87
Definition: hip_runtime_api.h:387
Support cooperative launch on multiple devices.
Definition: hip_runtime_api.h:357
int maxThreadsPerMultiProcessor
Maximum resident threads per multi-processor.
Definition: hip_runtime_api.h:105
size_t memPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:128
int l2CacheSize
L2 cache size.
Definition: hip_runtime_api.h:104
hipDeviceAttribute_t
Definition: hip_runtime_api.h:321
size_t textureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:129
Major compute capability version number.
Definition: hip_runtime_api.h:346
Maximum number of threads per block.
Definition: hip_runtime_api.h:322
int gcnArch
DEPRECATED: use gcnArchName instead.
Definition: hip_runtime_api.h:117
Maximum dimensions depth of 3D images in image elements.
Definition: hip_runtime_api.h:363
int kernelExecTimeoutEnabled
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:131
int computeMode
Compute mode.
Definition: hip_runtime_api.h:106
Maximum z-dimension of a block.
Definition: hip_runtime_api.h:325
PCI Bus ID.
Definition: hip_runtime_api.h:350
unsigned int * hdpMemFlushCntl
Address of HDP_MEM_COHERENCY_FLUSH_CNTL register.
Definition: hip_runtime_api.h:126
Maximum dimension height of 2D images in image elements.
Definition: hip_runtime_api.h:360
Warp size in threads.
Definition: hip_runtime_api.h:332
int major
Definition: hip_runtime_api.h:97
Maximum dimensions height of 3D images in image elements.
Definition: hip_runtime_api.h:362
Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language ...
Peak memory clock frequency in kilohertz.
Definition: hip_runtime_api.h:338
Definition: hip_runtime_api.h:344
Address of the HDP_REG_COHERENCY_FLUSH_CNTL register.
Definition: hip_runtime_api.h:366
int clockInstructionRate
Definition: hip_runtime_api.h:107
int cooperativeMultiDeviceLaunch
HIP device supports cooperative launch on multiple devices.
Definition: hip_runtime_api.h:121
Constant memory size in bytes.
Definition: hip_runtime_api.h:331
Pitch alignment requirement for 2D texture references bound to pitched memory.
Definition: hip_runtime_api.h:370
int warpSize
Warp size.
Definition: hip_runtime_api.h:89
int concurrentKernels
Device can possibly execute multiple kernels concurrently.
Definition: hip_runtime_api.h:110
Alignment requirement for textures.
Definition: hip_runtime_api.h:369
size_t totalGlobalMem
Size of global memory region (in bytes).
Definition: hip_runtime_api.h:86
hipError_t hipMalloc(void **ptr, size_t size)
Allocate memory on the default accelerator.
Compute mode that device is currently in.
Definition: hip_runtime_api.h:341
PCI Device ID.
Definition: hip_runtime_api.h:351
Device can map host memory into device address space.
Definition: hip_runtime_api.h:372
Number of multiprocessors on the device.
Definition: hip_runtime_api.h:340
int integrated
APU vs dGPU.
Definition: hip_runtime_api.h:119
int memoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:95
Definition: hip_runtime_api.h:84
size_t maxSharedMemoryPerMultiProcessor
Maximum Shared Memory Per Multiprocessor.
Definition: hip_runtime_api.h:114
int cooperativeLaunch
HIP device supports cooperative launch.
Definition: hip_runtime_api.h:120
Support cooperative launch.
Definition: hip_runtime_api.h:356
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:358
int pciDeviceID
PCI Device ID.
Definition: hip_runtime_api.h:113
int pciDomainID
PCI Domain ID.
Definition: hip_runtime_api.h:111
int ECCEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:132
int managedMemory
Device supports allocating managed memory on this system.
Definition: hip_runtime_api.h:144
int directManagedMemAccessFromHost
Host can directly access managed memory on the device without migration.
Definition: hip_runtime_api.h:145
Definition: hip_runtime_api.h:45
int memoryClockRate
Maximum global memory clock frequency, in kHz.
Definition: hip_runtime_api.h:94
Maximum dimension width of 2D images in image elements.
Definition: hip_runtime_api.h:359
int pageableMemoryAccess
Definition: hip_runtime_api.h:147
Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register.
Definition: hip_runtime_api.h:365
Definition: hip_runtime_api.h:393
Device has ECC support enabled.
Definition: hip_runtime_api.h:373
int cooperativeMultiDeviceUnmatchedFunc
Definition: hip_runtime_api.h:134
Definition: hip_runtime_api.h:348