HIP: Heterogeneous-computing Interface for Portability
hip_runtime_api.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
30 #ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_API_H
31 #define HIP_INCLUDE_HIP_HIP_RUNTIME_API_H
32 
33 
34 #include <string.h> // for getDeviceProp
35 #include <hip/hip_version.h>
36 #include <hip/hip_common.h>
37 
// Anonymous enumeration of HIP_* status constants.
// Only HIP_SUCCESS carries an explicit value (0); the remaining enumerators
// take the next consecutive values (1, 2, 3) in declaration order.
38 enum {
39  HIP_SUCCESS = 0,
40  HIP_ERROR_INVALID_VALUE,
41  HIP_ERROR_NOT_INITIALIZED,
42  HIP_ERROR_LAUNCH_OUT_OF_RESOURCES
43 };
44 
45 typedef struct {
46  // 32-bit Atomics
47  unsigned hasGlobalInt32Atomics : 1;
48  unsigned hasGlobalFloatAtomicExch : 1;
49  unsigned hasSharedInt32Atomics : 1;
50  unsigned hasSharedFloatAtomicExch : 1;
51  unsigned hasFloatAtomicAdd : 1;
52 
53  // 64-bit Atomics
54  unsigned hasGlobalInt64Atomics : 1;
55  unsigned hasSharedInt64Atomics : 1;
56 
57  // Doubles
58  unsigned hasDoubles : 1;
59 
60  // Warp cross-lane operations
61  unsigned hasWarpVote : 1;
62  unsigned hasWarpBallot : 1;
63  unsigned hasWarpShuffle : 1;
64  unsigned hasFunnelShift : 1;
65 
66  // Sync
67  unsigned hasThreadFenceSystem : 1;
68  unsigned hasSyncThreadsExt : 1;
69 
70  // Misc
71  unsigned hasSurfaceFuncs : 1;
72  unsigned has3dGrid : 1;
73  unsigned hasDynamicParallelism : 1;
75 
76 
77 //---
78 // Common headers for both NVCC and HCC paths:
79 
84 typedef struct hipDeviceProp_t {
85  char name[256];
86  size_t totalGlobalMem;
89  int warpSize;
91  int maxThreadsDim[3];
92  int maxGridSize[3];
93  int clockRate;
96  size_t totalConstMem;
97  int major;
98  int minor;
101  int multiProcessorCount;
108  hipDeviceArch_t arch;
112  int pciBusID;
117  int gcnArch;
118  char gcnArchName[256];
124  int maxTexture2D[2];
125  int maxTexture3D[3];
126  unsigned int* hdpMemFlushCntl;
127  unsigned int* hdpRegFlushCntl;
128  size_t memPitch;
133  int tccDriver;
135  int cooperativeMultiDeviceUnmatchedGridDim;
137  int cooperativeMultiDeviceUnmatchedBlockDim;
139  int cooperativeMultiDeviceUnmatchedSharedMem;
141  int isLargeBar;
148  int pageableMemoryAccessUsesHostPageTables;
151 
152 
// Enumeration of memory kinds; used as the type of the `memoryType` member of
// hipPointerAttribute_t. No enumerator has an explicit value, so they are
// numbered consecutively from 0 in declaration order.
156 typedef enum hipMemoryType {
157  hipMemoryTypeHost,
158  hipMemoryTypeDevice,
159  hipMemoryTypeArray,
161  hipMemoryTypeUnified
163 }hipMemoryType;
164 
165 
169 typedef struct hipPointerAttribute_t {
170  enum hipMemoryType memoryType;
171  int device;
172  void* devicePointer;
173  void* hostPointer;
174  int isManaged;
175  unsigned allocationFlags; /* flags specified when memory was allocated*/
176  /* peers? */
178 
179 
180 // hack to get these to show up in Doxygen:
187 // Ignoring error-code return values from hip APIs is discouraged. On C++17,
188 // we can make that yield a warning
189 #if __cplusplus >= 201703L
190 #define __HIP_NODISCARD [[nodiscard]]
191 #else
192 #define __HIP_NODISCARD
193 #endif
194 
195 /*
196  * @brief hipError_t
197  * @enum
198  * @ingroup Enumerations
199  */
200 // Developer note - when updating these, also update the hipErrorName and hipErrorString
201 // functions in the NVCC and HCC paths, and the hipCUDAErrorTohipError function in the NVCC path.
202 
// Error/status codes returned by HIP API functions (the typed hipMalloc,
// hipHostMalloc and hipMallocManaged wrappers in this header return this type).
//
// Notes for maintainers:
//  - __HIP_NODISCARD expands to [[nodiscard]] when __cplusplus >= 201703L and
//    to nothing otherwise, so discarding a hipError_t can be diagnosed on
//    C++17 and later only.
//  - Every enumerator except hipErrorTbd has an explicit numeric value; the
//    values are not contiguous.
//  - Enumerators that follow a "// Deprecated" marker are aliases that share
//    the numeric value of the enumerator declared just before the marker
//    (e.g. hipErrorMemoryAllocation == hipErrorOutOfMemory == 2). Code that
//    switches on hipError_t must not list both names of such a pair.
//  - hipErrorTbd has no initializer and therefore takes the value following
//    hipErrorRuntimeOther (1054).
203 typedef enum __HIP_NODISCARD hipError_t {
204  hipSuccess = 0,
205  hipErrorInvalidValue = 1,
206  hipErrorOutOfMemory = 2,
208  // Deprecated
209  hipErrorMemoryAllocation = 2,
210  hipErrorNotInitialized = 3,
211  // Deprecated
212  hipErrorInitializationError = 3,
213  hipErrorDeinitialized = 4,
214  hipErrorProfilerDisabled = 5,
215  hipErrorProfilerNotInitialized = 6,
216  hipErrorProfilerAlreadyStarted = 7,
217  hipErrorProfilerAlreadyStopped = 8,
218  hipErrorInvalidConfiguration = 9,
219  hipErrorInvalidSymbol = 13,
220  hipErrorInvalidDevicePointer = 17,
221  hipErrorInvalidMemcpyDirection = 21,
222  hipErrorInsufficientDriver = 35,
223  hipErrorMissingConfiguration = 52,
224  hipErrorPriorLaunchFailure = 53,
225  hipErrorInvalidDeviceFunction = 98,
226  hipErrorNoDevice = 100,
227  hipErrorInvalidDevice = 101,
228  hipErrorInvalidImage = 200,
229  hipErrorInvalidContext = 201,
230  hipErrorContextAlreadyCurrent = 202,
231  hipErrorMapFailed = 205,
232  // Deprecated
233  hipErrorMapBufferObjectFailed = 205,
234  hipErrorUnmapFailed = 206,
235  hipErrorArrayIsMapped = 207,
236  hipErrorAlreadyMapped = 208,
237  hipErrorNoBinaryForGpu = 209,
238  hipErrorAlreadyAcquired = 210,
239  hipErrorNotMapped = 211,
240  hipErrorNotMappedAsArray = 212,
241  hipErrorNotMappedAsPointer = 213,
242  hipErrorECCNotCorrectable = 214,
243  hipErrorUnsupportedLimit = 215,
244  hipErrorContextAlreadyInUse = 216,
245  hipErrorPeerAccessUnsupported = 217,
246  hipErrorInvalidKernelFile = 218,
247  hipErrorInvalidGraphicsContext = 219,
248  hipErrorInvalidSource = 300,
249  hipErrorFileNotFound = 301,
250  hipErrorSharedObjectSymbolNotFound = 302,
251  hipErrorSharedObjectInitFailed = 303,
252  hipErrorOperatingSystem = 304,
253  hipErrorInvalidHandle = 400,
254  // Deprecated
255  hipErrorInvalidResourceHandle = 400,
256  hipErrorNotFound = 500,
257  hipErrorNotReady = 600,
258  hipErrorIllegalAddress = 700,
262  hipErrorLaunchOutOfResources = 701,
263  hipErrorLaunchTimeOut = 702,
264  hipErrorPeerAccessAlreadyEnabled =
265  704,
266  hipErrorPeerAccessNotEnabled =
267  705,
268  hipErrorSetOnActiveProcess = 708,
269  hipErrorAssert = 710,
270  hipErrorHostMemoryAlreadyRegistered =
271  712,
272  hipErrorHostMemoryNotRegistered =
273  713,
274  hipErrorLaunchFailure =
275  719,
276  hipErrorCooperativeLaunchTooLarge =
277  720,
278  hipErrorNotSupported = 801,
281  hipErrorUnknown = 999, //< Unknown error.
282  // HSA Runtime Error Codes start here.
283  hipErrorRuntimeMemory = 1052,
284  hipErrorRuntimeOther = 1053,
286  hipErrorTbd
288 } hipError_t;
289 
290 #undef __HIP_NODISCARD
291 
292 /*
293  * @brief hipDeviceAttribute_t
294  * @enum
295  * @ingroup Enumerations
296  */
297 typedef enum hipDeviceAttribute_t {
340 
343 
350 
370 
// Compute-mode constants with explicit values 0 through 3.
// NOTE(review): presumably these are the values reported through the device
// property `computeMode` ("Compute mode.") - confirm against the full header.
371 enum hipComputeMode {
372  hipComputeModeDefault = 0,
373  hipComputeModeExclusive = 1,
374  hipComputeModeProhibited = 2,
375  hipComputeModeExclusiveProcess = 3
376 };
377 
382 #if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
384 #elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
385 #include "hip/nvcc_detail/hip_runtime_api.h"
386 #else
387 #error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
388 #endif
389 
390 
402 #if defined(__cplusplus) && !defined(__HIP_DISABLE_CPP_FUNCTIONS__)
// C++-only typed convenience overload of hipMalloc (compiled only when
// __cplusplus is defined and __HIP_DISABLE_CPP_FUNCTIONS__ is not).
// Casts the typed `T**` to `void**` and forwards to the untyped
// hipMalloc(void**, size_t), so callers need not write the cast themselves.
//   devPtr - address of the typed pointer passed on to the untyped hipMalloc.
//   size   - forwarded unchanged.
// Returns the hipError_t produced by the forwarded call, unmodified.
403 template <class T>
404 static inline hipError_t hipMalloc(T** devPtr, size_t size) {
405  return hipMalloc((void**)devPtr, size);
406 }
407 
408 // Provide an override to automatically typecast the pointer type from void**, and also provide a
409 // default for the flags.
// Forwards to the untyped hipHostMalloc(void**, size_t, unsigned int) and
// returns its hipError_t unchanged.
//   ptr   - address of the typed pointer; cast to void** before forwarding.
//   size  - forwarded unchanged.
//   flags - forwarded unchanged; defaults to hipHostMallocDefault when omitted.
410 template <class T>
411 static inline hipError_t hipHostMalloc(T** ptr, size_t size,
412  unsigned int flags = hipHostMallocDefault) {
413  return hipHostMalloc((void**)ptr, size, flags);
414 }
415 
// C++-only typed convenience overload of hipMallocManaged.
// Casts the typed `T**` to `void**` and forwards to the untyped
// hipMallocManaged(void**, size_t, unsigned int), returning its hipError_t
// unchanged.
//   devPtr - address of the typed pointer; cast to void** before forwarding.
//   size   - forwarded unchanged.
//   flags  - forwarded unchanged; defaults to hipMemAttachGlobal when omitted.
416 template <class T>
417 static inline hipError_t hipMallocManaged(T** devPtr, size_t size,
418  unsigned int flags = hipMemAttachGlobal) {
419  return hipMallocManaged((void**)devPtr, size, flags);
420 }
421 #endif
422 
423 #endif
Definition: hip_runtime_api.h:365
Device supports allocating managed memory on this system.
Definition: hip_runtime_api.h:360
size_t totalConstMem
Size of constant memory region (in bytes).
Definition: hip_runtime_api.h:96
hipError_t hipMallocManaged(void **dev_ptr, size_t size, unsigned int flags __dparm(hipMemAttachGlobal))
Allocates memory that will be automatically managed by AMD HMM.
int asicRevision
Revision of the GPU in this device.
Definition: hip_runtime_api.h:143
Maximum x-dimension of a block.
Definition: hip_runtime_api.h:299
Maximum x-dimension of a grid.
Definition: hip_runtime_api.h:302
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:344
int concurrentManagedAccess
Device can coherently access managed memory concurrently with the CPU.
Definition: hip_runtime_api.h:146
Global memory bus width in bits.
Definition: hip_runtime_api.h:315
Definition: hip_runtime_api.h:361
unsigned int * hdpRegFlushCntl
Address of HDP_REG_COHERENCY_FLUSH_CNTL register.
Definition: hip_runtime_api.h:127
size_t texturePitchAlignment
Pitch alignment requirement for texture references bound to pitched memory.
Definition: hip_runtime_api.h:130
int canMapHostMemory
Check whether HIP can map host memory.
Definition: hip_runtime_api.h:116
Definition: hip_runtime_api.h:309
#define hipMemAttachGlobal
Memory can be accessed by any stream on any device.
Definition: hip_runtime_api.h:191
Revision of the GPU in this device.
Definition: hip_runtime_api.h:359
int regsPerBlock
Registers per block.
Definition: hip_runtime_api.h:88
Definition: hip_runtime_api.h:318
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:347
#define hipHostMallocDefault
Flags that can be used with hipHostMalloc.
Definition: hip_runtime_api.h:175
Maximum dimension width of 3D images in image elements.
Definition: hip_runtime_api.h:337
iGPU
Definition: hip_runtime_api.h:331
int isMultiGpuBoard
1 if device is on a multi-GPU board, 0 if not.
Definition: hip_runtime_api.h:115
Peak clock frequency in kilohertz.
Definition: hip_runtime_api.h:313
int maxTexture1D
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:123
Definition: hip_runtime_api.h:169
int clockRate
Max clock frequency of the multiProcessors in khz.
Definition: hip_runtime_api.h:93
Maximum z-dimension of a grid.
Definition: hip_runtime_api.h:304
Minor compute capability version number.
Definition: hip_runtime_api.h:323
Definition: hip_runtime_api.h:305
int pciBusID
PCI Bus ID.
Definition: hip_runtime_api.h:112
Maximum y-dimension of a grid.
Definition: hip_runtime_api.h:303
Multiple GPU devices.
Definition: hip_runtime_api.h:330
int tccDriver
1 if the device is a Tesla device using the TCC driver, 0 otherwise.
Definition: hip_runtime_api.h:133
int maxThreadsPerBlock
Max work items per work group or workgroup max size.
Definition: hip_runtime_api.h:90
int maxTexture1DLinear
Maximum size for 1D textures bound to linear memory.
Definition: hip_runtime_api.h:122
Maximum y-dimension of a block.
Definition: hip_runtime_api.h:300
hipError_t hipHostMalloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory.
size_t sharedMemPerBlock
Size of shared memory region (in bytes).
Definition: hip_runtime_api.h:87
Definition: hip_runtime_api.h:363
Support cooperative launch on multiple devices.
Definition: hip_runtime_api.h:333
int maxThreadsPerMultiProcessor
Maximum resident threads per multi-processor.
Definition: hip_runtime_api.h:105
size_t memPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:128
int l2CacheSize
L2 cache size.
Definition: hip_runtime_api.h:104
hipDeviceAttribute_t
Definition: hip_runtime_api.h:297
size_t textureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:129
Major compute capability version number.
Definition: hip_runtime_api.h:322
Maximum number of threads per block.
Definition: hip_runtime_api.h:298
int gcnArch
DEPRECATED: use gcnArchName instead.
Definition: hip_runtime_api.h:117
Maximum dimensions depth of 3D images in image elements.
Definition: hip_runtime_api.h:339
int kernelExecTimeoutEnabled
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:131
int computeMode
Compute mode.
Definition: hip_runtime_api.h:106
Maximum z-dimension of a block.
Definition: hip_runtime_api.h:301
PCI Bus ID.
Definition: hip_runtime_api.h:326
unsigned int * hdpMemFlushCntl
Address of HDP_MEM_COHERENCY_FLUSH_CNTL register.
Definition: hip_runtime_api.h:126
Maximum dimension height of 2D images in image elements.
Definition: hip_runtime_api.h:336
Warp size in threads.
Definition: hip_runtime_api.h:308
int major
Definition: hip_runtime_api.h:97
Maximum dimensions height of 3D images in image elements.
Definition: hip_runtime_api.h:338
Peak memory clock frequency in kilohertz.
Definition: hip_runtime_api.h:314
Definition: hip_runtime_api.h:320
Address of the HDP_REG_COHERENCY_FLUSH_CNTL register.
Definition: hip_runtime_api.h:342
int clockInstructionRate
Definition: hip_runtime_api.h:107
int cooperativeMultiDeviceLaunch
HIP device supports cooperative launch on multiple devices.
Definition: hip_runtime_api.h:121
Constant memory size in bytes.
Definition: hip_runtime_api.h:307
Pitch alignment requirement for 2D texture references bound to pitched memory.
Definition: hip_runtime_api.h:346
int warpSize
Warp size.
Definition: hip_runtime_api.h:89
int concurrentKernels
Device can possibly execute multiple kernels concurrently.
Definition: hip_runtime_api.h:110
Alignment requirement for textures.
Definition: hip_runtime_api.h:345
size_t totalGlobalMem
Size of global memory region (in bytes).
Definition: hip_runtime_api.h:86
hipError_t hipMalloc(void **ptr, size_t size)
Allocate memory on the default accelerator.
Compute mode that device is currently in.
Definition: hip_runtime_api.h:317
PCI Device ID.
Definition: hip_runtime_api.h:327
Device can map host memory into device address space.
Definition: hip_runtime_api.h:348
Number of multiprocessors on the device.
Definition: hip_runtime_api.h:316
int integrated
APU vs dGPU.
Definition: hip_runtime_api.h:119
int memoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:95
Definition: hip_runtime_api.h:84
size_t maxSharedMemoryPerMultiProcessor
Maximum Shared Memory Per Multiprocessor.
Definition: hip_runtime_api.h:114
int cooperativeLaunch
HIP device supports cooperative launch.
Definition: hip_runtime_api.h:120
Support cooperative launch.
Definition: hip_runtime_api.h:332
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:334
int pciDeviceID
PCI Device ID.
Definition: hip_runtime_api.h:113
int pciDomainID
PCI Domain ID.
Definition: hip_runtime_api.h:111
int ECCEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:132
int managedMemory
Device supports allocating managed memory on this system.
Definition: hip_runtime_api.h:144
int directManagedMemAccessFromHost
Host can directly access managed memory on the device without migration.
Definition: hip_runtime_api.h:145
Definition: hip_runtime_api.h:45
Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language ...
int memoryClockRate
Max global memory clock frequency in khz.
Definition: hip_runtime_api.h:94
Maximum dimension width of 2D images in image elements.
Definition: hip_runtime_api.h:335
int pageableMemoryAccess
Definition: hip_runtime_api.h:147
Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register.
Definition: hip_runtime_api.h:341
Device has ECC support enabled.
Definition: hip_runtime_api.h:349
int cooperativeMultiDeviceUnmatchedFunc
Definition: hip_runtime_api.h:134
Definition: hip_runtime_api.h:324