23 #ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_API_H
24 #define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_API_H
26 #include <cuda_runtime_api.h>
28 #include <cuda_profiler_api.h>
29 #include <cuda_fp16.h>
// Expands to a default-argument initializer (`= x`). This header applies it to
// trailing parameters, e.g. `hipStream_t stream __dparm(0)`.
36 #define __dparm(x) = x
42 #if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED)
43 #define __HIP_DEPRECATED
44 #elif defined(_MSC_VER)
45 #define __HIP_DEPRECATED __declspec(deprecated)
46 #elif defined(__GNUC__)
47 #define __HIP_DEPRECATED __attribute__((deprecated))
49 #define __HIP_DEPRECATED
58 typedef enum hipMemcpyKind {
60 hipMemcpyHostToDevice,
61 hipMemcpyDeviceToHost,
62 hipMemcpyDeviceToDevice,
// hipDataType and its HIP_R_* / HIP_C_* enumerator names are plain macro aliases
// for cudaDataType and the CUDA_R_* / CUDA_C_* identifiers with the same suffix.
76 #define hipDataType cudaDataType
77 #define HIP_R_16F CUDA_R_16F
78 #define HIP_R_32F CUDA_R_32F
79 #define HIP_R_64F CUDA_R_64F
80 #define HIP_C_16F CUDA_C_16F
81 #define HIP_C_32F CUDA_C_32F
82 #define HIP_C_64F CUDA_C_64F
// Library-property names: hipLibraryPropertyType aliases libraryPropertyType, and
// each HIP_LIBRARY_* name aliases the unprefixed identifier (MAJOR_VERSION, ...).
85 #define hipLibraryPropertyType libraryPropertyType
86 #define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION
87 #define HIP_LIBRARY_MINOR_VERSION MINOR_VERSION
88 #define HIP_LIBRARY_PATCH_LEVEL PATCH_LEVEL
// Array descriptor names alias the CUDA_ARRAY*_DESCRIPTOR identifiers.
90 #define HIP_ARRAY_DESCRIPTOR CUDA_ARRAY_DESCRIPTOR
91 #define HIP_ARRAY3D_DESCRIPTOR CUDA_ARRAY3D_DESCRIPTOR
// Array element-format names: each HIP_AD_FORMAT_* aliases the CU_AD_FORMAT_*
// identifier with the same suffix.
94 #define HIP_AD_FORMAT_UNSIGNED_INT8 CU_AD_FORMAT_UNSIGNED_INT8
95 #define HIP_AD_FORMAT_UNSIGNED_INT16 CU_AD_FORMAT_UNSIGNED_INT16
96 #define HIP_AD_FORMAT_UNSIGNED_INT32 CU_AD_FORMAT_UNSIGNED_INT32
97 #define HIP_AD_FORMAT_SIGNED_INT8 CU_AD_FORMAT_SIGNED_INT8
98 #define HIP_AD_FORMAT_SIGNED_INT16 CU_AD_FORMAT_SIGNED_INT16
99 #define HIP_AD_FORMAT_SIGNED_INT32 CU_AD_FORMAT_SIGNED_INT32
100 #define HIP_AD_FORMAT_HALF CU_AD_FORMAT_HALF
101 #define HIP_AD_FORMAT_FLOAT CU_AD_FORMAT_FLOAT
// The format type itself is the same type as CUarray_format (macro alias).
104 #define hipArray_Format CUarray_format
106 inline static CUarray_format hipArray_FormatToCUarray_format(
107 hipArray_Format format) {
109 case HIP_AD_FORMAT_UNSIGNED_INT8:
110 return CU_AD_FORMAT_UNSIGNED_INT8;
111 case HIP_AD_FORMAT_UNSIGNED_INT16:
112 return CU_AD_FORMAT_UNSIGNED_INT16;
113 case HIP_AD_FORMAT_UNSIGNED_INT32:
114 return CU_AD_FORMAT_UNSIGNED_INT32;
115 case HIP_AD_FORMAT_SIGNED_INT8:
116 return CU_AD_FORMAT_SIGNED_INT8;
117 case HIP_AD_FORMAT_SIGNED_INT16:
118 return CU_AD_FORMAT_SIGNED_INT16;
119 case HIP_AD_FORMAT_SIGNED_INT32:
120 return CU_AD_FORMAT_SIGNED_INT32;
121 case HIP_AD_FORMAT_HALF:
122 return CU_AD_FORMAT_HALF;
123 case HIP_AD_FORMAT_FLOAT:
124 return CU_AD_FORMAT_FLOAT;
126 return CU_AD_FORMAT_UNSIGNED_INT8;
// Texture-reference address modes: each HIP_TR_ADDRESS_MODE_* aliases the
// CU_TR_ADDRESS_MODE_* identifier with the same suffix.
130 #define HIP_TR_ADDRESS_MODE_WRAP CU_TR_ADDRESS_MODE_WRAP
131 #define HIP_TR_ADDRESS_MODE_CLAMP CU_TR_ADDRESS_MODE_CLAMP
132 #define HIP_TR_ADDRESS_MODE_MIRROR CU_TR_ADDRESS_MODE_MIRROR
133 #define HIP_TR_ADDRESS_MODE_BORDER CU_TR_ADDRESS_MODE_BORDER
// hipAddress_mode is a macro alias for the CUaddress_mode type.
136 #define hipAddress_mode CUaddress_mode
138 inline static CUaddress_mode hipAddress_modeToCUaddress_mode(
139 hipAddress_mode mode) {
141 case HIP_TR_ADDRESS_MODE_WRAP:
142 return CU_TR_ADDRESS_MODE_WRAP;
143 case HIP_TR_ADDRESS_MODE_CLAMP:
144 return CU_TR_ADDRESS_MODE_CLAMP;
145 case HIP_TR_ADDRESS_MODE_MIRROR:
146 return CU_TR_ADDRESS_MODE_MIRROR;
147 case HIP_TR_ADDRESS_MODE_BORDER:
148 return CU_TR_ADDRESS_MODE_BORDER;
150 return CU_TR_ADDRESS_MODE_WRAP;
// Texture-reference filter modes: HIP_TR_FILTER_MODE_* alias the matching
// CU_TR_FILTER_MODE_* identifiers; hipFilter_mode aliases the CUfilter_mode type.
154 #define HIP_TR_FILTER_MODE_POINT CU_TR_FILTER_MODE_POINT
155 #define HIP_TR_FILTER_MODE_LINEAR CU_TR_FILTER_MODE_LINEAR
158 #define hipFilter_mode CUfilter_mode
160 inline static CUfilter_mode hipFilter_mode_enumToCUfilter_mode(
161 hipFilter_mode mode) {
163 case HIP_TR_FILTER_MODE_POINT:
164 return CU_TR_FILTER_MODE_POINT;
165 case HIP_TR_FILTER_MODE_LINEAR:
166 return CU_TR_FILTER_MODE_LINEAR;
168 return CU_TR_FILTER_MODE_POINT;
// Resource types expressed with CU_* names: each HIP_RESOURCE_TYPE_* aliases the
// CU_RESOURCE_TYPE_* identifier with the same suffix, and hipResourcetype aliases
// the CUresourcetype type. (The separate hipResourceType / hipResourceType* names
// elsewhere in this header alias cudaResourceType* instead.)
173 #define HIP_RESOURCE_TYPE_ARRAY CU_RESOURCE_TYPE_ARRAY
174 #define HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY CU_RESOURCE_TYPE_MIPMAPPED_ARRAY
175 #define HIP_RESOURCE_TYPE_LINEAR CU_RESOURCE_TYPE_LINEAR
176 #define HIP_RESOURCE_TYPE_PITCH2D CU_RESOURCE_TYPE_PITCH2D
179 #define hipResourcetype CUresourcetype
181 inline static CUresourcetype hipResourcetype_enumToCUresourcetype(
182 hipResourcetype resType) {
184 case HIP_RESOURCE_TYPE_ARRAY:
185 return CU_RESOURCE_TYPE_ARRAY;
186 case HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY:
187 return CU_RESOURCE_TYPE_MIPMAPPED_ARRAY;
188 case HIP_RESOURCE_TYPE_LINEAR:
189 return CU_RESOURCE_TYPE_LINEAR;
190 case HIP_RESOURCE_TYPE_PITCH2D:
191 return CU_RESOURCE_TYPE_PITCH2D;
193 return CU_RESOURCE_TYPE_ARRAY;
// Texture reference and array handle names alias CUtexref / CUarray.
197 #define hipTexRef CUtexref
198 #define hiparray CUarray
// hipTextureAddressMode is a typedef of enum cudaTextureAddressMode; its
// enumerator names are macros for the cudaAddressMode* enumerators.
201 typedef enum cudaTextureAddressMode hipTextureAddressMode;
202 #define hipAddressModeWrap cudaAddressModeWrap
203 #define hipAddressModeClamp cudaAddressModeClamp
204 #define hipAddressModeMirror cudaAddressModeMirror
205 #define hipAddressModeBorder cudaAddressModeBorder
// hipTextureFilterMode is a typedef of enum cudaTextureFilterMode.
208 typedef enum cudaTextureFilterMode hipTextureFilterMode;
209 #define hipFilterModePoint cudaFilterModePoint
210 #define hipFilterModeLinear cudaFilterModeLinear
// hipTextureReadMode is a typedef of enum cudaTextureReadMode.
213 typedef enum cudaTextureReadMode hipTextureReadMode;
214 #define hipReadModeElementType cudaReadModeElementType
215 #define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat
// hipChannelFormatKind is a typedef of enum cudaChannelFormatKind.
218 typedef enum cudaChannelFormatKind hipChannelFormatKind;
219 #define hipChannelFormatKindSigned cudaChannelFormatKindSigned
220 #define hipChannelFormatKindUnsigned cudaChannelFormatKindUnsigned
221 #define hipChannelFormatKindFloat cudaChannelFormatKindFloat
222 #define hipChannelFormatKindNone cudaChannelFormatKindNone
// Memory-range attribute names alias the cudaMemRangeAttribute* identifiers.
226 #define hipMemRangeAttributeReadMostly cudaMemRangeAttributeReadMostly
227 #define hipMemRangeAttributePreferredLocation cudaMemRangeAttributePreferredLocation
228 #define hipMemRangeAttributeAccessedBy cudaMemRangeAttributeAccessedBy
229 #define hipMemRangeAttributeLastPrefetchLocation cudaMemRangeAttributeLastPrefetchLocation
// Surface boundary-mode type and values alias cudaSurfaceBoundaryMode / cudaBoundaryMode*.
231 #define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode
232 #define hipBoundaryModeZero cudaBoundaryModeZero
233 #define hipBoundaryModeTrap cudaBoundaryModeTrap
234 #define hipBoundaryModeClamp cudaBoundaryModeClamp
// Function cache-preference names alias cudaFuncCachePrefer*.
237 #define hipFuncCachePreferNone cudaFuncCachePreferNone
238 #define hipFuncCachePreferShared cudaFuncCachePreferShared
239 #define hipFuncCachePreferL1 cudaFuncCachePreferL1
240 #define hipFuncCachePreferEqual cudaFuncCachePreferEqual
// Resource type and values expressed with cuda* names (cudaResourceType*).
243 #define hipResourceType cudaResourceType
244 #define hipResourceTypeArray cudaResourceTypeArray
245 #define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray
246 #define hipResourceTypeLinear cudaResourceTypeLinear
247 #define hipResourceTypePitch2D cudaResourceTypePitch2D
// Event creation flags: the first four forward to the cudaEvent* identifiers.
253 #define hipEventDefault cudaEventDefault
254 #define hipEventBlockingSync cudaEventBlockingSync
255 #define hipEventDisableTiming cudaEventDisableTiming
256 #define hipEventInterprocess cudaEventInterprocess
// Unlike the flags above, these two have no CUDA identifier in this header and
// are defined as the literal 0, so OR-ing them into a flag word changes nothing.
257 #define hipEventReleaseToDevice 0
258 #define hipEventReleaseToSystem 0
// Host allocation flags: hipHostMalloc* forward to the cudaHostAlloc* identifiers.
261 #define hipHostMallocDefault cudaHostAllocDefault
262 #define hipHostMallocPortable cudaHostAllocPortable
263 #define hipHostMallocMapped cudaHostAllocMapped
264 #define hipHostMallocWriteCombined cudaHostAllocWriteCombined
// Coherent / NonCoherent are both defined as 0x0 here (no CUDA identifier is
// referenced), so they contribute no bits when combined with the flags above.
265 #define hipHostMallocCoherent 0x0
266 #define hipHostMallocNonCoherent 0x0
// Memory-attach flags alias cudaMemAttach*.
268 #define hipMemAttachGlobal cudaMemAttachGlobal
269 #define hipMemAttachHost cudaMemAttachHost
270 #define hipMemAttachSingle cudaMemAttachSingle
// Host-register flags alias cudaHostRegister*.
272 #define hipHostRegisterDefault cudaHostRegisterDefault
273 #define hipHostRegisterPortable cudaHostRegisterPortable
274 #define hipHostRegisterMapped cudaHostRegisterMapped
275 #define hipHostRegisterIoMemory cudaHostRegisterIoMemory
// Launch-parameter markers alias CU_LAUNCH_PARAM_*.
277 #define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER
278 #define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE
279 #define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END
// Miscellaneous one-to-one aliases onto the cuda* identifier of the same suffix.
280 #define hipLimitMallocHeapSize cudaLimitMallocHeapSize
281 #define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess
283 #define hipOccupancyDefault cudaOccupancyDefault
// Multi-device cooperative launch flags alias cudaCooperativeLaunchMultiDevice*.
285 #define hipCooperativeLaunchMultiDeviceNoPreSync \
286 cudaCooperativeLaunchMultiDeviceNoPreSync
287 #define hipCooperativeLaunchMultiDeviceNoPostSync \
288 cudaCooperativeLaunchMultiDeviceNoPostSync
// JIT option names: each hipJitOption* is a macro alias for a CU_JIT_* identifier.
// The suffixes do not always match textually (e.g. hipJitOptionTargetFromContext ->
// CU_JIT_TARGET_FROM_CUCONTEXT, hipJitOptionSm3xOpt -> CU_JIT_NEW_SM3X_OPT).
292 #define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS
293 #define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK
294 #define hipJitOptionWallTime CU_JIT_WALL_TIME
295 #define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER
296 #define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
297 #define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER
298 #define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
299 #define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL
300 #define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT
301 #define hipJitOptionTarget CU_JIT_TARGET
302 #define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY
303 #define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO
304 #define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE
305 #define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO
306 #define hipJitOptionCacheMode CU_JIT_CACHE_MODE
307 #define hipJitOptionSm3xOpt CU_JIT_NEW_SM3X_OPT
308 #define hipJitOptionFastCompile CU_JIT_FAST_COMPILE
309 #define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS
// Handle and enum typedefs: each hip* type is declared as the CUDA type on the
// left, so values can be passed to the CUDA API without conversion.
311 typedef cudaEvent_t hipEvent_t;
312 typedef cudaStream_t hipStream_t;
315 typedef enum cudaLimit hipLimit_t;
318 typedef CUcontext hipCtx_t;
320 typedef CUfunc_cache hipFuncCache;
321 typedef CUjit_option hipJitOption;
322 typedef CUdevice hipDevice_t;
323 typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr;
// Peer-to-peer attribute names; note the HipArray -> CudaArray rename in the last one.
324 #define hipDevP2PAttrPerformanceRank cudaDevP2PAttrPerformanceRank
325 #define hipDevP2PAttrAccessSupported cudaDevP2PAttrAccessSupported
326 #define hipDevP2PAttrNativeAtomicSupported cudaDevP2PAttrNativeAtomicSupported
327 #define hipDevP2PAttrHipArrayAccessSupported cudaDevP2PAttrCudaArrayAccessSupported
// Function attribute names alias cudaFuncAttribute*.
328 #define hipFuncAttributeMaxDynamicSharedMemorySize cudaFuncAttributeMaxDynamicSharedMemorySize
329 #define hipFuncAttributePreferredSharedMemoryCarveout cudaFuncAttributePreferredSharedMemoryCarveout
// Module, function and device-pointer handles are the CU* types.
331 typedef CUmodule hipModule_t;
332 typedef CUfunction hipFunction_t;
333 typedef CUdeviceptr hipDeviceptr_t;
// Type-name aliases. Note the inconsistent spelling kept by this header:
// `hip_Memcpy2D` (lower-case prefix with underscore) next to `HIP_MEMCPY3D`.
339 #define hipFunction_attribute CUfunction_attribute
340 #define hip_Memcpy2D CUDA_MEMCPY2D
341 #define HIP_MEMCPY3D CUDA_MEMCPY3D
342 #define hipMemcpy3DParms cudaMemcpy3DParms
// Array allocation flags alias cudaArray*.
343 #define hipArrayDefault cudaArrayDefault
344 #define hipArrayLayered cudaArrayLayered
345 #define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore
346 #define hipArrayCubemap cudaArrayCubemap
347 #define hipArrayTextureGather cudaArrayTextureGather
// Texture object handle is the CUDA texture object type.
349 typedef cudaTextureObject_t hipTextureObject_t;
// Texture type names alias cudaTextureType*.
351 #define hipTextureType1D cudaTextureType1D
352 #define hipTextureType1DLayered cudaTextureType1DLayered
353 #define hipTextureType2D cudaTextureType2D
354 #define hipTextureType2DLayered cudaTextureType2DLayered
355 #define hipTextureType3D cudaTextureType3D
// Device flags alias cudaDevice*.
357 #define hipDeviceScheduleAuto cudaDeviceScheduleAuto
358 #define hipDeviceScheduleSpin cudaDeviceScheduleSpin
359 #define hipDeviceScheduleYield cudaDeviceScheduleYield
360 #define hipDeviceScheduleBlockingSync cudaDeviceScheduleBlockingSync
361 #define hipDeviceScheduleMask cudaDeviceScheduleMask
362 #define hipDeviceMapHost cudaDeviceMapHost
363 #define hipDeviceLmemResizeToMax cudaDeviceLmemResizeToMax
// Special device ids alias cudaCpuDeviceId / cudaInvalidDeviceId.
365 #define hipCpuDeviceId cudaCpuDeviceId
366 #define hipInvalidDeviceId cudaInvalidDeviceId
// Constructor helpers: make_hip* names expand to the make_cuda* functions.
369 #define make_hipExtent make_cudaExtent
370 #define make_hipPos make_cudaPos
371 #define make_hipPitchedPtr make_cudaPitchedPtr
// Stream creation flags alias cudaStream*.
373 #define hipStreamDefault cudaStreamDefault
374 #define hipStreamNonBlocking cudaStreamNonBlocking
// Shared-memory bank size names alias cudaSharedMemBankSize*.
381 #define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault
382 #define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte
383 #define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte
// Function attribute selectors: each HIP_FUNC_ATTRIBUTE_* aliases the
// CU_FUNC_ATTRIBUTE_* identifier with the same suffix.
386 #define HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
387 #define HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
388 #define HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES
389 #define HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES
390 #define HIP_FUNC_ATTRIBUTE_NUM_REGS CU_FUNC_ATTRIBUTE_NUM_REGS
391 #define HIP_FUNC_ATTRIBUTE_PTX_VERSION CU_FUNC_ATTRIBUTE_PTX_VERSION
392 #define HIP_FUNC_ATTRIBUTE_BINARY_VERSION CU_FUNC_ATTRIBUTE_BINARY_VERSION
393 #define HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA CU_FUNC_ATTRIBUTE_CACHE_MODE_CA
394 #define HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
395 #define HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
396 #define HIP_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX
398 #if CUDA_VERSION >= 9000
399 #define __shfl(...) __shfl_sync(0xffffffff, __VA_ARGS__)
400 #define __shfl_up(...) __shfl_up_sync(0xffffffff, __VA_ARGS__)
401 #define __shfl_down(...) __shfl_down_sync(0xffffffff, __VA_ARGS__)
402 #define __shfl_xor(...) __shfl_xor_sync(0xffffffff, __VA_ARGS__)
405 inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) {
409 case cudaErrorProfilerDisabled:
410 return hipErrorProfilerDisabled;
411 case cudaErrorProfilerNotInitialized:
412 return hipErrorProfilerNotInitialized;
413 case cudaErrorProfilerAlreadyStarted:
414 return hipErrorProfilerAlreadyStarted;
415 case cudaErrorProfilerAlreadyStopped:
416 return hipErrorProfilerAlreadyStopped;
417 case cudaErrorInsufficientDriver:
418 return hipErrorInsufficientDriver;
419 case cudaErrorUnsupportedLimit:
420 return hipErrorUnsupportedLimit;
421 case cudaErrorPeerAccessUnsupported:
422 return hipErrorPeerAccessUnsupported;
423 case cudaErrorInvalidGraphicsContext:
424 return hipErrorInvalidGraphicsContext;
425 case cudaErrorSharedObjectSymbolNotFound:
426 return hipErrorSharedObjectSymbolNotFound;
427 case cudaErrorSharedObjectInitFailed:
428 return hipErrorSharedObjectInitFailed;
429 case cudaErrorOperatingSystem:
430 return hipErrorOperatingSystem;
431 case cudaErrorSetOnActiveProcess:
432 return hipErrorSetOnActiveProcess;
433 case cudaErrorIllegalAddress:
434 return hipErrorIllegalAddress;
435 case cudaErrorInvalidSymbol:
436 return hipErrorInvalidSymbol;
437 case cudaErrorMissingConfiguration:
438 return hipErrorMissingConfiguration;
439 case cudaErrorMemoryAllocation:
440 return hipErrorOutOfMemory;
441 case cudaErrorInitializationError:
442 return hipErrorNotInitialized;
443 case cudaErrorLaunchFailure:
445 case cudaErrorCooperativeLaunchTooLarge:
447 case cudaErrorPriorLaunchFailure:
448 return hipErrorPriorLaunchFailure;
449 case cudaErrorLaunchOutOfResources:
451 case cudaErrorInvalidDeviceFunction:
452 return hipErrorInvalidDeviceFunction;
453 case cudaErrorInvalidConfiguration:
454 return hipErrorInvalidConfiguration;
455 case cudaErrorInvalidDevice:
457 case cudaErrorInvalidValue:
459 case cudaErrorInvalidDevicePointer:
461 case cudaErrorInvalidMemcpyDirection:
463 case cudaErrorInvalidResourceHandle:
464 return hipErrorInvalidHandle;
465 case cudaErrorNotReady:
467 case cudaErrorNoDevice:
469 case cudaErrorPeerAccessAlreadyEnabled:
471 case cudaErrorPeerAccessNotEnabled:
473 case cudaErrorHostMemoryAlreadyRegistered:
475 case cudaErrorHostMemoryNotRegistered:
477 case cudaErrorMapBufferObjectFailed:
478 return hipErrorMapFailed;
479 case cudaErrorAssert:
481 case cudaErrorNotSupported:
483 case cudaErrorCudartUnloading:
484 return hipErrorDeinitialized;
485 case cudaErrorInvalidKernelImage:
486 return hipErrorInvalidImage;
487 case cudaErrorUnmapBufferObjectFailed:
488 return hipErrorUnmapFailed;
489 case cudaErrorNoKernelImageForDevice:
490 return hipErrorNoBinaryForGpu;
491 case cudaErrorECCUncorrectable:
492 return hipErrorECCNotCorrectable;
493 case cudaErrorDeviceAlreadyInUse:
494 return hipErrorContextAlreadyInUse;
495 case cudaErrorInvalidPtx:
497 case cudaErrorLaunchTimeout:
498 return hipErrorLaunchTimeOut;
499 #if CUDA_VERSION >= 10010
500 case cudaErrorInvalidSource:
501 return hipErrorInvalidSource;
502 case cudaErrorFileNotFound:
503 return hipErrorFileNotFound;
504 case cudaErrorSymbolNotFound:
505 return hipErrorNotFound;
506 case cudaErrorArrayIsMapped:
507 return hipErrorArrayIsMapped;
508 case cudaErrorNotMappedAsPointer:
509 return hipErrorNotMappedAsPointer;
510 case cudaErrorNotMappedAsArray:
511 return hipErrorNotMappedAsArray;
512 case cudaErrorNotMapped:
513 return hipErrorNotMapped;
514 case cudaErrorAlreadyAcquired:
515 return hipErrorAlreadyAcquired;
516 case cudaErrorAlreadyMapped:
517 return hipErrorAlreadyMapped;
519 #if CUDA_VERSION >= 10020
520 case cudaErrorDeviceUninitialized:
523 case cudaErrorUnknown:
525 return hipErrorUnknown;
529 inline static hipError_t hipCUResultTohipError(CUresult cuError) {
533 case CUDA_ERROR_OUT_OF_MEMORY:
534 return hipErrorOutOfMemory;
535 case CUDA_ERROR_INVALID_VALUE:
537 case CUDA_ERROR_INVALID_DEVICE:
539 case CUDA_ERROR_DEINITIALIZED:
540 return hipErrorDeinitialized;
541 case CUDA_ERROR_NO_DEVICE:
543 case CUDA_ERROR_INVALID_CONTEXT:
545 case CUDA_ERROR_NOT_INITIALIZED:
546 return hipErrorNotInitialized;
547 case CUDA_ERROR_INVALID_HANDLE:
548 return hipErrorInvalidHandle;
549 case CUDA_ERROR_MAP_FAILED:
550 return hipErrorMapFailed;
551 case CUDA_ERROR_PROFILER_DISABLED:
552 return hipErrorProfilerDisabled;
553 case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
554 return hipErrorProfilerNotInitialized;
555 case CUDA_ERROR_PROFILER_ALREADY_STARTED:
556 return hipErrorProfilerAlreadyStarted;
557 case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
558 return hipErrorProfilerAlreadyStopped;
559 case CUDA_ERROR_INVALID_IMAGE:
560 return hipErrorInvalidImage;
561 case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
562 return hipErrorContextAlreadyCurrent;
563 case CUDA_ERROR_UNMAP_FAILED:
564 return hipErrorUnmapFailed;
565 case CUDA_ERROR_ARRAY_IS_MAPPED:
566 return hipErrorArrayIsMapped;
567 case CUDA_ERROR_ALREADY_MAPPED:
568 return hipErrorAlreadyMapped;
569 case CUDA_ERROR_NO_BINARY_FOR_GPU:
570 return hipErrorNoBinaryForGpu;
571 case CUDA_ERROR_ALREADY_ACQUIRED:
572 return hipErrorAlreadyAcquired;
573 case CUDA_ERROR_NOT_MAPPED:
574 return hipErrorNotMapped;
575 case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
576 return hipErrorNotMappedAsArray;
577 case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
578 return hipErrorNotMappedAsPointer;
579 case CUDA_ERROR_ECC_UNCORRECTABLE:
580 return hipErrorECCNotCorrectable;
581 case CUDA_ERROR_UNSUPPORTED_LIMIT:
582 return hipErrorUnsupportedLimit;
583 case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
584 return hipErrorContextAlreadyInUse;
585 case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
586 return hipErrorPeerAccessUnsupported;
587 case CUDA_ERROR_INVALID_PTX:
589 case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
590 return hipErrorInvalidGraphicsContext;
591 case CUDA_ERROR_INVALID_SOURCE:
592 return hipErrorInvalidSource;
593 case CUDA_ERROR_FILE_NOT_FOUND:
594 return hipErrorFileNotFound;
595 case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
596 return hipErrorSharedObjectSymbolNotFound;
597 case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
598 return hipErrorSharedObjectInitFailed;
599 case CUDA_ERROR_OPERATING_SYSTEM:
600 return hipErrorOperatingSystem;
601 case CUDA_ERROR_NOT_FOUND:
602 return hipErrorNotFound;
603 case CUDA_ERROR_NOT_READY:
605 case CUDA_ERROR_ILLEGAL_ADDRESS:
606 return hipErrorIllegalAddress;
607 case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
609 case CUDA_ERROR_LAUNCH_TIMEOUT:
610 return hipErrorLaunchTimeOut;
611 case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
613 case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
615 case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
616 return hipErrorSetOnActiveProcess;
617 case CUDA_ERROR_ASSERT:
619 case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
621 case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
623 case CUDA_ERROR_LAUNCH_FAILED:
625 case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE:
627 case CUDA_ERROR_NOT_SUPPORTED:
629 case CUDA_ERROR_UNKNOWN:
631 return hipErrorUnknown;
635 inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
639 case hipErrorOutOfMemory:
640 return cudaErrorMemoryAllocation;
641 case hipErrorProfilerDisabled:
642 return cudaErrorProfilerDisabled;
643 case hipErrorProfilerNotInitialized:
644 return cudaErrorProfilerNotInitialized;
645 case hipErrorProfilerAlreadyStarted:
646 return cudaErrorProfilerAlreadyStarted;
647 case hipErrorProfilerAlreadyStopped:
648 return cudaErrorProfilerAlreadyStopped;
649 case hipErrorInvalidConfiguration:
650 return cudaErrorInvalidConfiguration;
652 return cudaErrorLaunchOutOfResources;
654 return cudaErrorInvalidValue;
655 case hipErrorInvalidHandle:
656 return cudaErrorInvalidResourceHandle;
658 return cudaErrorInvalidDevice;
660 return cudaErrorInvalidMemcpyDirection;
662 return cudaErrorInvalidDevicePointer;
663 case hipErrorNotInitialized:
664 return cudaErrorInitializationError;
666 return cudaErrorNoDevice;
668 return cudaErrorNotReady;
670 return cudaErrorPeerAccessNotEnabled;
672 return cudaErrorPeerAccessAlreadyEnabled;
674 return cudaErrorHostMemoryAlreadyRegistered;
676 return cudaErrorHostMemoryNotRegistered;
677 case hipErrorDeinitialized:
678 return cudaErrorCudartUnloading;
679 case hipErrorInvalidSymbol:
680 return cudaErrorInvalidSymbol;
681 case hipErrorInsufficientDriver:
682 return cudaErrorInsufficientDriver;
683 case hipErrorMissingConfiguration:
684 return cudaErrorMissingConfiguration;
685 case hipErrorPriorLaunchFailure:
686 return cudaErrorPriorLaunchFailure;
687 case hipErrorInvalidDeviceFunction:
688 return cudaErrorInvalidDeviceFunction;
689 case hipErrorInvalidImage:
690 return cudaErrorInvalidKernelImage;
692 #if CUDA_VERSION >= 10020
693 return cudaErrorDeviceUninitialized;
695 return cudaErrorUnknown;
697 case hipErrorMapFailed:
698 return cudaErrorMapBufferObjectFailed;
699 case hipErrorUnmapFailed:
700 return cudaErrorUnmapBufferObjectFailed;
701 case hipErrorArrayIsMapped:
702 #if CUDA_VERSION >= 10010
703 return cudaErrorArrayIsMapped;
705 return cudaErrorUnknown;
707 case hipErrorAlreadyMapped:
708 #if CUDA_VERSION >= 10010
709 return cudaErrorAlreadyMapped;
711 return cudaErrorUnknown;
713 case hipErrorNoBinaryForGpu:
714 return cudaErrorNoKernelImageForDevice;
715 case hipErrorAlreadyAcquired:
716 #if CUDA_VERSION >= 10010
717 return cudaErrorAlreadyAcquired;
719 return cudaErrorUnknown;
721 case hipErrorNotMapped:
722 #if CUDA_VERSION >= 10010
723 return cudaErrorNotMapped;
725 return cudaErrorUnknown;
727 case hipErrorNotMappedAsArray:
728 #if CUDA_VERSION >= 10010
729 return cudaErrorNotMappedAsArray;
731 return cudaErrorUnknown;
733 case hipErrorNotMappedAsPointer:
734 #if CUDA_VERSION >= 10010
735 return cudaErrorNotMappedAsPointer;
737 return cudaErrorUnknown;
739 case hipErrorECCNotCorrectable:
740 return cudaErrorECCUncorrectable;
741 case hipErrorUnsupportedLimit:
742 return cudaErrorUnsupportedLimit;
743 case hipErrorContextAlreadyInUse:
744 return cudaErrorDeviceAlreadyInUse;
745 case hipErrorPeerAccessUnsupported:
746 return cudaErrorPeerAccessUnsupported;
748 return cudaErrorInvalidPtx;
749 case hipErrorInvalidGraphicsContext:
750 return cudaErrorInvalidGraphicsContext;
751 case hipErrorInvalidSource:
752 #if CUDA_VERSION >= 10010
753 return cudaErrorInvalidSource;
755 return cudaErrorUnknown;
757 case hipErrorFileNotFound:
758 #if CUDA_VERSION >= 10010
759 return cudaErrorFileNotFound;
761 return cudaErrorUnknown;
763 case hipErrorSharedObjectSymbolNotFound:
764 return cudaErrorSharedObjectSymbolNotFound;
765 case hipErrorSharedObjectInitFailed:
766 return cudaErrorSharedObjectInitFailed;
767 case hipErrorOperatingSystem:
768 return cudaErrorOperatingSystem;
769 case hipErrorNotFound:
770 #if CUDA_VERSION >= 10010
771 return cudaErrorSymbolNotFound;
773 return cudaErrorUnknown;
775 case hipErrorIllegalAddress:
776 return cudaErrorIllegalAddress;
777 case hipErrorLaunchTimeOut:
778 return cudaErrorLaunchTimeout;
779 case hipErrorSetOnActiveProcess:
780 return cudaErrorSetOnActiveProcess;
782 return cudaErrorLaunchFailure;
784 return cudaErrorCooperativeLaunchTooLarge;
786 return cudaErrorNotSupported;
791 case hipErrorUnknown:
794 return cudaErrorUnknown;
798 inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
800 case hipMemcpyHostToHost:
801 return cudaMemcpyHostToHost;
802 case hipMemcpyHostToDevice:
803 return cudaMemcpyHostToDevice;
804 case hipMemcpyDeviceToHost:
805 return cudaMemcpyDeviceToHost;
806 case hipMemcpyDeviceToDevice:
807 return cudaMemcpyDeviceToDevice;
809 return cudaMemcpyDefault;
813 inline static enum cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode(
814 hipTextureAddressMode kind) {
816 case hipAddressModeWrap:
817 return cudaAddressModeWrap;
818 case hipAddressModeClamp:
819 return cudaAddressModeClamp;
820 case hipAddressModeMirror:
821 return cudaAddressModeMirror;
822 case hipAddressModeBorder:
823 return cudaAddressModeBorder;
825 return cudaAddressModeWrap;
829 inline static enum cudaMemRangeAttribute hipMemRangeAttributeToCudaMemRangeAttribute(
833 return cudaMemRangeAttributeReadMostly;
835 return cudaMemRangeAttributePreferredLocation;
837 return cudaMemRangeAttributeAccessedBy;
839 return cudaMemRangeAttributeLastPrefetchLocation;
841 return cudaMemRangeAttributeReadMostly;
845 inline static enum cudaMemoryAdvise hipMemoryAdviseTocudaMemoryAdvise(
849 return cudaMemAdviseSetReadMostly;
851 return cudaMemAdviseUnsetReadMostly ;
853 return cudaMemAdviseSetPreferredLocation;
855 return cudaMemAdviseUnsetPreferredLocation;
857 return cudaMemAdviseSetAccessedBy;
859 return cudaMemAdviseUnsetAccessedBy;
861 return cudaMemAdviseSetReadMostly;
865 inline static enum cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode(
866 hipTextureFilterMode kind) {
868 case hipFilterModePoint:
869 return cudaFilterModePoint;
870 case hipFilterModeLinear:
871 return cudaFilterModeLinear;
873 return cudaFilterModePoint;
877 inline static enum cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) {
879 case hipReadModeElementType:
880 return cudaReadModeElementType;
881 case hipReadModeNormalizedFloat:
882 return cudaReadModeNormalizedFloat;
884 return cudaReadModeElementType;
888 inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind(
889 hipChannelFormatKind kind) {
891 case hipChannelFormatKindSigned:
892 return cudaChannelFormatKindSigned;
893 case hipChannelFormatKindUnsigned:
894 return cudaChannelFormatKindUnsigned;
895 case hipChannelFormatKindFloat:
896 return cudaChannelFormatKindFloat;
897 case hipChannelFormatKindNone:
898 return cudaChannelFormatKindNone;
900 return cudaChannelFormatKindNone;
// HIPRT_CB aliases CUDART_CB and decorates the callback pointer type below.
907 #define HIPRT_CB CUDART_CB
// Pointer-to-function type for stream callbacks: takes the stream, a hipError_t
// status and an opaque user pointer, and returns void.
908 typedef void(HIPRT_CB*
hipStreamCallback_t)(hipStream_t stream, hipError_t status,
void* userData);
909 inline static hipError_t
hipInit(
unsigned int flags) {
910 return hipCUResultTohipError(cuInit(flags));
// Calls cudaDeviceReset() and returns its result converted to hipError_t by
// hipCUDAErrorTohipError().
913 inline static hipError_t
hipDeviceReset() {
return hipCUDAErrorTohipError(cudaDeviceReset()); }
// Calls cudaGetLastError() and returns its result converted to hipError_t by
// hipCUDAErrorTohipError().
915 inline static hipError_t
hipGetLastError() {
return hipCUDAErrorTohipError(cudaGetLastError()); }
918 return hipCUDAErrorTohipError(cudaPeekAtLastError());
921 inline static hipError_t
hipMalloc(
void** ptr,
size_t size) {
922 return hipCUDAErrorTohipError(cudaMalloc(ptr, size));
925 inline static hipError_t
hipMallocPitch(
void** ptr,
size_t* pitch,
size_t width,
size_t height) {
926 return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
929 inline static hipError_t
hipMemAllocPitch(hipDeviceptr_t* dptr,
size_t* pitch,
size_t widthInBytes,
size_t height,
unsigned int elementSizeBytes){
930 return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes));
934 return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
// Passes `ptr` to cudaFree() and returns the result converted to hipError_t by
// hipCUDAErrorTohipError().
937 inline static hipError_t
hipFree(
void* ptr) {
return hipCUDAErrorTohipError(cudaFree(ptr)); }
// Declaration only: tags hipMallocHost with a deprecation attribute whose
// message points callers at hipHostMalloc.
// NOTE(review): this uses the GNU `__attribute__` spelling directly rather than
// the __HIP_DEPRECATED macro defined near the top of this header - confirm that
// is intended for non-GNU compilers.
939 inline static hipError_t
hipMallocHost(
void** ptr,
size_t size)
940 __attribute__((deprecated(
"use hipHostMalloc instead")));
942 return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
946 __attribute__((deprecated(
"use hipHostMalloc instead")));
948 return hipCUResultTohipError(cuMemAllocHost(ptr, size));
// Declaration only: tags hipHostAlloc with a deprecation attribute whose message
// points callers at hipHostMalloc. The definition follows this declaration.
// NOTE(review): this uses the GNU `__attribute__` spelling directly rather than
// the __HIP_DEPRECATED macro defined near the top of this header - confirm that
// is intended for non-GNU compilers.
951 inline static hipError_t
hipHostAlloc(
void** ptr,
size_t size,
unsigned int flags)
952 __attribute__((deprecated(
"use hipHostMalloc instead")));
953 inline static hipError_t
hipHostAlloc(
void** ptr,
size_t size,
unsigned int flags) {
954 return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
957 inline static hipError_t
hipHostMalloc(
void** ptr,
size_t size,
unsigned int flags) {
958 return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
963 return hipCUDAErrorTohipError(cudaMemAdvise(dev_ptr, count,
964 hipMemoryAdviseTocudaMemoryAdvise(advice), device));
967 inline static hipError_t
hipMemPrefetchAsync(
const void* dev_ptr,
size_t count,
int device,
968 hipStream_t stream __dparm(0)) {
969 return hipCUDAErrorTohipError(cudaMemPrefetchAsync(dev_ptr, count, device, stream));
974 const void* dev_ptr,
size_t count) {
975 return hipCUDAErrorTohipError(cudaMemRangeGetAttribute(data, data_size,
976 hipMemRangeAttributeToCudaMemRangeAttribute(attribute), dev_ptr, count));
981 size_t num_attributes,
const void* dev_ptr,
983 return hipCUDAErrorTohipError(cudaMemRangeGetAttributes(data, data_sizes, attributes,
984 num_attributes, dev_ptr, count));
988 size_t length __dparm(0),
990 return hipCUDAErrorTohipError(cudaStreamAttachMemAsync(stream, dev_ptr, length, flags));
993 inline static hipError_t
hipMallocManaged(
void** ptr,
size_t size,
unsigned int flags) {
994 return hipCUDAErrorTohipError(cudaMallocManaged(ptr, size, flags));
998 size_t width,
size_t height,
1000 return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags));
1005 return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags));
1009 return hipCUDAErrorTohipError(cudaFreeArray(array));
1013 return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags));
1016 inline static hipError_t
hipHostGetFlags(
unsigned int* flagsPtr,
void* hostPtr) {
1017 return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr));
1020 inline static hipError_t
hipHostRegister(
void* ptr,
size_t size,
unsigned int flags) {
1021 return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags));
1025 return hipCUDAErrorTohipError(cudaHostUnregister(ptr));
1029 __attribute__((deprecated(
"use hipHostFree instead")));
1031 return hipCUDAErrorTohipError(cudaFreeHost(ptr));
1035 return hipCUDAErrorTohipError(cudaFreeHost(ptr));
1039 return hipCUDAErrorTohipError(cudaSetDevice(device));
1043 struct cudaDeviceProp cdprop;
1044 memset(&cdprop, 0x0,
sizeof(
struct cudaDeviceProp));
1045 cdprop.major = prop->
major;
1046 cdprop.minor = prop->
minor;
1061 return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop));
1064 inline static hipError_t
hipMemcpyHtoD(hipDeviceptr_t dst,
void* src,
size_t size) {
1065 return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size));
1068 inline static hipError_t
hipMemcpyDtoH(
void* dst, hipDeviceptr_t src,
size_t size) {
1069 return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size));
1072 inline static hipError_t
hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src,
size_t size) {
1073 return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size));
1076 inline static hipError_t
hipMemcpyHtoDAsync(hipDeviceptr_t dst,
void* src,
size_t size,
1077 hipStream_t stream) {
1078 return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream));
1081 inline static hipError_t
hipMemcpyDtoHAsync(
void* dst, hipDeviceptr_t src,
size_t size,
1082 hipStream_t stream) {
1083 return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream));
1086 inline static hipError_t
hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src,
size_t size,
1087 hipStream_t stream) {
1088 return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream));
1091 inline static hipError_t
hipMemcpy(
void* dst,
const void* src,
size_t sizeBytes,
1092 hipMemcpyKind copyKind) {
1093 return hipCUDAErrorTohipError(
1094 cudaMemcpy(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind)));
1098 inline static hipError_t hipMemcpyWithStream(
void* dst,
const void* src,
1099 size_t sizeBytes, hipMemcpyKind copyKind,
1100 hipStream_t stream) {
1101 cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes,
1102 hipMemcpyKindToCudaMemcpyKind(copyKind),
1105 if (error != cudaSuccess)
return hipCUDAErrorTohipError(error);
1107 return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1110 inline static hipError_t
hipMemcpyAsync(
void* dst,
const void* src,
size_t sizeBytes,
1111 hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) {
1112 return hipCUDAErrorTohipError(
1113 cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream));
1116 inline static hipError_t hipMemcpyToSymbol(
const void* symbol,
const void* src,
size_t sizeBytes,
1117 size_t offset __dparm(0),
1118 hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) {
1119 return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset,
1120 hipMemcpyKindToCudaMemcpyKind(copyType)));
1123 inline static hipError_t hipMemcpyToSymbolAsync(
const void* symbol,
const void* src,
1124 size_t sizeBytes,
size_t offset,
1125 hipMemcpyKind copyType,
1126 hipStream_t stream __dparm(0)) {
1127 return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(
1128 symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream));
1131 inline static hipError_t hipMemcpyFromSymbol(
void* dst,
const void* symbolName,
size_t sizeBytes,
1132 size_t offset __dparm(0),
1133 hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
1134 return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset,
1135 hipMemcpyKindToCudaMemcpyKind(kind)));
1138 inline static hipError_t hipMemcpyFromSymbolAsync(
void* dst,
const void* symbolName,
1139 size_t sizeBytes,
size_t offset,
1141 hipStream_t stream __dparm(0)) {
1142 return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync(
1143 dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
1146 inline static hipError_t hipGetSymbolAddress(
void** devPtr,
const void* symbolName) {
1147 return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName));
1150 inline static hipError_t hipGetSymbolSize(
size_t* size,
const void* symbolName) {
1151 return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName));
1154 inline static hipError_t
hipMemcpy2D(
void* dst,
size_t dpitch,
const void* src,
size_t spitch,
1155 size_t width,
size_t height, hipMemcpyKind kind) {
1156 return hipCUDAErrorTohipError(
1157 cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind)));
1161 return hipCUResultTohipError(cuMemcpy2D(pCopy));
1165 return hipCUResultTohipError(cuMemcpy2DAsync(pCopy, stream));
1169 return hipCUDAErrorTohipError(cudaMemcpy3D(p));
1173 return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream));
1177 return hipCUResultTohipError(cuMemcpy3D(pCopy));
1181 return hipCUResultTohipError(cuMemcpy3DAsync(pCopy, stream));
1184 inline static hipError_t
hipMemcpy2DAsync(
void* dst,
size_t dpitch,
const void* src,
size_t spitch,
1185 size_t width,
size_t height, hipMemcpyKind kind,
1186 hipStream_t stream) {
1187 return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height,
1188 hipMemcpyKindToCudaMemcpyKind(kind), stream));
1192 size_t wOffset,
size_t hOffset,
size_t width,
1193 size_t height, hipMemcpyKind kind) {
1194 return hipCUDAErrorTohipError(cudaMemcpy2DFromArray(dst, dpitch, src, wOffset, hOffset, width,
1196 hipMemcpyKindToCudaMemcpyKind(kind)));
1200 size_t wOffset,
size_t hOffset,
size_t width,
1201 size_t height, hipMemcpyKind kind,
1202 hipStream_t stream) {
1203 return hipCUDAErrorTohipError(cudaMemcpy2DFromArrayAsync(dst, dpitch, src, wOffset, hOffset,
1205 hipMemcpyKindToCudaMemcpyKind(kind),
1210 const void* src,
size_t spitch,
size_t width,
1211 size_t height, hipMemcpyKind kind) {
1212 return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width,
1213 height, hipMemcpyKindToCudaMemcpyKind(kind)));
1217 const void* src,
size_t spitch,
size_t width,
1218 size_t height, hipMemcpyKind kind,
1219 hipStream_t stream) {
1220 return hipCUDAErrorTohipError(cudaMemcpy2DToArrayAsync(dst, wOffset, hOffset, src, spitch,
1222 hipMemcpyKindToCudaMemcpyKind(kind),
1227 size_t hOffset,
const void* src,
1228 size_t count, hipMemcpyKind kind) {
1229 return hipCUDAErrorTohipError(
1230 cudaMemcpyToArray(dst, wOffset, hOffset, src, count, hipMemcpyKindToCudaMemcpyKind(kind)));
1234 size_t wOffset,
size_t hOffset,
1235 size_t count, hipMemcpyKind kind) {
1236 return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count,
1237 hipMemcpyKindToCudaMemcpyKind(kind)));
1242 return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count));
1247 return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count));
1251 return hipCUDAErrorTohipError(cudaDeviceSynchronize());
1255 return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig));
1259 return hipCUDAErrorTohipError(cudaFuncSetAttribute(func, attr, value));
1263 return hipCUDAErrorTohipError(cudaDeviceSetCacheConfig(cacheConfig));
1267 return hipCUDAErrorTohipError(cudaFuncSetSharedMemConfig(func, config));
1271 return cudaGetErrorString(hipErrorToCudaError(error));
1275 return cudaGetErrorName(hipErrorToCudaError(error));
1279 return hipCUDAErrorTohipError(cudaGetDeviceCount(count));
1283 return hipCUDAErrorTohipError(cudaGetDevice(device));
1287 return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr));
1290 inline static hipError_t hipIpcGetEventHandle(
hipIpcEventHandle_t* handle, hipEvent_t event) {
1291 return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event));
1295 return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr));
1298 inline static hipError_t hipIpcOpenEventHandle(hipEvent_t* event,
hipIpcEventHandle_t handle) {
1299 return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle));
1303 unsigned int flags) {
1304 return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags));
1307 inline static hipError_t
hipMemset(
void* devPtr,
int value,
size_t count) {
1308 return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count));
1311 inline static hipError_t
hipMemsetD32(hipDeviceptr_t devPtr,
int value,
size_t count) {
1312 return hipCUResultTohipError(cuMemsetD32(devPtr, value, count));
1315 inline static hipError_t
hipMemsetAsync(
void* devPtr,
int value,
size_t count,
1316 hipStream_t stream __dparm(0)) {
1317 return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream));
1320 inline static hipError_t
hipMemsetD32Async(hipDeviceptr_t devPtr,
int value,
size_t count,
1321 hipStream_t stream __dparm(0)) {
1322 return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream));
1325 inline static hipError_t
hipMemsetD8(hipDeviceptr_t dest,
unsigned char value,
size_t sizeBytes) {
1326 return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes));
1329 inline static hipError_t
hipMemsetD8Async(hipDeviceptr_t dest,
unsigned char value,
size_t sizeBytes,
1330 hipStream_t stream __dparm(0)) {
1331 return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream));
1334 inline static hipError_t
hipMemsetD16(hipDeviceptr_t dest,
unsigned short value,
size_t sizeBytes) {
1335 return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes));
1338 inline static hipError_t
hipMemsetD16Async(hipDeviceptr_t dest,
unsigned short value,
size_t sizeBytes,
1339 hipStream_t stream __dparm(0)) {
1340 return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream));
1343 inline static hipError_t
hipMemset2D(
void* dst,
size_t pitch,
int value,
size_t width,
size_t height) {
1344 return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height));
1347 inline static hipError_t
hipMemset2DAsync(
void* dst,
size_t pitch,
int value,
size_t width,
size_t height, hipStream_t stream __dparm(0)) {
1348 return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream));
1352 return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent));
1356 return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream));
1360 struct cudaDeviceProp cdprop;
1362 cerror = cudaGetDeviceProperties(&cdprop, device);
1364 strncpy(p_prop->
name, cdprop.name, 256);
1368 p_prop->
warpSize = cdprop.warpSize;
1370 for (
int i = 0; i < 3; i++) {
1378 p_prop->
major = cdprop.major;
1379 p_prop->
minor = cdprop.minor;
1386 int ccVers = p_prop->
major * 100 + p_prop->
minor * 10;
1407 p_prop->
pciBusID = cdprop.pciBusID;
1428 p_prop->
memPitch = cdprop.memPitch;
1435 return hipCUDAErrorTohipError(cerror);
1439 enum cudaDeviceAttr cdattr;
1444 cdattr = cudaDevAttrMaxThreadsPerBlock;
1447 cdattr = cudaDevAttrMaxBlockDimX;
1450 cdattr = cudaDevAttrMaxBlockDimY;
1453 cdattr = cudaDevAttrMaxBlockDimZ;
1456 cdattr = cudaDevAttrMaxGridDimX;
1459 cdattr = cudaDevAttrMaxGridDimY;
1462 cdattr = cudaDevAttrMaxGridDimZ;
1465 cdattr = cudaDevAttrMaxSharedMemoryPerBlock;
1468 cdattr = cudaDevAttrTotalConstantMemory;
1471 cdattr = cudaDevAttrWarpSize;
1474 cdattr = cudaDevAttrMaxRegistersPerBlock;
1477 cdattr = cudaDevAttrClockRate;
1480 cdattr = cudaDevAttrMemoryClockRate;
1483 cdattr = cudaDevAttrGlobalMemoryBusWidth;
1486 cdattr = cudaDevAttrMultiProcessorCount;
1489 cdattr = cudaDevAttrComputeMode;
1492 cdattr = cudaDevAttrL2CacheSize;
1495 cdattr = cudaDevAttrMaxThreadsPerMultiProcessor;
1498 cdattr = cudaDevAttrComputeCapabilityMajor;
1501 cdattr = cudaDevAttrComputeCapabilityMinor;
1504 cdattr = cudaDevAttrConcurrentKernels;
1507 cdattr = cudaDevAttrPciBusId;
1510 cdattr = cudaDevAttrPciDeviceId;
1513 cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor;
1516 cdattr = cudaDevAttrIsMultiGpuBoard;
1519 cdattr = cudaDevAttrIntegrated;
1522 cdattr = cudaDevAttrMaxTexture1DWidth;
1525 cdattr = cudaDevAttrMaxTexture2DWidth;
1528 cdattr = cudaDevAttrMaxTexture2DHeight;
1531 cdattr = cudaDevAttrMaxTexture3DWidth;
1534 cdattr = cudaDevAttrMaxTexture3DHeight;
1537 cdattr = cudaDevAttrMaxTexture3DDepth;
1540 cdattr = cudaDevAttrMaxPitch;
1543 cdattr = cudaDevAttrTextureAlignment;
1546 cdattr = cudaDevAttrTexturePitchAlignment;
1549 cdattr = cudaDevAttrKernelExecTimeout;
1552 cdattr = cudaDevAttrCanMapHostMemory;
1555 cdattr = cudaDevAttrEccEnabled;
1558 cdattr = cudaDevAttrCooperativeLaunch;
1561 cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
1564 cdattr = cudaDevAttrConcurrentManagedAccess;
1567 cdattr = cudaDevAttrManagedMemory;
1570 cdattr = cudaDevAttrPageableMemoryAccessUsesHostPageTables;
1573 cdattr = cudaDevAttrPageableMemoryAccess;
1576 cdattr = cudaDevAttrDirectManagedMemAccessFromHost;
1579 return hipCUDAErrorTohipError(cudaErrorInvalidValue);
1582 cerror = cudaDeviceGetAttribute(pi, cdattr, device);
1584 return hipCUDAErrorTohipError(cerror);
1590 size_t dynamicSMemSize) {
1591 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1592 blockSize, dynamicSMemSize));
1598 size_t dynamicSMemSize,
1599 unsigned int flags) {
1600 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1601 blockSize, dynamicSMemSize, flags));
1607 size_t dynamicSMemSize ){
1608 return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f,
1609 blockSize, dynamicSMemSize));
1615 size_t dynamicSMemSize,
1616 unsigned int flags ) {
1617 return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f,
1618 blockSize, dynamicSMemSize, flags));
1623 hipFunction_t f,
size_t dynSharedMemPerBlk,
1624 int blockSizeLimit){
1625 return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL,
1626 dynSharedMemPerBlk, blockSizeLimit));
1631 hipFunction_t f,
size_t dynSharedMemPerBlk,
1632 int blockSizeLimit,
unsigned int flags){
1633 return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL,
1634 dynSharedMemPerBlk, blockSizeLimit, flags));
1638 struct cudaPointerAttributes cPA;
1639 hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
1641 #if (CUDART_VERSION >= 11000)
1642 auto memType = cPA.type;
1644 unsigned memType = cPA.memoryType;
1647 case cudaMemoryTypeDevice:
1650 case cudaMemoryTypeHost:
1654 return hipErrorUnknown;
1656 attributes->device = cPA.device;
1657 attributes->devicePointer = cPA.devicePointer;
1658 attributes->hostPointer = cPA.hostPointer;
1659 attributes->isManaged = 0;
1660 attributes->allocationFlags = 0;
1665 inline static hipError_t
hipMemGetInfo(
size_t* free,
size_t* total) {
1666 return hipCUDAErrorTohipError(cudaMemGetInfo(free, total));
1670 return hipCUDAErrorTohipError(cudaEventCreate(event));
1673 inline static hipError_t
hipEventRecord(hipEvent_t event, hipStream_t stream __dparm(NULL)) {
1674 return hipCUDAErrorTohipError(cudaEventRecord(event, stream));
1678 return hipCUDAErrorTohipError(cudaEventSynchronize(event));
1681 inline static hipError_t
hipEventElapsedTime(
float* ms, hipEvent_t start, hipEvent_t stop) {
1682 return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop));
1686 return hipCUDAErrorTohipError(cudaEventDestroy(event));
1690 return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags));
1694 return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority));
1698 return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority));
1702 return hipCUDAErrorTohipError(cudaStreamCreate(stream));
1706 return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1710 return hipCUDAErrorTohipError(cudaStreamDestroy(stream));
1713 inline static hipError_t
hipStreamGetFlags(hipStream_t stream,
unsigned int *flags) {
1714 return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags));
1718 return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority));
1722 unsigned int flags) {
1723 return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags));
1727 return hipCUDAErrorTohipError(cudaStreamQuery(stream));
1731 void* userData,
unsigned int flags) {
1732 return hipCUDAErrorTohipError(
1733 cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags));
1737 cudaError_t err = cudaDriverGetVersion(driverVersion);
1742 return hipCUDAErrorTohipError(err);
1746 return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion));
1750 return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice));
1754 return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice));
1758 return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags));
1762 return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx));
1766 return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags));
1771 return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active));
1775 return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev));
1779 return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev));
1783 return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev));
1787 return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, flags));
1791 hipDeviceptr_t dptr) {
1792 return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr));
1795 inline static hipError_t
hipMemcpyPeer(
void* dst,
int dstDevice,
const void* src,
int srcDevice,
1797 return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count));
1800 inline static hipError_t
hipMemcpyPeerAsync(
void* dst,
int dstDevice,
const void* src,
1801 int srcDevice,
size_t count,
1802 hipStream_t stream __dparm(0)) {
1803 return hipCUDAErrorTohipError(
1804 cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream));
1808 inline static hipError_t
hipProfilerStart() {
return hipCUDAErrorTohipError(cudaProfilerStart()); }
1810 inline static hipError_t
hipProfilerStop() {
return hipCUDAErrorTohipError(cudaProfilerStop()); }
1813 return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags));
1817 return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags));
1821 return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags));
1825 return hipCUDAErrorTohipError(cudaEventQuery(event));
1828 inline static hipError_t
hipCtxCreate(hipCtx_t* ctx,
unsigned int flags, hipDevice_t device) {
1829 return hipCUResultTohipError(cuCtxCreate(ctx, flags, device));
1833 return hipCUResultTohipError(cuCtxDestroy(ctx));
1837 return hipCUResultTohipError(cuCtxPopCurrent(ctx));
1841 return hipCUResultTohipError(cuCtxPushCurrent(ctx));
1845 return hipCUResultTohipError(cuCtxSetCurrent(ctx));
1849 return hipCUResultTohipError(cuCtxGetCurrent(ctx));
1853 return hipCUResultTohipError(cuCtxGetDevice(device));
1857 return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (
unsigned int*)apiVersion));
1861 return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig));
1865 return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig));
1869 return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config));
1873 return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig));
1877 return hipCUResultTohipError(cuCtxSynchronize());
1881 return hipCUResultTohipError(cuCtxGetFlags(flags));
1884 inline static hipError_t hipCtxDetach(hipCtx_t ctx) {
1885 return hipCUResultTohipError(cuCtxDetach(ctx));
1888 inline static hipError_t
hipDeviceGet(hipDevice_t* device,
int ordinal) {
1889 return hipCUResultTohipError(cuDeviceGet(device, ordinal));
1893 return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device));
1896 inline static hipError_t
hipDeviceGetName(
char* name,
int len, hipDevice_t device) {
1897 return hipCUResultTohipError(cuDeviceGetName(name, len, device));
1901 int srcDevice,
int dstDevice) {
1902 return hipCUDAErrorTohipError(cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice));
1906 return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device));
1910 return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId));
1914 return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config));
1918 return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config));
1922 return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit));
1926 return hipCUResultTohipError(cuDeviceTotalMem(bytes, device));
1929 inline static hipError_t
hipModuleLoad(hipModule_t* module,
const char* fname) {
1930 return hipCUResultTohipError(cuModuleLoad(module, fname));
1934 return hipCUResultTohipError(cuModuleUnload(hmod));
1938 const char* kname) {
1939 return hipCUResultTohipError(cuModuleGetFunction(
function, module, kname));
1942 inline static hipError_t
hipModuleGetTexRef(hipTexRef* pTexRef, hipModule_t hmod,
const char* name){
1943 hipCUResultTohipError(cuModuleGetTexRef(pTexRef, hmod, name));
1947 return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func));
1950 inline static hipError_t
hipFuncGetAttribute (
int* value, hipFunction_attribute attrib, hipFunction_t hfunc) {
1951 return hipCUResultTohipError(cuFuncGetAttribute(value, attrib, hfunc));
1954 inline static hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr,
size_t* bytes, hipModule_t hmod,
1956 return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name));
1959 inline static hipError_t
hipModuleLoadData(hipModule_t* module,
const void* image) {
1960 return hipCUResultTohipError(cuModuleLoadData(module, image));
1964 unsigned int numOptions, hipJitOption* options,
1965 void** optionValues) {
1966 return hipCUResultTohipError(
1967 cuModuleLoadDataEx(module, image, numOptions, options, optionValues));
1971 dim3 dimBlocks,
void** args,
size_t sharedMemBytes,
1974 return hipCUDAErrorTohipError(cudaLaunchKernel(function_address,numBlocks,dimBlocks,args,sharedMemBytes,stream));
1978 unsigned int gridDimY,
unsigned int gridDimZ,
1979 unsigned int blockDimX,
unsigned int blockDimY,
1980 unsigned int blockDimZ,
unsigned int sharedMemBytes,
1981 hipStream_t stream,
void** kernelParams,
1983 return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX,
1984 blockDimY, blockDimZ, sharedMemBytes, stream,
1985 kernelParams, extra));
1989 return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig));
1992 __HIP_DEPRECATED
inline static hipError_t hipBindTexture(
size_t* offset,
1996 size_t size __dparm(UINT_MAX)) {
1997 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
2000 __HIP_DEPRECATED
inline static hipError_t hipBindTexture2D(
2003 return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch));
2007 hipChannelFormatKind f) {
2008 return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f));
2011 inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject,
2015 return hipCUDAErrorTohipError(
2016 cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc));
2019 inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
2020 return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject));
2025 return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc));
2029 return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject));
2032 inline static hipError_t hipGetTextureObjectResourceDesc(
hipResourceDesc* pResDesc,
2033 hipTextureObject_t textureObject) {
2034 return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject));
2037 __HIP_DEPRECATED
inline static hipError_t hipGetTextureAlignmentOffset(
2039 return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref));
2044 return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array));
2048 void** kernelParams,
unsigned int sharedMemBytes,
2049 hipStream_t stream) {
2050 return hipCUDAErrorTohipError(
2051 cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
2055 int numDevices,
unsigned int flags) {
2056 return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags));
2069 size_t dynamicSMemSize) {
2070 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
2071 blockSize, dynamicSMemSize));
2076 size_t dynamicSMemSize = 0,
2077 int blockSizeLimit = 0) {
2078 return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
2079 dynamicSMemSize, blockSizeLimit));
2083 inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(
int* minGridSize,
int* blockSize, T func,
2084 size_t dynamicSMemSize = 0,
2085 int blockSizeLimit = 0,
unsigned int flags = 0) {
2086 return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
2087 dynamicSMemSize, blockSizeLimit, flags));
2092 int blockSize,
size_t dynamicSMemSize,
unsigned int flags) {
2093 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
2094 blockSize, dynamicSMemSize, flags));
2097 template <
class T,
int dim, enum cudaTextureReadMode readMode>
2098 inline static hipError_t hipBindTexture(
size_t* offset,
const struct texture<T, dim, readMode>& tex,
2099 const void* devPtr,
size_t size = UINT_MAX) {
2100 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size));
2103 template <
class T,
int dim, enum cudaTextureReadMode readMode>
2104 inline static hipError_t hipBindTexture(
size_t* offset,
struct texture<T, dim, readMode>& tex,
2106 size_t size = UINT_MAX) {
2107 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
2110 template <
class T,
int dim, enum cudaTextureReadMode readMode>
2111 __HIP_DEPRECATED
inline static hipError_t hipUnbindTexture(
struct texture<T, dim, readMode>* tex) {
2112 return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
2115 template <
class T,
int dim, enum cudaTextureReadMode readMode>
2116 __HIP_DEPRECATED
inline static hipError_t hipUnbindTexture(
struct texture<T, dim, readMode>& tex) {
2117 return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
2120 template <
class T,
int dim, enum cudaTextureReadMode readMode>
2121 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
2124 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
2127 template <
class T,
int dim, enum cudaTextureReadMode readMode>
2128 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
2131 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
2134 template <
class T,
int dim, enum cudaTextureReadMode readMode>
2135 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
2137 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array));
2142 return cudaCreateChannelDesc<T>();
2147 void** kernelParams,
unsigned int sharedMemBytes, hipStream_t stream) {
2148 return hipCUDAErrorTohipError(
2149 cudaLaunchCooperativeKernel(
reinterpret_cast<const void*
>(f), gridDim, blockDim, kernelParams, sharedMemBytes, stream));
2152 inline static hipError_t hipTexRefSetAddressMode(hipTexRef hTexRef,
int dim, hipAddress_mode am){
2153 return hipCUResultTohipError(cuTexRefSetAddressMode(hTexRef,dim,am));
2156 inline static hipError_t hipTexRefSetFilterMode(hipTexRef hTexRef, hipFilter_mode fm){
2157 return hipCUResultTohipError(cuTexRefSetFilterMode(hTexRef,fm));
2160 inline static hipError_t hipTexRefSetAddress(
size_t *ByteOffset, hipTexRef hTexRef, hipDeviceptr_t dptr,
size_t bytes){
2161 return hipCUResultTohipError(cuTexRefSetAddress(ByteOffset,hTexRef,dptr,bytes));
2164 inline static hipError_t hipTexRefSetAddress2D(hipTexRef hTexRef,
const CUDA_ARRAY_DESCRIPTOR *desc, hipDeviceptr_t dptr,
size_t Pitch){
2165 return hipCUResultTohipError(cuTexRefSetAddress2D(hTexRef,desc,dptr,Pitch));
2168 inline static hipError_t hipTexRefSetFormat(hipTexRef hTexRef, hipArray_Format fmt,
int NumPackedComponents){
2169 return hipCUResultTohipError(cuTexRefSetFormat(hTexRef,fmt,NumPackedComponents));
2172 inline static hipError_t hipTexRefSetFlags(hipTexRef hTexRef,
unsigned int Flags){
2173 return hipCUResultTohipError(cuTexRefSetFlags(hTexRef,Flags));
2176 inline static hipError_t hipTexRefSetArray(hipTexRef hTexRef,
hiparray hArray,
unsigned int Flags){
2177 return hipCUResultTohipError(cuTexRefSetArray(hTexRef,hArray,Flags));
2181 return hipCUResultTohipError(cuArrayCreate(pHandle, pAllocateArray));
2184 inline static hipError_t hipArrayDestroy(
hiparray hArray){
2185 return hipCUResultTohipError(cuArrayDestroy(hArray));
2188 inline static hipError_t hipArray3DCreate(
hiparray* pHandle,
2190 return hipCUResultTohipError(cuArray3DCreate(pHandle, pAllocateArray));
hipError_t hipLaunchKernel(const void *function_address, dim3 numBlocks, dim3 dimBlocks, void **args, size_t sharedMemBytes __dparm(0), hipStream_t stream __dparm(0))
C compliant kernel launch API.
hipError_t hipCtxGetFlags(unsigned int *flags)
Return flags used for creating default context.
hipError_t hipCtxPopCurrent(hipCtx_t *ctx)
Pop the current/default context and return the popped context.
hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig *pConfig)
Get Shared memory bank configuration.
hipError_t hipCtxGetCurrent(hipCtx_t *ctx)
Get the handle of the current/default context.
hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
hipError_t hipCtxSetCurrent(hipCtx_t ctx)
Set the passed context as current/default.
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags)
Enables direct access to memory allocations in a peer context.
hipError_t hipCtxGetDevice(hipDevice_t *device)
Get the handle of the device associated with current/default context.
hipError_t hipCtxDestroy(hipCtx_t ctx)
Destroy a HIP context.
hipError_t hipCtxPushCurrent(hipCtx_t ctx)
Push the context to be set as the current/default context.
hipError_t hipCtxGetCacheConfig(hipFuncCache_t *cacheConfig)
Get the L1/Shared cache partition setting of the current context.
hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device)
Create a context and set it as the current/default context.
hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int *apiVersion)
Returns the approximate HIP api version.
hipError_t hipCtxSynchronize(void)
Blocks until the default context has completed all preceding requested tasks.
hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config)
Set Shared memory bank configuration.
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx)
Disable direct access from current context's virtual address space to memory allocations physically l...
hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags)
Set flags for the primary context.
hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int *flags, int *active)
Get the state of the primary context.
hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev)
Release the primary context on the GPU.
hipError_t hipDevicePrimaryCtxRetain(hipCtx_t *pctx, hipDevice_t dev)
Retain the primary context on the GPU.
hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev)
Resets the primary context on the GPU.
hipError_t hipGetDeviceFlags(unsigned int *flags)
Gets the flags set for current device.
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig)
Returns bank width of shared memory for current device.
hipError_t hipIpcOpenMemHandle(void **devPtr, hipIpcMemHandle_t handle, unsigned int flags)
Opens an interprocess memory handle exported from another process and returns a device pointer usable...
hipError_t hipGetDeviceProperties(hipDeviceProp_t *prop, int deviceId)
Returns device properties.
hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig)
Get the L1/Shared cache partition setting of the current device.
hipError_t hipSetDevice(int deviceId)
Set default device to be used for subsequent hip API calls from this thread.
hipError_t hipSetDeviceFlags(unsigned flags)
The current device behavior is changed according to the flags passed.
hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attr, int deviceId)
Query for a specific device attribute.
hipError_t hipGetDevice(int *deviceId)
Return the default device id for the calling host thread.
hipError_t hipGetDeviceCount(int *count)
Return number of compute-capable devices.
hipError_t hipDeviceReset(void)
The state of current device is discarded and updated to a fresh state.
hipError_t hipDeviceGetLimit(size_t *pValue, enum hipLimit_t limit)
Get Resource limits of current device.
hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config)
The bank width of shared memory on current device is set.
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
hipError_t hipIpcCloseMemHandle(void *devPtr)
Close memory mapped with hipIpcOpenMemHandle.
hipError_t hipDeviceSynchronize(void)
Waits on all active streams on current device.
hipError_t hipChooseDevice(int *device, const hipDeviceProp_t *prop)
Device which matches hipDeviceProp_t is returned.
hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t *handle, void *devPtr)
Gets an interprocess memory handle for an existing device memory allocation.
hipError_t hipInit(unsigned int flags)
Explicitly initializes the HIP runtime.
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device)
Returns the compute capability of the device.
hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device)
Returns an identifier string for the device.
hipError_t hipDeviceGetByPCIBusId(int *device, const char *pciBusId)
Returns a handle to a compute device.
hipError_t hipDeviceTotalMem(size_t *bytes, hipDevice_t device)
Returns the total amount of memory on the device.
hipError_t hipDeviceGetPCIBusId(char *pciBusId, int len, int device)
Returns a PCI Bus Id string for the device, overloaded to take int device ID.
hipError_t hipDeviceGet(hipDevice_t *device, int ordinal)
Returns a handle to a compute device.
hipError_t hipRuntimeGetVersion(int *runtimeVersion)
Returns the approximate HIP Runtime version.
hipError_t hipDeviceGetP2PAttribute(int *value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice)
Returns a value for attr of link between two devices.
hipError_t hipDriverGetVersion(int *driverVersion)
Returns the approximate HIP driver version.
const char * hipGetErrorString(hipError_t hipError)
Return handy text string message to explain the error which occurred.
hipError_t hipPeekAtLastError(void)
Return last error returned by any HIP runtime API call.
hipError_t hipGetLastError(void)
Return last error returned by any HIP runtime API call and resets the stored error code to hipSuccess...
const char * hipGetErrorName(hipError_t hip_error)
Return name of the specified error code in text form.
hipError_t hipEventSynchronize(hipEvent_t event)
Wait for an event to complete.
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
Record an event in the specified stream.
hipError_t hipEventQuery(hipEvent_t event)
Query event status.
hipError_t hipEventCreate(hipEvent_t *event)
hipError_t hipEventDestroy(hipEvent_t event)
Destroy the specified event.
hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
Return the elapsed time between two events.
hipError_t hipEventCreateWithFlags(hipEvent_t *event, unsigned flags)
Create an event with the specified flags.
hipError_t hipFuncSetSharedMemConfig(const void *func, hipSharedMemConfig config)
Set shared memory configuration for a specific function.
hipError_t hipFuncSetAttribute(const void *func, hipFuncAttribute attr, int value)
Set attribute for a specific function.
hipError_t hipFuncSetCacheConfig(const void *func, hipFuncCache_t config)
Set Cache configuration for a specific function.
hipMemRangeAttribute
Definition: hip_runtime_api.h:249
#define hipMemAttachSingle
the associated device
Definition: hip_runtime_api.h:174
hipMemoryAdvise
Definition: hip_runtime_api.h:231
@ hipMemAdviseUnsetAccessedBy
Definition: hip_runtime_api.h:240
@ hipMemAdviseUnsetPreferredLocation
Clear the preferred location for the data.
Definition: hip_runtime_api.h:237
@ hipMemAdviseSetAccessedBy
Definition: hip_runtime_api.h:238
@ hipMemAdviseSetPreferredLocation
Definition: hip_runtime_api.h:235
@ hipMemAdviseSetReadMostly
Definition: hip_runtime_api.h:232
@ hipMemAdviseUnsetReadMostly
Undo the effect of hipMemAdviseSetReadMostly.
Definition: hip_runtime_api.h:234
#define hipArrayDefault
Default HIP array allocation flag.
Definition: hip_runtime_api.h:203
hipSharedMemConfig
Definition: hip_runtime_api.h:306
hipFuncAttribute
Definition: hip_runtime_api.h:287
hipFuncCache_t
Definition: hip_runtime_api.h:296
hipDeviceAttribute_t
Definition: hip_runtime_api.h:321
@ hipMemRangeAttributeLastPrefetchLocation
The last location to which the range was prefetched.
Definition: hip_runtime_api.h:255
@ hipMemRangeAttributePreferredLocation
The preferred location of the range.
Definition: hip_runtime_api.h:252
@ hipMemRangeAttributeAccessedBy
Definition: hip_runtime_api.h:253
@ hipMemRangeAttributeReadMostly
Definition: hip_runtime_api.h:250
@ hipDeviceAttributeDirectManagedMemAccessFromHost
Definition: hip_runtime_api.h:385
@ hipDeviceAttributeMaxGridDimX
Maximum x-dimension of a grid.
Definition: hip_runtime_api.h:326
@ hipDeviceAttributeMaxPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:368
@ hipDeviceAttributePageableMemoryAccessUsesHostPageTables
Definition: hip_runtime_api.h:391
@ hipDeviceAttributeMaxBlockDimY
Maximum y-dimension of a block.
Definition: hip_runtime_api.h:324
@ hipDeviceAttributePageableMemoryAccess
Definition: hip_runtime_api.h:389
@ hipDeviceAttributeClockRate
Peak clock frequency in kilohertz.
Definition: hip_runtime_api.h:337
@ hipDeviceAttributeComputeCapabilityMajor
Major compute capability version number.
Definition: hip_runtime_api.h:346
@ hipDeviceAttributeMaxTexture3DHeight
Maximum dimension height of 3D images in image elements.
Definition: hip_runtime_api.h:362
@ hipDeviceAttributeComputeCapabilityMinor
Minor compute capability version number.
Definition: hip_runtime_api.h:347
@ hipDeviceAttributeComputeMode
Compute mode that device is currently in.
Definition: hip_runtime_api.h:341
@ hipDeviceAttributeCooperativeMultiDeviceLaunch
Support cooperative launch on multiple devices.
Definition: hip_runtime_api.h:357
@ hipDeviceAttributeMaxTexture2DHeight
Maximum dimension height of 2D images in image elements.
Definition: hip_runtime_api.h:360
@ hipDeviceAttributeEccEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:373
@ hipDeviceAttributePciBusId
PCI Bus ID.
Definition: hip_runtime_api.h:350
@ hipDeviceAttributeL2CacheSize
Definition: hip_runtime_api.h:342
@ hipDeviceAttributeKernelExecTimeout
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:371
@ hipDeviceAttributeMaxGridDimY
Maximum y-dimension of a grid.
Definition: hip_runtime_api.h:327
@ hipDeviceAttributeMultiprocessorCount
Number of multiprocessors on the device.
Definition: hip_runtime_api.h:340
@ hipDeviceAttributeIsMultiGpuBoard
Multiple GPU devices.
Definition: hip_runtime_api.h:354
@ hipDeviceAttributeMemoryClockRate
Peak memory clock frequency in kilohertz.
Definition: hip_runtime_api.h:338
@ hipDeviceAttributeMaxGridDimZ
Maximum z-dimension of a grid.
Definition: hip_runtime_api.h:328
@ hipDeviceAttributeCooperativeLaunch
Support cooperative launch.
Definition: hip_runtime_api.h:356
@ hipDeviceAttributeMaxSharedMemoryPerBlock
Definition: hip_runtime_api.h:329
@ hipDeviceAttributeMaxRegistersPerBlock
Definition: hip_runtime_api.h:333
@ hipDeviceAttributeMaxThreadsPerBlock
Maximum number of threads per block.
Definition: hip_runtime_api.h:322
@ hipDeviceAttributeMemoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:339
@ hipDeviceAttributePciDeviceId
PCI Device ID.
Definition: hip_runtime_api.h:351
@ hipDeviceAttributeCanMapHostMemory
Device can map host memory into device address space.
Definition: hip_runtime_api.h:372
@ hipDeviceAttributeMaxTexture1DWidth
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:358
@ hipDeviceAttributeConcurrentManagedAccess
Definition: hip_runtime_api.h:387
@ hipDeviceAttributeManagedMemory
Device supports allocating managed memory on this system.
Definition: hip_runtime_api.h:384
@ hipDeviceAttributeIntegrated
iGPU
Definition: hip_runtime_api.h:355
@ hipDeviceAttributeMaxTexture3DDepth
Maximum dimension depth of 3D images in image elements.
Definition: hip_runtime_api.h:363
@ hipDeviceAttributeMaxTexture2DWidth
Maximum dimension width of 2D images in image elements.
Definition: hip_runtime_api.h:359
@ hipDeviceAttributeMaxBlockDimX
Maximum x-dimension of a block.
Definition: hip_runtime_api.h:323
@ hipDeviceAttributeMaxTexture3DWidth
Maximum dimension width of 3D images in image elements.
Definition: hip_runtime_api.h:361
@ hipDeviceAttributeTotalConstantMemory
Constant memory size in bytes.
Definition: hip_runtime_api.h:331
@ hipDeviceAttributeTextureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:369
@ hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
Definition: hip_runtime_api.h:352
@ hipDeviceAttributeConcurrentKernels
Definition: hip_runtime_api.h:348
@ hipDeviceAttributeMaxThreadsPerMultiProcessor
Definition: hip_runtime_api.h:344
@ hipDeviceAttributeTexturePitchAlignment
Pitch alignment requirement for 2D texture references bound to pitched memory.
Definition: hip_runtime_api.h:370
@ hipDeviceAttributeMaxBlockDimZ
Maximum z-dimension of a block.
Definition: hip_runtime_api.h:325
@ hipDeviceAttributeWarpSize
Warp size in threads.
Definition: hip_runtime_api.h:332
hipError_t hipMemRangeGetAttributes(void **data, size_t *data_sizes, hipMemRangeAttribute *attributes, size_t num_attributes, const void *dev_ptr, size_t count)
Query attributes of a given memory range in AMD HMM.
hipError_t hipMemAdvise(const void *dev_ptr, size_t count, hipMemoryAdvise advice, int device)
Advise about the usage of a given memory range to AMD HMM.
hipError_t hipMemPrefetchAsync(const void *dev_ptr, size_t count, int device, hipStream_t stream __dparm(0))
Prefetches memory to the specified destination device using AMD HMM.
hipError_t hipStreamAttachMemAsync(hipStream_t stream, hipDeviceptr_t *dev_ptr, size_t length __dparm(0), unsigned int flags __dparm(hipMemAttachSingle))
Attach memory to a stream asynchronously in AMD HMM.
hipError_t hipMemRangeGetAttribute(void *data, size_t data_size, hipMemRangeAttribute attribute, const void *dev_ptr, size_t count)
Query an attribute of a given memory range in AMD HMM.
hipError_t hipMallocManaged(void **dev_ptr, size_t size, unsigned int flags __dparm(hipMemAttachGlobal))
Allocates memory that will be automatically managed by AMD HMM.
hipError_t hipMemAllocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:947
hipError_t hipMallocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:941
hipError_t hipMemcpyAtoH(void *dst, hipArray *srcArray, size_t srcOffset, size_t count)
Copies data between host and device.
hipError_t hipMemcpy2DToArrayAsync(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies data between host and device.
hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
hipError_t hipHostFree(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. This API performs an implicit hipDevi...
hipError_t hipMemcpyToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, hipMemcpyKind kind)
Copies data between host and device.
hipError_t hipMemcpy2DFromArrayAsync(void *dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies data between host and device asynchronously.
hipError_t hipMemGetInfo(size_t *free, size_t *total)
Query memory info. Return snapshot of free memory, and total allocatable memory on the device.
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
Copies data between host and device.
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream __dparm(0))
Copies data between host and device asynchronously.
hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent)
Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value.
hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
hipError_t hipMalloc(void **ptr, size_t size)
Allocate memory on the default accelerator.
hipError_t hipHostUnregister(void *hostPtr)
Un-register host pointer.
hipError_t hipHostGetFlags(unsigned int *flagsPtr, void *hostPtr)
Return flags associated with host pointer.
hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count)
Fills the memory area pointed to by dest with the constant integer value for specified number of time...
hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D *pCopy, hipStream_t stream)
Copies data between host and device asynchronously.
hipError_t hipMalloc3DArray(hipArray **array, const struct hipChannelFormatDesc *desc, struct hipExtent extent, unsigned int flags)
Allocate an array on the device.
hipError_t hipFree(void *ptr)
Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSy...
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, const void *ptr)
Return attributes for the specified pointer.
hipError_t hipMallocPitch(void **ptr, size_t *pitch, size_t width, size_t height)
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Device.
hipError_t hipMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
hipError_t hipMemcpyFromArray(void *dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, size_t count, hipMemcpyKind kind)
Copies data between host and device.
hipError_t hipHostGetDevicePointer(void **devPtr, void *hstPtr, unsigned int flags)
Get Device pointer from Host Pointer allocated through hipHostMalloc.
hipError_t hipHostAlloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory [Deprecated].
Definition: hip_runtime_api.h:953
hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D *pCopy, hipStream_t stream __dparm(0))
Copies memory for 2D arrays.
hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
hipError_t hipMemcpy2DFromArray(void *dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D *pCopy)
Copies data between host and device.
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void *src, size_t sizeBytes)
Copy data from Host to Device.
hipError_t hipMemcpyParam2D(const hip_Memcpy2D *pCopy)
Copies memory for 2D arrays.
hipError_t hipMemsetAsync(void *dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant byte value val...
hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value.
hipError_t hipHostMalloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory.
hipError_t hipMemcpy2DToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
hipError_t hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copy data from src to dst asynchronously.
hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
Register host memory so it can be accessed from the current device.
hipError_t hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind)
Copy data from src to dst.
hipError_t hipMemset(void *dst, int value, size_t sizeBytes)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
hipError_t hipMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies data between host and device.
hipError_t hipMallocArray(hipArray **array, const hipChannelFormatDesc *desc, size_t width, size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault))
Allocate an array on the device.
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Device asynchronously.
hipError_t hipMemAllocPitch(hipDeviceptr_t *dptr, size_t *pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes)
hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
hipError_t hipMemcpyDtoHAsync(void *dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Host asynchronously.
hipError_t hipMemset2DAsync(void *dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by dst with the constant value.
hipError_t hipFreeHost(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. [Deprecated].
Definition: hip_runtime_api.h:1030
hipError_t hipMemset2D(void *dst, size_t pitch, int value, size_t width, size_t height)
Fills the memory area pointed to by dst with the constant value.
hipError_t hipFreeArray(hipArray *array)
Frees an array on the device.
hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void *src, size_t sizeBytes, hipStream_t stream)
Copy data from Host to Device asynchronously.
hipError_t hipMemcpyDtoH(void *dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Host.
hipError_t hipMemcpyHtoA(hipArray *dstArray, size_t dstOffset, const void *srcHost, size_t count)
Copies data between host and device.
hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream __dparm(0))
Fills the memory area pointed to by dev with the constant integer value for specified number of times...
hipError_t hipFuncGetAttributes(struct hipFuncAttributes *attr, const void *func)
Find out attributes for a given function.
hipError_t hipModuleGetTexRef(textureReference **texRef, hipModule_t hmod, const char *name)
Returns the handle of the texture reference with the given name from the module.
hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t stream, void **kernelParams, void **extra)
Launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelp...
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
Loads code object from file into a hipModule_t.
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues)
Builds a module from a code object which resides in host memory. Image is a pointer to that location....
hipError_t hipFuncGetAttribute(int *value, hipFunction_attribute attrib, hipFunction_t hfunc)
Find out a specific attribute for a given function.
hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams *launchParamsList, int numDevices, unsigned int flags)
Launches kernels on multiple devices where thread blocks can cooperate and synchronize as they execut...
hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, const char *kname)
Function with kname will be extracted if present in module.
hipError_t hipLaunchCooperativeKernel(const void *f, dim3 gridDim, dim3 blockDimX, void **kernelParams, unsigned int sharedMemBytes, hipStream_t stream)
Launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelp...
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
Builds a module from a code object which resides in host memory. Image is a pointer to that location.
hipError_t hipModuleUnload(hipModule_t module)
Frees the module.
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault))
Returns occupancy for a device function.
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determine the grid and block sizes to achieve maximum occupancy for a kernel.
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
hipError_t hipOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, const void *f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determine the grid and block sizes to achieve maximum occupancy for a kernel.
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags)
Returns occupancy for a device function.
hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit, unsigned int flags)
Determine the grid and block sizes to achieve maximum occupancy for a kernel.
hipError_t hipDeviceCanAccessPeer(int *canAccessPeer, int deviceId, int peerDeviceId)
Determine if a device can access a peer's memory.
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags)
Enable direct access from current device's virtual address space to memory allocations physically loc...
hipError_t hipMemcpyPeer(void *dst, int dstDeviceId, const void *src, int srcDeviceId, size_t sizeBytes)
Copies memory from one device to memory on another device.
hipError_t hipMemcpyPeerAsync(void *dst, int dstDeviceId, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream __dparm(0))
Copies memory from one device to memory on another device.
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId)
Disable direct access from current device's virtual address space to memory allocations physically lo...
hipError_t hipMemGetAddressRange(hipDeviceptr_t *pbase, size_t *psize, hipDeviceptr_t dptr)
Get information on memory allocations.
hipError_t hipProfilerStart()
Start recording of profiling information. When using this API, start the profiler with profiling disab...
hipError_t hipProfilerStop()
Stop recording of profiling information. When using this API, start the profiler with profiling disab...
hipError_t hipDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority)
Returns numerical values that correspond to the least and greatest stream priority.
hipError_t hipStreamDestroy(hipStream_t stream)
Destroys the specified stream.
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags)
Return flags associated with this stream.
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags)
Adds a callback to be called on the host after all currently enqueued items in the stream have comple...
void(* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void *userData)
Definition: hip_runtime_api.h:1236
hipError_t hipStreamQuery(hipStream_t stream)
Return hipSuccess if all of the operations in the specified stream have completed,...
hipError_t hipStreamSynchronize(hipStream_t stream)
Wait for all commands in stream to complete.
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags)
Make the specified compute stream wait for an event.
hipError_t hipStreamCreateWithPriority(hipStream_t *stream, unsigned int flags, int priority)
Create an asynchronous stream with the specified priority.
hipError_t hipStreamGetPriority(hipStream_t stream, int *priority)
Query the priority of a stream.
hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags)
Create an asynchronous stream.
hipError_t hipStreamCreate(hipStream_t *stream)
Create an asynchronous stream.
hipErrorInvalidDevicePointer
Invalid Device Pointer.
Definition: hip_runtime_api.h:221
hipErrorRuntimeMemory
Definition: hip_runtime_api.h:307
hipErrorInvalidKernelFile
In CUDA DRV, it is CUDA_ERROR_INVALID_PTX.
Definition: hip_runtime_api.h:247
hipErrorInvalidContext
Produced when input context is invalid.
Definition: hip_runtime_api.h:230
hipErrorPeerAccessNotEnabled
Peer access was never enabled from the current device.
Definition: hip_runtime_api.h:267
hipErrorInvalidValue
Definition: hip_runtime_api.h:205
hipErrorLaunchFailure
An exception occurred on the device while executing a kernel.
Definition: hip_runtime_api.h:276
hipErrorInvalidDevice
DeviceID must be in range 0...#compute-devices.
Definition: hip_runtime_api.h:228
hipErrorHostMemoryAlreadyRegistered
Produced when trying to lock a page-locked memory.
Definition: hip_runtime_api.h:272
hipErrorAssert
Produced when the kernel calls assert.
Definition: hip_runtime_api.h:271
hipErrorNotReady
Definition: hip_runtime_api.h:258
hipErrorLaunchOutOfResources
Out of resources error.
Definition: hip_runtime_api.h:263
hipErrorCooperativeLaunchTooLarge
Definition: hip_runtime_api.h:278
hipErrorNoDevice
Call to hipGetDeviceCount returned 0 devices.
Definition: hip_runtime_api.h:227
hipErrorPeerAccessAlreadyEnabled
Peer access was already enabled from the current device.
Definition: hip_runtime_api.h:265
hipErrorNotSupported
Produced when the hip API is not supported/implemented.
Definition: hip_runtime_api.h:282
hipSuccess
Successful completion.
Definition: hip_runtime_api.h:204
@ hipMemoryTypeDevice
Definition: hip_runtime_api.h:158
@ hipMemoryTypeHost
Memory is physically located on host.
Definition: hip_runtime_api.h:157
hipErrorHostMemoryNotRegistered
Produced when trying to unlock a non-page-locked memory.
Definition: hip_runtime_api.h:274
hipErrorRuntimeOther
Definition: hip_runtime_api.h:309
hipErrorInvalidMemcpyDirection
Invalid memory copy direction.
Definition: hip_runtime_api.h:222
unsigned long long hipSurfaceObject_t
Definition: hip_surface_types.h:36
Definition: driver_types.h:75
Definition: driver_types.h:68
Definition: driver_types.h:406
Definition: hip_runtime_api.h:318
Definition: driver_types.h:84
unsigned hasSharedInt64Atomics
64-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:55
unsigned hasSharedInt32Atomics
32-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:49
unsigned hasFloatAtomicAdd
32-bit float atomic add in global and shared memory.
Definition: hip_runtime_api.h:51
unsigned hasDoubles
Double-precision floating point.
Definition: hip_runtime_api.h:58
unsigned hasWarpVote
Warp vote instructions (__any, __all).
Definition: hip_runtime_api.h:61
unsigned hasWarpShuffle
Warp shuffle operations. (__shfl_*).
Definition: hip_runtime_api.h:63
unsigned hasGlobalInt32Atomics
32-bit integer atomics for global memory.
Definition: hip_runtime_api.h:47
unsigned hasGlobalFloatAtomicExch
32-bit float atomic exch for global memory.
Definition: hip_runtime_api.h:48
unsigned hasDynamicParallelism
Dynamic parallelism.
Definition: hip_runtime_api.h:73
unsigned hasSurfaceFuncs
Surface functions.
Definition: hip_runtime_api.h:71
unsigned has3dGrid
Grid and group dims are 3D (rather than 2D).
Definition: hip_runtime_api.h:72
unsigned hasFunnelShift
Funnel two words into one with shift&mask caps.
Definition: hip_runtime_api.h:64
unsigned hasThreadFenceSystem
__threadfence_system.
Definition: hip_runtime_api.h:67
unsigned hasGlobalInt64Atomics
64-bit integer atomics for global memory.
Definition: hip_runtime_api.h:54
unsigned hasSyncThreadsExt
__syncthreads_count, __syncthreads_and, __syncthreads_or.
Definition: hip_runtime_api.h:68
unsigned hasWarpBallot
Warp ballot instructions (__ballot).
Definition: hip_runtime_api.h:62
unsigned hasSharedFloatAtomicExch
32-bit float atomic exch for shared memory.
Definition: hip_runtime_api.h:50
Definition: hip_runtime_api.h:84
int ECCEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:132
int pciBusID
PCI Bus ID.
Definition: hip_runtime_api.h:112
int maxTexture1D
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:123
int memoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:95
int clockRate
Max clock frequency of the multiProcessors in khz.
Definition: hip_runtime_api.h:93
int maxThreadsPerMultiProcessor
Maximum resident threads per multi-processor.
Definition: hip_runtime_api.h:105
int l2CacheSize
L2 cache size.
Definition: hip_runtime_api.h:104
size_t totalConstMem
Size of constant memory region (in bytes).
Definition: hip_runtime_api.h:96
size_t memPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:128
size_t sharedMemPerBlock
Size of shared memory region (in bytes).
Definition: hip_runtime_api.h:87
int cooperativeMultiDeviceLaunch
HIP device supports cooperative launch on multiple devices.
Definition: hip_runtime_api.h:121
char name[256]
Device name.
Definition: hip_runtime_api.h:85
size_t textureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:129
int memoryClockRate
Max global memory clock frequency in khz.
Definition: hip_runtime_api.h:94
int clockInstructionRate
Definition: hip_runtime_api.h:107
int regsPerBlock
Registers per block.
Definition: hip_runtime_api.h:88
int maxTexture2D[2]
Maximum dimensions (width, height) of 2D images, in image elements.
Definition: hip_runtime_api.h:124
int cooperativeLaunch
HIP device supports cooperative launch.
Definition: hip_runtime_api.h:120
int maxTexture3D[3]
Maximum dimensions (width, height, depth) of 3D images, in image elements.
Definition: hip_runtime_api.h:125
int cooperativeMultiDeviceUnmatchedFunc
Definition: hip_runtime_api.h:134
int maxThreadsDim[3]
Max number of threads in each dimension (XYZ) of a block.
Definition: hip_runtime_api.h:91
int isMultiGpuBoard
1 if device is on a multi-GPU board, 0 if not.
Definition: hip_runtime_api.h:115
int pciDomainID
PCI Domain ID.
Definition: hip_runtime_api.h:111
size_t maxSharedMemoryPerMultiProcessor
Maximum Shared Memory Per Multiprocessor.
Definition: hip_runtime_api.h:114
int cooperativeMultiDeviceUnmatchedGridDim
Definition: hip_runtime_api.h:136
int minor
Definition: hip_runtime_api.h:100
int kernelExecTimeoutEnabled
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:131
int integrated
APU vs dGPU.
Definition: hip_runtime_api.h:119
int canMapHostMemory
Check whether HIP can map host memory.
Definition: hip_runtime_api.h:116
size_t totalGlobalMem
Size of global memory region (in bytes).
Definition: hip_runtime_api.h:86
int cooperativeMultiDeviceUnmatchedBlockDim
Definition: hip_runtime_api.h:138
int concurrentKernels
Device can possibly execute multiple kernels concurrently.
Definition: hip_runtime_api.h:110
int gcnArch
DEPRECATED: use gcnArchName instead.
Definition: hip_runtime_api.h:117
int multiProcessorCount
Number of multi-processors (compute units).
Definition: hip_runtime_api.h:103
int maxGridSize[3]
Max grid dimensions (XYZ).
Definition: hip_runtime_api.h:92
int pciDeviceID
PCI Device ID.
Definition: hip_runtime_api.h:113
int computeMode
Compute mode.
Definition: hip_runtime_api.h:106
int major
Definition: hip_runtime_api.h:97
int warpSize
Warp size.
Definition: hip_runtime_api.h:89
int tccDriver
1 if device is a Tesla device using the TCC driver, else 0.
Definition: hip_runtime_api.h:133
int cooperativeMultiDeviceUnmatchedSharedMem
Definition: hip_runtime_api.h:140
int maxThreadsPerBlock
Max work items per work group or workgroup max size.
Definition: hip_runtime_api.h:90
hipDeviceArch_t arch
Architectural feature flags. New for HIP.
Definition: hip_runtime_api.h:109
size_t texturePitchAlignment
Pitch alignment requirement for texture references bound to pitched memory.
Definition: hip_runtime_api.h:130
Definition: driver_types.h:382
Definition: hip_runtime_api.h:109
Definition: hip_runtime_api.h:101
Definition: hip_runtime_api.h:97
Definition: hip_runtime_api.h:327
Definition: driver_types.h:395
Definition: driver_types.h:375
Definition: hip_runtime_api.h:169
Definition: driver_types.h:273
Definition: driver_types.h:334
Definition: texture_types.h:95
Definition: driver_types.h:98
Definition: texture_types.h:74