23 #ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H 24 #define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H 26 #include <cuda_runtime_api.h> 28 #include <cuda_profiler_api.h> 35 #define __dparm(x) = x 41 #if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED) 42 #define __HIP_DEPRECATED 43 #elif defined(_MSC_VER) 44 #define __HIP_DEPRECATED __declspec(deprecated) 45 #elif defined(__GNUC__) 46 #define __HIP_DEPRECATED __attribute__((deprecated)) 48 #define __HIP_DEPRECATED 57 typedef enum hipMemcpyKind {
59 hipMemcpyHostToDevice,
60 hipMemcpyDeviceToHost,
61 hipMemcpyDeviceToDevice,
66 #define hipDataType cudaDataType 67 #define HIP_R_16F CUDA_R_16F 68 #define HIP_R_32F CUDA_R_32F 69 #define HIP_R_64F CUDA_R_64F 70 #define HIP_C_16F CUDA_C_16F 71 #define HIP_C_32F CUDA_C_32F 72 #define HIP_C_64F CUDA_C_64F 75 #define hipLibraryPropertyType libraryPropertyType 76 #define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION 77 #define HIP_LIBRARY_MINOR_VERSION MINOR_VERSION 78 #define HIP_LIBRARY_PATCH_LEVEL PATCH_LEVEL 81 typedef enum cudaTextureAddressMode hipTextureAddressMode;
82 #define hipAddressModeWrap cudaAddressModeWrap 83 #define hipAddressModeClamp cudaAddressModeClamp 84 #define hipAddressModeMirror cudaAddressModeMirror 85 #define hipAddressModeBorder cudaAddressModeBorder 88 typedef enum cudaTextureFilterMode hipTextureFilterMode;
89 #define hipFilterModePoint cudaFilterModePoint 90 #define hipFilterModeLinear cudaFilterModeLinear 93 typedef enum cudaTextureReadMode hipTextureReadMode;
94 #define hipReadModeElementType cudaReadModeElementType 95 #define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat 98 typedef enum cudaChannelFormatKind hipChannelFormatKind;
99 #define hipChannelFormatKindSigned cudaChannelFormatKindSigned 100 #define hipChannelFormatKindUnsigned cudaChannelFormatKindUnsigned 101 #define hipChannelFormatKindFloat cudaChannelFormatKindFloat 102 #define hipChannelFormatKindNone cudaChannelFormatKindNone 104 #define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode 105 #define hipBoundaryModeZero cudaBoundaryModeZero 106 #define hipBoundaryModeTrap cudaBoundaryModeTrap 107 #define hipBoundaryModeClamp cudaBoundaryModeClamp 110 #define hipFuncCachePreferNone cudaFuncCachePreferNone 111 #define hipFuncCachePreferShared cudaFuncCachePreferShared 112 #define hipFuncCachePreferL1 cudaFuncCachePreferL1 113 #define hipFuncCachePreferEqual cudaFuncCachePreferEqual 116 #define hipResourceType cudaResourceType 117 #define hipResourceTypeArray cudaResourceTypeArray 118 #define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray 119 #define hipResourceTypeLinear cudaResourceTypeLinear 120 #define hipResourceTypePitch2D cudaResourceTypePitch2D 126 #define hipEventDefault cudaEventDefault 127 #define hipEventBlockingSync cudaEventBlockingSync 128 #define hipEventDisableTiming cudaEventDisableTiming 129 #define hipEventInterprocess cudaEventInterprocess 130 #define hipEventReleaseToDevice 0 131 #define hipEventReleaseToSystem 0 134 #define hipHostMallocDefault cudaHostAllocDefault 135 #define hipHostMallocPortable cudaHostAllocPortable 136 #define hipHostMallocMapped cudaHostAllocMapped 137 #define hipHostMallocWriteCombined cudaHostAllocWriteCombined 138 #define hipHostMallocCoherent 0x0 139 #define hipHostMallocNonCoherent 0x0 141 #define hipMemAttachGlobal cudaMemAttachGlobal 142 #define hipMemAttachHost cudaMemAttachHost 144 #define hipHostRegisterDefault cudaHostRegisterDefault 145 #define hipHostRegisterPortable cudaHostRegisterPortable 146 #define hipHostRegisterMapped cudaHostRegisterMapped 147 #define hipHostRegisterIoMemory cudaHostRegisterIoMemory 149 #define HIP_LAUNCH_PARAM_BUFFER_POINTER 
CU_LAUNCH_PARAM_BUFFER_POINTER 150 #define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE 151 #define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END 152 #define hipLimitMallocHeapSize cudaLimitMallocHeapSize 153 #define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess 155 #define hipOccupancyDefault cudaOccupancyDefault 157 #define hipCooperativeLaunchMultiDeviceNoPreSync \ 158 cudaCooperativeLaunchMultiDeviceNoPreSync 159 #define hipCooperativeLaunchMultiDeviceNoPostSync \ 160 cudaCooperativeLaunchMultiDeviceNoPostSync 164 #define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS 165 #define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK 166 #define hipJitOptionWallTime CU_JIT_WALL_TIME 167 #define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER 168 #define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES 169 #define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER 170 #define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES 171 #define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL 172 #define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT 173 #define hipJitOptionTarget CU_JIT_TARGET 174 #define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY 175 #define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO 176 #define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE 177 #define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO 178 #define hipJitOptionCacheMode CU_JIT_CACHE_MODE 179 #define hipJitOptionSm3xOpt CU_JIT_NEW_SM3X_OPT 180 #define hipJitOptionFastCompile CU_JIT_FAST_COMPILE 181 #define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS 187 typedef enum cudaLimit hipLimit_t;
192 typedef CUfunc_cache hipFuncCache;
193 typedef CUjit_option hipJitOption;
194 typedef CUdevice hipDevice_t;
195 typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr;
198 typedef CUdeviceptr hipDeviceptr_t;
204 #define hipFunction_attribute CUfunction_attribute 205 #define hip_Memcpy2D CUDA_MEMCPY2D 206 #define hipMemcpy3DParms cudaMemcpy3DParms 207 #define hipArrayDefault cudaArrayDefault 208 #define hipArrayLayered cudaArrayLayered 209 #define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore 210 #define hipArrayCubemap cudaArrayCubemap 211 #define hipArrayTextureGather cudaArrayTextureGather 213 typedef cudaTextureObject_t hipTextureObject_t;
215 #define hipTextureType1D cudaTextureType1D 216 #define hipTextureType1DLayered cudaTextureType1DLayered 217 #define hipTextureType2D cudaTextureType2D 218 #define hipTextureType2DLayered cudaTextureType2DLayered 219 #define hipTextureType3D cudaTextureType3D 220 #define hipDeviceMapHost cudaDeviceMapHost 224 #define make_hipExtent make_cudaExtent 225 #define make_hipPos make_cudaPos 226 #define make_hipPitchedPtr make_cudaPitchedPtr 228 #define hipStreamDefault cudaStreamDefault 229 #define hipStreamNonBlocking cudaStreamNonBlocking 236 #define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault 237 #define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte 238 #define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte 241 #define HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK 242 #define HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES 243 #define HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES 244 #define HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES 245 #define HIP_FUNC_ATTRIBUTE_NUM_REGS CU_FUNC_ATTRIBUTE_NUM_REGS 246 #define HIP_FUNC_ATTRIBUTE_PTX_VERSION CU_FUNC_ATTRIBUTE_PTX_VERSION 247 #define HIP_FUNC_ATTRIBUTE_BINARY_VERSION CU_FUNC_ATTRIBUTE_BINARY_VERSION 248 #define HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA CU_FUNC_ATTRIBUTE_CACHE_MODE_CA 249 #define HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES 250 #define HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT 251 #define HIP_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX 253 #if CUDA_VERSION >= 9000 254 #define __shfl(...) __shfl_sync(0xffffffff, __VA_ARGS__) 255 #define __shfl_up(...) __shfl_up_sync(0xffffffff, __VA_ARGS__) 256 #define __shfl_down(...) __shfl_down_sync(0xffffffff, __VA_ARGS__) 257 #define __shfl_xor(...) 
__shfl_xor_sync(0xffffffff, __VA_ARGS__) 258 #endif // CUDA_VERSION >= 9000 260 inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) {
264 case cudaErrorProfilerDisabled:
265 return hipErrorProfilerDisabled;
266 case cudaErrorProfilerNotInitialized:
267 return hipErrorProfilerNotInitialized;
268 case cudaErrorProfilerAlreadyStarted:
269 return hipErrorProfilerAlreadyStarted;
270 case cudaErrorProfilerAlreadyStopped:
271 return hipErrorProfilerAlreadyStopped;
272 case cudaErrorInsufficientDriver:
273 return hipErrorInsufficientDriver;
274 case cudaErrorUnsupportedLimit:
275 return hipErrorUnsupportedLimit;
276 case cudaErrorPeerAccessUnsupported:
277 return hipErrorPeerAccessUnsupported;
278 case cudaErrorInvalidGraphicsContext:
279 return hipErrorInvalidGraphicsContext;
280 case cudaErrorSharedObjectSymbolNotFound:
281 return hipErrorSharedObjectSymbolNotFound;
282 case cudaErrorSharedObjectInitFailed:
283 return hipErrorSharedObjectInitFailed;
284 case cudaErrorOperatingSystem:
285 return hipErrorOperatingSystem;
286 case cudaErrorSetOnActiveProcess:
287 return hipErrorSetOnActiveProcess;
288 case cudaErrorIllegalAddress:
289 return hipErrorIllegalAddress;
290 case cudaErrorInvalidSymbol:
291 return hipErrorInvalidSymbol;
292 case cudaErrorMissingConfiguration:
293 return hipErrorMissingConfiguration;
294 case cudaErrorMemoryAllocation:
295 return hipErrorOutOfMemory;
296 case cudaErrorInitializationError:
297 return hipErrorNotInitialized;
298 case cudaErrorLaunchFailure:
299 return hipErrorLaunchFailure;
300 case cudaErrorCooperativeLaunchTooLarge:
301 return hipErrorCooperativeLaunchTooLarge;
302 case cudaErrorPriorLaunchFailure:
303 return hipErrorPriorLaunchFailure;
304 case cudaErrorLaunchOutOfResources:
305 return hipErrorLaunchOutOfResources;
306 case cudaErrorInvalidDeviceFunction:
307 return hipErrorInvalidDeviceFunction;
308 case cudaErrorInvalidConfiguration:
309 return hipErrorInvalidConfiguration;
310 case cudaErrorInvalidDevice:
311 return hipErrorInvalidDevice;
312 case cudaErrorInvalidValue:
313 return hipErrorInvalidValue;
314 case cudaErrorInvalidDevicePointer:
315 return hipErrorInvalidDevicePointer;
316 case cudaErrorInvalidMemcpyDirection:
317 return hipErrorInvalidMemcpyDirection;
318 case cudaErrorInvalidResourceHandle:
319 return hipErrorInvalidHandle;
320 case cudaErrorNotReady:
321 return hipErrorNotReady;
322 case cudaErrorNoDevice:
323 return hipErrorNoDevice;
324 case cudaErrorPeerAccessAlreadyEnabled:
325 return hipErrorPeerAccessAlreadyEnabled;
326 case cudaErrorPeerAccessNotEnabled:
327 return hipErrorPeerAccessNotEnabled;
328 case cudaErrorHostMemoryAlreadyRegistered:
329 return hipErrorHostMemoryAlreadyRegistered;
330 case cudaErrorHostMemoryNotRegistered:
331 return hipErrorHostMemoryNotRegistered;
332 case cudaErrorMapBufferObjectFailed:
333 return hipErrorMapFailed;
334 case cudaErrorAssert:
335 return hipErrorAssert;
336 case cudaErrorNotSupported:
337 return hipErrorNotSupported;
338 case cudaErrorCudartUnloading:
339 return hipErrorDeinitialized;
340 case cudaErrorInvalidKernelImage:
341 return hipErrorInvalidImage;
342 case cudaErrorUnmapBufferObjectFailed:
343 return hipErrorUnmapFailed;
344 case cudaErrorNoKernelImageForDevice:
345 return hipErrorNoBinaryForGpu;
346 case cudaErrorECCUncorrectable:
347 return hipErrorECCNotCorrectable;
348 case cudaErrorDeviceAlreadyInUse:
349 return hipErrorContextAlreadyInUse;
350 case cudaErrorInvalidPtx:
351 return hipErrorInvalidKernelFile;
352 case cudaErrorLaunchTimeout:
353 return hipErrorLaunchTimeOut;
354 #if CUDA_VERSION >= 10010 355 case cudaErrorInvalidSource:
356 return hipErrorInvalidSource;
357 case cudaErrorFileNotFound:
358 return hipErrorFileNotFound;
359 case cudaErrorSymbolNotFound:
360 return hipErrorNotFound;
361 case cudaErrorArrayIsMapped:
362 return hipErrorArrayIsMapped;
363 case cudaErrorNotMappedAsPointer:
364 return hipErrorNotMappedAsPointer;
365 case cudaErrorNotMappedAsArray:
366 return hipErrorNotMappedAsArray;
367 case cudaErrorNotMapped:
368 return hipErrorNotMapped;
369 case cudaErrorAlreadyAcquired:
370 return hipErrorAlreadyAcquired;
371 case cudaErrorAlreadyMapped:
372 return hipErrorAlreadyMapped;
374 #if CUDA_VERSION >= 10020 375 case cudaErrorDeviceUninitialized:
376 return hipErrorInvalidContext;
378 case cudaErrorUnknown:
380 return hipErrorUnknown;
384 inline static hipError_t hipCUResultTohipError(CUresult cuError) {
388 case CUDA_ERROR_OUT_OF_MEMORY:
389 return hipErrorOutOfMemory;
390 case CUDA_ERROR_INVALID_VALUE:
391 return hipErrorInvalidValue;
392 case CUDA_ERROR_INVALID_DEVICE:
393 return hipErrorInvalidDevice;
394 case CUDA_ERROR_DEINITIALIZED:
395 return hipErrorDeinitialized;
396 case CUDA_ERROR_NO_DEVICE:
397 return hipErrorNoDevice;
398 case CUDA_ERROR_INVALID_CONTEXT:
399 return hipErrorInvalidContext;
400 case CUDA_ERROR_NOT_INITIALIZED:
401 return hipErrorNotInitialized;
402 case CUDA_ERROR_INVALID_HANDLE:
403 return hipErrorInvalidHandle;
404 case CUDA_ERROR_MAP_FAILED:
405 return hipErrorMapFailed;
406 case CUDA_ERROR_PROFILER_DISABLED:
407 return hipErrorProfilerDisabled;
408 case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
409 return hipErrorProfilerNotInitialized;
410 case CUDA_ERROR_PROFILER_ALREADY_STARTED:
411 return hipErrorProfilerAlreadyStarted;
412 case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
413 return hipErrorProfilerAlreadyStopped;
414 case CUDA_ERROR_INVALID_IMAGE:
415 return hipErrorInvalidImage;
416 case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
417 return hipErrorContextAlreadyCurrent;
418 case CUDA_ERROR_UNMAP_FAILED:
419 return hipErrorUnmapFailed;
420 case CUDA_ERROR_ARRAY_IS_MAPPED:
421 return hipErrorArrayIsMapped;
422 case CUDA_ERROR_ALREADY_MAPPED:
423 return hipErrorAlreadyMapped;
424 case CUDA_ERROR_NO_BINARY_FOR_GPU:
425 return hipErrorNoBinaryForGpu;
426 case CUDA_ERROR_ALREADY_ACQUIRED:
427 return hipErrorAlreadyAcquired;
428 case CUDA_ERROR_NOT_MAPPED:
429 return hipErrorNotMapped;
430 case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
431 return hipErrorNotMappedAsArray;
432 case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
433 return hipErrorNotMappedAsPointer;
434 case CUDA_ERROR_ECC_UNCORRECTABLE:
435 return hipErrorECCNotCorrectable;
436 case CUDA_ERROR_UNSUPPORTED_LIMIT:
437 return hipErrorUnsupportedLimit;
438 case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
439 return hipErrorContextAlreadyInUse;
440 case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
441 return hipErrorPeerAccessUnsupported;
442 case CUDA_ERROR_INVALID_PTX:
443 return hipErrorInvalidKernelFile;
444 case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
445 return hipErrorInvalidGraphicsContext;
446 case CUDA_ERROR_INVALID_SOURCE:
447 return hipErrorInvalidSource;
448 case CUDA_ERROR_FILE_NOT_FOUND:
449 return hipErrorFileNotFound;
450 case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
451 return hipErrorSharedObjectSymbolNotFound;
452 case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
453 return hipErrorSharedObjectInitFailed;
454 case CUDA_ERROR_OPERATING_SYSTEM:
455 return hipErrorOperatingSystem;
456 case CUDA_ERROR_NOT_FOUND:
457 return hipErrorNotFound;
458 case CUDA_ERROR_NOT_READY:
459 return hipErrorNotReady;
460 case CUDA_ERROR_ILLEGAL_ADDRESS:
461 return hipErrorIllegalAddress;
462 case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
463 return hipErrorLaunchOutOfResources;
464 case CUDA_ERROR_LAUNCH_TIMEOUT:
465 return hipErrorLaunchTimeOut;
466 case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
467 return hipErrorPeerAccessAlreadyEnabled;
468 case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
469 return hipErrorPeerAccessNotEnabled;
470 case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
471 return hipErrorSetOnActiveProcess;
472 case CUDA_ERROR_ASSERT:
473 return hipErrorAssert;
474 case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
475 return hipErrorHostMemoryAlreadyRegistered;
476 case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
477 return hipErrorHostMemoryNotRegistered;
478 case CUDA_ERROR_LAUNCH_FAILED:
479 return hipErrorLaunchFailure;
480 case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE:
481 return hipErrorCooperativeLaunchTooLarge;
482 case CUDA_ERROR_NOT_SUPPORTED:
483 return hipErrorNotSupported;
484 case CUDA_ERROR_UNKNOWN:
486 return hipErrorUnknown;
490 inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
494 case hipErrorOutOfMemory:
495 return cudaErrorMemoryAllocation;
496 case hipErrorProfilerDisabled:
497 return cudaErrorProfilerDisabled;
498 case hipErrorProfilerNotInitialized:
499 return cudaErrorProfilerNotInitialized;
500 case hipErrorProfilerAlreadyStarted:
501 return cudaErrorProfilerAlreadyStarted;
502 case hipErrorProfilerAlreadyStopped:
503 return cudaErrorProfilerAlreadyStopped;
504 case hipErrorInvalidConfiguration:
505 return cudaErrorInvalidConfiguration;
506 case hipErrorLaunchOutOfResources:
507 return cudaErrorLaunchOutOfResources;
508 case hipErrorInvalidValue:
509 return cudaErrorInvalidValue;
510 case hipErrorInvalidHandle:
511 return cudaErrorInvalidResourceHandle;
512 case hipErrorInvalidDevice:
513 return cudaErrorInvalidDevice;
514 case hipErrorInvalidMemcpyDirection:
515 return cudaErrorInvalidMemcpyDirection;
516 case hipErrorInvalidDevicePointer:
517 return cudaErrorInvalidDevicePointer;
518 case hipErrorNotInitialized:
519 return cudaErrorInitializationError;
520 case hipErrorNoDevice:
521 return cudaErrorNoDevice;
522 case hipErrorNotReady:
523 return cudaErrorNotReady;
524 case hipErrorPeerAccessNotEnabled:
525 return cudaErrorPeerAccessNotEnabled;
526 case hipErrorPeerAccessAlreadyEnabled:
527 return cudaErrorPeerAccessAlreadyEnabled;
528 case hipErrorHostMemoryAlreadyRegistered:
529 return cudaErrorHostMemoryAlreadyRegistered;
530 case hipErrorHostMemoryNotRegistered:
531 return cudaErrorHostMemoryNotRegistered;
532 case hipErrorDeinitialized:
533 return cudaErrorCudartUnloading;
534 case hipErrorInvalidSymbol:
535 return cudaErrorInvalidSymbol;
536 case hipErrorInsufficientDriver:
537 return cudaErrorInsufficientDriver;
538 case hipErrorMissingConfiguration:
539 return cudaErrorMissingConfiguration;
540 case hipErrorPriorLaunchFailure:
541 return cudaErrorPriorLaunchFailure;
542 case hipErrorInvalidDeviceFunction:
543 return cudaErrorInvalidDeviceFunction;
544 case hipErrorInvalidImage:
545 return cudaErrorInvalidKernelImage;
546 case hipErrorInvalidContext:
547 #if CUDA_VERSION >= 10020 548 return cudaErrorDeviceUninitialized;
550 return cudaErrorUnknown;
552 case hipErrorMapFailed:
553 return cudaErrorMapBufferObjectFailed;
554 case hipErrorUnmapFailed:
555 return cudaErrorUnmapBufferObjectFailed;
556 case hipErrorArrayIsMapped:
557 #if CUDA_VERSION >= 10010 558 return cudaErrorArrayIsMapped;
560 return cudaErrorUnknown;
562 case hipErrorAlreadyMapped:
563 #if CUDA_VERSION >= 10010 564 return cudaErrorAlreadyMapped;
566 return cudaErrorUnknown;
568 case hipErrorNoBinaryForGpu:
569 return cudaErrorNoKernelImageForDevice;
570 case hipErrorAlreadyAcquired:
571 #if CUDA_VERSION >= 10010 572 return cudaErrorAlreadyAcquired;
574 return cudaErrorUnknown;
576 case hipErrorNotMapped:
577 #if CUDA_VERSION >= 10010 578 return cudaErrorNotMapped;
580 return cudaErrorUnknown;
582 case hipErrorNotMappedAsArray:
583 #if CUDA_VERSION >= 10010 584 return cudaErrorNotMappedAsArray;
586 return cudaErrorUnknown;
588 case hipErrorNotMappedAsPointer:
589 #if CUDA_VERSION >= 10010 590 return cudaErrorNotMappedAsPointer;
592 return cudaErrorUnknown;
594 case hipErrorECCNotCorrectable:
595 return cudaErrorECCUncorrectable;
596 case hipErrorUnsupportedLimit:
597 return cudaErrorUnsupportedLimit;
598 case hipErrorContextAlreadyInUse:
599 return cudaErrorDeviceAlreadyInUse;
600 case hipErrorPeerAccessUnsupported:
601 return cudaErrorPeerAccessUnsupported;
602 case hipErrorInvalidKernelFile:
603 return cudaErrorInvalidPtx;
604 case hipErrorInvalidGraphicsContext:
605 return cudaErrorInvalidGraphicsContext;
606 case hipErrorInvalidSource:
607 #if CUDA_VERSION >= 10010 608 return cudaErrorInvalidSource;
610 return cudaErrorUnknown;
612 case hipErrorFileNotFound:
613 #if CUDA_VERSION >= 10010 614 return cudaErrorFileNotFound;
616 return cudaErrorUnknown;
618 case hipErrorSharedObjectSymbolNotFound:
619 return cudaErrorSharedObjectSymbolNotFound;
620 case hipErrorSharedObjectInitFailed:
621 return cudaErrorSharedObjectInitFailed;
622 case hipErrorOperatingSystem:
623 return cudaErrorOperatingSystem;
624 case hipErrorNotFound:
625 #if CUDA_VERSION >= 10010 626 return cudaErrorSymbolNotFound;
628 return cudaErrorUnknown;
630 case hipErrorIllegalAddress:
631 return cudaErrorIllegalAddress;
632 case hipErrorLaunchTimeOut:
633 return cudaErrorLaunchTimeout;
634 case hipErrorSetOnActiveProcess:
635 return cudaErrorSetOnActiveProcess;
636 case hipErrorLaunchFailure:
637 return cudaErrorLaunchFailure;
638 case hipErrorCooperativeLaunchTooLarge:
639 return cudaErrorCooperativeLaunchTooLarge;
640 case hipErrorNotSupported:
641 return cudaErrorNotSupported;
643 case hipErrorRuntimeMemory:
645 case hipErrorRuntimeOther:
646 case hipErrorUnknown:
649 return cudaErrorUnknown;
653 inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
655 case hipMemcpyHostToHost:
656 return cudaMemcpyHostToHost;
657 case hipMemcpyHostToDevice:
658 return cudaMemcpyHostToDevice;
659 case hipMemcpyDeviceToHost:
660 return cudaMemcpyDeviceToHost;
661 case hipMemcpyDeviceToDevice:
662 return cudaMemcpyDeviceToDevice;
664 return cudaMemcpyDefault;
668 inline static enum cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode(
669 hipTextureAddressMode kind) {
671 case hipAddressModeWrap:
672 return cudaAddressModeWrap;
673 case hipAddressModeClamp:
674 return cudaAddressModeClamp;
675 case hipAddressModeMirror:
676 return cudaAddressModeMirror;
677 case hipAddressModeBorder:
678 return cudaAddressModeBorder;
680 return cudaAddressModeWrap;
684 inline static enum cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode(
685 hipTextureFilterMode kind) {
687 case hipFilterModePoint:
688 return cudaFilterModePoint;
689 case hipFilterModeLinear:
690 return cudaFilterModeLinear;
692 return cudaFilterModePoint;
696 inline static enum cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) {
698 case hipReadModeElementType:
699 return cudaReadModeElementType;
700 case hipReadModeNormalizedFloat:
701 return cudaReadModeNormalizedFloat;
703 return cudaReadModeElementType;
707 inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind(
708 hipChannelFormatKind kind) {
710 case hipChannelFormatKindSigned:
711 return cudaChannelFormatKindSigned;
712 case hipChannelFormatKindUnsigned:
713 return cudaChannelFormatKindUnsigned;
714 case hipChannelFormatKindFloat:
715 return cudaChannelFormatKindFloat;
716 case hipChannelFormatKindNone:
717 return cudaChannelFormatKindNone;
719 return cudaChannelFormatKindNone;
726 #define HIPRT_CB CUDART_CB 727 typedef void(HIPRT_CB*
hipStreamCallback_t)(hipStream_t stream, hipError_t status,
void* userData);
728 inline static hipError_t
hipInit(
unsigned int flags) {
729 return hipCUResultTohipError(cuInit(flags));
732 inline static hipError_t
hipDeviceReset() {
return hipCUDAErrorTohipError(cudaDeviceReset()); }
734 inline static hipError_t
hipGetLastError() {
return hipCUDAErrorTohipError(cudaGetLastError()); }
737 return hipCUDAErrorTohipError(cudaPeekAtLastError());
740 inline static hipError_t
hipMalloc(
void** ptr,
size_t size) {
741 return hipCUDAErrorTohipError(cudaMalloc(ptr, size));
744 inline static hipError_t
hipMallocPitch(
void** ptr,
size_t* pitch,
size_t width,
size_t height) {
745 return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
748 inline static hipError_t
hipMemAllocPitch(hipDeviceptr_t* dptr,
size_t* pitch,
size_t widthInBytes,
size_t height,
unsigned int elementSizeBytes){
749 return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes));
753 return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
756 inline static hipError_t
hipFree(
void* ptr) {
return hipCUDAErrorTohipError(cudaFree(ptr)); }
758 inline static hipError_t
hipMallocHost(
void** ptr,
size_t size)
759 __attribute__((deprecated(
"use hipHostMalloc instead")));
761 return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
765 __attribute__((deprecated(
"use hipHostMalloc instead")));
767 return hipCUResultTohipError(cuMemAllocHost(ptr, size));
770 inline static hipError_t
hipHostAlloc(
void** ptr,
size_t size,
unsigned int flags)
771 __attribute__((deprecated(
"use hipHostMalloc instead")));
772 inline static hipError_t
hipHostAlloc(
void** ptr,
size_t size,
unsigned int flags) {
773 return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
776 inline static hipError_t
hipHostMalloc(
void** ptr,
size_t size,
unsigned int flags) {
777 return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
780 inline static hipError_t
hipMallocManaged(
void** ptr,
size_t size,
unsigned int flags) {
781 return hipCUDAErrorTohipError(cudaMallocManaged(ptr, size, flags));
785 size_t width,
size_t height,
787 return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags));
792 return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags));
796 return hipCUDAErrorTohipError(cudaFreeArray(array));
800 return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags));
803 inline static hipError_t
hipHostGetFlags(
unsigned int* flagsPtr,
void* hostPtr) {
804 return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr));
807 inline static hipError_t
hipHostRegister(
void* ptr,
size_t size,
unsigned int flags) {
808 return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags));
812 return hipCUDAErrorTohipError(cudaHostUnregister(ptr));
816 __attribute__((deprecated(
"use hipHostFree instead")));
818 return hipCUDAErrorTohipError(cudaFreeHost(ptr));
822 return hipCUDAErrorTohipError(cudaFreeHost(ptr));
826 return hipCUDAErrorTohipError(cudaSetDevice(device));
830 struct cudaDeviceProp cdprop;
831 memset(&cdprop, 0x0,
sizeof(
struct cudaDeviceProp));
832 cdprop.major = prop->
major;
833 cdprop.minor = prop->
minor;
848 return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop));
851 inline static hipError_t
hipMemcpyHtoD(hipDeviceptr_t dst,
void* src,
size_t size) {
852 return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size));
855 inline static hipError_t
hipMemcpyDtoH(
void* dst, hipDeviceptr_t src,
size_t size) {
856 return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size));
859 inline static hipError_t
hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src,
size_t size) {
860 return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size));
863 inline static hipError_t
hipMemcpyHtoDAsync(hipDeviceptr_t dst,
void* src,
size_t size,
864 hipStream_t stream) {
865 return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream));
868 inline static hipError_t
hipMemcpyDtoHAsync(
void* dst, hipDeviceptr_t src,
size_t size,
869 hipStream_t stream) {
870 return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream));
873 inline static hipError_t
hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src,
size_t size,
874 hipStream_t stream) {
875 return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream));
878 inline static hipError_t
hipMemcpy(
void* dst,
const void* src,
size_t sizeBytes,
879 hipMemcpyKind copyKind) {
880 return hipCUDAErrorTohipError(
881 cudaMemcpy(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind)));
885 inline static hipError_t hipMemcpyWithStream(
void* dst,
const void* src,
886 size_t sizeBytes, hipMemcpyKind copyKind,
887 hipStream_t stream) {
888 cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes,
889 hipMemcpyKindToCudaMemcpyKind(copyKind),
892 if (error != cudaSuccess)
return hipCUDAErrorTohipError(error);
894 return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
897 inline static hipError_t
hipMemcpyAsync(
void* dst,
const void* src,
size_t sizeBytes,
898 hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) {
899 return hipCUDAErrorTohipError(
900 cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream));
903 inline static hipError_t hipMemcpyToSymbol(
const void* symbol,
const void* src,
size_t sizeBytes,
904 size_t offset __dparm(0),
905 hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) {
906 return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset,
907 hipMemcpyKindToCudaMemcpyKind(copyType)));
910 inline static hipError_t hipMemcpyToSymbolAsync(
const void* symbol,
const void* src,
911 size_t sizeBytes,
size_t offset,
912 hipMemcpyKind copyType,
913 hipStream_t stream __dparm(0)) {
914 return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(
915 symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream));
918 inline static hipError_t hipMemcpyFromSymbol(
void* dst,
const void* symbolName,
size_t sizeBytes,
919 size_t offset __dparm(0),
920 hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
921 return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset,
922 hipMemcpyKindToCudaMemcpyKind(kind)));
925 inline static hipError_t hipMemcpyFromSymbolAsync(
void* dst,
const void* symbolName,
926 size_t sizeBytes,
size_t offset,
928 hipStream_t stream __dparm(0)) {
929 return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync(
930 dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
933 inline static hipError_t hipGetSymbolAddress(
void** devPtr,
const void* symbolName) {
934 return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName));
937 inline static hipError_t hipGetSymbolSize(
size_t* size,
const void* symbolName) {
938 return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName));
941 inline static hipError_t
hipMemcpy2D(
void* dst,
size_t dpitch,
const void* src,
size_t spitch,
942 size_t width,
size_t height, hipMemcpyKind kind) {
943 return hipCUDAErrorTohipError(
944 cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind)));
948 return hipCUResultTohipError(cuMemcpy2D(pCopy));
952 return hipCUResultTohipError(cuMemcpy2DAsync(pCopy, stream));
957 return hipCUDAErrorTohipError(cudaMemcpy3D(p));
962 return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream));
965 inline static hipError_t
hipMemcpy2DAsync(
void* dst,
size_t dpitch,
const void* src,
size_t spitch,
966 size_t width,
size_t height, hipMemcpyKind kind,
967 hipStream_t stream) {
968 return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height,
969 hipMemcpyKindToCudaMemcpyKind(kind), stream));
973 const void* src,
size_t spitch,
size_t width,
974 size_t height, hipMemcpyKind kind) {
975 return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width,
976 height, hipMemcpyKindToCudaMemcpyKind(kind)));
980 size_t hOffset,
const void* src,
981 size_t count, hipMemcpyKind kind) {
982 return hipCUDAErrorTohipError(
983 cudaMemcpyToArray(dst, wOffset, hOffset, src, count, hipMemcpyKindToCudaMemcpyKind(kind)));
986 __HIP_DEPRECATED
inline static hipError_t
hipMemcpyFromArray(
void* dst, hipArray_const_t srcArray,
987 size_t wOffset,
size_t hOffset,
988 size_t count, hipMemcpyKind kind) {
989 return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count,
990 hipMemcpyKindToCudaMemcpyKind(kind)));
995 return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count));
1000 return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count));
1004 return hipCUDAErrorTohipError(cudaDeviceSynchronize());
1008 return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig));
1011 inline static hipError_t
hipFuncSetAttribute(
const void* func, hipFuncAttribute attr,
int value) {
1012 return hipCUDAErrorTohipError(cudaFuncSetAttribute(func, attr, value));
1016 return hipCUDAErrorTohipError(cudaDeviceSetCacheConfig(cacheConfig));
1020 return hipCUDAErrorTohipError(cudaFuncSetSharedMemConfig(func, config));
1024 return cudaGetErrorString(hipErrorToCudaError(error));
1028 return cudaGetErrorName(hipErrorToCudaError(error));
1032 return hipCUDAErrorTohipError(cudaGetDeviceCount(count));
1036 return hipCUDAErrorTohipError(cudaGetDevice(device));
1040 return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr));
1043 inline static hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) {
1044 return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event));
1047 inline static hipError_t
hipIpcGetMemHandle(hipIpcMemHandle_t* handle,
void* devPtr) {
1048 return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr));
1051 inline static hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) {
1052 return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle));
1056 unsigned int flags) {
1057 return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags));
1060 inline static hipError_t
hipMemset(
void* devPtr,
int value,
size_t count) {
1061 return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count));
1064 inline static hipError_t
hipMemsetD32(hipDeviceptr_t devPtr,
int value,
size_t count) {
1065 return hipCUResultTohipError(cuMemsetD32(devPtr, value, count));
1068 inline static hipError_t
hipMemsetAsync(
void* devPtr,
int value,
size_t count,
1069 hipStream_t stream __dparm(0)) {
1070 return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream));
1073 inline static hipError_t
hipMemsetD32Async(hipDeviceptr_t devPtr,
int value,
size_t count,
1074 hipStream_t stream __dparm(0)) {
1075 return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream));
1078 inline static hipError_t
hipMemsetD8(hipDeviceptr_t dest,
unsigned char value,
size_t sizeBytes) {
1079 return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes));
1082 inline static hipError_t
hipMemsetD8Async(hipDeviceptr_t dest,
unsigned char value,
size_t sizeBytes,
1083 hipStream_t stream __dparm(0)) {
1084 return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream));
1087 inline static hipError_t
hipMemsetD16(hipDeviceptr_t dest,
unsigned short value,
size_t sizeBytes) {
1088 return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes));
1091 inline static hipError_t
hipMemsetD16Async(hipDeviceptr_t dest,
unsigned short value,
size_t sizeBytes,
1092 hipStream_t stream __dparm(0)) {
1093 return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream));
1096 inline static hipError_t
hipMemset2D(
void* dst,
size_t pitch,
int value,
size_t width,
size_t height) {
1097 return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height));
1100 inline static hipError_t
hipMemset2DAsync(
void* dst,
size_t pitch,
int value,
size_t width,
size_t height, hipStream_t stream __dparm(0)) {
1101 return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream));
1105 return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent));
1109 return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream));
1113 struct cudaDeviceProp cdprop;
1115 cerror = cudaGetDeviceProperties(&cdprop, device);
1117 strncpy(p_prop->
name, cdprop.name, 256);
1121 p_prop->
warpSize = cdprop.warpSize;
1123 for (
int i = 0; i < 3; i++) {
1131 p_prop->
major = cdprop.major;
1132 p_prop->
minor = cdprop.minor;
1139 int ccVers = p_prop->
major * 100 + p_prop->
minor * 10;
1160 p_prop->
pciBusID = cdprop.pciBusID;
1181 p_prop->
memPitch = cdprop.memPitch;
1188 return hipCUDAErrorTohipError(cerror);
1192 enum cudaDeviceAttr cdattr;
1197 cdattr = cudaDevAttrMaxThreadsPerBlock;
1200 cdattr = cudaDevAttrMaxBlockDimX;
1203 cdattr = cudaDevAttrMaxBlockDimY;
1206 cdattr = cudaDevAttrMaxBlockDimZ;
1209 cdattr = cudaDevAttrMaxGridDimX;
1212 cdattr = cudaDevAttrMaxGridDimY;
1215 cdattr = cudaDevAttrMaxGridDimZ;
1218 cdattr = cudaDevAttrMaxSharedMemoryPerBlock;
1221 cdattr = cudaDevAttrTotalConstantMemory;
1224 cdattr = cudaDevAttrWarpSize;
1227 cdattr = cudaDevAttrMaxRegistersPerBlock;
1230 cdattr = cudaDevAttrClockRate;
1233 cdattr = cudaDevAttrMemoryClockRate;
1236 cdattr = cudaDevAttrGlobalMemoryBusWidth;
1239 cdattr = cudaDevAttrMultiProcessorCount;
1242 cdattr = cudaDevAttrComputeMode;
1245 cdattr = cudaDevAttrL2CacheSize;
1248 cdattr = cudaDevAttrMaxThreadsPerMultiProcessor;
1251 cdattr = cudaDevAttrComputeCapabilityMajor;
1254 cdattr = cudaDevAttrComputeCapabilityMinor;
1257 cdattr = cudaDevAttrConcurrentKernels;
1260 cdattr = cudaDevAttrPciBusId;
1263 cdattr = cudaDevAttrPciDeviceId;
1266 cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor;
1269 cdattr = cudaDevAttrIsMultiGpuBoard;
1272 cdattr = cudaDevAttrIntegrated;
1275 cdattr = cudaDevAttrMaxTexture1DWidth;
1278 cdattr = cudaDevAttrMaxTexture2DWidth;
1281 cdattr = cudaDevAttrMaxTexture2DHeight;
1284 cdattr = cudaDevAttrMaxTexture3DWidth;
1287 cdattr = cudaDevAttrMaxTexture3DHeight;
1290 cdattr = cudaDevAttrMaxTexture3DDepth;
1293 cdattr = cudaDevAttrMaxPitch;
1296 cdattr = cudaDevAttrTextureAlignment;
1299 cdattr = cudaDevAttrTexturePitchAlignment;
1302 cdattr = cudaDevAttrKernelExecTimeout;
1305 cdattr = cudaDevAttrCanMapHostMemory;
1308 cdattr = cudaDevAttrEccEnabled;
1311 cdattr = cudaDevAttrCooperativeLaunch;
1314 cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
1317 return hipCUDAErrorTohipError(cudaErrorInvalidValue);
1320 cerror = cudaDeviceGetAttribute(pi, cdattr, device);
1322 return hipCUDAErrorTohipError(cerror);
1328 size_t dynamicSMemSize) {
1329 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1330 blockSize, dynamicSMemSize));
1336 size_t dynamicSMemSize,
1337 unsigned int flags) {
1338 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1339 blockSize, dynamicSMemSize, flags));
1345 size_t dynamicSMemSize ){
1346 return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f,
1347 blockSize, dynamicSMemSize));
1353 size_t dynamicSMemSize,
1354 unsigned int flags ) {
1355 return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f,
1356 blockSize, dynamicSMemSize, flags));
1361 hipFunction_t f,
size_t dynSharedMemPerBlk,
1362 int blockSizeLimit){
1363 return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL,
1364 dynSharedMemPerBlk, blockSizeLimit));
1369 hipFunction_t f,
size_t dynSharedMemPerBlk,
1370 int blockSizeLimit,
unsigned int flags){
1371 return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL,
1372 dynSharedMemPerBlk, blockSizeLimit, flags));
1376 struct cudaPointerAttributes cPA;
1377 hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
1378 if (err == hipSuccess) {
1379 #if (CUDART_VERSION >= 11000) 1380 auto memType = cPA.type;
1382 unsigned memType = cPA.memoryType;
1385 case cudaMemoryTypeDevice:
1386 attributes->memoryType = hipMemoryTypeDevice;
1388 case cudaMemoryTypeHost:
1389 attributes->memoryType = hipMemoryTypeHost;
1392 return hipErrorUnknown;
1394 attributes->device = cPA.device;
1395 attributes->devicePointer = cPA.devicePointer;
1396 attributes->hostPointer = cPA.hostPointer;
1397 attributes->isManaged = 0;
1398 attributes->allocationFlags = 0;
1403 inline static hipError_t
hipMemGetInfo(
size_t* free,
size_t* total) {
1404 return hipCUDAErrorTohipError(cudaMemGetInfo(free, total));
1408 return hipCUDAErrorTohipError(cudaEventCreate(event));
1411 inline static hipError_t
hipEventRecord(hipEvent_t event, hipStream_t stream __dparm(NULL)) {
1412 return hipCUDAErrorTohipError(cudaEventRecord(event, stream));
1416 return hipCUDAErrorTohipError(cudaEventSynchronize(event));
1419 inline static hipError_t
hipEventElapsedTime(
float* ms, hipEvent_t start, hipEvent_t stop) {
1420 return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop));
1424 return hipCUDAErrorTohipError(cudaEventDestroy(event));
1428 return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags));
1432 return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority));
1436 return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority));
1440 return hipCUDAErrorTohipError(cudaStreamCreate(stream));
1444 return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1448 return hipCUDAErrorTohipError(cudaStreamDestroy(stream));
1451 inline static hipError_t
hipStreamGetFlags(hipStream_t stream,
unsigned int *flags) {
1452 return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags));
1456 return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority));
1460 unsigned int flags) {
1461 return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags));
1465 return hipCUDAErrorTohipError(cudaStreamQuery(stream));
1469 void* userData,
unsigned int flags) {
1470 return hipCUDAErrorTohipError(
1471 cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags));
1475 cudaError_t err = cudaDriverGetVersion(driverVersion);
1480 return hipCUDAErrorTohipError(err);
1484 return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion));
1488 return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice));
1492 return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice));
1496 return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags));
1500 return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx));
1504 return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags));
1509 return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active));
1513 return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev));
1517 return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev));
1521 return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev));
1525 return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, flags));
1529 hipDeviceptr_t dptr) {
1530 return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr));
1533 inline static hipError_t
hipMemcpyPeer(
void* dst,
int dstDevice,
const void* src,
int srcDevice,
1535 return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count));
1538 inline static hipError_t
hipMemcpyPeerAsync(
void* dst,
int dstDevice,
const void* src,
1539 int srcDevice,
size_t count,
1540 hipStream_t stream __dparm(0)) {
1541 return hipCUDAErrorTohipError(
1542 cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream));
1546 inline static hipError_t
hipProfilerStart() {
return hipCUDAErrorTohipError(cudaProfilerStart()); }
1548 inline static hipError_t
hipProfilerStop() {
return hipCUDAErrorTohipError(cudaProfilerStop()); }
1551 return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags));
1555 return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags));
1559 return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags));
1563 return hipCUDAErrorTohipError(cudaEventQuery(event));
1566 inline static hipError_t
hipCtxCreate(hipCtx_t* ctx,
unsigned int flags, hipDevice_t device) {
1567 return hipCUResultTohipError(cuCtxCreate(ctx, flags, device));
1571 return hipCUResultTohipError(cuCtxDestroy(ctx));
1575 return hipCUResultTohipError(cuCtxPopCurrent(ctx));
1579 return hipCUResultTohipError(cuCtxPushCurrent(ctx));
1583 return hipCUResultTohipError(cuCtxSetCurrent(ctx));
1587 return hipCUResultTohipError(cuCtxGetCurrent(ctx));
1591 return hipCUResultTohipError(cuCtxGetDevice(device));
1595 return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (
unsigned int*)apiVersion));
1599 return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig));
1603 return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig));
1607 return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config));
1611 return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig));
1615 return hipCUResultTohipError(cuCtxSynchronize());
1619 return hipCUResultTohipError(cuCtxGetFlags(flags));
1622 inline static hipError_t hipCtxDetach(hipCtx_t ctx) {
1623 return hipCUResultTohipError(cuCtxDetach(ctx));
1626 inline static hipError_t
hipDeviceGet(hipDevice_t* device,
int ordinal) {
1627 return hipCUResultTohipError(cuDeviceGet(device, ordinal));
1631 return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device));
1634 inline static hipError_t
hipDeviceGetName(
char* name,
int len, hipDevice_t device) {
1635 return hipCUResultTohipError(cuDeviceGetName(name, len, device));
1639 int srcDevice,
int dstDevice) {
1640 return hipCUDAErrorTohipError(cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice));
1644 return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device));
1648 return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId));
1652 return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config));
1656 return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config));
1660 return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit));
1664 return hipCUResultTohipError(cuDeviceTotalMem(bytes, device));
1667 inline static hipError_t
hipModuleLoad(hipModule_t* module,
const char* fname) {
1668 return hipCUResultTohipError(cuModuleLoad(module, fname));
1672 return hipCUResultTohipError(cuModuleUnload(hmod));
1676 const char* kname) {
1677 return hipCUResultTohipError(cuModuleGetFunction(
function, module, kname));
1681 return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func));
1684 inline static hipError_t
hipFuncGetAttribute (
int* value, hipFunction_attribute attrib, hipFunction_t hfunc) {
1685 return hipCUResultTohipError(cuFuncGetAttribute(value, attrib, hfunc));
1688 inline static hipError_t
hipModuleGetGlobal(hipDeviceptr_t* dptr,
size_t* bytes, hipModule_t hmod,
1690 return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name));
1693 inline static hipError_t
hipModuleLoadData(hipModule_t* module,
const void* image) {
1694 return hipCUResultTohipError(cuModuleLoadData(module, image));
1698 unsigned int numOptions, hipJitOption* options,
1699 void** optionValues) {
1700 return hipCUResultTohipError(
1701 cuModuleLoadDataEx(module, image, numOptions, options, optionValues));
1705 dim3 dimBlocks,
void** args,
size_t sharedMemBytes,
1708 return hipCUDAErrorTohipError(cudaLaunchKernel(function_address,numBlocks,dimBlocks,args,sharedMemBytes,stream));
1712 unsigned int gridDimY,
unsigned int gridDimZ,
1713 unsigned int blockDimX,
unsigned int blockDimY,
1714 unsigned int blockDimZ,
unsigned int sharedMemBytes,
1715 hipStream_t stream,
void** kernelParams,
1717 return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX,
1718 blockDimY, blockDimZ, sharedMemBytes, stream,
1719 kernelParams, extra));
1723 return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig));
1726 __HIP_DEPRECATED
inline static hipError_t hipBindTexture(
size_t* offset,
1730 size_t size __dparm(UINT_MAX)) {
1731 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1734 __HIP_DEPRECATED
inline static hipError_t hipBindTexture2D(
1737 return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch));
1741 hipChannelFormatKind f) {
1742 return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f));
1745 inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject,
1749 return hipCUDAErrorTohipError(
1750 cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc));
1753 inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
1754 return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject));
1757 inline static hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject,
1759 return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc));
1762 inline static hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) {
1763 return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject));
1766 inline static hipError_t hipGetTextureObjectResourceDesc(
hipResourceDesc* pResDesc,
1767 hipTextureObject_t textureObject) {
1768 return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject));
1771 __HIP_DEPRECATED
inline static hipError_t hipGetTextureAlignmentOffset(
1773 return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref));
1778 return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array));
1781 inline static hipError_t hipLaunchCooperativeKernel(
const void* f,
dim3 gridDim,
dim3 blockDim,
1782 void** kernelParams,
unsigned int sharedMemBytes,
1783 hipStream_t stream) {
1784 return hipCUDAErrorTohipError(
1785 cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
1788 inline static hipError_t hipLaunchCooperativeKernelMultiDevice(
hipLaunchParams* launchParamsList,
1789 int numDevices,
unsigned int flags) {
1790 return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags));
1803 size_t dynamicSMemSize) {
1804 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1805 blockSize, dynamicSMemSize));
1810 size_t dynamicSMemSize = 0,
1811 int blockSizeLimit = 0) {
1812 return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1813 dynamicSMemSize, blockSizeLimit));
1817 inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(
int* minGridSize,
int* blockSize, T func,
1818 size_t dynamicSMemSize = 0,
1819 int blockSizeLimit = 0,
unsigned int flags = 0) {
1820 return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1821 dynamicSMemSize, blockSizeLimit, flags));
1826 int blockSize,
size_t dynamicSMemSize,
unsigned int flags) {
1827 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1828 blockSize, dynamicSMemSize, flags));
1831 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1832 inline static hipError_t hipBindTexture(
size_t* offset,
const struct texture<T, dim, readMode>& tex,
1833 const void* devPtr,
size_t size = UINT_MAX) {
1834 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size));
1837 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1838 inline static hipError_t hipBindTexture(
size_t* offset,
struct texture<T, dim, readMode>& tex,
1840 size_t size = UINT_MAX) {
1841 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1844 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1845 __HIP_DEPRECATED
inline static hipError_t hipUnbindTexture(
struct texture<T, dim, readMode>* tex) {
1846 return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1849 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1850 __HIP_DEPRECATED
inline static hipError_t hipUnbindTexture(
struct texture<T, dim, readMode>& tex) {
1851 return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1854 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1855 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
1856 struct texture<T, dim, readMode>& tex, hipArray_const_t array,
1858 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1861 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1862 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
1863 struct texture<T, dim, readMode>* tex, hipArray_const_t array,
1865 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1868 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1869 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
1870 struct texture<T, dim, readMode>& tex, hipArray_const_t array) {
1871 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array));
1876 return cudaCreateChannelDesc<T>();
1880 inline static hipError_t hipLaunchCooperativeKernel(T f,
dim3 gridDim,
dim3 blockDim,
1881 void** kernelParams,
unsigned int sharedMemBytes, hipStream_t stream) {
1882 return hipCUDAErrorTohipError(
1883 cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
1888 #endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H hipError_t hipHostFree(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. This API performs an implicit hipDevi...
Definition: hip_memory.cpp:2396
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags)
Returns occupancy for a device function.
Definition: hip_module.cpp:1714
hipError_t hipFuncSetSharedMemConfig(const void *func, hipSharedMemConfig config)
Set shared memory configuration for a specific function.
Definition: hip_module.cpp:1419
hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, const char *kname)
Function with kname will be extracted if present in module.
Definition: hip_module.cpp:1309
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx)
Disable direct access from current context's virtual address space to memory allocations physically l...
Definition: hip_peer.cpp:227
hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent)
Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value...
Definition: hip_memory.cpp:2286
size_t totalConstMem
Size of constant memory region (in bytes).
Definition: hip_runtime_api.h:96
Definition: hip_runtime_api.h:321
hipError_t hipStreamGetPriority(hipStream_t stream, int *priority)
Query the priority of a stream.
Definition: hip_stream.cpp:238
hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig)
Get the cache configuration for the current device.
Definition: hip_device.cpp:82
Maximum x-dimension of a block.
Definition: hip_runtime_api.h:292
hipError_t hipHostAlloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory [Deprecated].
Definition: hip_runtime_api.h:772
hipError_t hipDriverGetVersion(int *driverVersion)
Returns the approximate HIP driver version.
Definition: hip_context.cpp:85
hipError_t hipLaunchKernel(const void *function_address, dim3 numBlocks, dim3 dimBlocks, void **args, size_t sharedMemBytes __dparm(0), hipStream_t stream __dparm(0))
C compliant kernel launch API.
unsigned hasGlobalInt64Atomics
64-bit integer atomics for global memory.
Definition: hip_runtime_api.h:54
Definition: hip_runtime_api.h:339
hipError_t hipPeekAtLastError(void)
Return last error returned by any HIP runtime API call.
Definition: hip_error.cpp:41
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault))
Returns occupancy for a device function.
Maximum x-dimension of a grid.
Definition: hip_runtime_api.h:295
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:337
hipError_t hipMemcpyParam2D(const hip_Memcpy2D *pCopy)
Copies memory for 2D arrays.
Definition: hip_memory.cpp:2144
hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags)
Set flags for the primary context.
Definition: hip_context.cpp:321
hipError_t hipDevicePrimaryCtxRetain(hipCtx_t *pctx, hipDevice_t dev)
Retain the primary context on the GPU.
Definition: hip_context.cpp:296
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device)
Returns the compute capability of the device.
Definition: hip_device.cpp:434
hipError_t hipDeviceGetByPCIBusId(int *device, const char *pciBusId)
Returns a handle to a compute device.
Definition: hip_device.cpp:492
int cooperativeMultiDeviceUnmatchedBlockDim
Definition: hip_runtime_api.h:137
Definition: hip_runtime_api.h:120
unsigned hasSurfaceFuncs
Surface functions.
Definition: hip_runtime_api.h:71
Global memory bus width in bits.
Definition: hip_runtime_api.h:308
hipError_t hipMemGetAddressRange(hipDeviceptr_t *pbase, size_t *psize, hipDeviceptr_t dptr)
Get information on memory allocations.
Definition: hip_memory.cpp:2437
hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit, unsigned int flags)
Determine the grid and block sizes that achieve maximum occupancy for a kernel.
Definition: hip_module.cpp:1672
int minor
Definition: hip_runtime_api.h:100
hipError_t hipMemcpyToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1494
unsigned long long hipSurfaceObject_t
Definition: hip_surface_types.h:36
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues)
Builds a module from a code object that resides in host memory. Image is a pointer to that location...
Definition: hip_module.cpp:1527
unsigned hasDynamicParallelism
Dynamic parallelism.
Definition: hip_runtime_api.h:73
unsigned hasThreadFenceSystem
__threadfence_system.
Definition: hip_runtime_api.h:67
Definition: driver_types.h:370
size_t texturePitchAlignment
Pitch alignment requirement for texture references bound to pitched memory.
Definition: hip_runtime_api.h:129
hipError_t hipMallocPitch(void **ptr, size_t *pitch, size_t width, size_t height)
Definition: hip_memory.cpp:851
int canMapHostMemory
Check whether HIP can map host memory.
Definition: hip_runtime_api.h:116
Definition: hip_runtime_api.h:302
hipError_t hipMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies data between host and device.
hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
Definition: hip_memory.cpp:2271
int regsPerBlock
Registers per block.
Definition: hip_runtime_api.h:88
int maxTexture3D[3]
Maximum dimensions (width, height, depth) of 3D images, in image elements.
Definition: hip_runtime_api.h:124
hipError_t hipMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:2020
Definition: hip_runtime_api.h:311
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:340
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags)
Adds a callback to be called on the host after all currently enqueued items in the stream have comple...
Definition: hip_stream.cpp:258
Maximum dimension width of 3D images in image elements.
Definition: hip_runtime_api.h:330
iGPU
Definition: hip_runtime_api.h:324
hipError_t hipMemcpyFromArray(void *dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, size_t count, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1511
int isMultiGpuBoard
1 if device is on a multi-GPU board, 0 if not.
Definition: hip_runtime_api.h:115
hipError_t hipMemcpyAtoH(void *dst, hipArray *srcArray, size_t srcOffset, size_t count)
Copies data between host and device.
Definition: hip_memory.cpp:1544
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags)
Enable direct access from current device's virtual address space to memory allocations physically loc...
Definition: hip_peer.cpp:200
hipError_t hipCtxPopCurrent(hipCtx_t *ctx)
Pop the current/default context and return the popped context.
Definition: hip_context.cpp:133
const char * hipGetErrorString(hipError_t hipError)
Return handy text string message to explain the error which occurred.
Definition: hip_error.cpp:54
Peak clock frequency in kilohertz.
Definition: hip_runtime_api.h:306
int maxTexture1D
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:122
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig)
Returns bank width of shared memory for current device.
Definition: hip_device.cpp:124
Definition: hip_runtime_api.h:162
hipError_t hipDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority)
Returns numerical values that correspond to the least and greatest stream priority.
Definition: hip_stream.cpp:122
int clockRate
Max clock frequency of the multiprocessors in kHz.
Definition: hip_runtime_api.h:93
hipError_t hipFuncSetAttribute(const void *func, hipFuncAttribute attr, int value)
Set attribute for a specific function.
Definition: hip_module.cpp:1411
Maximum z-dimension of a grid.
Definition: hip_runtime_api.h:297
hipError_t hipHostGetDevicePointer(void **devPtr, void *hstPtr, unsigned int flags)
Get Device pointer from Host Pointer allocated through hipHostMalloc.
hipError_t hipFreeArray(hipArray *array)
Frees an array on the device.
Definition: hip_memory.cpp:2409
hipError_t hipStreamCreateWithPriority(hipStream_t *stream, unsigned int flags, int priority)
Create an asynchronous stream with the specified priority.
Definition: hip_stream.cpp:113
hipError_t hipMemsetAsync(void *dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant byte value val...
hipError_t hipEventSynchronize(hipEvent_t event)
Wait for an event to complete.
Definition: hip_event.cpp:300
Definition: driver_types.h:262
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void *src, size_t sizeBytes)
Copy data from Host to Device.
Definition: hip_memory.cpp:1374
hipError_t hipModuleUnload(hipModule_t module)
Frees the module.
Definition: hip_module.cpp:1244
Minor compute capability version number.
Definition: hip_runtime_api.h:316
Definition: hip_module.cpp:108
hipError_t hipSetDeviceFlags(unsigned flags)
The current device behavior is changed according to the flags passed.
Definition: hip_runtime_api.h:298
hipError_t hipEventQuery(hipEvent_t event)
Query event status.
Definition: hip_event.cpp:394
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId)
Disable direct access from current device's virtual address space to memory allocations physically lo...
Definition: hip_peer.cpp:193
int pciBusID
PCI Bus ID.
Definition: hip_runtime_api.h:112
hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig *pConfig)
Get Shared memory bank configuration.
Definition: hip_context.cpp:241
Maximum y-dimension of a grid.
Definition: hip_runtime_api.h:296
hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device)
Create a context and set it as current/ default context.
Definition: hip_context.cpp:52
#define hipArrayDefault
Default HIP array allocation flag.
Definition: hip_runtime_api.h:221
unsigned hasSharedInt64Atomics
64-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:55
hipError_t hipMallocArray(hipArray **array, const hipChannelFormatDesc *desc, size_t width, size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault))
Allocate an array on the device.
hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config)
Set Shared memory bank configuration.
Definition: hip_context.cpp:233
Multiple GPU devices.
Definition: hip_runtime_api.h:323
hipError_t hipCtxSetCurrent(hipCtx_t ctx)
Set the passed context as current/default.
Definition: hip_context.cpp:178
hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t *handle, void *devPtr)
Gets an interprocess memory handle for an existing device memory allocation.
Definition: hip_memory.cpp:2458
int tccDriver
1 if the device is a Tesla device using the TCC driver, else 0.
Definition: hip_runtime_api.h:132
hipError_t hipMallocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:760
Definition: hip_runtime_api.h:128
int maxThreadsPerBlock
Max work items per work group or workgroup max size.
Definition: hip_runtime_api.h:90
hipError_t hipMemset2DAsync(void *dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by dst with the constant value.
Maximum y-dimension of a block.
Definition: hip_runtime_api.h:293
hipError_t hipHostMalloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory.
Definition: hip_memory.cpp:762
Definition: hip_hcc_internal.h:938
size_t sharedMemPerBlock
Size of shared memory region (in bytes).
Definition: hip_runtime_api.h:87
unsigned hasDoubles
Double-precision floating point.
Definition: hip_runtime_api.h:58
Definition: texture_types.h:74
Support cooperative launch on multiple devices.
Definition: hip_runtime_api.h:326
int maxThreadsPerMultiProcessor
Maximum resident threads per multi-processor.
Definition: hip_runtime_api.h:105
hipError_t hipDeviceGetLimit(size_t *pValue, enum hipLimit_t limit)
Get Resource limits of current device.
Definition: hip_device.cpp:94
size_t memPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:127
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
Builds a module from a code object which resides in host memory. Image is pointer to that location...
Definition: hip_module.cpp:1508
Definition: driver_types.h:78
int l2CacheSize
L2 cache size.
Definition: hip_runtime_api.h:104
hipDeviceAttribute_t
Definition: hip_runtime_api.h:290
hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags)
Create an asynchronous stream.
Definition: hip_stream.cpp:97
hipError_t hipEventDestroy(hipEvent_t event)
Destroy the specified event.
Definition: hip_event.cpp:278
size_t textureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:128
hipError_t hipChooseDevice(int *device, const hipDeviceProp_t *prop)
Device which matches hipDeviceProp_t is returned.
Definition: hip_device.cpp:518
unsigned hasSyncThreadsExt
__syncthreads_count, __syncthreads_and, __syncthreads_or.
Definition: hip_runtime_api.h:68
hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
Definition: hip_context.cpp:225
hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t stream, void **kernelParams, void **extra)
Launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelp...
Major compute capability version number.
Definition: hip_runtime_api.h:315
Definition: hip_runtime_api.h:330
Maximum number of threads per block.
Definition: hip_runtime_api.h:291
int gcnArch
AMD GCN Arch Value. Eg: 803, 701.
Definition: hip_runtime_api.h:117
Maximum dimensions depth of 3D images in image elements.
Definition: hip_runtime_api.h:332
int kernelExecTimeoutEnabled
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:130
hipDeviceArch_t arch
Architectural feature flags. New for HIP.
Definition: hip_runtime_api.h:109
void(* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void *userData)
Definition: hip_runtime_api.h:972
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
Loads code object from file into a hipModule_t.
Definition: hip_module.cpp:1513
int maxGridSize[3]
Max grid dimensions (XYZ).
Definition: hip_runtime_api.h:92
hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev)
Resets the primary context on the GPU.
Definition: hip_context.cpp:308
hipError_t hipEventCreateWithFlags(hipEvent_t *event, unsigned flags)
Create an event with the specified flags.
Definition: hip_event.cpp:201
int computeMode
Compute mode.
Definition: hip_runtime_api.h:106
Maximum z-dimension of a block.
Definition: hip_runtime_api.h:294
PCI Bus ID.
Definition: hip_runtime_api.h:319
hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
Return the elapsed time between two events.
Definition: hip_event.cpp:344
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
Definition: hip_device.cpp:74
hipError_t hipDeviceCanAccessPeer(int *canAccessPeer, int deviceId, int peerDeviceId)
Determine if a device can access a peer's memory.
Definition: hip_peer.cpp:186
hipError_t hipGetDeviceCount(int *count)
Return number of compute-capable devices.
Definition: hip_device.cpp:69
hipError_t hipMemset(void *dst, int value, size_t sizeBytes)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
Definition: hip_memory.cpp:2220
Definition: driver_types.h:383
unsigned hasSharedInt32Atomics
32-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:49
hipError_t hipFuncGetAttribute(int *value, hipFunction_attribute attrib, hipFunction_t hfunc)
Find out a specific attribute for a given function.
Definition: hip_module.cpp:1427
Maximum dimension height of 2D images in image elements.
Definition: hip_runtime_api.h:329
hipError_t hipStreamDestroy(hipStream_t stream)
Destroys the specified stream.
Definition: hip_stream.cpp:195
Warp size in threads.
Definition: hip_runtime_api.h:301
hipError_t hipStreamSynchronize(hipStream_t stream)
Wait for all commands in stream to complete.
Definition: hip_stream.cpp:184
hipError_t hipHostGetFlags(unsigned int *flagsPtr, void *hostPtr)
Return flags associated with host pointer.
Definition: hip_memory.cpp:1133
int major
Definition: hip_runtime_api.h:97
hipError_t hipMemAllocPitch(hipDeviceptr_t *dptr, size_t *pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes)
Definition: hip_memory.cpp:862
hipError_t hipIpcOpenMemHandle(void **devPtr, hipIpcMemHandle_t handle, unsigned int flags)
Opens an interprocess memory handle exported from another process and returns a device pointer usable...
Definition: hip_memory.cpp:2494
unsigned hasFunnelShift
Funnel two words into one with shift&mask caps.
Definition: hip_runtime_api.h:64
hipError_t hipCtxGetCacheConfig(hipFuncCache_t *cacheConfig)
Get L1/Shared cache partition.
Definition: hip_context.cpp:217
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Device.
Definition: hip_memory.cpp:1390
Maximum dimensions height of 3D images in image elements.
Definition: hip_runtime_api.h:331
Peak memory clock frequency in kilohertz.
Definition: hip_runtime_api.h:307
hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value...
Definition: hip_runtime_api.h:313
hipError_t hipCtxSynchronize(void)
Blocks until the default context has completed all preceding requested tasks.
Definition: hip_context.cpp:249
int clockInstructionRate
Definition: hip_runtime_api.h:107
hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream __dparm(0))
Fills the memory area pointed to by dst with the constant integer value for specified number of times...
int cooperativeMultiDeviceLaunch
HIP device supports cooperative launch on multiple devices.
Definition: hip_runtime_api.h:121
hipFuncAttribute
Definition: hip_runtime_api.h:299
unsigned hasGlobalInt32Atomics
32-bit integer atomics for global memory.
Definition: hip_runtime_api.h:47
hipError_t hipCtxGetCurrent(hipCtx_t *ctx)
Get the handle of the current/default context.
Definition: hip_context.cpp:167
Constant memory size in bytes.
Definition: hip_runtime_api.h:300
hipError_t hipMalloc3DArray(hipArray **array, const struct hipChannelFormatDesc *desc, struct hipExtent extent, unsigned int flags)
Allocate an array on the device.
Definition: hip_memory.cpp:1091
hipError_t hipMemcpyDtoHAsync(void *dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Host asynchronously.
Definition: hip_memory.cpp:1437
Pitch alignment requirement for 2D texture references bound to pitched memory.
Definition: hip_runtime_api.h:339
hipError_t hipDeviceSynchronize(void)
Waits on all active streams on current device.
Definition: hip_device.cpp:143
hipError_t hipCtxPushCurrent(hipCtx_t ctx)
Push the context to be set as current/default context.
Definition: hip_context.cpp:154
hipError_t hipMemcpyDtoH(void *dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Host.
Definition: hip_memory.cpp:1382
unsigned hasWarpVote
Warp vote instructions (__any, __all).
Definition: hip_runtime_api.h:61
int warpSize
Warp size.
Definition: hip_runtime_api.h:89
int concurrentKernels
Device can possibly execute multiple kernels concurrently.
Definition: hip_runtime_api.h:110
hipError_t hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies sizeBytes bytes from the memory area pointed to by src to the memory area pointed to by offset...
hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device)
Returns an identifier string for the device.
Definition: hip_device.cpp:446
hipError_t hipGetDeviceProperties(hipDeviceProp_t *prop, int deviceId)
Returns device properties.
Definition: hip_device.cpp:381
Alignment requirement for textures.
Definition: hip_runtime_api.h:338
hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
Register host memory so it can be accessed from the current device.
Definition: hip_memory.cpp:1158
size_t totalGlobalMem
Size of global memory region (in bytes).
Definition: hip_runtime_api.h:86
hipError_t hipMalloc(void **ptr, size_t size)
Allocate memory on the default accelerator.
Definition: hip_memory.cpp:695
const char * hipGetErrorName(hipError_t hip_error)
Return name of the specified error code in text form.
Definition: hip_error.cpp:48
hipError_t hipMemset2D(void *dst, size_t pitch, int value, size_t width, size_t height)
Fills the memory area pointed to by dst with the constant value.
Definition: hip_memory.cpp:2251
Definition: driver_types.h:363
Compute mode that device is currently in.
Definition: hip_runtime_api.h:310
hipFuncCache_t
Definition: hip_runtime_api.h:308
PCI Device ID.
Definition: hip_runtime_api.h:320
hipError_t hipGetLastError(void)
Return last error returned by any HIP runtime API call and resets the stored error code to #hipSucces...
Definition: hip_error.cpp:32
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags)
Make the specified compute stream wait for an event.
Definition: hip_stream.cpp:130
hipError_t hipOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, const void *f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determine the grid and block sizes to achieve maximum occupancy for a kernel.
Device can map host memory into device address space.
Definition: hip_runtime_api.h:341
int maxThreadsDim[3]
Max number of threads in each dimension (XYZ) of a block.
Definition: hip_runtime_api.h:91
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags)
Return flags associated with this stream.
Definition: hip_stream.cpp:223
Number of multiprocessors on the device.
Definition: hip_runtime_api.h:309
hipError_t hipMemGetInfo(size_t *free, size_t *total)
Query memory info. Return snapshot of free memory, and total allocatable memory on the device...
Definition: hip_memory.cpp:2296
hipError_t hipCtxGetDevice(hipDevice_t *device)
Get the handle of the device associated with current/default context.
Definition: hip_context.cpp:191
hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev)
Release the primary context on the GPU.
Definition: hip_context.cpp:285
int integrated
APU vs dGPU.
Definition: hip_runtime_api.h:119
hipError_t hipFree(void *ptr)
Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSy...
Definition: hip_memory.cpp:2344
hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int *apiVersion)
Returns the approximate HIP api version.
Definition: hip_context.cpp:207
int memoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:95
hipError_t hipDeviceGetP2PAttribute(int *value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice)
Returns a value for attr of link between two devices.
hipError_t hipDeviceReset(void)
The state of current device is discarded and updated to a fresh state.
Definition: hip_device.cpp:148
hipError_t hipInit(unsigned int flags)
Explicitly initializes the HIP runtime.
Definition: hip_context.cpp:39
unsigned hasWarpShuffle
Warp shuffle operations. (__shfl_*).
Definition: hip_runtime_api.h:63
unsigned hasGlobalFloatAtomicExch
32-bit float atomic exch for global memory.
Definition: hip_runtime_api.h:48
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
Copies data between host and device.
Definition: hip_memory.cpp:1712
hipError_t hipRuntimeGetVersion(int *runtimeVersion)
Returns the approximate HIP Runtime version.
Definition: hip_context.cpp:97
hipError_t hipMemcpyHtoA(hipArray *dstArray, size_t dstOffset, const void *srcHost, size_t count)
Copies data between host and device.
Definition: hip_memory.cpp:1528
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Device asynchronously.
Definition: hip_memory.cpp:1429
unsigned hasFloatAtomicAdd
32-bit float atomic add in global and shared memory.
Definition: hip_runtime_api.h:51
Definition: hip_runtime_api.h:84
hipError_t hipSetDevice(int deviceId)
Set default device to be used for subsequent hip API calls from this thread.
Definition: hip_device.cpp:132
hipError_t hipDeviceGet(hipDevice_t *device, int ordinal)
Returns a handle to a compute device.
Definition: hip_context.cpp:70
hipError_t hipDeviceTotalMem(size_t *bytes, hipDevice_t device)
Returns the total amount of memory on the device.
Definition: hip_device.cpp:480
hipError_t hipFuncSetCacheConfig(const void *func, hipFuncCache_t config)
Set Cache configuration for a specific function.
Definition: hip_device.cpp:108
size_t maxSharedMemoryPerMultiProcessor
Maximum Shared Memory Per Multiprocessor.
Definition: hip_runtime_api.h:114
hipError_t hipMemcpyPeerAsync(void *dst, int dstDeviceId, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream __dparm(0))
Copies memory from one device to memory on another device.
int cooperativeLaunch
HIP device supports cooperative launch.
Definition: hip_runtime_api.h:120
hipError_t hipCtxGetFlags(unsigned int *flags)
Return flags used for creating default context.
Definition: hip_context.cpp:254
Support cooperative launch.
Definition: hip_runtime_api.h:325
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:327
hipError_t hipCtxDestroy(hipCtx_t ctx)
Destroy a HIP context.
Definition: hip_context.cpp:109
int pciDeviceID
PCI Device ID.
Definition: hip_runtime_api.h:113
hipSharedMemConfig
Definition: hip_runtime_api.h:318
int pciDomainID
PCI Domain ID.
Definition: hip_runtime_api.h:111
int ECCEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:131
char name[256]
Device name.
Definition: hip_runtime_api.h:85
unsigned hasWarpBallot
Warp ballot instructions (__ballot).
Definition: hip_runtime_api.h:62
Definition: hip_hcc_internal.h:759
hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attr, int deviceId)
Query for a specific device attribute.
Definition: hip_device.cpp:354
hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void *src, size_t sizeBytes, hipStream_t stream)
Copy data from Host to Device asynchronously.
Definition: hip_memory.cpp:1422
hipError_t hipHostUnregister(void *hostPtr)
Un-register host pointer.
Definition: hip_memory.cpp:1233
Definition: hip_hcc_internal.h:580
hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
hipError_t hipMemcpyPeer(void *dst, int dstDeviceId, const void *src, int srcDeviceId, size_t sizeBytes)
Copies memory from one device to memory on another device.
Definition: hip_peer.cpp:207
hipError_t hipStreamCreate(hipStream_t *stream)
Create an asynchronous stream.
Definition: hip_stream.cpp:106
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream __dparm(0))
Copies data between host and device asynchronously.
unsigned has3dGrid
Grid and group dims are 3D (rather than 2D).
Definition: hip_runtime_api.h:72
hipError_t hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind)
Copy data from src to dst.
Definition: hip_memory.cpp:1367
hipError_t hipEventCreate(hipEvent_t *event)
Definition: hip_event.cpp:207
Definition: driver_types.h:91
Definition: hip_runtime_api.h:111
int cooperativeMultiDeviceUnmatchedSharedMem
Definition: hip_runtime_api.h:139
hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int *flags, int *active)
Get the state of the primary context.
Definition: hip_context.cpp:263
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags)
Enables direct access to memory allocations in a peer context.
Definition: hip_peer.cpp:221
Definition: driver_types.h:323
hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config)
The bank width of shared memory on current device is set.
Definition: hip_device.cpp:116
hipError_t hipGetDevice(int *deviceId)
Return the default device id for the calling host thread.
Definition: hip_device.cpp:32
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
Record an event in the specified stream.
Definition: hip_event.cpp:213
int memoryClockRate
Max global memory clock frequency in khz.
Definition: hip_runtime_api.h:94
Maximum dimension width of 2D images in image elements.
Definition: hip_runtime_api.h:328
hipError_t hipMallocManaged(void **dev_ptr, size_t size, unsigned int flags __dparm(hipMemAttachGlobal))
Allocates memory that will be automatically managed by AMD HMM.
hipError_t hipModuleGetGlobal(void **, size_t *, hipModule_t, const char *)
Returns device memory pointer and size of the kernel present in the module with symbol name ...
Definition: hip_module.cpp:1113
Definition: hip_hcc_internal.h:415
hipError_t hipDeviceGetPCIBusId(char *pciBusId, int len, int device)
Returns a PCI Bus Id string for the device, overloaded to take int device ID.
Definition: hip_device.cpp:460
int maxTexture2D[2]
Maximum dimensions (width, height) of 2D images, in image elements.
Definition: hip_runtime_api.h:123
hipError_t hipMemcpy2DToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1444
Device has ECC support enabled.
Definition: hip_runtime_api.h:342
hipError_t hipStreamQuery(hipStream_t stream)
Return #hipSuccess if all of the operations in the specified stream have completed, or #hipErrorNotReady if not.
Definition: hip_stream.cpp:161
hipError_t hipMemAllocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:766
hipError_t hipFuncGetAttributes(struct hipFuncAttributes *attr, const void *func)
Find out attributes for a given function.
Definition: hip_module.cpp:1393
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
Definition: hip_module.cpp:1683
int cooperativeMultiDeviceUnmatchedFunc
Definition: hip_runtime_api.h:133
int cooperativeMultiDeviceUnmatchedGridDim
Definition: hip_runtime_api.h:135
hipError_t hipIpcCloseMemHandle(void *devPtr)
Close memory mapped with hipIpcOpenMemHandle.
Definition: hip_memory.cpp:2539
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determine the grid and block sizes to achieve maximum occupancy for a kernel.
Definition: hip_module.cpp:1662
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, const void *ptr)
Return attributes for the specified pointer.
Definition: hip_memory.cpp:617
hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D *pCopy, hipStream_t stream __dparm(0))
Copies memory for 2D arrays.
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
Definition: hip_module.cpp:1693
hipError_t hipFreeHost(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. [Deprecated].
Definition: hip_runtime_api.h:817
hipError_t hipGetDeviceFlags(unsigned int *flags)
Gets the flags set for current device.
hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
Definition: hip_memory.cpp:2261
hipError_t hipProfilerStop()
Stop recording of profiling information. When using this API, start the profiler with profiling disab...
Definition: hip_hcc.cpp:2502
Definition: hip_runtime_api.h:317
int multiProcessorCount
Number of multi-processors (compute units).
Definition: hip_runtime_api.h:103
hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count)
Fills the memory area pointed to by dest with the constant integer value for specified number of time...
Definition: hip_memory.cpp:2281
hipError_t hipProfilerStart()
Start recording of profiling information. When using this API, start the profiler with profiling disab...
Definition: hip_hcc.cpp:2496
unsigned hasSharedFloatAtomicExch
32-bit float atomic exch for shared memory.
Definition: hip_runtime_api.h:50
Definition: texture_types.h:95