HIP: Heterogenous-computing Interface for Portability
math_functions.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #pragma once
24 
25 #include "hip_fp16_math_fwd.h"
26 #include "hip_vector_types.h"
27 #include "math_fwd.h"
28 
30 
31 #include <algorithm>
32 #include <assert.h>
33 #include <limits.h>
34 #include <limits>
35 #include <stdint.h>
36 
37 // HCC's own math functions should be included first, otherwise there will
38 // be conflicts when hip/math_functions.h is included before hip/hip_runtime.h.
39 #ifdef __HCC__
40 #include "kalmar_math.h"
41 #endif
42 
43 #pragma push_macro("__DEVICE__")
44 #pragma push_macro("__RETURN_TYPE")
45 
46 #ifdef __HCC__
47 #define __DEVICE__ __device__
48 #define __RETURN_TYPE int
49 #else // to be consistent with __clang_cuda_math_forward_declares
50 #define __DEVICE__ static __device__
51 #define __RETURN_TYPE bool
52 #endif
53 
54 __DEVICE__
55 inline
56 uint64_t __make_mantissa_base8(const char* tagp)
57 {
58  uint64_t r = 0;
59  while (tagp) {
60  char tmp = *tagp;
61 
62  if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0';
63  else return 0;
64 
65  ++tagp;
66  }
67 
68  return r;
69 }
70 
71 __DEVICE__
72 inline
73 uint64_t __make_mantissa_base10(const char* tagp)
74 {
75  uint64_t r = 0;
76  while (tagp) {
77  char tmp = *tagp;
78 
79  if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0';
80  else return 0;
81 
82  ++tagp;
83  }
84 
85  return r;
86 }
87 
88 __DEVICE__
89 inline
90 uint64_t __make_mantissa_base16(const char* tagp)
91 {
92  uint64_t r = 0;
93  while (tagp) {
94  char tmp = *tagp;
95 
96  if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0';
97  else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10;
98  else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10;
99  else return 0;
100 
101  ++tagp;
102  }
103 
104  return r;
105 }
106 
107 __DEVICE__
108 inline
109 uint64_t __make_mantissa(const char* tagp)
110 {
111  if (!tagp) return 0u;
112 
113  if (*tagp == '0') {
114  ++tagp;
115 
116  if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp);
117  else return __make_mantissa_base8(tagp);
118  }
119 
120  return __make_mantissa_base10(tagp);
121 }
122 
123 // DOT FUNCTIONS
124 #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
125 __DEVICE__
126 inline
127 int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) {
128  return __ockl_sdot2(a.data, b.data, c, saturate);
129 }
130 __DEVICE__
131 inline
132 uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) {
133  return __ockl_udot2(a.data, b.data, c, saturate);
134 }
135 __DEVICE__
136 inline
137 int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) {
138  return __ockl_sdot4(a.data, b.data, c, saturate);
139 }
140 __DEVICE__
141 inline
142 uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) {
143  return __ockl_udot4(a.data, b.data, c, saturate);
144 }
145 __DEVICE__
146 inline
147 int amd_mixed_dot(int a, int b, int c, bool saturate) {
148  return __ockl_sdot8(a, b, c, saturate);
149 }
150 __DEVICE__
151 inline
152 uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) {
153  return __ockl_udot8(a, b, c, saturate);
154 }
155 #endif
156 
157 // BEGIN FLOAT
158 __DEVICE__
159 inline
160 float abs(float x) { return __ocml_fabs_f32(x); }
161 __DEVICE__
162 inline
163 float acosf(float x) { return __ocml_acos_f32(x); }
164 __DEVICE__
165 inline
166 float acoshf(float x) { return __ocml_acosh_f32(x); }
167 __DEVICE__
168 inline
169 float asinf(float x) { return __ocml_asin_f32(x); }
170 __DEVICE__
171 inline
172 float asinhf(float x) { return __ocml_asinh_f32(x); }
173 __DEVICE__
174 inline
175 float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); }
176 __DEVICE__
177 inline
178 float atanf(float x) { return __ocml_atan_f32(x); }
179 __DEVICE__
180 inline
181 float atanhf(float x) { return __ocml_atanh_f32(x); }
182 __DEVICE__
183 inline
184 float cbrtf(float x) { return __ocml_cbrt_f32(x); }
185 __DEVICE__
186 inline
187 float ceilf(float x) { return __ocml_ceil_f32(x); }
188 __DEVICE__
189 inline
190 float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); }
191 __DEVICE__
192 inline
193 float cosf(float x) { return __ocml_cos_f32(x); }
194 __DEVICE__
195 inline
196 float coshf(float x) { return __ocml_cosh_f32(x); }
197 __DEVICE__
198 inline
199 float cospif(float x) { return __ocml_cospi_f32(x); }
200 __DEVICE__
201 inline
202 float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); }
203 __DEVICE__
204 inline
205 float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); }
206 __DEVICE__
207 inline
208 float erfcf(float x) { return __ocml_erfc_f32(x); }
209 __DEVICE__
210 inline
211 float erfcinvf(float x) { return __ocml_erfcinv_f32(x); }
212 __DEVICE__
213 inline
214 float erfcxf(float x) { return __ocml_erfcx_f32(x); }
215 __DEVICE__
216 inline
217 float erff(float x) { return __ocml_erf_f32(x); }
218 __DEVICE__
219 inline
220 float erfinvf(float x) { return __ocml_erfinv_f32(x); }
221 __DEVICE__
222 inline
223 float exp10f(float x) { return __ocml_exp10_f32(x); }
224 __DEVICE__
225 inline
226 float exp2f(float x) { return __ocml_exp2_f32(x); }
227 __DEVICE__
228 inline
229 float expf(float x) { return __ocml_exp_f32(x); }
230 __DEVICE__
231 inline
232 float expm1f(float x) { return __ocml_expm1_f32(x); }
233 __DEVICE__
234 inline
235 float fabsf(float x) { return __ocml_fabs_f32(x); }
236 __DEVICE__
237 inline
238 float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); }
239 __DEVICE__
240 inline
241 float fdividef(float x, float y) { return x / y; }
242 __DEVICE__
243 inline
244 float floorf(float x) { return __ocml_floor_f32(x); }
245 __DEVICE__
246 inline
247 float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); }
248 __DEVICE__
249 inline
250 float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); }
251 __DEVICE__
252 inline
253 float fminf(float x, float y) { return __ocml_fmin_f32(x, y); }
254 __DEVICE__
255 inline
256 float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); }
257 __DEVICE__
258 inline
259 float frexpf(float x, int* nptr)
260 {
261  int tmp;
262  float r =
263  __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp);
264  *nptr = tmp;
265 
266  return r;
267 }
268 __DEVICE__
269 inline
270 float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); }
271 __DEVICE__
272 inline
273 int ilogbf(float x) { return __ocml_ilogb_f32(x); }
274 __DEVICE__
275 inline
276 __RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); }
277 __DEVICE__
278 inline
279 __RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); }
280 __DEVICE__
281 inline
282 __RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); }
283 __DEVICE__
284 inline
285 float j0f(float x) { return __ocml_j0_f32(x); }
286 __DEVICE__
287 inline
288 float j1f(float x) { return __ocml_j1_f32(x); }
289 __DEVICE__
290 inline
291 float jnf(int n, float x)
292 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
293  // for linear recurrences to get O(log n) steps, but it's unclear if
294  // it'd be beneficial in this case.
295  if (n == 0) return j0f(x);
296  if (n == 1) return j1f(x);
297 
298  float x0 = j0f(x);
299  float x1 = j1f(x);
300  for (int i = 1; i < n; ++i) {
301  float x2 = (2 * i) / x * x1 - x0;
302  x0 = x1;
303  x1 = x2;
304  }
305 
306  return x1;
307 }
308 __DEVICE__
309 inline
310 float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); }
311 __DEVICE__
312 inline
313 float lgammaf(float x) { return __ocml_lgamma_f32(x); }
314 __DEVICE__
315 inline
316 long long int llrintf(float x) { return __ocml_rint_f32(x); }
317 __DEVICE__
318 inline
319 long long int llroundf(float x) { return __ocml_round_f32(x); }
320 __DEVICE__
321 inline
322 float log10f(float x) { return __ocml_log10_f32(x); }
323 __DEVICE__
324 inline
325 float log1pf(float x) { return __ocml_log1p_f32(x); }
326 __DEVICE__
327 inline
328 float log2f(float x) { return __ocml_log2_f32(x); }
329 __DEVICE__
330 inline
331 float logbf(float x) { return __ocml_logb_f32(x); }
332 __DEVICE__
333 inline
334 float logf(float x) { return __ocml_log_f32(x); }
335 __DEVICE__
336 inline
337 long int lrintf(float x) { return __ocml_rint_f32(x); }
338 __DEVICE__
339 inline
340 long int lroundf(float x) { return __ocml_round_f32(x); }
341 __DEVICE__
342 inline
343 float modff(float x, float* iptr)
344 {
345  float tmp;
346  float r =
347  __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp);
348  *iptr = tmp;
349 
350  return r;
351 }
352 __DEVICE__
353 inline
354 float nanf(const char* tagp)
355 {
356  union {
357  float val;
358  struct ieee_float {
359  uint32_t mantissa : 22;
360  uint32_t quiet : 1;
361  uint32_t exponent : 8;
362  uint32_t sign : 1;
363  } bits;
364 
365  static_assert(sizeof(float) == sizeof(ieee_float), "");
366  } tmp;
367 
368  tmp.bits.sign = 0u;
369  tmp.bits.exponent = ~0u;
370  tmp.bits.quiet = 1u;
371  tmp.bits.mantissa = __make_mantissa(tagp);
372 
373  return tmp.val;
374 }
375 __DEVICE__
376 inline
377 float nearbyintf(float x) { return __ocml_nearbyint_f32(x); }
378 __DEVICE__
379 inline
380 float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); }
381 __DEVICE__
382 inline
383 float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); }
384 __DEVICE__
385 inline
386 float norm4df(float x, float y, float z, float w)
387 {
388  return __ocml_len4_f32(x, y, z, w);
389 }
390 __DEVICE__
391 inline
392 float normcdff(float x) { return __ocml_ncdf_f32(x); }
393 __DEVICE__
394 inline
395 float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); }
396 __DEVICE__
397 inline
398 float normf(int dim, const float* a)
399 { // TODO: placeholder until OCML adds support.
400  float r = 0;
401  while (dim--) { r += a[0] * a[0]; ++a; }
402 
403  return __ocml_sqrt_f32(r);
404 }
405 __DEVICE__
406 inline
407 float powf(float x, float y) { return __ocml_pow_f32(x, y); }
408 __DEVICE__
409 inline
410 float rcbrtf(float x) { return __ocml_rcbrt_f32(x); }
411 __DEVICE__
412 inline
413 float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); }
414 __DEVICE__
415 inline
416 float remquof(float x, float y, int* quo)
417 {
418  int tmp;
419  float r =
420  __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp);
421  *quo = tmp;
422 
423  return r;
424 }
425 __DEVICE__
426 inline
427 float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); }
428 __DEVICE__
429 inline
430 float rintf(float x) { return __ocml_rint_f32(x); }
431 __DEVICE__
432 inline
433 float rnorm3df(float x, float y, float z)
434 {
435  return __ocml_rlen3_f32(x, y, z);
436 }
437 
438 __DEVICE__
439 inline
440 float rnorm4df(float x, float y, float z, float w)
441 {
442  return __ocml_rlen4_f32(x, y, z, w);
443 }
444 __DEVICE__
445 inline
446 float rnormf(int dim, const float* a)
447 { // TODO: placeholder until OCML adds support.
448  float r = 0;
449  while (dim--) { r += a[0] * a[0]; ++a; }
450 
451  return __ocml_rsqrt_f32(r);
452 }
453 __DEVICE__
454 inline
455 float roundf(float x) { return __ocml_round_f32(x); }
456 __DEVICE__
457 inline
458 float rsqrtf(float x) { return __ocml_rsqrt_f32(x); }
459 __DEVICE__
460 inline
461 float scalblnf(float x, long int n)
462 {
463  return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
464 }
465 __DEVICE__
466 inline
467 float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); }
468 __DEVICE__
469 inline
470 __RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); }
471 __DEVICE__
472 inline
473 void sincosf(float x, float* sptr, float* cptr)
474 {
475  float tmp;
476 
477  *sptr =
478  __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp);
479  *cptr = tmp;
480 }
481 __DEVICE__
482 inline
483 void sincospif(float x, float* sptr, float* cptr)
484 {
485  float tmp;
486 
487  *sptr =
488  __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp);
489  *cptr = tmp;
490 }
491 __DEVICE__
492 inline
493 float sinf(float x) { return __ocml_sin_f32(x); }
494 __DEVICE__
495 inline
496 float sinhf(float x) { return __ocml_sinh_f32(x); }
497 __DEVICE__
498 inline
499 float sinpif(float x) { return __ocml_sinpi_f32(x); }
500 __DEVICE__
501 inline
502 float sqrtf(float x) { return __ocml_sqrt_f32(x); }
503 __DEVICE__
504 inline
505 float tanf(float x) { return __ocml_tan_f32(x); }
506 __DEVICE__
507 inline
508 float tanhf(float x) { return __ocml_tanh_f32(x); }
509 __DEVICE__
510 inline
511 float tgammaf(float x) { return __ocml_tgamma_f32(x); }
512 __DEVICE__
513 inline
514 float truncf(float x) { return __ocml_trunc_f32(x); }
515 __DEVICE__
516 inline
517 float y0f(float x) { return __ocml_y0_f32(x); }
518 __DEVICE__
519 inline
520 float y1f(float x) { return __ocml_y1_f32(x); }
521 __DEVICE__
522 inline
523 float ynf(int n, float x)
524 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
525  // for linear recurrences to get O(log n) steps, but it's unclear if
526  // it'd be beneficial in this case. Placeholder until OCML adds
527  // support.
528  if (n == 0) return y0f(x);
529  if (n == 1) return y1f(x);
530 
531  float x0 = y0f(x);
532  float x1 = y1f(x);
533  for (int i = 1; i < n; ++i) {
534  float x2 = (2 * i) / x * x1 - x0;
535  x0 = x1;
536  x1 = x2;
537  }
538 
539  return x1;
540 }
541 
542 // BEGIN INTRINSICS
543 __DEVICE__
544 inline
545 float __cosf(float x) { return __ocml_native_cos_f32(x); }
546 __DEVICE__
547 inline
548 float __exp10f(float x) { return __ocml_native_exp10_f32(x); }
549 __DEVICE__
550 inline
551 float __expf(float x) { return __ocml_native_exp_f32(x); }
552 #if defined OCML_BASIC_ROUNDED_OPERATIONS
553 __DEVICE__
554 inline
555 float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); }
556 #endif
557 __DEVICE__
558 inline
559 float __fadd_rn(float x, float y) { return x + y; }
560 #if defined OCML_BASIC_ROUNDED_OPERATIONS
561 __DEVICE__
562 inline
563 float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); }
564 __DEVICE__
565 inline
566 float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
567 __DEVICE__
568 inline
569 float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); }
570 #endif
571 __DEVICE__
572 inline
573 float __fdiv_rn(float x, float y) { return x / y; }
574 #if defined OCML_BASIC_ROUNDED_OPERATIONS
575 __DEVICE__
576 inline
577 float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); }
578 __DEVICE__
579 inline
580 float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); }
581 #endif
582 __DEVICE__
583 inline
584 float __fdividef(float x, float y) { return x / y; }
585 #if defined OCML_BASIC_ROUNDED_OPERATIONS
586 __DEVICE__
587 inline
588 float __fmaf_rd(float x, float y, float z)
589 {
590  return __ocml_fma_rtn_f32(x, y, z);
591 }
592 #endif
593 __DEVICE__
594 inline
595 float __fmaf_rn(float x, float y, float z)
596 {
597  return __ocml_fma_f32(x, y, z);
598 }
599 #if defined OCML_BASIC_ROUNDED_OPERATIONS
600 __DEVICE__
601 inline
602 float __fmaf_ru(float x, float y, float z)
603 {
604  return __ocml_fma_rtp_f32(x, y, z);
605 }
606 __DEVICE__
607 inline
608 float __fmaf_rz(float x, float y, float z)
609 {
610  return __ocml_fma_rtz_f32(x, y, z);
611 }
612 __DEVICE__
613 inline
614 float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
615 #endif
616 __DEVICE__
617 inline
618 float __fmul_rn(float x, float y) { return x * y; }
619 #if defined OCML_BASIC_ROUNDED_OPERATIONS
620 __DEVICE__
621 inline
622 float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
623 __DEVICE__
624 inline
625 float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
626 __DEVICE__
627 inline
628 float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); }
629 #endif
630 __DEVICE__
631 inline
632 float __frcp_rn(float x) { return __llvm_amdgcn_rcp_f32(x); }
633 #if defined OCML_BASIC_ROUNDED_OPERATIONS
634 __DEVICE__
635 inline
636 float __frcp_ru(float x) { return __llvm_amdgcn_rcp_f32(x); }
637 __DEVICE__
638 inline
639 float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); }
640 #endif
641 __DEVICE__
642 inline
643 float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); }
644 #if defined OCML_BASIC_ROUNDED_OPERATIONS
645 __DEVICE__
646 inline
647 float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); }
648 #endif
649 __DEVICE__
650 inline
651 float __fsqrt_rn(float x) { return __ocml_native_sqrt_f32(x); }
652 #if defined OCML_BASIC_ROUNDED_OPERATIONS
653 __DEVICE__
654 inline
655 float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); }
656 __DEVICE__
657 inline
658 float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
659 __DEVICE__
660 inline
661 float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
662 #endif
663 __DEVICE__
664 inline
665 float __fsub_rn(float x, float y) { return x - y; }
666 #if defined OCML_BASIC_ROUNDED_OPERATIONS
667 __DEVICE__
668 inline
669 float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
670 __DEVICE__
671 inline
672 float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); }
673 #endif
674 __DEVICE__
675 inline
676 float __log10f(float x) { return __ocml_native_log10_f32(x); }
677 __DEVICE__
678 inline
679 float __log2f(float x) { return __ocml_native_log2_f32(x); }
680 __DEVICE__
681 inline
682 float __logf(float x) { return __ocml_native_log_f32(x); }
683 __DEVICE__
684 inline
685 float __powf(float x, float y) { return __ocml_pow_f32(x, y); }
686 __DEVICE__
687 inline
688 float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); }
689 __DEVICE__
690 inline
691 void __sincosf(float x, float* sptr, float* cptr)
692 {
693  *sptr = __ocml_native_sin_f32(x);
694  *cptr = __ocml_native_cos_f32(x);
695 }
696 __DEVICE__
697 inline
698 float __sinf(float x) { return __ocml_native_sin_f32(x); }
699 __DEVICE__
700 inline
701 float __tanf(float x) { return __ocml_tan_f32(x); }
702 // END INTRINSICS
703 // END FLOAT
704 
705 // BEGIN DOUBLE
706 __DEVICE__
707 inline
708 double abs(double x) { return __ocml_fabs_f64(x); }
709 __DEVICE__
710 inline
711 double acos(double x) { return __ocml_acos_f64(x); }
712 __DEVICE__
713 inline
714 double acosh(double x) { return __ocml_acosh_f64(x); }
715 __DEVICE__
716 inline
717 double asin(double x) { return __ocml_asin_f64(x); }
718 __DEVICE__
719 inline
720 double asinh(double x) { return __ocml_asinh_f64(x); }
721 __DEVICE__
722 inline
723 double atan(double x) { return __ocml_atan_f64(x); }
724 __DEVICE__
725 inline
726 double atan2(double x, double y) { return __ocml_atan2_f64(x, y); }
727 __DEVICE__
728 inline
729 double atanh(double x) { return __ocml_atanh_f64(x); }
730 __DEVICE__
731 inline
732 double cbrt(double x) { return __ocml_cbrt_f64(x); }
733 __DEVICE__
734 inline
735 double ceil(double x) { return __ocml_ceil_f64(x); }
736 __DEVICE__
737 inline
738 double copysign(double x, double y) { return __ocml_copysign_f64(x, y); }
739 __DEVICE__
740 inline
741 double cos(double x) { return __ocml_cos_f64(x); }
742 __DEVICE__
743 inline
744 double cosh(double x) { return __ocml_cosh_f64(x); }
745 __DEVICE__
746 inline
747 double cospi(double x) { return __ocml_cospi_f64(x); }
748 __DEVICE__
749 inline
750 double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); }
751 __DEVICE__
752 inline
753 double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); }
754 __DEVICE__
755 inline
756 double erf(double x) { return __ocml_erf_f64(x); }
757 __DEVICE__
758 inline
759 double erfc(double x) { return __ocml_erfc_f64(x); }
760 __DEVICE__
761 inline
762 double erfcinv(double x) { return __ocml_erfcinv_f64(x); }
763 __DEVICE__
764 inline
765 double erfcx(double x) { return __ocml_erfcx_f64(x); }
766 __DEVICE__
767 inline
768 double erfinv(double x) { return __ocml_erfinv_f64(x); }
769 __DEVICE__
770 inline
771 double exp(double x) { return __ocml_exp_f64(x); }
772 __DEVICE__
773 inline
774 double exp10(double x) { return __ocml_exp10_f64(x); }
775 __DEVICE__
776 inline
777 double exp2(double x) { return __ocml_exp2_f64(x); }
778 __DEVICE__
779 inline
780 double expm1(double x) { return __ocml_expm1_f64(x); }
781 __DEVICE__
782 inline
783 double fabs(double x) { return __ocml_fabs_f64(x); }
784 __DEVICE__
785 inline
786 double fdim(double x, double y) { return __ocml_fdim_f64(x, y); }
787 __DEVICE__
788 inline
789 double floor(double x) { return __ocml_floor_f64(x); }
790 __DEVICE__
791 inline
792 double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); }
793 __DEVICE__
794 inline
795 double fmax(double x, double y) { return __ocml_fmax_f64(x, y); }
796 __DEVICE__
797 inline
798 double fmin(double x, double y) { return __ocml_fmin_f64(x, y); }
799 __DEVICE__
800 inline
801 double fmod(double x, double y) { return __ocml_fmod_f64(x, y); }
802 __DEVICE__
803 inline
804 double frexp(double x, int* nptr)
805 {
806  int tmp;
807  double r =
808  __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp);
809  *nptr = tmp;
810 
811  return r;
812 }
813 __DEVICE__
814 inline
815 double hypot(double x, double y) { return __ocml_hypot_f64(x, y); }
816 __DEVICE__
817 inline
818 int ilogb(double x) { return __ocml_ilogb_f64(x); }
819 __DEVICE__
820 inline
821 __RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); }
822 __DEVICE__
823 inline
824 __RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); }
825 __DEVICE__
826 inline
827 __RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); }
828 __DEVICE__
829 inline
830 double j0(double x) { return __ocml_j0_f64(x); }
831 __DEVICE__
832 inline
833 double j1(double x) { return __ocml_j1_f64(x); }
834 __DEVICE__
835 inline
836 double jn(int n, double x)
837 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
838  // for linear recurrences to get O(log n) steps, but it's unclear if
839  // it'd be beneficial in this case. Placeholder until OCML adds
840  // support.
841  if (n == 0) return j0f(x);
842  if (n == 1) return j1f(x);
843 
844  double x0 = j0f(x);
845  double x1 = j1f(x);
846  for (int i = 1; i < n; ++i) {
847  double x2 = (2 * i) / x * x1 - x0;
848  x0 = x1;
849  x1 = x2;
850  }
851 
852  return x1;
853 }
854 __DEVICE__
855 inline
856 double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); }
857 __DEVICE__
858 inline
859 double lgamma(double x) { return __ocml_lgamma_f64(x); }
860 __DEVICE__
861 inline
862 long long int llrint(double x) { return __ocml_rint_f64(x); }
863 __DEVICE__
864 inline
865 long long int llround(double x) { return __ocml_round_f64(x); }
866 __DEVICE__
867 inline
868 double log(double x) { return __ocml_log_f64(x); }
869 __DEVICE__
870 inline
871 double log10(double x) { return __ocml_log10_f64(x); }
872 __DEVICE__
873 inline
874 double log1p(double x) { return __ocml_log1p_f64(x); }
875 __DEVICE__
876 inline
877 double log2(double x) { return __ocml_log2_f64(x); }
878 __DEVICE__
879 inline
880 double logb(double x) { return __ocml_logb_f64(x); }
881 __DEVICE__
882 inline
883 long int lrint(double x) { return __ocml_rint_f64(x); }
884 __DEVICE__
885 inline
886 long int lround(double x) { return __ocml_round_f64(x); }
887 __DEVICE__
888 inline
889 double modf(double x, double* iptr)
890 {
891  double tmp;
892  double r =
893  __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp);
894  *iptr = tmp;
895 
896  return r;
897 }
898 __DEVICE__
899 inline
900 double nan(const char* tagp)
901 {
902  union {
903  double val;
904  struct ieee_double {
905  uint64_t mantissa : 51;
906  uint32_t quiet : 1;
907  uint32_t exponent : 11;
908  uint32_t sign : 1;
909  } bits;
910 
911  static_assert(sizeof(double) == sizeof(ieee_double), "");
912  } tmp;
913 
914  tmp.bits.sign = 0u;
915  tmp.bits.exponent = ~0u;
916  tmp.bits.quiet = 1u;
917  tmp.bits.mantissa = __make_mantissa(tagp);
918 
919  return tmp.val;
920 }
921 __DEVICE__
922 inline
923 double nearbyint(double x) { return __ocml_nearbyint_f64(x); }
924 __DEVICE__
925 inline
926 double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); }
927 __DEVICE__
928 inline
929 double norm(int dim, const double* a)
930 { // TODO: placeholder until OCML adds support.
931  double r = 0;
932  while (dim--) { r += a[0] * a[0]; ++a; }
933 
934  return __ocml_sqrt_f64(r);
935 }
936 __DEVICE__
937 inline
938 double norm3d(double x, double y, double z)
939 {
940  return __ocml_len3_f64(x, y, z);
941 }
942 __DEVICE__
943 inline
944 double norm4d(double x, double y, double z, double w)
945 {
946  return __ocml_len4_f64(x, y, z, w);
947 }
948 __DEVICE__
949 inline
950 double normcdf(double x) { return __ocml_ncdf_f64(x); }
951 __DEVICE__
952 inline
953 double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); }
954 __DEVICE__
955 inline
956 double pow(double x, double y) { return __ocml_pow_f64(x, y); }
957 __DEVICE__
958 inline
959 double rcbrt(double x) { return __ocml_rcbrt_f64(x); }
960 __DEVICE__
961 inline
962 double remainder(double x, double y) { return __ocml_remainder_f64(x, y); }
963 __DEVICE__
964 inline
965 double remquo(double x, double y, int* quo)
966 {
967  int tmp;
968  double r =
969  __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp);
970  *quo = tmp;
971 
972  return r;
973 }
974 __DEVICE__
975 inline
976 double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); }
977 __DEVICE__
978 inline
979 double rint(double x) { return __ocml_rint_f64(x); }
980 __DEVICE__
981 inline
982 double rnorm(int dim, const double* a)
983 { // TODO: placeholder until OCML adds support.
984  double r = 0;
985  while (dim--) { r += a[0] * a[0]; ++a; }
986 
987  return __ocml_rsqrt_f64(r);
988 }
989 __DEVICE__
990 inline
991 double rnorm3d(double x, double y, double z)
992 {
993  return __ocml_rlen3_f64(x, y, z);
994 }
995 __DEVICE__
996 inline
997 double rnorm4d(double x, double y, double z, double w)
998 {
999  return __ocml_rlen4_f64(x, y, z, w);
1000 }
1001 __DEVICE__
1002 inline
1003 double round(double x) { return __ocml_round_f64(x); }
1004 __DEVICE__
1005 inline
1006 double rsqrt(double x) { return __ocml_rsqrt_f64(x); }
1007 __DEVICE__
1008 inline
1009 double scalbln(double x, long int n)
1010 {
1011  return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
1012 }
1013 __DEVICE__
1014 inline
1015 double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); }
1016 __DEVICE__
1017 inline
1018 __RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); }
1019 __DEVICE__
1020 inline
1021 double sin(double x) { return __ocml_sin_f64(x); }
1022 __DEVICE__
1023 inline
1024 void sincos(double x, double* sptr, double* cptr)
1025 {
1026  double tmp;
1027  *sptr =
1028  __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp);
1029  *cptr = tmp;
1030 }
1031 __DEVICE__
1032 inline
1033 void sincospi(double x, double* sptr, double* cptr)
1034 {
1035  double tmp;
1036  *sptr = __ocml_sincospi_f64(
1037  x, (__attribute__((address_space(5))) double*) &tmp);
1038  *cptr = tmp;
1039 }
1040 __DEVICE__
1041 inline
1042 double sinh(double x) { return __ocml_sinh_f64(x); }
1043 __DEVICE__
1044 inline
1045 double sinpi(double x) { return __ocml_sinpi_f64(x); }
1046 __DEVICE__
1047 inline
1048 double sqrt(double x) { return __ocml_sqrt_f64(x); }
1049 __DEVICE__
1050 inline
1051 double tan(double x) { return __ocml_tan_f64(x); }
1052 __DEVICE__
1053 inline
1054 double tanh(double x) { return __ocml_tanh_f64(x); }
1055 __DEVICE__
1056 inline
1057 double tgamma(double x) { return __ocml_tgamma_f64(x); }
1058 __DEVICE__
1059 inline
1060 double trunc(double x) { return __ocml_trunc_f64(x); }
1061 __DEVICE__
1062 inline
1063 double y0(double x) { return __ocml_y0_f64(x); }
1064 __DEVICE__
1065 inline
1066 double y1(double x) { return __ocml_y1_f64(x); }
1067 __DEVICE__
1068 inline
1069 double yn(int n, double x)
1070 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
1071  // for linear recurrences to get O(log n) steps, but it's unclear if
1072  // it'd be beneficial in this case. Placeholder until OCML adds
1073  // support.
1074  if (n == 0) return j0f(x);
1075  if (n == 1) return j1f(x);
1076 
1077  double x0 = j0f(x);
1078  double x1 = j1f(x);
1079  for (int i = 1; i < n; ++i) {
1080  double x2 = (2 * i) / x * x1 - x0;
1081  x0 = x1;
1082  x1 = x2;
1083  }
1084 
1085  return x1;
1086 }
1087 
1088 // BEGIN INTRINSICS
1089 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1090 __DEVICE__
1091 inline
1092 double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); }
1093 #endif
1094 __DEVICE__
1095 inline
1096 double __dadd_rn(double x, double y) { return x + y; }
1097 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1098 __DEVICE__
1099 inline
1100 double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); }
1101 __DEVICE__
1102 inline
1103 double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
1104 __DEVICE__
1105 inline
1106 double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); }
1107 #endif
1108 __DEVICE__
1109 inline
1110 double __ddiv_rn(double x, double y) { return x / y; }
1111 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1112 __DEVICE__
1113 inline
1114 double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); }
1115 __DEVICE__
1116 inline
1117 double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); }
1118 __DEVICE__
1119 inline
1120 double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
1121 #endif
1122 __DEVICE__
1123 inline
1124 double __dmul_rn(double x, double y) { return x * y; }
1125 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1126 __DEVICE__
1127 inline
1128 double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
1129 __DEVICE__
1130 inline
1131 double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
1132 __DEVICE__
1133 inline
1134 double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); }
1135 #endif
1136 __DEVICE__
1137 inline
1138 double __drcp_rn(double x) { return __llvm_amdgcn_rcp_f64(x); }
1139 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1140 __DEVICE__
1141 inline
1142 double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); }
1143 __DEVICE__
1144 inline
1145 double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); }
1146 __DEVICE__
1147 inline
1148 double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); }
1149 #endif
1150 __DEVICE__
1151 inline
1152 double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); }
1153 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1154 __DEVICE__
1155 inline
1156 double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); }
1157 __DEVICE__
1158 inline
1159 double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
1160 __DEVICE__
1161 inline
1162 double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
1163 #endif
1164 __DEVICE__
1165 inline
1166 double __dsub_rn(double x, double y) { return x - y; }
1167 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1168 __DEVICE__
1169 inline
1170 double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
1171 __DEVICE__
1172 inline
1173 double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); }
1174 __DEVICE__
1175 inline
1176 double __fma_rd(double x, double y, double z)
1177 {
1178  return __ocml_fma_rtn_f64(x, y, z);
1179 }
1180 #endif
1181 __DEVICE__
1182 inline
1183 double __fma_rn(double x, double y, double z)
1184 {
1185  return __ocml_fma_f64(x, y, z);
1186 }
1187 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1188 __DEVICE__
1189 inline
1190 double __fma_ru(double x, double y, double z)
1191 {
1192  return __ocml_fma_rtp_f64(x, y, z);
1193 }
1194 __DEVICE__
1195 inline
1196 double __fma_rz(double x, double y, double z)
1197 {
1198  return __ocml_fma_rtz_f64(x, y, z);
1199 }
1200 #endif
1201 // END INTRINSICS
1202 // END DOUBLE
1203 
1204 // BEGIN INTEGER
1205 __DEVICE__
1206 inline
1207 int abs(int x)
1208 {
1209  int sgn = x >> (sizeof(int) * CHAR_BIT - 1);
1210  return (x ^ sgn) - sgn;
1211 }
1212 __DEVICE__
1213 inline
1214 long labs(long x)
1215 {
1216  long sgn = x >> (sizeof(long) * CHAR_BIT - 1);
1217  return (x ^ sgn) - sgn;
1218 }
1219 __DEVICE__
1220 inline
1221 long long llabs(long long x)
1222 {
1223  long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1);
1224  return (x ^ sgn) - sgn;
1225 }
1226 
1227 #if defined(__cplusplus)
1228  __DEVICE__
1229  inline
1230  long abs(long x) { return labs(x); }
1231  __DEVICE__
1232  inline
1233  long long abs(long long x) { return llabs(x); }
1234 #endif
1235 // END INTEGER
1236 
1237 __DEVICE__
1238 inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
1239  return __ocml_fma_f16(x, y, z);
1240 }
1241 
1242 __DEVICE__
1243 inline float fma(float x, float y, float z) {
1244  return fmaf(x, y, z);
1245 }
1246 
1247 #pragma push_macro("__DEF_FLOAT_FUN")
1248 #pragma push_macro("__DEF_FLOAT_FUN2")
1249 #pragma push_macro("__DEF_FLOAT_FUN2I")
1250 #pragma push_macro("__HIP_OVERLOAD")
1251 #pragma push_macro("__HIP_OVERLOAD2")
1252 
1253 // __hip_enable_if::type is a type function which returns __T if __B is true.
1254 template<bool __B, class __T = void>
1256 
1257 template <class __T> struct __hip_enable_if<true, __T> {
1258  typedef __T type;
1259 };
1260 
1261 // __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
1262 // avoid compilation error due to ambibuity. e.g. floor(5) is resolved with
1263 // floor(double).
1264 #define __HIP_OVERLOAD1(__retty, __fn) \
1265  template <typename __T> \
1266  __DEVICE__ \
1267  typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \
1268  __retty>::type \
1269  __fn(__T __x) { \
1270  return ::__fn((double)__x); \
1271  }
1272 
1273 // __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
1274 // or integer argument to avoid compilation error due to ambibuity. e.g.
1275 // max(5.0f, 6.0) is resolved with max(double, double).
1276 #define __HIP_OVERLOAD2(__retty, __fn) \
1277  template <typename __T1, typename __T2> \
1278  __DEVICE__ typename __hip_enable_if< \
1279  std::numeric_limits<__T1>::is_specialized && \
1280  std::numeric_limits<__T2>::is_specialized, \
1281  __retty>::type \
1282  __fn(__T1 __x, __T2 __y) { \
1283  return __fn((double)__x, (double)__y); \
1284  }
1285 
1286 // Define cmath functions with float argument and returns float.
1287 #define __DEF_FUN1(retty, func) \
1288 __DEVICE__ \
1289 inline \
1290 float func(float x) \
1291 { \
1292  return func##f(x); \
1293 } \
1294 __HIP_OVERLOAD1(retty, func)
1295 
1296 // Define cmath functions with float argument and returns retty.
1297 #define __DEF_FUNI(retty, func) \
1298 __DEVICE__ \
1299 inline \
1300 retty func(float x) \
1301 { \
1302  return func##f(x); \
1303 } \
1304 __HIP_OVERLOAD1(retty, func)
1305 
1306 // define cmath functions with two float arguments.
1307 #define __DEF_FUN2(retty, func) \
1308 __DEVICE__ \
1309 inline \
1310 float func(float x, float y) \
1311 { \
1312  return func##f(x, y); \
1313 } \
1314 __HIP_OVERLOAD2(retty, func)
1315 
1316 __DEF_FUN1(double, acos)
1317 __DEF_FUN1(double, acosh)
1318 __DEF_FUN1(double, asin)
1319 __DEF_FUN1(double, asinh)
1320 __DEF_FUN1(double, atan)
1321 __DEF_FUN2(double, atan2);
1322 __DEF_FUN1(double, atanh)
1323 __DEF_FUN1(double, cbrt)
1324 __DEF_FUN1(double, ceil)
1325 __DEF_FUN2(double, copysign);
1326 __DEF_FUN1(double, cos)
1327 __DEF_FUN1(double, cosh)
1328 __DEF_FUN1(double, erf)
1329 __DEF_FUN1(double, erfc)
1330 __DEF_FUN1(double, exp)
1331 __DEF_FUN1(double, exp2)
1332 __DEF_FUN1(double, expm1)
1333 __DEF_FUN1(double, fabs)
1334 __DEF_FUN2(double, fdim);
1335 __DEF_FUN1(double, floor)
1336 __DEF_FUN2(double, fmax);
1337 __DEF_FUN2(double, fmin);
1338 __DEF_FUN2(double, fmod);
1339 //__HIP_OVERLOAD1(int, fpclassify)
1340 __DEF_FUN2(double, hypot);
1341 __DEF_FUNI(int, ilogb)
1342 __HIP_OVERLOAD1(bool, isfinite)
1343 __HIP_OVERLOAD2(bool, isgreater);
1344 __HIP_OVERLOAD2(bool, isgreaterequal);
1345 __HIP_OVERLOAD1(bool, isinf);
1346 __HIP_OVERLOAD2(bool, isless);
1347 __HIP_OVERLOAD2(bool, islessequal);
1348 __HIP_OVERLOAD2(bool, islessgreater);
1349 __HIP_OVERLOAD1(bool, isnan);
1350 //__HIP_OVERLOAD1(bool, isnormal)
1351 __HIP_OVERLOAD2(bool, isunordered);
1352 __DEF_FUN1(double, lgamma)
1353 __DEF_FUN1(double, log)
1354 __DEF_FUN1(double, log10)
1355 __DEF_FUN1(double, log1p)
1356 __DEF_FUN1(double, log2)
1357 __DEF_FUN1(double, logb)
1358 __DEF_FUNI(long long, llrint)
1359 __DEF_FUNI(long long, llround)
1360 __DEF_FUNI(long, lrint)
1361 __DEF_FUNI(long, lround)
1362 __DEF_FUN1(double, nearbyint);
1363 __DEF_FUN2(double, nextafter);
1364 __DEF_FUN2(double, pow);
1365 __DEF_FUN2(double, remainder);
1366 __DEF_FUN1(double, rint);
1367 __DEF_FUN1(double, round);
1368 __HIP_OVERLOAD1(bool, signbit)
1369 __DEF_FUN1(double, sin)
1370 __DEF_FUN1(double, sinh)
1371 __DEF_FUN1(double, sqrt)
1372 __DEF_FUN1(double, tan)
1373 __DEF_FUN1(double, tanh)
1374 __DEF_FUN1(double, tgamma)
1375 __DEF_FUN1(double, trunc);
1376 
1377 // define cmath functions with a float and an integer argument.
1378 #define __DEF_FLOAT_FUN2I(func) \
1379 __DEVICE__ \
1380 inline \
1381 float func(float x, int y) \
1382 { \
1383  return func##f(x, y); \
1384 }
1385 __DEF_FLOAT_FUN2I(scalbn)
1386 
1387 #if __HCC__
1388 template<class T>
1389 __DEVICE__ inline static T min(T arg1, T arg2) {
1390  return (arg1 < arg2) ? arg1 : arg2;
1391 }
1392 
1393 __DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) {
1394  return min(arg1, (uint32_t) arg2);
1395 }
1396 /*__DEVICE__ inline static uint32_t min(int32_t arg1, uint32_t arg2) {
1397  return min((uint32_t) arg1, arg2);
1398 }
1399 
1400 __DEVICE__ inline static uint64_t min(uint64_t arg1, int64_t arg2) {
1401  return min(arg1, (uint64_t) arg2);
1402 }
1403 __DEVICE__ inline static uint64_t min(int64_t arg1, uint64_t arg2) {
1404  return min((uint64_t) arg1, arg2);
1405 }
1406 
1407 __DEVICE__ inline static unsigned long long min(unsigned long long arg1, long long arg2) {
1408  return min(arg1, (unsigned long long) arg2);
1409 }
1410 __DEVICE__ inline static unsigned long long min(long long arg1, unsigned long long arg2) {
1411  return min((unsigned long long) arg1, arg2);
1412 }*/
1413 
1414 template<class T>
1415 __DEVICE__ inline static T max(T arg1, T arg2) {
1416  return (arg1 > arg2) ? arg1 : arg2;
1417 }
1418 
1419 __DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) {
1420  return max(arg1, (uint32_t) arg2);
1421 }
1422 __DEVICE__ inline static uint32_t max(int32_t arg1, uint32_t arg2) {
1423  return max((uint32_t) arg1, arg2);
1424 }
1425 
1426 /*__DEVICE__ inline static uint64_t max(uint64_t arg1, int64_t arg2) {
1427  return max(arg1, (uint64_t) arg2);
1428 }
1429 __DEVICE__ inline static uint64_t max(int64_t arg1, uint64_t arg2) {
1430  return max((uint64_t) arg1, arg2);
1431 }
1432 
1433 __DEVICE__ inline static unsigned long long max(unsigned long long arg1, long long arg2) {
1434  return max(arg1, (unsigned long long) arg2);
1435 }
1436 __DEVICE__ inline static unsigned long long max(long long arg1, unsigned long long arg2) {
1437  return max((unsigned long long) arg1, arg2);
1438 }*/
1439 #else
1440 __DEVICE__ inline int min(int arg1, int arg2) {
1441  return (arg1 < arg2) ? arg1 : arg2;
1442 }
1443 __DEVICE__ inline int max(int arg1, int arg2) {
1444  return (arg1 > arg2) ? arg1 : arg2;
1445 }
1446 
1447 __DEVICE__
1448 inline
1449 float max(float x, float y) {
1450  return fmaxf(x, y);
1451 }
1452 
1453 __DEVICE__
1454 inline
1455 double max(double x, double y) {
1456  return fmax(x, y);
1457 }
1458 
1459 __DEVICE__
1460 inline
1461 float min(float x, float y) {
1462  return fminf(x, y);
1463 }
1464 
1465 __DEVICE__
1466 inline
1467 double min(double x, double y) {
1468  return fmin(x, y);
1469 }
1470 
1471 __HIP_OVERLOAD2(double, max)
1472 __HIP_OVERLOAD2(double, min)
1473 
1474 #endif
1475 
1476 __host__ inline static int min(int arg1, int arg2) {
1477  return std::min(arg1, arg2);
1478 }
1479 
1480 __host__ inline static int max(int arg1, int arg2) {
1481  return std::max(arg1, arg2);
1482 }
1483 
1484 
1485 #pragma pop_macro("__DEF_FLOAT_FUN")
1486 #pragma pop_macro("__DEF_FLOAT_FUN2")
1487 #pragma pop_macro("__DEF_FLOAT_FUN2I")
1488 #pragma pop_macro("__HIP_OVERLOAD")
1489 #pragma pop_macro("__HIP_OVERLOAD2")
1490 #pragma pop_macro("__DEVICE__")
1491 #pragma pop_macro("__RETURN_TYPE")
1492 
1493 // For backward compatibility.
1494 // There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros
1495 // defined after including math_functions.h.
TODO-doc.
#define __host__
Definition: host_defines.h:41
Contains definitions of APIs for HIP runtime.
Defines the different newt vector types for HIP runtime.
Definition: math_functions.h:1255