HIP: Heterogenous-computing Interface for Portability
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
math_functions.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #pragma once
24 
25 #include "hip_fp16_math_fwd.h"
26 #include "math_fwd.h"
27 
29 
30 #include <algorithm>
31 #include <assert.h>
32 #include <limits.h>
33 #include <limits>
34 #include <stdint.h>
35 
36 // HCC's own math functions should be included first, otherwise there will
37 // be conflicts when hip/math_functions.h is included before hip/hip_runtime.h.
38 #ifdef __HCC__
39 #include "kalmar_math.h"
40 #endif
41 
42 #pragma push_macro("__DEVICE__")
43 #pragma push_macro("__RETURN_TYPE")
44 
45 #ifdef __HCC__
46 #define __DEVICE__ __device__
47 #define __RETURN_TYPE int
48 #else // to be consistent with __clang_cuda_math_forward_declares
49 #define __DEVICE__ static __device__
50 #define __RETURN_TYPE bool
51 #endif
52 
53 __DEVICE__
54 inline
55 uint64_t __make_mantissa_base8(const char* tagp)
56 {
57  uint64_t r = 0;
58  while (tagp) {
59  char tmp = *tagp;
60 
61  if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0';
62  else return 0;
63 
64  ++tagp;
65  }
66 
67  return r;
68 }
69 
70 __DEVICE__
71 inline
72 uint64_t __make_mantissa_base10(const char* tagp)
73 {
74  uint64_t r = 0;
75  while (tagp) {
76  char tmp = *tagp;
77 
78  if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0';
79  else return 0;
80 
81  ++tagp;
82  }
83 
84  return r;
85 }
86 
87 __DEVICE__
88 inline
89 uint64_t __make_mantissa_base16(const char* tagp)
90 {
91  uint64_t r = 0;
92  while (tagp) {
93  char tmp = *tagp;
94 
95  if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0';
96  else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10;
97  else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10;
98  else return 0;
99 
100  ++tagp;
101  }
102 
103  return r;
104 }
105 
106 __DEVICE__
107 inline
108 uint64_t __make_mantissa(const char* tagp)
109 {
110  if (!tagp) return 0u;
111 
112  if (*tagp == '0') {
113  ++tagp;
114 
115  if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp);
116  else return __make_mantissa_base8(tagp);
117  }
118 
119  return __make_mantissa_base10(tagp);
120 }
121 
122 // BEGIN FLOAT
123 __DEVICE__
124 inline
125 float abs(float x) { return __ocml_fabs_f32(x); }
126 __DEVICE__
127 inline
128 float acosf(float x) { return __ocml_acos_f32(x); }
129 __DEVICE__
130 inline
131 float acoshf(float x) { return __ocml_acosh_f32(x); }
132 __DEVICE__
133 inline
134 float asinf(float x) { return __ocml_asin_f32(x); }
135 __DEVICE__
136 inline
137 float asinhf(float x) { return __ocml_asinh_f32(x); }
138 __DEVICE__
139 inline
140 float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); }
141 __DEVICE__
142 inline
143 float atanf(float x) { return __ocml_atan_f32(x); }
144 __DEVICE__
145 inline
146 float atanhf(float x) { return __ocml_atanh_f32(x); }
147 __DEVICE__
148 inline
149 float cbrtf(float x) { return __ocml_cbrt_f32(x); }
150 __DEVICE__
151 inline
152 float ceilf(float x) { return __ocml_ceil_f32(x); }
153 __DEVICE__
154 inline
155 float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); }
156 __DEVICE__
157 inline
158 float cosf(float x) { return __ocml_cos_f32(x); }
159 __DEVICE__
160 inline
161 float coshf(float x) { return __ocml_cosh_f32(x); }
162 __DEVICE__
163 inline
164 float cospif(float x) { return __ocml_cospi_f32(x); }
165 __DEVICE__
166 inline
167 float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); }
168 __DEVICE__
169 inline
170 float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); }
171 __DEVICE__
172 inline
173 float erfcf(float x) { return __ocml_erfc_f32(x); }
174 __DEVICE__
175 inline
176 float erfcinvf(float x) { return __ocml_erfcinv_f32(x); }
177 __DEVICE__
178 inline
179 float erfcxf(float x) { return __ocml_erfcx_f32(x); }
180 __DEVICE__
181 inline
182 float erff(float x) { return __ocml_erf_f32(x); }
183 __DEVICE__
184 inline
185 float erfinvf(float x) { return __ocml_erfinv_f32(x); }
186 __DEVICE__
187 inline
188 float exp10f(float x) { return __ocml_exp10_f32(x); }
189 __DEVICE__
190 inline
191 float exp2f(float x) { return __ocml_exp2_f32(x); }
192 __DEVICE__
193 inline
194 float expf(float x) { return __ocml_exp_f32(x); }
195 __DEVICE__
196 inline
197 float expm1f(float x) { return __ocml_expm1_f32(x); }
198 __DEVICE__
199 inline
200 float fabsf(float x) { return __ocml_fabs_f32(x); }
201 __DEVICE__
202 inline
203 float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); }
204 __DEVICE__
205 inline
206 float fdividef(float x, float y) { return x / y; }
207 __DEVICE__
208 inline
209 float floorf(float x) { return __ocml_floor_f32(x); }
210 __DEVICE__
211 inline
212 float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); }
213 __DEVICE__
214 inline
215 float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); }
216 __DEVICE__
217 inline
218 float fminf(float x, float y) { return __ocml_fmin_f32(x, y); }
219 __DEVICE__
220 inline
221 float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); }
222 __DEVICE__
223 inline
224 float frexpf(float x, int* nptr)
225 {
226  int tmp;
227  float r =
228  __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp);
229  *nptr = tmp;
230 
231  return r;
232 }
233 __DEVICE__
234 inline
235 float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); }
236 __DEVICE__
237 inline
238 int ilogbf(float x) { return __ocml_ilogb_f32(x); }
239 __DEVICE__
240 inline
241 __RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); }
242 __DEVICE__
243 inline
244 __RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); }
245 __DEVICE__
246 inline
247 __RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); }
248 __DEVICE__
249 inline
250 float j0f(float x) { return __ocml_j0_f32(x); }
251 __DEVICE__
252 inline
253 float j1f(float x) { return __ocml_j1_f32(x); }
254 __DEVICE__
255 inline
256 float jnf(int n, float x)
257 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
258  // for linear recurrences to get O(log n) steps, but it's unclear if
259  // it'd be beneficial in this case.
260  if (n == 0) return j0f(x);
261  if (n == 1) return j1f(x);
262 
263  float x0 = j0f(x);
264  float x1 = j1f(x);
265  for (int i = 1; i < n; ++i) {
266  float x2 = (2 * i) / x * x1 - x0;
267  x0 = x1;
268  x1 = x2;
269  }
270 
271  return x1;
272 }
273 __DEVICE__
274 inline
275 float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); }
276 __DEVICE__
277 inline
278 float lgammaf(float x) { return __ocml_lgamma_f32(x); }
279 __DEVICE__
280 inline
281 long long int llrintf(float x) { return __ocml_rint_f32(x); }
282 __DEVICE__
283 inline
284 long long int llroundf(float x) { return __ocml_round_f32(x); }
285 __DEVICE__
286 inline
287 float log10f(float x) { return __ocml_log10_f32(x); }
288 __DEVICE__
289 inline
290 float log1pf(float x) { return __ocml_log1p_f32(x); }
291 __DEVICE__
292 inline
293 float log2f(float x) { return __ocml_log2_f32(x); }
294 __DEVICE__
295 inline
296 float logbf(float x) { return __ocml_logb_f32(x); }
297 __DEVICE__
298 inline
299 float logf(float x) { return __ocml_log_f32(x); }
300 __DEVICE__
301 inline
302 long int lrintf(float x) { return __ocml_rint_f32(x); }
303 __DEVICE__
304 inline
305 long int lroundf(float x) { return __ocml_round_f32(x); }
306 __DEVICE__
307 inline
308 float modff(float x, float* iptr)
309 {
310  float tmp;
311  float r =
312  __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp);
313  *iptr = tmp;
314 
315  return r;
316 }
317 __DEVICE__
318 inline
319 float nanf(const char* tagp)
320 {
321  union {
322  float val;
323  struct ieee_float {
324  uint32_t mantissa : 22;
325  uint32_t quiet : 1;
326  uint32_t exponent : 8;
327  uint32_t sign : 1;
328  } bits;
329 
330  static_assert(sizeof(float) == sizeof(ieee_float), "");
331  } tmp;
332 
333  tmp.bits.sign = 0u;
334  tmp.bits.exponent = ~0u;
335  tmp.bits.quiet = 1u;
336  tmp.bits.mantissa = __make_mantissa(tagp);
337 
338  return tmp.val;
339 }
340 __DEVICE__
341 inline
342 float nearbyintf(float x) { return __ocml_nearbyint_f32(x); }
343 __DEVICE__
344 inline
345 float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); }
346 __DEVICE__
347 inline
348 float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); }
349 __DEVICE__
350 inline
351 float norm4df(float x, float y, float z, float w)
352 {
353  return __ocml_len4_f32(x, y, z, w);
354 }
355 __DEVICE__
356 inline
357 float normcdff(float x) { return __ocml_ncdf_f32(x); }
358 __DEVICE__
359 inline
360 float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); }
361 __DEVICE__
362 inline
363 float normf(int dim, const float* a)
364 { // TODO: placeholder until OCML adds support.
365  float r = 0;
366  while (dim--) { r += a[0] * a[0]; ++a; }
367 
368  return __ocml_sqrt_f32(r);
369 }
370 __DEVICE__
371 inline
372 float powf(float x, float y) { return __ocml_pow_f32(x, y); }
373 __DEVICE__
374 inline
375 float rcbrtf(float x) { return __ocml_rcbrt_f32(x); }
376 __DEVICE__
377 inline
378 float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); }
379 __DEVICE__
380 inline
381 float remquof(float x, float y, int* quo)
382 {
383  int tmp;
384  float r =
385  __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp);
386  *quo = tmp;
387 
388  return r;
389 }
390 __DEVICE__
391 inline
392 float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); }
393 __DEVICE__
394 inline
395 float rintf(float x) { return __ocml_rint_f32(x); }
396 __DEVICE__
397 inline
398 float rnorm3df(float x, float y, float z)
399 {
400  return __ocml_rlen3_f32(x, y, z);
401 }
402 
403 __DEVICE__
404 inline
405 float rnorm4df(float x, float y, float z, float w)
406 {
407  return __ocml_rlen4_f32(x, y, z, w);
408 }
409 __DEVICE__
410 inline
411 float rnormf(int dim, const float* a)
412 { // TODO: placeholder until OCML adds support.
413  float r = 0;
414  while (dim--) { r += a[0] * a[0]; ++a; }
415 
416  return __ocml_rsqrt_f32(r);
417 }
418 __DEVICE__
419 inline
420 float roundf(float x) { return __ocml_round_f32(x); }
421 __DEVICE__
422 inline
423 float rsqrtf(float x) { return __ocml_rsqrt_f32(x); }
424 __DEVICE__
425 inline
426 float scalblnf(float x, long int n)
427 {
428  return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
429 }
430 __DEVICE__
431 inline
432 float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); }
433 __DEVICE__
434 inline
435 __RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); }
436 __DEVICE__
437 inline
438 void sincosf(float x, float* sptr, float* cptr)
439 {
440  float tmp;
441 
442  *sptr =
443  __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp);
444  *cptr = tmp;
445 }
446 __DEVICE__
447 inline
448 void sincospif(float x, float* sptr, float* cptr)
449 {
450  float tmp;
451 
452  *sptr =
453  __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp);
454  *cptr = tmp;
455 }
456 __DEVICE__
457 inline
458 float sinf(float x) { return __ocml_sin_f32(x); }
459 __DEVICE__
460 inline
461 float sinhf(float x) { return __ocml_sinh_f32(x); }
462 __DEVICE__
463 inline
464 float sinpif(float x) { return __ocml_sinpi_f32(x); }
465 __DEVICE__
466 inline
467 float sqrtf(float x) { return __ocml_sqrt_f32(x); }
468 __DEVICE__
469 inline
470 float tanf(float x) { return __ocml_tan_f32(x); }
471 __DEVICE__
472 inline
473 float tanhf(float x) { return __ocml_tanh_f32(x); }
474 __DEVICE__
475 inline
476 float tgammaf(float x) { return __ocml_tgamma_f32(x); }
477 __DEVICE__
478 inline
479 float truncf(float x) { return __ocml_trunc_f32(x); }
480 __DEVICE__
481 inline
482 float y0f(float x) { return __ocml_y0_f32(x); }
483 __DEVICE__
484 inline
485 float y1f(float x) { return __ocml_y1_f32(x); }
486 __DEVICE__
487 inline
488 float ynf(int n, float x)
489 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
490  // for linear recurrences to get O(log n) steps, but it's unclear if
491  // it'd be beneficial in this case. Placeholder until OCML adds
492  // support.
493  if (n == 0) return y0f(x);
494  if (n == 1) return y1f(x);
495 
496  float x0 = y0f(x);
497  float x1 = y1f(x);
498  for (int i = 1; i < n; ++i) {
499  float x2 = (2 * i) / x * x1 - x0;
500  x0 = x1;
501  x1 = x2;
502  }
503 
504  return x1;
505 }
506 
507 // BEGIN INTRINSICS
508 __DEVICE__
509 inline
510 float __cosf(float x) { return __ocml_cos_f32(x); }
511 __DEVICE__
512 inline
513 float __exp10f(float x) { return __ocml_exp10_f32(x); }
514 __DEVICE__
515 inline
516 float __expf(float x) { return __ocml_exp_f32(x); }
517 #if defined OCML_BASIC_ROUNDED_OPERATIONS
518 __DEVICE__
519 inline
520 float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); }
521 __DEVICE__
522 inline
523 float __fadd_rn(float x, float y) { return __ocml_add_rte_f32(x, y); }
524 __DEVICE__
525 inline
526 float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); }
527 __DEVICE__
528 inline
529 float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
530 __DEVICE__
531 inline
532 float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); }
533 __DEVICE__
534 inline
535 float __fdiv_rn(float x, float y) { return __ocml_div_rte_f32(x, y); }
536 __DEVICE__
537 inline
538 float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); }
539 __DEVICE__
540 inline
541 float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); }
542 #endif
543 __DEVICE__
544 inline
545 float __fdividef(float x, float y) { return x / y; }
546 #if defined OCML_BASIC_ROUNDED_OPERATIONS
547 __DEVICE__
548 inline
549 float __fmaf_rd(float x, float y, float z)
550 {
551  return __ocml_fma_rtn_f32(x, y, z);
552 }
553 __DEVICE__
554 inline
555 float __fmaf_rn(float x, float y, float z)
556 {
557  return __ocml_fma_rte_f32(x, y, z);
558 }
559 __DEVICE__
560 inline
561 float __fmaf_ru(float x, float y, float z)
562 {
563  return __ocml_fma_rtp_f32(x, y, z);
564 }
565 __DEVICE__
566 inline
567 float __fmaf_rz(float x, float y, float z)
568 {
569  return __ocml_fma_rtz_f32(x, y, z);
570 }
571 __DEVICE__
572 inline
573 float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
574 __DEVICE__
575 inline
576 float __fmul_rn(float x, float y) { return __ocml_mul_rte_f32(x, y); }
577 __DEVICE__
578 inline
579 float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
580 __DEVICE__
581 inline
582 float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
583 __DEVICE__
584 inline
585 float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); }
586 __DEVICE__
587 inline
588 float __frcp_rn(float x) { return __llvm_amdgcn_rcp_f32(x); }
589 __DEVICE__
590 inline
591 float __frcp_ru(float x) { return __llvm_amdgcn_rcp_f32(x); }
592 __DEVICE__
593 inline
594 float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); }
595 __DEVICE__
596 inline
597 float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); }
598 __DEVICE__
599 inline
600 float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); }
601 __DEVICE__
602 inline
603 float __fsqrt_rn(float x) { return __ocml_sqrt_rte_f32(x); }
604 __DEVICE__
605 inline
606 float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); }
607 __DEVICE__
608 inline
609 float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
610 __DEVICE__
611 inline
612 float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
613 __DEVICE__
614 inline
615 float __fsub_rn(float x, float y) { return __ocml_sub_rte_f32(x, y); }
616 __DEVICE__
617 inline
618 float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
619 __DEVICE__
620 inline
621 float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); }
622 #endif
623 __DEVICE__
624 inline
625 float __log10f(float x) { return __ocml_log10_f32(x); }
626 __DEVICE__
627 inline
628 float __log2f(float x) { return __ocml_log2_f32(x); }
629 __DEVICE__
630 inline
631 float __logf(float x) { return __ocml_log_f32(x); }
632 __DEVICE__
633 inline
634 float __powf(float x, float y) { return __ocml_pow_f32(x, y); }
635 __DEVICE__
636 inline
637 float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); }
638 __DEVICE__
639 inline
640 void __sincosf(float x, float* sptr, float* cptr)
641 {
642  float tmp;
643 
644  *sptr =
645  __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp);
646  *cptr = tmp;
647 }
648 __DEVICE__
649 inline
650 float __sinf(float x) { return __ocml_sin_f32(x); }
651 __DEVICE__
652 inline
653 float __tanf(float x) { return __ocml_tan_f32(x); }
654 // END INTRINSICS
655 // END FLOAT
656 
657 // BEGIN DOUBLE
658 __DEVICE__
659 inline
660 double abs(double x) { return __ocml_fabs_f64(x); }
661 __DEVICE__
662 inline
663 double acos(double x) { return __ocml_acos_f64(x); }
664 __DEVICE__
665 inline
666 double acosh(double x) { return __ocml_acosh_f64(x); }
667 __DEVICE__
668 inline
669 double asin(double x) { return __ocml_asin_f64(x); }
670 __DEVICE__
671 inline
672 double asinh(double x) { return __ocml_asinh_f64(x); }
673 __DEVICE__
674 inline
675 double atan(double x) { return __ocml_atan_f64(x); }
676 __DEVICE__
677 inline
678 double atan2(double x, double y) { return __ocml_atan2_f64(x, y); }
679 __DEVICE__
680 inline
681 double atanh(double x) { return __ocml_atanh_f64(x); }
682 __DEVICE__
683 inline
684 double cbrt(double x) { return __ocml_cbrt_f64(x); }
685 __DEVICE__
686 inline
687 double ceil(double x) { return __ocml_ceil_f64(x); }
688 __DEVICE__
689 inline
690 double copysign(double x, double y) { return __ocml_copysign_f64(x, y); }
691 __DEVICE__
692 inline
693 double cos(double x) { return __ocml_cos_f64(x); }
694 __DEVICE__
695 inline
696 double cosh(double x) { return __ocml_cosh_f64(x); }
697 __DEVICE__
698 inline
699 double cospi(double x) { return __ocml_cospi_f64(x); }
700 __DEVICE__
701 inline
702 double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); }
703 __DEVICE__
704 inline
705 double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); }
706 __DEVICE__
707 inline
708 double erf(double x) { return __ocml_erf_f64(x); }
709 __DEVICE__
710 inline
711 double erfc(double x) { return __ocml_erfc_f64(x); }
712 __DEVICE__
713 inline
714 double erfcinv(double x) { return __ocml_erfcinv_f64(x); }
715 __DEVICE__
716 inline
717 double erfcx(double x) { return __ocml_erfcx_f64(x); }
718 __DEVICE__
719 inline
720 double erfinv(double x) { return __ocml_erfinv_f64(x); }
721 __DEVICE__
722 inline
723 double exp(double x) { return __ocml_exp_f64(x); }
724 __DEVICE__
725 inline
726 double exp10(double x) { return __ocml_exp10_f64(x); }
727 __DEVICE__
728 inline
729 double exp2(double x) { return __ocml_exp2_f64(x); }
730 __DEVICE__
731 inline
732 double expm1(double x) { return __ocml_expm1_f64(x); }
733 __DEVICE__
734 inline
735 double fabs(double x) { return __ocml_fabs_f64(x); }
736 __DEVICE__
737 inline
738 double fdim(double x, double y) { return __ocml_fdim_f64(x, y); }
739 __DEVICE__
740 inline
741 double floor(double x) { return __ocml_floor_f64(x); }
742 __DEVICE__
743 inline
744 double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); }
745 __DEVICE__
746 inline
747 double fmax(double x, double y) { return __ocml_fmax_f64(x, y); }
748 __DEVICE__
749 inline
750 double fmin(double x, double y) { return __ocml_fmin_f64(x, y); }
751 __DEVICE__
752 inline
753 double fmod(double x, double y) { return __ocml_fmod_f64(x, y); }
754 __DEVICE__
755 inline
756 double frexp(double x, int* nptr)
757 {
758  int tmp;
759  double r =
760  __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp);
761  *nptr = tmp;
762 
763  return r;
764 }
765 __DEVICE__
766 inline
767 double hypot(double x, double y) { return __ocml_hypot_f64(x, y); }
768 __DEVICE__
769 inline
770 int ilogb(double x) { return __ocml_ilogb_f64(x); }
771 __DEVICE__
772 inline
773 __RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); }
774 __DEVICE__
775 inline
776 __RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); }
777 __DEVICE__
778 inline
779 __RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); }
780 __DEVICE__
781 inline
782 double j0(double x) { return __ocml_j0_f64(x); }
783 __DEVICE__
784 inline
785 double j1(double x) { return __ocml_j1_f64(x); }
786 __DEVICE__
787 inline
788 double jn(int n, double x)
789 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
790  // for linear recurrences to get O(log n) steps, but it's unclear if
791  // it'd be beneficial in this case. Placeholder until OCML adds
792  // support.
793  if (n == 0) return j0f(x);
794  if (n == 1) return j1f(x);
795 
796  double x0 = j0f(x);
797  double x1 = j1f(x);
798  for (int i = 1; i < n; ++i) {
799  double x2 = (2 * i) / x * x1 - x0;
800  x0 = x1;
801  x1 = x2;
802  }
803 
804  return x1;
805 }
806 __DEVICE__
807 inline
808 double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); }
809 __DEVICE__
810 inline
811 double lgamma(double x) { return __ocml_lgamma_f64(x); }
812 __DEVICE__
813 inline
814 long long int llrint(double x) { return __ocml_rint_f64(x); }
815 __DEVICE__
816 inline
817 long long int llround(double x) { return __ocml_round_f64(x); }
818 __DEVICE__
819 inline
820 double log(double x) { return __ocml_log_f64(x); }
821 __DEVICE__
822 inline
823 double log10(double x) { return __ocml_log10_f64(x); }
824 __DEVICE__
825 inline
826 double log1p(double x) { return __ocml_log1p_f64(x); }
827 __DEVICE__
828 inline
829 double log2(double x) { return __ocml_log2_f64(x); }
830 __DEVICE__
831 inline
832 double logb(double x) { return __ocml_logb_f64(x); }
833 __DEVICE__
834 inline
835 long int lrint(double x) { return __ocml_rint_f64(x); }
836 __DEVICE__
837 inline
838 long int lround(double x) { return __ocml_round_f64(x); }
839 __DEVICE__
840 inline
841 double modf(double x, double* iptr)
842 {
843  double tmp;
844  double r =
845  __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp);
846  *iptr = tmp;
847 
848  return r;
849 }
850 __DEVICE__
851 inline
852 double nan(const char* tagp)
853 {
854  union {
855  double val;
856  struct ieee_double {
857  uint64_t mantissa : 51;
858  uint32_t quiet : 1;
859  uint32_t exponent : 11;
860  uint32_t sign : 1;
861  } bits;
862 
863  static_assert(sizeof(double) == sizeof(ieee_double), "");
864  } tmp;
865 
866  tmp.bits.sign = 0u;
867  tmp.bits.exponent = ~0u;
868  tmp.bits.quiet = 1u;
869  tmp.bits.mantissa = __make_mantissa(tagp);
870 
871  return tmp.val;
872 }
873 __DEVICE__
874 inline
875 double nearbyint(double x) { return __ocml_nearbyint_f64(x); }
876 __DEVICE__
877 inline
878 double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); }
879 __DEVICE__
880 inline
881 double norm(int dim, const double* a)
882 { // TODO: placeholder until OCML adds support.
883  double r = 0;
884  while (dim--) { r += a[0] * a[0]; ++a; }
885 
886  return __ocml_sqrt_f64(r);
887 }
888 __DEVICE__
889 inline
890 double norm3d(double x, double y, double z)
891 {
892  return __ocml_len3_f64(x, y, z);
893 }
894 __DEVICE__
895 inline
896 double norm4d(double x, double y, double z, double w)
897 {
898  return __ocml_len4_f64(x, y, z, w);
899 }
900 __DEVICE__
901 inline
902 double normcdf(double x) { return __ocml_ncdf_f64(x); }
903 __DEVICE__
904 inline
905 double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); }
906 __DEVICE__
907 inline
908 double pow(double x, double y) { return __ocml_pow_f64(x, y); }
909 __DEVICE__
910 inline
911 double rcbrt(double x) { return __ocml_rcbrt_f64(x); }
912 __DEVICE__
913 inline
914 double remainder(double x, double y) { return __ocml_remainder_f64(x, y); }
915 __DEVICE__
916 inline
917 double remquo(double x, double y, int* quo)
918 {
919  int tmp;
920  double r =
921  __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp);
922  *quo = tmp;
923 
924  return r;
925 }
926 __DEVICE__
927 inline
928 double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); }
929 __DEVICE__
930 inline
931 double rint(double x) { return __ocml_rint_f64(x); }
932 __DEVICE__
933 inline
934 double rnorm(int dim, const double* a)
935 { // TODO: placeholder until OCML adds support.
936  double r = 0;
937  while (dim--) { r += a[0] * a[0]; ++a; }
938 
939  return __ocml_rsqrt_f64(r);
940 }
941 __DEVICE__
942 inline
943 double rnorm3d(double x, double y, double z)
944 {
945  return __ocml_rlen3_f64(x, y, z);
946 }
947 __DEVICE__
948 inline
949 double rnorm4d(double x, double y, double z, double w)
950 {
951  return __ocml_rlen4_f64(x, y, z, w);
952 }
953 __DEVICE__
954 inline
955 double round(double x) { return __ocml_round_f64(x); }
956 __DEVICE__
957 inline
958 double rsqrt(double x) { return __ocml_rsqrt_f64(x); }
959 __DEVICE__
960 inline
961 double scalbln(double x, long int n)
962 {
963  return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
964 }
965 __DEVICE__
966 inline
967 double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); }
968 __DEVICE__
969 inline
970 __RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); }
971 __DEVICE__
972 inline
973 double sin(double x) { return __ocml_sin_f64(x); }
974 __DEVICE__
975 inline
976 void sincos(double x, double* sptr, double* cptr)
977 {
978  double tmp;
979  *sptr =
980  __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp);
981  *cptr = tmp;
982 }
983 __DEVICE__
984 inline
985 void sincospi(double x, double* sptr, double* cptr)
986 {
987  double tmp;
988  *sptr = __ocml_sincospi_f64(
989  x, (__attribute__((address_space(5))) double*) &tmp);
990  *cptr = tmp;
991 }
992 __DEVICE__
993 inline
994 double sinh(double x) { return __ocml_sinh_f64(x); }
995 __DEVICE__
996 inline
997 double sinpi(double x) { return __ocml_sinpi_f64(x); }
998 __DEVICE__
999 inline
1000 double sqrt(double x) { return __ocml_sqrt_f64(x); }
1001 __DEVICE__
1002 inline
1003 double tan(double x) { return __ocml_tan_f64(x); }
1004 __DEVICE__
1005 inline
1006 double tanh(double x) { return __ocml_tanh_f64(x); }
1007 __DEVICE__
1008 inline
1009 double tgamma(double x) { return __ocml_tgamma_f64(x); }
1010 __DEVICE__
1011 inline
1012 double trunc(double x) { return __ocml_trunc_f64(x); }
1013 __DEVICE__
1014 inline
1015 double y0(double x) { return __ocml_y0_f64(x); }
1016 __DEVICE__
1017 inline
1018 double y1(double x) { return __ocml_y1_f64(x); }
1019 __DEVICE__
1020 inline
1021 double yn(int n, double x)
1022 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
1023  // for linear recurrences to get O(log n) steps, but it's unclear if
1024  // it'd be beneficial in this case. Placeholder until OCML adds
1025  // support.
1026  if (n == 0) return j0f(x);
1027  if (n == 1) return j1f(x);
1028 
1029  double x0 = j0f(x);
1030  double x1 = j1f(x);
1031  for (int i = 1; i < n; ++i) {
1032  double x2 = (2 * i) / x * x1 - x0;
1033  x0 = x1;
1034  x1 = x2;
1035  }
1036 
1037  return x1;
1038 }
1039 
1040 // BEGIN INTRINSICS
1041 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1042 __DEVICE__
1043 inline
1044 double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); }
1045 __DEVICE__
1046 inline
1047 double __dadd_rn(double x, double y) { return __ocml_add_rte_f64(x, y); }
1048 __DEVICE__
1049 inline
1050 double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); }
1051 __DEVICE__
1052 inline
1053 double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
1054 __DEVICE__
1055 inline
1056 double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); }
1057 __DEVICE__
1058 inline
1059 double __ddiv_rn(double x, double y) { return __ocml_div_rte_f64(x, y); }
1060 __DEVICE__
1061 inline
1062 double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); }
1063 __DEVICE__
1064 inline
1065 double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); }
1066 __DEVICE__
1067 inline
1068 double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
1069 __DEVICE__
1070 inline
1071 double __dmul_rn(double x, double y) { return __ocml_mul_rte_f64(x, y); }
1072 __DEVICE__
1073 inline
1074 double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
1075 __DEVICE__
1076 inline
1077 double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
1078 __DEVICE__
1079 inline
1080 double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); }
1081 __DEVICE__
1082 inline
1083 double __drcp_rn(double x) { return __llvm_amdgcn_rcp_f64(x); }
1084 __DEVICE__
1085 inline
1086 double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); }
1087 __DEVICE__
1088 inline
1089 double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); }
1090 __DEVICE__
1091 inline
1092 double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); }
1093 __DEVICE__
1094 inline
1095 double __dsqrt_rn(double x) { return __ocml_sqrt_rte_f64(x); }
1096 __DEVICE__
1097 inline
1098 double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); }
1099 __DEVICE__
1100 inline
1101 double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
1102 __DEVICE__
1103 inline
1104 double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
1105 __DEVICE__
1106 inline
1107 double __dsub_rn(double x, double y) { return __ocml_sub_rte_f64(x, y); }
1108 __DEVICE__
1109 inline
1110 double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
1111 __DEVICE__
1112 inline
1113 double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); }
1114 __DEVICE__
1115 inline
1116 double __fma_rd(double x, double y, double z)
1117 {
1118  return __ocml_fma_rtn_f64(x, y, z);
1119 }
1120 __DEVICE__
1121 inline
1122 double __fma_rn(double x, double y, double z)
1123 {
1124  return __ocml_fma_rte_f64(x, y, z);
1125 }
1126 __DEVICE__
1127 inline
1128 double __fma_ru(double x, double y, double z)
1129 {
1130  return __ocml_fma_rtp_f64(x, y, z);
1131 }
1132 __DEVICE__
1133 inline
1134 double __fma_rz(double x, double y, double z)
1135 {
1136  return __ocml_fma_rtz_f64(x, y, z);
1137 }
1138 #endif
1139 // END INTRINSICS
1140 // END DOUBLE
1141 
1142 // BEGIN INTEGER
1143 __DEVICE__
1144 inline
1145 int abs(int x)
1146 {
1147  int sgn = x >> (sizeof(int) * CHAR_BIT - 1);
1148  return (x ^ sgn) - sgn;
1149 }
1150 __DEVICE__
1151 inline
1152 long labs(long x)
1153 {
1154  long sgn = x >> (sizeof(long) * CHAR_BIT - 1);
1155  return (x ^ sgn) - sgn;
1156 }
1157 __DEVICE__
1158 inline
1159 long long llabs(long long x)
1160 {
1161  long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1);
1162  return (x ^ sgn) - sgn;
1163 }
1164 
1165 #if defined(__cplusplus)
1166  __DEVICE__
1167  inline
1168  long abs(long x) { return labs(x); }
1169  __DEVICE__
1170  inline
1171  long long abs(long long x) { return llabs(x); }
1172 #endif
1173 // END INTEGER
1174 
1175 __DEVICE__
1176 inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
1177  return __ocml_fma_f16(x, y, z);
1178 }
1179 
1180 __DEVICE__
1181 inline float fma(float x, float y, float z) {
1182  return fmaf(x, y, z);
1183 }
1184 
1185 #pragma push_macro("__DEF_FLOAT_FUN")
1186 #pragma push_macro("__DEF_FLOAT_FUN2")
1187 #pragma push_macro("__DEF_FLOAT_FUN2I")
1188 #pragma push_macro("__HIP_OVERLOAD")
1189 #pragma push_macro("__HIP_OVERLOAD2")
1190 
1191 // __hip_enable_if::type is a type function which returns __T if __B is true.
1192 template<bool __B, class __T = void>
1194 
1195 template <class __T> struct __hip_enable_if<true, __T> {
1196  typedef __T type;
1197 };
1198 
1199 // __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
1200 // avoid compilation error due to ambibuity. e.g. floor(5) is resolved with
1201 // floor(double).
1202 #define __HIP_OVERLOAD1(__retty, __fn) \
1203  template <typename __T> \
1204  __DEVICE__ \
1205  typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \
1206  __retty>::type \
1207  __fn(__T __x) { \
1208  return ::__fn((double)__x); \
1209  }
1210 
1211 // __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
1212 // or integer argument to avoid compilation error due to ambibuity. e.g.
1213 // max(5.0f, 6.0) is resolved with max(double, double).
1214 #define __HIP_OVERLOAD2(__retty, __fn) \
1215  template <typename __T1, typename __T2> \
1216  __DEVICE__ typename __hip_enable_if< \
1217  std::numeric_limits<__T1>::is_specialized && \
1218  std::numeric_limits<__T2>::is_specialized, \
1219  __retty>::type \
1220  __fn(__T1 __x, __T2 __y) { \
1221  return __fn((double)__x, (double)__y); \
1222  }
1223 
1224 // Define cmath functions with float argument and returns float.
1225 #define __DEF_FUN1(retty, func) \
1226 __DEVICE__ \
1227 inline \
1228 float func(float x) \
1229 { \
1230  return func##f(x); \
1231 } \
1232 __HIP_OVERLOAD1(retty, func)
1233 
1234 // Define cmath functions with float argument and returns retty.
1235 #define __DEF_FUNI(retty, func) \
1236 __DEVICE__ \
1237 inline \
1238 retty func(float x) \
1239 { \
1240  return func##f(x); \
1241 } \
1242 __HIP_OVERLOAD1(retty, func)
1243 
1244 // define cmath functions with two float arguments.
1245 #define __DEF_FUN2(retty, func) \
1246 __DEVICE__ \
1247 inline \
1248 float func(float x, float y) \
1249 { \
1250  return func##f(x, y); \
1251 } \
1252 __HIP_OVERLOAD2(retty, func)
1253 
1254 __DEF_FUN1(double, acos)
1255 __DEF_FUN1(double, acosh)
1256 __DEF_FUN1(double, asin)
1257 __DEF_FUN1(double, asinh)
1258 __DEF_FUN1(double, atan)
1259 __DEF_FUN2(double, atan2);
1260 __DEF_FUN1(double, atanh)
1261 __DEF_FUN1(double, cbrt)
1262 __DEF_FUN1(double, ceil)
1263 __DEF_FUN2(double, copysign);
1264 __DEF_FUN1(double, cos)
1265 __DEF_FUN1(double, cosh)
1266 __DEF_FUN1(double, erf)
1267 __DEF_FUN1(double, erfc)
1268 __DEF_FUN1(double, exp)
1269 __DEF_FUN1(double, exp2)
1270 __DEF_FUN1(double, expm1)
1271 __DEF_FUN1(double, fabs)
1272 __DEF_FUN2(double, fdim);
1273 __DEF_FUN1(double, floor)
1274 __DEF_FUN2(double, fmax);
1275 __DEF_FUN2(double, fmin);
1276 __DEF_FUN2(double, fmod);
1277 //__HIP_OVERLOAD1(int, fpclassify)
1278 __DEF_FUN2(double, hypot);
1279 __DEF_FUNI(int, ilogb)
1280 __HIP_OVERLOAD1(bool, isfinite)
1281 __HIP_OVERLOAD2(bool, isgreater);
1282 __HIP_OVERLOAD2(bool, isgreaterequal);
1283 __HIP_OVERLOAD1(bool, isinf);
1284 __HIP_OVERLOAD2(bool, isless);
1285 __HIP_OVERLOAD2(bool, islessequal);
1286 __HIP_OVERLOAD2(bool, islessgreater);
1287 __HIP_OVERLOAD1(bool, isnan);
1288 //__HIP_OVERLOAD1(bool, isnormal)
1289 __HIP_OVERLOAD2(bool, isunordered);
1290 __DEF_FUN1(double, lgamma)
1291 __DEF_FUN1(double, log)
1292 __DEF_FUN1(double, log10)
1293 __DEF_FUN1(double, log1p)
1294 __DEF_FUN1(double, log2)
1295 __DEF_FUN1(double, logb)
1296 __DEF_FUNI(long long, llrint)
1297 __DEF_FUNI(long long, llround)
1298 __DEF_FUNI(long, lrint)
1299 __DEF_FUNI(long, lround)
1300 __DEF_FUN1(double, nearbyint);
1301 __DEF_FUN2(double, nextafter);
1302 __DEF_FUN2(double, pow);
1303 __DEF_FUN2(double, remainder);
1304 __DEF_FUN1(double, rint);
1305 __DEF_FUN1(double, round);
1306 __HIP_OVERLOAD1(bool, signbit)
1307 __DEF_FUN1(double, sin)
1308 __DEF_FUN1(double, sinh)
1309 __DEF_FUN1(double, sqrt)
1310 __DEF_FUN1(double, tan)
1311 __DEF_FUN1(double, tanh)
1312 __DEF_FUN1(double, tgamma)
1313 __DEF_FUN1(double, trunc);
1314 
1315 // define cmath functions with a float and an integer argument.
1316 #define __DEF_FLOAT_FUN2I(func) \
1317 __DEVICE__ \
1318 inline \
1319 float func(float x, int y) \
1320 { \
1321  return func##f(x, y); \
1322 }
1323 __DEF_FLOAT_FUN2I(scalbn)
1324 
1325 #if __HCC__
1326 template<class T>
1327 __DEVICE__ inline static T min(T arg1, T arg2) {
1328  return (arg1 < arg2) ? arg1 : arg2;
1329 }
1330 
1331 __DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) {
1332  return min(arg1, (uint32_t) arg2);
1333 }
1334 /*__DEVICE__ inline static uint32_t min(int32_t arg1, uint32_t arg2) {
1335  return min((uint32_t) arg1, arg2);
1336 }
1337 
1338 __DEVICE__ inline static uint64_t min(uint64_t arg1, int64_t arg2) {
1339  return min(arg1, (uint64_t) arg2);
1340 }
1341 __DEVICE__ inline static uint64_t min(int64_t arg1, uint64_t arg2) {
1342  return min((uint64_t) arg1, arg2);
1343 }
1344 
1345 __DEVICE__ inline static unsigned long long min(unsigned long long arg1, long long arg2) {
1346  return min(arg1, (unsigned long long) arg2);
1347 }
1348 __DEVICE__ inline static unsigned long long min(long long arg1, unsigned long long arg2) {
1349  return min((unsigned long long) arg1, arg2);
1350 }*/
1351 
1352 template<class T>
1353 __DEVICE__ inline static T max(T arg1, T arg2) {
1354  return (arg1 > arg2) ? arg1 : arg2;
1355 }
1356 
1357 __DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) {
1358  return max(arg1, (uint32_t) arg2);
1359 }
1360 __DEVICE__ inline static uint32_t max(int32_t arg1, uint32_t arg2) {
1361  return max((uint32_t) arg1, arg2);
1362 }
1363 
1364 /*__DEVICE__ inline static uint64_t max(uint64_t arg1, int64_t arg2) {
1365  return max(arg1, (uint64_t) arg2);
1366 }
1367 __DEVICE__ inline static uint64_t max(int64_t arg1, uint64_t arg2) {
1368  return max((uint64_t) arg1, arg2);
1369 }
1370 
1371 __DEVICE__ inline static unsigned long long max(unsigned long long arg1, long long arg2) {
1372  return max(arg1, (unsigned long long) arg2);
1373 }
1374 __DEVICE__ inline static unsigned long long max(long long arg1, unsigned long long arg2) {
1375  return max((unsigned long long) arg1, arg2);
1376 }*/
1377 #else
1378 __DEVICE__ inline int min(int arg1, int arg2) {
1379  return (arg1 < arg2) ? arg1 : arg2;
1380 }
1381 __DEVICE__ inline int max(int arg1, int arg2) {
1382  return (arg1 > arg2) ? arg1 : arg2;
1383 }
1384 
1385 __DEVICE__
1386 inline
1387 float max(float x, float y) {
1388  return fmaxf(x, y);
1389 }
1390 
1391 __DEVICE__
1392 inline
1393 double max(double x, double y) {
1394  return fmax(x, y);
1395 }
1396 
1397 __DEVICE__
1398 inline
1399 float min(float x, float y) {
1400  return fminf(x, y);
1401 }
1402 
1403 __DEVICE__
1404 inline
1405 double min(double x, double y) {
1406  return fmin(x, y);
1407 }
1408 
1409 __HIP_OVERLOAD2(double, max)
1410 __HIP_OVERLOAD2(double, min)
1411 
1412 #endif
1413 
1414 __host__ inline static int min(int arg1, int arg2) {
1415  return std::min(arg1, arg2);
1416 }
1417 
1418 __host__ inline static int max(int arg1, int arg2) {
1419  return std::max(arg1, arg2);
1420 }
1421 
1422 
1423 #pragma pop_macro("__DEF_FLOAT_FUN")
1424 #pragma pop_macro("__DEF_FLOAT_FUN2")
1425 #pragma pop_macro("__DEF_FLOAT_FUN2I")
1426 #pragma pop_macro("__HIP_OVERLOAD")
1427 #pragma pop_macro("__HIP_OVERLOAD2")
1428 #pragma pop_macro("__DEVICE__")
1429 #pragma pop_macro("__RETURN_TYPE")
1430 
1431 // For backward compatibility.
1432 // There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros
1433 // defined after including math_functions.h.
TODO-doc.
#define __host__
Definition: host_defines.h:41
Contains definitions of APIs for HIP runtime.
Definition: math_functions.h:1193