HIP: Heterogenous-computing Interface for Portability
Loading...
Searching...
No Matches
amd_hip_bf16.h
Go to the documentation of this file.
1
24
29
35
41
47
53
59
65
71
77
83
89
95
101
107
108#ifndef _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_BF16_H_
109#define _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_BF16_H_
110
111#if !defined(__HIPCC_RTC__)
112#include <hip/amd_detail/amd_hip_common.h>
113#include <hip/amd_detail/amd_warp_functions.h> // Sync functions
114#include "amd_hip_vector_types.h" // float2 etc
115#include "device_library_decls.h" // ocml conversion functions
116#if defined(__clang__) && defined(__HIP__)
117#include "amd_hip_atomic.h"
118#endif // defined(__clang__) && defined(__HIP__)
119#include "math_fwd.h" // ocml device functions
120#endif // !defined(__HIPCC_RTC__)
121
122#define __BF16_DEVICE__ __device__
123#if defined(__HIPCC_RTC__)
124#define __BF16_HOST_DEVICE__ __BF16_DEVICE__
125#else
126#include <algorithm>
127#include <climits>
128#include <cmath>
129#define __BF16_HOST_DEVICE__ __host__ __BF16_DEVICE__
130#endif
131#define __BF16_DEVICE_STATIC__ __BF16_DEVICE__ static inline
132#define __BF16_HOST_DEVICE_STATIC__ __BF16_HOST_DEVICE__ static inline
133
134#if defined(__AVX512VL__) and defined(__AVX512BF16__) and not defined(__HIP_DEVICE_COMPILE__)
135// Enable with -mavx512vl -mavx512bf16
136#if defined(__MINGW64__)
137#include <intrin.h>
138#else
139#include <immintrin.h>
140#endif
141#define HIP_BF16_AVX512_OP 1
142static_assert(sizeof(__bf16) == sizeof(unsigned short),
143 "sizeof __bf16 should match sizeof unsigned short");
144#else
145#define HIP_BF16_AVX512_OP 0
146#endif
147
148#define HIPRT_ONE_BF16 __ushort_as_bfloat16((unsigned short)0x3F80U)
149#define HIPRT_ZERO_BF16 __ushort_as_bfloat16((unsigned short)0x0000U)
150#define HIPRT_INF_BF16 __ushort_as_bfloat16((unsigned short)0x7F80U)
151#define HIPRT_MAX_NORMAL_BF16 __ushort_as_bfloat16((unsigned short)0x7F7FU)
152#define HIPRT_MIN_DENORM_BF16 __ushort_as_bfloat16((unsigned short)0x0001U)
153#define HIPRT_NAN_BF16 __ushort_as_bfloat16((unsigned short)0x7FFFU)
154#define HIPRT_NEG_ZERO_BF16 __ushort_as_bfloat16((unsigned short)0x8000U)
155
156// Since we are using unsigned short to represent data in bfloat16, it can be of different sizes on
157// different machines. These naive checks should prevent some undefined behavior on systems which
158// have different sizes for basic types.
159#if !defined(__HIPCC_RTC__)
160static_assert(CHAR_BIT == 8, "byte size should be of 8 bits");
161#endif
162static_assert(sizeof(unsigned short) == 2, "size of unsigned short should be 2 bytes");
163
168typedef struct __attribute__((aligned(2))) {
169 unsigned short x;
170} __hip_bfloat16_raw;
171
176typedef struct __attribute__((aligned(4))) {
177 unsigned short x;
178 unsigned short y;
179} __hip_bfloat162_raw;
180
187struct __attribute__((aligned(2))) __hip_bfloat16 {
188 private:
189 __BF16_HOST_DEVICE_STATIC__ float bfloatraw_2_float(unsigned short val) {
190#if HIP_BF16_AVX512_OP
191 union {
192 unsigned short us;
193 __bf16 bf16;
194 } u = {val};
195 return _mm_cvtsbh_ss(u.bf16);
196#else
197 unsigned int uval = val << 16;
198 union {
199 unsigned int u32;
200 float fp32;
201 } u = {uval};
202 return u.fp32;
203#endif
204 }
205
206 __BF16_HOST_DEVICE_STATIC__ unsigned short float_2_bfloatraw(float f) {
207#if HIP_BF16_AVX512_OP
208 union {
209 __bf16 bf16;
210 unsigned short us;
211 } u = {_mm_cvtness_sbh(f)};
212 return u.us;
213#else
214 union {
215 float fp32;
216 unsigned int u32;
217 } u = {f};
218 if (~u.u32 & 0x7f800000) {
219 // When the exponent bits are not all 1s, then the value is zero, normal,
220 // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus
221 // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd).
222 // This causes the bfloat16's mantissa to be incremented by 1 if the 16
223 // least significant bits of the float mantissa are greater than 0x8000,
224 // or if they are equal to 0x8000 and the least significant bit of the
225 // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when
226 // the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already
227 // has the value 0x7f, then incrementing it causes it to become 0x00 and
228 // the exponent is incremented by one, which is the next higher FP value
229 // to the unrounded bfloat16 value. When the bfloat16 value is subnormal
230 // with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up
231 // to a normal value with an exponent of 0x01 and a mantissa of 0x00.
232 // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
233 // incrementing it causes it to become an exponent of 0xFF and a mantissa
234 // of 0x00, which is Inf, the next higher value to the unrounded value.
235 u.u32 += 0x7fff + ((u.u32 >> 16) & 1); // Round to nearest, round to even
236 } else if (u.u32 & 0xffff) {
237 // When all of the exponent bits are 1, the value is Inf or NaN.
238 // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero
239 // mantissa bit. Quiet NaN is indicated by the most significant mantissa
240 // bit being 1. Signaling NaN is indicated by the most significant
241 // mantissa bit being 0 but some other bit(s) being 1. If any of the
242 // lower 16 bits of the mantissa are 1, we set the least significant bit
243 // of the bfloat16 mantissa, in order to preserve signaling NaN in case
244 // the bloat16's mantissa bits are all 0.
245 u.u32 |= 0x10000; // Preserve signaling NaN
246 }
247 return static_cast<unsigned short>(u.u32 >> 16);
248#endif
249 }
250
251 __BF16_HOST_DEVICE_STATIC__ unsigned short double_2_bfloatraw(double d_in) {
252 union {
253 float fp32;
254 unsigned int u32;
255 } u = {static_cast<float>(d_in)};
256 double d = u.fp32;
257
258 // Round to odd
259 if ((d_in > 0.0 && d > d_in) || (d_in < 0.0 && d < d_in)) {
260 u.u32--;
261 u.u32 |= 1;
262 }
263
264 return float_2_bfloatraw(u.fp32);
265 }
266
267 protected:
269 unsigned short __x;
270
271 public:
272 // TODO: SWDEV-452411
273 // Need to add constructor of __hip_bfloat16 from
274 // unsigned long long
275 // long long
276 // long
277 // unsigned long
278 // Casting directly to double might lead to double rounding.
279
281 __BF16_HOST_DEVICE__ __hip_bfloat16(unsigned int val)
282 : __x(double_2_bfloatraw(static_cast<double>(val))) {}
283
285 __BF16_HOST_DEVICE__ __hip_bfloat16(int val)
286 : __x(double_2_bfloatraw(static_cast<double>(val))) {}
287
289 __BF16_HOST_DEVICE__ __hip_bfloat16(unsigned short val)
290 : __x(float_2_bfloatraw(static_cast<float>(val))) {}
291
293 __BF16_HOST_DEVICE__ __hip_bfloat16(short val)
294 : __x(float_2_bfloatraw(static_cast<float>(val))) {}
295
297 __BF16_HOST_DEVICE__ __hip_bfloat16(const double val) : __x(double_2_bfloatraw(val)) {}
298
300 __BF16_HOST_DEVICE__ __hip_bfloat16(const float val) : __x(float_2_bfloatraw(val)) {}
301
303 __BF16_HOST_DEVICE__ __hip_bfloat16(const __hip_bfloat16_raw& val) : __x(val.x) {}
304
306 __BF16_HOST_DEVICE__ __hip_bfloat16() = default;
307
309 __BF16_HOST_DEVICE__ operator __hip_bfloat16_raw() const { return __hip_bfloat16_raw{__x}; }
310
312 __BF16_HOST_DEVICE__ operator __hip_bfloat16_raw() const volatile {
313 return __hip_bfloat16_raw{__x};
314 }
315
317 __BF16_HOST_DEVICE__ operator bool() const {
318 auto val = bfloatraw_2_float(__x);
319 return val != 0.0f && val != -0.0f;
320 }
321
323 __BF16_HOST_DEVICE__ operator char() const { return static_cast<char>(bfloatraw_2_float(__x)); }
324
326 __BF16_HOST_DEVICE__ operator float() const { return bfloatraw_2_float(__x); }
327
329 __BF16_HOST_DEVICE__ operator int() const { return static_cast<int>(bfloatraw_2_float(__x)); }
330
332 __BF16_HOST_DEVICE__ operator long() const { return static_cast<long>(bfloatraw_2_float(__x)); }
333
335 __BF16_HOST_DEVICE__ operator long long() const {
336 return static_cast<long long>(bfloatraw_2_float(__x));
337 }
338
340 __BF16_HOST_DEVICE__ operator short() const { return static_cast<short>(bfloatraw_2_float(__x)); }
341
343 __BF16_HOST_DEVICE__ operator signed char() const {
344 return static_cast<signed char>(bfloatraw_2_float(__x));
345 }
346
348 __BF16_HOST_DEVICE__ operator unsigned char() const {
349 return static_cast<unsigned char>(bfloatraw_2_float(__x));
350 }
351
353 __BF16_HOST_DEVICE__ operator unsigned int() const {
354 return static_cast<unsigned int>(bfloatraw_2_float(__x));
355 }
356
358 __BF16_HOST_DEVICE__ operator unsigned long() const {
359 return static_cast<unsigned long>(bfloatraw_2_float(__x));
360 }
361
363 __BF16_HOST_DEVICE__ operator unsigned long long() const {
364 return static_cast<unsigned long long>(bfloatraw_2_float(__x));
365 }
366
368 __BF16_HOST_DEVICE__ operator unsigned short() const {
369 return static_cast<unsigned short>(bfloatraw_2_float(__x));
370 }
371
372 // TODO: SWDEV-452411 add operator which converts unsigned long long and long long to bfloat
373
375 __BF16_HOST_DEVICE__ __hip_bfloat16& operator=(unsigned int val) {
376 __x = float_2_bfloatraw(static_cast<float>(val));
377 return *this;
378 }
379
381 __BF16_HOST_DEVICE__ __hip_bfloat16& operator=(int val) {
382 __x = float_2_bfloatraw(static_cast<float>(val));
383 return *this;
384 }
385
387 __BF16_HOST_DEVICE__ __hip_bfloat16& operator=(unsigned short val) {
388 __x = float_2_bfloatraw(static_cast<float>(val));
389 return *this;
390 }
391
393 __BF16_HOST_DEVICE__ __hip_bfloat16& operator=(short val) {
394 __x = float_2_bfloatraw(static_cast<float>(val));
395 return *this;
396 }
397
399 __BF16_HOST_DEVICE__ __hip_bfloat16& operator=(const double f) {
400 __x = float_2_bfloatraw(static_cast<float>(f));
401 return *this;
402 }
403
405 __BF16_HOST_DEVICE__ __hip_bfloat16& operator=(const float f) {
406 __x = float_2_bfloatraw(f);
407 return *this;
408 }
409
411 __BF16_HOST_DEVICE__ __hip_bfloat16& operator=(const __hip_bfloat16_raw& hr) {
412 __x = hr.x;
413 return *this;
414 }
415
417 __BF16_HOST_DEVICE__ volatile __hip_bfloat16& operator=(const __hip_bfloat16_raw& hr) volatile {
418 __x = hr.x;
419 return *this;
420 }
421
423 __BF16_HOST_DEVICE__ volatile __hip_bfloat16& operator=(
424 const volatile __hip_bfloat16_raw& hr) volatile {
425 __x = hr.x;
426 return *this;
427 }
428};
430
437struct __attribute__((aligned(4))) __hip_bfloat162 {
438 public:
439 __hip_bfloat16 x;
440 __hip_bfloat16 y;
441
442
443 public:
445 __BF16_HOST_DEVICE__ __hip_bfloat162(const __hip_bfloat162_raw& h2r)
446 : x(__hip_bfloat16(__hip_bfloat16_raw{h2r.x})),
447 y(__hip_bfloat16(__hip_bfloat16_raw{h2r.y})) {}
448
450 __BF16_HOST_DEVICE__ __hip_bfloat162(const __hip_bfloat162& val) : x(val.x), y(val.y) {}
451
453 __BF16_HOST_DEVICE__ __hip_bfloat162(const __hip_bfloat16& a, const __hip_bfloat16& b)
454 : x(a), y(b) {}
455
457 __BF16_HOST_DEVICE__ __hip_bfloat162() = default;
458
460 __BF16_HOST_DEVICE__ operator __hip_bfloat162_raw() const {
461 __hip_bfloat16_raw l = x;
462 __hip_bfloat16_raw r = y;
463 return __hip_bfloat162_raw{l.x, r.x};
464 }
465
467 __BF16_HOST_DEVICE__ operator float2() const {
468#if HIP_BF16_AVX512_OP
469 union {
470 __hip_bfloat162_raw raw2;
471 __bf16 bf162[2];
472 static_assert(sizeof(__bf16[2]) == sizeof(__hip_bfloat162_raw));
473 } u;
474 u.raw2 = *this;
475 __m128bh pbf16{u.bf162[0], u.bf162[1], 0, 0};
476 __m128 pf32 = _mm_cvtpbh_ps(pbf16);
477 float2 ret(pf32[0], pf32[1]);
478#else
479 float2 ret(x, y);
480#endif
481 return ret;
482 }
483
485 __BF16_HOST_DEVICE__ __hip_bfloat162& operator=(const __hip_bfloat162_raw& h2r) {
486 x = __hip_bfloat16(__hip_bfloat16_raw{h2r.x});
487 y = __hip_bfloat16(__hip_bfloat16_raw{h2r.y});
488 return *this;
489 }
490
492 __BF16_HOST_DEVICE__ __hip_bfloat162& operator=(const __hip_bfloat162& src) {
493 x = src.x;
494 y = src.y;
495 return *this;
496 }
497};
498
499
504__BF16_HOST_DEVICE_STATIC__ float __bfloat162float(__hip_bfloat16 a) {
505 float ret = a;
506 return ret;
507}
508
513__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __float2bfloat16(float f) {
514 __hip_bfloat16 ret{f};
515 return ret;
516}
517
522__BF16_HOST_DEVICE_STATIC__ float2 __bfloat1622float2(const __hip_bfloat162 a) {
523 float2 ret = a;
524 return ret;
525}
526
531__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a) {
532 return __hip_bfloat162(a, a);
533}
534
539__BF16_HOST_DEVICE_STATIC__ short int __bfloat16_as_short(const __hip_bfloat16 h) {
540 static_assert(sizeof(__hip_bfloat16) == sizeof(short int));
541 union {
542 __hip_bfloat16 bf16;
543 short int si;
544 } u{h};
545 return u.si;
546}
547
552__BF16_HOST_DEVICE_STATIC__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h) {
553 static_assert(sizeof(__hip_bfloat16) == sizeof(unsigned short int));
554 union {
555 __hip_bfloat16 bf16;
556 unsigned short int usi;
557 } u{h};
558 return u.usi;
559}
560
565__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __double2bfloat16(const double a) {
566 __hip_bfloat16 ret{a};
567 return ret;
568}
569
574__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __float22bfloat162_rn(const float2 a) {
575 return __hip_bfloat162{__float2bfloat16(a.x), __float2bfloat16(a.y)};
576}
577
582__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a,
583 const __hip_bfloat16 b) {
584 return __hip_bfloat162(a, b);
585}
586
591__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a) { return a.y; }
592
597__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a) {
598 return __hip_bfloat162(a.y, a.y);
599}
600
605__BF16_HOST_DEVICE_STATIC__ float __high2float(const __hip_bfloat162 a) {
606 return __bfloat162float(a.y);
607}
608
613__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __highs2bfloat162(const __hip_bfloat162 a,
614 const __hip_bfloat162 b) {
615 return __hip_bfloat162(a.y, b.y);
616}
617
622__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a) { return a.x; }
623
628__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a) {
629 return __hip_bfloat162(a.x, a.x);
630}
631
636__BF16_HOST_DEVICE_STATIC__ float __low2float(const __hip_bfloat162 a) {
637 return __bfloat162float(a.x);
638}
639
644__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a) {
645 return __hip_bfloat162(a.y, a.x);
646}
647
652__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a,
653 const __hip_bfloat162 b) {
654 return __hip_bfloat162(a.x, b.x);
655}
656
661__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __short_as_bfloat16(const short int a) {
662 static_assert(sizeof(__hip_bfloat16) == sizeof(short int));
663 union {
664 short int si;
665 __hip_bfloat16 bf16;
666 } u{a};
667 return u.bf16;
668}
669
674__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a) {
675 static_assert(sizeof(__hip_bfloat16) == sizeof(unsigned short int));
676 union {
677 unsigned short int usi;
678 __hip_bfloat16 bf16;
679 } u{a};
680 return u.bf16;
681}
682
683#ifdef HIP_ENABLE_WARP_SYNC_BUILTINS
688__BF16_DEVICE_STATIC__ __hip_bfloat16 __shfl_down_sync(const unsigned long long mask,
689 const __hip_bfloat16 in,
690 const unsigned int delta,
691 const int width = __AMDGCN_WAVEFRONT_SIZE) {
692 return __ushort_as_bfloat16(__shfl_down_sync(mask, __bfloat16_as_ushort(in), delta, width));
693}
694
699__BF16_DEVICE_STATIC__ __hip_bfloat162 __shfl_down_sync(const unsigned long long mask,
700 const __hip_bfloat162 in,
701 const unsigned int delta,
702 const int width = __AMDGCN_WAVEFRONT_SIZE) {
703 static_assert(sizeof(__hip_bfloat162) == sizeof(unsigned int));
704 union {
705 __hip_bfloat162 bf162;
706 unsigned int ui;
707 } u{in};
708 u.ui = __shfl_down_sync<unsigned long long, unsigned int>(mask, u.ui, delta, width);
709 return u.bf162;
710}
711
716__BF16_DEVICE_STATIC__ __hip_bfloat16 __shfl_sync(const unsigned long long mask,
717 const __hip_bfloat16 in, const int delta,
718 const int width = __AMDGCN_WAVEFRONT_SIZE) {
719 return __ushort_as_bfloat16(__shfl_sync(mask, __bfloat16_as_ushort(in), delta, width));
720}
721
726__BF16_DEVICE_STATIC__ __hip_bfloat162 __shfl_sync(const unsigned long long mask,
727 const __hip_bfloat162 in, const int delta,
728 const int width = __AMDGCN_WAVEFRONT_SIZE) {
729 static_assert(sizeof(__hip_bfloat162) == sizeof(unsigned int));
730 union {
731 __hip_bfloat162 bf162;
732 unsigned int ui;
733 } u{in};
734 u.ui = __shfl_sync(mask, u.ui, delta, width);
735 return u.bf162;
736}
737
742__BF16_DEVICE_STATIC__ __hip_bfloat16 __shfl_up_sync(const unsigned long long mask,
743 const __hip_bfloat16 in,
744 const unsigned int delta,
745 const int width = __AMDGCN_WAVEFRONT_SIZE) {
746 return __ushort_as_bfloat16(__shfl_up_sync(mask, __bfloat16_as_ushort(in), delta, width));
747}
748
753__BF16_DEVICE_STATIC__ __hip_bfloat162 __shfl_up_sync(const unsigned long long mask,
754 const __hip_bfloat162 in,
755 const unsigned int delta,
756 const int width = __AMDGCN_WAVEFRONT_SIZE) {
757 static_assert(sizeof(__hip_bfloat162) == sizeof(unsigned int));
758 union {
759 __hip_bfloat162 bf162;
760 unsigned int ui;
761 } u{in};
762 u.ui = __shfl_up_sync(mask, u.ui, delta, width);
763 return u.bf162;
764}
765
770__BF16_DEVICE_STATIC__ __hip_bfloat16 __shfl_xor_sync(const unsigned long long mask,
771 const __hip_bfloat16 in, const int delta,
772 const int width = __AMDGCN_WAVEFRONT_SIZE) {
773 return __ushort_as_bfloat16(__shfl_xor_sync(mask, __bfloat16_as_ushort(in), delta, width));
774}
775
780__BF16_DEVICE_STATIC__ __hip_bfloat162 __shfl_xor_sync(const unsigned long long mask,
781 const __hip_bfloat162 in, const int delta,
782 const int width = __AMDGCN_WAVEFRONT_SIZE) {
783 static_assert(sizeof(__hip_bfloat162) == sizeof(unsigned int));
784 union {
785 __hip_bfloat162 bf162;
786 unsigned int ui;
787 } u{in};
788 u.ui = __shfl_xor_sync(mask, u.ui, delta, width);
789 return u.bf162;
790}
791#endif
792
797__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b) {
799}
800
805__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b) {
807}
808
813__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b) {
815}
816
821__BF16_DEVICE_STATIC__ __hip_bfloat16 __hfma(const __hip_bfloat16 a, const __hip_bfloat16 b,
822 const __hip_bfloat16 c) {
823 return __float2bfloat16(
824 __ocml_fma_f32(__bfloat162float(a), __bfloat162float(b), __bfloat162float(c)));
825}
826
831__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b) {
833}
834
839__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hneg(const __hip_bfloat16 a) {
840 __hip_bfloat16_raw hr = a;
841 hr.x ^= 0x8000;
842 return __hip_bfloat16(hr);
843}
844
849__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __habs(const __hip_bfloat16 a) {
850 __hip_bfloat16_raw hr = a;
851 hr.x &= 0x7FFF;
852 return __hip_bfloat16(hr);
853}
854
859__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __h2div(const __hip_bfloat162 a,
860 const __hip_bfloat162 b) {
861 return __hip_bfloat162(__float2bfloat16(__bfloat162float(a.x) / __bfloat162float(b.x)),
863}
864
869__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __habs2(const __hip_bfloat162 a) {
870 return __hip_bfloat162(__habs(a.x), __habs(a.y));
871}
872
877__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a,
878 const __hip_bfloat162 b) {
879 return __hip_bfloat162(__hadd(a.x, b.x), __hadd(a.y, b.y));
880}
881
886__BF16_DEVICE_STATIC__ __hip_bfloat162 __hfma2(const __hip_bfloat162 a, const __hip_bfloat162 b,
887 const __hip_bfloat162 c) {
888 return __hip_bfloat162(__hfma(a.x, b.x, c.x), __hfma(a.y, b.y, c.y));
889}
890
895__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a,
896 const __hip_bfloat162 b) {
897 return __hip_bfloat162(__hmul(a.x, b.x), __hmul(a.y, b.y));
898}
899
904__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a) {
905 return __hip_bfloat162(__hneg(a.x), __hneg(a.y));
906}
907
912__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a,
913 const __hip_bfloat162 b) {
914 return __hip_bfloat162(__hsub(a.x, b.x), __hsub(a.y, b.y));
915}
916
921__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator*(const __hip_bfloat16& l,
922 const __hip_bfloat16& r) {
923 return __hmul(l, r);
924}
925
930__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16& operator*=(__hip_bfloat16& l, const __hip_bfloat16& r) {
931 l = __hmul(l, r);
932 return l;
933}
934
939__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator+(const __hip_bfloat16& l) { return l; }
940
945__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator+(const __hip_bfloat16& l,
946 const __hip_bfloat16& r) {
947 return __hadd(l, r);
948}
949
954__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator-(const __hip_bfloat16& l) { return __hneg(l); }
955
960__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator-(const __hip_bfloat16& l,
961 const __hip_bfloat16& r) {
962 return __hsub(l, r);
963}
964
969__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator++(__hip_bfloat16& l, const int) {
970 auto ret = l;
971 l = __hadd(l, HIPRT_ONE_BF16);
972 return ret;
973}
974
979__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16& operator++(__hip_bfloat16& l) {
980 l = __hadd(l, HIPRT_ONE_BF16);
981 return l;
982}
983
988__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator--(__hip_bfloat16& l, const int) {
989 auto ret = l;
990 l = __hsub(l, HIPRT_ONE_BF16);
991 return ret;
992}
993
998__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16& operator--(__hip_bfloat16& l) {
999 l = __hsub(l, HIPRT_ONE_BF16);
1000 return l;
1001}
1002
1007__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16& operator+=(__hip_bfloat16& l, const __hip_bfloat16& r) {
1008 l = __hadd(l, r);
1009 return l;
1010}
1011
1016__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16& operator-=(__hip_bfloat16& l, const __hip_bfloat16& r) {
1017 l = __hsub(l, r);
1018 return l;
1019}
1020
1025__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator/(const __hip_bfloat16& l,
1026 const __hip_bfloat16& r) {
1027 return __hdiv(l, r);
1028}
1029
1034__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16& operator/=(__hip_bfloat16& l, const __hip_bfloat16& r) {
1035 l = __hdiv(l, r);
1036 return l;
1037}
1038
1043__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 operator*(const __hip_bfloat162& l,
1044 const __hip_bfloat162& r) {
1045 return __hmul2(l, r);
1046}
1047
1052__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162& operator*=(__hip_bfloat162& l,
1053 const __hip_bfloat162& r) {
1054 l = __hmul2(l, r);
1055 return l;
1056}
1057
1062__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 operator+(const __hip_bfloat162& l) { return l; }
1063
1068__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 operator+(const __hip_bfloat162& l,
1069 const __hip_bfloat162& r) {
1070 return __hadd2(l, r);
1071}
1072
1077__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 operator-(const __hip_bfloat162& l) {
1078 return __hneg2(l);
1079}
1080
1085__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 operator-(const __hip_bfloat162& l,
1086 const __hip_bfloat162& r) {
1087 return __hsub2(l, r);
1088}
1089
1094__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 operator++(__hip_bfloat162& l, const int) {
1095 auto ret = l;
1096 l = __hadd2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
1097 return ret;
1098}
1099
1104__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162& operator++(__hip_bfloat162& l) {
1105 l = __hadd2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
1106 return l;
1107}
1108
1113__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 operator--(__hip_bfloat162& l, const int) {
1114 auto ret = l;
1115 l = __hsub2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
1116 return ret;
1117}
1118
1123__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162& operator--(__hip_bfloat162& l) {
1124 l = __hsub2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
1125 return l;
1126}
1127
1132__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162& operator+=(__hip_bfloat162& l,
1133 const __hip_bfloat162& r) {
1134 l = __hadd2(l, r);
1135 return l;
1136}
1137
1142__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162& operator-=(__hip_bfloat162& l,
1143 const __hip_bfloat162& r) {
1144 l = __hsub2(l, r);
1145 return l;
1146}
1147
1152__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 operator/(const __hip_bfloat162& l,
1153 const __hip_bfloat162& r) {
1154 return __h2div(l, r);
1155}
1156
1161__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162& operator/=(__hip_bfloat162& l,
1162 const __hip_bfloat162& r) {
1163 l = __h2div(l, r);
1164 return l;
1165}
1166
1171__BF16_HOST_DEVICE_STATIC__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1172 return __bfloat162float(a) == __bfloat162float(b);
1173}
1174
1179__BF16_HOST_DEVICE_STATIC__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1180 return !(__bfloat162float(a) < __bfloat162float(b)) &&
1182}
1183
1188__BF16_HOST_DEVICE_STATIC__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1189 return __bfloat162float(a) > __bfloat162float(b);
1190}
1191
1196__BF16_HOST_DEVICE_STATIC__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1197 return !(__bfloat162float(a) <= __bfloat162float(b));
1198}
1199
1204__BF16_HOST_DEVICE_STATIC__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1205 return __bfloat162float(a) >= __bfloat162float(b);
1206}
1207
1212__BF16_HOST_DEVICE_STATIC__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1213 return !(__bfloat162float(a) < __bfloat162float(b));
1214}
1215
1220__BF16_HOST_DEVICE_STATIC__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1221 return __bfloat162float(a) != __bfloat162float(b);
1222}
1223
1228__BF16_HOST_DEVICE_STATIC__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1229 return !(__bfloat162float(a) == __bfloat162float(b));
1230}
1231
1236__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1237#if __HIP_DEVICE_COMPILE__
1238 return __float2bfloat16(__ocml_fmax_f32(__bfloat162float(a), __bfloat162float(b)));
1239#else
1240 return __float2bfloat16(std::max(__bfloat162float(a), __bfloat162float(b)));
1241#endif
1242}
1243
1248__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1249#if __HIP_DEVICE_COMPILE__
1250 return __float2bfloat16(__ocml_fmin_f32(__bfloat162float(a), __bfloat162float(b)));
1251#else
1252 return __float2bfloat16(std::min(__bfloat162float(a), __bfloat162float(b)));
1253#endif
1254}
1255
1260__BF16_HOST_DEVICE_STATIC__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1261 return __bfloat162float(a) < __bfloat162float(b);
1262}
1263
1268__BF16_HOST_DEVICE_STATIC__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1269 return !(__bfloat162float(a) >= __bfloat162float(b));
1270}
1271
1276__BF16_HOST_DEVICE_STATIC__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1277 return __bfloat162float(a) <= __bfloat162float(b);
1278}
1279
1284__BF16_HOST_DEVICE_STATIC__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1285 return !(__bfloat162float(a) > __bfloat162float(b));
1286}
1287
1292__BF16_HOST_DEVICE_STATIC__ int __hisinf(const __hip_bfloat16 a) {
1293 __hip_bfloat16_raw hr = a;
1294 return !(~hr.x & 0x7f80) && !(hr.x & 0x7f);
1295}
1296
1301__BF16_HOST_DEVICE_STATIC__ bool __hisnan(const __hip_bfloat16 a) {
1302 __hip_bfloat16_raw hr = a;
1303 return !(~hr.x & 0x7f80) && +(hr.x & 0x7f);
1304}
1305
1310__BF16_HOST_DEVICE_STATIC__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1311 return __heq(a.x, b.x) && __heq(a.y, b.y);
1312}
1313
1318__BF16_HOST_DEVICE_STATIC__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1319 return __hequ(a.x, b.x) && __hequ(a.y, b.y);
1320}
1321
1326__BF16_HOST_DEVICE_STATIC__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1327 return __hge(a.x, b.x) && __hge(a.y, b.y);
1328}
1329
1334__BF16_HOST_DEVICE_STATIC__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1335 return __hgeu(a.x, b.x) && __hgeu(a.y, b.y);
1336}
1337
1342__BF16_HOST_DEVICE_STATIC__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1343 return __hgt(a.x, b.x) && __hgt(a.y, b.y);
1344}
1345
1350__BF16_HOST_DEVICE_STATIC__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1351 return __hgtu(a.x, b.x) && __hgtu(a.y, b.y);
1352}
1353
1358__BF16_HOST_DEVICE_STATIC__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1359 return __hle(a.x, b.x) && __hle(a.y, b.y);
1360}
1361
1366__BF16_HOST_DEVICE_STATIC__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1367 return __hleu(a.x, b.x) && __hleu(a.y, b.y);
1368}
1369
1374__BF16_HOST_DEVICE_STATIC__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1375 return __hlt(a.x, b.x) && __hlt(a.y, b.y);
1376}
1377
1382__BF16_HOST_DEVICE_STATIC__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1383 return __hltu(a.x, b.x) && __hltu(a.y, b.y);
1384}
1385
1390__BF16_HOST_DEVICE_STATIC__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1391 return __hne(a.x, b.x) && __hne(a.y, b.y);
1392}
1393
1398__BF16_HOST_DEVICE_STATIC__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1399 return __hneu(a.x, b.x) || __hneu(a.y, b.y);
1400}
1401
1406__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __heq2(const __hip_bfloat162 a,
1407 const __hip_bfloat162 b) {
1408 return __hip_bfloat162{{__heq(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
1409 {__heq(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
1410}
1411
1416__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hge2(const __hip_bfloat162 a,
1417 const __hip_bfloat162 b) {
1418 return __hip_bfloat162{{__hge(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
1419 {__hge(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
1420}
1421
1426__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a,
1427 const __hip_bfloat162 b) {
1428 return __hip_bfloat162{{__hgt(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
1429 {__hgt(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ONE_BF16}};
1430}
1431
1436__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a) {
1437 return __hip_bfloat162{{__hisnan(a.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
1438 {__hisnan(a.y) ? HIPRT_ONE_BF16 : HIPRT_ONE_BF16}};
1439}
1440
1445__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hle2(const __hip_bfloat162 a,
1446 const __hip_bfloat162 b) {
1447 return __hip_bfloat162{{__hle(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
1448 {__hle(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
1449}
1450
1455__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a,
1456 const __hip_bfloat162 b) {
1457 return __hip_bfloat162{{__hlt(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
1458 {__hlt(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
1459}
1460
1465__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a,
1466 const __hip_bfloat162 b) {
1467 return __hip_bfloat162(__hmax(a.x, b.x), __hmax(a.y, b.y));
1468}
1469
1474__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a,
1475 const __hip_bfloat162 b) {
1476 return __hip_bfloat162(__hmin(a.x, b.x), __hmin(a.y, b.y));
1477}
1478
1483__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hne2(const __hip_bfloat162 a,
1484 const __hip_bfloat162 b) {
1485 return __hip_bfloat162{{__hne(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
1486 {__hne(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
1487}
1488
1493__BF16_HOST_DEVICE_STATIC__ bool operator==(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1494 return __heq(l, r);
1495}
1496
1501__BF16_HOST_DEVICE_STATIC__ bool operator!=(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1502 return __hne(l, r);
1503}
1504
1509__BF16_HOST_DEVICE_STATIC__ bool operator<(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1510 return __hlt(l, r);
1511}
1512
1517__BF16_HOST_DEVICE_STATIC__ bool operator<=(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1518 return __hle(l, r);
1519}
1520
1525__BF16_HOST_DEVICE_STATIC__ bool operator>(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1526 return __hgt(l, r);
1527}
1528
1533__BF16_HOST_DEVICE_STATIC__ bool operator>=(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1534 return __hge(l, r);
1535}
1536
1541__BF16_HOST_DEVICE_STATIC__ bool operator==(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1542 float2 ret = __heq2(l, r);
1543 return ret.x != 0.0f && ret.y != 0.0f;
1544}
1545
1550__BF16_HOST_DEVICE_STATIC__ bool operator!=(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1551 return !(l == r);
1552}
1553
1558__BF16_HOST_DEVICE_STATIC__ bool operator<(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1559 float2 fl = l, fr = r;
1560 return fl.x < fr.x && fl.x < fr.y;
1561}
1562
1567__BF16_HOST_DEVICE_STATIC__ bool operator<=(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1568 float2 fl = l, fr = r;
1569 return fl.x <= fr.x && fl.x <= fr.y;
1570}
1571
1576__BF16_HOST_DEVICE_STATIC__ bool operator>(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1577 float2 fl = l, fr = r;
1578 return fl.x > fr.x && fl.x > fr.y;
1579}
1580
1585__BF16_HOST_DEVICE_STATIC__ bool operator>=(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1586 float2 fl = l, fr = r;
1587 return fl.x >= fr.x && fl.x >= fr.y;
1588}
1589
1594__BF16_DEVICE_STATIC__ __hip_bfloat16 hceil(const __hip_bfloat16 h) {
1595 return __float2bfloat16(__ocml_ceil_f32(__bfloat162float(h)));
1596}
1597
1602__BF16_DEVICE_STATIC__ __hip_bfloat16 hcos(const __hip_bfloat16 h) {
1603 return __float2bfloat16(__ocml_cos_f32(__bfloat162float(h)));
1604}
1605
1610__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp(const __hip_bfloat16 h) {
1611 return __float2bfloat16(__ocml_exp_f32(__bfloat162float(h)));
1612}
1613
1618__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp10(const __hip_bfloat16 h) {
1619 return __float2bfloat16(__ocml_exp10_f32(__bfloat162float(h)));
1620}
1621
1626__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp2(const __hip_bfloat16 h) {
1627 return __float2bfloat16(__ocml_exp2_f32(__bfloat162float(h)));
1628}
1629
1634__BF16_DEVICE_STATIC__ __hip_bfloat16 hfloor(const __hip_bfloat16 h) {
1635 return __float2bfloat16(__ocml_floor_f32(__bfloat162float(h)));
1636}
1637
1642__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog(const __hip_bfloat16 h) {
1643 return __float2bfloat16(__ocml_log_f32(__bfloat162float(h)));
1644}
1645
1650__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog10(const __hip_bfloat16 h) {
1651 return __float2bfloat16(__ocml_log10_f32(__bfloat162float(h)));
1652}
1653
1658__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog2(const __hip_bfloat16 h) {
1659 return __float2bfloat16(__ocml_log2_f32(__bfloat162float(h)));
1660}
1661
1666__BF16_DEVICE_STATIC__ __hip_bfloat16 hrcp(const __hip_bfloat16 h) {
1667 return __float2bfloat16(1.0f / (__bfloat162float(h)));
1668}
1669
1674__BF16_DEVICE_STATIC__ __hip_bfloat16 hrint(const __hip_bfloat16 h) {
1675 return __float2bfloat16(__ocml_rint_f32(__bfloat162float(h)));
1676}
1677
1682__BF16_DEVICE_STATIC__ __hip_bfloat16 hrsqrt(const __hip_bfloat16 h) {
1683 return __float2bfloat16(__ocml_rsqrt_f32(__bfloat162float(h)));
1684}
1685
1690__BF16_DEVICE_STATIC__ __hip_bfloat16 hsin(const __hip_bfloat16 h) {
1691 return __float2bfloat16(__ocml_sin_f32(__bfloat162float(h)));
1692}
1693
1698__BF16_DEVICE_STATIC__ __hip_bfloat16 hsqrt(const __hip_bfloat16 h) {
1699 return __float2bfloat16(__ocml_sqrt_f32(__bfloat162float(h)));
1700}
1701
1706__BF16_DEVICE_STATIC__ __hip_bfloat16 htrunc(const __hip_bfloat16 h) {
1707 return __float2bfloat16(__ocml_trunc_f32(__bfloat162float(h)));
1708}
1709
1714__BF16_DEVICE_STATIC__ __hip_bfloat162 h2ceil(const __hip_bfloat162 h) {
1715 return __hip_bfloat162(hceil(h.x), hceil(h.y));
1716}
1717
1722__BF16_DEVICE_STATIC__ __hip_bfloat162 h2cos(const __hip_bfloat162 h) {
1723 return __hip_bfloat162(hcos(h.x), hcos(h.y));
1724}
1725
1730__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp(const __hip_bfloat162 h) {
1731 return __hip_bfloat162(hexp(h.x), hexp(h.y));
1732}
1733
1738__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp10(const __hip_bfloat162 h) {
1739 return __hip_bfloat162(hexp10(h.x), hexp10(h.y));
1740}
1741
1746__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp2(const __hip_bfloat162 h) {
1747 return __hip_bfloat162(hexp2(h.x), hexp2(h.y));
1748}
1749
1754__BF16_DEVICE_STATIC__ __hip_bfloat162 h2floor(const __hip_bfloat162 h) {
1755 return __hip_bfloat162(hfloor(h.x), hfloor(h.y));
1756}
1757
1762__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log(const __hip_bfloat162 h) {
1763 return __hip_bfloat162(hlog(h.x), hlog(h.y));
1764}
1765
1770__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log10(const __hip_bfloat162 h) {
1771 return __hip_bfloat162(hlog10(h.x), hlog10(h.y));
1772}
1773
1778__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log2(const __hip_bfloat162 h) {
1779 return __hip_bfloat162(hlog2(h.x), hlog2(h.y));
1780}
1781
1786__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rcp(const __hip_bfloat162 h) {
1787 return __hip_bfloat162(hrcp(h.x), hrcp(h.y));
1788}
1789
1794__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rint(const __hip_bfloat162 h) {
1795 return __hip_bfloat162(hrint(h.x), hrint(h.y));
1796}
1797
1802__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rsqrt(const __hip_bfloat162 h) {
1803 return __hip_bfloat162(hrsqrt(h.x), hrsqrt(h.y));
1804}
1805
1810__BF16_DEVICE_STATIC__ __hip_bfloat162 h2sin(const __hip_bfloat162 h) {
1811 return __hip_bfloat162(hsin(h.x), hsin(h.y));
1812}
1813
1818__BF16_DEVICE_STATIC__ __hip_bfloat162 h2sqrt(const __hip_bfloat162 h) {
1819 return __hip_bfloat162(hsqrt(h.x), hsqrt(h.y));
1820}
1821
1826__BF16_DEVICE_STATIC__ __hip_bfloat162 h2trunc(const __hip_bfloat162 h) {
1827 return __hip_bfloat162(htrunc(h.x), htrunc(h.y));
1828}
1829
1830#if defined(__clang__) && defined(__HIP__)
1835__BF16_DEVICE_STATIC__ __hip_bfloat162 unsafeAtomicAdd(__hip_bfloat162* address,
1836 __hip_bfloat162 value) {
1837#if __has_builtin(__builtin_amdgcn_flat_atomic_fadd_v2bf16)
1838 typedef short __attribute__((ext_vector_type(2))) vec_short2;
1839 static_assert(sizeof(vec_short2) == sizeof(__hip_bfloat162_raw));
1840 union {
1841 __hip_bfloat162_raw bf162_raw;
1842 vec_short2 vs2;
1843 } u{static_cast<__hip_bfloat162_raw>(value)};
1844 u.vs2 = __builtin_amdgcn_flat_atomic_fadd_v2bf16((vec_short2*)address, u.vs2);
1845 return static_cast<__hip_bfloat162>(u.bf162_raw);
1846#else
1847 static_assert(sizeof(unsigned int) == sizeof(__hip_bfloat162_raw));
1848 union u_hold {
1849 __hip_bfloat162_raw h2r;
1850 unsigned int u32;
1851 };
1852 u_hold old_val, new_val;
1853 old_val.u32 =
1854 __hip_atomic_load((unsigned int*)address, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1855 do {
1856 new_val.h2r = __hadd2(old_val.h2r, value);
1857 } while (!__hip_atomic_compare_exchange_strong((unsigned int*)address, &old_val.u32, new_val.u32,
1858 __ATOMIC_RELAXED, __ATOMIC_RELAXED,
1859 __HIP_MEMORY_SCOPE_AGENT));
1860 return old_val.h2r;
1861#endif
1862}
1863#endif // defined(__clang__) && defined(__HIP__)
1864#endif
Contains declarations for types and functions in device library. Uses int64_t and uint64_t instead of...
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b)
Subtracts two bfloat16 values.
Definition amd_hip_bf16.h:805
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator-=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to subtract-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1016
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator+(const __hip_bfloat16 &l)
Operator to unary+ on a __hip_bfloat16 number.
Definition amd_hip_bf16.h:939
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator/(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to divide two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1025
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator-(const __hip_bfloat16 &l)
Operator to negate a __hip_bfloat16 number.
Definition amd_hip_bf16.h:954
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hneg(const __hip_bfloat16 a)
Negate a bfloat16 value.
Definition amd_hip_bf16.h:839
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b)
Adds two bfloat16 values.
Definition amd_hip_bf16.h:797
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator/=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to divide-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1034
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator*(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to multiply two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:921
__BF16_DEVICE_STATIC__ __hip_bfloat16 __hfma(const __hip_bfloat16 a, const __hip_bfloat16 b, const __hip_bfloat16 c)
Performs FMA of given bfloat16 values.
Definition amd_hip_bf16.h:821
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator*=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to multiply-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:930
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b)
Multiplies two bfloat16 values.
Definition amd_hip_bf16.h:831
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator++(__hip_bfloat16 &l, const int)
Operator to post increment a __hip_bfloat16 number.
Definition amd_hip_bf16.h:969
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator+=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to add-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1007
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __habs(const __hip_bfloat16 a)
Returns absolute of a bfloat16.
Definition amd_hip_bf16.h:849
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator--(__hip_bfloat16 &l, const int)
Operator to post decrement a __hip_bfloat16 number.
Definition amd_hip_bf16.h:988
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b)
Divides two bfloat16 values.
Definition amd_hip_bf16.h:813
__BF16_HOST_DEVICE_STATIC__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values.
Definition amd_hip_bf16.h:1171
__BF16_HOST_DEVICE_STATIC__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - greater than.
Definition amd_hip_bf16.h:1188
__BF16_HOST_DEVICE_STATIC__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - not equal.
Definition amd_hip_bf16.h:1220
__BF16_HOST_DEVICE_STATIC__ bool operator==(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform an equal compare on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1493
__BF16_HOST_DEVICE_STATIC__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - less than equal.
Definition amd_hip_bf16.h:1276
__BF16_HOST_DEVICE_STATIC__ bool operator!=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a not equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1501
__BF16_HOST_DEVICE_STATIC__ bool operator>(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a greater than on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1525
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - return min.
Definition amd_hip_bf16.h:1248
__BF16_HOST_DEVICE_STATIC__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - greater than equal.
Definition amd_hip_bf16.h:1204
__BF16_HOST_DEVICE_STATIC__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered less than equal.
Definition amd_hip_bf16.h:1284
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - return max.
Definition amd_hip_bf16.h:1236
__BF16_HOST_DEVICE_STATIC__ bool operator<=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a less than equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1517
__BF16_HOST_DEVICE_STATIC__ int __hisinf(const __hip_bfloat16 a)
Checks if number is inf.
Definition amd_hip_bf16.h:1292
__BF16_HOST_DEVICE_STATIC__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered not equal.
Definition amd_hip_bf16.h:1228
__BF16_HOST_DEVICE_STATIC__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered greater than.
Definition amd_hip_bf16.h:1196
__BF16_HOST_DEVICE_STATIC__ bool operator<(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a less than on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1509
__BF16_HOST_DEVICE_STATIC__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered less than.
Definition amd_hip_bf16.h:1268
__BF16_HOST_DEVICE_STATIC__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered greater than equal.
Definition amd_hip_bf16.h:1212
__BF16_HOST_DEVICE_STATIC__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - less than operator.
Definition amd_hip_bf16.h:1260
__BF16_HOST_DEVICE_STATIC__ bool operator>=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a greater than equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1533
__BF16_HOST_DEVICE_STATIC__ bool __hisnan(const __hip_bfloat16 a)
Checks if number is nan.
Definition amd_hip_bf16.h:1301
__BF16_HOST_DEVICE_STATIC__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered equal.
Definition amd_hip_bf16.h:1179
__BF16_HOST_DEVICE_STATIC__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b - unordered.
Definition amd_hip_bf16.h:1334
__BF16_HOST_DEVICE_STATIC__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b.
Definition amd_hip_bf16.h:1342
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Returns max of two elements.
Definition amd_hip_bf16.h:1465
__BF16_HOST_DEVICE_STATIC__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b.
Definition amd_hip_bf16.h:1358
__BF16_HOST_DEVICE_STATIC__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b.
Definition amd_hip_bf16.h:1374
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Returns min of two elements.
Definition amd_hip_bf16.h:1474
__BF16_HOST_DEVICE_STATIC__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b - unordered.
Definition amd_hip_bf16.h:1366
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks for not equal to.
Definition amd_hip_bf16.h:1483
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1416
__BF16_HOST_DEVICE_STATIC__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition amd_hip_bf16.h:1390
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1455
__BF16_HOST_DEVICE_STATIC__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b - unordered.
Definition amd_hip_bf16.h:1382
__BF16_HOST_DEVICE_STATIC__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal - unordered.
Definition amd_hip_bf16.h:1318
__BF16_HOST_DEVICE_STATIC__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal.
Definition amd_hip_bf16.h:1310
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hle2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1445
__BF16_HOST_DEVICE_STATIC__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition amd_hip_bf16.h:1398
__BF16_HOST_DEVICE_STATIC__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b - unordered.
Definition amd_hip_bf16.h:1350
__BF16_HOST_DEVICE_STATIC__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b.
Definition amd_hip_bf16.h:1326
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __heq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b, returns 1.0 if equal, otherwise 0.0.
Definition amd_hip_bf16.h:1406
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1426
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a)
Check for a is NaN, returns 1.0 if NaN, otherwise 0.0.
Definition amd_hip_bf16.h:1436
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __habs2(const __hip_bfloat162 a)
Returns absolute of a bfloat162.
Definition amd_hip_bf16.h:869
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Subtracts two bfloat162 values.
Definition amd_hip_bf16.h:912
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Multiplies two bfloat162 values.
Definition amd_hip_bf16.h:895
__BF16_DEVICE_STATIC__ __hip_bfloat162 __hfma2(const __hip_bfloat162 a, const __hip_bfloat162 b, const __hip_bfloat162 c)
Performs FMA of given bfloat162 values.
Definition amd_hip_bf16.h:886
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Adds two bfloat162 values.
Definition amd_hip_bf16.h:877
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a)
Converts a bfloat162 into negative.
Definition amd_hip_bf16.h:904
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __h2div(const __hip_bfloat162 a, const __hip_bfloat162 b)
Divides bfloat162 values.
Definition amd_hip_bf16.h:859
__BF16_HOST_DEVICE_STATIC__ float __bfloat162float(__hip_bfloat16 a)
Converts bfloat16 to float.
Definition amd_hip_bf16.h:504
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __float2bfloat16(float f)
Converts float to bfloat16.
Definition amd_hip_bf16.h:513
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a)
Moves bfloat16 value to bfloat162.
Definition amd_hip_bf16.h:531
__BF16_HOST_DEVICE_STATIC__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h)
Reinterprets bits in a __hip_bfloat16 as an unsigned signed short integer.
Definition amd_hip_bf16.h:552
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a)
Reinterprets unsigned short int into a bfloat16.
Definition amd_hip_bf16.h:674
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a)
Returns low 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:622
__BF16_HOST_DEVICE_STATIC__ float2 __bfloat1622float2(const __hip_bfloat162 a)
Converts and moves bfloat162 to float2.
Definition amd_hip_bf16.h:522
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b)
Combine two __hip_bfloat16 to __hip_bfloat162.
Definition amd_hip_bf16.h:582
__BF16_HOST_DEVICE_STATIC__ short int __bfloat16_as_short(const __hip_bfloat16 h)
Reinterprets bits in a __hip_bfloat16 as a signed short integer.
Definition amd_hip_bf16.h:539
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __double2bfloat16(const double a)
Convert double to __hip_bfloat16.
Definition amd_hip_bf16.h:565
__BF16_HOST_DEVICE_STATIC__ float __low2float(const __hip_bfloat162 a)
Converts low 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:636
__BF16_HOST_DEVICE_STATIC__ float __high2float(const __hip_bfloat162 a)
Converts high 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:605
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a)
Returns low 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:628
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __float22bfloat162_rn(const float2 a)
Convert float2 to __hip_bfloat162.
Definition amd_hip_bf16.h:574
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __highs2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b)
Extracts high 16 bits from each and combines them.
Definition amd_hip_bf16.h:613
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b)
Extracts low 16 bits from each and combines them.
Definition amd_hip_bf16.h:652
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a)
Returns high 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:591
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a)
Returns high 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:597
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __short_as_bfloat16(const short int a)
Reinterprets short int into a bfloat16.
Definition amd_hip_bf16.h:661
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a)
Swaps both halves.
Definition amd_hip_bf16.h:644
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp10(const __hip_bfloat16 h)
Calculate exponential 10 of bfloat16.
Definition amd_hip_bf16.h:1618
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog(const __hip_bfloat16 h)
Calculate natural log of bfloat16.
Definition amd_hip_bf16.h:1642
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp2(const __hip_bfloat16 h)
Calculate exponential 2 of bfloat16.
Definition amd_hip_bf16.h:1626
__BF16_DEVICE_STATIC__ __hip_bfloat16 hceil(const __hip_bfloat16 h)
Calculate ceil of bfloat16.
Definition amd_hip_bf16.h:1594
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrcp(const __hip_bfloat16 h)
Calculate reciprocal.
Definition amd_hip_bf16.h:1666
__BF16_DEVICE_STATIC__ __hip_bfloat16 hsqrt(const __hip_bfloat16 h)
Calculate sqrt of bfloat16.
Definition amd_hip_bf16.h:1698
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog10(const __hip_bfloat16 h)
Calculate log 10 of bfloat16.
Definition amd_hip_bf16.h:1650
__BF16_DEVICE_STATIC__ __hip_bfloat16 hsin(const __hip_bfloat16 h)
Calculate sin of bfloat16.
Definition amd_hip_bf16.h:1690
__BF16_DEVICE_STATIC__ __hip_bfloat16 hfloor(const __hip_bfloat16 h)
Calculate floor of bfloat16.
Definition amd_hip_bf16.h:1634
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrint(const __hip_bfloat16 h)
Round to nearest int.
Definition amd_hip_bf16.h:1674
__BF16_DEVICE_STATIC__ __hip_bfloat16 htrunc(const __hip_bfloat16 h)
Calculate truncate of bfloat16.
Definition amd_hip_bf16.h:1706
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrsqrt(const __hip_bfloat16 h)
Reciprocal square root.
Definition amd_hip_bf16.h:1682
__BF16_DEVICE_STATIC__ __hip_bfloat16 hcos(const __hip_bfloat16 h)
Calculate cosine of bfloat16.
Definition amd_hip_bf16.h:1602
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog2(const __hip_bfloat16 h)
Calculate log 2 of bfloat16.
Definition amd_hip_bf16.h:1658
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp(const __hip_bfloat16 h)
Calculate exponential of bfloat16.
Definition amd_hip_bf16.h:1610
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2trunc(const __hip_bfloat162 h)
Calculate truncate of bfloat162.
Definition amd_hip_bf16.h:1826
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rcp(const __hip_bfloat162 h)
Calculate vector reciprocal.
Definition amd_hip_bf16.h:1786
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log(const __hip_bfloat162 h)
Calculate natural log of bfloat162.
Definition amd_hip_bf16.h:1762
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp(const __hip_bfloat162 h)
Calculate exponential of bfloat162.
Definition amd_hip_bf16.h:1730
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2cos(const __hip_bfloat162 h)
Calculate cosine of bfloat162.
Definition amd_hip_bf16.h:1722
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2sin(const __hip_bfloat162 h)
Calculate sin of bfloat162.
Definition amd_hip_bf16.h:1810
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log2(const __hip_bfloat162 h)
Calculate log 2 of bfloat162.
Definition amd_hip_bf16.h:1778
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2ceil(const __hip_bfloat162 h)
Calculate ceil of bfloat162.
Definition amd_hip_bf16.h:1714
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2floor(const __hip_bfloat162 h)
Calculate floor of bfloat162.
Definition amd_hip_bf16.h:1754
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp10(const __hip_bfloat162 h)
Calculate exponential 10 of bfloat162.
Definition amd_hip_bf16.h:1738
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp2(const __hip_bfloat162 h)
Calculate exponential 2 of bfloat162.
Definition amd_hip_bf16.h:1746
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log10(const __hip_bfloat162 h)
Calculate log 10 of bfloat162.
Definition amd_hip_bf16.h:1770
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rsqrt(const __hip_bfloat162 h)
Calculate vector reciprocal square root.
Definition amd_hip_bf16.h:1802
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rint(const __hip_bfloat162 h)
Calculate vector round to nearest int.
Definition amd_hip_bf16.h:1794
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2sqrt(const __hip_bfloat162 h)
Calculate sqrt of bfloat162.
Definition amd_hip_bf16.h:1818
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Definition hip_fp16_math_fwd.h:57
Definition amd_hip_vector_types.h:2035