#ifndef __TBB_machine_H
#define __TBB_machine_H
template <typename T, std::size_t S>
struct machine_load_store;

template <typename T, std::size_t S>
struct machine_load_store_relaxed;

template <typename T, std::size_t S>
struct machine_load_store_seq_cst;
inline static word fetch_store ( volatile void* location, word value );

inline static word fetch_store ( volatile void* location, word value );

#if _MSC_VER && !_WIN64
typedef intptr_t word;

inline static word fetch_store ( volatile void* location, word value );

inline static word fetch_store ( volatile void* location, word value );
#define __TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(M)                                        \
    inline void __TBB_machine_generic_store8##M(volatile void *ptr, int64_t value) {         \
        for(;;) {                                                                            \
            int64_t result = *(volatile int64_t *)ptr;                                       \
            if( __TBB_machine_cmpswp8##M(ptr,value,result)==result ) break;                  \
        }                                                                                    \
    }                                                                                        \

#define __TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(M)                                         \
    inline int64_t __TBB_machine_generic_load8##M(const volatile void *ptr) {                \
        /* Comparand and new value may be anything; they only must be equal, and */          \
        /* the value should have a low probability of being found in 'location'. */          \
        const int64_t anyvalue = 2305843009213693951LL;                                      \
        return __TBB_machine_cmpswp8##M(const_cast<volatile void *>(ptr),anyvalue,anyvalue); \
    }                                                                                        \
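/* Illustration: expanding __TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(full_fence)
   yields, in effect, an atomic 64-bit store built from a compare-and-swap loop:

       inline void __TBB_machine_generic_store8full_fence(volatile void *ptr, int64_t value) {
           for(;;) {
               int64_t result = *(volatile int64_t *)ptr;   // snapshot current contents
               if( __TBB_machine_cmpswp8full_fence(ptr,value,result)==result ) break; // raced; retry
           }
       }

   The matching load expansion issues a CAS whose comparand and new value are identical,
   so the location is never modified yet its current contents are returned atomically. */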
#define __TBB_ENDIAN_UNSUPPORTED -1
#define __TBB_ENDIAN_LITTLE       0
#define __TBB_ENDIAN_BIG          1
#define __TBB_ENDIAN_DETECT       2
#pragma managed(push, off)

#if __MINGW64__ || __MINGW32__
    extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
    #define __TBB_Yield()  SwitchToThread()
    #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
#elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
#elif defined(_M_IX86) && !defined(__TBB_WIN32_USE_CL_BUILTINS)
#elif defined(_M_X64)
#elif defined(_M_ARM) || defined(__TBB_WIN32_USE_CL_BUILTINS)

#elif __TBB_DEFINE_MIC
    #if (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)

#elif __linux__ || __FreeBSD__ || __NetBSD__ || __OpenBSD__
    #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
    #elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
    #elif __ARM_ARCH_7A__ || __aarch64__
    #elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT
    #if (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)

#elif __sun || __SUNPRO_CC
    #define __volatile__ volatile
    #if __i386 || __i386__
    #define __TBB_Yield() sched_yield()
#ifndef __TBB_64BIT_ATOMICS
#define __TBB_64BIT_ATOMICS 1
#endif
#if __TBB_USE_FENCED_ATOMICS
#define __TBB_machine_cmpswp1 __TBB_machine_cmpswp1full_fence
#define __TBB_machine_cmpswp2 __TBB_machine_cmpswp2full_fence
#define __TBB_machine_cmpswp4 __TBB_machine_cmpswp4full_fence
#define __TBB_machine_cmpswp8 __TBB_machine_cmpswp8full_fence

#if __TBB_WORDSIZE==8
#define __TBB_machine_fetchadd8             __TBB_machine_fetchadd8full_fence
#define __TBB_machine_fetchstore8           __TBB_machine_fetchstore8full_fence
#define __TBB_FetchAndAddWrelease(P,V)      __TBB_machine_fetchadd8release(P,V)
#define __TBB_FetchAndIncrementWacquire(P)  __TBB_machine_fetchadd8acquire(P,1)
#define __TBB_FetchAndDecrementWrelease(P)  __TBB_machine_fetchadd8release(P,(-1))
#else
#define __TBB_machine_fetchadd4             __TBB_machine_fetchadd4full_fence
#define __TBB_machine_fetchstore4           __TBB_machine_fetchstore4full_fence
#define __TBB_FetchAndAddWrelease(P,V)      __TBB_machine_fetchadd4release(P,V)
#define __TBB_FetchAndIncrementWacquire(P)  __TBB_machine_fetchadd4acquire(P,1)
#define __TBB_FetchAndDecrementWrelease(P)  __TBB_machine_fetchadd4release(P,(-1))
#endif /* __TBB_WORDSIZE==8 */
#else /* !__TBB_USE_FENCED_ATOMICS */
#define __TBB_FetchAndAddWrelease(P,V)      __TBB_FetchAndAddW(P,V)
#define __TBB_FetchAndIncrementWacquire(P)  __TBB_FetchAndAddW(P,1)
#define __TBB_FetchAndDecrementWrelease(P)  __TBB_FetchAndAddW(P,(-1))
#endif /* __TBB_USE_FENCED_ATOMICS */
#if __TBB_WORDSIZE==4
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_FetchAndAddW(P,V)      __TBB_machine_fetchadd4(P,V)
#define __TBB_FetchAndStoreW(P,V)    __TBB_machine_fetchstore4(P,V)
#elif __TBB_WORDSIZE==8
#if __TBB_USE_GENERIC_DWORD_LOAD_STORE || __TBB_USE_GENERIC_DWORD_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_STORE
#error These macros should only be used on 32-bit platforms.
#endif

#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_FetchAndAddW(P,V)      __TBB_machine_fetchadd8(P,V)
#define __TBB_FetchAndStoreW(P,V)    __TBB_machine_fetchstore8(P,V)
#else
#error Unsupported machine word size.
#endif
// Spin WHILE the value of the variable is equal to a given value.
template<typename T, typename U>
void spin_wait_while_eq( const volatile T& location, U value ) {
    atomic_backoff backoff;
    while( location==value ) backoff.pause();
}

// Spin UNTIL the value of the variable is equal to a given value.
template<typename T, typename U>
void spin_wait_until_eq( const volatile T& location, const U value ) {
    atomic_backoff backoff;
    while( location!=value ) backoff.pause();
}

// Spin while the predicate holds.
template <typename predicate_type>
void spin_wait_while(predicate_type condition){
    atomic_backoff backoff;
    while( condition() ) backoff.pause();
}
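/* Usage sketch (hypothetical 'ready' flag): a consumer spins with exponential
   backoff until a producer publishes a value:

       volatile intptr_t ready = 0;      // producer eventually stores 1
       spin_wait_until_eq( ready, 1 );   // consumer pauses between probes

   spin_wait_while generalizes this to an arbitrary predicate. */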
#ifndef __TBB_ENDIANNESS
#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
#endif

#if __TBB_USE_GENERIC_PART_WORD_CAS && __TBB_ENDIANNESS==__TBB_ENDIAN_UNSUPPORTED
#error Generic implementation of part-word CAS may not be used with __TBB_ENDIAN_UNSUPPORTED
#endif

#if __TBB_ENDIANNESS!=__TBB_ENDIAN_UNSUPPORTED
struct endianness{
    static bool is_big_endian(){
        #if __TBB_ENDIANNESS==__TBB_ENDIAN_DETECT
            const uint32_t probe = 0x03020100;
            return (((const char*)(&probe))[0]==0x03);
        #elif __TBB_ENDIANNESS==__TBB_ENDIAN_BIG || __TBB_ENDIANNESS==__TBB_ENDIAN_LITTLE
            return __TBB_ENDIANNESS==__TBB_ENDIAN_BIG;
        #else
            #error Unexpected value of __TBB_ENDIANNESS
        #endif
    }
};
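/* With __TBB_ENDIAN_DETECT, the probe 0x03020100 is laid out in memory as the bytes
   00 01 02 03 on a little-endian target and 03 02 01 00 on a big-endian one, so
   checking the first byte for 0x03 distinguishes the two at run time; compilers
   typically fold the test into a constant. */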
template<typename T>
inline T __TBB_MaskedCompareAndSwap (volatile T * const ptr, const T value, const T comparand ) {
    const uint32_t byte_offset            = (uint32_t) ((uintptr_t)ptr & 0x3);
    volatile uint32_t * const aligned_ptr = (uint32_t*)((uintptr_t)ptr - byte_offset );

    // location of T within the uint32_t for a C++ shift operation
    const uint32_t bits_to_shift     = 8*(endianness::is_big_endian() ? (4 - sizeof(T) - (byte_offset)) : byte_offset);
    const uint32_t mask              = (((uint32_t)1<<(sizeof(T)*8)) - 1 )<<bits_to_shift;
    // for signed T, any sign-extension bits in the cast value/comparand are immediately clipped by the mask
    const uint32_t shifted_comparand = ((uint32_t)comparand << bits_to_shift)&mask;
    const uint32_t shifted_value     = ((uint32_t)value     << bits_to_shift)&mask;

    for( atomic_backoff b;;b.pause() ) {
        const uint32_t surroundings  = *aligned_ptr & ~mask ; // may have changed during the pause
        const uint32_t big_comparand = surroundings | shifted_comparand ;
        const uint32_t big_value     = surroundings | shifted_value ;
        // __TBB_machine_cmpswp4 presumed to have full fence.
        const uint32_t big_result = (uint32_t)__TBB_machine_cmpswp4( aligned_ptr, big_value, big_comparand );
        if( big_result == big_comparand                    // CAS succeeded
          || ((big_result ^ big_comparand) & mask) != 0)   // CAS failed, and the bits of interest have changed
        {
            return T((big_result & mask) >> bits_to_shift);
        }
        // otherwise the CAS failed, but only the surrounding bytes changed; retry
    }
}
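/* Worked example for the masked CAS above: a 1-byte CAS at byte_offset 1 on a
   little-endian target gives bits_to_shift = 8 and mask = 0x0000FF00. Comparand and
   value are shifted into that lane, the unchanged neighboring bytes are merged in,
   and a full 4-byte __TBB_machine_cmpswp4 is issued. The loop retries only when the
   CAS failed because the *surrounding* bytes moved; if the masked lane itself
   differs from the comparand, the failure is genuine and the observed byte is
   returned to the caller. */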
#endif // __TBB_ENDIANNESS!=__TBB_ENDIAN_UNSUPPORTED
template<size_t S, typename T>
inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand );

template<>
inline int8_t __TBB_CompareAndSwapGeneric <1,int8_t> (volatile void *ptr, int8_t value, int8_t comparand ) {
#if __TBB_USE_GENERIC_PART_WORD_CAS
    return __TBB_MaskedCompareAndSwap<int8_t>((volatile int8_t *)ptr,value,comparand);
#else
    return __TBB_machine_cmpswp1(ptr,value,comparand);
#endif
}

template<>
inline int16_t __TBB_CompareAndSwapGeneric <2,int16_t> (volatile void *ptr, int16_t value, int16_t comparand ) {
#if __TBB_USE_GENERIC_PART_WORD_CAS
    return __TBB_MaskedCompareAndSwap<int16_t>((volatile int16_t *)ptr,value,comparand);
#else
    return __TBB_machine_cmpswp2(ptr,value,comparand);
#endif
}
#if __TBB_64BIT_ATOMICS
template<size_t S, typename T>
inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
    T result;
    for( atomic_backoff b;;b.pause() ) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // __TBB_CompareAndSwapGeneric presumed to have full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
            break;
    }
    return result;
}
template<size_t S, typename T>
inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
    T result;
    for( atomic_backoff b;;b.pause() ) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // __TBB_CompareAndSwapGeneric presumed to have full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
            break;
    }
    return result;
}
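/* Both generic operations above reduce to the same CAS-retry idiom. E.g., atomically
   incrementing a hypothetical 32-bit counter through the generic path:

       volatile int32_t counter = 0;
       int32_t previous = __TBB_FetchAndAddGeneric<4,int32_t>( &counter, 1 ); // previous == 0
*/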
#if __TBB_USE_GENERIC_PART_WORD_CAS
#define __TBB_machine_cmpswp1 tbb::internal::__TBB_CompareAndSwapGeneric<1,int8_t>
#define __TBB_machine_cmpswp2 tbb::internal::__TBB_CompareAndSwapGeneric<2,int16_t>
#endif

#if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_PART_WORD_FETCH_ADD
#define __TBB_machine_fetchadd1 tbb::internal::__TBB_FetchAndAddGeneric<1,int8_t>
#define __TBB_machine_fetchadd2 tbb::internal::__TBB_FetchAndAddGeneric<2,int16_t>
#endif

#if __TBB_USE_GENERIC_FETCH_ADD
#define __TBB_machine_fetchadd4 tbb::internal::__TBB_FetchAndAddGeneric<4,int32_t>
#endif

#if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_ADD
#define __TBB_machine_fetchadd8 tbb::internal::__TBB_FetchAndAddGeneric<8,int64_t>
#endif

#if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_PART_WORD_FETCH_STORE
#define __TBB_machine_fetchstore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,int8_t>
#define __TBB_machine_fetchstore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,int16_t>
#endif

#if __TBB_USE_GENERIC_FETCH_STORE
#define __TBB_machine_fetchstore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,int32_t>
#endif

#if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_DWORD_FETCH_STORE
#define __TBB_machine_fetchstore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,int64_t>
#endif
#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
#define __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(S)                                            \
    atomic_selector<S>::word atomic_selector<S>::fetch_store ( volatile void* location, word value ) { \
        return __TBB_machine_fetchstore##S( location, value );                                         \
    }

__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(1)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(2)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(4)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(8)

#undef __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE
#endif /* __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
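/* Illustration: __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(4) expands to

       atomic_selector<4>::word atomic_selector<4>::fetch_store ( volatile void* location, word value ) {
           return __TBB_machine_fetchstore4( location, value );
       }

   i.e. each selector's fetch_store forwards to the fetch-and-store primitive of the
   matching operand size. */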
#if __TBB_USE_GENERIC_DWORD_LOAD_STORE
#if ! __TBB_USE_FENCED_ATOMICS
// Name forwarding lets the generic 64-bit load/store reuse the atomicity of the full-fence CAS loop.
#define __TBB_machine_cmpswp8full_fence __TBB_machine_cmpswp8
#endif
__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(full_fence)
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(full_fence)
#if ! __TBB_USE_FENCED_ATOMICS
#undef __TBB_machine_cmpswp8full_fence
#endif
#define __TBB_machine_store8 tbb::internal::__TBB_machine_generic_store8full_fence
#define __TBB_machine_load8  tbb::internal::__TBB_machine_generic_load8full_fence
#endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */
#if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
template <typename T, size_t S>
struct machine_load_store {
    static T load_with_acquire ( const volatile T& location ) {
        T to_return = location;
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
template <typename T>
struct machine_load_store<T,8> {
#if __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE
template <typename T, size_t S>
struct machine_load_store_seq_cst {
    static T load ( const volatile T& location ) {
        __TBB_full_memory_fence();
        return machine_load_store<T,S>::load_with_acquire( location );
    }
#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
    static void store ( volatile T &location, T value ) {
        atomic_selector<S>::fetch_store( (volatile void*)&location, (typename atomic_selector<S>::word)value );
    }
#else /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
    static void store ( volatile T &location, T value ) {
        machine_load_store<T,S>::store_with_release( location, value );
        __TBB_full_memory_fence();
    }
#endif
};
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
/** The implementation does not use __TBB_machine_load8/store8 here, as they
    are not required to be sequentially consistent. **/
template <typename T>
struct machine_load_store_seq_cst<T,8> {
    static T load ( const volatile T& location ) {
        // Comparand and new value may be anything; they only must be equal, and
        // the value should have a low probability of being found in 'location'.
        const int64_t anyvalue = 2305843009213693951LL;
        return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T*>(&location), anyvalue, anyvalue );
    }
    static void store ( volatile T &location, T value ) {
#if __TBB_GCC_VERSION >= 40702
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
        // An atomic initialization leads to reading of uninitialized memory
        int64_t result = (volatile int64_t&)location;
#if __TBB_GCC_VERSION >= 40702
#pragma GCC diagnostic pop
#endif
        while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)value, result) != result ) {
            result = (volatile int64_t&)location;
        }
    }
};
#if __TBB_USE_GENERIC_RELAXED_LOAD_STORE
// Relaxed operations add the volatile qualifier to prevent the compiler from optimizing them out.
template <typename T, size_t S>
struct machine_load_store_relaxed {
    static inline T load ( const volatile T& location ) {
        return location;
    }
    static inline void store ( volatile T& location, T value ) {
        location = value;
    }
};
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
template <typename T>
struct machine_load_store_relaxed<T,8> {
    static inline T load ( const volatile T& location ) {
        return (T)__TBB_machine_load8( (const volatile void*)&location );
    }
    static inline void store ( volatile T& location, T value ) {
        __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
    }
};
#undef __TBB_WORDSIZE // this macro must not be used outside of the atomic machinery
template<typename T, typename V>
inline void __TBB_store_with_release(volatile T& location, V value) {
    machine_load_store<T,sizeof(T)>::store_with_release( location, T(value) );
}

template<typename T, typename V>
inline void __TBB_store_full_fence(volatile T& location, V value) {
    machine_load_store_seq_cst<T,sizeof(T)>::store( location, T(value) );
}

template<typename T, typename V>
inline void __TBB_store_relaxed(volatile T& location, V value) {
    machine_load_store_relaxed<T,sizeof(T)>::store( location, T(value) );
}
#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict

#if __TBB_ALIGNAS_PRESENT

// Use C++11 keywords alignas and alignof
#define __TBB_DefineTypeWithAlignment(PowerOf2)                         \
struct alignas(PowerOf2) __TBB_machine_type_with_alignment_##PowerOf2 { \
    uint32_t member[PowerOf2/sizeof(uint32_t)];                         \
};
#define __TBB_alignof(T) alignof(T)

#elif __TBB_ATTRIBUTE_ALIGNED_PRESENT

#define __TBB_DefineTypeWithAlignment(PowerOf2)       \
struct __TBB_machine_type_with_alignment_##PowerOf2 { \
    uint32_t member[PowerOf2/sizeof(uint32_t)];       \
} __attribute__((aligned(PowerOf2)));
#define __TBB_alignof(T) __alignof__(T)

#elif __TBB_DECLSPEC_ALIGN_PRESENT

#define __TBB_DefineTypeWithAlignment(PowerOf2)       \
__declspec(align(PowerOf2))                           \
struct __TBB_machine_type_with_alignment_##PowerOf2 { \
    uint32_t member[PowerOf2/sizeof(uint32_t)];       \
};
#define __TBB_alignof(T) __alignof(T)

#else /* A compiler with unknown syntax for data alignment */
#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T)
#endif
typedef __TBB_machine_type_with_alignment_64 __TBB_machine_type_with_strictest_alignment;
#if __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN

// Forcing instantiation via sizeof(T) works around compilers that cannot take
// the alignment of a not-yet-instantiated template type.
template<size_t Size, typename T>
struct work_around_alignment_bug {
    static const size_t alignment = __TBB_alignof(T);
};
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
#else
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__TBB_alignof(T)>
#endif
static const unsigned char reversed_byte_table[256] = {
    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};
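// Example entries: reversed_byte_table[0x01] == 0x80 and reversed_byte_table[0x03] == 0xC0;
// entry i holds the bit-reversal of the 8-bit value i.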
#define __TBB_load_acquire  __TBB_load_with_acquire
#define __TBB_store_release __TBB_store_with_release
template <typename T>
inline intptr_t __TBB_Log2( T x ) {
    if( x==0 ) return -1;
    intptr_t result = 0;

    uintptr_t tmp_;
    if( sizeof(x)>4 && (tmp_ = ((uint64_t)x)>>32) ) { x=tmp_; result += 32; }

    if( uintptr_t tmp = x>>16 ) { x=tmp; result += 16; }
    if( uintptr_t tmp = x>>8 )  { x=tmp; result += 8; }
    if( uintptr_t tmp = x>>4 )  { x=tmp; result += 4; }
    if( uintptr_t tmp = x>>2 )  { x=tmp; result += 2; }

    return (x&2)? result+1: result;
}
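/* Worked example: __TBB_Log2(40). 40 is 101000 in binary; the >>16 and >>8 probes
   find nothing, >>4 leaves 10 (result += 4), >>2 leaves nothing, and the final
   (x&2) test adds 1, giving 5 == floor(log2(40)). */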
#ifndef __TBB_AtomicOR
inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
    for( atomic_backoff b;;b.pause() ) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
        if( result==tmp ) break;
    }
}
#endif
#ifndef __TBB_AtomicAND
inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
    for( atomic_backoff b;;b.pause() ) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
        if( result==tmp ) break;
    }
}
#endif
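/* Usage sketch (hypothetical 'status' word): set and clear bit 0 without disturbing
   concurrent updates of the other bits:

       volatile uintptr_t status = 0;
       __TBB_AtomicOR ( &status, uintptr_t(1) );   // set bit 0
       __TBB_AtomicAND( &status, ~uintptr_t(1) );  // clear bit 0
*/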
#if __TBB_PREFETCHING
#ifndef __TBB_cl_prefetch
#error This platform does not define cache management primitives required for __TBB_PREFETCHING
#endif

#ifndef __TBB_cl_evict
#define __TBB_cl_evict(p)
#endif
#endif /* __TBB_PREFETCHING */
#ifndef __TBB_TryLockByte

#ifndef __TBB_LockByte

#ifndef __TBB_UnlockByte
#define __TBB_UnlockByte(addr) __TBB_store_with_release((addr),0)
#endif
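/* Usage sketch: the byte-lock primitives compose into a minimal spin lock, assuming
   a flag of the platform's __TBB_atomic_flag type:

       __TBB_atomic_flag lock_flag = 0;
       __TBB_LockByte( lock_flag );      // spin until acquired
       // ... critical section ...
       __TBB_UnlockByte( lock_flag );    // release: __TBB_store_with_release( lock_flag, 0 )
*/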
#if ( __TBB_x86_32 || __TBB_x86_64 )
    if( !res ) __TBB_TryLockByteElidedCancel();
        __TBB_TryLockByteElidedCancel();
#ifndef __TBB_ReverseByte
inline unsigned char __TBB_ReverseByte(unsigned char src) {
    return tbb::internal::reversed_byte_table[src];
}
#endif

template<typename T>
T __TBB_ReverseBits(T src) {
    T dst;
    unsigned char *original = (unsigned char *) &src;
    unsigned char *reversed = (unsigned char *) &dst;
    for( int i = sizeof(T)-1; i >= 0; i-- )
        reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );
    return dst;
}
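/* Example: __TBB_ReverseBits<uint16_t>(0x00FF) == 0xFF00. Each byte is mirrored
   through the table and byte positions are swapped, reversing all 16 bits
   independently of the host byte order. */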