Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
gcc_arm.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2019 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16 
17 
18 
19 */
20 
21 /*
22  Platform isolation layer for the ARMv7-a architecture.
23 */
24 
25 #ifndef __TBB_machine_H
26 #error Do not include this file directly; include tbb_machine.h instead
27 #endif
28 
29 #if __ARM_ARCH_7A__
30 
31 #include <sys/param.h>
32 #include <unistd.h>
33 
34 #define __TBB_WORDSIZE 4
35 
36 // Traditionally ARM is little-endian.
37 // Note that, since only the layout of aligned 32-bit words is of interest,
38 // any apparent PDP-endianness of 32-bit words at half-word alignment or
39 // any little-endian ordering of big-endian 32-bit words in 64-bit quantities
40 // may be disregarded for this setting.
41 #if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__)
42  #define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
43 #elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)
44  #define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
45 #elif defined(__BYTE_ORDER__)
46  #define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
47 #else
48  #define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
49 #endif
50 
51 
// Compiler-only fence: forbids the compiler from reordering memory accesses
// across this point; emits no machine instructions.
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
// Full hardware barrier: "dmb ish" orders all memory accesses with respect to
// the inner shareable domain (i.e. all cores), plus a compiler barrier.
#define __TBB_full_memory_fence() __asm__ __volatile__("dmb ish": : :"memory")
// ARMv7-A offers no cheaper one-directional barriers, so the acquire/release/
// control-consistency helpers all fall back to the full DMB barrier.
#define __TBB_control_consistency_helper() __TBB_full_memory_fence()
#define __TBB_acquire_consistency_helper() __TBB_full_memory_fence()
#define __TBB_release_consistency_helper() __TBB_full_memory_fence()
57 
58 //--------------------------------------------------
59 // Compare and swap
60 //--------------------------------------------------
61 
/** Full-fence 32-bit compare-and-swap.
    Atomically: if *ptr == comparand, store value into *ptr.
    Implemented as an LDREX/STREX retry loop; the exclusive store fails (res != 0)
    whenever another observer touched the reservation, so the loop re-reads.
    Full memory fences on both sides give the sequentially-consistent semantics
    TBB requires of its machine-level CAS.
    @param ptr       4-byte aligned location to update.
    @param value     value stored on success.
    @param comparand expected current contents of *ptr.
    @return the value of *ptr observed by the operation (== comparand iff the swap happened).
*/
static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand )
{
    int32_t oldval, res;

    __TBB_full_memory_fence();

    do {
        __asm__ __volatile__(
            "ldrex      %1, [%3]\n"     // exclusive-load current value
            "mov        %0, #0\n"       // res = 0: assume success
            "cmp        %1, %4\n"       // compare with comparand
            "it         eq\n"
            "strexeq    %0, %5, [%3]\n" // store value iff equal; res != 0 if reservation lost
            : "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int32_t*)ptr)
            : "r" ((volatile int32_t *)ptr), "Ir" (comparand), "r" (value)
            : "cc");
    } while (res);                      // retry until the exclusive store succeeds or values differ

    __TBB_full_memory_fence();

    return oldval;
}
91 
/** Full-fence 64-bit compare-and-swap.
    Same contract as __TBB_machine_cmpswp4 but on a 64-bit quantity, using the
    doubleword-exclusive LDREXD/STREXD pair. %H1/%H4/%H5 name the high register
    of each 64-bit operand pair; both halves must compare equal before the
    conditional store is attempted.
    @param ptr       8-byte aligned location to update.
    @param value     value stored on success.
    @param comparand expected current contents of *ptr.
    @return the value of *ptr observed by the operation (== comparand iff the swap happened).
*/
static inline int64_t __TBB_machine_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand )
{
    int64_t oldval;
    int32_t res;

    __TBB_full_memory_fence();

    do {
        __asm__ __volatile__(
            "mov        %0, #0\n"           // res = 0: assume success
            "ldrexd     %1, %H1, [%3]\n"    // exclusive-load 64-bit value
            "cmp        %1, %4\n"           // compare low words
            "it         eq\n"
            "cmpeq      %H1, %H4\n"         // then high words
            "it         eq\n"
            "strexdeq   %0, %5, %H5, [%3]"  // store 64-bit value iff both halves matched
            : "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int64_t*)ptr)
            : "r" ((volatile int64_t *)ptr), "r" (comparand), "r" (value)
            : "cc");
    } while (res);                          // retry if the exclusive store failed

    __TBB_full_memory_fence();

    return oldval;
}
124 
/** Full-fence 32-bit fetch-and-add.
    Atomically adds addend to *ptr via an LDREX/STREX retry loop and returns
    the PREVIOUS value. Fences on both sides provide full-fence semantics.
    @param ptr    4-byte aligned location to update.
    @param addend value added to *ptr.
    @return the value of *ptr immediately before the addition.
*/
static inline int32_t __TBB_machine_fetchadd4(volatile void* ptr, int32_t addend)
{
    unsigned long tmp;
    int32_t result, tmp2;

    __TBB_full_memory_fence();

    __asm__ __volatile__(
"1:     ldrex   %0, [%4]\n"     // result = *ptr (exclusive)
"       add     %3, %0, %5\n"   // tmp2 = result + addend
"       strex   %1, %3, [%4]\n" // try to store the sum
"       cmp     %1, #0\n"
"       bne     1b\n"           // lost the reservation: retry
    : "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int32_t*)ptr), "=&r"(tmp2)
    : "r" ((volatile int32_t *)ptr), "Ir" (addend)
    : "cc");

    __TBB_full_memory_fence();

    return result;
}
146 
/** Full-fence 64-bit fetch-and-add.
    64-bit analogue of __TBB_machine_fetchadd4 using LDREXD/STREXD; the
    addition is done as adds/adc to carry from the low into the high word.
    @param ptr    8-byte aligned location to update.
    @param addend value added to *ptr.
    @return the value of *ptr immediately before the addition.
*/
static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend)
{
    unsigned long tmp;
    int64_t result, tmp2;

    __TBB_full_memory_fence();

    __asm__ __volatile__(
"1:     ldrexd  %0, %H0, [%4]\n"        // result = *ptr (64-bit exclusive)
"       adds    %3, %0, %5\n"           // low word add, sets carry
"       adc     %H3, %H0, %H5\n"        // high word add with carry
"       strexd  %1, %3, %H3, [%4]\n"    // try to store the 64-bit sum
"       cmp     %1, #0\n"
"       bne     1b"                     // lost the reservation: retry
    : "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int64_t*)ptr), "=&r"(tmp2)
    : "r" ((volatile int64_t *)ptr), "r" (addend)
    : "cc");

    __TBB_full_memory_fence();

    return result;
}
170 
171 namespace tbb {
172 namespace internal {
173  template <typename T, size_t S>
174  struct machine_load_store_relaxed {
175  static inline T load ( const volatile T& location ) {
176  const T value = location;
177 
178  /*
179  * An extra memory barrier is required for errata #761319
180  * Please see http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a
181  */
183  return value;
184  }
185 
186  static inline void store ( volatile T& location, T value ) {
187  location = value;
188  }
189  };
190 }} // namespaces internal, tbb
191 
// Machine specific atomic operations

// Map TBB's generic CAS entry points onto the ARMv7 implementations above.
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)

// Use generics for some things: operations with no native implementation here
// (sub-word CAS/fetch-add/fetch-store, fenced and double-word loads/stores)
// are synthesized by tbb_machine.h from the primitives defined above.
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_STORE 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
205 #elif defined __aarch64__
206 // Generic gcc implementations are fine for ARMv8-a except __TBB_PAUSE.
207 #include "gcc_generic.h"
208 #else
209 #error compilation requires an ARMv7-a or ARMv8-a architecture.
210 #endif // __ARM_ARCH_7A__
211 
// Spin-wait backoff primitive, shared by the ARMv7-A and ARMv8-A paths:
// issues the ARM "yield" hint `delay` times to let a sibling hardware
// thread make progress while this one busy-waits.
inline void __TBB_machine_pause (int32_t delay)
{
    for ( ; delay > 0; --delay ) {
        __asm__ __volatile__("yield" ::: "memory");
    }
}
// Hook TBB's generic spin-wait machinery up to the pause primitive above.
#define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_acquire_consistency_helper()
Definition: gcc_generic.h:61
#define __TBB_full_memory_fence()
Definition: gcc_generic.h:63
The graph class.
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
long __TBB_machine_fetchadd4(volatile void *ptr, __int32 addend)
static void store(T &location, T value)
Definition: icc_generic.h:112
#define __TBB_machine_cmpswp8
Definition: ibm_aix51.h:46
static int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend)
Definition: macos_common.h:117
#define __TBB_machine_cmpswp4
Definition: ibm_aix51.h:45
void __TBB_machine_pause(int32_t delay)
Definition: gcc_arm.h:212
static T load(const T &location)
Definition: icc_generic.h:109

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.