libstdc++
ext/opt_random.h
1 // Optimizations for random number extensions, x86 version -*- C++ -*-
2 
3 // Copyright (C) 2012-2013 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file ext/opt_random.h
26  * This is an internal header file, included by other library headers.
27  * Do not attempt to use it directly. @headername{ext/random}
28  */
29 
30 #ifndef _EXT_OPT_RANDOM_H
31 #define _EXT_OPT_RANDOM_H 1
32 
33 #pragma GCC system_header
34 
35 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
36 
37 #ifdef __SSE2__
38 
39 namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
40 {
41 _GLIBCXX_BEGIN_NAMESPACE_VERSION
42 
43  namespace {
44 
45  template<size_t __sl1, size_t __sl2, size_t __sr1, size_t __sr2,
46  uint32_t __msk1, uint32_t __msk2, uint32_t __msk3, uint32_t __msk4>
47  inline __m128i __sse2_recursion(__m128i __a, __m128i __b,
48  __m128i __c, __m128i __d)
49  {
50  __m128i __y = _mm_srli_epi32(__b, __sr1);
51  __m128i __z = _mm_srli_si128(__c, __sr2);
52  __m128i __v = _mm_slli_epi32(__d, __sl1);
53  __z = _mm_xor_si128(__z, __a);
54  __z = _mm_xor_si128(__z, __v);
55  __m128i __x = _mm_slli_si128(__a, __sl2);
56  __y = _mm_and_si128(__y, _mm_set_epi32(__msk4, __msk3, __msk2, __msk1));
57  __z = _mm_xor_si128(__z, __x);
58  return _mm_xor_si128(__z, __y);
59  }
60 
61  }
62 
63 
64 #define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_GEN_READ 1
65  template<typename _UIntType, size_t __m,
66  size_t __pos1, size_t __sl1, size_t __sl2,
67  size_t __sr1, size_t __sr2,
68  uint32_t __msk1, uint32_t __msk2,
69  uint32_t __msk3, uint32_t __msk4,
70  uint32_t __parity1, uint32_t __parity2,
71  uint32_t __parity3, uint32_t __parity4>
72  void simd_fast_mersenne_twister_engine<_UIntType, __m,
73  __pos1, __sl1, __sl2, __sr1, __sr2,
74  __msk1, __msk2, __msk3, __msk4,
75  __parity1, __parity2, __parity3,
76  __parity4>::
77  _M_gen_rand(void)
78  {
79  __m128i __r1 = _mm_load_si128(&_M_state[_M_nstate - 2]);
80  __m128i __r2 = _mm_load_si128(&_M_state[_M_nstate - 1]);
81 
82  size_t __i;
83  for (__i = 0; __i < _M_nstate - __pos1; ++__i)
84  {
85  __m128i __r = __sse2_recursion<__sl1, __sl2, __sr1, __sr2,
86  __msk1, __msk2, __msk3, __msk4>
87  (_M_state[__i], _M_state[__i + __pos1], __r1, __r2);
88  _mm_store_si128(&_M_state[__i], __r);
89  __r1 = __r2;
90  __r2 = __r;
91  }
92  for (; __i < _M_nstate; ++__i)
93  {
94  __m128i __r = __sse2_recursion<__sl1, __sl2, __sr1, __sr2,
95  __msk1, __msk2, __msk3, __msk4>
96  (_M_state[__i], _M_state[__i + __pos1 - _M_nstate], __r1, __r2);
97  _mm_store_si128(&_M_state[__i], __r);
98  __r1 = __r2;
99  __r2 = __r;
100  }
101 
102  _M_pos = 0;
103  }
104 
105 
106 #define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_OPERATOREQUAL 1
107  template<typename _UIntType, size_t __m,
108  size_t __pos1, size_t __sl1, size_t __sl2,
109  size_t __sr1, size_t __sr2,
110  uint32_t __msk1, uint32_t __msk2,
111  uint32_t __msk3, uint32_t __msk4,
112  uint32_t __parity1, uint32_t __parity2,
113  uint32_t __parity3, uint32_t __parity4>
114  bool
115  operator==(const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
116  __m, __pos1, __sl1, __sl2, __sr1, __sr2,
117  __msk1, __msk2, __msk3, __msk4,
118  __parity1, __parity2, __parity3, __parity4>& __lhs,
119  const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
120  __m, __pos1, __sl1, __sl2, __sr1, __sr2,
121  __msk1, __msk2, __msk3, __msk4,
122  __parity1, __parity2, __parity3, __parity4>& __rhs)
123  {
124  __m128i __res = _mm_cmpeq_epi8(__lhs._M_state[0], __rhs._M_state[0]);
125  for (size_t __i = 1; __i < __lhs._M_nstate; ++__i)
126  __res = _mm_and_si128(__res, _mm_cmpeq_epi8(__lhs._M_state[__i],
127  __rhs._M_state[__i]));
128  return (_mm_movemask_epi8(__res) == 0xffff
129  && __lhs._M_pos == __rhs._M_pos);
130  }
131 
132 
133 _GLIBCXX_END_NAMESPACE_VERSION
134 } // namespace
135 
136 #endif // __SSE2__
137 
138 #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
139 
140 #endif // _EXT_OPT_RANDOM_H