/search.css" rel="stylesheet" type="text/css"/> /search.js">
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
array.h
Go to the documentation of this file.
1 /*
2 Copyright 2010-2011, D. E. Shaw Research.
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 
9 * Redistributions of source code must retain the above copyright
10  notice, this list of conditions, and the following disclaimer.
11 
12 * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions, and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16 * Neither the name of D. E. Shaw Research nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32 #ifndef _r123array_dot_h__
33 #define _r123array_dot_h__
35 #include "features/sse.h"
36 
37 #ifndef __cplusplus
38 #define CXXMETHODS(_N, W, T)
39 #define CXXOVERLOADS(_N, W, T)
40 #else
41 
42 #include <stddef.h>
43 #include <algorithm>
44 #include <stdexcept>
45 #include <iterator>
46 #include <limits>
47 #include <iostream>
48 
/**
 * Assemble one value_type from consecutive 32-bit words, least-significant
 * word first.
 *
 * Reads (3+sizeof(value_type))/4 words starting at p32, i.e. one word for
 * types of up to four bytes and two words for an eight-byte type.  When
 * value_type is narrower than 32 bits, the high bits of the single word
 * consumed are discarded by the conversion.
 *
 * @param p32  first word to read; the caller's pointer is not advanced.
 * @return     the assembled value.
 */
template <typename value_type>
inline R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){
    const size_t nwords = (3 + sizeof(value_type)) / 4;
    value_type v = 0;
    for(size_t i = 0; i < nwords; ++i)
        v |= static_cast<value_type>(*p32++) << (32*i);
    return v;
}
75 
// Work-alike methods and typedefs modeled on std::array.
//
// CXXMETHODS(_N, W, T) is expanded inside the body of struct r123array<_N>x<W>,
// after the data member `T v[_N];` that every method below operates on.
//   _N  number of elements
//   W   width tag used only to spell the struct name (r123array##_N##x##W)
//   T   element type
// The expansion ends in a `protected:` section, so anything the enclosing
// struct declares after it is protected.
#define CXXMETHODS(_N, W, T) \
    typedef T value_type; \
    typedef T* iterator; \
    typedef const T* const_iterator; \
    typedef value_type& reference; \
    typedef const value_type& const_reference; \
    typedef size_t size_type; \
    typedef ptrdiff_t difference_type; \
    typedef T* pointer; \
    typedef const T* const_pointer; \
    typedef std::reverse_iterator<iterator> reverse_iterator; \
    typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \
    /* Boost.array has static_size.  C++11 specializes tuple_size */ \
    enum {static_size = _N}; \
    /* Element access: operator[] is unchecked, at() throws */ \
    /* std::out_of_range (via R123_THROW) when i >= _N.      */ \
    R123_CUDA_DEVICE reference operator[](size_type i){return v[i];} \
    R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];} \
    R123_CUDA_DEVICE reference at(size_type i){ if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
    R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
    /* Capacity: the size is fixed at _N. */ \
    R123_CUDA_DEVICE size_type size() const { return  _N; } \
    R123_CUDA_DEVICE size_type max_size() const { return _N; } \
    R123_CUDA_DEVICE bool empty() const { return _N==0; } \
    /* Iterators are plain pointers into v. */ \
    R123_CUDA_DEVICE iterator begin() { return &v[0]; } \
    R123_CUDA_DEVICE iterator end() { return &v[_N]; } \
    R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; } \
    R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; } \
    R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; } \
    R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; } \
    R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); } \
    R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \
    R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); } \
    R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \
    R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \
    R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); } \
    R123_CUDA_DEVICE pointer data(){ return &v[0]; } \
    R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; } \
    R123_CUDA_DEVICE reference front(){ return v[0]; } \
    R123_CUDA_DEVICE const_reference front() const{ return v[0]; } \
    R123_CUDA_DEVICE reference back(){ return v[_N-1]; } \
    R123_CUDA_DEVICE const_reference back() const{ return v[_N-1]; } \
    /* Element-wise equality. */ \
    R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ \
        /* CUDA3 does not have std::equal */ \
        for (size_t i = 0; i < _N; ++i) \
            if (v[i] != rhs.v[i]) return false; \
        return true; \
    } \
    R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } \
    /* CUDA3 does not have std::fill_n */ \
    R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } \
    R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ \
        /* CUDA3 does not have std::swap_ranges */ \
        for (size_t i = 0; i < _N; ++i) { \
            T tmp = v[i]; \
            v[i] = rhs.v[i]; \
            rhs.v[i] = tmp; \
        } \
    } \
    /* incr(n): add n to the array treated as one multiword counter */ \
    /* with v[0] least significant, carrying into v[1], v[2], ...   */ \
    /* A carry out of the last element is dropped.  Returns *this.  */ \
    R123_CUDA_DEVICE r123array##_N##x##W& incr(R123_ULONG_LONG n=1){ \
        /* This test is tricky because we're trying to avoid spurious \
           complaints about illegal shifts, yet still be compile-time \
           evaluated.  It is true only when n has bits set above the \
           width of T, i.e. when n does not fit in one element. */ \
        if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) \
            return incr_carefully(n); \
        if(n==1){ \
            ++v[0]; \
            if(_N==1 || R123_BUILTIN_EXPECT(!!v[0], 1)) return *this; \
        }else{ \
            v[0] += n; \
            if(_N==1 || R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this; \
        } \
        /* Getting here means v[0] wrapped, so a carry of one must be \
           propagated upward. \
           We expect that the N==?? tests will be \
           constant-folded/optimized away by the compiler, so only the \
           overflow tests (!!v[i]) remain to be done at runtime.  For \
           small values of N, it would be better to do this as an \
           unconditional sequence of adc.  An experiment/optimization \
           for another day... \
           N.B.  The weird subscripting: v[_N>3?3:0] is to silence \
           a spurious error from icpc \
           */ \
        ++v[_N>1?1:0]; \
        if(_N==2 || R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; \
        ++v[_N>2?2:0]; \
        if(_N==3 || R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this; \
        ++v[_N>3?3:0]; \
        for(size_t i=4; i<_N; ++i){ \
            if( R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this; \
            ++v[i]; \
        } \
        return *this; \
    } \
    /* seed(SeedSeq) would be a constructor if having a constructor */ \
    /* didn't cause headaches with defaults */ \
    /* It asks ss.generate() for (3+sizeof(value_type))/4 32-bit    */ \
    /* words per element and assembles each element from its words */ \
    /* with assemble_from_u32.                                      */ \
    template <typename SeedSeq> \
    R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq &ss){ \
        r123array##_N##x##W ret; \
        const size_t Ngen = _N*((3+sizeof(value_type))/4); \
        uint32_t u32[Ngen]; \
        uint32_t *p32 = &u32[0]; \
        ss.generate(&u32[0], &u32[Ngen]); \
        for(size_t i=0; i<_N; ++i){ \
            ret.v[i] = assemble_from_u32<value_type>(p32); \
            p32 += (3+sizeof(value_type))/4; \
        } \
        return ret; \
    } \
protected: \
    /* Slow path of incr(): adds an n that may not fit in a single */ \
    /* element by feeding it in one element-width chunk at a time. */ \
    R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(R123_ULONG_LONG n){ \
        /* n may be greater than the maximum value of a single value_type */ \
        value_type vtn; \
        vtn = n; \
        v[0] += n; \
        const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); \
        for(size_t i=1; i<_N; ++i){ \
            if(rshift){ \
                n >>= rshift; \
            }else{ \
                n=0; \
            } \
            /* v[i-1] ended up smaller than the chunk just added to */ \
            /* it, so that addition wrapped: carry one upward.      */ \
            if( v[i-1] < vtn ) \
                ++n; \
            if( n==0 ) break; \
            vtn = n; \
            v[i] += n; \
        } \
        return *this; \
    }
203 
// There are several tricky considerations for the insertion and extraction
// operators:
// - we would like to be able to print r123array16x8 as a sequence of 16 integers,
//   not as 16 bytes.
// - we would like to be able to print r123array1xm128i.
// - we do not want an int conversion operator in r123m128i because it causes
//   lots of ambiguity problems with automatic promotions.
// Solution: r123arrayinsertable and r123arrayextractable

/**
 * Output adaptor for a single array element.
 *
 * Holds a reference to the element (it does not copy it, so the element must
 * outlive the adaptor) and forwards operator<< to the element's own
 * stream insertion.
 */
template<typename T>
struct r123arrayinsertable{
    const T& v;
    r123arrayinsertable(const T& t_) : v(t_) {}
    friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<T>& t){
        return os << t.v;
    }
};

/**
 * uint8_t elements are widened to int before insertion so that they are
 * written as numbers rather than as characters.
 */
template<>
struct r123arrayinsertable<uint8_t>{
    const uint8_t& v;
    r123arrayinsertable(const uint8_t& t_) : v(t_) {}
    friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<uint8_t>& t){
        return os << static_cast<int>(t.v);
    }
};
230 
/**
 * Input adaptor for a single array element.
 *
 * Holds a reference to the element and forwards operator>> to the element's
 * own stream extraction.
 */
template<typename T>
struct r123arrayextractable{
    T& v;
    r123arrayextractable(T& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<T>& t){
        return is >> t.v;
    }
};

/**
 * uint8_t elements are read as an int so that the text "65" yields the
 * value 65 rather than the character '6'.
 *
 * A number outside [0, 255] cannot be stored in the element: the nearest
 * bound is stored and failbit is set on the stream, instead of silently
 * keeping only the low eight bits.
 */
template<>
struct r123arrayextractable<uint8_t>{
    uint8_t& v;
    r123arrayextractable(uint8_t& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){
        /* Initialized so that a failed extraction never leaves an
           indeterminate value to be copied into the element. */
        int i = 0;
        is >> i;
        if(i < 0){
            t.v = 0;
            is.setstate(std::ios_base::failbit);
        }else if(i > 255){
            t.v = 255;
            is.setstate(std::ios_base::failbit);
        }else{
            t.v = static_cast<uint8_t>(i);
        }
        return is;
    }
};
251 
/* CXXOVERLOADS(_N, W, T) expands, at namespace scope and after the
   definition of struct r123array<_N>x<W>, to:
   - operator<< : writes the _N elements separated by single spaces, each
     through r123arrayinsertable<T>;
   - operator>> : reads _N elements in order, each through
     r123arrayextractable<T>;
   - the typedef r123::Array<_N>x<W> for the struct.
*/
#define CXXOVERLOADS(_N, W, T) \
 \
inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a){ \
    os << r123arrayinsertable<T>(a.v[0]); \
    for(size_t i=1; i<_N; ++i) \
        os << " " << r123arrayinsertable<T>(a.v[i]); \
    return os; \
} \
 \
inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a){ \
    for(size_t i=0; i<_N; ++i){ \
        r123arrayextractable<T> x(a.v[i]); \
        is >> x; \
    } \
    return is; \
} \
 \
namespace r123{ \
    typedef r123array##_N##x##W Array##_N##x##W; \
}
272 
273 #endif /* __cplusplus */
274 
/* _r123array_tpl(_N, W, T) expands to a declaration of struct r123arrayNxW.

   In C, it's nothing more than a struct containing an array of N
   objects of type T (CXXMETHODS and CXXOVERLOADS expand to nothing).

   In C++ it's the same, but endowed with an assortment of member
   functions, typedefs and friends.  In C++, r123arrayNxW looks a lot
   like std::array<T,N>, has most of the capabilities of a container,
   and satisfies the requirements outlined in compat/Engine.hpp for
   counter and key types.  ArrayNxW, in the r123 namespace, is
   a typedef equivalent to r123arrayNxW.
*/

#define _r123array_tpl(_N, W, T) \
struct r123array##_N##x##W{ \
    T v[_N]; \
    CXXMETHODS(_N, W, T) \
}; \
 \
CXXOVERLOADS(_N, W, T)
297 
300 _r123array_tpl(1, 32, uint32_t) /* r123array1x32 */
301 _r123array_tpl(2, 32, uint32_t) /* r123array2x32 */
302 _r123array_tpl(4, 32, uint32_t) /* r123array4x32 */
303 _r123array_tpl(8, 32, uint32_t) /* r123array8x32 */
304 
305 _r123array_tpl(1, 64, uint64_t) /* r123array1x64 */
306 _r123array_tpl(2, 64, uint64_t) /* r123array2x64 */
307 _r123array_tpl(4, 64, uint64_t) /* r123array4x64 */
308 
309 _r123array_tpl(16, 8, uint8_t) /* r123array16x8 for ARSsw, AESsw */
310 
311 #if R123_USE_SSE
312 _r123array_tpl(1, m128i, r123m128i) /* r123array1x128i for ARSni, AESni */
313 #endif
314 
/* In C++, it's natural to use sizeof(a::value_type), but in C it's
   pretty convoluted to figure out the width of the value_type of an
   r123arrayNxW.  R123_W(a) takes the array TYPE a and yields the width of
   one element in bits, as 8*sizeof of its v[0] member.  The null pointer
   appears only inside sizeof, so it is never dereferenced.
*/
#define R123_W(a)   (8*sizeof(((a *)0)->v[0]))
320 
325 #endif
326 
_r123array_tpl(1, 32, uint32_t) _r123array_tpl(2
static std::istream & operator>>(std::istream &is, r123m128i &m)
Definition: sse.h:250
Definition: sse.h:148
T assemble_from_u32(uint32_t *p32)