/build/rocrand-D7zBeW/rocrand-7.2.4/library/include/rocrand/rocrand_mrg31k3p.h Source File

/build/rocrand-D7zBeW/rocrand-7.2.4/library/include/rocrand/rocrand_mrg31k3p.h Source File#

API library: /build/rocrand-D7zBeW/rocrand-7.2.4/library/include/rocrand/rocrand_mrg31k3p.h Source File
rocrand_mrg31k3p.h
1// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21#ifndef ROCRAND_MRG31K3P_H_
22#define ROCRAND_MRG31K3P_H_
23
24#include "rocrand/rocrand_common.h"
25#include "rocrand/rocrand_mrg31k3p_precomputed.h"
26
27#include <hip/hip_runtime.h>
28
// Moduli of the two component recurrences of the MRG31K3P generator.
29#define ROCRAND_MRG31K3P_M1 2147483647U // 2 ^ 31 - 1
30#define ROCRAND_MRG31K3P_M2 2147462579U // 2 ^ 31 - 21069
// Bit masks used by mrg31k3p_engine::next(): MASK12 selects the low 9 bits of
// x1[1], MASK13 the low 24 bits of x1[2], and MASK21 the low 16 bits of
// x2[0] and x2[2] before they are shifted left.
31#define ROCRAND_MRG31K3P_MASK12 511U // 2 ^ 9 - 1
32#define ROCRAND_MRG31K3P_MASK13 16777215U // 2 ^ 24 - 1
33#define ROCRAND_MRG31K3P_MASK21 65535U // 2 ^ 16 - 1
34#define ROCRAND_MRG31K3P_NORM_DOUBLE (4.656612875245796923e-10) // 1 / ROCRAND_MRG31K3P_M1
// Scale factor applied by rocrand() to (next() - 1) to stretch the generator
// output over the unsigned 32-bit range.
35#define ROCRAND_MRG31K3P_UINT32_NORM \
36 (2.000000001396983862) // UINT32_MAX / (ROCRAND_MRG31K3P_M1 - 1)
37
46#define ROCRAND_MRG31K3P_DEFAULT_SEED 12345ULL // Default seed for MRG31K3P PRNG.
48
49namespace rocrand_device
50{
51
52class mrg31k3p_engine
53{
54public:
55 struct mrg31k3p_state
56 {
57#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
58 // The Box–Muller transform requires two inputs to convert uniformly
59 // distributed real values [0; 1] to normally distributed real values
60 // (with mean = 0, and stddev = 1). Often user wants only one
61 // normally distributed number, to save performance and random
62 // numbers the 2nd value is saved for future requests.
63 double boxmuller_double; // normally distributed double
64 float boxmuller_float; // normally distributed float
65#endif
66 unsigned int x1[3];
67 unsigned int x2[3];
68 };
69
70 __forceinline__ __device__ __host__ mrg31k3p_engine()
71 {
72 this->seed(ROCRAND_MRG31K3P_DEFAULT_SEED, 0, 0);
73 }
74
83 __forceinline__ __device__ __host__ mrg31k3p_engine(const unsigned long long seed,
84 const unsigned long long subsequence,
85 const unsigned long long offset)
86 {
87 this->seed(seed, subsequence, offset);
88 }
89
98 __forceinline__ __device__ __host__ void seed(unsigned long long seed_value,
99 const unsigned long long subsequence,
100 const unsigned long long offset)
101 {
102 if(seed_value == 0)
103 {
105 }
106 unsigned int x = static_cast<unsigned int>(seed_value ^ 0x55555555U);
107 unsigned int y = static_cast<unsigned int>((seed_value >> 32) ^ 0xAAAAAAAAU);
108 m_state.x1[0] = mod_mul_m1(x, seed_value);
109 m_state.x1[1] = mod_mul_m1(y, seed_value);
110 m_state.x1[2] = mod_mul_m1(x, seed_value);
111 m_state.x2[0] = mod_mul_m2(y, seed_value);
112 m_state.x2[1] = mod_mul_m2(x, seed_value);
113 m_state.x2[2] = mod_mul_m2(y, seed_value);
114 this->restart(subsequence, offset);
115 }
116
118 __forceinline__ __device__ __host__ void discard(unsigned long long offset)
119 {
120 this->discard_impl(offset);
121 }
122
125 __forceinline__ __device__ __host__ void discard_subsequence(unsigned long long subsequence)
126 {
127 this->discard_subsequence_impl(subsequence);
128 }
129
132 __forceinline__ __device__ __host__ void discard_sequence(unsigned long long sequence)
133 {
134 this->discard_sequence_impl(sequence);
135 }
136
137 __forceinline__ __device__ __host__ void restart(const unsigned long long subsequence,
138 const unsigned long long offset)
139 {
140#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
141 m_state.boxmuller_float = ROCRAND_NAN_FLOAT;
142 m_state.boxmuller_double = ROCRAND_NAN_DOUBLE;
143#endif
144 this->discard_subsequence_impl(subsequence);
145 this->discard_impl(offset);
146 }
147
148 __forceinline__ __device__ __host__ unsigned int operator()()
149 {
150 return this->next();
151 }
152
153 // Returned value is in range [1, ROCRAND_MRG31K3P_M1].
154 __forceinline__ __device__ __host__ unsigned int next()
155 {
156 // First component
157 unsigned int tmp
158 = (((m_state.x1[1] & ROCRAND_MRG31K3P_MASK12) << 22) + (m_state.x1[1] >> 9))
159 + (((m_state.x1[2] & ROCRAND_MRG31K3P_MASK13) << 7) + (m_state.x1[2] >> 24));
160 tmp -= (tmp >= ROCRAND_MRG31K3P_M1) ? ROCRAND_MRG31K3P_M1 : 0;
161 tmp += m_state.x1[2];
162 tmp -= (tmp >= ROCRAND_MRG31K3P_M1) ? ROCRAND_MRG31K3P_M1 : 0;
163 m_state.x1[2] = m_state.x1[1];
164 m_state.x1[1] = m_state.x1[0];
165 m_state.x1[0] = tmp;
166
167 // Second component
168 tmp = (((m_state.x2[0] & ROCRAND_MRG31K3P_MASK21) << 15) + 21069 * (m_state.x2[0] >> 16));
169 tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
170 tmp += ((m_state.x2[2] & ROCRAND_MRG31K3P_MASK21) << 15);
171 tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
172 tmp += 21069 * (m_state.x2[2] >> 16);
173 tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
174 tmp += m_state.x2[2];
175 tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
176 m_state.x2[2] = m_state.x2[1];
177 m_state.x2[1] = m_state.x2[0];
178 m_state.x2[0] = tmp;
179
180 // Combination
181 return m_state.x1[0] - m_state.x2[0]
182 + (m_state.x1[0] <= m_state.x2[0] ? ROCRAND_MRG31K3P_M1 : 0);
183 }
184
185protected:
186 // Advances the internal state to skip \p offset numbers.
187 __forceinline__ __device__ __host__ void discard_impl(unsigned long long offset)
188 {
189 discard_state(offset);
190 }
191
192 // Advances the internal state to skip \p subsequence subsequences.
193 __forceinline__ __device__ __host__ void
194 discard_subsequence_impl(unsigned long long subsequence)
195 {
196 int i = 0;
197
198 while(subsequence > 0)
199 {
200 if(subsequence & 1)
201 {
202#if defined(__HIP_DEVICE_COMPILE__)
203 mod_mat_vec_m1(d_mrg31k3p_A1P72 + i, m_state.x1);
204 mod_mat_vec_m2(d_mrg31k3p_A2P72 + i, m_state.x2);
205#else
206 mod_mat_vec_m1(h_mrg31k3p_A1P72 + i, m_state.x1);
207 mod_mat_vec_m2(h_mrg31k3p_A2P72 + i, m_state.x2);
208#endif
209 }
210 subsequence >>= 1;
211 i += 9;
212 }
213 }
214
215 // Advances the internal state to skip \p sequences.
216 __forceinline__ __device__ __host__ void discard_sequence_impl(unsigned long long sequence)
217 {
218 int i = 0;
219
220 while(sequence > 0)
221 {
222 if(sequence & 1)
223 {
224#if defined(__HIP_DEVICE_COMPILE__)
225 mod_mat_vec_m1(d_mrg31k3p_A1P134 + i, m_state.x1);
226 mod_mat_vec_m2(d_mrg31k3p_A2P134 + i, m_state.x2);
227#else
228 mod_mat_vec_m1(h_mrg31k3p_A1P134 + i, m_state.x1);
229 mod_mat_vec_m2(h_mrg31k3p_A2P134 + i, m_state.x2);
230#endif
231 }
232 sequence >>= 1;
233 i += 9;
234 }
235 }
236
237 // Advances the internal state to skip \p offset numbers.
238 __forceinline__ __device__ __host__ void discard_state(unsigned long long offset)
239 {
240 int i = 0;
241
242 while(offset > 0)
243 {
244 if(offset & 1)
245 {
246#if defined(__HIP_DEVICE_COMPILE__)
247 mod_mat_vec_m1(d_mrg31k3p_A1 + i, m_state.x1);
248 mod_mat_vec_m2(d_mrg31k3p_A2 + i, m_state.x2);
249#else
250 mod_mat_vec_m1(h_mrg31k3p_A1 + i, m_state.x1);
251 mod_mat_vec_m2(h_mrg31k3p_A2 + i, m_state.x2);
252#endif
253 }
254 offset >>= 1;
255 i += 9;
256 }
257 }
258
259 // Advances the internal state to the next state.
260 __forceinline__ __device__ __host__ void discard_state()
261 {
262 discard_state(1);
263 }
264
265private:
266 __forceinline__ __device__ __host__ static void mod_mat_vec_m1(const unsigned int* A,
267 unsigned int* s)
268 {
269 unsigned long long x[3] = {s[0], s[1], s[2]};
270
271 s[0] = mod_m1(mod_m1(A[0] * x[0]) + mod_m1(A[1] * x[1]) + mod_m1(A[2] * x[2]));
272
273 s[1] = mod_m1(mod_m1(A[3] * x[0]) + mod_m1(A[4] * x[1]) + mod_m1(A[5] * x[2]));
274
275 s[2] = mod_m1(mod_m1(A[6] * x[0]) + mod_m1(A[7] * x[1]) + mod_m1(A[8] * x[2]));
276 }
277
278 __forceinline__ __device__ __host__ static void mod_mat_vec_m2(const unsigned int* A,
279 unsigned int* s)
280 {
281 unsigned long long x[3] = {s[0], s[1], s[2]};
282
283 s[0] = mod_m2(mod_m2(A[0] * x[0]) + mod_m2(A[1] * x[1]) + mod_m2(A[2] * x[2]));
284
285 s[1] = mod_m2(mod_m2(A[3] * x[0]) + mod_m2(A[4] * x[1]) + mod_m2(A[5] * x[2]));
286
287 s[2] = mod_m2(mod_m2(A[6] * x[0]) + mod_m2(A[7] * x[1]) + mod_m2(A[8] * x[2]));
288 }
289
290 __forceinline__ __device__ __host__ static unsigned long long mod_mul_m1(unsigned int i,
291 unsigned long long j)
292 {
293 return mod_m1(i * j);
294 }
295
296 __forceinline__ __device__ __host__ static unsigned long long mod_m1(unsigned long long p)
297 {
298 return p % ROCRAND_MRG31K3P_M1;
299 }
300
301 __forceinline__ __device__ __host__ static unsigned long long mod_mul_m2(unsigned int i,
302 unsigned long long j)
303 {
304 return mod_m2(i * j);
305 }
306
307 __forceinline__ __device__ __host__ static unsigned long long mod_m2(unsigned long long p)
308 {
309 return p % ROCRAND_MRG31K3P_M2;
310 }
311
312protected:
313 // State
314 mrg31k3p_state m_state;
315
316#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
317 friend struct detail::engine_boxmuller_helper<mrg31k3p_engine>;
318#endif
319}; // mrg31k3p_engine class
320
321} // end namespace rocrand_device
322
327
// State type taken by the MRG31K3P free functions in this header; it is an
// alias of the engine class itself.
329typedef rocrand_device::mrg31k3p_engine rocrand_state_mrg31k3p;
331
343__forceinline__ __device__ __host__
344void rocrand_init(const unsigned long long seed,
345 const unsigned long long subsequence,
346 const unsigned long long offset,
347 rocrand_state_mrg31k3p* state)
348{
349 *state = rocrand_state_mrg31k3p(seed, subsequence, offset);
350}
351
364__forceinline__ __device__ __host__
365unsigned int rocrand(rocrand_state_mrg31k3p* state)
366{
367 // next() in [1, ROCRAND_MRG31K3P_M1]
368 return static_cast<unsigned int>((state->next() - 1) * ROCRAND_MRG31K3P_UINT32_NORM);
369}
370
379__forceinline__ __device__ __host__
380void skipahead(unsigned long long offset, rocrand_state_mrg31k3p* state)
381{
382 return state->discard(offset);
383}
384
394__forceinline__ __device__ __host__
395void skipahead_subsequence(unsigned long long subsequence, rocrand_state_mrg31k3p* state)
396{
397 return state->discard_subsequence(subsequence);
398}
399
409__forceinline__ __device__ __host__
410void skipahead_sequence(unsigned long long sequence, rocrand_state_mrg31k3p* state)
411{
412 return state->discard_sequence(sequence);
413}
414 // end of group rocranddevice
416
417#endif // ROCRAND_MRG31K3P_H_
#define ROCRAND_MRG31K3P_DEFAULT_SEED
Default seed for MRG31K3P PRNG.
Definition rocrand_mrg31k3p.h:46
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_mrg31k3p *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition rocrand_mrg31k3p.h:365
__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence, rocrand_state_mrg31k3p *state)
Updates MRG31K3P state to skip ahead by subsequence subsequences.
Definition rocrand_mrg31k3p.h:395
__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, rocrand_state_mrg31k3p *state)
Updates MRG31K3P state to skip ahead by offset elements.
Definition rocrand_mrg31k3p.h:380
__forceinline__ __device__ __host__ void skipahead_sequence(unsigned long long sequence, rocrand_state_mrg31k3p *state)
Updates MRG31K3P state to skip ahead by sequence sequences.
Definition rocrand_mrg31k3p.h:410
__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_mrg31k3p *state)
Initializes MRG31K3P state.
Definition rocrand_mrg31k3p.h:344