10#ifndef RANLUXPP_HELPERS_H
11#define RANLUXPP_HELPERS_H
// add_overflow: helper taking two 64-bit unsigned operands `a` and `b` and
// returning a uint64_t. Only the first line of the signature is visible in
// this listing; the remaining parameter(s) and the body are missing.
// Call sites in this file pass a third argument (`overflow` or `carry`).
// NOTE(review): presumably returns a + b and reports unsigned wrap-around
// through that third argument -- confirm against the full definition.
16static inline uint64_t add_overflow(uint64_t a, uint64_t b,
// add_carry: adds `a` and `b` via add_overflow. `carry` is taken by (C++)
// reference, so the caller's variable can be updated by this function.
// NOTE(review): the declaration of `overflow`, any update of `carry`, the
// return statement and the closing brace are not visible in this listing --
// presumably `carry` is increased by the overflow flag and the sum returned;
// confirm against the full definition.
24static inline uint64_t add_carry(uint64_t a, uint64_t b,
unsigned &carry) {
// Perform the raw addition; `overflow` is handed to add_overflow as its
// third argument.
26 uint64_t add = add_overflow(a, b, overflow);
// sub_overflow: helper taking two 64-bit unsigned operands `a` and `b` and
// returning a uint64_t. Only the first line of the signature is visible in
// this listing; the remaining parameter(s) and the body are missing.
// Call sites in this file pass a third argument (`overflow` or `carry`).
// NOTE(review): presumably returns a - b and reports a borrow (a < b)
// through that third argument -- confirm against the full definition.
34static inline uint64_t sub_overflow(uint64_t a, uint64_t b,
// sub_carry: subtracts `b` from `a` via sub_overflow. `carry` is taken by
// (C++) reference, so the caller's variable can be updated by this function.
// NOTE(review): the declaration of `overflow`, any update of `carry`, the
// return statement and the closing brace are not visible in this listing --
// presumably `carry` is increased by the borrow flag and the difference
// returned; confirm against the full definition.
42static inline uint64_t sub_carry(uint64_t a, uint64_t b,
unsigned &carry) {
// Perform the raw subtraction; `overflow` is handed to sub_overflow as its
// third argument.
44 uint64_t sub = sub_overflow(a, b, overflow);
// compute_r: limb-wise multi-precision update of the 64-bit word array `r`
// using the read-only 64-bit word array `upper`; returns a signed 64-bit
// value derived from the final carry and a comparison on `r` (see the end).
// Both arrays are indexed up to [8] in the visible code, i.e. at least nine
// words each.
// NOTE(review): this listing has gaps -- the declarations of `carry`, `r_i`,
// `r_3`, `r_7`, `r_8`, the initial value of `t2_bits` in the second loop, the
// stores back into `r` and all closing braces are not visible. Restore the
// complete definition before building.
58static inline int64_t compute_r(
const uint64_t *upper, uint64_t *r) {
// Pass 1: for each of the nine words, first subtract the pending `carry`
// from r_i (the same variable is passed as the flag argument), then subtract
// the matching word upper[i] while tracking the carry.
61 for (
int i = 0; i < 9; i++) {
63 r_i = sub_overflow(r_i, carry, carry);
65 uint64_t t1_i = upper[i];
66 r_i = sub_carry(r_i, t1_i, carry);
// Remember the carry left after pass 1 as a non-positive signed value.
69 int64_t c = -((int64_t)carry);
// Pass 2: again subtract the pending carry, then subtract `t2_bits`, which
// accumulates upper[i + 5] >> 16 and upper[i + 6] << 48 (a 64-bit window
// starting 16 bits into upper[i + 5], assuming t2_bits starts at zero).
// NOTE(review): with i running to 8, upper[i + 5] / upper[i + 6] reach
// index 14; any range guards around these two statements are not visible
// here -- confirm they exist in the full source.
73 for (
int i = 0; i < 9; i++) {
75 r_i = sub_overflow(r_i, carry, carry);
79 t2_bits += upper[i + 5] >> 16;
81 t2_bits += upper[i + 6] << 48;
84 r_i = sub_carry(r_i, t2_bits, carry);
// Addition stage, word r_3 (presumably r[3] -- declaration not visible):
// t2_bits places bits 16..31 of upper[5] into the top 16 bits, t3_bits
// places the low 16 bits of upper[0] into the top 16 bits; both are added
// with carry tracking.
94 uint64_t t2_bits = (upper[5] >> 16) << 48;
95 uint64_t t3_bits = (upper[0] << 48);
97 r_3 = add_carry(r_3, t2_bits, carry);
98 r_3 = add_carry(r_3, t3_bits, carry);
// Addition stage, words r[4]..r[6]: add the pending carry, then two 64-bit
// windows of `upper`. The shifted halves do not overlap, so `+` merely
// concatenates them:
//   t2_bits = high 32 bits of upper[5 + i] | low 32 bits of upper[6 + i]
//   t3_bits = high 48 bits of upper[i]     | low 16 bits of upper[1 + i]
102 for (
int i = 0; i < 3; i++) {
103 uint64_t r_i = r[i + 4];
104 r_i = add_overflow(r_i, carry, carry);
106 uint64_t t2_bits = (upper[5 + i] >> 32) + (upper[6 + i] << 32);
107 uint64_t t3_bits = (upper[i] >> 16) + (upper[1 + i] << 48);
109 r_i = add_carry(r_i, t2_bits, carry);
110 r_i = add_carry(r_i, t3_bits, carry);
// Addition stage, word r_7 (presumably r[7]): only the high 32 bits of
// upper[8] remain for t2_bits; t3_bits is the window over upper[3]/upper[4].
116 r_7 = add_overflow(r_7, carry, carry);
118 uint64_t t2_bits = (upper[8] >> 32);
119 uint64_t t3_bits = (upper[3] >> 16) + (upper[4] << 48);
121 r_7 = add_carry(r_7, t2_bits, carry);
122 r_7 = add_carry(r_7, t3_bits, carry);
// Addition stage, word r_8 (presumably r[8]): only a t3 contribution, the
// window over upper[4]/upper[5].
128 r_8 = add_overflow(r_8, carry, carry);
130 uint64_t t3_bits = (upper[4] >> 16) + (upper[5] << 48);
132 r_8 = add_carry(r_8, t3_bits, carry);
// greater_m is true only if the low 240 bits of r (r[0], r[1], r[2] and the
// low 48 bits of r[3]) are not all zero AND every bit from 240 upwards (top
// 16 bits of r[3], all of r[4]..r[8]) is set.
// NOTE(review): presumably this detects r >= m for a modulus of the form
// 2^576 - 2^240 + 1 -- confirm against the algorithm description.
143 bool greater_m = r[0] | r[1] | r[2] | (r[3] & 0x0000ffffffffffff);
144 greater_m &= (r[3] >> 48) == 0xffff;
145 for (
int i = 4; i < 9; i++) {
146 greater_m &= (r[i] == UINT64_MAX);
// Result: c as computed above, plus one when c is zero and greater_m holds.
// NOTE(review): whether `c` is modified between its definition and this
// return is not visible in this listing.
148 return c + (c == 0 && greater_m);