Coverage Report

Created: 2026-05-08 10:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/tmp/bitcoin/src/crypto/muhash.cpp
Line
Count
Source
1
// Copyright (c) 2017-present The Bitcoin Core developers
2
// Distributed under the MIT software license, see the accompanying
3
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4
5
#include <crypto/muhash.h>
6
7
#include <crypto/chacha20.h>
8
#include <crypto/common.h>
9
#include <hash.h>
10
#include <span.h>
11
#include <uint256.h>
12
#include <util/check.h>
13
14
#include <bit>
15
#include <cstring>
16
#include <limits>
17
18
namespace {
19
20
using limb_t = Num3072::limb_t;
21
using signed_limb_t = Num3072::signed_limb_t;
22
using double_limb_t = Num3072::double_limb_t;
23
using signed_double_limb_t = Num3072::signed_double_limb_t;
24
constexpr int LIMB_SIZE = Num3072::LIMB_SIZE;
25
constexpr int SIGNED_LIMB_SIZE = Num3072::SIGNED_LIMB_SIZE;
26
constexpr int LIMBS = Num3072::LIMBS;
27
constexpr int SIGNED_LIMBS = Num3072::SIGNED_LIMBS;
28
constexpr int FINAL_LIMB_POSITION = 3072 / SIGNED_LIMB_SIZE;
29
constexpr int FINAL_LIMB_MODULUS_BITS = 3072 % SIGNED_LIMB_SIZE;
30
constexpr limb_t MAX_LIMB = (limb_t)(-1);
31
constexpr limb_t MAX_SIGNED_LIMB = (((limb_t)1) << SIGNED_LIMB_SIZE) - 1;
32
/** 2^3072 - 1103717, the largest 3072-bit safe prime number, is used as the modulus. */
33
constexpr limb_t MAX_PRIME_DIFF = 1103717;
34
/** The modular inverse of (2**3072 - MAX_PRIME_DIFF) mod (MAX_SIGNED_LIMB + 1). */
35
constexpr limb_t MODULUS_INVERSE = limb_t(0x70a1421da087d93);
36
37
38
/** Extract the lowest limb of [c0,c1,c2] into n, and left shift the number by 1 limb. */
39
inline void extract3(limb_t& c0, limb_t& c1, limb_t& c2, limb_t& n)
40
497k
{
41
497k
    n = c0;
42
497k
    c0 = c1;
43
497k
    c1 = c2;
44
497k
    c2 = 0;
45
497k
}
46
47
/** [c0,c1] = a * b */
48
inline void mul(limb_t& c0, limb_t& c1, const limb_t& a, const limb_t& b)
49
486k
{
50
486k
    double_limb_t t = (double_limb_t)a * b;
51
486k
    c1 = t >> LIMB_SIZE;
52
486k
    c0 = t;
53
486k
}
54
55
/* [c0,c1,c2] += n * [d0,d1,d2]. c2 is 0 initially */
56
inline void mulnadd3(limb_t& c0, limb_t& c1, limb_t& c2, limb_t& d0, limb_t& d1, limb_t& d2, const limb_t& n)
57
486k
{
58
486k
    double_limb_t t = (double_limb_t)d0 * n + c0;
59
486k
    c0 = t;
60
486k
    t >>= LIMB_SIZE;
61
486k
    t += (double_limb_t)d1 * n + c1;
62
486k
    c1 = t;
63
486k
    t >>= LIMB_SIZE;
64
486k
    c2 = t + d2 * n;
65
486k
}
66
67
/* [c0,c1] *= n */
68
inline void muln2(limb_t& c0, limb_t& c1, const limb_t& n)
69
10.3k
{
70
10.3k
    double_limb_t t = (double_limb_t)c0 * n;
71
10.3k
    c0 = t;
72
10.3k
    t >>= LIMB_SIZE;
73
10.3k
    t += (double_limb_t)c1 * n;
74
10.3k
    c1 = t;
75
10.3k
}
76
77
/** [c0,c1,c2] += a * b */
78
inline void muladd3(limb_t& c0, limb_t& c1, limb_t& c2, const limb_t& a, const limb_t& b)
79
23.3M
{
80
23.3M
    double_limb_t t = (double_limb_t)a * b;
81
23.3M
    limb_t th = t >> LIMB_SIZE;
82
23.3M
    limb_t tl = t;
83
84
23.3M
    c0 += tl;
85
23.3M
    th += (c0 < tl) ? 1 : 0;
86
23.3M
    c1 += th;
87
23.3M
    c2 += (c1 < th) ? 1 : 0;
88
23.3M
}
89
90
/**
91
 * Add limb a to [c0,c1]: [c0,c1] += a. Then extract the lowest
92
 * limb of [c0,c1] into n, and left shift the number by 1 limb.
93
 * */
94
inline void addnextract2(limb_t& c0, limb_t& c1, const limb_t& a, limb_t& n)
95
497k
{
96
497k
    limb_t c2 = 0;
97
98
    // add
99
497k
    c0 += a;
100
497k
    if (c0 < a) {
101
3.45k
        c1 += 1;
102
103
        // Handle case when c1 has overflown
104
3.45k
        if (c1 == 0) c2 = 1;
105
3.45k
    }
106
107
    // extract
108
497k
    n = c0;
109
497k
    c0 = c1;
110
497k
    c1 = c2;
111
497k
}
112
113
} // namespace
114
115
/** Indicates whether d is larger than the modulus. */
116
bool Num3072::IsOverflow() const
117
22.6k
{
118
22.6k
    if (this->limbs[0] <= std::numeric_limits<limb_t>::max() - MAX_PRIME_DIFF) return false;
119
528
    for (int i = 1; i < LIMBS; ++i) {
120
517
        if (this->limbs[i] != std::numeric_limits<limb_t>::max()) return false;
121
517
    }
122
11
    return true;
123
11
}
124
125
void Num3072::FullReduce()
126
11
{
127
11
    limb_t c0 = MAX_PRIME_DIFF;
128
11
    limb_t c1 = 0;
129
539
    for (int i = 0; i < LIMBS; ++i) {
130
528
        addnextract2(c0, c1, this->limbs[i], this->limbs[i]);
131
528
    }
132
11
}
133
134
namespace {
135
/** A type representing a number in signed limb representation. */
136
struct Num3072Signed
137
{
138
    /** The represented value is sum(limbs[i]*2^(SIGNED_LIMB_SIZE*i), i=0..SIGNED_LIMBS-1).
139
     *  Note that limbs may be negative, or exceed 2^SIGNED_LIMB_SIZE-1. */
140
    signed_limb_t limbs[SIGNED_LIMBS];
141
142
    /** Construct a Num3072Signed with value 0. */
143
    Num3072Signed()
144
16.3k
    {
145
16.3k
        memset(limbs, 0, sizeof(limbs));
146
16.3k
    }
147
148
    /** Convert a Num3072 to a Num3072Signed. Output will be normalized and in
149
     *  range 0..2^3072-1. */
150
    void FromNum3072(const Num3072& in)
151
4.08k
    {
152
4.08k
        double_limb_t c = 0;
153
4.08k
        int b = 0, outpos = 0;
154
200k
        for (int i = 0; i < LIMBS; ++i) {
155
196k
            c += double_limb_t{in.limbs[i]} << b;
156
196k
            b += LIMB_SIZE;
157
396k
            while (b >= SIGNED_LIMB_SIZE) {
158
200k
                limbs[outpos++] = limb_t(c) & MAX_SIGNED_LIMB;
159
200k
                c >>= SIGNED_LIMB_SIZE;
160
200k
                b -= SIGNED_LIMB_SIZE;
161
200k
            }
162
196k
        }
163
4.08k
        Assume(outpos == SIGNED_LIMBS - 1);
164
4.08k
        limbs[SIGNED_LIMBS - 1] = c;
165
4.08k
        c >>= SIGNED_LIMB_SIZE;
166
4.08k
        Assume(c == 0);
167
4.08k
    }
168
169
    /** Convert a Num3072Signed to a Num3072. Input must be in range 0..modulus-1. */
170
    void ToNum3072(Num3072& out) const
171
4.08k
    {
172
4.08k
        double_limb_t c = 0;
173
4.08k
        int b = 0, outpos = 0;
174
208k
        for (int i = 0; i < SIGNED_LIMBS; ++i) {
175
204k
            c += double_limb_t(limbs[i]) << b;
176
204k
            b += SIGNED_LIMB_SIZE;
177
204k
            if (b >= LIMB_SIZE) {
178
196k
                out.limbs[outpos++] = c;
179
196k
                c >>= LIMB_SIZE;
180
196k
                b -= LIMB_SIZE;
181
196k
            }
182
204k
        }
183
4.08k
        Assume(outpos == LIMBS);
184
4.08k
        Assume(c == 0);
185
4.08k
    }
186
187
    /** Take a Num3072Signed in range 1-2*2^3072..2^3072-1, and:
188
     *  - optionally negate it (if negate is true)
189
     *  - reduce it modulo the modulus (2^3072 - MAX_PRIME_DIFF)
190
     *  - produce output with all limbs in range 0..2^SIGNED_LIMB_SIZE-1
191
     */
192
    void Normalize(bool negate)
193
4.08k
    {
194
        // Add modulus if this was negative. This brings the range of *this to 1-2^3072..2^3072-1.
195
4.08k
        signed_limb_t cond_add = limbs[SIGNED_LIMBS-1] >> (LIMB_SIZE-1); // -1 if this is negative; 0 otherwise
196
4.08k
        limbs[0] += signed_limb_t(-MAX_PRIME_DIFF) & cond_add;
197
4.08k
        limbs[FINAL_LIMB_POSITION] += (signed_limb_t(1) << FINAL_LIMB_MODULUS_BITS) & cond_add;
198
        // Next negate all limbs if negate was set. This does not change the range of *this.
199
4.08k
        signed_limb_t cond_negate = -signed_limb_t(negate); // -1 if this negate is true; 0 otherwise
200
208k
        for (int i = 0; i < SIGNED_LIMBS; ++i) {
201
204k
            limbs[i] = (limbs[i] ^ cond_negate) - cond_negate;
202
204k
        }
203
        // Perform carry (make all limbs except the top one be in range 0..2^SIGNED_LIMB_SIZE-1).
204
204k
        for (int i = 0; i < SIGNED_LIMBS - 1; ++i) {
205
200k
            limbs[i + 1] += limbs[i] >> SIGNED_LIMB_SIZE;
206
200k
            limbs[i] &= MAX_SIGNED_LIMB;
207
200k
        }
208
        // Again add modulus if *this was negative. This brings the range of *this to 0..2^3072-1.
209
4.08k
        cond_add = limbs[SIGNED_LIMBS-1] >> (LIMB_SIZE-1); // -1 if this is negative; 0 otherwise
210
4.08k
        limbs[0] += signed_limb_t(-MAX_PRIME_DIFF) & cond_add;
211
4.08k
        limbs[FINAL_LIMB_POSITION] += (signed_limb_t(1) << FINAL_LIMB_MODULUS_BITS) & cond_add;
212
        // Perform another carry. Now all limbs are in range 0..2^SIGNED_LIMB_SIZE-1.
213
204k
        for (int i = 0; i < SIGNED_LIMBS - 1; ++i) {
214
200k
            limbs[i + 1] += limbs[i] >> SIGNED_LIMB_SIZE;
215
200k
            limbs[i] &= MAX_SIGNED_LIMB;
216
200k
        }
217
4.08k
    }
218
};
219
220
/** 2x2 transformation matrix with signed_limb_t elements. */
221
struct SignedMatrix
222
{
223
    signed_limb_t u, v, q, r;
224
};
225
226
/** Compute the transformation matrix for SIGNED_LIMB_SIZE divsteps.
227
 *
228
 * eta: initial eta value
229
 * f:   bottom SIGNED_LIMB_SIZE bits of initial f value
230
 * g:   bottom SIGNED_LIMB_SIZE bits of initial g value
231
 * out: resulting transformation matrix, scaled by 2^SIGNED_LIMB_SIZE
232
 * return: eta value after SIGNED_LIMB_SIZE divsteps
233
 */
234
inline limb_t ComputeDivstepMatrix(signed_limb_t eta, limb_t f, limb_t g, SignedMatrix& out)
235
409k
{
236
    /** inv256[i] = -1/(2*i+1) (mod 256) */
237
409k
    static const uint8_t NEGINV256[128] = {
238
409k
        0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
239
409k
        0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
240
409k
        0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
241
409k
        0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
242
409k
        0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
243
409k
        0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
244
409k
        0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
245
409k
        0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
246
409k
        0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
247
409k
        0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
248
409k
        0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
249
409k
    };
250
    // Coefficients of returned SignedMatrix; starts off as identity matrix. */
251
409k
    limb_t u = 1, v = 0, q = 0, r = 1;
252
    // The number of divsteps still left.
253
409k
    int i = SIGNED_LIMB_SIZE;
254
2.62M
    while (true) {
255
        /* Use a sentinel bit to count zeros only up to i. */
256
2.62M
        int zeros = std::countr_zero(g | (MAX_LIMB << i));
257
        /* Perform zeros divsteps at once; they all just divide g by two. */
258
2.62M
        g >>= zeros;
259
2.62M
        u <<= zeros;
260
2.62M
        v <<= zeros;
261
2.62M
        eta -= zeros;
262
2.62M
        i -= zeros;
263
         /* We're done once we've performed SIGNED_LIMB_SIZE divsteps. */
264
2.62M
        if (i == 0) break;
265
        /* If eta is negative, negate it and replace f,g with g,-f. */
266
2.21M
        if (eta < 0) {
267
757k
            limb_t tmp;
268
757k
            eta = -eta;
269
757k
            tmp = f; f = g; g = -tmp;
270
757k
            tmp = u; u = q; q = -tmp;
271
757k
            tmp = v; v = r; r = -tmp;
272
757k
        }
273
        /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
274
         * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
275
         * can be done as its sign will flip once that happens. */
276
2.21M
        int limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
277
        /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
278
2.21M
        limb_t m = (MAX_LIMB >> (LIMB_SIZE - limit)) & 255U;
279
        /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
280
2.21M
        limb_t w = (g * NEGINV256[(f >> 1) & 127]) & m;
281
        /* Do so. */
282
2.21M
        g += f * w;
283
2.21M
        q += u * w;
284
2.21M
        r += v * w;
285
2.21M
    }
286
409k
    out.u = (signed_limb_t)u;
287
409k
    out.v = (signed_limb_t)v;
288
409k
    out.q = (signed_limb_t)q;
289
409k
    out.r = (signed_limb_t)r;
290
409k
    return eta;
291
409k
}
292
293
/** Apply matrix t/2^SIGNED_LIMB_SIZE to vector [d,e], modulo modulus.
294
 *
295
 * On input and output, d and e are in range 1-2*modulus..modulus-1.
296
 */
297
inline void UpdateDE(Num3072Signed& d, Num3072Signed& e, const SignedMatrix& t)
298
409k
{
299
409k
    const signed_limb_t u = t.u, v=t.v, q=t.q, r=t.r;
300
301
    /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */
302
409k
    signed_limb_t sd = d.limbs[SIGNED_LIMBS - 1] >> (LIMB_SIZE - 1);
303
409k
    signed_limb_t se = e.limbs[SIGNED_LIMBS - 1] >> (LIMB_SIZE - 1);
304
409k
    signed_limb_t md = (u & sd) + (v & se);
305
409k
    signed_limb_t me = (q & sd) + (r & se);
306
    /* Begin computing t*[d,e]. */
307
409k
    signed_limb_t di = d.limbs[0], ei = e.limbs[0];
308
409k
    signed_double_limb_t cd = (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
309
409k
    signed_double_limb_t ce = (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
310
    /* Correct md,me so that t*[d,e]+modulus*[md,me] has SIGNED_LIMB_SIZE zero bottom bits. */
311
409k
    md -= (MODULUS_INVERSE * limb_t(cd) + md) & MAX_SIGNED_LIMB;
312
409k
    me -= (MODULUS_INVERSE * limb_t(ce) + me) & MAX_SIGNED_LIMB;
313
    /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
314
409k
    cd -= (signed_double_limb_t)1103717 * md;
315
409k
    ce -= (signed_double_limb_t)1103717 * me;
316
    /* Verify that the low SIGNED_LIMB_SIZE bits of the computation are indeed zero, and then throw them away. */
317
409k
    Assume((cd & MAX_SIGNED_LIMB) == 0);
318
409k
    Assume((ce & MAX_SIGNED_LIMB) == 0);
319
409k
    cd >>= SIGNED_LIMB_SIZE;
320
409k
    ce >>= SIGNED_LIMB_SIZE;
321
    /* Now iteratively compute limb i=1..SIGNED_LIMBS-2 of t*[d,e]+modulus*[md,me], and store them in output
322
     * limb i-1 (shifting down by SIGNED_LIMB_SIZE bits). The corresponding limbs in modulus are all zero,
323
     * so modulus/md/me are not actually involved here. */
324
20.0M
    for (int i = 1; i < SIGNED_LIMBS - 1; ++i) {
325
19.6M
        di = d.limbs[i];
326
19.6M
        ei = e.limbs[i];
327
19.6M
        cd += (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
328
19.6M
        ce += (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
329
19.6M
        d.limbs[i - 1] = (signed_limb_t)cd & MAX_SIGNED_LIMB; cd >>= SIGNED_LIMB_SIZE;
330
19.6M
        e.limbs[i - 1] = (signed_limb_t)ce & MAX_SIGNED_LIMB; ce >>= SIGNED_LIMB_SIZE;
331
19.6M
    }
332
    /* Compute limb SIGNED_LIMBS-1 of t*[d,e]+modulus*[md,me], and store it in output limb SIGNED_LIMBS-2. */
333
409k
    di = d.limbs[SIGNED_LIMBS - 1];
334
409k
    ei = e.limbs[SIGNED_LIMBS - 1];
335
409k
    cd += (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
336
409k
    ce += (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
337
409k
    cd += (signed_double_limb_t)md << FINAL_LIMB_MODULUS_BITS;
338
409k
    ce += (signed_double_limb_t)me << FINAL_LIMB_MODULUS_BITS;
339
409k
    d.limbs[SIGNED_LIMBS - 2] = (signed_limb_t)cd & MAX_SIGNED_LIMB; cd >>= SIGNED_LIMB_SIZE;
340
409k
    e.limbs[SIGNED_LIMBS - 2] = (signed_limb_t)ce & MAX_SIGNED_LIMB; ce >>= SIGNED_LIMB_SIZE;
341
    /* What remains goes into output limb SINGED_LIMBS-1 */
342
409k
    d.limbs[SIGNED_LIMBS - 1] = (signed_limb_t)cd;
343
409k
    e.limbs[SIGNED_LIMBS - 1] = (signed_limb_t)ce;
344
409k
}
345
346
/** Apply matrix t/2^SIGNED_LIMB_SIZE to vector (f,g).
347
 *
348
 * The matrix t must be chosen such that t*(f,g) results in multiples of 2^SIGNED_LIMB_SIZE.
349
 * This is the case for matrices computed by ComputeDivstepMatrix().
350
 */
351
inline void UpdateFG(Num3072Signed& f, Num3072Signed& g, const SignedMatrix& t, int len)
352
409k
{
353
409k
    const signed_limb_t u = t.u, v=t.v, q=t.q, r=t.r;
354
355
409k
    signed_limb_t fi, gi;
356
409k
    signed_double_limb_t cf, cg;
357
    /* Start computing t*[f,g]. */
358
409k
    fi = f.limbs[0];
359
409k
    gi = g.limbs[0];
360
409k
    cf = (signed_double_limb_t)u * fi + (signed_double_limb_t)v * gi;
361
409k
    cg = (signed_double_limb_t)q * fi + (signed_double_limb_t)r * gi;
362
    /* Verify that the bottom SIGNED_LIMB_BITS bits of the result are zero, and then throw them away. */
363
409k
    Assume((cf & MAX_SIGNED_LIMB) == 0);
364
409k
    Assume((cg & MAX_SIGNED_LIMB) == 0);
365
409k
    cf >>= SIGNED_LIMB_SIZE;
366
409k
    cg >>= SIGNED_LIMB_SIZE;
367
    /* Now iteratively compute limb i=1..SIGNED_LIMBS-1 of t*[f,g], and store them in output limb i-1 (shifting
368
     * down by SIGNED_LIMB_BITS bits). */
369
14.7M
    for (int i = 1; i < len; ++i) {
370
14.3M
        fi = f.limbs[i];
371
14.3M
        gi = g.limbs[i];
372
14.3M
        cf += (signed_double_limb_t)u * fi + (signed_double_limb_t)v * gi;
373
14.3M
        cg += (signed_double_limb_t)q * fi + (signed_double_limb_t)r * gi;
374
14.3M
        f.limbs[i - 1] = (signed_limb_t)cf & MAX_SIGNED_LIMB; cf >>= SIGNED_LIMB_SIZE;
375
14.3M
        g.limbs[i - 1] = (signed_limb_t)cg & MAX_SIGNED_LIMB; cg >>= SIGNED_LIMB_SIZE;
376
14.3M
    }
377
    /* What remains is limb SIGNED_LIMBS of t*[f,g]; store it as output limb SIGNED_LIMBS-1. */
378
409k
    f.limbs[len - 1] = (signed_limb_t)cf;
379
409k
    g.limbs[len - 1] = (signed_limb_t)cg;
380
381
409k
}
382
} // namespace
383
384
Num3072 Num3072::GetInverse() const
385
4.08k
{
386
    // Compute a modular inverse based on a variant of the safegcd algorithm:
387
    // - Paper: https://gcd.cr.yp.to/papers.html
388
    // - Inspired by this code in libsecp256k1:
389
    //   https://github.com/bitcoin-core/secp256k1/blob/master/src/modinv32_impl.h
390
    // - Explanation of the algorithm:
391
    //   https://github.com/bitcoin-core/secp256k1/blob/master/doc/safegcd_implementation.md
392
393
    // Local variables d, e, f, g:
394
    // - f and g are the variables whose gcd we compute (despite knowing the answer is 1):
395
    //   - f is always odd, and initialized as modulus
396
    //   - g is initialized as *this (called x in what follows)
397
    // - d and e are the numbers for which at every step it is the case that:
398
    //   - f = d * x mod modulus; d is initialized as 0
399
    //   - g = e * x mod modulus; e is initialized as 1
400
4.08k
    Num3072Signed d, e, f, g;
401
4.08k
    e.limbs[0] = 1;
402
    // F is initialized as modulus, which in signed limb representation can be expressed
403
    // simply as 2^3072 + -MAX_PRIME_DIFF.
404
4.08k
    f.limbs[0] = -MAX_PRIME_DIFF;
405
4.08k
    f.limbs[FINAL_LIMB_POSITION] = ((limb_t)1) << FINAL_LIMB_MODULUS_BITS;
406
4.08k
    g.FromNum3072(*this);
407
4.08k
    int len = SIGNED_LIMBS; //!< The number of significant limbs in f and g
408
4.08k
    signed_limb_t eta = -1; //!< State to track knowledge about ratio of f and g
409
    // Perform divsteps on [f,g] until g=0 is reached, keeping (d,e) synchronized with them.
410
409k
    while (true) {
411
        // Compute transformation matrix t that represents the next SIGNED_LIMB_SIZE divsteps
412
        // to apply. This can be computed from just the bottom limb of f and g, and eta.
413
409k
        SignedMatrix t;
414
409k
        eta = ComputeDivstepMatrix(eta, f.limbs[0], g.limbs[0], t);
415
        // Apply that transformation matrix to the full [f,g] vector.
416
409k
        UpdateFG(f, g, t, len);
417
        // Apply that transformation matrix to the full [d,e] vector (mod modulus).
418
409k
        UpdateDE(d, e, t);
419
420
        // Check if g is zero.
421
409k
        if (g.limbs[0] == 0) {
422
180k
            signed_limb_t cond = 0;
423
8.81M
            for (int j = 1; j < len; ++j) {
424
8.63M
                cond |= g.limbs[j];
425
8.63M
            }
426
            // If so, we're done.
427
180k
            if (cond == 0) break;
428
180k
        }
429
430
        // Check if the top limbs of both f and g are both 0 or -1.
431
405k
        signed_limb_t fn = f.limbs[len - 1], gn = g.limbs[len - 1];
432
405k
        signed_limb_t cond = ((signed_limb_t)len - 2) >> (LIMB_SIZE - 1);
433
405k
        cond |= fn ^ (fn >> (LIMB_SIZE - 1));
434
405k
        cond |= gn ^ (gn >> (LIMB_SIZE - 1));
435
405k
        if (cond == 0) {
436
            // If so, drop the top limb, shrinking the size of f and g, by
437
            // propagating the sign to the previous limb.
438
200k
            f.limbs[len - 2] |= (limb_t)f.limbs[len - 1] << SIGNED_LIMB_SIZE;
439
200k
            g.limbs[len - 2] |= (limb_t)g.limbs[len - 1] << SIGNED_LIMB_SIZE;
440
200k
            --len;
441
200k
        }
442
405k
    }
443
    // At some point, [f,g] will have been rewritten into [f',0], such that gcd(f,g) = gcd(f',0).
444
    // This is proven in the paper. As f started out being modulus, a prime number, we know that
445
    // gcd is 1, and thus f' is 1 or -1.
446
4.08k
    Assume((f.limbs[0] & MAX_SIGNED_LIMB) == 1 || (f.limbs[0] & MAX_SIGNED_LIMB) == MAX_SIGNED_LIMB);
447
    // As we've maintained the invariant that f = d * x mod modulus, we get d/f mod modulus is the
448
    // modular inverse of x we're looking for. As f is 1 or -1, it is also true that d/f = d*f.
449
    // Normalize d to prepare it for output, while negating it if f is negative.
450
4.08k
    d.Normalize(f.limbs[len - 1] >> (LIMB_SIZE  - 1));
451
4.08k
    Num3072 ret;
452
4.08k
    d.ToNum3072(ret);
453
4.08k
    return ret;
454
4.08k
}
455
456
void Num3072::Multiply(const Num3072& a)
457
10.3k
{
458
10.3k
    limb_t c0 = 0, c1 = 0, c2 = 0;
459
10.3k
    Num3072 tmp;
460
461
    /* Compute limbs 0..N-2 of this*a into tmp, including one reduction. */
462
497k
    for (int j = 0; j < LIMBS - 1; ++j) {
463
486k
        limb_t d0 = 0, d1 = 0, d2 = 0;
464
486k
        mul(d0, d1, this->limbs[1 + j], a.limbs[LIMBS + j - (1 + j)]);
465
11.6M
        for (int i = 2 + j; i < LIMBS; ++i) muladd3(d0, d1, d2, this->limbs[i], a.limbs[LIMBS + j - i]);
466
486k
        mulnadd3(c0, c1, c2, d0, d1, d2, MAX_PRIME_DIFF);
467
12.1M
        for (int i = 0; i < j + 1; ++i) muladd3(c0, c1, c2, this->limbs[i], a.limbs[j - i]);
468
486k
        extract3(c0, c1, c2, tmp.limbs[j]);
469
486k
    }
470
471
    /* Compute limb N-1 of a*b into tmp. */
472
10.3k
    assert(c2 == 0);
473
507k
    for (int i = 0; i < LIMBS; ++i) muladd3(c0, c1, c2, this->limbs[i], a.limbs[LIMBS - 1 - i]);
474
10.3k
    extract3(c0, c1, c2, tmp.limbs[LIMBS - 1]);
475
476
    /* Perform a second reduction. */
477
10.3k
    muln2(c0, c1, MAX_PRIME_DIFF);
478
507k
    for (int j = 0; j < LIMBS; ++j) {
479
497k
        addnextract2(c0, c1, tmp.limbs[j], this->limbs[j]);
480
497k
    }
481
482
10.3k
    assert(c1 == 0);
483
10.3k
    assert(c0 == 0 || c0 == 1);
484
485
    /* Perform up to two more reductions if the internal state has already
486
     * overflown the MAX of Num3072 or if it is larger than the modulus or
487
     * if both are the case.
488
     * */
489
10.3k
    if (this->IsOverflow()) this->FullReduce();
490
10.3k
    if (c0) this->FullReduce();
491
10.3k
}
492
493
void Num3072::SetToOne()
494
23.2k
{
495
23.2k
    this->limbs[0] = 1;
496
1.11M
    for (int i = 1; i < LIMBS; ++i) this->limbs[i] = 0;
497
23.2k
}
498
499
void Num3072::Divide(const Num3072& a)
500
4.08k
{
501
4.08k
    if (this->IsOverflow()) this->FullReduce();
502
503
4.08k
    Num3072 inv{};
504
4.08k
    if (a.IsOverflow()) {
505
0
        Num3072 b = a;
506
0
        b.FullReduce();
507
0
        inv = b.GetInverse();
508
4.08k
    } else {
509
4.08k
        inv = a.GetInverse();
510
4.08k
    }
511
512
4.08k
    this->Multiply(inv);
513
4.08k
    if (this->IsOverflow()) this->FullReduce();
514
4.08k
}
515
516
6.05k
Num3072::Num3072(const unsigned char (&data)[BYTE_SIZE]) {
517
296k
    for (int i = 0; i < LIMBS; ++i) {
518
290k
        if (sizeof(limb_t) == 4) {
519
0
            this->limbs[i] = ReadLE32(data + 4 * i);
520
290k
        } else if (sizeof(limb_t) == 8) {
521
290k
            this->limbs[i] = ReadLE64(data + 8 * i);
522
290k
        }
523
290k
    }
524
6.05k
}
525
526
4.08k
void Num3072::ToBytes(unsigned char (&out)[BYTE_SIZE]) {
527
200k
    for (int i = 0; i < LIMBS; ++i) {
528
196k
        if (sizeof(limb_t) == 4) {
529
0
            WriteLE32(out + i * 4, this->limbs[i]);
530
196k
        } else if (sizeof(limb_t) == 8) {
531
196k
            WriteLE64(out + i * 8, this->limbs[i]);
532
196k
        }
533
196k
    }
534
4.08k
}
535
536
6.05k
Num3072 MuHash3072::ToNum3072(std::span<const unsigned char> in) {
537
6.05k
    unsigned char tmp[Num3072::BYTE_SIZE];
538
539
6.05k
    uint256 hashed_in{(HashWriter{} << in).GetSHA256()};
540
6.05k
    static_assert(sizeof(tmp) % ChaCha20Aligned::BLOCKLEN == 0);
541
6.05k
    ChaCha20Aligned{MakeByteSpan(hashed_in)}.Keystream(MakeWritableByteSpan(tmp));
542
6.05k
    Num3072 out{tmp};
543
544
6.05k
    return out;
545
6.05k
}
546
547
MuHash3072::MuHash3072(std::span<const unsigned char> in) noexcept
548
186
{
549
186
    m_numerator = ToNum3072(in);
550
186
}
551
552
void MuHash3072::Finalize(uint256& out) noexcept
553
4.08k
{
554
4.08k
    m_numerator.Divide(m_denominator);
555
4.08k
    m_denominator.SetToOne();  // Needed to keep the MuHash object valid
556
557
4.08k
    unsigned char data[Num3072::BYTE_SIZE];
558
4.08k
    m_numerator.ToBytes(data);
559
560
4.08k
    out = (HashWriter{} << data).GetSHA256();
561
4.08k
}
562
563
MuHash3072& MuHash3072::operator*=(const MuHash3072& mul) noexcept
564
132
{
565
132
    m_numerator.Multiply(mul.m_numerator);
566
132
    m_denominator.Multiply(mul.m_denominator);
567
132
    return *this;
568
132
}
569
570
MuHash3072& MuHash3072::operator/=(const MuHash3072& div) noexcept
571
71
{
572
71
    m_numerator.Multiply(div.m_denominator);
573
71
    m_denominator.Multiply(div.m_numerator);
574
71
    return *this;
575
71
}
576
577
5.38k
MuHash3072& MuHash3072::Insert(std::span<const unsigned char> in) noexcept {
578
5.38k
    m_numerator.Multiply(ToNum3072(in));
579
5.38k
    return *this;
580
5.38k
}
581
582
484
MuHash3072& MuHash3072::Remove(std::span<const unsigned char> in) noexcept {
583
484
    m_denominator.Multiply(ToNum3072(in));
584
484
    return *this;
585
484
}