Coverage Report

Created: 2026-04-29 19:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/tmp/bitcoin/src/crypto/muhash.cpp
Line
Count
Source
1
// Copyright (c) 2017-present The Bitcoin Core developers
2
// Distributed under the MIT software license, see the accompanying
3
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4
5
#include <crypto/muhash.h>
6
7
#include <crypto/chacha20.h>
8
#include <crypto/common.h>
9
#include <hash.h>
10
#include <span.h>
11
#include <uint256.h>
12
#include <util/check.h>
13
14
#include <bit>
15
#include <cstring>
16
#include <limits>
17
18
namespace {
19
20
using limb_t = Num3072::limb_t;
21
using signed_limb_t = Num3072::signed_limb_t;
22
using double_limb_t = Num3072::double_limb_t;
23
using signed_double_limb_t = Num3072::signed_double_limb_t;
24
constexpr int LIMB_SIZE = Num3072::LIMB_SIZE;
25
constexpr int SIGNED_LIMB_SIZE = Num3072::SIGNED_LIMB_SIZE;
26
constexpr int LIMBS = Num3072::LIMBS;
27
constexpr int SIGNED_LIMBS = Num3072::SIGNED_LIMBS;
28
constexpr int FINAL_LIMB_POSITION = 3072 / SIGNED_LIMB_SIZE;
29
constexpr int FINAL_LIMB_MODULUS_BITS = 3072 % SIGNED_LIMB_SIZE;
30
constexpr limb_t MAX_LIMB = (limb_t)(-1);
31
constexpr limb_t MAX_SIGNED_LIMB = (((limb_t)1) << SIGNED_LIMB_SIZE) - 1;
32
/** 2^3072 - 1103717, the largest 3072-bit safe prime number, is used as the modulus. */
33
constexpr limb_t MAX_PRIME_DIFF = 1103717;
34
/** The modular inverse of (2**3072 - MAX_PRIME_DIFF) mod (MAX_SIGNED_LIMB + 1). */
35
constexpr limb_t MODULUS_INVERSE = limb_t(0x70a1421da087d93);
36
37
38
/** Extract the lowest limb of [c0,c1,c2] into n, and left shift the number by 1 limb. */
39
inline void extract3(limb_t& c0, limb_t& c1, limb_t& c2, limb_t& n)
40
497k
{
41
497k
    n = c0;
42
497k
    c0 = c1;
43
497k
    c1 = c2;
44
497k
    c2 = 0;
45
497k
}
46
47
/** [c0,c1] = a * b */
48
inline void mul(limb_t& c0, limb_t& c1, const limb_t& a, const limb_t& b)
49
487k
{
50
487k
    double_limb_t t = (double_limb_t)a * b;
51
487k
    c1 = t >> LIMB_SIZE;
52
487k
    c0 = t;
53
487k
}
54
55
/* [c0,c1,c2] += n * [d0,d1,d2]. c2 is 0 initially */
56
inline void mulnadd3(limb_t& c0, limb_t& c1, limb_t& c2, limb_t& d0, limb_t& d1, limb_t& d2, const limb_t& n)
57
487k
{
58
487k
    double_limb_t t = (double_limb_t)d0 * n + c0;
59
487k
    c0 = t;
60
487k
    t >>= LIMB_SIZE;
61
487k
    t += (double_limb_t)d1 * n + c1;
62
487k
    c1 = t;
63
487k
    t >>= LIMB_SIZE;
64
487k
    c2 = t + d2 * n;
65
487k
}
66
67
/* [c0,c1] *= n */
68
inline void muln2(limb_t& c0, limb_t& c1, const limb_t& n)
69
10.3k
{
70
10.3k
    double_limb_t t = (double_limb_t)c0 * n;
71
10.3k
    c0 = t;
72
10.3k
    t >>= LIMB_SIZE;
73
10.3k
    t += (double_limb_t)c1 * n;
74
10.3k
    c1 = t;
75
10.3k
}
76
77
/** [c0,c1,c2] += a * b */
78
inline void muladd3(limb_t& c0, limb_t& c1, limb_t& c2, const limb_t& a, const limb_t& b)
79
23.3M
{
80
23.3M
    double_limb_t t = (double_limb_t)a * b;
81
23.3M
    limb_t th = t >> LIMB_SIZE;
82
23.3M
    limb_t tl = t;
83
84
23.3M
    c0 += tl;
85
23.3M
    th += (c0 < tl) ? 1 : 0;
86
23.3M
    c1 += th;
87
23.3M
    c2 += (c1 < th) ? 1 : 0;
88
23.3M
}
89
90
/**
91
 * Add limb a to [c0,c1]: [c0,c1] += a. Then extract the lowest
92
 * limb of [c0,c1] into n, and left shift the number by 1 limb.
93
 * */
94
inline void addnextract2(limb_t& c0, limb_t& c1, const limb_t& a, limb_t& n)
95
497k
{
96
497k
    limb_t c2 = 0;
97
98
    // add
99
497k
    c0 += a;
100
497k
    if (c0 < a) {
101
3.47k
        c1 += 1;
102
103
        // Handle case when c1 has overflown
104
3.47k
        if (c1 == 0) c2 = 1;
105
3.47k
    }
106
107
    // extract
108
497k
    n = c0;
109
497k
    c0 = c1;
110
497k
    c1 = c2;
111
497k
}
112
113
} // namespace
114
115
/** Indicates whether d is larger than the modulus. */
116
bool Num3072::IsOverflow() const
117
22.6k
{
118
22.6k
    if (this->limbs[0] <= std::numeric_limits<limb_t>::max() - MAX_PRIME_DIFF) return false;
119
528
    for (int i = 1; i < LIMBS; ++i) {
120
517
        if (this->limbs[i] != std::numeric_limits<limb_t>::max()) return false;
121
517
    }
122
11
    return true;
123
11
}
124
125
void Num3072::FullReduce()
126
11
{
127
11
    limb_t c0 = MAX_PRIME_DIFF;
128
11
    limb_t c1 = 0;
129
539
    for (int i = 0; i < LIMBS; ++i) {
130
528
        addnextract2(c0, c1, this->limbs[i], this->limbs[i]);
131
528
    }
132
11
}
133
134
namespace {
135
/** A type representing a number in signed limb representation. */
136
struct Num3072Signed
137
{
138
    /** The represented value is sum(limbs[i]*2^(SIGNED_LIMB_SIZE*i), i=0..SIGNED_LIMBS-1).
139
     *  Note that limbs may be negative, or exceed 2^SIGNED_LIMB_SIZE-1. */
140
    signed_limb_t limbs[SIGNED_LIMBS];
141
142
    /** Construct a Num3072Signed with value 0. */
143
    Num3072Signed()
144
16.3k
    {
145
16.3k
        memset(limbs, 0, sizeof(limbs));
146
16.3k
    }
147
148
    /** Convert a Num3072 to a Num3072Signed. Output will be normalized and in
149
     *  range 0..2^3072-1. */
150
    void FromNum3072(const Num3072& in)
151
4.09k
    {
152
4.09k
        double_limb_t c = 0;
153
4.09k
        int b = 0, outpos = 0;
154
200k
        for (int i = 0; i < LIMBS; ++i) {
155
196k
            c += double_limb_t{in.limbs[i]} << b;
156
196k
            b += LIMB_SIZE;
157
396k
            while (b >= SIGNED_LIMB_SIZE) {
158
200k
                limbs[outpos++] = limb_t(c) & MAX_SIGNED_LIMB;
159
200k
                c >>= SIGNED_LIMB_SIZE;
160
200k
                b -= SIGNED_LIMB_SIZE;
161
200k
            }
162
196k
        }
163
4.09k
        Assume(outpos == SIGNED_LIMBS - 1);
164
4.09k
        limbs[SIGNED_LIMBS - 1] = c;
165
4.09k
        c >>= SIGNED_LIMB_SIZE;
166
4.09k
        Assume(c == 0);
167
4.09k
    }
168
169
    /** Convert a Num3072Signed to a Num3072. Input must be in range 0..modulus-1. */
170
    void ToNum3072(Num3072& out) const
171
4.09k
    {
172
4.09k
        double_limb_t c = 0;
173
4.09k
        int b = 0, outpos = 0;
174
208k
        for (int i = 0; i < SIGNED_LIMBS; ++i) {
175
204k
            c += double_limb_t(limbs[i]) << b;
176
204k
            b += SIGNED_LIMB_SIZE;
177
204k
            if (b >= LIMB_SIZE) {
178
196k
                out.limbs[outpos++] = c;
179
196k
                c >>= LIMB_SIZE;
180
196k
                b -= LIMB_SIZE;
181
196k
            }
182
204k
        }
183
4.09k
        Assume(outpos == LIMBS);
184
4.09k
        Assume(c == 0);
185
4.09k
    }
186
187
    /** Take a Num3072Signed in range 1-2*2^3072..2^3072-1, and:
188
     *  - optionally negate it (if negate is true)
189
     *  - reduce it modulo the modulus (2^3072 - MAX_PRIME_DIFF)
190
     *  - produce output with all limbs in range 0..2^SIGNED_LIMB_SIZE-1
191
     */
192
    void Normalize(bool negate)
193
4.09k
    {
194
        // Add modulus if this was negative. This brings the range of *this to 1-2^3072..2^3072-1.
195
4.09k
        signed_limb_t cond_add = limbs[SIGNED_LIMBS-1] >> (LIMB_SIZE-1); // -1 if this is negative; 0 otherwise
196
4.09k
        limbs[0] += signed_limb_t(-MAX_PRIME_DIFF) & cond_add;
197
4.09k
        limbs[FINAL_LIMB_POSITION] += (signed_limb_t(1) << FINAL_LIMB_MODULUS_BITS) & cond_add;
198
        // Next negate all limbs if negate was set. This does not change the range of *this.
199
4.09k
        signed_limb_t cond_negate = -signed_limb_t(negate); // -1 if this negate is true; 0 otherwise
200
208k
        for (int i = 0; i < SIGNED_LIMBS; ++i) {
201
204k
            limbs[i] = (limbs[i] ^ cond_negate) - cond_negate;
202
204k
        }
203
        // Perform carry (make all limbs except the top one be in range 0..2^SIGNED_LIMB_SIZE-1).
204
204k
        for (int i = 0; i < SIGNED_LIMBS - 1; ++i) {
205
200k
            limbs[i + 1] += limbs[i] >> SIGNED_LIMB_SIZE;
206
200k
            limbs[i] &= MAX_SIGNED_LIMB;
207
200k
        }
208
        // Again add modulus if *this was negative. This brings the range of *this to 0..2^3072-1.
209
4.09k
        cond_add = limbs[SIGNED_LIMBS-1] >> (LIMB_SIZE-1); // -1 if this is negative; 0 otherwise
210
4.09k
        limbs[0] += signed_limb_t(-MAX_PRIME_DIFF) & cond_add;
211
4.09k
        limbs[FINAL_LIMB_POSITION] += (signed_limb_t(1) << FINAL_LIMB_MODULUS_BITS) & cond_add;
212
        // Perform another carry. Now all limbs are in range 0..2^SIGNED_LIMB_SIZE-1.
213
204k
        for (int i = 0; i < SIGNED_LIMBS - 1; ++i) {
214
200k
            limbs[i + 1] += limbs[i] >> SIGNED_LIMB_SIZE;
215
200k
            limbs[i] &= MAX_SIGNED_LIMB;
216
200k
        }
217
4.09k
    }
218
};
219
220
/** 2x2 transformation matrix with signed_limb_t elements. */
221
struct SignedMatrix
222
{
223
    signed_limb_t u, v, q, r;
224
};
225
226
/** Compute the transformation matrix for SIGNED_LIMB_SIZE divsteps.
227
 *
228
 * eta: initial eta value
229
 * f:   bottom SIGNED_LIMB_SIZE bits of initial f value
230
 * g:   bottom SIGNED_LIMB_SIZE bits of initial g value
231
 * out: resulting transformation matrix, scaled by 2^SIGNED_LIMB_SIZE
232
 * return: eta value after SIGNED_LIMB_SIZE divsteps
233
 */
234
inline limb_t ComputeDivstepMatrix(signed_limb_t eta, limb_t f, limb_t g, SignedMatrix& out)
235
410k
{
236
    /** inv256[i] = -1/(2*i+1) (mod 256) */
237
410k
    static const uint8_t NEGINV256[128] = {
238
410k
        0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
239
410k
        0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
240
410k
        0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
241
410k
        0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
242
410k
        0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
243
410k
        0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
244
410k
        0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
245
410k
        0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
246
410k
        0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
247
410k
        0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
248
410k
        0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
249
410k
    };
250
    // Coefficients of returned SignedMatrix; starts off as identity matrix. */
251
410k
    limb_t u = 1, v = 0, q = 0, r = 1;
252
    // The number of divsteps still left.
253
410k
    int i = SIGNED_LIMB_SIZE;
254
2.63M
    while (true) {
255
        /* Use a sentinel bit to count zeros only up to i. */
256
2.63M
        int zeros = std::countr_zero(g | (MAX_LIMB << i));
257
        /* Perform zeros divsteps at once; they all just divide g by two. */
258
2.63M
        g >>= zeros;
259
2.63M
        u <<= zeros;
260
2.63M
        v <<= zeros;
261
2.63M
        eta -= zeros;
262
2.63M
        i -= zeros;
263
         /* We're done once we've performed SIGNED_LIMB_SIZE divsteps. */
264
2.63M
        if (i == 0) break;
265
        /* If eta is negative, negate it and replace f,g with g,-f. */
266
2.22M
        if (eta < 0) {
267
776k
            limb_t tmp;
268
776k
            eta = -eta;
269
776k
            tmp = f; f = g; g = -tmp;
270
776k
            tmp = u; u = q; q = -tmp;
271
776k
            tmp = v; v = r; r = -tmp;
272
776k
        }
273
        /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
274
         * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
275
         * can be done as its sign will flip once that happens. */
276
2.22M
        int limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
277
        /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
278
2.22M
        limb_t m = (MAX_LIMB >> (LIMB_SIZE - limit)) & 255U;
279
        /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
280
2.22M
        limb_t w = (g * NEGINV256[(f >> 1) & 127]) & m;
281
        /* Do so. */
282
2.22M
        g += f * w;
283
2.22M
        q += u * w;
284
2.22M
        r += v * w;
285
2.22M
    }
286
410k
    out.u = (signed_limb_t)u;
287
410k
    out.v = (signed_limb_t)v;
288
410k
    out.q = (signed_limb_t)q;
289
410k
    out.r = (signed_limb_t)r;
290
410k
    return eta;
291
410k
}
292
293
/** Apply matrix t/2^SIGNED_LIMB_SIZE to vector [d,e], modulo modulus.
294
 *
295
 * On input and output, d and e are in range 1-2*modulus..modulus-1.
296
 */
297
inline void UpdateDE(Num3072Signed& d, Num3072Signed& e, const SignedMatrix& t)
298
410k
{
299
410k
    const signed_limb_t u = t.u, v=t.v, q=t.q, r=t.r;
300
301
    /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */
302
410k
    signed_limb_t sd = d.limbs[SIGNED_LIMBS - 1] >> (LIMB_SIZE - 1);
303
410k
    signed_limb_t se = e.limbs[SIGNED_LIMBS - 1] >> (LIMB_SIZE - 1);
304
410k
    signed_limb_t md = (u & sd) + (v & se);
305
410k
    signed_limb_t me = (q & sd) + (r & se);
306
    /* Begin computing t*[d,e]. */
307
410k
    signed_limb_t di = d.limbs[0], ei = e.limbs[0];
308
410k
    signed_double_limb_t cd = (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
309
410k
    signed_double_limb_t ce = (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
310
    /* Correct md,me so that t*[d,e]+modulus*[md,me] has SIGNED_LIMB_SIZE zero bottom bits. */
311
410k
    md -= (MODULUS_INVERSE * limb_t(cd) + md) & MAX_SIGNED_LIMB;
312
410k
    me -= (MODULUS_INVERSE * limb_t(ce) + me) & MAX_SIGNED_LIMB;
313
    /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
314
410k
    cd -= (signed_double_limb_t)1103717 * md;
315
410k
    ce -= (signed_double_limb_t)1103717 * me;
316
    /* Verify that the low SIGNED_LIMB_SIZE bits of the computation are indeed zero, and then throw them away. */
317
410k
    Assume((cd & MAX_SIGNED_LIMB) == 0);
318
410k
    Assume((ce & MAX_SIGNED_LIMB) == 0);
319
410k
    cd >>= SIGNED_LIMB_SIZE;
320
410k
    ce >>= SIGNED_LIMB_SIZE;
321
    /* Now iteratively compute limb i=1..SIGNED_LIMBS-2 of t*[d,e]+modulus*[md,me], and store them in output
322
     * limb i-1 (shifting down by SIGNED_LIMB_SIZE bits). The corresponding limbs in modulus are all zero,
323
     * so modulus/md/me are not actually involved here. */
324
20.1M
    for (int i = 1; i < SIGNED_LIMBS - 1; ++i) {
325
19.7M
        di = d.limbs[i];
326
19.7M
        ei = e.limbs[i];
327
19.7M
        cd += (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
328
19.7M
        ce += (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
329
19.7M
        d.limbs[i - 1] = (signed_limb_t)cd & MAX_SIGNED_LIMB; cd >>= SIGNED_LIMB_SIZE;
330
19.7M
        e.limbs[i - 1] = (signed_limb_t)ce & MAX_SIGNED_LIMB; ce >>= SIGNED_LIMB_SIZE;
331
19.7M
    }
332
    /* Compute limb SIGNED_LIMBS-1 of t*[d,e]+modulus*[md,me], and store it in output limb SIGNED_LIMBS-2. */
333
410k
    di = d.limbs[SIGNED_LIMBS - 1];
334
410k
    ei = e.limbs[SIGNED_LIMBS - 1];
335
410k
    cd += (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
336
410k
    ce += (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
337
410k
    cd += (signed_double_limb_t)md << FINAL_LIMB_MODULUS_BITS;
338
410k
    ce += (signed_double_limb_t)me << FINAL_LIMB_MODULUS_BITS;
339
410k
    d.limbs[SIGNED_LIMBS - 2] = (signed_limb_t)cd & MAX_SIGNED_LIMB; cd >>= SIGNED_LIMB_SIZE;
340
410k
    e.limbs[SIGNED_LIMBS - 2] = (signed_limb_t)ce & MAX_SIGNED_LIMB; ce >>= SIGNED_LIMB_SIZE;
341
    /* What remains goes into output limb SINGED_LIMBS-1 */
342
410k
    d.limbs[SIGNED_LIMBS - 1] = (signed_limb_t)cd;
343
410k
    e.limbs[SIGNED_LIMBS - 1] = (signed_limb_t)ce;
344
410k
}
345
346
/** Apply matrix t/2^SIGNED_LIMB_SIZE to vector (f,g).
347
 *
348
 * The matrix t must be chosen such that t*(f,g) results in multiples of 2^SIGNED_LIMB_SIZE.
349
 * This is the case for matrices computed by ComputeDivstepMatrix().
350
 */
351
inline void UpdateFG(Num3072Signed& f, Num3072Signed& g, const SignedMatrix& t, int len)
352
410k
{
353
410k
    const signed_limb_t u = t.u, v=t.v, q=t.q, r=t.r;
354
355
410k
    signed_limb_t fi, gi;
356
410k
    signed_double_limb_t cf, cg;
357
    /* Start computing t*[f,g]. */
358
410k
    fi = f.limbs[0];
359
410k
    gi = g.limbs[0];
360
410k
    cf = (signed_double_limb_t)u * fi + (signed_double_limb_t)v * gi;
361
410k
    cg = (signed_double_limb_t)q * fi + (signed_double_limb_t)r * gi;
362
    /* Verify that the bottom SIGNED_LIMB_BITS bits of the result are zero, and then throw them away. */
363
410k
    Assume((cf & MAX_SIGNED_LIMB) == 0);
364
410k
    Assume((cg & MAX_SIGNED_LIMB) == 0);
365
410k
    cf >>= SIGNED_LIMB_SIZE;
366
410k
    cg >>= SIGNED_LIMB_SIZE;
367
    /* Now iteratively compute limb i=1..SIGNED_LIMBS-1 of t*[f,g], and store them in output limb i-1 (shifting
368
     * down by SIGNED_LIMB_BITS bits). */
369
14.7M
    for (int i = 1; i < len; ++i) {
370
14.3M
        fi = f.limbs[i];
371
14.3M
        gi = g.limbs[i];
372
14.3M
        cf += (signed_double_limb_t)u * fi + (signed_double_limb_t)v * gi;
373
14.3M
        cg += (signed_double_limb_t)q * fi + (signed_double_limb_t)r * gi;
374
14.3M
        f.limbs[i - 1] = (signed_limb_t)cf & MAX_SIGNED_LIMB; cf >>= SIGNED_LIMB_SIZE;
375
14.3M
        g.limbs[i - 1] = (signed_limb_t)cg & MAX_SIGNED_LIMB; cg >>= SIGNED_LIMB_SIZE;
376
14.3M
    }
377
    /* What remains is limb SIGNED_LIMBS of t*[f,g]; store it as output limb SIGNED_LIMBS-1. */
378
410k
    f.limbs[len - 1] = (signed_limb_t)cf;
379
410k
    g.limbs[len - 1] = (signed_limb_t)cg;
380
381
410k
}
382
} // namespace
383
384
Num3072 Num3072::GetInverse() const
385
4.09k
{
386
    // Compute a modular inverse based on a variant of the safegcd algorithm:
387
    // - Paper: https://gcd.cr.yp.to/papers.html
388
    // - Inspired by this code in libsecp256k1:
389
    //   https://github.com/bitcoin-core/secp256k1/blob/master/src/modinv32_impl.h
390
    // - Explanation of the algorithm:
391
    //   https://github.com/bitcoin-core/secp256k1/blob/master/doc/safegcd_implementation.md
392
393
    // Local variables d, e, f, g:
394
    // - f and g are the variables whose gcd we compute (despite knowing the answer is 1):
395
    //   - f is always odd, and initialized as modulus
396
    //   - g is initialized as *this (called x in what follows)
397
    // - d and e are the numbers for which at every step it is the case that:
398
    //   - f = d * x mod modulus; d is initialized as 0
399
    //   - g = e * x mod modulus; e is initialized as 1
400
4.09k
    Num3072Signed d, e, f, g;
401
4.09k
    e.limbs[0] = 1;
402
    // F is initialized as modulus, which in signed limb representation can be expressed
403
    // simply as 2^3072 + -MAX_PRIME_DIFF.
404
4.09k
    f.limbs[0] = -MAX_PRIME_DIFF;
405
4.09k
    f.limbs[FINAL_LIMB_POSITION] = ((limb_t)1) << FINAL_LIMB_MODULUS_BITS;
406
4.09k
    g.FromNum3072(*this);
407
4.09k
    int len = SIGNED_LIMBS; //!< The number of significant limbs in f and g
408
4.09k
    signed_limb_t eta = -1; //!< State to track knowledge about ratio of f and g
409
    // Perform divsteps on [f,g] until g=0 is reached, keeping (d,e) synchronized with them.
410
410k
    while (true) {
411
        // Compute transformation matrix t that represents the next SIGNED_LIMB_SIZE divsteps
412
        // to apply. This can be computed from just the bottom limb of f and g, and eta.
413
410k
        SignedMatrix t;
414
410k
        eta = ComputeDivstepMatrix(eta, f.limbs[0], g.limbs[0], t);
415
        // Apply that transformation matrix to the full [f,g] vector.
416
410k
        UpdateFG(f, g, t, len);
417
        // Apply that transformation matrix to the full [d,e] vector (mod modulus).
418
410k
        UpdateDE(d, e, t);
419
420
        // Check if g is zero.
421
410k
        if (g.limbs[0] == 0) {
422
179k
            signed_limb_t cond = 0;
423
8.79M
            for (int j = 1; j < len; ++j) {
424
8.61M
                cond |= g.limbs[j];
425
8.61M
            }
426
            // If so, we're done.
427
179k
            if (cond == 0) break;
428
179k
        }
429
430
        // Check if the top limbs of both f and g are both 0 or -1.
431
406k
        signed_limb_t fn = f.limbs[len - 1], gn = g.limbs[len - 1];
432
406k
        signed_limb_t cond = ((signed_limb_t)len - 2) >> (LIMB_SIZE - 1);
433
406k
        cond |= fn ^ (fn >> (LIMB_SIZE - 1));
434
406k
        cond |= gn ^ (gn >> (LIMB_SIZE - 1));
435
406k
        if (cond == 0) {
436
            // If so, drop the top limb, shrinking the size of f and g, by
437
            // propagating the sign to the previous limb.
438
200k
            f.limbs[len - 2] |= (limb_t)f.limbs[len - 1] << SIGNED_LIMB_SIZE;
439
200k
            g.limbs[len - 2] |= (limb_t)g.limbs[len - 1] << SIGNED_LIMB_SIZE;
440
200k
            --len;
441
200k
        }
442
406k
    }
443
    // At some point, [f,g] will have been rewritten into [f',0], such that gcd(f,g) = gcd(f',0).
444
    // This is proven in the paper. As f started out being modulus, a prime number, we know that
445
    // gcd is 1, and thus f' is 1 or -1.
446
4.09k
    Assume((f.limbs[0] & MAX_SIGNED_LIMB) == 1 || (f.limbs[0] & MAX_SIGNED_LIMB) == MAX_SIGNED_LIMB);
447
    // As we've maintained the invariant that f = d * x mod modulus, we get d/f mod modulus is the
448
    // modular inverse of x we're looking for. As f is 1 or -1, it is also true that d/f = d*f.
449
    // Normalize d to prepare it for output, while negating it if f is negative.
450
4.09k
    d.Normalize(f.limbs[len - 1] >> (LIMB_SIZE  - 1));
451
4.09k
    Num3072 ret;
452
4.09k
    d.ToNum3072(ret);
453
4.09k
    return ret;
454
4.09k
}
455
456
void Num3072::Multiply(const Num3072& a)
457
10.3k
{
458
10.3k
    limb_t c0 = 0, c1 = 0, c2 = 0;
459
10.3k
    Num3072 tmp;
460
461
    /* Compute limbs 0..N-2 of this*a into tmp, including one reduction. */
462
497k
    for (int j = 0; j < LIMBS - 1; ++j) {
463
487k
        limb_t d0 = 0, d1 = 0, d2 = 0;
464
487k
        mul(d0, d1, this->limbs[1 + j], a.limbs[LIMBS + j - (1 + j)]);
465
11.6M
        for (int i = 2 + j; i < LIMBS; ++i) muladd3(d0, d1, d2, this->limbs[i], a.limbs[LIMBS + j - i]);
466
487k
        mulnadd3(c0, c1, c2, d0, d1, d2, MAX_PRIME_DIFF);
467
12.1M
        for (int i = 0; i < j + 1; ++i) muladd3(c0, c1, c2, this->limbs[i], a.limbs[j - i]);
468
487k
        extract3(c0, c1, c2, tmp.limbs[j]);
469
487k
    }
470
471
    /* Compute limb N-1 of a*b into tmp. */
472
10.3k
    assert(c2 == 0);
473
507k
    for (int i = 0; i < LIMBS; ++i) muladd3(c0, c1, c2, this->limbs[i], a.limbs[LIMBS - 1 - i]);
474
10.3k
    extract3(c0, c1, c2, tmp.limbs[LIMBS - 1]);
475
476
    /* Perform a second reduction. */
477
10.3k
    muln2(c0, c1, MAX_PRIME_DIFF);
478
507k
    for (int j = 0; j < LIMBS; ++j) {
479
497k
        addnextract2(c0, c1, tmp.limbs[j], this->limbs[j]);
480
497k
    }
481
482
10.3k
    assert(c1 == 0);
483
10.3k
    assert(c0 == 0 || c0 == 1);
484
485
    /* Perform up to two more reductions if the internal state has already
486
     * overflown the MAX of Num3072 or if it is larger than the modulus or
487
     * if both are the case.
488
     * */
489
10.3k
    if (this->IsOverflow()) this->FullReduce();
490
10.3k
    if (c0) this->FullReduce();
491
10.3k
}
492
493
void Num3072::SetToOne()
494
23.2k
{
495
23.2k
    this->limbs[0] = 1;
496
1.11M
    for (int i = 1; i < LIMBS; ++i) this->limbs[i] = 0;
497
23.2k
}
498
499
void Num3072::Divide(const Num3072& a)
500
4.09k
{
501
4.09k
    if (this->IsOverflow()) this->FullReduce();
502
503
4.09k
    Num3072 inv{};
504
4.09k
    if (a.IsOverflow()) {
505
0
        Num3072 b = a;
506
0
        b.FullReduce();
507
0
        inv = b.GetInverse();
508
4.09k
    } else {
509
4.09k
        inv = a.GetInverse();
510
4.09k
    }
511
512
4.09k
    this->Multiply(inv);
513
4.09k
    if (this->IsOverflow()) this->FullReduce();
514
4.09k
}
515
516
6.05k
Num3072::Num3072(const unsigned char (&data)[BYTE_SIZE]) {
517
296k
    for (int i = 0; i < LIMBS; ++i) {
518
290k
        if (sizeof(limb_t) == 4) {
519
0
            this->limbs[i] = ReadLE32(data + 4 * i);
520
290k
        } else if (sizeof(limb_t) == 8) {
521
290k
            this->limbs[i] = ReadLE64(data + 8 * i);
522
290k
        }
523
290k
    }
524
6.05k
}
525
526
4.09k
void Num3072::ToBytes(unsigned char (&out)[BYTE_SIZE]) {
527
200k
    for (int i = 0; i < LIMBS; ++i) {
528
196k
        if (sizeof(limb_t) == 4) {
529
0
            WriteLE32(out + i * 4, this->limbs[i]);
530
196k
        } else if (sizeof(limb_t) == 8) {
531
196k
            WriteLE64(out + i * 8, this->limbs[i]);
532
196k
        }
533
196k
    }
534
4.09k
}
535
536
6.05k
Num3072 MuHash3072::ToNum3072(std::span<const unsigned char> in) {
537
6.05k
    unsigned char tmp[Num3072::BYTE_SIZE];
538
539
6.05k
    uint256 hashed_in{(HashWriter{} << in).GetSHA256()};
540
6.05k
    static_assert(sizeof(tmp) % ChaCha20Aligned::BLOCKLEN == 0);
541
6.05k
    ChaCha20Aligned{MakeByteSpan(hashed_in)}.Keystream(MakeWritableByteSpan(tmp));
542
6.05k
    Num3072 out{tmp};
543
544
6.05k
    return out;
545
6.05k
}
546
547
MuHash3072::MuHash3072(std::span<const unsigned char> in) noexcept
548
186
{
549
186
    m_numerator = ToNum3072(in);
550
186
}
551
552
void MuHash3072::Finalize(uint256& out) noexcept
553
4.09k
{
554
4.09k
    m_numerator.Divide(m_denominator);
555
4.09k
    m_denominator.SetToOne();  // Needed to keep the MuHash object valid
556
557
4.09k
    unsigned char data[Num3072::BYTE_SIZE];
558
4.09k
    m_numerator.ToBytes(data);
559
560
4.09k
    out = (HashWriter{} << data).GetSHA256();
561
4.09k
}
562
563
MuHash3072& MuHash3072::operator*=(const MuHash3072& mul) noexcept
564
108
{
565
108
    m_numerator.Multiply(mul.m_numerator);
566
108
    m_denominator.Multiply(mul.m_denominator);
567
108
    return *this;
568
108
}
569
570
MuHash3072& MuHash3072::operator/=(const MuHash3072& div) noexcept
571
95
{
572
95
    m_numerator.Multiply(div.m_denominator);
573
95
    m_denominator.Multiply(div.m_numerator);
574
95
    return *this;
575
95
}
576
577
5.38k
MuHash3072& MuHash3072::Insert(std::span<const unsigned char> in) noexcept {
578
5.38k
    m_numerator.Multiply(ToNum3072(in));
579
5.38k
    return *this;
580
5.38k
}
581
582
484
MuHash3072& MuHash3072::Remove(std::span<const unsigned char> in) noexcept {
583
484
    m_denominator.Multiply(ToNum3072(in));
584
484
    return *this;
585
484
}