Coverage Report

Created: 2026-04-29 19:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/tmp/bitcoin/src/util/asmap.cpp
Line
Count
Source
1
// Copyright (c) 2019-present The Bitcoin Core developers
2
// Distributed under the MIT software license, see the accompanying
3
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4
5
#include <util/asmap.h>
6
7
#include <hash.h>
8
#include <streams.h>
9
#include <uint256.h>
10
#include <util/check.h>
11
#include <util/fs.h>
12
#include <util/log.h>
13
14
#include <bit>
15
#include <cstddef>
16
#include <cstdio>
17
#include <span>
18
#include <string>
19
#include <utility>
20
#include <vector>
21
22
/*
23
 * ASMap (Autonomous System Map) Implementation
24
 *
25
 * Provides a compressed mapping from IP address prefixes to Autonomous System Numbers (ASNs).
26
 * Uses a binary trie structure encoded as bytecode instructions that are interpreted
27
 * at runtime to find the ASN for a given IP address.
28
 *
29
 * The format of the asmap data is a bit-packed binary format where the entire mapping
30
 * is treated as a continuous sequence of bits. Instructions and their arguments are
31
 * encoded using variable numbers of bits and concatenated together without regard for
32
 * byte boundaries. The bits are stored in bytes using little-endian bit ordering.
33
 *
34
 * The data structure internally represents the mapping as a binary trie where:
35
 * - Unassigned subnets (no ASN mapping present) map to 0
36
 * - Subnets mapped entirely to one ASN become leaf nodes
37
 * - Subnets whose lower and upper halves have different mappings branch into subtrees
38
 *
39
 * The encoding uses variable-length integers and four instruction types (RETURN, JUMP,
40
 * MATCH, DEFAULT) to efficiently represent the trie.
41
 */
42
43
namespace {
44
45
// Indicates decoding errors or invalid data
46
constexpr uint32_t INVALID = 0xFFFFFFFF;
47
48
/**
49
 * Extract a single bit from byte array using little-endian bit ordering (LSB first).
50
 * Used for ASMap data.
51
 */
52
inline bool ConsumeBitLE(size_t& bitpos, std::span<const std::byte> bytes) noexcept
53
37.5M
{
54
37.5M
    const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (bitpos % 8)) & 1;
55
37.5M
    ++bitpos;
56
37.5M
    return bit;
57
37.5M
}
58
59
/**
60
 * Extract a single bit from byte array using big-endian bit ordering (MSB first).
61
 * Used for IP addresses to match network byte order conventions.
62
 */
63
inline bool ConsumeBitBE(uint8_t& bitpos, std::span<const std::byte> bytes) noexcept
64
412k
{
65
412k
    const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (7 - (bitpos % 8))) & 1;
66
412k
    ++bitpos;
67
412k
    return bit;
68
412k
}
69
70
/**
71
 * Variable-length integer decoder using a custom encoding scheme.
72
 *
73
 * The encoding is easiest to describe using an example. Let's say minval=100 and
74
 * bit_sizes=[4,2,2,3]. In that case:
75
 * - x in [100..115]: encoded as [0] + [4-bit BE encoding of (x-100)]
76
 * - x in [116..119]: encoded as [1,0] + [2-bit BE encoding of (x-116)]
77
 * - x in [120..123]: encoded as [1,1,0] + [2-bit BE encoding of (x-120)]
78
 * - x in [124..131]: encoded as [1,1,1] + [3-bit BE encoding of (x-124)]
79
 *
80
 * In general, every number is encoded as:
81
 * - First, k "1"-bits, where k is the class the number falls in
82
 * - Then, a "0"-bit, unless k is the highest class
83
 * - Lastly, bit_sizes[k] bits encoding in big endian the position within that class
84
 */
85
uint32_t DecodeBits(size_t& bitpos, const std::span<const std::byte> data, uint8_t minval, const std::span<const uint8_t> bit_sizes)
86
5.58M
{
87
5.58M
    uint32_t val = minval;  // Start with minimum encodable value
88
5.58M
    bool bit;
89
12.2M
    for (auto bit_sizes_it = bit_sizes.begin(); bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) {
90
        // Read continuation bit to determine if we're in this class
91
12.2M
        if (bit_sizes_it + 1 != bit_sizes.end()) {  // Unless we're in the last class
92
10.7M
            if (bitpos >= data.size() * 8) break;
93
10.7M
            bit = ConsumeBitLE(bitpos, data);
94
10.7M
        } else {
95
1.55M
            bit = 0;  // Last class has no continuation bit
96
1.55M
        }
97
12.2M
        if (bit) {
98
            // If the value will not fit in this class, subtract its range from val,
99
            // emit a "1" bit and continue with the next class
100
6.68M
            val += (1 << *bit_sizes_it);  // Add size of this class
101
6.68M
        } else {
102
            // Decode the position within this class in big endian
103
32.3M
            for (int b = 0; b < *bit_sizes_it; b++) {
104
26.8M
                if (bitpos >= data.size() * 8) return INVALID; // Reached EOF in mantissa
105
26.8M
                bit = ConsumeBitLE(bitpos, data);
106
26.8M
                val += bit << (*bit_sizes_it - 1 - b); // Big-endian within the class
107
26.8M
            }
108
5.58M
            return val;
109
5.58M
        }
110
12.2M
    }
111
0
    return INVALID; // Reached EOF in exponent
112
5.58M
}
113
114
/**
115
 * Instruction Set
116
 *
117
 * The instruction set is designed to efficiently encode a binary trie
118
 * that maps IP prefixes to ASNs. Each instruction type serves a specific
119
 * role in trie traversal and evaluation.
120
 */
121
enum class Instruction : uint32_t
122
{
123
    // A return instruction, encoded as [0], returns a constant ASN.
124
    // It is followed by an integer using the ASN encoding.
125
    RETURN = 0,
126
    // A jump instruction, encoded as [1,0], inspects the next unused bit in the input
127
    // and either continues execution (if 0), or skips a specified number of bits (if 1).
128
    // It is followed by an integer using jump encoding.
129
    JUMP = 1,
130
    // A match instruction, encoded as [1,1,0], inspects 1 or more of the next unused bits
131
    // in the input. If they all match, execution continues. If not, the default ASN is returned
132
    // (or 0 if unset). The match value encodes both the pattern and its length.
133
    MATCH = 2,
134
    // A default instruction, encoded as [1,1,1], sets the default variable to its argument,
135
    // and continues execution. It is followed by an integer in ASN encoding.
136
    DEFAULT = 3,
137
};
138
139
// Instruction type encoding: RETURN=[0], JUMP=[1,0], MATCH=[1,1,0], DEFAULT=[1,1,1]
140
constexpr uint8_t TYPE_BIT_SIZES[]{0, 0, 1};
141
Instruction DecodeType(size_t& bitpos, const std::span<const std::byte> data)
142
2.79M
{
143
2.79M
    return Instruction(DecodeBits(bitpos, data, 0, TYPE_BIT_SIZES));
144
2.79M
}
145
146
// ASN encoding: Can encode ASNs from 1 to ~16.7 million.
147
// Uses variable-length encoding optimized for real-world ASN distribution.
148
// ASN 0 is reserved and used if there isn't a match.
149
constexpr uint8_t ASN_BIT_SIZES[]{15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
150
uint32_t DecodeASN(size_t& bitpos, const std::span<const std::byte> data)
151
1.20M
{
152
1.20M
    return DecodeBits(bitpos, data, 1, ASN_BIT_SIZES);
153
1.20M
}
154
155
// MATCH argument: Values in [2, 511]. The highest set bit determines the match length
156
// n ∈ [1,8]; the lower n-1 bits are the pattern to compare.
157
constexpr uint8_t MATCH_BIT_SIZES[]{1, 2, 3, 4, 5, 6, 7, 8};
158
uint32_t DecodeMatch(size_t& bitpos, const std::span<const std::byte> data)
159
930k
{
160
930k
    return DecodeBits(bitpos, data, 2, MATCH_BIT_SIZES);
161
930k
}
162
163
// JUMP offset: Minimum value 17. Variable-length coded and may be large
164
// for skipping big subtrees.
165
constexpr uint8_t JUMP_BIT_SIZES[]{5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
166
uint32_t DecodeJump(size_t& bitpos, const std::span<const std::byte> data)
167
657k
{
168
657k
    return DecodeBits(bitpos, data, 17, JUMP_BIT_SIZES);
169
657k
}
170
171
} // anonymous namespace
172
173
/**
174
 * Execute the ASMap bytecode to find the ASN for an IP
175
 *
176
 * This function interprets the asmap bytecode and uses bits from the IP
177
 * address to navigate through the encoded trie structure, ultimately
178
 * returning an ASN value.
179
 */
180
uint32_t Interpret(const std::span<const std::byte> asmap, const std::span<const std::byte> ip)
181
4.21k
{
182
4.21k
    size_t pos{0};
183
4.21k
    const size_t endpos{asmap.size() * 8};
184
4.21k
    uint8_t ip_bit{0};
185
4.21k
    const uint8_t ip_bits_end = ip.size() * 8;
186
4.21k
    uint32_t default_asn = 0;
187
59.6k
    while (pos < endpos) {
188
59.6k
        Instruction opcode = DecodeType(pos, asmap);
189
59.6k
        if (opcode == Instruction::RETURN) {
190
            // Found leaf node - return the ASN
191
3.72k
            uint32_t asn = DecodeASN(pos, asmap);
192
3.72k
            if (asn == INVALID) break; // ASN straddles EOF
193
3.72k
            return asn;
194
55.8k
        } else if (opcode == Instruction::JUMP) {
195
            // Binary branch: if IP bit is 1, jump forward; else continue
196
4.60k
            uint32_t jump = DecodeJump(pos, asmap);
197
4.60k
            if (jump == INVALID) break; // Jump offset straddles EOF
198
4.60k
            if (ip_bit == ip_bits_end) break; // No input bits left
199
4.60k
            if (int64_t{jump} >= static_cast<int64_t>(endpos - pos)) break; // Jumping past EOF
200
4.60k
            if (ConsumeBitBE(ip_bit, ip)) {  // Check next IP bit (big-endian)
201
3.68k
                pos += jump;  // Bit = 1: skip to right subtree
202
3.68k
            }
203
            // Bit = 0: fall through to left subtree
204
51.2k
        } else if (opcode == Instruction::MATCH) {
205
            // Compare multiple IP bits against a pattern
206
            // The match value encodes both length and pattern:
207
            // - highest set bit position determines length (bit_width - 1)
208
            // - lower bits contain the pattern to compare
209
51.2k
            uint32_t match = DecodeMatch(pos, asmap);
210
51.2k
            if (match == INVALID) break; // Match bits straddle EOF
211
51.2k
            int matchlen = std::bit_width(match) - 1;  // An n-bit value matches n-1 input bits
212
51.2k
            if ((ip_bits_end - ip_bit) < matchlen) break; // Not enough input bits
213
458k
            for (int bit = 0; bit < matchlen; bit++) {
214
407k
                if (ConsumeBitBE(ip_bit, ip) != ((match >> (matchlen - 1 - bit)) & 1)) {
215
486
                    return default_asn;  // Pattern mismatch - use default
216
486
                }
217
407k
            }
218
            // Pattern matched - continue execution
219
51.2k
        } else if (opcode == Instruction::DEFAULT) {
220
            // Update the default ASN for subsequent MATCH failures
221
22
            default_asn = DecodeASN(pos, asmap);
222
22
            if (default_asn == INVALID) break; // ASN straddles EOF
223
22
        } else {
224
0
            break; // Instruction straddles EOF
225
0
        }
226
59.6k
    }
227
    // Reached EOF without RETURN, or aborted (see any of the breaks above)
228
    // - should have been caught by SanityCheckAsmap below
229
4.21k
    assert(false);
230
0
    return 0; // 0 is not a valid ASN
231
0
}
232
233
/**
234
 * Validates ASMap structure by simulating all possible execution paths.
235
 * Ensures well-formed bytecode, valid jumps, and proper termination.
236
 */
237
bool SanityCheckAsmap(const std::span<const std::byte> asmap, int bits)
238
9
{
239
9
    size_t pos{0};
240
9
    const size_t endpos{asmap.size() * 8};
241
9
    std::vector<std::pair<uint32_t, int>> jumps; // All future positions we may jump to (bit offset in asmap -> bits to consume left)
242
9
    jumps.reserve(bits);
243
9
    Instruction prevopcode = Instruction::JUMP;
244
9
    bool had_incomplete_match = false;  // Track <8 bit matches for efficiency check
245
246
2.73M
    while (pos != endpos) {
247
        // There was a jump into the middle of the previous instruction
248
2.73M
        if (!jumps.empty() && pos >= jumps.back().first) return false;
249
250
2.73M
        Instruction opcode = DecodeType(pos, asmap);
251
2.73M
        if (opcode == Instruction::RETURN) {
252
            // There should not be any RETURN immediately after a DEFAULT (could be combined into just RETURN)
253
652k
            if (prevopcode == Instruction::DEFAULT) return false;
254
652k
            uint32_t asn = DecodeASN(pos, asmap);
255
652k
            if (asn == INVALID) return false; // ASN straddles EOF
256
652k
            if (jumps.empty()) {
257
                // Nothing to execute anymore
258
8
                if (endpos - pos > 7) return false; // Excessive padding
259
30
                while (pos != endpos) {
260
22
                    if (ConsumeBitLE(pos, asmap)) return false; // Nonzero padding bit
261
22
                }
262
8
                return true; // Sanely reached EOF
263
652k
            } else {
264
                // Continue by pretending we jumped to the next instruction
265
652k
                if (pos != jumps.back().first) return false; // Unreachable code
266
652k
                bits = jumps.back().second; // Restore the number of bits we would have had left after this jump
267
652k
                jumps.pop_back();
268
652k
                prevopcode = Instruction::JUMP;
269
652k
            }
270
2.07M
        } else if (opcode == Instruction::JUMP) {
271
652k
            uint32_t jump = DecodeJump(pos, asmap);
272
652k
            if (jump == INVALID) return false; // Jump offset straddles EOF
273
652k
            if (int64_t{jump} > static_cast<int64_t>(endpos - pos)) return false; // Jump out of range
274
652k
            if (bits == 0) return false; // Consuming bits past the end of the input
275
652k
            --bits;
276
652k
            uint32_t jump_offset = pos + jump;
277
652k
            if (!jumps.empty() && jump_offset >= jumps.back().first) return false; // Intersecting jumps
278
652k
            jumps.emplace_back(jump_offset, bits);  // Queue jump target for validation
279
652k
            prevopcode = Instruction::JUMP;
280
1.42M
        } else if (opcode == Instruction::MATCH) {
281
879k
            uint32_t match = DecodeMatch(pos, asmap);
282
879k
            if (match == INVALID) return false; // Match bits straddle EOF
283
879k
            int matchlen = std::bit_width(match) - 1;
284
879k
            if (prevopcode != Instruction::MATCH) had_incomplete_match = false;
285
            // Within a sequence of matches only at most one should be incomplete
286
879k
            if (matchlen < 8 && had_incomplete_match) return false;
287
879k
            had_incomplete_match = (matchlen < 8);
288
879k
            if (bits < matchlen) return false; // Consuming bits past the end of the input
289
879k
            bits -= matchlen;
290
879k
            prevopcode = Instruction::MATCH;
291
879k
        } else if (opcode == Instruction::DEFAULT) {
292
            // There should not be two successive DEFAULTs (they could be combined into one)
293
546k
            if (prevopcode == Instruction::DEFAULT) return false;
294
546k
            uint32_t asn = DecodeASN(pos, asmap);
295
546k
            if (asn == INVALID) return false; // ASN straddles EOF
296
546k
            prevopcode = Instruction::DEFAULT;
297
546k
        } else {
298
0
            return false; // Instruction straddles EOF
299
0
        }
300
2.73M
    }
301
1
    return false; // Reached EOF without RETURN instruction
302
9
}
303
304
/**
305
 * Provides a safe interface for validating ASMap data before use.
306
 * Returns true if the data is valid for 128 bits long inputs.
307
 */
308
bool CheckStandardAsmap(const std::span<const std::byte> data)
309
9
{
310
9
    if (!SanityCheckAsmap(data, 128)) {
311
1
        LogWarning("Sanity check of asmap data failed\n");
312
1
        return false;
313
1
    }
314
8
    return true;
315
9
}
316
317
/**
318
 * Loads an ASMap file from disk and validates it.
319
 */
320
std::vector<std::byte> DecodeAsmap(fs::path path)
321
6
{
322
6
    FILE *filestr = fsbridge::fopen(path, "rb");
323
6
    AutoFile file{filestr};
324
6
    if (file.IsNull()) {
325
0
        LogWarning("Failed to open asmap file from disk");
326
0
        return {};
327
0
    }
328
6
    int64_t length{file.size()};
329
6
    LogInfo("Opened asmap file %s (%d bytes) from disk", fs::quoted(fs::PathToString(path)), length);
330
331
    // Read entire file into memory
332
6
    std::vector<std::byte> buffer(length);
333
6
    file.read(buffer);
334
335
6
    if (!CheckStandardAsmap(buffer)) {
336
1
        LogWarning("Sanity check of asmap file %s failed", fs::quoted(fs::PathToString(path)));
337
1
        return {};
338
1
    }
339
340
5
    return buffer;
341
6
}
342
343
/**
344
 * Computes SHA256 hash of ASMap data for versioning and consistency checks.
345
 */
346
uint256 AsmapVersion(const std::span<const std::byte> data)
347
2.06k
{
348
2.06k
    if (data.empty()) return {};
349
350
26
    HashWriter asmap_hasher;
351
26
    asmap_hasher << data;
352
26
    return asmap_hasher.GetHash();
353
2.06k
}