/tmp/bitcoin/src/crypto/chacha20.cpp
Line | Count | Source |
1 | | // Copyright (c) 2017-present The Bitcoin Core developers |
2 | | // Distributed under the MIT software license, see the accompanying |
3 | | // file COPYING or http://www.opensource.org/licenses/mit-license.php. |
4 | | |
5 | | // Based on the public domain implementation 'merged' by D. J. Bernstein |
6 | | // See https://cr.yp.to/chacha.html. |
7 | | |
8 | | #include <crypto/common.h> |
9 | | #include <crypto/chacha20.h> |
10 | | #include <support/cleanse.h> |
11 | | |
12 | | #include <algorithm> |
13 | | #include <bit> |
14 | | #include <cassert> |
15 | | |
// QUARTERROUND(a,b,c,d): mixes four 32-bit words in place using four
// add / xor / rotate-left steps with rotation amounts 16, 12, 8 and 7.
// The macro expands to a bare sequence of statements (not wrapped in a block)
// and names each argument several times, so the arguments must be plain
// lvalues without side effects.
16 | | #define QUARTERROUND(a,b,c,d) \ |
17 | | a += b; d = std::rotl(d ^ a, 16); \ |
18 | | c += d; b = std::rotl(b ^ c, 12); \ |
19 | | a += b; d = std::rotl(d ^ a, 8); \ |
20 | | c += d; b = std::rotl(b ^ c, 7); |
21 | | |
// REPEAT10(a): textually expands `a` ten times, each copy in its own braces,
// with the whole expansion wrapped in do { ... } while(0) so that it behaves
// as a single statement at the call site.
22 | 17.2M | #define REPEAT10(a) do { {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; } while(0) |
23 | | |
24 | | void ChaCha20Aligned::SetKey(std::span<const std::byte> key) noexcept |
25 | 4.38M | { |
26 | 4.38M | assert(key.size() == KEYLEN); |
27 | 4.38M | input[0] = ReadLE32(key.data() + 0); |
28 | 4.38M | input[1] = ReadLE32(key.data() + 4); |
29 | 4.38M | input[2] = ReadLE32(key.data() + 8); |
30 | 4.38M | input[3] = ReadLE32(key.data() + 12); |
31 | 4.38M | input[4] = ReadLE32(key.data() + 16); |
32 | 4.38M | input[5] = ReadLE32(key.data() + 20); |
33 | 4.38M | input[6] = ReadLE32(key.data() + 24); |
34 | 4.38M | input[7] = ReadLE32(key.data() + 28); |
35 | 4.38M | input[8] = 0; |
36 | 4.38M | input[9] = 0; |
37 | 4.38M | input[10] = 0; |
38 | 4.38M | input[11] = 0; |
39 | 4.38M | } |
40 | | |
41 | | ChaCha20Aligned::~ChaCha20Aligned() |
42 | 2.48M | { |
43 | 2.48M | memory_cleanse(input, sizeof(input)); |
44 | 2.48M | } |
45 | | |
46 | | ChaCha20Aligned::ChaCha20Aligned(std::span<const std::byte> key) noexcept |
47 | 2.48M | { |
48 | 2.48M | SetKey(key); |
49 | 2.48M | } |
50 | | |
51 | | void ChaCha20Aligned::Seek(Nonce96 nonce, uint32_t block_counter) noexcept |
52 | 2.09M | { |
53 | 2.09M | input[8] = block_counter; |
54 | 2.09M | input[9] = nonce.first; |
55 | 2.09M | input[10] = nonce.second; |
56 | 2.09M | input[11] = nonce.second >> 32; |
57 | 2.09M | } |
58 | | |
59 | | inline void ChaCha20Aligned::Keystream(std::span<std::byte> output) noexcept |
60 | 8.59M | { |
61 | 8.59M | std::byte* c = output.data(); |
62 | 8.59M | size_t blocks = output.size() / BLOCKLEN; |
63 | 8.59M | assert(blocks * BLOCKLEN == output.size()); |
64 | | |
65 | 8.59M | uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; |
66 | 8.59M | uint32_t j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; |
67 | | |
68 | 8.59M | if (!blocks) return; |
69 | | |
70 | 8.59M | j4 = input[0]; |
71 | 8.59M | j5 = input[1]; |
72 | 8.59M | j6 = input[2]; |
73 | 8.59M | j7 = input[3]; |
74 | 8.59M | j8 = input[4]; |
75 | 8.59M | j9 = input[5]; |
76 | 8.59M | j10 = input[6]; |
77 | 8.59M | j11 = input[7]; |
78 | 8.59M | j12 = input[8]; |
79 | 8.59M | j13 = input[9]; |
80 | 8.59M | j14 = input[10]; |
81 | 8.59M | j15 = input[11]; |
82 | | |
83 | 11.3M | for (;;) { |
84 | 11.3M | x0 = 0x61707865; |
85 | 11.3M | x1 = 0x3320646e; |
86 | 11.3M | x2 = 0x79622d32; |
87 | 11.3M | x3 = 0x6b206574; |
88 | 11.3M | x4 = j4; |
89 | 11.3M | x5 = j5; |
90 | 11.3M | x6 = j6; |
91 | 11.3M | x7 = j7; |
92 | 11.3M | x8 = j8; |
93 | 11.3M | x9 = j9; |
94 | 11.3M | x10 = j10; |
95 | 11.3M | x11 = j11; |
96 | 11.3M | x12 = j12; |
97 | 11.3M | x13 = j13; |
98 | 11.3M | x14 = j14; |
99 | 11.3M | x15 = j15; |
100 | | |
101 | | // The 20 inner ChaCha20 rounds are unrolled here for performance. |
102 | 11.3M | REPEAT10( |
103 | 11.3M | QUARTERROUND( x0, x4, x8,x12); |
104 | 11.3M | QUARTERROUND( x1, x5, x9,x13); |
105 | 11.3M | QUARTERROUND( x2, x6,x10,x14); |
106 | 11.3M | QUARTERROUND( x3, x7,x11,x15); |
107 | 11.3M | QUARTERROUND( x0, x5,x10,x15); |
108 | 11.3M | QUARTERROUND( x1, x6,x11,x12); |
109 | 11.3M | QUARTERROUND( x2, x7, x8,x13); |
110 | 11.3M | QUARTERROUND( x3, x4, x9,x14); |
111 | 11.3M | ); |
112 | | |
113 | 11.3M | x0 += 0x61707865; |
114 | 11.3M | x1 += 0x3320646e; |
115 | 11.3M | x2 += 0x79622d32; |
116 | 11.3M | x3 += 0x6b206574; |
117 | 11.3M | x4 += j4; |
118 | 11.3M | x5 += j5; |
119 | 11.3M | x6 += j6; |
120 | 11.3M | x7 += j7; |
121 | 11.3M | x8 += j8; |
122 | 11.3M | x9 += j9; |
123 | 11.3M | x10 += j10; |
124 | 11.3M | x11 += j11; |
125 | 11.3M | x12 += j12; |
126 | 11.3M | x13 += j13; |
127 | 11.3M | x14 += j14; |
128 | 11.3M | x15 += j15; |
129 | | |
130 | 11.3M | ++j12; |
131 | 11.3M | if (!j12) ++j13; |
132 | | |
133 | 11.3M | WriteLE32(c + 0, x0); |
134 | 11.3M | WriteLE32(c + 4, x1); |
135 | 11.3M | WriteLE32(c + 8, x2); |
136 | 11.3M | WriteLE32(c + 12, x3); |
137 | 11.3M | WriteLE32(c + 16, x4); |
138 | 11.3M | WriteLE32(c + 20, x5); |
139 | 11.3M | WriteLE32(c + 24, x6); |
140 | 11.3M | WriteLE32(c + 28, x7); |
141 | 11.3M | WriteLE32(c + 32, x8); |
142 | 11.3M | WriteLE32(c + 36, x9); |
143 | 11.3M | WriteLE32(c + 40, x10); |
144 | 11.3M | WriteLE32(c + 44, x11); |
145 | 11.3M | WriteLE32(c + 48, x12); |
146 | 11.3M | WriteLE32(c + 52, x13); |
147 | 11.3M | WriteLE32(c + 56, x14); |
148 | 11.3M | WriteLE32(c + 60, x15); |
149 | | |
150 | 11.3M | if (blocks == 1) { |
151 | 8.59M | input[8] = j12; |
152 | 8.59M | input[9] = j13; |
153 | 8.59M | return; |
154 | 8.59M | } |
155 | 2.80M | blocks -= 1; |
156 | 2.80M | c += BLOCKLEN; |
157 | 2.80M | } |
158 | 8.59M | } |
159 | | |
160 | | inline void ChaCha20Aligned::Crypt(std::span<const std::byte> in_bytes, std::span<std::byte> out_bytes) noexcept |
161 | 101k | { |
162 | 101k | assert(in_bytes.size() == out_bytes.size()); |
163 | 101k | const std::byte* m = in_bytes.data(); |
164 | 101k | std::byte* c = out_bytes.data(); |
165 | 101k | size_t blocks = out_bytes.size() / BLOCKLEN; |
166 | 101k | assert(blocks * BLOCKLEN == out_bytes.size()); |
167 | | |
168 | 101k | uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; |
169 | 101k | uint32_t j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; |
170 | | |
171 | 101k | if (!blocks) return; |
172 | | |
173 | 101k | j4 = input[0]; |
174 | 101k | j5 = input[1]; |
175 | 101k | j6 = input[2]; |
176 | 101k | j7 = input[3]; |
177 | 101k | j8 = input[4]; |
178 | 101k | j9 = input[5]; |
179 | 101k | j10 = input[6]; |
180 | 101k | j11 = input[7]; |
181 | 101k | j12 = input[8]; |
182 | 101k | j13 = input[9]; |
183 | 101k | j14 = input[10]; |
184 | 101k | j15 = input[11]; |
185 | | |
186 | 5.88M | for (;;) { |
187 | 5.88M | x0 = 0x61707865; |
188 | 5.88M | x1 = 0x3320646e; |
189 | 5.88M | x2 = 0x79622d32; |
190 | 5.88M | x3 = 0x6b206574; |
191 | 5.88M | x4 = j4; |
192 | 5.88M | x5 = j5; |
193 | 5.88M | x6 = j6; |
194 | 5.88M | x7 = j7; |
195 | 5.88M | x8 = j8; |
196 | 5.88M | x9 = j9; |
197 | 5.88M | x10 = j10; |
198 | 5.88M | x11 = j11; |
199 | 5.88M | x12 = j12; |
200 | 5.88M | x13 = j13; |
201 | 5.88M | x14 = j14; |
202 | 5.88M | x15 = j15; |
203 | | |
204 | | // The 20 inner ChaCha20 rounds are unrolled here for performance. |
205 | 5.88M | REPEAT10( |
206 | 5.88M | QUARTERROUND( x0, x4, x8,x12); |
207 | 5.88M | QUARTERROUND( x1, x5, x9,x13); |
208 | 5.88M | QUARTERROUND( x2, x6,x10,x14); |
209 | 5.88M | QUARTERROUND( x3, x7,x11,x15); |
210 | 5.88M | QUARTERROUND( x0, x5,x10,x15); |
211 | 5.88M | QUARTERROUND( x1, x6,x11,x12); |
212 | 5.88M | QUARTERROUND( x2, x7, x8,x13); |
213 | 5.88M | QUARTERROUND( x3, x4, x9,x14); |
214 | 5.88M | ); |
215 | | |
216 | 5.88M | x0 += 0x61707865; |
217 | 5.88M | x1 += 0x3320646e; |
218 | 5.88M | x2 += 0x79622d32; |
219 | 5.88M | x3 += 0x6b206574; |
220 | 5.88M | x4 += j4; |
221 | 5.88M | x5 += j5; |
222 | 5.88M | x6 += j6; |
223 | 5.88M | x7 += j7; |
224 | 5.88M | x8 += j8; |
225 | 5.88M | x9 += j9; |
226 | 5.88M | x10 += j10; |
227 | 5.88M | x11 += j11; |
228 | 5.88M | x12 += j12; |
229 | 5.88M | x13 += j13; |
230 | 5.88M | x14 += j14; |
231 | 5.88M | x15 += j15; |
232 | | |
233 | 5.88M | x0 ^= ReadLE32(m + 0); |
234 | 5.88M | x1 ^= ReadLE32(m + 4); |
235 | 5.88M | x2 ^= ReadLE32(m + 8); |
236 | 5.88M | x3 ^= ReadLE32(m + 12); |
237 | 5.88M | x4 ^= ReadLE32(m + 16); |
238 | 5.88M | x5 ^= ReadLE32(m + 20); |
239 | 5.88M | x6 ^= ReadLE32(m + 24); |
240 | 5.88M | x7 ^= ReadLE32(m + 28); |
241 | 5.88M | x8 ^= ReadLE32(m + 32); |
242 | 5.88M | x9 ^= ReadLE32(m + 36); |
243 | 5.88M | x10 ^= ReadLE32(m + 40); |
244 | 5.88M | x11 ^= ReadLE32(m + 44); |
245 | 5.88M | x12 ^= ReadLE32(m + 48); |
246 | 5.88M | x13 ^= ReadLE32(m + 52); |
247 | 5.88M | x14 ^= ReadLE32(m + 56); |
248 | 5.88M | x15 ^= ReadLE32(m + 60); |
249 | | |
250 | 5.88M | ++j12; |
251 | 5.88M | if (!j12) ++j13; |
252 | | |
253 | 5.88M | WriteLE32(c + 0, x0); |
254 | 5.88M | WriteLE32(c + 4, x1); |
255 | 5.88M | WriteLE32(c + 8, x2); |
256 | 5.88M | WriteLE32(c + 12, x3); |
257 | 5.88M | WriteLE32(c + 16, x4); |
258 | 5.88M | WriteLE32(c + 20, x5); |
259 | 5.88M | WriteLE32(c + 24, x6); |
260 | 5.88M | WriteLE32(c + 28, x7); |
261 | 5.88M | WriteLE32(c + 32, x8); |
262 | 5.88M | WriteLE32(c + 36, x9); |
263 | 5.88M | WriteLE32(c + 40, x10); |
264 | 5.88M | WriteLE32(c + 44, x11); |
265 | 5.88M | WriteLE32(c + 48, x12); |
266 | 5.88M | WriteLE32(c + 52, x13); |
267 | 5.88M | WriteLE32(c + 56, x14); |
268 | 5.88M | WriteLE32(c + 60, x15); |
269 | | |
270 | 5.88M | if (blocks == 1) { |
271 | 101k | input[8] = j12; |
272 | 101k | input[9] = j13; |
273 | 101k | return; |
274 | 101k | } |
275 | 5.78M | blocks -= 1; |
276 | 5.78M | c += BLOCKLEN; |
277 | 5.78M | m += BLOCKLEN; |
278 | 5.78M | } |
279 | 101k | } |
280 | | |
281 | | void ChaCha20::Keystream(std::span<std::byte> out) noexcept |
282 | 37.0M | { |
283 | 37.0M | if (out.empty()) return; |
284 | 37.0M | if (m_bufleft) { |
285 | 29.1M | unsigned reuse = std::min<size_t>(m_bufleft, out.size()); |
286 | 29.1M | std::copy(m_buffer.end() - m_bufleft, m_buffer.end() - m_bufleft + reuse, out.begin()); |
287 | 29.1M | m_bufleft -= reuse; |
288 | 29.1M | out = out.subspan(reuse); |
289 | 29.1M | } |
290 | 37.0M | if (out.size() >= m_aligned.BLOCKLEN) { |
291 | 1.47M | size_t blocks = out.size() / m_aligned.BLOCKLEN; |
292 | 1.47M | m_aligned.Keystream(out.first(blocks * m_aligned.BLOCKLEN)); |
293 | 1.47M | out = out.subspan(blocks * m_aligned.BLOCKLEN); |
294 | 1.47M | } |
295 | 37.0M | if (!out.empty()) { |
296 | 6.82M | m_aligned.Keystream(m_buffer); |
297 | 6.82M | std::copy(m_buffer.begin(), m_buffer.begin() + out.size(), out.begin()); |
298 | 6.82M | m_bufleft = m_aligned.BLOCKLEN - out.size(); |
299 | 6.82M | } |
300 | 37.0M | } |
301 | | |
302 | | void ChaCha20::Crypt(std::span<const std::byte> input, std::span<std::byte> output) noexcept |
303 | 1.76M | { |
304 | 1.76M | assert(input.size() == output.size()); |
305 | | |
306 | 1.76M | if (!input.size()) return; |
307 | 513k | if (m_bufleft) { |
308 | 335k | unsigned reuse = std::min<size_t>(m_bufleft, input.size()); |
309 | 8.42M | for (unsigned i = 0; i < reuse; i++) { |
310 | 8.09M | output[i] = input[i] ^ m_buffer[m_aligned.BLOCKLEN - m_bufleft + i]; |
311 | 8.09M | } |
312 | 335k | m_bufleft -= reuse; |
313 | 335k | output = output.subspan(reuse); |
314 | 335k | input = input.subspan(reuse); |
315 | 335k | } |
316 | 513k | if (input.size() >= m_aligned.BLOCKLEN) { |
317 | 101k | size_t blocks = input.size() / m_aligned.BLOCKLEN; |
318 | 101k | m_aligned.Crypt(input.first(blocks * m_aligned.BLOCKLEN), output.first(blocks * m_aligned.BLOCKLEN)); |
319 | 101k | output = output.subspan(blocks * m_aligned.BLOCKLEN); |
320 | 101k | input = input.subspan(blocks * m_aligned.BLOCKLEN); |
321 | 101k | } |
322 | 513k | if (!input.empty()) { |
323 | 292k | m_aligned.Keystream(m_buffer); |
324 | 3.98M | for (unsigned i = 0; i < input.size(); i++) { |
325 | 3.69M | output[i] = input[i] ^ m_buffer[i]; |
326 | 3.69M | } |
327 | 292k | m_bufleft = m_aligned.BLOCKLEN - input.size(); |
328 | 292k | } |
329 | 513k | } |
330 | | |
331 | | ChaCha20::~ChaCha20() |
332 | 2.47M | { |
333 | 2.47M | memory_cleanse(m_buffer.data(), m_buffer.size()); |
334 | 2.47M | } |
335 | | |
336 | | void ChaCha20::SetKey(std::span<const std::byte> key) noexcept |
337 | 1.90M | { |
338 | 1.90M | m_aligned.SetKey(key); |
339 | 1.90M | m_bufleft = 0; |
340 | 1.90M | memory_cleanse(m_buffer.data(), m_buffer.size()); |
341 | 1.90M | } |
342 | | |
343 | | FSChaCha20::FSChaCha20(std::span<const std::byte> key, uint32_t rekey_interval) noexcept : |
344 | 827 | m_chacha20(key), m_rekey_interval(rekey_interval) |
345 | 827 | { |
346 | 827 | assert(key.size() == KEYLEN); |
347 | 827 | } |
348 | | |
349 | | void FSChaCha20::Crypt(std::span<const std::byte> input, std::span<std::byte> output) noexcept |
350 | 214k | { |
351 | 214k | assert(input.size() == output.size()); |
352 | | |
353 | | // Invoke internal stream cipher for actual encryption/decryption. |
354 | 214k | m_chacha20.Crypt(input, output); |
355 | | |
356 | | // Rekey after m_rekey_interval encryptions/decryptions. |
357 | 214k | if (++m_chunk_counter == m_rekey_interval) { |
358 | | // Get new key from the stream cipher. |
359 | 822 | std::byte new_key[KEYLEN]; |
360 | 822 | m_chacha20.Keystream(new_key); |
361 | | // Update its key. |
362 | 822 | m_chacha20.SetKey(new_key); |
363 | | // Wipe the key (a copy remains inside m_chacha20, where it'll be wiped on the next rekey |
364 | | // or on destruction). |
365 | 822 | memory_cleanse(new_key, sizeof(new_key)); |
366 | | // Set the nonce for the new section of output. |
367 | 822 | m_chacha20.Seek({0, ++m_rekey_counter}, 0); |
368 | | // Reset the chunk counter. |
369 | 822 | m_chunk_counter = 0; |
370 | 822 | } |
371 | 214k | } |