From 410a96752b39f260705cdd13243ab364e589c5f8 Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Thu, 2 Oct 2025 13:16:15 +0200 Subject: [PATCH 1/2] pq patch: enable PQ by default like upstream The big diff is misleading. Applying each patch to the base 478b28ab12f and comparing them, we see: git range-diff 478b28ab12f2001a03261624261fd041f5439706..adcd4022f75953605a9bf9f6a4a45c0b4fd8ed94 478b28ab12f2001a03261624261fd041f5439706..6f1b1e1f451e61cd2bda0922eecaa8387397ac5a 1: adcd4022f ! 1: 6f1b1e1f4 Add additional post-quantum key agreements @@ Commit message This patch adds: - 1. Support for MLKEM768X25519 under the codepoint 0x11ec. The version - of BoringSSL we patch against did not support it yet. + 1. Support for X25519MLKEM768 under the codepoint 0x11ec. The version + of BoringSSL we patch against did not support it yet. Like recent + upstream, enable by default. 2. Supports for P256Kyber768Draft00 under 0xfe32, which we temporarily need for compliance reasons. (Note that this is not the codepoint @@ ssl/extensions.cc: static bool tls1_check_duplicate_extensions(const CBS *cbs) { return true; default: return false; +@@ ssl/extensions.cc: bool ssl_client_hello_get_extension(const SSL_CLIENT_HELLO *client_hello, + } + + static const uint16_t kDefaultGroups[] = { ++ SSL_GROUP_X25519_MLKEM768, + SSL_GROUP_X25519, + SSL_GROUP_SECP256R1, + SSL_GROUP_SECP384R1, ## ssl/ssl_key_share.cc ## @@ --- boring-sys/patches/boring-pq.patch | 2529 ++++++++++++++-------------- 1 file changed, 1242 insertions(+), 1287 deletions(-) diff --git a/boring-sys/patches/boring-pq.patch b/boring-sys/patches/boring-pq.patch index 1f13962a..405a0185 100644 --- a/boring-sys/patches/boring-pq.patch +++ b/boring-sys/patches/boring-pq.patch @@ -1,6 +1,6 @@ -From b98d803dbecc9d6848d8cbffa62b5c943fb75f70 Mon Sep 17 00:00:00 2001 +From 6f1b1e1f451e61cd2bda0922eecaa8387397ac5a Mon Sep 17 00:00:00 2001 From: Bas Westerbaan -Date: Fri, 22 Jul 2022 16:43:48 +0200 +Date: Thu, 2 Oct 2025 13:07:05 +0200 
Subject: [PATCH] Add additional post-quantum key agreements BoringSSL upstream has supported the temporary post-quantum @@ -13,8 +13,9 @@ and many browsers are expected to switch to it before the end of 2024. This patch adds: -1. Support for MLKEM768X25519 under the codepoint 0x11ec. The version - of BoringSSL we patch against did not support it yet. +1. Support for X25519MLKEM768 under the codepoint 0x11ec. The version + of BoringSSL we patch against did not support it yet. Like recent + upstream, enable by default. 2. Supports for P256Kyber768Draft00 under 0xfe32, which we temporarily need for compliance reasons. (Note that this is not the codepoint @@ -32,39 +33,29 @@ portable reference implementation, so as to support Kyber512. Cf RTG-2076 RTG-2051 RTG-2508 RTG-2707 RTG-2607 RTG-3239 --- - BUILD.generated.bzl | 5 +- - BUILD.generated_tests.bzl | 4 - - CMakeLists.txt | 4 +- - sources.json | 9 +- - src/crypto/CMakeLists.txt | 5 +- - src/crypto/kyber/internal.h | 91 - - src/crypto/kyber/keccak.c | 204 -- - src/crypto/kyber/keccak_tests.txt | 3071 ----------------------------- - src/crypto/kyber/kyber.c | 3011 +++++++++++++++++++++------- - src/crypto/kyber/kyber512.c | 5 + - src/crypto/kyber/kyber768.c | 4 + - src/crypto/kyber/kyber_test.cc | 229 --- - src/crypto/kyber/kyber_tests.txt | 905 --------- - src/crypto/obj/obj_dat.h | 17 +- - src/crypto/obj/obj_mac.num | 4 + - src/crypto/obj/objects.txt | 6 +- - src/include/openssl/kyber.h | 203 +- - src/include/openssl/nid.h | 12 + - src/include/openssl/ssl.h | 4 + - src/sources.cmake | 2 - - src/ssl/extensions.cc | 4 + - src/ssl/ssl_key_share.cc | 525 ++++- - src/ssl/ssl_lib.cc | 2 +- - src/ssl/ssl_test.cc | 29 +- - src/tool/speed.cc | 162 +- - 26 files changed, 3088 insertions(+), 5433 deletions(-) - delete mode 100644 src/crypto/kyber/internal.h - delete mode 100644 src/crypto/kyber/keccak.c - delete mode 100644 src/crypto/kyber/keccak_tests.txt - create mode 100644 src/crypto/kyber/kyber512.c - create mode 100644 
src/crypto/kyber/kyber768.c - delete mode 100644 src/crypto/kyber/kyber_test.cc - delete mode 100644 src/crypto/kyber/kyber_tests.txt + crypto/CMakeLists.txt | 3 +- + crypto/kyber/internal.h | 60 - + crypto/kyber/kyber.c | 3013 +++++++++++++++++++++++++++--------- + crypto/kyber/kyber512.c | 5 + + crypto/kyber/kyber768.c | 4 + + crypto/kyber/kyber_test.cc | 184 --- + crypto/obj/obj_dat.h | 17 +- + crypto/obj/obj_mac.num | 4 + + crypto/obj/objects.txt | 6 +- + include/openssl/kyber.h | 203 ++- + include/openssl/nid.h | 12 + + include/openssl/ssl.h | 4 + + sources.cmake | 2 - + ssl/extensions.cc | 5 + + ssl/ssl_key_share.cc | 525 ++++++- + ssl/ssl_lib.cc | 2 +- + ssl/ssl_test.cc | 29 +- + tool/speed.cc | 162 +- + 18 files changed, 3082 insertions(+), 1158 deletions(-) + delete mode 100644 crypto/kyber/internal.h + create mode 100644 crypto/kyber/kyber512.c + create mode 100644 crypto/kyber/kyber768.c + delete mode 100644 crypto/kyber/kyber_test.cc diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt index a594b9e9d..ed468237f 100644 @@ -147,7 +138,7 @@ index b11211726..000000000 - -#endif // OPENSSL_HEADER_CRYPTO_KYBER_INTERNAL_H diff --git a/crypto/kyber/kyber.c b/crypto/kyber/kyber.c -index d3ea02090..ccb5b3d9b 100644 +index d3ea02090..74d092907 100644 --- a/crypto/kyber/kyber.c +++ b/crypto/kyber/kyber.c @@ -1,835 +1,2426 @@ @@ -191,17 +182,17 @@ index d3ea02090..ccb5b3d9b 100644 +// implementation or https://github.com/cloudflare/circl/tree/main/pke/kyber +// +// - Option to keep A stored in private key. 
-+ + +-#include +#ifndef KYBER_K +#error "Don't compile this file direcly" +#endif - #include -+#include - -#include -#include -- ++#include ++#include + -#include -#include +#include @@ -211,27 +202,8 @@ index d3ea02090..ccb5b3d9b 100644 #include "../internal.h" -#include "../keccak/internal.h" -#include "./internal.h" -+ -+#if (KYBER_K == 2) -+#define KYBER_NAMESPACE(s) KYBER512_##s -+#elif (KYBER_K == 3) -+#define KYBER_NAMESPACE(s) KYBER768_##s -+#elif (KYBER_K == 4) -+#define KYBER_NAMESPACE(s) KYBER1024_##s -+#else -+#error "KYBER_K must be in {2,3,4}" -+#endif -+ -+#define public_key KYBER_NAMESPACE(public_key) -+#define private_key KYBER_NAMESPACE(private_key) -+ -+#define generate_key KYBER_NAMESPACE(generate_key) -+#define encap KYBER_NAMESPACE(encap) -+#define decap KYBER_NAMESPACE(decap) -+#define marshal_public_key KYBER_NAMESPACE(marshal_public_key) -+#define parse_public_key KYBER_NAMESPACE(parse_public_key) - - +- +- -// See -// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf - @@ -266,10 +238,9 @@ index d3ea02090..ccb5b3d9b 100644 -} matrix; - -// This bit of Python will be referenced in some of the following comments: - // +-// -// p = 3329 -+// params.h - // +-// -// def bitreverse(i): -// ret = 0 -// for n in range(7): @@ -278,9 +249,7 @@ index d3ea02090..ccb5b3d9b 100644 -// ret |= bit -// i >>= 1 -// return ret -+#define KYBER_N 256 -+#define KYBER_Q 3329 - +- -// kNTTRoots = [pow(17, bitreverse(i), p) for i in range(128)] -static const uint16_t kNTTRoots[128] = { - 1, 1729, 2580, 3289, 2642, 630, 1897, 848, 1062, 1919, 193, 797, @@ -294,6 +263,110 @@ index d3ea02090..ccb5b3d9b 100644 - 1584, 2298, 2037, 3220, 375, 2549, 2090, 1645, 1063, 319, 2773, 757, - 2099, 561, 2466, 2594, 2804, 1092, 403, 1026, 1143, 2150, 2775, 886, - 1722, 1212, 1874, 1029, 2110, 2935, 885, 2154, +-}; + +-// kInverseNTTRoots = [pow(17, -bitreverse(i), p) for i in range(128)] +-static const uint16_t kInverseNTTRoots[128] = { +- 1, 1600, 40, 
749, 2481, 1432, 2699, 687, 1583, 2760, 69, 543, +- 2532, 3136, 1410, 2267, 2508, 1355, 450, 936, 447, 2794, 1235, 1903, +- 1996, 1089, 3273, 283, 1853, 1990, 882, 3033, 2419, 2102, 219, 855, +- 2681, 1848, 712, 682, 927, 1795, 461, 1891, 2877, 2522, 1894, 1010, +- 1414, 2009, 3296, 464, 2697, 816, 1352, 2679, 1274, 1052, 1025, 2132, +- 1573, 76, 2998, 3040, 1175, 2444, 394, 1219, 2300, 1455, 2117, 1607, +- 2443, 554, 1179, 2186, 2303, 2926, 2237, 525, 735, 863, 2768, 1230, +- 2572, 556, 3010, 2266, 1684, 1239, 780, 2954, 109, 1292, 1031, 1745, +- 2688, 3061, 992, 2596, 941, 892, 1021, 2390, 642, 1868, 2377, 1482, +- 1540, 540, 1678, 1626, 279, 314, 1173, 2573, 3096, 48, 667, 1920, +- 2229, 1041, 2606, 1692, 680, 2746, 568, 3312, +-}; ++#if (KYBER_K == 2) ++#define KYBER_NAMESPACE(s) KYBER512_##s ++#elif (KYBER_K == 3) ++#define KYBER_NAMESPACE(s) KYBER768_##s ++#elif (KYBER_K == 4) ++#define KYBER_NAMESPACE(s) KYBER1024_##s ++#else ++#error "KYBER_K must be in {2,3,4}" ++#endif + +-// kModRoots = [pow(17, 2*bitreverse(i) + 1, p) for i in range(128)] +-static const uint16_t kModRoots[128] = { +- 17, 3312, 2761, 568, 583, 2746, 2649, 680, 1637, 1692, 723, 2606, +- 2288, 1041, 1100, 2229, 1409, 1920, 2662, 667, 3281, 48, 233, 3096, +- 756, 2573, 2156, 1173, 3015, 314, 3050, 279, 1703, 1626, 1651, 1678, +- 2789, 540, 1789, 1540, 1847, 1482, 952, 2377, 1461, 1868, 2687, 642, +- 939, 2390, 2308, 1021, 2437, 892, 2388, 941, 733, 2596, 2337, 992, +- 268, 3061, 641, 2688, 1584, 1745, 2298, 1031, 2037, 1292, 3220, 109, +- 375, 2954, 2549, 780, 2090, 1239, 1645, 1684, 1063, 2266, 319, 3010, +- 2773, 556, 757, 2572, 2099, 1230, 561, 2768, 2466, 863, 2594, 735, +- 2804, 525, 1092, 2237, 403, 2926, 1026, 2303, 1143, 2186, 2150, 1179, +- 2775, 554, 886, 2443, 1722, 1607, 1212, 2117, 1874, 1455, 1029, 2300, +- 2110, 1219, 2935, 394, 885, 2444, 2154, 1175, +-}; ++#define public_key KYBER_NAMESPACE(public_key) ++#define private_key KYBER_NAMESPACE(private_key) + +-// reduce_once 
reduces 0 <= x < 2*kPrime, mod kPrime. +-static uint16_t reduce_once(uint16_t x) { +- assert(x < 2 * kPrime); +- const uint16_t subtracted = x - kPrime; +- uint16_t mask = 0u - (subtracted >> 15); +- // On Aarch64, omitting a |value_barrier_u16| results in a 2x speedup of Kyber +- // overall and Clang still produces constant-time code using `csel`. On other +- // platforms & compilers on godbolt that we care about, this code also +- // produces constant-time output. +- return (mask & x) | (~mask & subtracted); +-} +- +-// constant time reduce x mod kPrime using Barrett reduction. x must be less +-// than kPrime + 2×kPrime². +-static uint16_t reduce(uint32_t x) { +- assert(x < kPrime + 2u * kPrime * kPrime); +- uint64_t product = (uint64_t)x * kBarrettMultiplier; +- uint32_t quotient = (uint32_t)(product >> kBarrettShift); +- uint32_t remainder = x - quotient * kPrime; +- return reduce_once(remainder); +-} +- +-static void scalar_zero(scalar *out) { OPENSSL_memset(out, 0, sizeof(*out)); } +- +-static void vector_zero(vector *out) { OPENSSL_memset(out, 0, sizeof(*out)); } +- +-// In place number theoretic transform of a given scalar. +-// Note that Kyber's kPrime 3329 does not have a 512th root of unity, so this +-// transform leaves off the last iteration of the usual FFT code, with the 128 +-// relevant roots of unity being stored in |kNTTRoots|. This means the output +-// should be seen as 128 elements in GF(3329^2), with the coefficients of the +-// elements being consecutive entries in |s->c|. +-static void scalar_ntt(scalar *s) { +- int offset = DEGREE; +- // `int` is used here because using `size_t` throughout caused a ~5% slowdown +- // with Clang 14 on Aarch64. 
+- for (int step = 1; step < DEGREE / 2; step <<= 1) { +- offset >>= 1; +- int k = 0; +- for (int i = 0; i < step; i++) { +- const uint32_t step_root = kNTTRoots[i + step]; +- for (int j = k; j < k + offset; j++) { +- uint16_t odd = reduce(step_root * s->c[j + offset]); +- uint16_t even = s->c[j]; +- s->c[j] = reduce_once(odd + even); +- s->c[j + offset] = reduce_once(even - odd + kPrime); +- } +- k += 2 * offset; ++#define generate_key KYBER_NAMESPACE(generate_key) ++#define encap KYBER_NAMESPACE(encap) ++#define decap KYBER_NAMESPACE(decap) ++#define marshal_public_key KYBER_NAMESPACE(marshal_public_key) ++#define parse_public_key KYBER_NAMESPACE(parse_public_key) ++ ++ ++// ++// params.h ++// ++#define KYBER_N 256 ++#define KYBER_Q 3329 ++ +#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ +#define KYBER_SSBYTES 32 /* size in bytes of shared key */ + @@ -675,9 +748,9 @@ index d3ea02090..ccb5b3d9b 100644 + a = (d >> (6*j+0)) & 0x7; + b = (d >> (6*j+3)) & 0x7; + r->coeffs[4*i+j] = a - b; -+ } -+ } -+} + } + } + } +#endif + +static void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]) @@ -690,7 +763,10 @@ index d3ea02090..ccb5b3d9b 100644 +#error "This implementation requires eta1 in {2,3}" +#endif +} -+ + +-static void vector_ntt(vector *a) { +- for (int i = 0; i < RANK; i++) { +- scalar_ntt(&a->v[i]); +static void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]) +{ +#if KYBER_ETA2 == 2 @@ -717,21 +793,8 @@ index d3ea02090..ccb5b3d9b 100644 + 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, + 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, + 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127 - }; - --// kInverseNTTRoots = [pow(17, -bitreverse(i), p) for i in range(128)] --static const uint16_t kInverseNTTRoots[128] = { -- 1, 1600, 40, 749, 2481, 1432, 2699, 687, 1583, 2760, 69, 543, -- 2532, 3136, 1410, 2267, 2508, 1355, 450, 936, 447, 2794, 1235, 1903, -- 1996, 1089, 
3273, 283, 1853, 1990, 882, 3033, 2419, 2102, 219, 855, -- 2681, 1848, 712, 682, 927, 1795, 461, 1891, 2877, 2522, 1894, 1010, -- 1414, 2009, 3296, 464, 2697, 816, 1352, 2679, 1274, 1052, 1025, 2132, -- 1573, 76, 2998, 3040, 1175, 2444, 394, 1219, 2300, 1455, 2117, 1607, -- 2443, 554, 1179, 2186, 2303, 2926, 2237, 525, 735, 863, 2768, 1230, -- 2572, 556, 3010, 2266, 1684, 1239, 780, 2954, 109, 1292, 1031, 1745, -- 2688, 3061, 992, 2596, 941, 892, 1021, 2390, 642, 1868, 2377, 1482, -- 1540, 540, 1678, 1626, 279, 314, 1173, 2573, 3096, 48, 667, 1920, -- 2229, 1041, 2606, 1692, 680, 2746, 568, 3312, ++}; ++ +void init_ntt() { + unsigned int i; + int16_t tmp[128]; @@ -746,8 +809,8 @@ index d3ea02090..ccb5b3d9b 100644 + zetas[i] -= KYBER_Q; + if(zetas[i] < -KYBER_Q/2) + zetas[i] += KYBER_Q; -+ } -+} + } + } +*/ + +static const int16_t zetas[128] = { @@ -767,21 +830,8 @@ index d3ea02090..ccb5b3d9b 100644 + -1215, -136, 1218, -1335, -874, 220, -1187, -1659, + -1185, -1530, -1278, 794, -1510, -854, -870, 478, + -108, -308, 996, 991, 958, -1460, 1522, 1628 - }; - --// kModRoots = [pow(17, 2*bitreverse(i) + 1, p) for i in range(128)] --static const uint16_t kModRoots[128] = { -- 17, 3312, 2761, 568, 583, 2746, 2649, 680, 1637, 1692, 723, 2606, -- 2288, 1041, 1100, 2229, 1409, 1920, 2662, 667, 3281, 48, 233, 3096, -- 756, 2573, 2156, 1173, 3015, 314, 3050, 279, 1703, 1626, 1651, 1678, -- 2789, 540, 1789, 1540, 1847, 1482, 952, 2377, 1461, 1868, 2687, 642, -- 939, 2390, 2308, 1021, 2437, 892, 2388, 941, 733, 2596, 2337, 992, -- 268, 3061, 641, 2688, 1584, 1745, 2298, 1031, 2037, 1292, 3220, 109, -- 375, 2954, 2549, 780, 2090, 1239, 1645, 1684, 1063, 2266, 319, 3010, -- 2773, 556, 757, 2572, 2099, 1230, 561, 2768, 2466, 863, 2594, 735, -- 2804, 525, 1092, 2237, 403, 2926, 1026, 2303, 1143, 2186, 2150, 1179, -- 2775, 554, 886, 2443, 1722, 1607, 1212, 2117, 1874, 1455, 1029, 2300, -- 2110, 1219, 2935, 394, 885, 2444, 2154, 1175, ++}; ++ 
+/************************************************* +* Name: fqmul +* @@ -795,7 +845,26 @@ index d3ea02090..ccb5b3d9b 100644 +static int16_t fqmul(int16_t a, int16_t b) { + return montgomery_reduce((int32_t)a*b); +} -+ + +-// In place inverse number theoretic transform of a given scalar, with pairs of +-// entries of s->v being interpreted as elements of GF(3329^2). Just as with the +-// number theoretic transform, this leaves off the first step of the normal iFFT +-// to account for the fact that 3329 does not have a 512th root of unity, using +-// the precomputed 128 roots of unity stored in |kInverseNTTRoots|. +-static void scalar_inverse_ntt(scalar *s) { +- int step = DEGREE / 2; +- // `int` is used here because using `size_t` throughout caused a ~5% slowdown +- // with Clang 14 on Aarch64. +- for (int offset = 2; offset < DEGREE; offset <<= 1) { +- step >>= 1; +- int k = 0; +- for (int i = 0; i < step; i++) { +- uint32_t step_root = kInverseNTTRoots[i + step]; +- for (int j = k; j < k + offset; j++) { +- uint16_t odd = s->c[j + offset]; +- uint16_t even = s->c[j]; +- s->c[j] = reduce_once(odd + even); +- s->c[j + offset] = reduce(step_root * (even - odd + kPrime)); +/************************************************* +* Name: ntt +* @@ -816,11 +885,18 @@ index d3ea02090..ccb5b3d9b 100644 + t = fqmul(zeta, r[j + len]); + r[j + len] = r[j] - t; + r[j] = r[j] + t; -+ } -+ } -+ } -+} -+ + } +- k += 2 * offset; + } + } +- for (int i = 0; i < DEGREE; i++) { +- s->c[i] = reduce(s->c[i] * kInverseDegree); +- } + } + +-static void vector_inverse_ntt(vector *a) { +- for (int i = 0; i < RANK; i++) { +- scalar_inverse_ntt(&a->v[i]); +/************************************************* +* Name: invntt_tomont +* @@ -846,7 +922,7 @@ index d3ea02090..ccb5b3d9b 100644 + r[j + len] = fqmul(zeta, r[j + len]); + } + } -+ } + } + + for(j = 0; j < 256; j++) + r[j] = fqmul(r[j], f); @@ -870,8 +946,11 @@ index d3ea02090..ccb5b3d9b 100644 + r[0] += fqmul(a[0], b[0]); + r[1] = 
fqmul(a[0], b[1]); + r[1] += fqmul(a[1], b[0]); -+} -+ + } + +-static void scalar_add(scalar *lhs, const scalar *rhs) { +- for (int i = 0; i < DEGREE; i++) { +- lhs->c[i] = reduce_once(lhs->c[i] + rhs->c[i]); +// +// poly.c +// @@ -910,7 +989,7 @@ index d3ea02090..ccb5b3d9b 100644 + r[2] = t[4] | (t[5] << 4); + r[3] = t[6] | (t[7] << 4); + r += 4; -+ } + } +#elif (KYBER_POLYCOMPRESSEDBYTES == 160) + for(i=0;ic[i] = reduce_once(lhs->c[i] - rhs->c[i] + kPrime); +/************************************************* +* Name: poly_decompress +* @@ -972,12 +1054,29 @@ index d3ea02090..ccb5b3d9b 100644 + + for(j=0;j<8;j++) + r->coeffs[8*i+j] = ((uint32_t)(t[j] & 31)*KYBER_Q + 16) >> 5; -+ } + } +#else +#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}" +#endif -+} -+ + } + +-// Multiplying two scalars in the number theoretically transformed state. Since +-// 3329 does not have a 512th root of unity, this means we have to interpret +-// the 2*ith and (2*i+1)th entries of the scalar as elements of GF(3329)[X]/(X^2 +-// - 17^(2*bitreverse(i)+1)) The value of 17^(2*bitreverse(i)+1) mod 3329 is +-// stored in the precomputed |kModRoots| table. Note that our Barrett transform +-// only allows us to multipy two reduced numbers together, so we need some +-// intermediate reduction steps, even if an uint64_t could hold 3 multiplied +-// numbers. 
+-static void scalar_mult(scalar *out, const scalar *lhs, const scalar *rhs) { +- for (int i = 0; i < DEGREE / 2; i++) { +- uint32_t real_real = (uint32_t)lhs->c[2 * i] * rhs->c[2 * i]; +- uint32_t img_img = (uint32_t)lhs->c[2 * i + 1] * rhs->c[2 * i + 1]; +- uint32_t real_img = (uint32_t)lhs->c[2 * i] * rhs->c[2 * i + 1]; +- uint32_t img_real = (uint32_t)lhs->c[2 * i + 1] * rhs->c[2 * i]; +- out->c[2 * i] = +- reduce(real_real + (uint32_t)reduce(img_img) * kModRoots[i]); +- out->c[2 * i + 1] = reduce(img_real + real_img); +/************************************************* +* Name: poly_tobytes +* @@ -1001,9 +1100,12 @@ index d3ea02090..ccb5b3d9b 100644 + r[3*i+0] = (t0 >> 0); + r[3*i+1] = (t0 >> 8) | (t1 << 4); + r[3*i+2] = (t1 >> 4); -+ } -+} -+ + } + } + +-static void vector_add(vector *lhs, const vector *rhs) { +- for (int i = 0; i < RANK; i++) { +- scalar_add(&lhs->v[i], &rhs->v[i]); +/************************************************* +* Name: poly_frombytes +* @@ -1020,9 +1122,16 @@ index d3ea02090..ccb5b3d9b 100644 + for(i=0;icoeffs[2*i] = ((a[3*i+0] >> 0) | ((uint16_t)a[3*i+1] << 8)) & 0xFFF; + r->coeffs[2*i+1] = ((a[3*i+1] >> 4) | ((uint16_t)a[3*i+2] << 4)) & 0xFFF; -+ } -+} -+ + } + } + +-static void matrix_mult(vector *out, const matrix *m, const vector *a) { +- vector_zero(out); +- for (int i = 0; i < RANK; i++) { +- for (int j = 0; j < RANK; j++) { +- scalar product; +- scalar_mult(&product, &m->v[i][j], &a->v[j]); +- scalar_add(&out->v[i], &product); +/************************************************* +* Name: poly_frommsg +* @@ -1044,10 +1153,18 @@ index d3ea02090..ccb5b3d9b 100644 + for(j=0;j<8;j++) { + mask = -(int16_t)value_barrier_u32((msg[i] >> j)&1); + r->coeffs[8*i+j] = mask & ((KYBER_Q+1)/2); -+ } -+ } -+} -+ + } + } + } + +-static void matrix_mult_transpose(vector *out, const matrix *m, +- const vector *a) { +- vector_zero(out); +- for (int i = 0; i < RANK; i++) { +- for (int j = 0; j < RANK; j++) { +- scalar product; +- 
scalar_mult(&product, &m->v[j][i], &a->v[j]); +- scalar_add(&out->v[i], &product); +/************************************************* +* Name: poly_tomsg +* @@ -1071,10 +1188,18 @@ index d3ea02090..ccb5b3d9b 100644 + t >>= 28; + t &= 1; + msg[i] |= t << j; -+ } -+ } -+} -+ + } + } + } + +-static void scalar_inner_product(scalar *out, const vector *lhs, +- const vector *rhs) { +- scalar_zero(out); +- for (int i = 0; i < RANK; i++) { +- scalar product; +- scalar_mult(&product, &lhs->v[i], &rhs->v[i]); +- scalar_add(out, &product); +- } +/************************************************* +* Name: poly_getnoise_eta1 +* @@ -1092,8 +1217,32 @@ index d3ea02090..ccb5b3d9b 100644 + uint8_t buf[KYBER_ETA1*KYBER_N/4]; + prf(buf, sizeof(buf), seed, nonce); + poly_cbd_eta1(r, buf); -+} -+ + } + +-// Algorithm 1 of the Kyber spec. Rejection samples a Keccak stream to get +-// uniformly distributed elements. This is used for matrix expansion and only +-// operates on public inputs. +-static void scalar_from_keccak_vartime(scalar *out, +- struct BORINGSSL_keccak_st *keccak_ctx) { +- assert(keccak_ctx->squeeze_offset == 0); +- assert(keccak_ctx->rate_bytes == 168); +- static_assert(168 % 3 == 0, "block and coefficient boundaries do not align"); +- +- int done = 0; +- while (done < DEGREE) { +- uint8_t block[168]; +- BORINGSSL_keccak_squeeze(keccak_ctx, block, sizeof(block)); +- for (size_t i = 0; i < sizeof(block) && done < DEGREE; i += 3) { +- uint16_t d1 = block[i] + 256 * (block[i + 1] % 16); +- uint16_t d2 = block[i + 1] / 16 + 16 * block[i + 2]; +- if (d1 < kPrime) { +- out->c[done++] = d1; +- } +- if (d2 < kPrime && done < DEGREE) { +- out->c[done++] = d2; +- } +- } +- } +/************************************************* +* Name: poly_getnoise_eta2 +* @@ -1111,8 +1260,34 @@ index d3ea02090..ccb5b3d9b 100644 + uint8_t buf[KYBER_ETA2*KYBER_N/4]; + prf(buf, sizeof(buf), seed, nonce); + poly_cbd_eta2(r, buf); -+} -+ + } + +-// Algorithm 2 of the Kyber spec, with eta fixed to 
two and the PRF call +-// included. Creates binominally distributed elements by sampling 2*|eta| bits, +-// and setting the coefficient to the count of the first bits minus the count of +-// the second bits, resulting in a centered binomial distribution. Since eta is +-// two this gives -2/2 with a probability of 1/16, -1/1 with probability 1/4, +-// and 0 with probability 3/8. +-static void scalar_centered_binomial_distribution_eta_2_with_prf( +- scalar *out, const uint8_t input[33]) { +- uint8_t entropy[128]; +- static_assert(sizeof(entropy) == 2 * /*kEta=*/2 * DEGREE / 8, ""); +- BORINGSSL_keccak(entropy, sizeof(entropy), input, 33, boringssl_shake256); +- +- for (int i = 0; i < DEGREE; i += 2) { +- uint8_t byte = entropy[i / 2]; +- +- uint16_t value = kPrime; +- value += (byte & 1) + ((byte >> 1) & 1); +- value -= ((byte >> 2) & 1) + ((byte >> 3) & 1); +- out->c[i] = reduce_once(value); +- +- byte >>= 4; +- value = kPrime; +- value += (byte & 1) + ((byte >> 1) & 1); +- value -= ((byte >> 2) & 1) + ((byte >> 3) & 1); +- out->c[i + 1] = reduce_once(value); +- } + +/************************************************* +* Name: poly_ntt @@ -1127,8 +1302,19 @@ index d3ea02090..ccb5b3d9b 100644 +{ + ntt(r->coeffs); + poly_reduce(r); -+} -+ + } + +-// Generates a secret vector by using +-// |scalar_centered_binomial_distribution_eta_2_with_prf|, using the given seed +-// appending and incrementing |counter| for entry of the vector. 
+-static void vector_generate_secret_eta_2(vector *out, uint8_t *counter, +- const uint8_t seed[32]) { +- uint8_t input[33]; +- OPENSSL_memcpy(input, seed, 32); +- for (int i = 0; i < RANK; i++) { +- input[32] = (*counter)++; +- scalar_centered_binomial_distribution_eta_2_with_prf(&out->v[i], input); +- } +/************************************************* +* Name: poly_invntt_tomont +* @@ -1141,8 +1327,21 @@ index d3ea02090..ccb5b3d9b 100644 +static void poly_invntt_tomont(poly *r) +{ + invntt(r->coeffs); -+} -+ + } + +-// Expands the matrix of a seed for key generation and for encaps-CPA. +-static void matrix_expand(matrix *out, const uint8_t rho[32]) { +- uint8_t input[34]; +- OPENSSL_memcpy(input, rho, 32); +- for (int i = 0; i < RANK; i++) { +- for (int j = 0; j < RANK; j++) { +- input[32] = i; +- input[33] = j; +- struct BORINGSSL_keccak_st keccak_ctx; +- BORINGSSL_keccak_init(&keccak_ctx, boringssl_shake128); +- BORINGSSL_keccak_absorb(&keccak_ctx, input, sizeof(input)); +- scalar_from_keccak_vartime(&out->v[i][j], &keccak_ctx); +- } +/************************************************* +* Name: poly_basemul_montgomery +* @@ -1158,9 +1357,35 @@ index d3ea02090..ccb5b3d9b 100644 + for(i=0;icoeffs[4*i], &a->coeffs[4*i], &b->coeffs[4*i], zetas[64+i]); + basemul(&r->coeffs[4*i+2], &a->coeffs[4*i+2], &b->coeffs[4*i+2], -zetas[64+i]); -+ } -+} -+ + } + } + +-static const uint8_t kMasks[8] = {0x01, 0x03, 0x07, 0x0f, +- 0x1f, 0x3f, 0x7f, 0xff}; +- +-static void scalar_encode(uint8_t *out, const scalar *s, int bits) { +- assert(bits <= (int)sizeof(*s->c) * 8 && bits != 1); +- +- uint8_t out_byte = 0; +- int out_byte_bits = 0; +- +- for (int i = 0; i < DEGREE; i++) { +- uint16_t element = s->c[i]; +- int element_bits_done = 0; +- +- while (element_bits_done < bits) { +- int chunk_bits = bits - element_bits_done; +- int out_bits_remaining = 8 - out_byte_bits; +- if (chunk_bits >= out_bits_remaining) { +- chunk_bits = out_bits_remaining; +- out_byte |= (element & 
kMasks[chunk_bits - 1]) << out_byte_bits; +- *out = out_byte; +- out++; +- out_byte_bits = 0; +- out_byte = 0; +- } else { +- out_byte |= (element & kMasks[chunk_bits - 1]) << out_byte_bits; +- out_byte_bits += chunk_bits; +/************************************************* +* Name: poly_tomont +* @@ -1255,8 +1480,10 @@ index d3ea02090..ccb5b3d9b 100644 + d0 *= 645084; + d0 >>= 31; + t[k] = d0 & 0x7ff; -+ } -+ + } + +- element_bits_done += chunk_bits; +- element >>= chunk_bits; + r[ 0] = (t[0] >> 0); + r[ 1] = (t[0] >> 8) | (t[1] << 3); + r[ 2] = (t[1] >> 5) | (t[2] << 6); @@ -1269,8 +1496,8 @@ index d3ea02090..ccb5b3d9b 100644 + r[ 9] = (t[6] >> 6) | (t[7] << 5); + r[10] = (t[7] >> 3); + r += 11; -+ } -+ } + } + } +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) + uint16_t t[4]; + for(i=0;i>= 32; + t[k] = d0 & 0x3ff; + } -+ + +- if (out_byte_bits > 0) { +- *out = out_byte; + r[0] = (t[0] >> 0); + r[1] = (t[0] >> 8) | (t[1] << 2); + r[2] = (t[1] >> 6) | (t[2] << 4); @@ -1293,12 +1522,18 @@ index d3ea02090..ccb5b3d9b 100644 + r[4] = (t[3] >> 2); + r += 5; + } -+ } + } +#else +#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" +#endif -+} -+ + } + +-// scalar_encode_1 is |scalar_encode| specialised for |bits| == 1. +-static void scalar_encode_1(uint8_t out[32], const scalar *s) { +- for (int i = 0; i < DEGREE; i += 8) { +- uint8_t out_byte = 0; +- for (int j = 0; j < 8; j++) { +- out_byte |= (s->c[i + j] & 1) << j; +/************************************************* +* Name: polyvec_decompress +* @@ -1343,13 +1578,22 @@ index d3ea02090..ccb5b3d9b 100644 + + for(k=0;k<4;k++) + r->vec[i].coeffs[4*j+k] = ((uint32_t)(t[k] & 0x3FF)*KYBER_Q + 512) >> 10; -+ } -+ } + } +- *out = out_byte; +- out++; + } +#else +#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" +#endif -+} -+ + } + +-// Encodes an entire vector into 32*|RANK|*|bits| bytes. 
Note that since 256 +-// (DEGREE) is divisible by 8, the individual vector entries will always fill a +-// whole number of bytes, so we do not need to worry about bit packing here. +-static void vector_encode(uint8_t *out, const vector *a, int bits) { +- for (int i = 0; i < RANK; i++) { +- scalar_encode(out + i * bits * DEGREE / 8, &a->v[i], bits); +- } +/************************************************* +* Name: polyvec_tobytes +* @@ -1364,8 +1608,13 @@ index d3ea02090..ccb5b3d9b 100644 + unsigned int i; + for(i=0;ivec[i]); -+} -+ + } + +-// scalar_decode parses |DEGREE * bits| bits from |in| into |DEGREE| values in +-// |out|. It returns one on success and zero if any parsed value is >= +-// |kPrime|. +-static int scalar_decode(scalar *out, const uint8_t *in, int bits) { +- assert(bits <= (int)sizeof(*out->c) * 8 && bits != 1); +/************************************************* +* Name: polyvec_frombytes +* @@ -1382,7 +1631,9 @@ index d3ea02090..ccb5b3d9b 100644 + for(i=0;ivec[i], a+i*KYBER_POLYBYTES); +} -+ + +- uint8_t in_byte = 0; +- int in_byte_bits_left = 0; +/************************************************* +* Name: polyvec_ntt +* @@ -1396,7 +1647,10 @@ index d3ea02090..ccb5b3d9b 100644 + for(i=0;ivec[i]); +} -+ + +- for (int i = 0; i < DEGREE; i++) { +- uint16_t element = 0; +- int element_bits_done = 0; +/************************************************* +* Name: polyvec_invntt_tomont +* @@ -1411,7 +1665,13 @@ index d3ea02090..ccb5b3d9b 100644 + for(i=0;ivec[i]); +} -+ + +- while (element_bits_done < bits) { +- if (in_byte_bits_left == 0) { +- in_byte = *in; +- in++; +- in_byte_bits_left = 8; +- } +/************************************************* +* Name: polyvec_basemul_acc_montgomery +* @@ -1432,10 +1692,17 @@ index d3ea02090..ccb5b3d9b 100644 + poly_basemul_montgomery(&t, &a->vec[i], &b->vec[i]); + poly_add(r, r, &t); + } -+ + +- int chunk_bits = bits - element_bits_done; +- if (chunk_bits > in_byte_bits_left) { +- chunk_bits = in_byte_bits_left; 
+- } + poly_reduce(r); +} -+ + +- element |= (in_byte & kMasks[chunk_bits - 1]) << element_bits_done; +- in_byte_bits_left -= chunk_bits; +- in_byte >>= chunk_bits; +/************************************************* +* Name: polyvec_reduce +* @@ -1451,7 +1718,9 @@ index d3ea02090..ccb5b3d9b 100644 + for(i=0;ivec[i]); +} -+ + +- element_bits_done += chunk_bits; +- } +/************************************************* +* Name: polyvec_add +* @@ -1467,7 +1736,12 @@ index d3ea02090..ccb5b3d9b 100644 + for(i=0;ivec[i], &a->vec[i], &b->vec[i]); +} -+ + +- if (element >= kPrime) { +- return 0; +- } +- out->c[i] = element; +- } +// +// indcpa.c +// @@ -1516,12 +1790,21 @@ index d3ea02090..ccb5b3d9b 100644 + + if(verify(repacked, packedpk, KYBER_POLYVECBYTES) != 0) + return 0; -+ + + for(i=0;ic[i + j] = in_byte & 1; +- in_byte >>= 1; +- } +/************************************************* +* Name: pack_sk +* @@ -1612,11 +1895,17 @@ index d3ea02090..ccb5b3d9b 100644 + r[ctr++] = val0; + if(ctr < len && val1 < KYBER_Q) + r[ctr++] = val1; -+ } + } + + return ctr; -+} -+ + } + +-// Decodes 32*|RANK|*|bits| bytes from |in| into |out|. It returns one on +-// success or zero if any parsed value is >= |kPrime|. +-static int vector_decode(vector *out, const uint8_t *in, int bits) { +- for (int i = 0; i < RANK; i++) { +- if (!scalar_decode(&out->v[i], in + i * bits * DEGREE / 8, bits)) { +- return 0; +#define gen_a(A,B) gen_matrix(A,B,0) +#define gen_at(A,B) gen_matrix(A,B,1) + @@ -1660,10 +1949,53 @@ index d3ea02090..ccb5b3d9b 100644 + buflen = off + XOF_BLOCKBYTES; + ctr += rej_uniform(a[i].vec[j].coeffs + ctr, KYBER_N - ctr, buf, buflen); + } -+ } -+ } -+} -+ + } + } +- return 1; + } + +-// Compresses (lossily) an input |x| mod 3329 into |bits| many bits by grouping +-// numbers close to each other together. The formula used is +-// round(2^|bits|/kPrime*x) mod 2^|bits|. +-// Uses Barrett reduction to achieve constant time. 
Since we need both the +-// remainder (for rounding) and the quotient (as the result), we cannot use +-// |reduce| here, but need to do the Barrett reduction directly. +-static uint16_t compress(uint16_t x, int bits) { +- uint32_t shifted = (uint32_t)x << bits; +- uint64_t product = (uint64_t)shifted * kBarrettMultiplier; +- uint32_t quotient = (uint32_t)(product >> kBarrettShift); +- uint32_t remainder = shifted - quotient * kPrime; +- +- // Adjust the quotient to round correctly: +- // 0 <= remainder <= kHalfPrime round to 0 +- // kHalfPrime < remainder <= kPrime + kHalfPrime round to 1 +- // kPrime + kHalfPrime < remainder < 2 * kPrime round to 2 +- assert(remainder < 2u * kPrime); +- quotient += 1 & constant_time_lt_w(kHalfPrime, remainder); +- quotient += 1 & constant_time_lt_w(kPrime + kHalfPrime, remainder); +- return quotient & ((1 << bits) - 1); +-} +- +-// Decompresses |x| by using an equi-distant representative. The formula is +-// round(kPrime/2^|bits|*x). Note that 2^|bits| being the divisor allows us to +-// implement this logic using only bit operations. +-static uint16_t decompress(uint16_t x, int bits) { +- uint32_t product = (uint32_t)x * kPrime; +- uint32_t power = 1 << bits; +- // This is |product| % power, since |power| is a power of 2. +- uint32_t remainder = product & (power - 1); +- // This is |product| / power, since |power| is a power of 2. +- uint32_t lower = product >> bits; +- // The rounding logic works since the first half of numbers mod |power| have a +- // 0 as first bit, and the second half has a 1 as first bit, since |power| is +- // a power of 2. As a 12 bit number, |remainder| is always positive, so we +- // will shift in 0s for a right shift. 
+- return lower + (remainder >> (bits - 1)); +-} +- +-static void scalar_compress(scalar *s, int bits) { +- for (int i = 0; i < DEGREE; i++) { +- s->c[i] = compress(s->c[i], bits); +/************************************************* +* Name: indcpa_keypair +* @@ -1703,15 +2035,19 @@ index d3ea02090..ccb5b3d9b 100644 + for(i=0;ic[i] = decompress(s->c[i], bits); +- } +/************************************************* +* Name: indcpa_enc +* @@ -1770,8 +2106,12 @@ index d3ea02090..ccb5b3d9b 100644 + + pack_ciphertext(c, &b, &v); + return 1; -+} -+ + } + +-static void vector_compress(vector *a, int bits) { +- for (int i = 0; i < RANK; i++) { +- scalar_compress(&a->v[i], bits); +- } +/************************************************* +* Name: indcpa_dec +* @@ -1803,8 +2143,12 @@ index d3ea02090..ccb5b3d9b 100644 + poly_reduce(&mp); + + poly_tomsg(m, &mp); -+} -+ + } + +-static void vector_decompress(vector *a, int bits) { +- for (int i = 0; i < RANK; i++) { +- scalar_decompress(&a->v[i], bits); +- } +// +// fips202.c +// @@ -1834,8 +2178,13 @@ index d3ea02090..ccb5b3d9b 100644 + r |= (uint64_t)x[i] << 8*i; + + return r; -+} -+ + } + +-struct public_key { +- vector t; +- uint8_t rho[32]; +- uint8_t public_key_hash[32]; +- matrix m; +/************************************************* +* Name: store64 +* @@ -1879,16 +2228,13 @@ index d3ea02090..ccb5b3d9b 100644 + (uint64_t)0x8000000080008008ULL }; --// reduce_once reduces 0 <= x < 2*kPrime, mod kPrime. --static uint16_t reduce_once(uint16_t x) { -- assert(x < 2 * kPrime); -- const uint16_t subtracted = x - kPrime; -- uint16_t mask = 0u - (subtracted >> 15); -- // On Aarch64, omitting a |value_barrier_u16| results in a 2x speedup of Kyber -- // overall and Clang still produces constant-time code using `csel`. On other -- // platforms & compilers on godbolt that we care about, this code also -- // produces constant-time output. 
-- return (mask & x) | (~mask & subtracted); +-static struct public_key *public_key_from_external( +- const struct KYBER_public_key *external) { +- static_assert(sizeof(struct KYBER_public_key) >= sizeof(struct public_key), +- "Kyber public key is too small"); +- static_assert(alignof(struct KYBER_public_key) >= alignof(struct public_key), +- "Kyber public key align incorrect"); +- return (struct public_key *)external; +/************************************************* +* Name: KeccakF1600_StatePermute +* @@ -2160,17 +2506,36 @@ index d3ea02090..ccb5b3d9b 100644 + state[24] = Asu; } --// constant time reduce x mod kPrime using Barrett reduction. x must be less --// than kPrime + 2×kPrime². --static uint16_t reduce(uint32_t x) { -- assert(x < kPrime + 2u * kPrime * kPrime); -- uint64_t product = (uint64_t)x * kBarrettMultiplier; -- uint32_t quotient = (uint32_t)(product >> kBarrettShift); -- uint32_t remainder = x - quotient * kPrime; -- return reduce_once(remainder); --} +-struct private_key { +- struct public_key pub; +- vector s; +- uint8_t fo_failure_secret[32]; +-}; --static void scalar_zero(scalar *out) { OPENSSL_memset(out, 0, sizeof(*out)); } +-static struct private_key *private_key_from_external( +- const struct KYBER_private_key *external) { +- static_assert(sizeof(struct KYBER_private_key) >= sizeof(struct private_key), +- "Kyber private key too small"); +- static_assert( +- alignof(struct KYBER_private_key) >= alignof(struct private_key), +- "Kyber private key align incorrect"); +- return (struct private_key *)external; +-} +- +-// Calls |KYBER_generate_key_external_entropy| with random bytes from +-// |RAND_bytes|. 
+-void KYBER_generate_key(uint8_t out_encoded_public_key[KYBER_PUBLIC_KEY_BYTES], +- struct KYBER_private_key *out_private_key) { +- uint8_t entropy[KYBER_GENERATE_KEY_ENTROPY]; +- RAND_bytes(entropy, sizeof(entropy)); +- KYBER_generate_key_external_entropy(out_encoded_public_key, out_private_key, +- entropy); +-} +- +-static int kyber_marshal_public_key(CBB *out, const struct public_key *pub) { +- uint8_t *vector_output; +- if (!CBB_add_space(out, &vector_output, kEncodedVectorSize)) { +- return 0; +/************************************************* +* Name: keccak_squeeze +* @@ -2193,41 +2558,20 @@ index d3ea02090..ccb5b3d9b 100644 + unsigned int r) +{ + unsigned int i; - --static void vector_zero(vector *out) { OPENSSL_memset(out, 0, sizeof(*out)); } -- --// In place number theoretic transform of a given scalar. --// Note that Kyber's kPrime 3329 does not have a 512th root of unity, so this --// transform leaves off the last iteration of the usual FFT code, with the 128 --// relevant roots of unity being stored in |kNTTRoots|. This means the output --// should be seen as 128 elements in GF(3329^2), with the coefficients of the --// elements being consecutive entries in |s->c|. --static void scalar_ntt(scalar *s) { -- int offset = DEGREE; -- // `int` is used here because using `size_t` throughout caused a ~5% slowdown -- // with Clang 14 on Aarch64. 
-- for (int step = 1; step < DEGREE / 2; step <<= 1) { -- offset >>= 1; -- int k = 0; -- for (int i = 0; i < step; i++) { -- const uint32_t step_root = kNTTRoots[i + step]; -- for (int j = k; j < k + offset; j++) { -- uint16_t odd = reduce(step_root * s->c[j + offset]); -- uint16_t even = s->c[j]; -- s->c[j] = reduce_once(odd + even); -- s->c[j + offset] = reduce_once(even - odd + kPrime); -- } -- k += 2 * offset; ++ + while(outlen) { + if(pos == r) { + KeccakF1600_StatePermute(s); + pos = 0; - } ++ } + for(i=pos;i < r && i < pos+outlen; i++) + *out++ = s[i/8] >> 8*(i%8); + outlen -= i-pos; + pos = i; -+ } + } +- vector_encode(vector_output, &pub->t, kLog2Prime); +- if (!CBB_add_bytes(out, pub->rho, sizeof(pub->rho))) { +- return 0; + + return pos; +} @@ -2259,7 +2603,8 @@ index d3ea02090..ccb5b3d9b 100644 + inlen -= r-pos; + KeccakF1600_StatePermute(s); + pos = 0; -+ } + } +- return 1; + + for(i=pos;ipub.rho, hashed, sizeof(priv->pub.rho)); +- matrix_expand(&priv->pub.m, rho); +- uint8_t counter = 0; +- vector_generate_secret_eta_2(&priv->s, &counter, sigma); +- vector_ntt(&priv->s); +- vector error; +- vector_generate_secret_eta_2(&error, &counter, sigma); +- vector_ntt(&error); +- matrix_mult_transpose(&priv->pub.t, &priv->pub.m, &priv->s); +- vector_add(&priv->pub.t, &error); +- +- CBB cbb; +- CBB_init_fixed(&cbb, out_encoded_public_key, KYBER_PUBLIC_KEY_BYTES); +- if (!kyber_marshal_public_key(&cbb, &priv->pub)) { +- abort(); + +/************************************************* +* Name: keccak_absorb_once @@ -2313,824 +2685,8 @@ index d3ea02090..ccb5b3d9b 100644 + in += r; + inlen -= r; + KeccakF1600_StatePermute(s); -+ } -+ -+ for(i=0;iv[i]); -- } -+ -+/************************************************* -+* Name: shake128_absorb_once -+* -+* Description: Initialize, absorb into and finalize SHAKE128 XOF; non-incremental. 
-+* -+* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state -+* - const uint8_t *in: pointer to input to be absorbed into s -+* - size_t inlen: length of input in bytes -+**************************************************/ -+static void shake128_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen) -+{ -+ keccak_absorb_once(state->s, SHAKE128_RATE, in, inlen, 0x1F); -+ state->pos = SHAKE128_RATE; - } - --// In place inverse number theoretic transform of a given scalar, with pairs of --// entries of s->v being interpreted as elements of GF(3329^2). Just as with the --// number theoretic transform, this leaves off the first step of the normal iFFT --// to account for the fact that 3329 does not have a 512th root of unity, using --// the precomputed 128 roots of unity stored in |kInverseNTTRoots|. --static void scalar_inverse_ntt(scalar *s) { -- int step = DEGREE / 2; -- // `int` is used here because using `size_t` throughout caused a ~5% slowdown -- // with Clang 14 on Aarch64. -- for (int offset = 2; offset < DEGREE; offset <<= 1) { -- step >>= 1; -- int k = 0; -- for (int i = 0; i < step; i++) { -- uint32_t step_root = kInverseNTTRoots[i + step]; -- for (int j = k; j < k + offset; j++) { -- uint16_t odd = s->c[j + offset]; -- uint16_t even = s->c[j]; -- s->c[j] = reduce_once(odd + even); -- s->c[j + offset] = reduce(step_root * (even - odd + kPrime)); -- } -- k += 2 * offset; -- } -- } -- for (int i = 0; i < DEGREE; i++) { -- s->c[i] = reduce(s->c[i] * kInverseDegree); -- } -+/************************************************* -+* Name: shake128_squeezeblocks -+* -+* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of -+* SHAKE128_RATE bytes each. Can be called multiple times -+* to keep squeezing. Assumes new block has not yet been -+* started (state->pos = SHAKE128_RATE). 
-+* -+* Arguments: - uint8_t *out: pointer to output blocks -+* - size_t nblocks: number of blocks to be squeezed (written to output) -+* - keccak_state *s: pointer to input/output Keccak state -+**************************************************/ -+static void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state) -+{ -+ keccak_squeezeblocks(out, nblocks, state->s, SHAKE128_RATE); - } - --static void vector_inverse_ntt(vector *a) { -- for (int i = 0; i < RANK; i++) { -- scalar_inverse_ntt(&a->v[i]); -- } -+/************************************************* -+* Name: shake256_squeeze -+* -+* Description: Squeeze step of SHAKE256 XOF. Squeezes arbitraily many -+* bytes. Can be called multiple times to keep squeezing. -+* -+* Arguments: - uint8_t *out: pointer to output blocks -+* - size_t outlen : number of bytes to be squeezed (written to output) -+* - keccak_state *s: pointer to input/output Keccak state -+**************************************************/ -+static void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state) -+{ -+ state->pos = keccak_squeeze(out, outlen, state->s, state->pos, SHAKE256_RATE); - } - --static void scalar_add(scalar *lhs, const scalar *rhs) { -- for (int i = 0; i < DEGREE; i++) { -- lhs->c[i] = reduce_once(lhs->c[i] + rhs->c[i]); -- } -+/************************************************* -+* Name: shake256_absorb_once -+* -+* Description: Initialize, absorb into and finalize SHAKE256 XOF; non-incremental. 
-+* -+* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state -+* - const uint8_t *in: pointer to input to be absorbed into s -+* - size_t inlen: length of input in bytes -+**************************************************/ -+static void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen) -+{ -+ keccak_absorb_once(state->s, SHAKE256_RATE, in, inlen, 0x1F); -+ state->pos = SHAKE256_RATE; - } - --static void scalar_sub(scalar *lhs, const scalar *rhs) { -- for (int i = 0; i < DEGREE; i++) { -- lhs->c[i] = reduce_once(lhs->c[i] - rhs->c[i] + kPrime); -- } -+/************************************************* -+* Name: shake256_squeezeblocks -+* -+* Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of -+* SHAKE256_RATE bytes each. Can be called multiple times -+* to keep squeezing. Assumes next block has not yet been -+* started (state->pos = SHAKE256_RATE). -+* -+* Arguments: - uint8_t *out: pointer to output blocks -+* - size_t nblocks: number of blocks to be squeezed (written to output) -+* - keccak_state *s: pointer to input/output Keccak state -+**************************************************/ -+static void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state) -+{ -+ keccak_squeezeblocks(out, nblocks, state->s, SHAKE256_RATE); - } - --// Multiplying two scalars in the number theoretically transformed state. Since --// 3329 does not have a 512th root of unity, this means we have to interpret --// the 2*ith and (2*i+1)th entries of the scalar as elements of GF(3329)[X]/(X^2 --// - 17^(2*bitreverse(i)+1)) The value of 17^(2*bitreverse(i)+1) mod 3329 is --// stored in the precomputed |kModRoots| table. Note that our Barrett transform --// only allows us to multipy two reduced numbers together, so we need some --// intermediate reduction steps, even if an uint64_t could hold 3 multiplied --// numbers. 
--static void scalar_mult(scalar *out, const scalar *lhs, const scalar *rhs) { -- for (int i = 0; i < DEGREE / 2; i++) { -- uint32_t real_real = (uint32_t)lhs->c[2 * i] * rhs->c[2 * i]; -- uint32_t img_img = (uint32_t)lhs->c[2 * i + 1] * rhs->c[2 * i + 1]; -- uint32_t real_img = (uint32_t)lhs->c[2 * i] * rhs->c[2 * i + 1]; -- uint32_t img_real = (uint32_t)lhs->c[2 * i + 1] * rhs->c[2 * i]; -- out->c[2 * i] = -- reduce(real_real + (uint32_t)reduce(img_img) * kModRoots[i]); -- out->c[2 * i + 1] = reduce(img_real + real_img); -- } -+/************************************************* -+* Name: shake256_absorb -+* -+* Description: Absorb step of the SHAKE256 XOF; incremental. -+* -+* Arguments: - keccak_state *state: pointer to (initialized) output Keccak state -+* - const uint8_t *in: pointer to input to be absorbed into s -+* - size_t inlen: length of input in bytes -+**************************************************/ -+static void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen) -+{ -+ state->pos = keccak_absorb(state->s, state->pos, SHAKE256_RATE, in, inlen); - } - --static void vector_add(vector *lhs, const vector *rhs) { -- for (int i = 0; i < RANK; i++) { -- scalar_add(&lhs->v[i], &rhs->v[i]); -- } -+/************************************************* -+* Name: shake256_finalize -+* -+* Description: Finalize absorb step of the SHAKE256 XOF. 
-+* -+* Arguments: - keccak_state *state: pointer to Keccak state -+**************************************************/ -+static void shake256_finalize(keccak_state *state) -+{ -+ keccak_finalize(state->s, state->pos, SHAKE256_RATE, 0x1F); -+ state->pos = SHAKE256_RATE; - } - --static void matrix_mult(vector *out, const matrix *m, const vector *a) { -- vector_zero(out); -- for (int i = 0; i < RANK; i++) { -- for (int j = 0; j < RANK; j++) { -- scalar product; -- scalar_mult(&product, &m->v[i][j], &a->v[j]); -- scalar_add(&out->v[i], &product); -- } -- } -+/************************************************* -+* Name: keccak_init -+* -+* Description: Initializes the Keccak state. -+* -+* Arguments: - uint64_t *s: pointer to Keccak state -+**************************************************/ -+static void keccak_init(uint64_t s[25]) -+{ -+ unsigned int i; -+ for(i=0;i<25;i++) -+ s[i] = 0; - } - --static void matrix_mult_transpose(vector *out, const matrix *m, -- const vector *a) { -- vector_zero(out); -- for (int i = 0; i < RANK; i++) { -- for (int j = 0; j < RANK; j++) { -- scalar product; -- scalar_mult(&product, &m->v[j][i], &a->v[j]); -- scalar_add(&out->v[i], &product); -- } -- } -+/************************************************* -+* Name: shake256_init -+* -+* Description: Initilizes Keccak state for use as SHAKE256 XOF -+* -+* Arguments: - keccak_state *state: pointer to (uninitialized) Keccak state -+**************************************************/ -+static void shake256_init(keccak_state *state) -+{ -+ keccak_init(state->s); -+ state->pos = 0; - } - --static void scalar_inner_product(scalar *out, const vector *lhs, -- const vector *rhs) { -- scalar_zero(out); -- for (int i = 0; i < RANK; i++) { -- scalar product; -- scalar_mult(&product, &lhs->v[i], &rhs->v[i]); -- scalar_add(out, &product); -- } -+ -+/************************************************* -+* Name: shake256 -+* -+* Description: SHAKE256 XOF with non-incremental API -+* -+* Arguments: - uint8_t 
*out: pointer to output -+* - size_t outlen: requested output length in bytes -+* - const uint8_t *in: pointer to input -+* - size_t inlen: length of input in bytes -+**************************************************/ -+static void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen) -+{ -+ size_t nblocks; -+ keccak_state state; -+ -+ shake256_absorb_once(&state, in, inlen); -+ nblocks = outlen/SHAKE256_RATE; -+ shake256_squeezeblocks(out, nblocks, &state); -+ outlen -= nblocks*SHAKE256_RATE; -+ out += nblocks*SHAKE256_RATE; -+ shake256_squeeze(out, outlen, &state); - } - --// Algorithm 1 of the Kyber spec. Rejection samples a Keccak stream to get --// uniformly distributed elements. This is used for matrix expansion and only --// operates on public inputs. --static void scalar_from_keccak_vartime(scalar *out, -- struct BORINGSSL_keccak_st *keccak_ctx) { -- assert(keccak_ctx->squeeze_offset == 0); -- assert(keccak_ctx->rate_bytes == 168); -- static_assert(168 % 3 == 0, "block and coefficient boundaries do not align"); -+/************************************************* -+* Name: sha3_256 -+* -+* Description: SHA3-256 with non-incremental API -+* -+* Arguments: - uint8_t *h: pointer to output (32 bytes) -+* - const uint8_t *in: pointer to input -+* - size_t inlen: length of input in bytes -+**************************************************/ -+static void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen) -+{ -+ unsigned int i; -+ uint64_t s[25]; - -- int done = 0; -- while (done < DEGREE) { -- uint8_t block[168]; -- BORINGSSL_keccak_squeeze(keccak_ctx, block, sizeof(block)); -- for (size_t i = 0; i < sizeof(block) && done < DEGREE; i += 3) { -- uint16_t d1 = block[i] + 256 * (block[i + 1] % 16); -- uint16_t d2 = block[i + 1] / 16 + 16 * block[i + 2]; -- if (d1 < kPrime) { -- out->c[done++] = d1; -- } -- if (d2 < kPrime && done < DEGREE) { -- out->c[done++] = d2; -- } -- } -- } -+ keccak_absorb_once(s, SHA3_256_RATE, in, inlen, 0x06); -+ 
KeccakF1600_StatePermute(s); -+ for(i=0;i<4;i++) -+ store64(h+8*i,s[i]); - } - --// Algorithm 2 of the Kyber spec, with eta fixed to two and the PRF call --// included. Creates binominally distributed elements by sampling 2*|eta| bits, --// and setting the coefficient to the count of the first bits minus the count of --// the second bits, resulting in a centered binomial distribution. Since eta is --// two this gives -2/2 with a probability of 1/16, -1/1 with probability 1/4, --// and 0 with probability 3/8. --static void scalar_centered_binomial_distribution_eta_2_with_prf( -- scalar *out, const uint8_t input[33]) { -- uint8_t entropy[128]; -- static_assert(sizeof(entropy) == 2 * /*kEta=*/2 * DEGREE / 8, ""); -- BORINGSSL_keccak(entropy, sizeof(entropy), input, 33, boringssl_shake256); -+/************************************************* -+* Name: sha3_512 -+* -+* Description: SHA3-512 with non-incremental API -+* -+* Arguments: - uint8_t *h: pointer to output (64 bytes) -+* - const uint8_t *in: pointer to input -+* - size_t inlen: length of input in bytes -+**************************************************/ -+static void sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen) -+{ -+ unsigned int i; -+ uint64_t s[25]; - -- for (int i = 0; i < DEGREE; i += 2) { -- uint8_t byte = entropy[i / 2]; -- -- uint16_t value = kPrime; -- value += (byte & 1) + ((byte >> 1) & 1); -- value -= ((byte >> 2) & 1) + ((byte >> 3) & 1); -- out->c[i] = reduce_once(value); -- -- byte >>= 4; -- value = kPrime; -- value += (byte & 1) + ((byte >> 1) & 1); -- value -= ((byte >> 2) & 1) + ((byte >> 3) & 1); -- out->c[i + 1] = reduce_once(value); -- } -+ keccak_absorb_once(s, SHA3_512_RATE, in, inlen, 0x06); -+ KeccakF1600_StatePermute(s); -+ for(i=0;i<8;i++) -+ store64(h+8*i,s[i]); - } - --// Generates a secret vector by using --// |scalar_centered_binomial_distribution_eta_2_with_prf|, using the given seed --// appending and incrementing |counter| for entry of the vector. 
--static void vector_generate_secret_eta_2(vector *out, uint8_t *counter, -- const uint8_t seed[32]) { -- uint8_t input[33]; -- OPENSSL_memcpy(input, seed, 32); -- for (int i = 0; i < RANK; i++) { -- input[32] = (*counter)++; -- scalar_centered_binomial_distribution_eta_2_with_prf(&out->v[i], input); -- } -+// -+// symmetric-shake.c -+// -+ -+/************************************************* -+* Name: kyber_shake128_absorb -+* -+* Description: Absorb step of the SHAKE128 specialized for the Kyber context. -+* -+* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state -+* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state -+* - uint8_t i: additional byte of input -+* - uint8_t j: additional byte of input -+**************************************************/ -+static void kyber_shake128_absorb(keccak_state *state, -+ const uint8_t seed[KYBER_SYMBYTES], -+ uint8_t x, -+ uint8_t y) -+{ -+ uint8_t extseed[KYBER_SYMBYTES+2]; -+ -+ memcpy(extseed, seed, KYBER_SYMBYTES); -+ extseed[KYBER_SYMBYTES+0] = x; -+ extseed[KYBER_SYMBYTES+1] = y; -+ -+ shake128_absorb_once(state, extseed, sizeof(extseed)); - } - --// Expands the matrix of a seed for key generation and for encaps-CPA. 
--static void matrix_expand(matrix *out, const uint8_t rho[32]) { -- uint8_t input[34]; -- OPENSSL_memcpy(input, rho, 32); -- for (int i = 0; i < RANK; i++) { -- for (int j = 0; j < RANK; j++) { -- input[32] = i; -- input[33] = j; -- struct BORINGSSL_keccak_st keccak_ctx; -- BORINGSSL_keccak_init(&keccak_ctx, boringssl_shake128); -- BORINGSSL_keccak_absorb(&keccak_ctx, input, sizeof(input)); -- scalar_from_keccak_vartime(&out->v[i][j], &keccak_ctx); -- } -- } -+/************************************************* -+* Name: kyber_shake256_prf -+* -+* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -+* and then generates outlen bytes of SHAKE256 output -+* -+* Arguments: - uint8_t *out: pointer to output -+* - size_t outlen: number of requested output bytes -+* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) -+* - uint8_t nonce: single-byte nonce (public PRF input) -+**************************************************/ -+static void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce) -+{ -+ uint8_t extkey[KYBER_SYMBYTES+1]; -+ -+ memcpy(extkey, key, KYBER_SYMBYTES); -+ extkey[KYBER_SYMBYTES] = nonce; -+ -+ shake256(out, outlen, extkey, sizeof(extkey)); - } - --static const uint8_t kMasks[8] = {0x01, 0x03, 0x07, 0x0f, -- 0x1f, 0x3f, 0x7f, 0xff}; -+// -+// kem.c -+// - --static void scalar_encode(uint8_t *out, const scalar *s, int bits) { -- assert(bits <= (int)sizeof(*s->c) * 8 && bits != 1); -+// Modified crypto_kem_keypair to BoringSSL style API -+void generate_key(struct public_key *out_pub, struct private_key *out_priv, -+ const uint8_t seed[KYBER_GENERATE_KEY_BYTES]) -+{ -+ size_t i; -+ uint8_t* pk = &out_pub->opaque[0]; -+ uint8_t* sk = &out_priv->opaque[0]; - -- uint8_t out_byte = 0; -- int out_byte_bits = 0; -- -- for (int i = 0; i < DEGREE; i++) { -- uint16_t element = s->c[i]; -- int element_bits_done = 0; -- -- while (element_bits_done < bits) { -- int chunk_bits = 
bits - element_bits_done; -- int out_bits_remaining = 8 - out_byte_bits; -- if (chunk_bits >= out_bits_remaining) { -- chunk_bits = out_bits_remaining; -- out_byte |= (element & kMasks[chunk_bits - 1]) << out_byte_bits; -- *out = out_byte; -- out++; -- out_byte_bits = 0; -- out_byte = 0; -- } else { -- out_byte |= (element & kMasks[chunk_bits - 1]) << out_byte_bits; -- out_byte_bits += chunk_bits; -- } -- -- element_bits_done += chunk_bits; -- element >>= chunk_bits; -- } -- } -- -- if (out_byte_bits > 0) { -- *out = out_byte; -- } -+ indcpa_keypair(pk, sk, seed); -+ for(i=0;ic[i + j] & 1) << j; -- } -- *out = out_byte; -- out++; -- } --} -+// Modified crypto_kem_enc to BoringSSL style API -+int encap(uint8_t out_ciphertext[KYBER_CIPHERTEXTBYTES], -+ uint8_t ss[KYBER_KEY_BYTES], -+ const struct public_key *in_pub, -+ const uint8_t seed[KYBER_ENCAP_BYTES], int mlkem) -+{ -+ const uint8_t *pk = &in_pub->opaque[0]; -+ uint8_t *ct = out_ciphertext; -+ -+ uint8_t buf[2*KYBER_SYMBYTES]; -+ /* Will contain key, coins */ -+ uint8_t kr[2*KYBER_SYMBYTES]; - --// Encodes an entire vector into 32*|RANK|*|bits| bytes. Note that since 256 --// (DEGREE) is divisible by 8, the individual vector entries will always fill a --// whole number of bytes, so we do not need to worry about bit packing here. --static void vector_encode(uint8_t *out, const vector *a, int bits) { -- for (int i = 0; i < RANK; i++) { -- scalar_encode(out + i * bits * DEGREE / 8, &a->v[i], bits); -- } --} -+ memcpy(buf, seed, KYBER_SYMBYTES); - --// scalar_decode parses |DEGREE * bits| bits from |in| into |DEGREE| values in --// |out|. It returns one on success and zero if any parsed value is >= --// |kPrime|. 
--static int scalar_decode(scalar *out, const uint8_t *in, int bits) { -- assert(bits <= (int)sizeof(*out->c) * 8 && bits != 1); -+ /* Don't release system RNG output */ -+ hash_h(buf, buf, KYBER_SYMBYTES); - -- uint8_t in_byte = 0; -- int in_byte_bits_left = 0; -+ /* Multitarget countermeasure for coins + contributory KEM */ -+ hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); -+ hash_g(kr, buf, 2*KYBER_SYMBYTES); - -- for (int i = 0; i < DEGREE; i++) { -- uint16_t element = 0; -- int element_bits_done = 0; -+ /* coins are in kr+KYBER_SYMBYTES */ -+ if(!indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES)) -+ return 0; - -- while (element_bits_done < bits) { -- if (in_byte_bits_left == 0) { -- in_byte = *in; -- in++; -- in_byte_bits_left = 8; -- } -- -- int chunk_bits = bits - element_bits_done; -- if (chunk_bits > in_byte_bits_left) { -- chunk_bits = in_byte_bits_left; -- } -- -- element |= (in_byte & kMasks[chunk_bits - 1]) << element_bits_done; -- in_byte_bits_left -= chunk_bits; -- in_byte >>= chunk_bits; -- -- element_bits_done += chunk_bits; -- } -- -- if (element >= kPrime) { -- return 0; -- } -- out->c[i] = element; -- } -- -- return 1; --} -- --// scalar_decode_1 is |scalar_decode| specialised for |bits| == 1. --static void scalar_decode_1(scalar *out, const uint8_t in[32]) { -- for (int i = 0; i < DEGREE; i += 8) { -- uint8_t in_byte = *in; -- in++; -- for (int j = 0; j < 8; j++) { -- out->c[i + j] = in_byte & 1; -- in_byte >>= 1; -- } -- } --} -- --// Decodes 32*|RANK|*|bits| bytes from |in| into |out|. It returns one on --// success or zero if any parsed value is >= |kPrime|. 
--static int vector_decode(vector *out, const uint8_t *in, int bits) { -- for (int i = 0; i < RANK; i++) { -- if (!scalar_decode(&out->v[i], in + i * bits * DEGREE / 8, bits)) { -- return 0; -- } -+ if (mlkem == 1) { -+ memcpy(ss, kr, KYBER_SYMBYTES); -+ } else { -+ /* overwrite coins in kr with H(c) */ -+ hash_h(kr+KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); -+ /* hash concatenation of pre-k and H(c) to k */ -+ kdf(ss, kr, 2*KYBER_SYMBYTES); - } - return 1; - } - --// Compresses (lossily) an input |x| mod 3329 into |bits| many bits by grouping --// numbers close to each other together. The formula used is --// round(2^|bits|/kPrime*x) mod 2^|bits|. --// Uses Barrett reduction to achieve constant time. Since we need both the --// remainder (for rounding) and the quotient (as the result), we cannot use --// |reduce| here, but need to do the Barrett reduction directly. --static uint16_t compress(uint16_t x, int bits) { -- uint32_t shifted = (uint32_t)x << bits; -- uint64_t product = (uint64_t)shifted * kBarrettMultiplier; -- uint32_t quotient = (uint32_t)(product >> kBarrettShift); -- uint32_t remainder = shifted - quotient * kPrime; -+// Modified crypto_kem_decap to BoringSSL style API -+void decap(uint8_t out_shared_key[KYBER_SSBYTES], -+ const struct private_key *in_priv, -+ const uint8_t *ct, size_t ciphertext_len, int mlkem) -+{ -+ uint8_t *ss = out_shared_key; -+ const uint8_t *sk = &in_priv->opaque[0]; - -- // Adjust the quotient to round correctly: -- // 0 <= remainder <= kHalfPrime round to 0 -- // kHalfPrime < remainder <= kPrime + kHalfPrime round to 1 -- // kPrime + kHalfPrime < remainder < 2 * kPrime round to 2 -- assert(remainder < 2u * kPrime); -- quotient += 1 & constant_time_lt_w(kHalfPrime, remainder); -- quotient += 1 & constant_time_lt_w(kPrime + kHalfPrime, remainder); -- return quotient & ((1 << bits) - 1); --} -+ size_t i; -+ int fail = 1; -+ uint8_t buf[2*KYBER_SYMBYTES]; -+ /* Will contain key, coins */ -+ uint8_t kr[2*KYBER_SYMBYTES]; -+ 
uint8_t cmp[KYBER_CIPHERTEXTBYTES]; -+ const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; - --// Decompresses |x| by using an equi-distant representative. The formula is --// round(kPrime/2^|bits|*x). Note that 2^|bits| being the divisor allows us to --// implement this logic using only bit operations. --static uint16_t decompress(uint16_t x, int bits) { -- uint32_t product = (uint32_t)x * kPrime; -- uint32_t power = 1 << bits; -- // This is |product| % power, since |power| is a power of 2. -- uint32_t remainder = product & (power - 1); -- // This is |product| / power, since |power| is a power of 2. -- uint32_t lower = product >> bits; -- // The rounding logic works since the first half of numbers mod |power| have a -- // 0 as first bit, and the second half has a 1 as first bit, since |power| is -- // a power of 2. As a 12 bit number, |remainder| is always positive, so we -- // will shift in 0s for a right shift. -- return lower + (remainder >> (bits - 1)); --} -+ if (ciphertext_len == KYBER_CIPHERTEXTBYTES) { -+ indcpa_dec(buf, ct, sk); - --static void scalar_compress(scalar *s, int bits) { -- for (int i = 0; i < DEGREE; i++) { -- s->c[i] = compress(s->c[i], bits); -+ /* Multitarget countermeasure for coins + contributory KEM */ -+ for(i=0;ic[i] = decompress(s->c[i], bits); -- } -+void marshal_public_key(uint8_t out[KYBER_PUBLICKEYBYTES], -+ const struct public_key *in_pub) { -+ memcpy(out, &in_pub->opaque, KYBER_PUBLICKEYBYTES); - } - --static void vector_compress(vector *a, int bits) { -- for (int i = 0; i < RANK; i++) { -- scalar_compress(&a->v[i], bits); -- } --} -- --static void vector_decompress(vector *a, int bits) { -- for (int i = 0; i < RANK; i++) { -- scalar_decompress(&a->v[i], bits); -- } --} -- --struct public_key { -- vector t; -- uint8_t rho[32]; -- uint8_t public_key_hash[32]; -- matrix m; --}; -- --static struct public_key *public_key_from_external( -- const struct KYBER_public_key *external) { -- static_assert(sizeof(struct KYBER_public_key) >= 
sizeof(struct public_key), -- "Kyber public key is too small"); -- static_assert(alignof(struct KYBER_public_key) >= alignof(struct public_key), -- "Kyber public key align incorrect"); -- return (struct public_key *)external; --} -- --struct private_key { -- struct public_key pub; -- vector s; -- uint8_t fo_failure_secret[32]; --}; -- --static struct private_key *private_key_from_external( -- const struct KYBER_private_key *external) { -- static_assert(sizeof(struct KYBER_private_key) >= sizeof(struct private_key), -- "Kyber private key too small"); -- static_assert( -- alignof(struct KYBER_private_key) >= alignof(struct private_key), -- "Kyber private key align incorrect"); -- return (struct private_key *)external; --} -- --// Calls |KYBER_generate_key_external_entropy| with random bytes from --// |RAND_bytes|. --void KYBER_generate_key(uint8_t out_encoded_public_key[KYBER_PUBLIC_KEY_BYTES], -- struct KYBER_private_key *out_private_key) { -- uint8_t entropy[KYBER_GENERATE_KEY_ENTROPY]; -- RAND_bytes(entropy, sizeof(entropy)); -- KYBER_generate_key_external_entropy(out_encoded_public_key, out_private_key, -- entropy); --} -- --static int kyber_marshal_public_key(CBB *out, const struct public_key *pub) { -- uint8_t *vector_output; -- if (!CBB_add_space(out, &vector_output, kEncodedVectorSize)) { -- return 0; -- } -- vector_encode(vector_output, &pub->t, kLog2Prime); -- if (!CBB_add_bytes(out, pub->rho, sizeof(pub->rho))) { -- return 0; -- } -- return 1; --} -- --// Algorithms 4 and 7 of the Kyber spec. Algorithms are combined since key --// generation is not part of the FO transform, and the spec uses Algorithm 7 to --// specify the actual key format. 
--void KYBER_generate_key_external_entropy( -- uint8_t out_encoded_public_key[KYBER_PUBLIC_KEY_BYTES], -- struct KYBER_private_key *out_private_key, -- const uint8_t entropy[KYBER_GENERATE_KEY_ENTROPY]) { -- struct private_key *priv = private_key_from_external(out_private_key); -- uint8_t hashed[64]; -- BORINGSSL_keccak(hashed, sizeof(hashed), entropy, 32, boringssl_sha3_512); -- const uint8_t *const rho = hashed; -- const uint8_t *const sigma = hashed + 32; -- OPENSSL_memcpy(priv->pub.rho, hashed, sizeof(priv->pub.rho)); -- matrix_expand(&priv->pub.m, rho); -- uint8_t counter = 0; -- vector_generate_secret_eta_2(&priv->s, &counter, sigma); -- vector_ntt(&priv->s); -- vector error; -- vector_generate_secret_eta_2(&error, &counter, sigma); -- vector_ntt(&error); -- matrix_mult_transpose(&priv->pub.t, &priv->pub.m, &priv->s); -- vector_add(&priv->pub.t, &error); -- -- CBB cbb; -- CBB_init_fixed(&cbb, out_encoded_public_key, KYBER_PUBLIC_KEY_BYTES); -- if (!kyber_marshal_public_key(&cbb, &priv->pub)) { -- abort(); -- } -- - BORINGSSL_keccak(priv->pub.public_key_hash, sizeof(priv->pub.public_key_hash), - out_encoded_public_key, KYBER_PUBLIC_KEY_BYTES, - boringssl_sha3_256); @@ -3261,18 +2817,65 @@ index d3ea02090..ccb5b3d9b 100644 - for (int i = 0; i < 32; i++) { - input[i] = constant_time_select_8(mask, prekey_and_randomness[i], - priv->fo_failure_secret[i]); -- } ++ for(i=0;is, SHAKE128_RATE, in, inlen, 0x1F); ++ state->pos = SHAKE128_RATE; + } + -// kyber_parse_public_key_no_hash parses |in| into |pub| but doesn't calculate -// the value of |pub->public_key_hash|. -static int kyber_parse_public_key_no_hash(struct public_key *pub, CBS *in) { @@ -3284,27 +2887,336 @@ index d3ea02090..ccb5b3d9b 100644 - } - matrix_expand(&pub->m, pub->rho); - return 1; --} -- ++/************************************************* ++* Name: shake128_squeezeblocks ++* ++* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of ++* SHAKE128_RATE bytes each. 
Can be called multiple times ++* to keep squeezing. Assumes new block has not yet been ++* started (state->pos = SHAKE128_RATE). ++* ++* Arguments: - uint8_t *out: pointer to output blocks ++* - size_t nblocks: number of blocks to be squeezed (written to output) ++* - keccak_state *state: pointer to input/output Keccak state ++**************************************************/ ++static void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state) ++{ ++ keccak_squeezeblocks(out, nblocks, state->s, SHAKE128_RATE); ++} ++ ++/************************************************* ++* Name: shake256_squeeze ++* ++* Description: Squeeze step of SHAKE256 XOF. Squeezes arbitrarily many ++* bytes. Can be called multiple times to keep squeezing. ++* ++* Arguments: - uint8_t *out: pointer to output blocks ++* - size_t outlen : number of bytes to be squeezed (written to output) ++* - keccak_state *state: pointer to input/output Keccak state ++**************************************************/ ++static void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state) ++{ ++ state->pos = keccak_squeeze(out, outlen, state->s, state->pos, SHAKE256_RATE); ++} ++ ++/************************************************* ++* Name: shake256_absorb_once ++* ++* Description: Initialize, absorb into and finalize SHAKE256 XOF; non-incremental. ++* ++* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state ++* - const uint8_t *in: pointer to input to be absorbed into s ++* - size_t inlen: length of input in bytes ++**************************************************/ ++static void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen) ++{ ++ keccak_absorb_once(state->s, SHAKE256_RATE, in, inlen, 0x1F); ++ state->pos = SHAKE256_RATE; ++} ++ ++/************************************************* ++* Name: shake256_squeezeblocks ++* ++* Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of ++* SHAKE256_RATE bytes each. 
Can be called multiple times ++* to keep squeezing. Assumes next block has not yet been ++* started (state->pos = SHAKE256_RATE). ++* ++* Arguments: - uint8_t *out: pointer to output blocks ++* - size_t nblocks: number of blocks to be squeezed (written to output) ++* - keccak_state *s: pointer to input/output Keccak state ++**************************************************/ ++static void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state) ++{ ++ keccak_squeezeblocks(out, nblocks, state->s, SHAKE256_RATE); ++} ++ ++/************************************************* ++* Name: shake256_absorb ++* ++* Description: Absorb step of the SHAKE256 XOF; incremental. ++* ++* Arguments: - keccak_state *state: pointer to (initialized) output Keccak state ++* - const uint8_t *in: pointer to input to be absorbed into s ++* - size_t inlen: length of input in bytes ++**************************************************/ ++static void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen) ++{ ++ state->pos = keccak_absorb(state->s, state->pos, SHAKE256_RATE, in, inlen); ++} ++ ++/************************************************* ++* Name: shake256_finalize ++* ++* Description: Finalize absorb step of the SHAKE256 XOF. ++* ++* Arguments: - keccak_state *state: pointer to Keccak state ++**************************************************/ ++static void shake256_finalize(keccak_state *state) ++{ ++ keccak_finalize(state->s, state->pos, SHAKE256_RATE, 0x1F); ++ state->pos = SHAKE256_RATE; ++} ++ ++/************************************************* ++* Name: keccak_init ++* ++* Description: Initializes the Keccak state. 
++* ++* Arguments: - uint64_t *s: pointer to Keccak state ++**************************************************/ ++static void keccak_init(uint64_t s[25]) ++{ ++ unsigned int i; ++ for(i=0;i<25;i++) ++ s[i] = 0; ++} ++ ++/************************************************* ++* Name: shake256_init ++* ++* Description: Initializes Keccak state for use as SHAKE256 XOF ++* ++* Arguments: - keccak_state *state: pointer to (uninitialized) Keccak state ++**************************************************/ ++static void shake256_init(keccak_state *state) ++{ ++ keccak_init(state->s); ++ state->pos = 0; ++} ++ ++ ++/************************************************* ++* Name: shake256 ++* ++* Description: SHAKE256 XOF with non-incremental API ++* ++* Arguments: - uint8_t *out: pointer to output ++* - size_t outlen: requested output length in bytes ++* - const uint8_t *in: pointer to input ++* - size_t inlen: length of input in bytes ++**************************************************/ ++static void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen) ++{ ++ size_t nblocks; ++ keccak_state state; ++ ++ shake256_absorb_once(&state, in, inlen); ++ nblocks = outlen/SHAKE256_RATE; ++ shake256_squeezeblocks(out, nblocks, &state); ++ outlen -= nblocks*SHAKE256_RATE; ++ out += nblocks*SHAKE256_RATE; ++ shake256_squeeze(out, outlen, &state); ++} ++ ++/************************************************* ++* Name: sha3_256 ++* ++* Description: SHA3-256 with non-incremental API ++* ++* Arguments: - uint8_t *h: pointer to output (32 bytes) ++* - const uint8_t *in: pointer to input ++* - size_t inlen: length of input in bytes ++**************************************************/ ++static void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen) ++{ ++ unsigned int i; ++ uint64_t s[25]; ++ ++ keccak_absorb_once(s, SHA3_256_RATE, in, inlen, 0x06); ++ KeccakF1600_StatePermute(s); ++ for(i=0;i<4;i++) ++ store64(h+8*i,s[i]); ++} ++ 
++/************************************************* ++* Name: sha3_512 ++* ++* Description: SHA3-512 with non-incremental API ++* ++* Arguments: - uint8_t *h: pointer to output (64 bytes) ++* - const uint8_t *in: pointer to input ++* - size_t inlen: length of input in bytes ++**************************************************/ ++static void sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen) ++{ ++ unsigned int i; ++ uint64_t s[25]; ++ ++ keccak_absorb_once(s, SHA3_512_RATE, in, inlen, 0x06); ++ KeccakF1600_StatePermute(s); ++ for(i=0;i<8;i++) ++ store64(h+8*i,s[i]); ++} ++ ++// ++// symmetric-shake.c ++// ++ ++/************************************************* ++* Name: kyber_shake128_absorb ++* ++* Description: Absorb step of the SHAKE128 specialized for the Kyber context. ++* ++* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state ++* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state ++* - uint8_t i: additional byte of input ++* - uint8_t j: additional byte of input ++**************************************************/ ++static void kyber_shake128_absorb(keccak_state *state, ++ const uint8_t seed[KYBER_SYMBYTES], ++ uint8_t x, ++ uint8_t y) ++{ ++ uint8_t extseed[KYBER_SYMBYTES+2]; ++ ++ memcpy(extseed, seed, KYBER_SYMBYTES); ++ extseed[KYBER_SYMBYTES+0] = x; ++ extseed[KYBER_SYMBYTES+1] = y; ++ ++ shake128_absorb_once(state, extseed, sizeof(extseed)); + } + -int KYBER_parse_public_key(struct KYBER_public_key *public_key, CBS *in) { - struct public_key *pub = public_key_from_external(public_key); - CBS orig_in = *in; - if (!kyber_parse_public_key_no_hash(pub, in) || // - CBS_len(in) != 0) { -- return 0; -- } ++/************************************************* ++* Name: kyber_shake256_prf ++* ++* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input ++* and then generates outlen bytes of SHAKE256 output ++* ++* Arguments: - uint8_t *out: pointer to output ++* - size_t 
outlen: number of requested output bytes ++* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) ++* - uint8_t nonce: single-byte nonce (public PRF input) ++**************************************************/ ++static void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce) ++{ ++ uint8_t extkey[KYBER_SYMBYTES+1]; ++ ++ memcpy(extkey, key, KYBER_SYMBYTES); ++ extkey[KYBER_SYMBYTES] = nonce; ++ ++ shake256(out, outlen, extkey, sizeof(extkey)); ++} ++ ++// ++// kem.c ++// ++ ++// Modified crypto_kem_keypair to BoringSSL style API ++void generate_key(struct public_key *out_pub, struct private_key *out_priv, ++ const uint8_t seed[KYBER_GENERATE_KEY_BYTES]) ++{ ++ size_t i; ++ uint8_t* pk = &out_pub->opaque[0]; ++ uint8_t* sk = &out_priv->opaque[0]; ++ ++ indcpa_keypair(pk, sk, seed); ++ for(i=0;iopaque[0]; ++ uint8_t *ct = out_ciphertext; ++ ++ uint8_t buf[2*KYBER_SYMBYTES]; ++ /* Will contain key, coins */ ++ uint8_t kr[2*KYBER_SYMBYTES]; ++ ++ memcpy(buf, seed, KYBER_SYMBYTES); ++ ++ /* Don't release system RNG output */ ++ hash_h(buf, buf, KYBER_SYMBYTES); ++ ++ /* Multitarget countermeasure for coins + contributory KEM */ ++ hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); ++ hash_g(kr, buf, 2*KYBER_SYMBYTES); ++ ++ /* coins are in kr+KYBER_SYMBYTES */ ++ if(!indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES)) + return 0; ++ ++ if (mlkem == 1) { ++ memcpy(ss, kr, KYBER_SYMBYTES); ++ } else { ++ /* overwrite coins in kr with H(c) */ ++ hash_h(kr+KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); ++ /* hash concatenation of pre-k and H(c) to k */ ++ kdf(ss, kr, 2*KYBER_SYMBYTES); + } - BORINGSSL_keccak(pub->public_key_hash, sizeof(pub->public_key_hash), - CBS_data(&orig_in), CBS_len(&orig_in), boringssl_sha3_256); -- return 1; --} -- + return 1; + } + -int KYBER_marshal_private_key(CBB *out, - const struct KYBER_private_key *private_key) { - const struct private_key *const priv = 
private_key_from_external(private_key); - uint8_t *s_output; - if (!CBB_add_space(out, &s_output, kEncodedVectorSize)) { - return 0; -- } ++// Modified crypto_kem_decap to BoringSSL style API ++void decap(uint8_t out_shared_key[KYBER_SSBYTES], ++ const struct private_key *in_priv, ++ const uint8_t *ct, size_t ciphertext_len, int mlkem) ++{ ++ uint8_t *ss = out_shared_key; ++ const uint8_t *sk = &in_priv->opaque[0]; ++ ++ size_t i; ++ int fail = 1; ++ uint8_t buf[2*KYBER_SYMBYTES]; ++ /* Will contain key, coins */ ++ uint8_t kr[2*KYBER_SYMBYTES]; ++ uint8_t cmp[KYBER_CIPHERTEXTBYTES]; ++ const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; ++ ++ if (ciphertext_len == KYBER_CIPHERTEXTBYTES) { ++ indcpa_dec(buf, ct, sk); ++ ++ /* Multitarget countermeasure for coins + contributory KEM */ ++ for(i=0;is, kLog2Prime); - if (!kyber_marshal_public_key(out, &priv->pub) || - !CBB_add_bytes(out, priv->pub.public_key_hash, @@ -3312,14 +3224,45 @@ index d3ea02090..ccb5b3d9b 100644 - !CBB_add_bytes(out, priv->fo_failure_secret, - sizeof(priv->fo_failure_secret))) { - return 0; -- } ++ ++ if (mlkem == 1) { ++ /* Compute shared secret in case of rejection: ss2 = PRF(z || c). 
*/ ++ uint8_t ss2[KYBER_SYMBYTES]; ++ keccak_state ks; ++ shake256_init(&ks); ++ shake256_absorb( ++ &ks, ++ sk + KYBER_SECRETKEYBYTES - KYBER_SYMBYTES, ++ KYBER_SYMBYTES ++ ); ++ shake256_absorb(&ks, ct, ciphertext_len); ++ shake256_finalize(&ks); ++ shake256_squeeze(ss2, KYBER_SYMBYTES, &ks); ++ ++ /* Set ss2 to the real shared secret if c = c' */ ++ cmov(ss2, kr, KYBER_SYMBYTES, 1-fail); ++ memcpy(ss, ss2, KYBER_SYMBYTES); ++ } else { ++ /* overwrite coins in kr with H(c) */ ++ hash_h(kr+KYBER_SYMBYTES, ct, ciphertext_len); ++ ++ /* Overwrite pre-k with z on re-encryption failure */ ++ cmov(kr, sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, KYBER_SYMBYTES, fail); ++ ++ /* hash concatenation of pre-k and H(c) to k */ ++ kdf(ss, kr, 2*KYBER_SYMBYTES); + } - return 1; --} -- + } + -int KYBER_parse_private_key(struct KYBER_private_key *out_private_key, - CBS *in) { - struct private_key *const priv = private_key_from_external(out_private_key); -- ++void marshal_public_key(uint8_t out[KYBER_PUBLICKEYBYTES], ++ const struct public_key *in_pub) { ++ memcpy(out, &in_pub->opaque, KYBER_PUBLICKEYBYTES); ++} + - CBS s_bytes; - if (!CBS_get_bytes(in, &s_bytes, kEncodedVectorSize) || - !vector_decode(&priv->s, CBS_data(&s_bytes), kLog2Prime) || @@ -3687,7 +3630,15 @@ index cafae9d17..a05eb8957 100644 - } opaque; +struct KYBER512_private_key { + uint8_t opaque[KYBER512_PRIVATE_KEY_BYTES]; -+}; + }; +- +-// KYBER_private_key contains a Kyber768 private key. The contents of this +-// object should never leave the address space since the format is unstable. +-struct KYBER_private_key { +- union { +- uint8_t bytes[512 * (3 + 3 + 9) + 32 + 32 + 32]; +- uint16_t alignment; +- } opaque; +struct KYBER768_private_key { + uint8_t opaque[KYBER768_PRIVATE_KEY_BYTES]; +}; @@ -3698,34 +3649,17 @@ index cafae9d17..a05eb8957 100644 + uint8_t opaque[KYBER768_PUBLIC_KEY_BYTES]; }; --// KYBER_private_key contains a Kyber768 private key. 
The contents of this --// object should never leave the address space since the format is unstable. --struct KYBER_private_key { -- union { -- uint8_t bytes[512 * (3 + 3 + 9) + 32 + 32 + 32]; -- uint16_t alignment; -- } opaque; --}; -+// KYBER_GENERATE_KEY_BYTES is the number of bytes of entropy needed to -+// generate a keypair. -+#define KYBER_GENERATE_KEY_BYTES 64 - -// KYBER_PUBLIC_KEY_BYTES is the number of bytes in an encoded Kyber768 public -// key. -#define KYBER_PUBLIC_KEY_BYTES 1184 -+// KYBER_ENCAP_BYTES is the number of bytes of entropy needed to encapsulate a -+// session key. -+#define KYBER_ENCAP_BYTES 32 - +- -// KYBER_generate_key generates a random public/private key pair, writes the -// encoded public key to |out_encoded_public_key| and sets |out_private_key| to -// the private key. -OPENSSL_EXPORT void KYBER_generate_key( - uint8_t out_encoded_public_key[KYBER_PUBLIC_KEY_BYTES], - struct KYBER_private_key *out_private_key); -+// KYBER_KEY_BYTES is the number of bytes in a shared key. -+#define KYBER_KEY_BYTES 32 - +- -// KYBER_public_from_private sets |*out_public_key| to the public key that -// corresponds to |private_key|. (This is faster than parsing the output of -// |KYBER_generate_key| if, for some reason, you need to encapsulate to a key @@ -3733,20 +3667,10 @@ index cafae9d17..a05eb8957 100644 -OPENSSL_EXPORT void KYBER_public_from_private( - struct KYBER_public_key *out_public_key, - const struct KYBER_private_key *private_key); -+// KYBER512_generate_key is a deterministic function that outputs a public and -+// private key based on the given entropy. -+OPENSSL_EXPORT void KYBER512_generate_key( -+ struct KYBER512_public_key *out_pub, struct KYBER512_private_key *out_priv, -+ const uint8_t input[KYBER_GENERATE_KEY_BYTES]); - +- -// KYBER_CIPHERTEXT_BYTES is number of bytes in the Kyber768 ciphertext. 
-#define KYBER_CIPHERTEXT_BYTES 1088 -+// KYBER768_generate_key is a deterministic function that outputs a public and -+// private key based on the given entropy. -+OPENSSL_EXPORT void KYBER768_generate_key( -+ struct KYBER768_public_key *out_pub, struct KYBER768_private_key *out_priv, -+ const uint8_t input[KYBER_GENERATE_KEY_BYTES]); - +- -// KYBER_encap encrypts a random secret key of length |out_shared_secret_len| to -// |public_key|, writes the ciphertext to |ciphertext|, and writes the random -// key to |out_shared_secret|. The party calling |KYBER_decap| must already know @@ -3755,15 +3679,7 @@ index cafae9d17..a05eb8957 100644 - uint8_t *out_shared_secret, - size_t out_shared_secret_len, - const struct KYBER_public_key *public_key); -+// KYBER512_encap is a deterministic function the generates and encrypts a random -+// session key from the given entropy, writing those values to |out_shared_key| -+// and |out_ciphertext|, respectively. If |mlkem| is 1, will use ML-KEM-512. -+OPENSSL_EXPORT int KYBER512_encap(uint8_t out_ciphertext[KYBER512_CIPHERTEXT_BYTES], -+ uint8_t out_shared_key[KYBER_KEY_BYTES], -+ const struct KYBER512_public_key *in_pub, -+ const uint8_t in[KYBER_ENCAP_BYTES], -+ int mlkem); - +- -// KYBER_decap decrypts a key of length |out_shared_secret_len| from -// |ciphertext| using |private_key| and writes it to |out_shared_secret|. If -// |ciphertext| is invalid, |out_shared_secret| is filled with a key that @@ -3776,57 +3692,23 @@ index cafae9d17..a05eb8957 100644 - uint8_t *out_shared_secret, size_t out_shared_secret_len, - const uint8_t ciphertext[KYBER_CIPHERTEXT_BYTES], - const struct KYBER_private_key *private_key); -+// KYBER768_encap is a deterministic function the generates and encrypts a random -+// session key from the given entropy, writing those values to |out_shared_key| -+// and |out_ciphertext|, respectively. If |mlkem| is 1, will use ML-KEM-768. 
-+OPENSSL_EXPORT int KYBER768_encap(uint8_t out_ciphertext[KYBER768_CIPHERTEXT_BYTES], -+ uint8_t out_shared_key[KYBER_KEY_BYTES], -+ const struct KYBER768_public_key *in_pub, -+ const uint8_t in[KYBER_ENCAP_BYTES], -+ int mlkem); - -+// KYBER_decap decrypts a session key from |ciphertext_len| bytes of -+// |ciphertext|. If the ciphertext is valid, the decrypted key is written to -+// |out_shared_key|. Otherwise a key dervied from |ciphertext| and a secret key (kept -+// in |in_priv|) is written. If the ciphertext is the wrong length then it will -+// leak which was done via side-channels. Otherwise it should perform either -+// action in constant-time. If |mlkem| is 1, will use ML-KEM-512. -+OPENSSL_EXPORT void KYBER512_decap(uint8_t out_shared_key[KYBER_KEY_BYTES], -+ const struct KYBER512_private_key *in_priv, -+ const uint8_t *ciphertext, size_t ciphertext_len, -+ int mlkem); - +- +- -// Serialisation of keys. -+// KYBER_decap decrypts a session key from |ciphertext_len| bytes of -+// |ciphertext|. If the ciphertext is valid, the decrypted key is written to -+// |out_shared_key|. Otherwise a key dervied from |ciphertext| and a secret key (kept -+// in |in_priv|) is written. If the ciphertext is the wrong length then it will -+// leak which was done via side-channels. Otherwise it should perform either -+// action in constant-time. If |mlkem| is 1, will use ML-KEM-768. -+OPENSSL_EXPORT void KYBER768_decap(uint8_t out_shared_key[KYBER_KEY_BYTES], -+ const struct KYBER768_private_key *in_priv, -+ const uint8_t *ciphertext, size_t ciphertext_len, -+ int mlkem); - +- -// KYBER_marshal_public_key serializes |public_key| to |out| in the standard -// format for Kyber public keys. It returns one on success or zero on allocation -// error. -OPENSSL_EXPORT int KYBER_marshal_public_key( - CBB *out, const struct KYBER_public_key *public_key); -+// KYBER512_marshal_public_key serialises |in_pub| to |out|. 
-+OPENSSL_EXPORT void KYBER512_marshal_public_key( -+ uint8_t out[KYBER512_PUBLIC_KEY_BYTES], const struct KYBER512_public_key *in_pub); - +- -// KYBER_parse_public_key parses a public key, in the format generated by -// |KYBER_marshal_public_key|, from |in| and writes the result to -// |out_public_key|. It returns one on success or zero on parse error or if -// there are trailing bytes in |in|. -OPENSSL_EXPORT int KYBER_parse_public_key( - struct KYBER_public_key *out_public_key, CBS *in); -+// KYBER768_marshal_public_key serialises |in_pub| to |out|. -+OPENSSL_EXPORT void KYBER768_marshal_public_key( -+ uint8_t out[KYBER768_PUBLIC_KEY_BYTES], const struct KYBER768_public_key *in_pub); - +- -// KYBER_marshal_private_key serializes |private_key| to |out| in the standard -// format for Kyber private keys. It returns one on success or zero on -// allocation error. @@ -3843,10 +3725,82 @@ index cafae9d17..a05eb8957 100644 -// there are trailing bytes in |in|. -OPENSSL_EXPORT int KYBER_parse_private_key( - struct KYBER_private_key *out_private_key, CBS *in); +- ++// KYBER_GENERATE_KEY_BYTES is the number of bytes of entropy needed to ++// generate a keypair. ++#define KYBER_GENERATE_KEY_BYTES 64 ++ ++// KYBER_ENCAP_BYTES is the number of bytes of entropy needed to encapsulate a ++// session key. ++#define KYBER_ENCAP_BYTES 32 ++ ++// KYBER_KEY_BYTES is the number of bytes in a shared key. ++#define KYBER_KEY_BYTES 32 ++ ++// KYBER512_generate_key is a deterministic function that outputs a public and ++// private key based on the given entropy. ++OPENSSL_EXPORT void KYBER512_generate_key( ++ struct KYBER512_public_key *out_pub, struct KYBER512_private_key *out_priv, ++ const uint8_t input[KYBER_GENERATE_KEY_BYTES]); ++ ++// KYBER768_generate_key is a deterministic function that outputs a public and ++// private key based on the given entropy. 
++OPENSSL_EXPORT void KYBER768_generate_key( ++ struct KYBER768_public_key *out_pub, struct KYBER768_private_key *out_priv, ++ const uint8_t input[KYBER_GENERATE_KEY_BYTES]); ++ ++// KYBER512_encap is a deterministic function that generates and encrypts a random ++// session key from the given entropy, writing those values to |out_shared_key| ++// and |out_ciphertext|, respectively. If |mlkem| is 1, will use ML-KEM-512. ++OPENSSL_EXPORT int KYBER512_encap(uint8_t out_ciphertext[KYBER512_CIPHERTEXT_BYTES], ++ uint8_t out_shared_key[KYBER_KEY_BYTES], ++ const struct KYBER512_public_key *in_pub, ++ const uint8_t in[KYBER_ENCAP_BYTES], ++ int mlkem); ++ ++// KYBER768_encap is a deterministic function that generates and encrypts a random ++// session key from the given entropy, writing those values to |out_shared_key| ++// and |out_ciphertext|, respectively. If |mlkem| is 1, will use ML-KEM-768. ++OPENSSL_EXPORT int KYBER768_encap(uint8_t out_ciphertext[KYBER768_CIPHERTEXT_BYTES], ++ uint8_t out_shared_key[KYBER_KEY_BYTES], ++ const struct KYBER768_public_key *in_pub, ++ const uint8_t in[KYBER_ENCAP_BYTES], ++ int mlkem); ++ ++// KYBER512_decap decrypts a session key from |ciphertext_len| bytes of ++// |ciphertext|. If the ciphertext is valid, the decrypted key is written to ++// |out_shared_key|. Otherwise a key derived from |ciphertext| and a secret key (kept ++// in |in_priv|) is written. If the ciphertext is the wrong length then it will ++// leak which was done via side-channels. Otherwise it should perform either ++// action in constant-time. If |mlkem| is 1, will use ML-KEM-512. ++OPENSSL_EXPORT void KYBER512_decap(uint8_t out_shared_key[KYBER_KEY_BYTES], ++ const struct KYBER512_private_key *in_priv, ++ const uint8_t *ciphertext, size_t ciphertext_len, ++ int mlkem); ++ ++// KYBER768_decap decrypts a session key from |ciphertext_len| bytes of ++// |ciphertext|. If the ciphertext is valid, the decrypted key is written to ++// |out_shared_key|. 
Otherwise a key derived from |ciphertext| and a secret key (kept ++// in |in_priv|) is written. If the ciphertext is the wrong length then it will ++// leak which was done via side-channels. Otherwise it should perform either ++// action in constant-time. If |mlkem| is 1, will use ML-KEM-768. ++OPENSSL_EXPORT void KYBER768_decap(uint8_t out_shared_key[KYBER_KEY_BYTES], ++ const struct KYBER768_private_key *in_priv, ++ const uint8_t *ciphertext, size_t ciphertext_len, ++ int mlkem); ++ ++// KYBER512_marshal_public_key serialises |in_pub| to |out|. ++OPENSSL_EXPORT void KYBER512_marshal_public_key( ++ uint8_t out[KYBER512_PUBLIC_KEY_BYTES], const struct KYBER512_public_key *in_pub); ++ ++// KYBER768_marshal_public_key serialises |in_pub| to |out|. ++OPENSSL_EXPORT void KYBER768_marshal_public_key( ++ uint8_t out[KYBER768_PUBLIC_KEY_BYTES], const struct KYBER768_public_key *in_pub); ++ +// KYBER512_parse_public_key sets |*out| to the public-key encoded in |in|. +OPENSSL_EXPORT void KYBER512_parse_public_key( + struct KYBER512_public_key *out, const uint8_t in[KYBER512_PUBLIC_KEY_BYTES]); - ++ +// KYBER768_parse_public_key sets |*out| to the public-key encoded in |in|. 
+OPENSSL_EXPORT void KYBER768_parse_public_key( + struct KYBER768_public_key *out, const uint8_t in[KYBER768_PUBLIC_KEY_BYTES]); @@ -3912,7 +3866,7 @@ index ba2f5bc9e..d7ef5153a 100644 crypto/pkcs8/test/no_encryption.p12 crypto/pkcs8/test/nss.p12 diff --git a/ssl/extensions.cc b/ssl/extensions.cc -index b13400097..4655b1881 100644 +index b13400097..894396414 100644 --- a/ssl/extensions.cc +++ b/ssl/extensions.cc @@ -207,6 +207,10 @@ static bool tls1_check_duplicate_extensions(const CBS *cbs) { @@ -3926,6 +3880,14 @@ index b13400097..4655b1881 100644 return true; default: return false; +@@ -307,6 +311,7 @@ bool ssl_client_hello_get_extension(const SSL_CLIENT_HELLO *client_hello, + } + + static const uint16_t kDefaultGroups[] = { ++ SSL_GROUP_X25519_MLKEM768, + SSL_GROUP_X25519, + SSL_GROUP_SECP256R1, + SSL_GROUP_SECP384R1, diff --git a/ssl/ssl_key_share.cc b/ssl/ssl_key_share.cc index 694bec11d..3e4d2e7c4 100644 --- a/ssl/ssl_key_share.cc @@ -3938,7 +3900,7 @@ index 694bec11d..3e4d2e7c4 100644 #include #include #include -@@ -191,63 +192,145 @@ class X25519KeyShare : public SSLKeyShare { +@@ -191,63 +192,292 @@ class X25519KeyShare : public SSLKeyShare { uint8_t private_key_[32]; }; @@ -3947,27 +3909,18 @@ index 694bec11d..3e4d2e7c4 100644 public: - X25519Kyber768KeyShare() {} + P256Kyber768Draft00KeyShare() {} - -- uint16_t GroupID() const override { -- return SSL_GROUP_X25519_KYBER768_DRAFT00; -- } ++ + uint16_t GroupID() const override { return SSL_GROUP_P256_KYBER768_DRAFT00; } - - bool Generate(CBB *out) override { -- uint8_t x25519_public_key[32]; -- X25519_keypair(x25519_public_key, x25519_private_key_); ++ ++ bool Generate(CBB *out) override { + assert(!p256_private_key_); - -- uint8_t kyber_public_key[KYBER_PUBLIC_KEY_BYTES]; -- KYBER_generate_key(kyber_public_key, &kyber_private_key_); ++ + // Set up a shared |BN_CTX| for P-256 operations. 
+ UniquePtr bn_ctx(BN_CTX_new()); + if (!bn_ctx) { + return false; + } - -- if (!CBB_add_bytes(out, x25519_public_key, sizeof(x25519_public_key)) || -- !CBB_add_bytes(out, kyber_public_key, sizeof(kyber_public_key))) { ++ + BN_CTXScope scope(bn_ctx.get()); + + // Generate a P-256 private key. @@ -3999,58 +3952,33 @@ index 694bec11d..3e4d2e7c4 100644 + + uint8_t kyber_public_key_bytes[KYBER768_PUBLIC_KEY_BYTES]; + KYBER768_marshal_public_key(kyber_public_key_bytes, &kyber_public_key); -+ + +- uint16_t GroupID() const override { +- return SSL_GROUP_X25519_KYBER768_DRAFT00; + if (!CBB_add_bytes(out, kyber_public_key_bytes, + sizeof(kyber_public_key_bytes))) { - return false; - } - - return true; ++ return false; ++ } ++ ++ return true; } -- bool Encap(CBB *out_ciphertext, Array *out_secret, -- uint8_t *out_alert, Span peer_key) override { -- Array secret; -- if (!secret.Init(32 + 32)) { -- return false; -- } + bool Encap(CBB *out_public_key, Array *out_secret, + uint8_t *out_alert, Span peer_key) override { + assert(!p256_private_key_); - -- uint8_t x25519_public_key[32]; -- X25519_keypair(x25519_public_key, x25519_private_key_); -- KYBER_public_key peer_kyber_pub; -- CBS peer_key_cbs; -- CBS peer_x25519_cbs; -- CBS peer_kyber_cbs; -- CBS_init(&peer_key_cbs, peer_key.data(), peer_key.size()); -- if (!CBS_get_bytes(&peer_key_cbs, &peer_x25519_cbs, 32) || -- !CBS_get_bytes(&peer_key_cbs, &peer_kyber_cbs, -- KYBER_PUBLIC_KEY_BYTES) || -- CBS_len(&peer_key_cbs) != 0 || -- !X25519(secret.data(), x25519_private_key_, -- CBS_data(&peer_x25519_cbs)) || -- !KYBER_parse_public_key(&peer_kyber_pub, &peer_kyber_cbs)) { ++ + if (peer_key.size() != 65 + KYBER768_PUBLIC_KEY_BYTES) { - *out_alert = SSL_AD_DECODE_ERROR; - OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); - return false; - } - -- uint8_t kyber_ciphertext[KYBER_CIPHERTEXT_BYTES]; -- KYBER_encap(kyber_ciphertext, secret.data() + 32, secret.size() - 32, -- &peer_kyber_pub); ++ *out_alert = SSL_AD_DECODE_ERROR; ++ 
OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); ++ return false; ++ } ++ + // Set up a shared |BN_CTX| for P-256 operations. + UniquePtr bn_ctx(BN_CTX_new()); + if (!bn_ctx) { + return false; + } - -- if (!CBB_add_bytes(out_ciphertext, x25519_public_key, -- sizeof(x25519_public_key)) || -- !CBB_add_bytes(out_ciphertext, kyber_ciphertext, -- sizeof(kyber_ciphertext))) { ++ + BN_CTXScope scope(bn_ctx.get()); + + UniquePtr group; @@ -4119,35 +4047,30 @@ index 694bec11d..3e4d2e7c4 100644 + return false; + } + if(!CBB_add_bytes(out_public_key, ciphertext, sizeof(ciphertext))) { - return false; - } - -@@ -256,30 +339,380 @@ class X25519Kyber768KeyShare : public SSLKeyShare { - } - - bool Decap(Array *out_secret, uint8_t *out_alert, -- Span ciphertext) override { ++ return false; ++ } ++ ++ *out_secret = std::move(secret); ++ return true; ++ } ++ ++ bool Decap(Array *out_secret, uint8_t *out_alert, + Span peer_key) override { + assert(p256_private_key_); - *out_alert = SSL_AD_INTERNAL_ERROR; - - Array secret; -- if (!secret.Init(32 + 32)) { ++ *out_alert = SSL_AD_INTERNAL_ERROR; ++ ++ Array secret; + if (!secret.Init(32 + KYBER_KEY_BYTES)) { + OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE); - return false; - } - -- if (ciphertext.size() != 32 + KYBER_CIPHERTEXT_BYTES || -- !X25519(secret.data(), x25519_private_key_, ciphertext.data())) { ++ return false; ++ } ++ + if (peer_key.size() != 65 + KYBER768_CIPHERTEXT_BYTES) { - *out_alert = SSL_AD_DECODE_ERROR; - OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); - return false; - } - -- KYBER_decap(secret.data() + 32, secret.size() - 32, ciphertext.data() + 32, -- &kyber_private_key_); ++ *out_alert = SSL_AD_DECODE_ERROR; ++ OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); ++ return false; ++ } ++ + // Set up a shared |BN_CTX| for P-256 operations. 
+ UniquePtr bn_ctx(BN_CTX_new()); + if (!bn_ctx) { @@ -4213,10 +4136,12 @@ index 694bec11d..3e4d2e7c4 100644 + + uint16_t GroupID() const override { return group_id_; } + -+ bool Generate(CBB *out) override { -+ uint8_t x25519_public_key[32]; -+ X25519_keypair(x25519_public_key, x25519_private_key_); -+ + bool Generate(CBB *out) override { + uint8_t x25519_public_key[32]; + X25519_keypair(x25519_public_key, x25519_private_key_); + +- uint8_t kyber_public_key[KYBER_PUBLIC_KEY_BYTES]; +- KYBER_generate_key(kyber_public_key, &kyber_private_key_); + uint8_t kyber_entropy[KYBER_GENERATE_KEY_BYTES]; + KYBER768_public_key kyber_public_key; + RAND_bytes(kyber_entropy, sizeof(kyber_entropy)); @@ -4224,26 +4149,42 @@ index 694bec11d..3e4d2e7c4 100644 + + uint8_t kyber_public_key_bytes[KYBER768_PUBLIC_KEY_BYTES]; + KYBER768_marshal_public_key(kyber_public_key_bytes, &kyber_public_key); -+ -+ if (!CBB_add_bytes(out, x25519_public_key, sizeof(x25519_public_key)) || + + if (!CBB_add_bytes(out, x25519_public_key, sizeof(x25519_public_key)) || +- !CBB_add_bytes(out, kyber_public_key, sizeof(kyber_public_key))) { + !CBB_add_bytes(out, kyber_public_key_bytes, + sizeof(kyber_public_key_bytes))) { -+ return false; -+ } -+ -+ return true; -+ } -+ + return false; + } + + return true; + } + +- bool Encap(CBB *out_ciphertext, Array *out_secret, +- uint8_t *out_alert, Span peer_key) override { + bool Encap(CBB *out_public_key, Array *out_secret, + uint8_t *out_alert, Span peer_key) override { -+ Array secret; + Array secret; +- if (!secret.Init(32 + 32)) { + if (!secret.Init(32 + KYBER_KEY_BYTES)) { + OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE); -+ return false; -+ } -+ -+ uint8_t x25519_public_key[32]; -+ X25519_keypair(x25519_public_key, x25519_private_key_); + return false; + } + + uint8_t x25519_public_key[32]; + X25519_keypair(x25519_public_key, x25519_private_key_); +- KYBER_public_key peer_kyber_pub; +- CBS peer_key_cbs; +- CBS peer_x25519_cbs; +- CBS peer_kyber_cbs; +- 
CBS_init(&peer_key_cbs, peer_key.data(), peer_key.size()); +- if (!CBS_get_bytes(&peer_key_cbs, &peer_x25519_cbs, 32) || +- !CBS_get_bytes(&peer_key_cbs, &peer_kyber_cbs, +- KYBER_PUBLIC_KEY_BYTES) || +- CBS_len(&peer_key_cbs) != 0 || +- !X25519(secret.data(), x25519_private_key_, +- CBS_data(&peer_x25519_cbs)) || +- !KYBER_parse_public_key(&peer_kyber_pub, &peer_kyber_cbs)) { + + KYBER768_public_key peer_public_key; + if (peer_key.size() != 32 + KYBER768_PUBLIC_KEY_BYTES) { @@ -4255,30 +4196,36 @@ index 694bec11d..3e4d2e7c4 100644 + KYBER768_parse_public_key(&peer_public_key, peer_key.data() + 32); + + if (!X25519(secret.data(), x25519_private_key_, peer_key.data())) { -+ *out_alert = SSL_AD_DECODE_ERROR; -+ OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); -+ return false; -+ } -+ + *out_alert = SSL_AD_DECODE_ERROR; + OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); + return false; + } + +- uint8_t kyber_ciphertext[KYBER_CIPHERTEXT_BYTES]; +- KYBER_encap(kyber_ciphertext, secret.data() + 32, secret.size() - 32, +- &peer_kyber_pub); + uint8_t ciphertext[KYBER768_CIPHERTEXT_BYTES]; + uint8_t entropy[KYBER_ENCAP_BYTES]; + RAND_bytes(entropy, sizeof(entropy)); -+ + +- if (!CBB_add_bytes(out_ciphertext, x25519_public_key, + if(!KYBER768_encap(ciphertext, secret.data() + 32, &peer_public_key, entropy, 0)) { + *out_alert = SSL_AD_ILLEGAL_PARAMETER; + return false; + } + if(!CBB_add_bytes(out_public_key, x25519_public_key, -+ sizeof(x25519_public_key)) || + sizeof(x25519_public_key)) || +- !CBB_add_bytes(out_ciphertext, kyber_ciphertext, +- sizeof(kyber_ciphertext))) { + !CBB_add_bytes(out_public_key, ciphertext, sizeof(ciphertext))) { -+ return false; -+ } -+ -+ *out_secret = std::move(secret); -+ return true; -+ } -+ -+ bool Decap(Array *out_secret, uint8_t *out_alert, + return false; + } + +@@ -256,30 +486,233 @@ class X25519Kyber768KeyShare : public SSLKeyShare { + } + + bool Decap(Array *out_secret, uint8_t *out_alert, +- Span ciphertext) override { + Span peer_key) override { + 
*out_alert = SSL_AD_INTERNAL_ERROR; + @@ -4298,13 +4245,12 @@ index 694bec11d..3e4d2e7c4 100644 + KYBER768_decap(secret.data() + 32, &kyber_private_key_, + peer_key.data() + 32, peer_key.size() - 32, 0); + - *out_secret = std::move(secret); - return true; - } - - private: - uint8_t x25519_private_key_[32]; -- KYBER_private_key kyber_private_key_; ++ *out_secret = std::move(secret); ++ return true; ++ } ++ ++ private: ++ uint8_t x25519_private_key_[32]; + KYBER768_private_key kyber_private_key_; + uint16_t group_id_; +}; @@ -4381,22 +4327,27 @@ index 694bec11d..3e4d2e7c4 100644 + + bool Decap(Array *out_secret, uint8_t *out_alert, + Span peer_key) override { -+ *out_alert = SSL_AD_INTERNAL_ERROR; -+ -+ Array secret; + *out_alert = SSL_AD_INTERNAL_ERROR; + + Array secret; +- if (!secret.Init(32 + 32)) { + if (!secret.Init(32 + KYBER_KEY_BYTES)) { + OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE); -+ return false; -+ } -+ + return false; + } + +- if (ciphertext.size() != 32 + KYBER_CIPHERTEXT_BYTES || +- !X25519(secret.data(), x25519_private_key_, ciphertext.data())) { + if (peer_key.size() != KYBER768_CIPHERTEXT_BYTES + 32 || + !X25519(secret.data() + 32, x25519_private_key_, + peer_key.data() + KYBER768_CIPHERTEXT_BYTES )) { -+ *out_alert = SSL_AD_DECODE_ERROR; -+ OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); -+ return false; -+ } -+ + *out_alert = SSL_AD_DECODE_ERROR; + OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); + return false; + } + +- KYBER_decap(secret.data() + 32, secret.size() - 32, ciphertext.data() + 32, +- &kyber_private_key_); + KYBER768_decap(secret.data(), &kyber_private_key_, + peer_key.data(), peer_key.size() - 32, 1); + @@ -4500,12 +4451,13 @@ index 694bec11d..3e4d2e7c4 100644 + KYBER512_decap(secret.data() + 32, &kyber_private_key_, + peer_key.data() + 32, peer_key.size() - 32, 0); + -+ *out_secret = std::move(secret); -+ return true; -+ } -+ -+ private: -+ uint8_t x25519_private_key_[32]; + *out_secret = std::move(secret); + return true; + } + + private: 
+ uint8_t x25519_private_key_[32]; +- KYBER_private_key kyber_private_key_; + KYBER512_private_key kyber_private_key_; }; @@ -4788,3 +4740,6 @@ index 942dcade1..f31e9e244 100644 !SpeedSpx(selected) || // !SpeedHashToCurve(selected) || // !SpeedTrustToken("TrustToken-Exp1-Batch1", TRUST_TOKEN_experiment_v1(), 1, +-- +2.50.1 (Apple Git-155) + From 47c33f64284a905bd1c26dc59c5eec6f5f38bf8b Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Fri, 3 Oct 2025 13:48:57 +0200 Subject: [PATCH 2/2] pq patch: also enable P256Kyber768Draft00 by default --- boring-sys/patches/boring-pq.patch | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/boring-sys/patches/boring-pq.patch b/boring-sys/patches/boring-pq.patch index 405a0185..5c55eb8c 100644 --- a/boring-sys/patches/boring-pq.patch +++ b/boring-sys/patches/boring-pq.patch @@ -1,4 +1,4 @@ -From 6f1b1e1f451e61cd2bda0922eecaa8387397ac5a Mon Sep 17 00:00:00 2001 +From 969fc4fb866c94b6585c323d6e27571e5286f845 Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Thu, 2 Oct 2025 13:07:05 +0200 Subject: [PATCH] Add additional post-quantum key agreements @@ -20,7 +20,7 @@ This patch adds: 2. Supports for P256Kyber768Draft00 under 0xfe32, which we temporarily need for compliance reasons. (Note that this is not the codepoint allocated for that exchange in the IANA table.) - It also enables it in FIPS mode. + Enables by default and in FIPS mode. 3. Support for X25519Kyber768Draft00 under the old codepoint 0xfe31. 
@@ -46,12 +46,12 @@ Cf RTG-2076 RTG-2051 RTG-2508 RTG-2707 RTG-2607 RTG-3239 include/openssl/nid.h | 12 + include/openssl/ssl.h | 4 + sources.cmake | 2 - - ssl/extensions.cc | 5 + + ssl/extensions.cc | 6 + ssl/ssl_key_share.cc | 525 ++++++- ssl/ssl_lib.cc | 2 +- ssl/ssl_test.cc | 29 +- tool/speed.cc | 162 +- - 18 files changed, 3082 insertions(+), 1158 deletions(-) + 18 files changed, 3083 insertions(+), 1158 deletions(-) delete mode 100644 crypto/kyber/internal.h create mode 100644 crypto/kyber/kyber512.c create mode 100644 crypto/kyber/kyber768.c @@ -3866,7 +3866,7 @@ index ba2f5bc9e..d7ef5153a 100644 crypto/pkcs8/test/no_encryption.p12 crypto/pkcs8/test/nss.p12 diff --git a/ssl/extensions.cc b/ssl/extensions.cc -index b13400097..894396414 100644 +index b13400097..44a2d0f5c 100644 --- a/ssl/extensions.cc +++ b/ssl/extensions.cc @@ -207,6 +207,10 @@ static bool tls1_check_duplicate_extensions(const CBS *cbs) { @@ -3880,11 +3880,12 @@ index b13400097..894396414 100644 return true; default: return false; -@@ -307,6 +311,7 @@ bool ssl_client_hello_get_extension(const SSL_CLIENT_HELLO *client_hello, +@@ -307,6 +311,8 @@ bool ssl_client_hello_get_extension(const SSL_CLIENT_HELLO *client_hello, } static const uint16_t kDefaultGroups[] = { + SSL_GROUP_X25519_MLKEM768, ++ SSL_GROUP_P256_KYBER768_DRAFT00, SSL_GROUP_X25519, SSL_GROUP_SECP256R1, SSL_GROUP_SECP384R1,