284 lines
9.8 KiB
Diff
|
|
diff --git a/crypto_kem/kyber1024/aarch64/indcpa.c b/crypto_kem/kyber1024/aarch64/indcpa.c
|
||
|
|
index 6b83943e..43f489f0 100644
|
||
|
|
--- a/crypto_kem/kyber1024/aarch64/indcpa.c
|
||
|
|
+++ b/crypto_kem/kyber1024/aarch64/indcpa.c
|
||
|
|
@@ -160,39 +160,44 @@ static void unpack_ciphertext(int16_t b[KYBER_K][KYBER_N], int16_t *v, const uin
|
||
|
|
**************************************************/
|
||
|
|
#define GEN_MATRIX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES)
|
||
|
|
// Not static for benchmarking
|
||
|
|
-void gen_matrix(int16_t a[KYBER_K][KYBER_K][KYBER_N], const uint8_t seed[KYBER_SYMBYTES], int transposed) {
|
||
|
|
- unsigned int ctr0, ctr1, k;
|
||
|
|
- unsigned int buflen, off;
|
||
|
|
- uint8_t buf0[GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES + 2],
|
||
|
|
- buf1[GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES + 2];
|
||
|
|
- neon_xof_state state;
|
||
|
|
-
|
||
|
|
- for (unsigned int i = 0; i < KYBER_K; i++) {
|
||
|
|
- if (transposed) {
|
||
|
|
- neon_xof_absorb(&state, seed, i, i, 0, 1);
|
||
|
|
- } else {
|
||
|
|
- neon_xof_absorb(&state, seed, 0, 1, i, i);
|
||
|
|
+void gen_matrix(int16_t a[KYBER_K][KYBER_K][KYBER_N], const uint8_t seed[KYBER_SYMBYTES], int transposed)
|
||
|
|
+{
|
||
|
|
+ unsigned int ctr0, ctr1, k;
|
||
|
|
+ unsigned int buflen, off;
|
||
|
|
+ uint8_t buf0[GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES + 2],
|
||
|
|
+ buf1[GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES + 2];
|
||
|
|
+ neon_xof_state state;
|
||
|
|
+
|
||
|
|
+ for (unsigned int i = 0; i < KYBER_K; i++)
|
||
|
|
+ {
|
||
|
|
+ for (unsigned int j = 0; j < KYBER_K; j += 2)
|
||
|
|
+ {
|
||
|
|
+ if (transposed)
|
||
|
|
+ neon_xof_absorb(&state, seed, i, i, j, j + 1);
|
||
|
|
+ else
|
||
|
|
+ neon_xof_absorb(&state, seed, j, j + 1, i, i);
|
||
|
|
+
|
||
|
|
+ neon_xof_squeezeblocks(buf0, buf1, GEN_MATRIX_NBLOCKS, &state);
|
||
|
|
+ buflen = GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES;
|
||
|
|
+ ctr0 = neon_rej_uniform(&(a[i][j][0]), buf0);
|
||
|
|
+ ctr1 = neon_rej_uniform(&(a[i][j + 1][0]), buf1);
|
||
|
|
+
|
||
|
|
+ while (ctr0 < KYBER_N || ctr1 < KYBER_N)
|
||
|
|
+ {
|
||
|
|
+ off = buflen % 3;
|
||
|
|
+ for (k = 0; k < off; k++)
|
||
|
|
+ {
|
||
|
|
+ buf0[k] = buf0[buflen - off + k];
|
||
|
|
+ buf1[k] = buf1[buflen - off + k];
|
||
|
|
}
|
||
|
|
+ neon_xof_squeezeblocks(buf0 + off, buf1 + off, 1, &state);
|
||
|
|
|
||
|
|
- neon_xof_squeezeblocks(buf0, buf1, GEN_MATRIX_NBLOCKS, &state);
|
||
|
|
-
|
||
|
|
- buflen = GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES;
|
||
|
|
-
|
||
|
|
- ctr0 = neon_rej_uniform(&(a[i][0][0]), buf0);
|
||
|
|
- ctr1 = neon_rej_uniform(&(a[i][1][0]), buf1);
|
||
|
|
- while (ctr0 < KYBER_N || ctr1 < KYBER_N) {
|
||
|
|
- off = buflen % 3;
|
||
|
|
- for (k = 0; k < off; k++) {
|
||
|
|
- buf0[k] = buf0[buflen - off + k];
|
||
|
|
- buf1[k] = buf1[buflen - off + k];
|
||
|
|
- }
|
||
|
|
- neon_xof_squeezeblocks(buf0 + off, buf1 + off, 1, &state);
|
||
|
|
-
|
||
|
|
- buflen = off + XOF_BLOCKBYTES;
|
||
|
|
- ctr0 += rej_uniform(&(a[i][0][0]) + ctr0, KYBER_N - ctr0, buf0, buflen);
|
||
|
|
- ctr1 += rej_uniform(&(a[i][1][0]) + ctr1, KYBER_N - ctr1, buf1, buflen);
|
||
|
|
- }
|
||
|
|
+ buflen = off + XOF_BLOCKBYTES;
|
||
|
|
+ ctr0 += rej_uniform(&(a[i][j][0]) + ctr0, KYBER_N - ctr0, buf0, buflen);
|
||
|
|
+ ctr1 += rej_uniform(&(a[i][j + 1][0]) + ctr1, KYBER_N - ctr1, buf1, buflen);
|
||
|
|
+ }
|
||
|
|
}
|
||
|
|
+ }
|
||
|
|
}
|
||
|
|
|
||
|
|
/*************************************************
|
||
|
|
@@ -224,7 +229,9 @@ void indcpa_keypair(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
|
||
|
|
gen_a(a, publicseed);
|
||
|
|
|
||
|
|
neon_poly_getnoise_eta1_2x(&(skpv[0][0]), &(skpv[1][0]), noiseseed, 0, 1);
|
||
|
|
- neon_poly_getnoise_eta1_2x(&(e[0][0]), &(e[1][0]), noiseseed, 2, 3);
|
||
|
|
+ neon_poly_getnoise_eta1_2x(&(skpv[2][0]), &(skpv[3][0]), noiseseed, 2, 3);
|
||
|
|
+ neon_poly_getnoise_eta1_2x(&(e[0][0]), &(e[1][0]), noiseseed, 4, 5);
|
||
|
|
+ neon_poly_getnoise_eta1_2x(&(e[2][0]), &(e[3][0]), noiseseed, 6, 7);
|
||
|
|
|
||
|
|
neon_polyvec_ntt(skpv);
|
||
|
|
neon_polyvec_ntt(e);
|
||
|
|
@@ -280,10 +287,11 @@ void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES],
|
||
|
|
poly_frommsg(k, m);
|
||
|
|
gen_at(at, seed);
|
||
|
|
|
||
|
|
- // ETA1 != ETA2 (3 != 2)
|
||
|
|
- neon_poly_getnoise_eta1_2x(&(sp[0][0]), &(sp[1][0]), coins, 0, 1);
|
||
|
|
- neon_poly_getnoise_eta2_2x(&(ep[0][0]), &(ep[1][0]), coins, 2, 3);
|
||
|
|
- neon_poly_getnoise_eta2(&(epp[0]), coins, 4);
|
||
|
|
+ neon_poly_getnoise_eta1_2x(&(sp[0][0]), &(sp[1][0]), coins, 0, 1);
|
||
|
|
+ neon_poly_getnoise_eta1_2x(&(sp[2][0]), &(sp[3][0]), coins, 2, 3);
|
||
|
|
+ neon_poly_getnoise_eta1_2x(&(ep[0][0]), &(ep[1][0]), coins, 4, 5);
|
||
|
|
+ neon_poly_getnoise_eta1_2x(&(ep[2][0]), &(ep[3][0]), coins, 6, 7);
|
||
|
|
+ neon_poly_getnoise_eta2(&(epp[0]), coins, 8);
|
||
|
|
|
||
|
|
neon_polyvec_ntt(sp);
|
||
|
|
|
||
|
|
diff --git a/crypto_kem/kyber768/aarch64/indcpa.c b/crypto_kem/kyber768/aarch64/indcpa.c
|
||
|
|
index 02448809..ff24f150 100644
|
||
|
|
--- a/crypto_kem/kyber768/aarch64/indcpa.c
|
||
|
|
+++ b/crypto_kem/kyber768/aarch64/indcpa.c
|
||
|
|
@@ -160,39 +160,114 @@ static void unpack_ciphertext(int16_t b[KYBER_K][KYBER_N], int16_t *v, const uin
|
||
|
|
**************************************************/
|
||
|
|
#define GEN_MATRIX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES)
|
||
|
|
// Not static for benchmarking
|
||
|
|
-void gen_matrix(int16_t a[KYBER_K][KYBER_K][KYBER_N], const uint8_t seed[KYBER_SYMBYTES], int transposed) {
|
||
|
|
- unsigned int ctr0, ctr1, k;
|
||
|
|
- unsigned int buflen, off;
|
||
|
|
- uint8_t buf0[GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES + 2],
|
||
|
|
- buf1[GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES + 2];
|
||
|
|
- neon_xof_state state;
|
||
|
|
-
|
||
|
|
- for (unsigned int i = 0; i < KYBER_K; i++) {
|
||
|
|
- if (transposed) {
|
||
|
|
- neon_xof_absorb(&state, seed, i, i, 0, 1);
|
||
|
|
- } else {
|
||
|
|
- neon_xof_absorb(&state, seed, 0, 1, i, i);
|
||
|
|
- }
|
||
|
|
-
|
||
|
|
- neon_xof_squeezeblocks(buf0, buf1, GEN_MATRIX_NBLOCKS, &state);
|
||
|
|
-
|
||
|
|
- buflen = GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES;
|
||
|
|
-
|
||
|
|
- ctr0 = neon_rej_uniform(&(a[i][0][0]), buf0);
|
||
|
|
- ctr1 = neon_rej_uniform(&(a[i][1][0]), buf1);
|
||
|
|
- while (ctr0 < KYBER_N || ctr1 < KYBER_N) {
|
||
|
|
- off = buflen % 3;
|
||
|
|
- for (k = 0; k < off; k++) {
|
||
|
|
- buf0[k] = buf0[buflen - off + k];
|
||
|
|
- buf1[k] = buf1[buflen - off + k];
|
||
|
|
- }
|
||
|
|
- neon_xof_squeezeblocks(buf0 + off, buf1 + off, 1, &state);
|
||
|
|
-
|
||
|
|
- buflen = off + XOF_BLOCKBYTES;
|
||
|
|
- ctr0 += rej_uniform(&(a[i][0][0]) + ctr0, KYBER_N - ctr0, buf0, buflen);
|
||
|
|
- ctr1 += rej_uniform(&(a[i][1][0]) + ctr1, KYBER_N - ctr1, buf1, buflen);
|
||
|
|
- }
|
||
|
|
+void gen_matrix(int16_t a[KYBER_K][KYBER_K][KYBER_N], const uint8_t seed[KYBER_SYMBYTES], int transposed)
|
||
|
|
+{
|
||
|
|
+ unsigned int ctr0, ctr1, k;
|
||
|
|
+ unsigned int buflen, off;
|
||
|
|
+ uint8_t buf0[GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES + 2],
|
||
|
|
+ buf1[GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES + 2];
|
||
|
|
+ neon_xof_state state;
|
||
|
|
+
|
||
|
|
+ int16_t *s1 = NULL, *s2 = NULL;
|
||
|
|
+ unsigned int x1, x2, y1, y2;
|
||
|
|
+ xof_state c_state;
|
||
|
|
+ shake128_inc_init(&c_state); // patch
|
||
|
|
+
|
||
|
|
+ for (unsigned int j = 0; j < KYBER_K * KYBER_K - 1; j += 2)
|
||
|
|
+ {
|
||
|
|
+ switch (j)
|
||
|
|
+ {
|
||
|
|
+ case 0:
|
||
|
|
+ s1 = &(a[0][0][0]);
|
||
|
|
+ s2 = &(a[0][1][0]);
|
||
|
|
+ x1 = 0;
|
||
|
|
+ y1 = 0;
|
||
|
|
+ x2 = 0;
|
||
|
|
+ y2 = 1;
|
||
|
|
+ break;
|
||
|
|
+ case 2:
|
||
|
|
+ s1 = &(a[0][2][0]);
|
||
|
|
+ s2 = &(a[1][0][0]);
|
||
|
|
+ x1 = 0;
|
||
|
|
+ y1 = 2;
|
||
|
|
+ x2 = 1;
|
||
|
|
+ y2 = 0;
|
||
|
|
+ break;
|
||
|
|
+ case 4:
|
||
|
|
+ s1 = &(a[1][1][0]);
|
||
|
|
+ s2 = &(a[1][2][0]);
|
||
|
|
+ x1 = 1;
|
||
|
|
+ y1 = 1;
|
||
|
|
+ x2 = 1;
|
||
|
|
+ y2 = 2;
|
||
|
|
+ break;
|
||
|
|
+ default:
|
||
|
|
+ s1 = &(a[2][0][0]);
|
||
|
|
+ s2 = &(a[2][1][0]);
|
||
|
|
+ x1 = 2;
|
||
|
|
+ y1 = 0;
|
||
|
|
+ x2 = 2;
|
||
|
|
+ y2 = 1;
|
||
|
|
+ break;
|
||
|
|
}
|
||
|
|
+
|
||
|
|
+ if (transposed)
|
||
|
|
+ neon_xof_absorb(&state, seed, x1, x2, y1, y2);
|
||
|
|
+ else
|
||
|
|
+ neon_xof_absorb(&state, seed, y1, y2, x1, x2);
|
||
|
|
+
|
||
|
|
+ neon_xof_squeezeblocks(buf0, buf1, GEN_MATRIX_NBLOCKS, &state);
|
||
|
|
+
|
||
|
|
+ buflen = GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES;
|
||
|
|
+
|
||
|
|
+ ctr0 = neon_rej_uniform(s1, buf0);
|
||
|
|
+ ctr1 = neon_rej_uniform(s2, buf1);
|
||
|
|
+
|
||
|
|
+ while (ctr0 < KYBER_N || ctr1 < KYBER_N)
|
||
|
|
+ {
|
||
|
|
+ off = buflen % 3;
|
||
|
|
+ for (k = 0; k < off; k++)
|
||
|
|
+ {
|
||
|
|
+ buf0[k] = buf0[buflen - off + k];
|
||
|
|
+ buf1[k] = buf1[buflen - off + k];
|
||
|
|
+ }
|
||
|
|
+ neon_xof_squeezeblocks(buf0 + off, buf1 + off, 1, &state);
|
||
|
|
+
|
||
|
|
+ buflen = off + XOF_BLOCKBYTES;
|
||
|
|
+ ctr0 += rej_uniform(s1 + ctr0, KYBER_N - ctr0, buf0, buflen);
|
||
|
|
+ ctr1 += rej_uniform(s2 + ctr1, KYBER_N - ctr1, buf1, buflen);
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ // Last iteration [2][2]
|
||
|
|
+ if (transposed){
|
||
|
|
+ xof_absorb(&c_state, seed, 2, 2);
|
||
|
|
+ }
|
||
|
|
+ else{
|
||
|
|
+ xof_absorb(&c_state, seed, 2, 2);
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ xof_squeezeblocks(buf0, GEN_MATRIX_NBLOCKS, &c_state);
|
||
|
|
+
|
||
|
|
+ buflen = GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES;
|
||
|
|
+
|
||
|
|
+ ctr0 = neon_rej_uniform(&(a[2][2][0]), buf0);
|
||
|
|
+
|
||
|
|
+ while (ctr0 < KYBER_N)
|
||
|
|
+ {
|
||
|
|
+ off = buflen % 3;
|
||
|
|
+ for (k = 0; k < off; k++)
|
||
|
|
+ {
|
||
|
|
+ buf0[k] = buf0[buflen - off + k];
|
||
|
|
+ }
|
||
|
|
+ xof_squeezeblocks(buf0 + off, 1, &c_state);
|
||
|
|
+
|
||
|
|
+ buflen = off + XOF_BLOCKBYTES;
|
||
|
|
+ ctr0 += rej_uniform(&(a[2][2][0]) + ctr0, KYBER_N - ctr0, buf0, buflen);
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ shake128_inc_ctx_release(&c_state);
|
||
|
|
+
|
||
|
|
}
|
||
|
|
|
||
|
|
/*************************************************
|
||
|
|
@@ -224,7 +299,8 @@ void indcpa_keypair(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
|
||
|
|
gen_a(a, publicseed);
|
||
|
|
|
||
|
|
neon_poly_getnoise_eta1_2x(&(skpv[0][0]), &(skpv[1][0]), noiseseed, 0, 1);
|
||
|
|
- neon_poly_getnoise_eta1_2x(&(e[0][0]), &(e[1][0]), noiseseed, 2, 3);
|
||
|
|
+ neon_poly_getnoise_eta1_2x(&(skpv[2][0]), &(e[0][0]), noiseseed, 2, 3);
|
||
|
|
+ neon_poly_getnoise_eta1_2x(&(e[1][0]), &(e[2][0]), noiseseed, 4, 5);
|
||
|
|
|
||
|
|
neon_polyvec_ntt(skpv);
|
||
|
|
neon_polyvec_ntt(e);
|
||
|
|
@@ -280,10 +356,11 @@ void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES],
|
||
|
|
poly_frommsg(k, m);
|
||
|
|
gen_at(at, seed);
|
||
|
|
|
||
|
|
- // ETA1 != ETA2 (3 != 2)
|
||
|
|
- neon_poly_getnoise_eta1_2x(&(sp[0][0]), &(sp[1][0]), coins, 0, 1);
|
||
|
|
- neon_poly_getnoise_eta2_2x(&(ep[0][0]), &(ep[1][0]), coins, 2, 3);
|
||
|
|
- neon_poly_getnoise_eta2(&(epp[0]), coins, 4);
|
||
|
|
+ // Because ETA1 == ETA2
|
||
|
|
+ neon_poly_getnoise_eta1_2x(&(sp[0][0]), &(sp[1][0]), coins, 0, 1);
|
||
|
|
+ neon_poly_getnoise_eta1_2x(&(sp[2][0]), &(ep[0][0]), coins, 2, 3);
|
||
|
|
+ neon_poly_getnoise_eta1_2x(&(ep[1][0]), &(ep[2][0]), coins, 4, 5);
|
||
|
|
+ neon_poly_getnoise_eta2(&(epp[0]), coins, 6);
|
||
|
|
|
||
|
|
neon_polyvec_ntt(sp);
|
||
|
|
|