diff --git a/.gitignore b/.gitignore index dfc9a39..f32eff0 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ /openssl-1.1.1g-hobbled.tar.xz +/openssl-1.1.1h-hobbled.tar.xz +/openssl-1.1.1i-hobbled.tar.xz diff --git a/ectest.c b/ectest.c index c16642e..e4fd45b 100644 --- a/ectest.c +++ b/ectest.c @@ -1,5 +1,5 @@ /* - * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2001-2020 The OpenSSL Project Authors. All Rights Reserved. * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved * * Licensed under the OpenSSL license (the "License"). You may not use @@ -1425,6 +1425,87 @@ static int ec_point_hex2point_test(int id) return ret; } +/* + * check the EC_METHOD respects the supplied EC_GROUP_set_generator G + */ +static int custom_generator_test(int id) +{ + int ret = 0, nid, bsize; + EC_GROUP *group = NULL; + EC_POINT *G2 = NULL, *Q1 = NULL, *Q2 = NULL; + BN_CTX *ctx = NULL; + BIGNUM *k = NULL; + unsigned char *b1 = NULL, *b2 = NULL; + + /* Do some setup */ + nid = curves[id].nid; + TEST_note("Curve %s", OBJ_nid2sn(nid)); + if (!TEST_ptr(ctx = BN_CTX_new())) + return 0; + + BN_CTX_start(ctx); + + if (!TEST_ptr(group = EC_GROUP_new_by_curve_name(nid))) + goto err; + + /* expected byte length of encoded points */ + bsize = (EC_GROUP_get_degree(group) + 7) / 8; + bsize = 2 * bsize + 1; + + if (!TEST_ptr(k = BN_CTX_get(ctx)) + /* fetch a testing scalar k != 0,1 */ + || !TEST_true(BN_rand(k, EC_GROUP_order_bits(group) - 1, + BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY)) + /* make k even */ + || !TEST_true(BN_clear_bit(k, 0)) + || !TEST_ptr(G2 = EC_POINT_new(group)) + || !TEST_ptr(Q1 = EC_POINT_new(group)) + /* Q1 := kG */ + || !TEST_true(EC_POINT_mul(group, Q1, k, NULL, NULL, ctx)) + /* pull out the bytes of that */ + || !TEST_int_eq(EC_POINT_point2oct(group, Q1, + POINT_CONVERSION_UNCOMPRESSED, NULL, + 0, ctx), bsize) + || !TEST_ptr(b1 = OPENSSL_malloc(bsize)) + || !TEST_int_eq(EC_POINT_point2oct(group, Q1, + POINT_CONVERSION_UNCOMPRESSED, b1, + bsize, ctx), bsize) + /* new generator is G2 := 2G */ + || !TEST_true(EC_POINT_dbl(group, G2, EC_GROUP_get0_generator(group), + ctx)) + || !TEST_true(EC_GROUP_set_generator(group, G2, + EC_GROUP_get0_order(group), + EC_GROUP_get0_cofactor(group))) + || !TEST_ptr(Q2 = EC_POINT_new(group)) + || !TEST_true(BN_rshift1(k, k)) + /* Q2 := k/2 G2 */ + || !TEST_true(EC_POINT_mul(group, Q2, k, NULL, NULL, ctx)) + || !TEST_int_eq(EC_POINT_point2oct(group, Q2, + POINT_CONVERSION_UNCOMPRESSED, NULL, + 0, ctx), bsize) + || !TEST_ptr(b2 = OPENSSL_malloc(bsize)) + || !TEST_int_eq(EC_POINT_point2oct(group, Q2, + POINT_CONVERSION_UNCOMPRESSED, b2, + bsize, ctx), bsize) + /* Q1 = kG = k/2 G2 = Q2 should hold */ + || !TEST_int_eq(CRYPTO_memcmp(b1, b2, bsize), 0)) + goto err; + + ret = 1; + + err: + BN_CTX_end(ctx); + EC_POINT_free(Q1); + EC_POINT_free(Q2); + EC_POINT_free(G2); + EC_GROUP_free(group); + BN_CTX_free(ctx); + OPENSSL_free(b1); + OPENSSL_free(b2); + + return ret; +} + #endif /* OPENSSL_NO_EC */ int setup_tests(void) @@ -1452,6 +1533,7 @@ int setup_tests(void) ADD_ALL_TESTS(check_named_curve_from_ecparameters, crv_len); ADD_ALL_TESTS(ec_point_hex2point_test, crv_len); + ADD_ALL_TESTS(custom_generator_test, crv_len); #endif /* OPENSSL_NO_EC */ return 1; } diff --git a/openssl-1.1.1-arm-update.patch b/openssl-1.1.1-arm-update.patch index 998905f..2b8c549 100644 --- a/openssl-1.1.1-arm-update.patch +++ b/openssl-1.1.1-arm-update.patch @@ -1,6 +1,6 @@ -diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl ---- openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl 2019-11-20 11:36:22.389506155 +0100 +diff -up openssl-1.1.1i/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1i/crypto/aes/asm/aesv8-armx.pl +--- openssl-1.1.1i/crypto/aes/asm/aesv8-armx.pl.arm-update 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/aes/asm/aesv8-armx.pl 2020-12-09 10:39:50.645705385 +0100 @@ -27,44 +27,72 @@ # CBC encrypt case. On Cortex-A57 parallelizable mode performance # seems to be limited by sheer amount of NEON instructions... @@ -85,7 +85,844 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c ___ # Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax, -@@ -514,6 +542,13 @@ $code.=<<___; +@@ -361,6 +389,836 @@ ___ + &gen_block("en"); + &gen_block("de"); + }}} ++ ++# Performance in cycles per byte. ++# Processed with AES-ECB different key size. ++# It shows the value before and after optimization as below: ++# (before/after): ++# ++# AES-128-ECB AES-192-ECB AES-256-ECB ++# Cortex-A57 1.85/0.82 2.16/0.96 2.47/1.10 ++# Cortex-A72 1.64/0.85 1.82/0.99 2.13/1.14 ++ ++# Optimization is implemented by loop unrolling and interleaving. ++# Commonly, we choose the unrolling factor as 5, if the input ++# data size smaller than 5 blocks, but not smaller than 3 blocks, ++# choose 3 as the unrolling factor. ++# If the input data size dsize >= 5*16 bytes, then take 5 blocks ++# as one iteration, every loop the left size lsize -= 5*16. ++# If 5*16 > lsize >= 3*16 bytes, take 3 blocks as one iteration, ++# every loop lsize -=3*16. ++# If lsize < 3*16 bytes, treat them as the tail, interleave the ++# two blocks AES instructions. ++# There is one special case, if the original input data size dsize ++# = 16 bytes, we will treat it seperately to improve the ++# performance: one independent code block without LR, FP load and ++# store, just looks like what the original ECB implementation does. ++ ++{{{ ++my ($inp,$out,$len,$key)=map("x$_",(0..3)); ++my ($enc,$rounds,$cnt,$key_,$step)=("w4","w5","w6","x7","x8"); ++my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7)); ++ ++my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1); ++ ++### q7 last round key ++### q10-q15 q7 Last 7 round keys ++### q8-q9 preloaded round keys except last 7 keys for big size ++### q5, q6, q8-q9 preloaded round keys except last 7 keys for only 16 byte ++ ++{ ++my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); ++ ++my ($dat3,$in3,$tmp3); # used only in 64-bit mode ++my ($dat4,$in4,$tmp4); ++if ($flavour =~ /64/) { ++ ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23)); ++} ++ ++$code.=<<___; ++.globl ${prefix}_ecb_encrypt ++.type ${prefix}_ecb_encrypt,%function ++.align 5 ++${prefix}_ecb_encrypt: ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ subs $len,$len,#16 ++ // Original input data size bigger than 16, jump to big size processing. ++ b.ne .Lecb_big_size ++ vld1.8 {$dat0},[$inp] ++ cmp $enc,#0 // en- or decrypting? ++ ldr $rounds,[$key,#240] ++ vld1.32 {q5-q6},[$key],#32 // load key schedule... ++ ++ b.eq .Lecb_small_dec ++ aese $dat0,q5 ++ aesmc $dat0,$dat0 ++ vld1.32 {q8-q9},[$key],#32 // load key schedule... ++ aese $dat0,q6 ++ aesmc $dat0,$dat0 ++ subs $rounds,$rounds,#10 // if rounds==10, jump to aes-128-ecb processing ++ b.eq .Lecb_128_enc ++.Lecb_round_loop: ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ vld1.32 {q8},[$key],#16 // load key schedule... ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ vld1.32 {q9},[$key],#16 // load key schedule... ++ subs $rounds,$rounds,#2 // bias ++ b.gt .Lecb_round_loop ++.Lecb_128_enc: ++ vld1.32 {q10-q11},[$key],#32 // load key schedule... ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ vld1.32 {q12-q13},[$key],#32 // load key schedule... ++ aese $dat0,q10 ++ aesmc $dat0,$dat0 ++ aese $dat0,q11 ++ aesmc $dat0,$dat0 ++ vld1.32 {q14-q15},[$key],#32 // load key schedule... ++ aese $dat0,q12 ++ aesmc $dat0,$dat0 ++ aese $dat0,q13 ++ aesmc $dat0,$dat0 ++ vld1.32 {$rndlast},[$key] ++ aese $dat0,q14 ++ aesmc $dat0,$dat0 ++ aese $dat0,q15 ++ veor $dat0,$dat0,$rndlast ++ vst1.8 {$dat0},[$out] ++ b .Lecb_Final_abort ++.Lecb_small_dec: ++ aesd $dat0,q5 ++ aesimc $dat0,$dat0 ++ vld1.32 {q8-q9},[$key],#32 // load key schedule... ++ aesd $dat0,q6 ++ aesimc $dat0,$dat0 ++ subs $rounds,$rounds,#10 // bias ++ b.eq .Lecb_128_dec ++.Lecb_dec_round_loop: ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ vld1.32 {q8},[$key],#16 // load key schedule... ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ vld1.32 {q9},[$key],#16 // load key schedule... ++ subs $rounds,$rounds,#2 // bias ++ b.gt .Lecb_dec_round_loop ++.Lecb_128_dec: ++ vld1.32 {q10-q11},[$key],#32 // load key schedule... ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ vld1.32 {q12-q13},[$key],#32 // load key schedule... ++ aesd $dat0,q10 ++ aesimc $dat0,$dat0 ++ aesd $dat0,q11 ++ aesimc $dat0,$dat0 ++ vld1.32 {q14-q15},[$key],#32 // load key schedule... ++ aesd $dat0,q12 ++ aesimc $dat0,$dat0 ++ aesd $dat0,q13 ++ aesimc $dat0,$dat0 ++ vld1.32 {$rndlast},[$key] ++ aesd $dat0,q14 ++ aesimc $dat0,$dat0 ++ aesd $dat0,q15 ++ veor $dat0,$dat0,$rndlast ++ vst1.8 {$dat0},[$out] ++ b .Lecb_Final_abort ++.Lecb_big_size: ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ stp x29,x30,[sp,#-16]! ++ add x29,sp,#0 ++___ ++$code.=<<___ if ($flavour !~ /64/); ++ mov ip,sp ++ stmdb sp!,{r4-r8,lr} ++ vstmdb sp!,{d8-d15} @ ABI specification says so ++ ldmia ip,{r4-r5} @ load remaining args ++ subs $len,$len,#16 ++___ ++$code.=<<___; ++ mov $step,#16 ++ b.lo .Lecb_done ++ cclr $step,eq ++ ++ cmp $enc,#0 // en- or decrypting? ++ ldr $rounds,[$key,#240] ++ and $len,$len,#-16 ++ vld1.8 {$dat},[$inp],$step ++ ++ vld1.32 {q8-q9},[$key] // load key schedule... ++ sub $rounds,$rounds,#6 ++ add $key_,$key,x5,lsl#4 // pointer to last 7 round keys ++ sub $rounds,$rounds,#2 ++ vld1.32 {q10-q11},[$key_],#32 ++ vld1.32 {q12-q13},[$key_],#32 ++ vld1.32 {q14-q15},[$key_],#32 ++ vld1.32 {$rndlast},[$key_] ++ ++ add $key_,$key,#32 ++ mov $cnt,$rounds ++ b.eq .Lecb_dec ++ ++ vld1.8 {$dat1},[$inp],#16 ++ subs $len,$len,#32 // bias ++ add $cnt,$rounds,#2 ++ vorr $in1,$dat1,$dat1 ++ vorr $dat2,$dat1,$dat1 ++ vorr $dat1,$dat,$dat ++ b.lo .Lecb_enc_tail ++ ++ vorr $dat1,$in1,$in1 ++ vld1.8 {$dat2},[$inp],#16 ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ cmp $len,#32 ++ b.lo .Loop3x_ecb_enc ++ ++ vld1.8 {$dat3},[$inp],#16 ++ vld1.8 {$dat4},[$inp],#16 ++ sub $len,$len,#32 // bias ++ mov $cnt,$rounds ++ ++.Loop5x_ecb_enc: ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ aese $dat3,q8 ++ aesmc $dat3,$dat3 ++ aese $dat4,q8 ++ aesmc $dat4,$dat4 ++ vld1.32 {q8},[$key_],#16 ++ subs $cnt,$cnt,#2 ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ aese $dat3,q9 ++ aesmc $dat3,$dat3 ++ aese $dat4,q9 ++ aesmc $dat4,$dat4 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Loop5x_ecb_enc ++ ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ aese $dat3,q8 ++ aesmc $dat3,$dat3 ++ aese $dat4,q8 ++ aesmc $dat4,$dat4 ++ cmp $len,#0x40 // because .Lecb_enc_tail4x ++ sub $len,$len,#0x50 ++ ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ aese $dat3,q9 ++ aesmc $dat3,$dat3 ++ aese $dat4,q9 ++ aesmc $dat4,$dat4 ++ csel x6,xzr,$len,gt // borrow x6, $cnt, "gt" is not typo ++ mov $key_,$key ++ ++ aese $dat0,q10 ++ aesmc $dat0,$dat0 ++ aese $dat1,q10 ++ aesmc $dat1,$dat1 ++ aese $dat2,q10 ++ aesmc $dat2,$dat2 ++ aese $dat3,q10 ++ aesmc $dat3,$dat3 ++ aese $dat4,q10 ++ aesmc $dat4,$dat4 ++ add $inp,$inp,x6 // $inp is adjusted in such way that ++ // at exit from the loop $dat1-$dat4 ++ // are loaded with last "words" ++ add x6,$len,#0x60 // because .Lecb_enc_tail4x ++ ++ aese $dat0,q11 ++ aesmc $dat0,$dat0 ++ aese $dat1,q11 ++ aesmc $dat1,$dat1 ++ aese $dat2,q11 ++ aesmc $dat2,$dat2 ++ aese $dat3,q11 ++ aesmc $dat3,$dat3 ++ aese $dat4,q11 ++ aesmc $dat4,$dat4 ++ ++ aese $dat0,q12 ++ aesmc $dat0,$dat0 ++ aese $dat1,q12 ++ aesmc $dat1,$dat1 ++ aese $dat2,q12 ++ aesmc $dat2,$dat2 ++ aese $dat3,q12 ++ aesmc $dat3,$dat3 ++ aese $dat4,q12 ++ aesmc $dat4,$dat4 ++ ++ aese $dat0,q13 ++ aesmc $dat0,$dat0 ++ aese $dat1,q13 ++ aesmc $dat1,$dat1 ++ aese $dat2,q13 ++ aesmc $dat2,$dat2 ++ aese $dat3,q13 ++ aesmc $dat3,$dat3 ++ aese $dat4,q13 ++ aesmc $dat4,$dat4 ++ ++ aese $dat0,q14 ++ aesmc $dat0,$dat0 ++ aese $dat1,q14 ++ aesmc $dat1,$dat1 ++ aese $dat2,q14 ++ aesmc $dat2,$dat2 ++ aese $dat3,q14 ++ aesmc $dat3,$dat3 ++ aese $dat4,q14 ++ aesmc $dat4,$dat4 ++ ++ aese $dat0,q15 ++ vld1.8 {$in0},[$inp],#16 ++ aese $dat1,q15 ++ vld1.8 {$in1},[$inp],#16 ++ aese $dat2,q15 ++ vld1.8 {$in2},[$inp],#16 ++ aese $dat3,q15 ++ vld1.8 {$in3},[$inp],#16 ++ aese $dat4,q15 ++ vld1.8 {$in4},[$inp],#16 ++ cbz x6,.Lecb_enc_tail4x ++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] ++ veor $tmp0,$rndlast,$dat0 ++ vorr $dat0,$in0,$in0 ++ veor $tmp1,$rndlast,$dat1 ++ vorr $dat1,$in1,$in1 ++ veor $tmp2,$rndlast,$dat2 ++ vorr $dat2,$in2,$in2 ++ veor $tmp3,$rndlast,$dat3 ++ vorr $dat3,$in3,$in3 ++ veor $tmp4,$rndlast,$dat4 ++ vst1.8 {$tmp0},[$out],#16 ++ vorr $dat4,$in4,$in4 ++ vst1.8 {$tmp1},[$out],#16 ++ mov $cnt,$rounds ++ vst1.8 {$tmp2},[$out],#16 ++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] ++ vst1.8 {$tmp3},[$out],#16 ++ vst1.8 {$tmp4},[$out],#16 ++ b.hs .Loop5x_ecb_enc ++ ++ add $len,$len,#0x50 ++ cbz $len,.Lecb_done ++ ++ add $cnt,$rounds,#2 ++ subs $len,$len,#0x30 ++ vorr $dat0,$in2,$in2 ++ vorr $dat1,$in3,$in3 ++ vorr $dat2,$in4,$in4 ++ b.lo .Lecb_enc_tail ++ ++ b .Loop3x_ecb_enc ++ ++.align 4 ++.Lecb_enc_tail4x: ++ veor $tmp1,$rndlast,$dat1 ++ veor $tmp2,$rndlast,$dat2 ++ veor $tmp3,$rndlast,$dat3 ++ veor $tmp4,$rndlast,$dat4 ++ vst1.8 {$tmp1},[$out],#16 ++ vst1.8 {$tmp2},[$out],#16 ++ vst1.8 {$tmp3},[$out],#16 ++ vst1.8 {$tmp4},[$out],#16 ++ ++ b .Lecb_done ++.align 4 ++___ ++$code.=<<___; ++.Loop3x_ecb_enc: ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ vld1.32 {q8},[$key_],#16 ++ subs $cnt,$cnt,#2 ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Loop3x_ecb_enc ++ ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ subs $len,$len,#0x30 ++ mov.lo x6,$len // x6, $cnt, is zero at this point ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ add $inp,$inp,x6 // $inp is adjusted in such way that ++ // at exit from the loop $dat1-$dat2 ++ // are loaded with last "words" ++ mov $key_,$key ++ aese $dat0,q12 ++ aesmc $dat0,$dat0 ++ aese $dat1,q12 ++ aesmc $dat1,$dat1 ++ aese $dat2,q12 ++ aesmc $dat2,$dat2 ++ vld1.8 {$in0},[$inp],#16 ++ aese $dat0,q13 ++ aesmc $dat0,$dat0 ++ aese $dat1,q13 ++ aesmc $dat1,$dat1 ++ aese $dat2,q13 ++ aesmc $dat2,$dat2 ++ vld1.8 {$in1},[$inp],#16 ++ aese $dat0,q14 ++ aesmc $dat0,$dat0 ++ aese $dat1,q14 ++ aesmc $dat1,$dat1 ++ aese $dat2,q14 ++ aesmc $dat2,$dat2 ++ vld1.8 {$in2},[$inp],#16 ++ aese $dat0,q15 ++ aese $dat1,q15 ++ aese $dat2,q15 ++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] ++ add $cnt,$rounds,#2 ++ veor $tmp0,$rndlast,$dat0 ++ veor $tmp1,$rndlast,$dat1 ++ veor $dat2,$dat2,$rndlast ++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] ++ vst1.8 {$tmp0},[$out],#16 ++ vorr $dat0,$in0,$in0 ++ vst1.8 {$tmp1},[$out],#16 ++ vorr $dat1,$in1,$in1 ++ vst1.8 {$dat2},[$out],#16 ++ vorr $dat2,$in2,$in2 ++ b.hs .Loop3x_ecb_enc ++ ++ cmn $len,#0x30 ++ b.eq .Lecb_done ++ nop ++ ++.Lecb_enc_tail: ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ vld1.32 {q8},[$key_],#16 ++ subs $cnt,$cnt,#2 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Lecb_enc_tail ++ ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ aese $dat1,q12 ++ aesmc $dat1,$dat1 ++ aese $dat2,q12 ++ aesmc $dat2,$dat2 ++ cmn $len,#0x20 ++ aese $dat1,q13 ++ aesmc $dat1,$dat1 ++ aese $dat2,q13 ++ aesmc $dat2,$dat2 ++ aese $dat1,q14 ++ aesmc $dat1,$dat1 ++ aese $dat2,q14 ++ aesmc $dat2,$dat2 ++ aese $dat1,q15 ++ aese $dat2,q15 ++ b.eq .Lecb_enc_one ++ veor $tmp1,$rndlast,$dat1 ++ veor $tmp2,$rndlast,$dat2 ++ vst1.8 {$tmp1},[$out],#16 ++ vst1.8 {$tmp2},[$out],#16 ++ b .Lecb_done ++ ++.Lecb_enc_one: ++ veor $tmp1,$rndlast,$dat2 ++ vst1.8 {$tmp1},[$out],#16 ++ b .Lecb_done ++___ ++ ++$code.=<<___; ++.align 5 ++.Lecb_dec: ++ vld1.8 {$dat1},[$inp],#16 ++ subs $len,$len,#32 // bias ++ add $cnt,$rounds,#2 ++ vorr $in1,$dat1,$dat1 ++ vorr $dat2,$dat1,$dat1 ++ vorr $dat1,$dat,$dat ++ b.lo .Lecb_dec_tail ++ ++ vorr $dat1,$in1,$in1 ++ vld1.8 {$dat2},[$inp],#16 ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ cmp $len,#32 ++ b.lo .Loop3x_ecb_dec ++ ++ vld1.8 {$dat3},[$inp],#16 ++ vld1.8 {$dat4},[$inp],#16 ++ sub $len,$len,#32 // bias ++ mov $cnt,$rounds ++ ++.Loop5x_ecb_dec: ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q8 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q8 ++ aesimc $dat4,$dat4 ++ vld1.32 {q8},[$key_],#16 ++ subs $cnt,$cnt,#2 ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q9 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q9 ++ aesimc $dat4,$dat4 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Loop5x_ecb_dec ++ ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q8 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q8 ++ aesimc $dat4,$dat4 ++ cmp $len,#0x40 // because .Lecb_tail4x ++ sub $len,$len,#0x50 ++ ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q9 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q9 ++ aesimc $dat4,$dat4 ++ csel x6,xzr,$len,gt // borrow x6, $cnt, "gt" is not typo ++ mov $key_,$key ++ ++ aesd $dat0,q10 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q10 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q10 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q10 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q10 ++ aesimc $dat4,$dat4 ++ add $inp,$inp,x6 // $inp is adjusted in such way that ++ // at exit from the loop $dat1-$dat4 ++ // are loaded with last "words" ++ add x6,$len,#0x60 // because .Lecb_tail4x ++ ++ aesd $dat0,q11 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q11 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q11 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q11 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q11 ++ aesimc $dat4,$dat4 ++ ++ aesd $dat0,q12 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q12 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q12 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q12 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q12 ++ aesimc $dat4,$dat4 ++ ++ aesd $dat0,q13 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q13 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q13 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q13 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q13 ++ aesimc $dat4,$dat4 ++ ++ aesd $dat0,q14 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q14 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q14 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q14 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q14 ++ aesimc $dat4,$dat4 ++ ++ aesd $dat0,q15 ++ vld1.8 {$in0},[$inp],#16 ++ aesd $dat1,q15 ++ vld1.8 {$in1},[$inp],#16 ++ aesd $dat2,q15 ++ vld1.8 {$in2},[$inp],#16 ++ aesd $dat3,q15 ++ vld1.8 {$in3},[$inp],#16 ++ aesd $dat4,q15 ++ vld1.8 {$in4},[$inp],#16 ++ cbz x6,.Lecb_tail4x ++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] ++ veor $tmp0,$rndlast,$dat0 ++ vorr $dat0,$in0,$in0 ++ veor $tmp1,$rndlast,$dat1 ++ vorr $dat1,$in1,$in1 ++ veor $tmp2,$rndlast,$dat2 ++ vorr $dat2,$in2,$in2 ++ veor $tmp3,$rndlast,$dat3 ++ vorr $dat3,$in3,$in3 ++ veor $tmp4,$rndlast,$dat4 ++ vst1.8 {$tmp0},[$out],#16 ++ vorr $dat4,$in4,$in4 ++ vst1.8 {$tmp1},[$out],#16 ++ mov $cnt,$rounds ++ vst1.8 {$tmp2},[$out],#16 ++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] ++ vst1.8 {$tmp3},[$out],#16 ++ vst1.8 {$tmp4},[$out],#16 ++ b.hs .Loop5x_ecb_dec ++ ++ add $len,$len,#0x50 ++ cbz $len,.Lecb_done ++ ++ add $cnt,$rounds,#2 ++ subs $len,$len,#0x30 ++ vorr $dat0,$in2,$in2 ++ vorr $dat1,$in3,$in3 ++ vorr $dat2,$in4,$in4 ++ b.lo .Lecb_dec_tail ++ ++ b .Loop3x_ecb_dec ++ ++.align 4 ++.Lecb_tail4x: ++ veor $tmp1,$rndlast,$dat1 ++ veor $tmp2,$rndlast,$dat2 ++ veor $tmp3,$rndlast,$dat3 ++ veor $tmp4,$rndlast,$dat4 ++ vst1.8 {$tmp1},[$out],#16 ++ vst1.8 {$tmp2},[$out],#16 ++ vst1.8 {$tmp3},[$out],#16 ++ vst1.8 {$tmp4},[$out],#16 ++ ++ b .Lecb_done ++.align 4 ++___ ++$code.=<<___; ++.Loop3x_ecb_dec: ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ vld1.32 {q8},[$key_],#16 ++ subs $cnt,$cnt,#2 ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Loop3x_ecb_dec ++ ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ subs $len,$len,#0x30 ++ mov.lo x6,$len // x6, $cnt, is zero at this point ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ add $inp,$inp,x6 // $inp is adjusted in such way that ++ // at exit from the loop $dat1-$dat2 ++ // are loaded with last "words" ++ mov $key_,$key ++ aesd $dat0,q12 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q12 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q12 ++ aesimc $dat2,$dat2 ++ vld1.8 {$in0},[$inp],#16 ++ aesd $dat0,q13 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q13 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q13 ++ aesimc $dat2,$dat2 ++ vld1.8 {$in1},[$inp],#16 ++ aesd $dat0,q14 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q14 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q14 ++ aesimc $dat2,$dat2 ++ vld1.8 {$in2},[$inp],#16 ++ aesd $dat0,q15 ++ aesd $dat1,q15 ++ aesd $dat2,q15 ++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] ++ add $cnt,$rounds,#2 ++ veor $tmp0,$rndlast,$dat0 ++ veor $tmp1,$rndlast,$dat1 ++ veor $dat2,$dat2,$rndlast ++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] ++ vst1.8 {$tmp0},[$out],#16 ++ vorr $dat0,$in0,$in0 ++ vst1.8 {$tmp1},[$out],#16 ++ vorr $dat1,$in1,$in1 ++ vst1.8 {$dat2},[$out],#16 ++ vorr $dat2,$in2,$in2 ++ b.hs .Loop3x_ecb_dec ++ ++ cmn $len,#0x30 ++ b.eq .Lecb_done ++ nop ++ ++.Lecb_dec_tail: ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ vld1.32 {q8},[$key_],#16 ++ subs $cnt,$cnt,#2 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Lecb_dec_tail ++ ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ aesd $dat1,q12 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q12 ++ aesimc $dat2,$dat2 ++ cmn $len,#0x20 ++ aesd $dat1,q13 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q13 ++ aesimc $dat2,$dat2 ++ aesd $dat1,q14 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q14 ++ aesimc $dat2,$dat2 ++ aesd $dat1,q15 ++ aesd $dat2,q15 ++ b.eq .Lecb_dec_one ++ veor $tmp1,$rndlast,$dat1 ++ veor $tmp2,$rndlast,$dat2 ++ vst1.8 {$tmp1},[$out],#16 ++ vst1.8 {$tmp2},[$out],#16 ++ b .Lecb_done ++ ++.Lecb_dec_one: ++ veor $tmp1,$rndlast,$dat2 ++ vst1.8 {$tmp1},[$out],#16 ++ ++.Lecb_done: ++___ ++} ++$code.=<<___ if ($flavour !~ /64/); ++ vldmia sp!,{d8-d15} ++ ldmia sp!,{r4-r8,pc} ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ ldr x29,[sp],#16 ++___ ++$code.=<<___ if ($flavour =~ /64/); ++.Lecb_Final_abort: ++ ret ++___ ++$code.=<<___; ++.size ${prefix}_ecb_encrypt,.-${prefix}_ecb_encrypt ++___ ++}}} + {{{ + my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5"; + my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12"); +@@ -519,6 +1377,13 @@ $code.=<<___; ___ { my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); @@ -99,7 +936,7 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c $code.=<<___; .align 5 .Lcbc_dec: -@@ -530,7 +565,196 @@ $code.=<<___; +@@ -535,7 +1400,196 @@ $code.=<<___; vorr $in0,$dat,$dat vorr $in1,$dat1,$dat1 vorr $in2,$dat2,$dat2 @@ -225,7 +1062,7 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c + aesimc $dat3,$dat3 + aesd $dat4,q14 + aesimc $dat4,$dat4 -+ + + veor $tmp0,$ivec,$rndlast + aesd $dat0,q15 + veor $tmp1,$in0,$rndlast @@ -277,7 +1114,7 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c + b.lo .Lcbc_dec_tail + + b .Loop3x_cbc_dec - ++ +.align 4 +.Lcbc_tail4x: + veor $tmp1,$tmp0,$dat1 @@ -296,7 +1133,7 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c .Loop3x_cbc_dec: aesd $dat0,q8 aesimc $dat0,$dat0 -@@ -691,6 +915,9 @@ my $step="x12"; # aliases with $tctr2 +@@ -696,6 +1750,9 @@ my $step="x12"; # aliases with $tctr2 my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7)); my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); @@ -306,10 +1143,10 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c my ($dat,$tmp)=($dat0,$tmp0); ### q8-q15 preloaded key schedule -@@ -743,6 +970,175 @@ $code.=<<___; - rev $tctr2, $ctr +@@ -751,6 +1808,175 @@ $code.=<<___; + vmov.32 ${ivec}[3],$tctr2 sub $len,$len,#3 // bias - vmov.32 ${dat2}[3],$tctr2 + vorr $dat2,$ivec,$ivec +___ +$code.=<<___ if ($flavour =~ /64/); + cmp $len,#2 @@ -482,7 +1319,1440 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c b .Loop3x_ctr32 .align 4 -@@ -955,7 +1351,7 @@ if ($flavour =~ /64/) { ######## 64-bi +@@ -905,6 +2131,1432 @@ $code.=<<___; + .size ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks + ___ + }}} ++# Performance in cycles per byte. ++# Processed with AES-XTS different key size. ++# It shows the value before and after optimization as below: ++# (before/after): ++# ++# AES-128-XTS AES-256-XTS ++# Cortex-A57 3.36/1.09 4.02/1.37 ++# Cortex-A72 3.03/1.02 3.28/1.33 ++ ++# Optimization is implemented by loop unrolling and interleaving. ++# Commonly, we choose the unrolling factor as 5, if the input ++# data size smaller than 5 blocks, but not smaller than 3 blocks, ++# choose 3 as the unrolling factor. ++# If the input data size dsize >= 5*16 bytes, then take 5 blocks ++# as one iteration, every loop the left size lsize -= 5*16. ++# If lsize < 5*16 bytes, treat them as the tail. Note: left 4*16 bytes ++# will be processed specially, which be integrated into the 5*16 bytes ++# loop to improve the efficiency. ++# There is one special case, if the original input data size dsize ++# = 16 bytes, we will treat it seperately to improve the ++# performance: one independent code block without LR, FP load and ++# store. ++# Encryption will process the (length -tailcnt) bytes as mentioned ++# previously, then encrypt the composite block as last second ++# cipher block. ++# Decryption will process the (length -tailcnt -1) bytes as mentioned ++# previously, then decrypt the last second cipher block to get the ++# last plain block(tail), decrypt the composite block as last second ++# plain text block. ++ ++{{{ ++my ($inp,$out,$len,$key1,$key2,$ivp)=map("x$_",(0..5)); ++my ($rounds0,$rounds,$key_,$step,$ivl,$ivh)=("w5","w6","x7","x8","x9","x10"); ++my ($tmpoutp,$loutp,$l2outp,$tmpinp)=("x13","w14","w15","x20"); ++my ($tailcnt,$midnum,$midnumx,$constnum,$constnumx)=("x21","w22","x22","w19","x19"); ++my ($xoffset,$tmpmx,$tmpmw)=("x6","x11","w11"); ++my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7)); ++my ($iv0,$iv1,$iv2,$iv3,$iv4)=("v6.16b","v8.16b","v9.16b","v10.16b","v11.16b"); ++my ($ivd00,$ivd01,$ivd20,$ivd21)=("d6","v6.d[1]","d9","v9.d[1]"); ++my ($ivd10,$ivd11,$ivd30,$ivd31,$ivd40,$ivd41)=("d8","v8.d[1]","d10","v10.d[1]","d11","v11.d[1]"); ++ ++my ($tmpin)=("v26.16b"); ++my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1); ++ ++# q7 last round key ++# q10-q15, q7 Last 7 round keys ++# q8-q9 preloaded round keys except last 7 keys for big size ++# q20, q21, q8-q9 preloaded round keys except last 7 keys for only 16 byte ++ ++ ++my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); ++ ++my ($dat3,$in3,$tmp3); # used only in 64-bit mode ++my ($dat4,$in4,$tmp4); ++if ($flavour =~ /64/) { ++ ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23)); ++} ++ ++$code.=<<___ if ($flavour =~ /64/); ++.globl ${prefix}_xts_encrypt ++.type ${prefix}_xts_encrypt,%function ++.align 5 ++${prefix}_xts_encrypt: ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ cmp $len,#16 ++ // Original input data size bigger than 16, jump to big size processing. ++ b.ne .Lxts_enc_big_size ++ // Encrypt the iv with key2, as the first XEX iv. ++ ldr $rounds,[$key2,#240] ++ vld1.8 {$dat},[$key2],#16 ++ vld1.8 {$iv0},[$ivp] ++ sub $rounds,$rounds,#2 ++ vld1.8 {$dat1},[$key2],#16 ++ ++.Loop_enc_iv_enc: ++ aese $iv0,$dat ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat},[$key2],#16 ++ subs $rounds,$rounds,#2 ++ aese $iv0,$dat1 ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat1},[$key2],#16 ++ b.gt .Loop_enc_iv_enc ++ ++ aese $iv0,$dat ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat},[$key2] ++ aese $iv0,$dat1 ++ veor $iv0,$iv0,$dat ++ ++ vld1.8 {$dat0},[$inp] ++ veor $dat0,$iv0,$dat0 ++ ++ ldr $rounds,[$key1,#240] ++ vld1.32 {q20-q21},[$key1],#32 // load key schedule... ++ ++ aese $dat0,q20 ++ aesmc $dat0,$dat0 ++ vld1.32 {q8-q9},[$key1],#32 // load key schedule... ++ aese $dat0,q21 ++ aesmc $dat0,$dat0 ++ subs $rounds,$rounds,#10 // if rounds==10, jump to aes-128-xts processing ++ b.eq .Lxts_128_enc ++.Lxts_enc_round_loop: ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ vld1.32 {q8},[$key1],#16 // load key schedule... ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ vld1.32 {q9},[$key1],#16 // load key schedule... ++ subs $rounds,$rounds,#2 // bias ++ b.gt .Lxts_enc_round_loop ++.Lxts_128_enc: ++ vld1.32 {q10-q11},[$key1],#32 // load key schedule... ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ vld1.32 {q12-q13},[$key1],#32 // load key schedule... ++ aese $dat0,q10 ++ aesmc $dat0,$dat0 ++ aese $dat0,q11 ++ aesmc $dat0,$dat0 ++ vld1.32 {q14-q15},[$key1],#32 // load key schedule... ++ aese $dat0,q12 ++ aesmc $dat0,$dat0 ++ aese $dat0,q13 ++ aesmc $dat0,$dat0 ++ vld1.32 {$rndlast},[$key1] ++ aese $dat0,q14 ++ aesmc $dat0,$dat0 ++ aese $dat0,q15 ++ veor $dat0,$dat0,$rndlast ++ veor $dat0,$dat0,$iv0 ++ vst1.8 {$dat0},[$out] ++ b .Lxts_enc_final_abort ++ ++.align 4 ++.Lxts_enc_big_size: ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ stp $constnumx,$tmpinp,[sp,#-64]! ++ stp $tailcnt,$midnumx,[sp,#48] ++ stp $ivd10,$ivd20,[sp,#32] ++ stp $ivd30,$ivd40,[sp,#16] ++ ++ // tailcnt store the tail value of length%16. ++ and $tailcnt,$len,#0xf ++ and $len,$len,#-16 ++ subs $len,$len,#16 ++ mov $step,#16 ++ b.lo .Lxts_abort ++ csel $step,xzr,$step,eq ++ ++ // Firstly, encrypt the iv with key2, as the first iv of XEX. ++ ldr $rounds,[$key2,#240] ++ vld1.32 {$dat},[$key2],#16 ++ vld1.8 {$iv0},[$ivp] ++ sub $rounds,$rounds,#2 ++ vld1.32 {$dat1},[$key2],#16 ++ ++.Loop_iv_enc: ++ aese $iv0,$dat ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat},[$key2],#16 ++ subs $rounds,$rounds,#2 ++ aese $iv0,$dat1 ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat1},[$key2],#16 ++ b.gt .Loop_iv_enc ++ ++ aese $iv0,$dat ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat},[$key2] ++ aese $iv0,$dat1 ++ veor $iv0,$iv0,$dat ++ ++ // The iv for second block ++ // $ivl- iv(low), $ivh - iv(high) ++ // the five ivs stored into, $iv0,$iv1,$iv2,$iv3,$iv4 ++ fmov $ivl,$ivd00 ++ fmov $ivh,$ivd01 ++ mov $constnum,#0x87 ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd10,$ivl ++ fmov $ivd11,$ivh ++ ++ ldr $rounds0,[$key1,#240] // next starting point ++ vld1.8 {$dat},[$inp],$step ++ ++ vld1.32 {q8-q9},[$key1] // load key schedule... ++ sub $rounds0,$rounds0,#6 ++ add $key_,$key1,$ivp,lsl#4 // pointer to last 7 round keys ++ sub $rounds0,$rounds0,#2 ++ vld1.32 {q10-q11},[$key_],#32 ++ vld1.32 {q12-q13},[$key_],#32 ++ vld1.32 {q14-q15},[$key_],#32 ++ vld1.32 {$rndlast},[$key_] ++ ++ add $key_,$key1,#32 ++ mov $rounds,$rounds0 ++ ++ // Encryption ++.Lxts_enc: ++ vld1.8 {$dat2},[$inp],#16 ++ subs $len,$len,#32 // bias ++ add $rounds,$rounds0,#2 ++ vorr $in1,$dat,$dat ++ vorr $dat1,$dat,$dat ++ vorr $in3,$dat,$dat ++ vorr $in2,$dat2,$dat2 ++ vorr $in4,$dat2,$dat2 ++ b.lo .Lxts_inner_enc_tail ++ veor $dat,$dat,$iv0 // before encryption, xor with iv ++ veor $dat2,$dat2,$iv1 ++ ++ // The iv for third block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd20,$ivl ++ fmov $ivd21,$ivh ++ ++ ++ vorr $dat1,$dat2,$dat2 ++ vld1.8 {$dat2},[$inp],#16 ++ vorr $in0,$dat,$dat ++ vorr $in1,$dat1,$dat1 ++ veor $in2,$dat2,$iv2 // the third block ++ veor $dat2,$dat2,$iv2 ++ cmp $len,#32 ++ b.lo .Lxts_outer_enc_tail ++ ++ // The iv for fourth block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd30,$ivl ++ fmov $ivd31,$ivh ++ ++ vld1.8 {$dat3},[$inp],#16 ++ // The iv for fifth block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd40,$ivl ++ fmov $ivd41,$ivh ++ ++ vld1.8 {$dat4},[$inp],#16 ++ veor $dat3,$dat3,$iv3 // the fourth block ++ veor $dat4,$dat4,$iv4 ++ sub $len,$len,#32 // bias ++ mov $rounds,$rounds0 ++ b .Loop5x_xts_enc ++ ++.align 4 ++.Loop5x_xts_enc: ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ aese $dat3,q8 ++ aesmc $dat3,$dat3 ++ aese $dat4,q8 ++ aesmc $dat4,$dat4 ++ vld1.32 {q8},[$key_],#16 ++ subs $rounds,$rounds,#2 ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ aese $dat3,q9 ++ aesmc $dat3,$dat3 ++ aese $dat4,q9 ++ aesmc $dat4,$dat4 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Loop5x_xts_enc ++ ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ aese $dat3,q8 ++ aesmc $dat3,$dat3 ++ aese $dat4,q8 ++ aesmc $dat4,$dat4 ++ subs $len,$len,#0x50 // because .Lxts_enc_tail4x ++ ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ aese $dat3,q9 ++ aesmc $dat3,$dat3 ++ aese $dat4,q9 ++ aesmc $dat4,$dat4 ++ csel $xoffset,xzr,$len,gt // borrow x6, w6, "gt" is not typo ++ mov $key_,$key1 ++ ++ aese $dat0,q10 ++ aesmc $dat0,$dat0 ++ aese $dat1,q10 ++ aesmc $dat1,$dat1 ++ aese $dat2,q10 ++ aesmc $dat2,$dat2 ++ aese $dat3,q10 ++ aesmc $dat3,$dat3 ++ aese $dat4,q10 ++ aesmc $dat4,$dat4 ++ add $inp,$inp,$xoffset // x0 is adjusted in such way that ++ // at exit from the loop v1.16b-v26.16b ++ // are loaded with last "words" ++ add $xoffset,$len,#0x60 // because .Lxts_enc_tail4x ++ ++ aese $dat0,q11 ++ aesmc $dat0,$dat0 ++ aese $dat1,q11 ++ aesmc $dat1,$dat1 ++ aese $dat2,q11 ++ aesmc $dat2,$dat2 ++ aese $dat3,q11 ++ aesmc $dat3,$dat3 ++ aese $dat4,q11 ++ aesmc $dat4,$dat4 ++ ++ aese $dat0,q12 ++ aesmc $dat0,$dat0 ++ aese $dat1,q12 ++ aesmc $dat1,$dat1 ++ aese $dat2,q12 ++ aesmc $dat2,$dat2 ++ aese $dat3,q12 ++ aesmc $dat3,$dat3 ++ aese $dat4,q12 ++ aesmc $dat4,$dat4 ++ ++ aese $dat0,q13 ++ aesmc $dat0,$dat0 ++ aese $dat1,q13 ++ aesmc $dat1,$dat1 ++ aese $dat2,q13 ++ aesmc $dat2,$dat2 ++ aese $dat3,q13 ++ aesmc $dat3,$dat3 ++ aese $dat4,q13 ++ aesmc $dat4,$dat4 ++ ++ aese $dat0,q14 ++ aesmc $dat0,$dat0 ++ aese $dat1,q14 ++ aesmc $dat1,$dat1 ++ aese $dat2,q14 ++ aesmc $dat2,$dat2 ++ aese $dat3,q14 ++ aesmc $dat3,$dat3 ++ aese $dat4,q14 ++ aesmc $dat4,$dat4 ++ ++ veor $tmp0,$rndlast,$iv0 ++ aese $dat0,q15 ++ // The iv for first block of one iteration ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd00,$ivl ++ fmov $ivd01,$ivh ++ veor $tmp1,$rndlast,$iv1 ++ vld1.8 {$in0},[$inp],#16 ++ aese $dat1,q15 ++ // The iv for second block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd10,$ivl ++ fmov $ivd11,$ivh ++ veor $tmp2,$rndlast,$iv2 ++ vld1.8 {$in1},[$inp],#16 ++ aese $dat2,q15 ++ // The iv for third block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd20,$ivl ++ fmov $ivd21,$ivh ++ veor $tmp3,$rndlast,$iv3 ++ vld1.8 {$in2},[$inp],#16 ++ aese $dat3,q15 ++ // The iv for fourth block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd30,$ivl ++ fmov $ivd31,$ivh ++ veor $tmp4,$rndlast,$iv4 ++ vld1.8 {$in3},[$inp],#16 ++ aese $dat4,q15 ++ ++ // The iv for fifth block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd40,$ivl ++ fmov $ivd41,$ivh ++ ++ vld1.8 {$in4},[$inp],#16 ++ cbz $xoffset,.Lxts_enc_tail4x ++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] ++ veor $tmp0,$tmp0,$dat0 ++ veor $dat0,$in0,$iv0 ++ veor $tmp1,$tmp1,$dat1 ++ veor $dat1,$in1,$iv1 ++ veor $tmp2,$tmp2,$dat2 ++ veor $dat2,$in2,$iv2 ++ veor $tmp3,$tmp3,$dat3 ++ veor $dat3,$in3,$iv3 ++ veor $tmp4,$tmp4,$dat4 ++ vst1.8 {$tmp0},[$out],#16 ++ veor $dat4,$in4,$iv4 ++ vst1.8 {$tmp1},[$out],#16 ++ mov $rounds,$rounds0 ++ vst1.8 {$tmp2},[$out],#16 ++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] ++ vst1.8 {$tmp3},[$out],#16 ++ vst1.8 {$tmp4},[$out],#16 ++ b.hs .Loop5x_xts_enc ++ ++ ++ // If left 4 blocks, borrow the five block's processing. ++ cmn $len,#0x10 ++ b.ne .Loop5x_enc_after ++ vorr $iv4,$iv3,$iv3 ++ vorr $iv3,$iv2,$iv2 ++ vorr $iv2,$iv1,$iv1 ++ vorr $iv1,$iv0,$iv0 ++ fmov $ivl,$ivd40 ++ fmov $ivh,$ivd41 ++ veor $dat0,$iv0,$in0 ++ veor $dat1,$iv1,$in1 ++ veor $dat2,$in2,$iv2 ++ veor $dat3,$in3,$iv3 ++ veor $dat4,$in4,$iv4 ++ b.eq .Loop5x_xts_enc ++ ++.Loop5x_enc_after: ++ add $len,$len,#0x50 ++ cbz $len,.Lxts_enc_done ++ ++ add $rounds,$rounds0,#2 ++ subs $len,$len,#0x30 ++ b.lo .Lxts_inner_enc_tail ++ ++ veor $dat0,$iv0,$in2 ++ veor $dat1,$iv1,$in3 ++ veor $dat2,$in4,$iv2 ++ b .Lxts_outer_enc_tail ++ ++.align 4 ++.Lxts_enc_tail4x: ++ add $inp,$inp,#16 ++ veor $tmp1,$dat1,$tmp1 ++ vst1.8 {$tmp1},[$out],#16 ++ veor $tmp2,$dat2,$tmp2 ++ vst1.8 {$tmp2},[$out],#16 ++ veor $tmp3,$dat3,$tmp3 ++ veor $tmp4,$dat4,$tmp4 ++ vst1.8 {$tmp3-$tmp4},[$out],#32 ++ ++ b .Lxts_enc_done ++.align 4 ++.Lxts_outer_enc_tail: ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ vld1.32 {q8},[$key_],#16 ++ subs $rounds,$rounds,#2 ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Lxts_outer_enc_tail ++ ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ veor $tmp0,$iv0,$rndlast ++ subs $len,$len,#0x30 ++ // The iv for first block ++ fmov $ivl,$ivd20 ++ fmov $ivh,$ivd21 ++ //mov $constnum,#0x87 ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd00,$ivl ++ fmov $ivd01,$ivh ++ veor $tmp1,$iv1,$rndlast ++ csel $xoffset,$len,$xoffset,lo // x6, w6, is zero at this point ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ veor $tmp2,$iv2,$rndlast ++ ++ add $xoffset,$xoffset,#0x20 ++ add $inp,$inp,$xoffset ++ mov $key_,$key1 ++ ++ aese $dat0,q12 ++ aesmc $dat0,$dat0 ++ aese $dat1,q12 ++ aesmc $dat1,$dat1 ++ aese $dat2,q12 ++ aesmc $dat2,$dat2 ++ aese $dat0,q13 ++ aesmc $dat0,$dat0 ++ aese $dat1,q13 ++ aesmc $dat1,$dat1 ++ aese $dat2,q13 ++ aesmc $dat2,$dat2 ++ aese $dat0,q14 ++ aesmc $dat0,$dat0 ++ aese $dat1,q14 ++ aesmc $dat1,$dat1 ++ aese $dat2,q14 ++ aesmc $dat2,$dat2 ++ aese $dat0,q15 ++ aese $dat1,q15 ++ aese $dat2,q15 ++ vld1.8 {$in2},[$inp],#16 ++ add $rounds,$rounds0,#2 ++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] ++ veor $tmp0,$tmp0,$dat0 ++ veor $tmp1,$tmp1,$dat1 ++ veor $dat2,$dat2,$tmp2 ++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] ++ vst1.8 {$tmp0},[$out],#16 ++ vst1.8 {$tmp1},[$out],#16 ++ vst1.8 {$dat2},[$out],#16 ++ cmn $len,#0x30 ++ b.eq .Lxts_enc_done ++.Lxts_encxor_one: ++ vorr $in3,$in1,$in1 ++ vorr $in4,$in2,$in2 ++ nop ++ ++.Lxts_inner_enc_tail: ++ cmn $len,#0x10 ++ veor $dat1,$in3,$iv0 ++ veor $dat2,$in4,$iv1 ++ b.eq .Lxts_enc_tail_loop ++ veor $dat2,$in4,$iv0 ++.Lxts_enc_tail_loop: ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ vld1.32 {q8},[$key_],#16 ++ subs $rounds,$rounds,#2 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Lxts_enc_tail_loop ++ ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ aese $dat1,q12 ++ aesmc $dat1,$dat1 ++ aese $dat2,q12 ++ aesmc $dat2,$dat2 ++ cmn $len,#0x20 ++ aese $dat1,q13 ++ aesmc $dat1,$dat1 ++ aese $dat2,q13 ++ aesmc $dat2,$dat2 ++ veor $tmp1,$iv0,$rndlast ++ aese $dat1,q14 ++ aesmc $dat1,$dat1 ++ aese $dat2,q14 ++ aesmc $dat2,$dat2 ++ veor $tmp2,$iv1,$rndlast ++ aese $dat1,q15 ++ aese $dat2,q15 ++ b.eq .Lxts_enc_one ++ veor $tmp1,$tmp1,$dat1 ++ vst1.8 {$tmp1},[$out],#16 ++ veor $tmp2,$tmp2,$dat2 ++ vorr $iv0,$iv1,$iv1 ++ vst1.8 {$tmp2},[$out],#16 ++ fmov $ivl,$ivd10 ++ fmov $ivh,$ivd11 ++ mov $constnum,#0x87 ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd00,$ivl ++ fmov $ivd01,$ivh ++ b .Lxts_enc_done ++ ++.Lxts_enc_one: ++ veor $tmp1,$tmp1,$dat2 ++ vorr $iv0,$iv0,$iv0 ++ vst1.8 {$tmp1},[$out],#16 ++ fmov $ivl,$ivd00 ++ fmov $ivh,$ivd01 ++ mov $constnum,#0x87 ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd00,$ivl ++ fmov $ivd01,$ivh ++ b .Lxts_enc_done ++.align 5 ++.Lxts_enc_done: ++ // Process the tail block with cipher stealing. ++ tst $tailcnt,#0xf ++ b.eq .Lxts_abort ++ ++ mov $tmpinp,$inp ++ mov $tmpoutp,$out ++ sub $out,$out,#16 ++.composite_enc_loop: ++ subs $tailcnt,$tailcnt,#1 ++ ldrb $l2outp,[$out,$tailcnt] ++ ldrb $loutp,[$tmpinp,$tailcnt] ++ strb $l2outp,[$tmpoutp,$tailcnt] ++ strb $loutp,[$out,$tailcnt] ++ b.gt .composite_enc_loop ++.Lxts_enc_load_done: ++ vld1.8 {$tmpin},[$out] ++ veor $tmpin,$tmpin,$iv0 ++ ++ // Encrypt the composite block to get the last second encrypted text block ++ ldr $rounds,[$key1,#240] // load key schedule... ++ vld1.8 {$dat},[$key1],#16 ++ sub $rounds,$rounds,#2 ++ vld1.8 {$dat1},[$key1],#16 // load key schedule... ++.Loop_final_enc: ++ aese $tmpin,$dat0 ++ aesmc $tmpin,$tmpin ++ vld1.32 {$dat0},[$key1],#16 ++ subs $rounds,$rounds,#2 ++ aese $tmpin,$dat1 ++ aesmc $tmpin,$tmpin ++ vld1.32 {$dat1},[$key1],#16 ++ b.gt .Loop_final_enc ++ ++ aese $tmpin,$dat0 ++ aesmc $tmpin,$tmpin ++ vld1.32 {$dat0},[$key1] ++ aese $tmpin,$dat1 ++ veor $tmpin,$tmpin,$dat0 ++ veor $tmpin,$tmpin,$iv0 ++ vst1.8 {$tmpin},[$out] ++ ++.Lxts_abort: ++ ldp $tailcnt,$midnumx,[sp,#48] ++ ldp $ivd10,$ivd20,[sp,#32] ++ ldp $ivd30,$ivd40,[sp,#16] ++ ldp $constnumx,$tmpinp,[sp],#64 ++.Lxts_enc_final_abort: ++ ret ++.size ${prefix}_xts_encrypt,.-${prefix}_xts_encrypt ++___ ++ ++}}} ++{{{ ++my ($inp,$out,$len,$key1,$key2,$ivp)=map("x$_",(0..5)); ++my ($rounds0,$rounds,$key_,$step,$ivl,$ivh)=("w5","w6","x7","x8","x9","x10"); ++my ($tmpoutp,$loutp,$l2outp,$tmpinp)=("x13","w14","w15","x20"); ++my ($tailcnt,$midnum,$midnumx,$constnum,$constnumx)=("x21","w22","x22","w19","x19"); ++my ($xoffset,$tmpmx,$tmpmw)=("x6","x11","w11"); ++my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7)); ++my ($iv0,$iv1,$iv2,$iv3,$iv4,$tmpin)=("v6.16b","v8.16b","v9.16b","v10.16b","v11.16b","v26.16b"); ++my ($ivd00,$ivd01,$ivd20,$ivd21)=("d6","v6.d[1]","d9","v9.d[1]"); ++my ($ivd10,$ivd11,$ivd30,$ivd31,$ivd40,$ivd41)=("d8","v8.d[1]","d10","v10.d[1]","d11","v11.d[1]"); ++ ++my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1); ++ ++# q7 last round key ++# q10-q15, q7 Last 7 round keys ++# q8-q9 preloaded round keys except last 7 keys for big size ++# q20, q21, q8-q9 preloaded round keys except last 7 keys for only 16 byte ++ ++{ ++my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); ++ ++my ($dat3,$in3,$tmp3); # used only in 64-bit mode ++my ($dat4,$in4,$tmp4); ++if ($flavour =~ /64/) { ++ ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23)); ++} ++ ++$code.=<<___ if ($flavour =~ /64/); ++.globl ${prefix}_xts_decrypt ++.type ${prefix}_xts_decrypt,%function ++.align 5 ++${prefix}_xts_decrypt: ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ cmp $len,#16 ++ // Original input data size bigger than 16, jump to big size processing. ++ b.ne .Lxts_dec_big_size ++ // Encrypt the iv with key2, as the first XEX iv. ++ ldr $rounds,[$key2,#240] ++ vld1.8 {$dat},[$key2],#16 ++ vld1.8 {$iv0},[$ivp] ++ sub $rounds,$rounds,#2 ++ vld1.8 {$dat1},[$key2],#16 ++ ++.Loop_dec_small_iv_enc: ++ aese $iv0,$dat ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat},[$key2],#16 ++ subs $rounds,$rounds,#2 ++ aese $iv0,$dat1 ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat1},[$key2],#16 ++ b.gt .Loop_dec_small_iv_enc ++ ++ aese $iv0,$dat ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat},[$key2] ++ aese $iv0,$dat1 ++ veor $iv0,$iv0,$dat ++ ++ vld1.8 {$dat0},[$inp] ++ veor $dat0,$iv0,$dat0 ++ ++ ldr $rounds,[$key1,#240] ++ vld1.32 {q20-q21},[$key1],#32 // load key schedule... ++ ++ aesd $dat0,q20 ++ aesimc $dat0,$dat0 ++ vld1.32 {q8-q9},[$key1],#32 // load key schedule... ++ aesd $dat0,q21 ++ aesimc $dat0,$dat0 ++ subs $rounds,$rounds,#10 // bias ++ b.eq .Lxts_128_dec ++.Lxts_dec_round_loop: ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ vld1.32 {q8},[$key1],#16 // load key schedule... ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ vld1.32 {q9},[$key1],#16 // load key schedule... ++ subs $rounds,$rounds,#2 // bias ++ b.gt .Lxts_dec_round_loop ++.Lxts_128_dec: ++ vld1.32 {q10-q11},[$key1],#32 // load key schedule... ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ vld1.32 {q12-q13},[$key1],#32 // load key schedule... ++ aesd $dat0,q10 ++ aesimc $dat0,$dat0 ++ aesd $dat0,q11 ++ aesimc $dat0,$dat0 ++ vld1.32 {q14-q15},[$key1],#32 // load key schedule... ++ aesd $dat0,q12 ++ aesimc $dat0,$dat0 ++ aesd $dat0,q13 ++ aesimc $dat0,$dat0 ++ vld1.32 {$rndlast},[$key1] ++ aesd $dat0,q14 ++ aesimc $dat0,$dat0 ++ aesd $dat0,q15 ++ veor $dat0,$dat0,$rndlast ++ veor $dat0,$iv0,$dat0 ++ vst1.8 {$dat0},[$out] ++ b .Lxts_dec_final_abort ++.Lxts_dec_big_size: ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ stp $constnumx,$tmpinp,[sp,#-64]! ++ stp $tailcnt,$midnumx,[sp,#48] ++ stp $ivd10,$ivd20,[sp,#32] ++ stp $ivd30,$ivd40,[sp,#16] ++ ++ and $tailcnt,$len,#0xf ++ and $len,$len,#-16 ++ subs $len,$len,#16 ++ mov $step,#16 ++ b.lo .Lxts_dec_abort ++ ++ // Encrypt the iv with key2, as the first XEX iv ++ ldr $rounds,[$key2,#240] ++ vld1.8 {$dat},[$key2],#16 ++ vld1.8 {$iv0},[$ivp] ++ sub $rounds,$rounds,#2 ++ vld1.8 {$dat1},[$key2],#16 ++ ++.Loop_dec_iv_enc: ++ aese $iv0,$dat ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat},[$key2],#16 ++ subs $rounds,$rounds,#2 ++ aese $iv0,$dat1 ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat1},[$key2],#16 ++ b.gt .Loop_dec_iv_enc ++ ++ aese $iv0,$dat ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat},[$key2] ++ aese $iv0,$dat1 ++ veor $iv0,$iv0,$dat ++ ++ // The iv for second block ++ // $ivl- iv(low), $ivh - iv(high) ++ // the five ivs stored into, $iv0,$iv1,$iv2,$iv3,$iv4 ++ fmov $ivl,$ivd00 ++ fmov $ivh,$ivd01 ++ mov $constnum,#0x87 ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd10,$ivl ++ fmov $ivd11,$ivh ++ ++ ldr $rounds0,[$key1,#240] // load rounds number ++ ++ // The iv for third block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd20,$ivl ++ fmov $ivd21,$ivh ++ ++ vld1.32 {q8-q9},[$key1] // load key schedule... ++ sub $rounds0,$rounds0,#6 ++ add $key_,$key1,$ivp,lsl#4 // pointer to last 7 round keys ++ sub $rounds0,$rounds0,#2 ++ vld1.32 {q10-q11},[$key_],#32 // load key schedule... ++ vld1.32 {q12-q13},[$key_],#32 ++ vld1.32 {q14-q15},[$key_],#32 ++ vld1.32 {$rndlast},[$key_] ++ ++ // The iv for fourth block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd30,$ivl ++ fmov $ivd31,$ivh ++ ++ add $key_,$key1,#32 ++ mov $rounds,$rounds0 ++ b .Lxts_dec ++ ++ // Decryption ++.align 5 ++.Lxts_dec: ++ tst $tailcnt,#0xf ++ b.eq .Lxts_dec_begin ++ subs $len,$len,#16 ++ csel $step,xzr,$step,eq ++ vld1.8 {$dat},[$inp],#16 ++ b.lo .Lxts_done ++ sub $inp,$inp,#16 ++.Lxts_dec_begin: ++ vld1.8 {$dat},[$inp],$step ++ subs $len,$len,#32 // bias ++ add $rounds,$rounds0,#2 ++ vorr $in1,$dat,$dat ++ vorr $dat1,$dat,$dat ++ vorr $in3,$dat,$dat ++ vld1.8 {$dat2},[$inp],#16 ++ vorr $in2,$dat2,$dat2 ++ vorr $in4,$dat2,$dat2 ++ b.lo .Lxts_inner_dec_tail ++ veor $dat,$dat,$iv0 // before decryt, xor with iv ++ veor $dat2,$dat2,$iv1 ++ ++ vorr $dat1,$dat2,$dat2 ++ vld1.8 {$dat2},[$inp],#16 ++ vorr $in0,$dat,$dat ++ vorr $in1,$dat1,$dat1 ++ veor $in2,$dat2,$iv2 // third block xox with third iv ++ veor $dat2,$dat2,$iv2 ++ cmp $len,#32 ++ b.lo .Lxts_outer_dec_tail ++ ++ vld1.8 {$dat3},[$inp],#16 ++ ++ // The iv for fifth block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd40,$ivl ++ fmov $ivd41,$ivh ++ ++ vld1.8 {$dat4},[$inp],#16 ++ veor $dat3,$dat3,$iv3 // the fourth block ++ veor $dat4,$dat4,$iv4 ++ sub $len,$len,#32 // bias ++ mov $rounds,$rounds0 ++ b .Loop5x_xts_dec ++ ++.align 4 ++.Loop5x_xts_dec: ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q8 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q8 ++ aesimc $dat4,$dat4 ++ vld1.32 {q8},[$key_],#16 // load key schedule... ++ subs $rounds,$rounds,#2 ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q9 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q9 ++ aesimc $dat4,$dat4 ++ vld1.32 {q9},[$key_],#16 // load key schedule... ++ b.gt .Loop5x_xts_dec ++ ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q8 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q8 ++ aesimc $dat4,$dat4 ++ subs $len,$len,#0x50 // because .Lxts_dec_tail4x ++ ++ aesd $dat0,q9 ++ aesimc $dat0,$dat ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q9 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q9 ++ aesimc $dat4,$dat4 ++ csel $xoffset,xzr,$len,gt // borrow x6, w6, "gt" is not typo ++ mov $key_,$key1 ++ ++ aesd $dat0,q10 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q10 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q10 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q10 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q10 ++ aesimc $dat4,$dat4 ++ add $inp,$inp,$xoffset // x0 is adjusted in such way that ++ // at exit from the loop v1.16b-v26.16b ++ // are loaded with last "words" ++ add $xoffset,$len,#0x60 // because .Lxts_dec_tail4x ++ ++ aesd $dat0,q11 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q11 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q11 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q11 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q11 ++ aesimc $dat4,$dat4 ++ ++ aesd $dat0,q12 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q12 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q12 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q12 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q12 ++ aesimc $dat4,$dat4 ++ ++ aesd $dat0,q13 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q13 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q13 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q13 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q13 ++ aesimc $dat4,$dat4 ++ ++ aesd $dat0,q14 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q14 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q14 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q14 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q14 ++ aesimc $dat4,$dat4 ++ ++ veor $tmp0,$rndlast,$iv0 ++ aesd $dat0,q15 ++ // The iv for first block of next iteration. ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd00,$ivl ++ fmov $ivd01,$ivh ++ veor $tmp1,$rndlast,$iv1 ++ vld1.8 {$in0},[$inp],#16 ++ aesd $dat1,q15 ++ // The iv for second block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd10,$ivl ++ fmov $ivd11,$ivh ++ veor $tmp2,$rndlast,$iv2 ++ vld1.8 {$in1},[$inp],#16 ++ aesd $dat2,q15 ++ // The iv for third block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd20,$ivl ++ fmov $ivd21,$ivh ++ veor $tmp3,$rndlast,$iv3 ++ vld1.8 {$in2},[$inp],#16 ++ aesd $dat3,q15 ++ // The iv for fourth block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd30,$ivl ++ fmov $ivd31,$ivh ++ veor $tmp4,$rndlast,$iv4 ++ vld1.8 {$in3},[$inp],#16 ++ aesd $dat4,q15 ++ ++ // The iv for fifth block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd40,$ivl ++ fmov $ivd41,$ivh ++ ++ vld1.8 {$in4},[$inp],#16 ++ cbz $xoffset,.Lxts_dec_tail4x ++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] ++ veor $tmp0,$tmp0,$dat0 ++ veor $dat0,$in0,$iv0 ++ veor $tmp1,$tmp1,$dat1 ++ veor $dat1,$in1,$iv1 ++ veor $tmp2,$tmp2,$dat2 ++ veor $dat2,$in2,$iv2 ++ veor $tmp3,$tmp3,$dat3 ++ veor $dat3,$in3,$iv3 ++ veor $tmp4,$tmp4,$dat4 ++ vst1.8 {$tmp0},[$out],#16 ++ veor $dat4,$in4,$iv4 ++ vst1.8 {$tmp1},[$out],#16 ++ mov $rounds,$rounds0 ++ vst1.8 {$tmp2},[$out],#16 ++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] ++ vst1.8 {$tmp3},[$out],#16 ++ vst1.8 {$tmp4},[$out],#16 ++ b.hs .Loop5x_xts_dec ++ ++ cmn $len,#0x10 ++ b.ne .Loop5x_dec_after ++ // If x2($len) equal to -0x10, the left blocks is 4. ++ // After specially processing, utilize the five blocks processing again. ++ // It will use the following IVs: $iv0,$iv0,$iv1,$iv2,$iv3. ++ vorr $iv4,$iv3,$iv3 ++ vorr $iv3,$iv2,$iv2 ++ vorr $iv2,$iv1,$iv1 ++ vorr $iv1,$iv0,$iv0 ++ fmov $ivl,$ivd40 ++ fmov $ivh,$ivd41 ++ veor $dat0,$iv0,$in0 ++ veor $dat1,$iv1,$in1 ++ veor $dat2,$in2,$iv2 ++ veor $dat3,$in3,$iv3 ++ veor $dat4,$in4,$iv4 ++ b.eq .Loop5x_xts_dec ++ ++.Loop5x_dec_after: ++ add $len,$len,#0x50 ++ cbz $len,.Lxts_done ++ ++ add $rounds,$rounds0,#2 ++ subs $len,$len,#0x30 ++ b.lo .Lxts_inner_dec_tail ++ ++ veor $dat0,$iv0,$in2 ++ veor $dat1,$iv1,$in3 ++ veor $dat2,$in4,$iv2 ++ b .Lxts_outer_dec_tail ++ ++.align 4 ++.Lxts_dec_tail4x: ++ add $inp,$inp,#16 ++ vld1.32 {$dat0},[$inp],#16 ++ veor $tmp1,$dat1,$tmp0 ++ vst1.8 {$tmp1},[$out],#16 ++ veor $tmp2,$dat2,$tmp2 ++ vst1.8 {$tmp2},[$out],#16 ++ veor $tmp3,$dat3,$tmp3 ++ veor $tmp4,$dat4,$tmp4 ++ vst1.8 {$tmp3-$tmp4},[$out],#32 ++ ++ b .Lxts_done ++.align 4 ++.Lxts_outer_dec_tail: ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ vld1.32 {q8},[$key_],#16 ++ subs $rounds,$rounds,#2 ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Lxts_outer_dec_tail ++ ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ veor $tmp0,$iv0,$rndlast ++ subs $len,$len,#0x30 ++ // The iv for first block ++ fmov $ivl,$ivd20 ++ fmov $ivh,$ivd21 ++ mov $constnum,#0x87 ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd00,$ivl ++ fmov $ivd01,$ivh ++ veor $tmp1,$iv1,$rndlast ++ csel $xoffset,$len,$xoffset,lo // x6, w6, is zero at this point ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ veor $tmp2,$iv2,$rndlast ++ // The iv for second block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd10,$ivl ++ fmov $ivd11,$ivh ++ ++ add $xoffset,$xoffset,#0x20 ++ add $inp,$inp,$xoffset // $inp is adjusted to the last data ++ ++ mov $key_,$key1 ++ ++ // The iv for third block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd20,$ivl ++ fmov $ivd21,$ivh ++ ++ aesd $dat0,q12 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q12 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q12 ++ aesimc $dat2,$dat2 ++ aesd $dat0,q13 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q13 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q13 ++ aesimc $dat2,$dat2 ++ aesd $dat0,q14 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q14 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q14 ++ aesimc $dat2,$dat2 ++ vld1.8 {$in2},[$inp],#16 ++ aesd $dat0,q15 ++ aesd $dat1,q15 ++ aesd $dat2,q15 ++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] ++ add $rounds,$rounds0,#2 ++ veor $tmp0,$tmp0,$dat0 ++ veor $tmp1,$tmp1,$dat1 ++ veor $dat2,$dat2,$tmp2 ++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] ++ vst1.8 {$tmp0},[$out],#16 ++ vst1.8 {$tmp1},[$out],#16 ++ vst1.8 {$dat2},[$out],#16 ++ ++ cmn $len,#0x30 ++ add $len,$len,#0x30 ++ b.eq .Lxts_done ++ sub $len,$len,#0x30 ++ vorr $in3,$in1,$in1 ++ vorr $in4,$in2,$in2 ++ nop ++ ++.Lxts_inner_dec_tail: ++ // $len == -0x10 means two blocks left. ++ cmn $len,#0x10 ++ veor $dat1,$in3,$iv0 ++ veor $dat2,$in4,$iv1 ++ b.eq .Lxts_dec_tail_loop ++ veor $dat2,$in4,$iv0 ++.Lxts_dec_tail_loop: ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ vld1.32 {q8},[$key_],#16 ++ subs $rounds,$rounds,#2 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Lxts_dec_tail_loop ++ ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ aesd $dat1,q12 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q12 ++ aesimc $dat2,$dat2 ++ cmn $len,#0x20 ++ aesd $dat1,q13 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q13 ++ aesimc $dat2,$dat2 ++ veor $tmp1,$iv0,$rndlast ++ aesd $dat1,q14 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q14 ++ aesimc $dat2,$dat2 ++ veor $tmp2,$iv1,$rndlast ++ aesd $dat1,q15 ++ aesd $dat2,q15 ++ b.eq .Lxts_dec_one ++ veor $tmp1,$tmp1,$dat1 ++ veor $tmp2,$tmp2,$dat2 ++ vorr $iv0,$iv2,$iv2 ++ vorr $iv1,$iv3,$iv3 ++ vst1.8 {$tmp1},[$out],#16 ++ vst1.8 {$tmp2},[$out],#16 ++ add $len,$len,#16 ++ b .Lxts_done ++ ++.Lxts_dec_one: ++ veor $tmp1,$tmp1,$dat2 ++ vorr $iv0,$iv1,$iv1 ++ vorr $iv1,$iv2,$iv2 ++ vst1.8 {$tmp1},[$out],#16 ++ add $len,$len,#32 ++ ++.Lxts_done: ++ tst $tailcnt,#0xf ++ b.eq .Lxts_dec_abort ++ // Processing the last two blocks with cipher stealing. ++ mov x7,x3 ++ cbnz x2,.Lxts_dec_1st_done ++ vld1.32 {$dat0},[$inp],#16 ++ ++ // Decrypt the last secod block to get the last plain text block ++.Lxts_dec_1st_done: ++ eor $tmpin,$dat0,$iv1 ++ ldr $rounds,[$key1,#240] ++ vld1.32 {$dat0},[$key1],#16 ++ sub $rounds,$rounds,#2 ++ vld1.32 {$dat1},[$key1],#16 ++.Loop_final_2nd_dec: ++ aesd $tmpin,$dat0 ++ aesimc $tmpin,$tmpin ++ vld1.32 {$dat0},[$key1],#16 // load key schedule... ++ subs $rounds,$rounds,#2 ++ aesd $tmpin,$dat1 ++ aesimc $tmpin,$tmpin ++ vld1.32 {$dat1},[$key1],#16 // load key schedule... ++ b.gt .Loop_final_2nd_dec ++ ++ aesd $tmpin,$dat0 ++ aesimc $tmpin,$tmpin ++ vld1.32 {$dat0},[$key1] ++ aesd $tmpin,$dat1 ++ veor $tmpin,$tmpin,$dat0 ++ veor $tmpin,$tmpin,$iv1 ++ vst1.8 {$tmpin},[$out] ++ ++ mov $tmpinp,$inp ++ add $tmpoutp,$out,#16 ++ ++ // Composite the tailcnt "16 byte not aligned block" into the last second plain blocks ++ // to get the last encrypted block. ++.composite_dec_loop: ++ subs $tailcnt,$tailcnt,#1 ++ ldrb $l2outp,[$out,$tailcnt] ++ ldrb $loutp,[$tmpinp,$tailcnt] ++ strb $l2outp,[$tmpoutp,$tailcnt] ++ strb $loutp,[$out,$tailcnt] ++ b.gt .composite_dec_loop ++.Lxts_dec_load_done: ++ vld1.8 {$tmpin},[$out] ++ veor $tmpin,$tmpin,$iv0 ++ ++ // Decrypt the composite block to get the last second plain text block ++ ldr $rounds,[$key_,#240] ++ vld1.8 {$dat},[$key_],#16 ++ sub $rounds,$rounds,#2 ++ vld1.8 {$dat1},[$key_],#16 ++.Loop_final_dec: ++ aesd $tmpin,$dat0 ++ aesimc $tmpin,$tmpin ++ vld1.32 {$dat0},[$key_],#16 // load key schedule... ++ subs $rounds,$rounds,#2 ++ aesd $tmpin,$dat1 ++ aesimc $tmpin,$tmpin ++ vld1.32 {$dat1},[$key_],#16 // load key schedule... ++ b.gt .Loop_final_dec ++ ++ aesd $tmpin,$dat0 ++ aesimc $tmpin,$tmpin ++ vld1.32 {$dat0},[$key_] ++ aesd $tmpin,$dat1 ++ veor $tmpin,$tmpin,$dat0 ++ veor $tmpin,$tmpin,$iv0 ++ vst1.8 {$tmpin},[$out] ++ ++.Lxts_dec_abort: ++ ldp $tailcnt,$midnumx,[sp,#48] ++ ldp $ivd10,$ivd20,[sp,#32] ++ ldp $ivd30,$ivd40,[sp,#16] ++ ldp $constnumx,$tmpinp,[sp],#64 ++ ++.Lxts_dec_final_abort: ++ ret ++.size ${prefix}_xts_decrypt,.-${prefix}_xts_decrypt ++___ ++} ++}}} + $code.=<<___; + #endif + ___ +@@ -963,7 +3615,7 @@ if ($flavour =~ /64/) { ######## 64-bi # since ARMv7 instructions are always encoded little-endian. # correct solution is to use .inst directive, but older # assemblers don't implement it:-( @@ -491,7 +2761,7 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c $word&0xff,($word>>8)&0xff, ($word>>16)&0xff,($word>>24)&0xff, $mnemonic,$arg; -@@ -996,14 +1392,17 @@ if ($flavour =~ /64/) { ######## 64-bi +@@ -1004,14 +3656,17 @@ if ($flavour =~ /64/) { ######## 64-bi s/\],#[0-9]+/]!/o; s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or @@ -511,9 +2781,9 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c print $_,"\n"; } } -diff -up openssl-1.1.1c/crypto/aes/asm/vpaes-armv8.pl.arm-update openssl-1.1.1c/crypto/aes/asm/vpaes-armv8.pl ---- openssl-1.1.1c/crypto/aes/asm/vpaes-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/aes/asm/vpaes-armv8.pl 2019-11-20 11:36:22.389506155 +0100 +diff -up openssl-1.1.1i/crypto/aes/asm/vpaes-armv8.pl.arm-update openssl-1.1.1i/crypto/aes/asm/vpaes-armv8.pl +--- openssl-1.1.1i/crypto/aes/asm/vpaes-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/aes/asm/vpaes-armv8.pl 2020-12-09 10:37:38.405558929 +0100 @@ -30,6 +30,7 @@ # Denver(***) 16.6(**) 15.1/17.8(**) [8.80/9.93 ] # Apple A7(***) 22.7(**) 10.9/14.3 [8.45/10.0 ] @@ -522,9 +2792,9 @@ diff -up openssl-1.1.1c/crypto/aes/asm/vpaes-armv8.pl.arm-update openssl-1.1.1c/ # # (*) ECB denotes approximate result for parallelizable modes # such as CBC decrypt, CTR, etc.; -diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl ---- openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl 2019-11-21 16:44:50.814651553 +0100 +diff -up openssl-1.1.1i/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1.1i/crypto/chacha/asm/chacha-armv8.pl +--- openssl-1.1.1i/crypto/chacha/asm/chacha-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/chacha/asm/chacha-armv8.pl 2020-12-09 10:40:57.922288627 +0100 @@ -18,32 +18,44 @@ # # ChaCha20 for ARMv8. @@ -585,20 +2855,22 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 *STDOUT=*OUT; sub AUTOLOAD() # thunk [simplified] x86-style perlasm -@@ -120,41 +132,36 @@ my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1) +@@ -120,42 +132,37 @@ my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1) } $code.=<<___; -#include "arm_arch.h" +- +-.text +- +#ifndef __KERNEL__ +# include "arm_arch.h" -+.extern OPENSSL_armcap_P + .extern OPENSSL_armcap_P + .hidden OPENSSL_armcap_P +#endif ++ ++.text - .text - --.extern OPENSSL_armcap_P -- .align 5 .Lsigma: .quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral @@ -641,7 +2913,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 .Lshort: .inst 0xd503233f // paciasp -@@ -173,7 +180,7 @@ ChaCha20_ctr32: +@@ -174,7 +181,7 @@ ChaCha20_ctr32: ldp @d[2],@d[3],[$key] // load key ldp @d[4],@d[5],[$key,#16] ldp @d[6],@d[7],[$ctr] // load counter @@ -650,7 +2922,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 ror @d[2],@d[2],#32 ror @d[3],@d[3],#32 ror @d[4],@d[4],#32 -@@ -242,7 +249,7 @@ $code.=<<___; +@@ -243,7 +250,7 @@ $code.=<<___; add @x[14],@x[14],@x[15],lsl#32 ldp @x[13],@x[15],[$inp,#48] add $inp,$inp,#64 @@ -659,7 +2931,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 rev @x[0],@x[0] rev @x[2],@x[2] rev @x[4],@x[4] -@@ -299,7 +306,7 @@ $code.=<<___; +@@ -300,7 +307,7 @@ $code.=<<___; add @x[10],@x[10],@x[11],lsl#32 add @x[12],@x[12],@x[13],lsl#32 add @x[14],@x[14],@x[15],lsl#32 @@ -668,7 +2940,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 rev @x[0],@x[0] rev @x[2],@x[2] rev @x[4],@x[4] -@@ -340,46 +347,91 @@ $code.=<<___; +@@ -341,46 +348,91 @@ $code.=<<___; ___ {{{ @@ -789,7 +3061,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 .inst 0xd503233f // paciasp stp x29,x30,[sp,#-96]! add x29,sp,#0 -@@ -402,8 +454,9 @@ ChaCha20_neon: +@@ -403,8 +455,9 @@ ChaCha20_neon: ld1 {@K[1],@K[2]},[$key] ldp @d[6],@d[7],[$ctr] // load counter ld1 {@K[3]},[$ctr] @@ -801,7 +3073,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 rev64 @K[0],@K[0] ror @d[2],@d[2],#32 ror @d[3],@d[3],#32 -@@ -412,115 +465,129 @@ ChaCha20_neon: +@@ -413,115 +466,129 @@ ChaCha20_neon: ror @d[6],@d[6],#32 ror @d[7],@d[7],#32 #endif @@ -1013,7 +3285,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 rev @x[0],@x[0] rev @x[2],@x[2] rev @x[4],@x[4] -@@ -530,48 +597,68 @@ $code.=<<___; +@@ -531,48 +598,68 @@ $code.=<<___; rev @x[12],@x[12] rev @x[14],@x[14] #endif @@ -1106,7 +3378,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 ldp x19,x20,[x29,#16] add sp,sp,#64 ldp x21,x22,[x29,#32] -@@ -582,8 +669,10 @@ $code.=<<___; +@@ -583,8 +670,10 @@ $code.=<<___; .inst 0xd50323bf // autiasp ret @@ -1118,7 +3390,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 cmp $len,#64 b.lo .Less_than_64 -@@ -600,7 +689,7 @@ $code.=<<___; +@@ -601,7 +690,7 @@ $code.=<<___; add @x[14],@x[14],@x[15],lsl#32 ldp @x[13],@x[15],[$inp,#48] add $inp,$inp,#64 @@ -1127,7 +3399,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 rev @x[0],@x[0] rev @x[2],@x[2] rev @x[4],@x[4] -@@ -620,48 +709,68 @@ $code.=<<___; +@@ -621,48 +710,68 @@ $code.=<<___; eor @x[14],@x[14],@x[15] stp @x[0],@x[2],[$out,#0] // store output @@ -1220,7 +3492,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 sub $out,$out,#1 add $inp,$inp,$len add $out,$out,$len -@@ -694,9 +803,41 @@ $code.=<<___; +@@ -695,9 +804,41 @@ $code.=<<___; .size ChaCha20_neon,.-ChaCha20_neon ___ { @@ -1263,7 +3535,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 $code.=<<___; .type ChaCha20_512_neon,%function -@@ -716,6 +857,7 @@ ChaCha20_512_neon: +@@ -717,6 +858,7 @@ ChaCha20_512_neon: .L512_or_more_neon: sub sp,sp,#128+64 @@ -1271,7 +3543,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 ldp @d[0],@d[1],[@x[0]] // load sigma ld1 {@K[0]},[@x[0]],#16 ldp @d[2],@d[3],[$key] // load key -@@ -723,8 +865,9 @@ ChaCha20_512_neon: +@@ -724,8 +866,9 @@ ChaCha20_512_neon: ld1 {@K[1],@K[2]},[$key] ldp @d[6],@d[7],[$ctr] // load counter ld1 {@K[3]},[$ctr] @@ -1283,7 +3555,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 rev64 @K[0],@K[0] ror @d[2],@d[2],#32 ror @d[3],@d[3],#32 -@@ -791,9 +934,10 @@ ChaCha20_512_neon: +@@ -792,9 +935,10 @@ ChaCha20_512_neon: mov $C4,@K[2] stp @K[3],@K[4],[sp,#48] // off-load key block, variable part mov $C5,@K[2] @@ -1295,7 +3567,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 subs $len,$len,#512 .Loop_upper_neon: sub $ctr,$ctr,#1 -@@ -866,7 +1010,7 @@ $code.=<<___; +@@ -867,7 +1011,7 @@ $code.=<<___; add @x[14],@x[14],@x[15],lsl#32 ldp @x[13],@x[15],[$inp,#48] add $inp,$inp,#64 @@ -1304,7 +3576,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 rev @x[0],@x[0] rev @x[2],@x[2] rev @x[4],@x[4] -@@ -955,6 +1099,7 @@ $code.=<<___; +@@ -956,6 +1100,7 @@ $code.=<<___; add.32 @x[2],@x[2],@d[1] ldp @K[4],@K[5],[sp,#64] add @x[3],@x[3],@d[1],lsr#32 @@ -1312,7 +3584,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 add $A0,$A0,@K[0] add.32 @x[4],@x[4],@d[2] add $A1,$A1,@K[0] -@@ -1007,7 +1152,7 @@ $code.=<<___; +@@ -1008,7 +1153,7 @@ $code.=<<___; add $inp,$inp,#64 add $B5,$B5,@K[1] @@ -1321,7 +3593,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 rev @x[0],@x[0] rev @x[2],@x[2] rev @x[4],@x[4] -@@ -1085,26 +1230,26 @@ $code.=<<___; +@@ -1086,26 +1231,26 @@ $code.=<<___; b.hs .Loop_outer_512_neon adds $len,$len,#512 @@ -1356,7 +3628,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 eor @K[1],@K[1],@K[1] eor @K[2],@K[2],@K[2] eor @K[3],@K[3],@K[3] -@@ -1114,6 +1259,7 @@ $code.=<<___; +@@ -1115,6 +1260,7 @@ $code.=<<___; b .Loop_outer .Ldone_512_neon: @@ -1364,7 +3636,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 ldp x19,x20,[x29,#16] add sp,sp,#128+64 ldp x21,x22,[x29,#32] -@@ -1132,9 +1278,11 @@ foreach (split("\n",$code)) { +@@ -1133,9 +1279,11 @@ foreach (split("\n",$code)) { s/\`([^\`]*)\`/eval $1/geo; (s/\b([a-z]+)\.32\b/$1/ and (s/x([0-9]+)/w$1/g or 1)) or @@ -1377,9 +3649,9 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 (s/\brev32\.16\b/rev32/ and (s/\.4s/\.8h/g or 1)); #s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo; -diff -up openssl-1.1.1c/crypto/modes/asm/ghashv8-armx.pl.arm-update openssl-1.1.1c/crypto/modes/asm/ghashv8-armx.pl ---- openssl-1.1.1c/crypto/modes/asm/ghashv8-armx.pl.arm-update 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/modes/asm/ghashv8-armx.pl 2019-11-20 11:36:22.389506155 +0100 +diff -up openssl-1.1.1i/crypto/modes/asm/ghashv8-armx.pl.arm-update openssl-1.1.1i/crypto/modes/asm/ghashv8-armx.pl +--- openssl-1.1.1i/crypto/modes/asm/ghashv8-armx.pl.arm-update 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/modes/asm/ghashv8-armx.pl 2020-12-09 10:37:38.408558954 +0100 @@ -42,6 +42,7 @@ # Denver 0.51 0.65 6.02 # Mongoose 0.65 1.10 8.06 @@ -1388,9 +3660,9 @@ diff -up openssl-1.1.1c/crypto/modes/asm/ghashv8-armx.pl.arm-update openssl-1.1. # # (*) presented for reference/comparison purposes; -diff -up openssl-1.1.1c/crypto/poly1305/asm/poly1305-armv8.pl.arm-update openssl-1.1.1c/crypto/poly1305/asm/poly1305-armv8.pl ---- openssl-1.1.1c/crypto/poly1305/asm/poly1305-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/poly1305/asm/poly1305-armv8.pl 2019-11-20 11:36:22.390506137 +0100 +diff -up openssl-1.1.1i/crypto/poly1305/asm/poly1305-armv8.pl.arm-update openssl-1.1.1i/crypto/poly1305/asm/poly1305-armv8.pl +--- openssl-1.1.1i/crypto/poly1305/asm/poly1305-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/poly1305/asm/poly1305-armv8.pl 2020-12-09 10:37:38.408558954 +0100 @@ -29,6 +29,7 @@ # X-Gene 2.13/+68% 2.27 # Mongoose 1.77/+75% 1.12 @@ -1399,9 +3671,9 @@ diff -up openssl-1.1.1c/crypto/poly1305/asm/poly1305-armv8.pl.arm-update openssl # # (*) estimate based on resources availability is less than 1.0, # i.e. measured result is worse than expected, presumably binary -diff -up openssl-1.1.1c/crypto/sha/asm/keccak1600-armv8.pl.arm-update openssl-1.1.1c/crypto/sha/asm/keccak1600-armv8.pl ---- openssl-1.1.1c/crypto/sha/asm/keccak1600-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/sha/asm/keccak1600-armv8.pl 2019-11-20 11:36:22.390506137 +0100 +diff -up openssl-1.1.1i/crypto/sha/asm/keccak1600-armv8.pl.arm-update openssl-1.1.1i/crypto/sha/asm/keccak1600-armv8.pl +--- openssl-1.1.1i/crypto/sha/asm/keccak1600-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/sha/asm/keccak1600-armv8.pl 2020-12-09 10:37:38.408558954 +0100 @@ -51,6 +51,7 @@ # Kryo 12 # Denver 7.8 @@ -1410,9 +3682,9 @@ diff -up openssl-1.1.1c/crypto/sha/asm/keccak1600-armv8.pl.arm-update openssl-1. # # (*) Corresponds to SHA3-256. No improvement coefficients are listed # because they vary too much from compiler to compiler. Newer -diff -up openssl-1.1.1c/crypto/sha/asm/sha1-armv8.pl.arm-update openssl-1.1.1c/crypto/sha/asm/sha1-armv8.pl ---- openssl-1.1.1c/crypto/sha/asm/sha1-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/sha/asm/sha1-armv8.pl 2019-11-20 11:36:22.390506137 +0100 +diff -up openssl-1.1.1i/crypto/sha/asm/sha1-armv8.pl.arm-update openssl-1.1.1i/crypto/sha/asm/sha1-armv8.pl +--- openssl-1.1.1i/crypto/sha/asm/sha1-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/sha/asm/sha1-armv8.pl 2020-12-09 10:37:38.408558954 +0100 @@ -27,6 +27,7 @@ # X-Gene 8.80 (+200%) # Mongoose 2.05 6.50 (+160%) @@ -1421,9 +3693,9 @@ diff -up openssl-1.1.1c/crypto/sha/asm/sha1-armv8.pl.arm-update openssl-1.1.1c/c # # (*) Software results are presented mostly for reference purposes. # (**) Keep in mind that Denver relies on binary translation, which -diff -up openssl-1.1.1c/crypto/sha/asm/sha512-armv8.pl.arm-update openssl-1.1.1c/crypto/sha/asm/sha512-armv8.pl ---- openssl-1.1.1c/crypto/sha/asm/sha512-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/sha/asm/sha512-armv8.pl 2019-11-20 11:36:22.390506137 +0100 +diff -up openssl-1.1.1i/crypto/sha/asm/sha512-armv8.pl.arm-update openssl-1.1.1i/crypto/sha/asm/sha512-armv8.pl +--- openssl-1.1.1i/crypto/sha/asm/sha512-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/sha/asm/sha512-armv8.pl 2020-12-09 10:37:38.408558954 +0100 @@ -28,6 +28,7 @@ # X-Gene 20.0 (+100%) 12.8 (+300%(***)) # Mongoose 2.36 13.0 (+50%) 8.36 (+33%) diff --git a/openssl-1.1.1-ec-curves.patch b/openssl-1.1.1-ec-curves.patch index a83a331..27f23ca 100644 --- a/openssl-1.1.1-ec-curves.patch +++ b/openssl-1.1.1-ec-curves.patch @@ -1,6 +1,6 @@ -diff -up openssl-1.1.1c/apps/speed.c.curves openssl-1.1.1c/apps/speed.c ---- openssl-1.1.1c/apps/speed.c.curves 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/apps/speed.c 2019-05-29 15:36:53.332224470 +0200 +diff -up openssl-1.1.1h/apps/speed.c.curves openssl-1.1.1h/apps/speed.c +--- openssl-1.1.1h/apps/speed.c.curves 2020-09-22 14:55:07.000000000 +0200 ++++ openssl-1.1.1h/apps/speed.c 2020-11-06 13:27:15.659288431 +0100 @@ -490,90 +490,30 @@ static double rsa_results[RSA_NUM][2]; #endif /* OPENSSL_NO_RSA */ @@ -92,7 +92,7 @@ diff -up openssl-1.1.1c/apps/speed.c.curves openssl-1.1.1c/apps/speed.c {"ecdhx25519", R_EC_X25519}, {"ecdhx448", R_EC_X448} }; -@@ -1504,31 +1444,10 @@ int speed_main(int argc, char **argv) +@@ -1502,31 +1442,10 @@ int speed_main(int argc, char **argv) unsigned int bits; } test_curves[] = { /* Prime Curves */ @@ -124,7 +124,7 @@ diff -up openssl-1.1.1c/apps/speed.c.curves openssl-1.1.1c/apps/speed.c /* Other and ECDH only ones */ {"X25519", NID_X25519, 253}, {"X448", NID_X448, 448} -@@ -2028,9 +1947,9 @@ int speed_main(int argc, char **argv) +@@ -2026,9 +1945,9 @@ int speed_main(int argc, char **argv) # endif # ifndef OPENSSL_NO_EC @@ -137,7 +137,7 @@ diff -up openssl-1.1.1c/apps/speed.c.curves openssl-1.1.1c/apps/speed.c ecdsa_c[i][0] = ecdsa_c[i - 1][0] / 2; ecdsa_c[i][1] = ecdsa_c[i - 1][1] / 2; if (ecdsa_doit[i] <= 1 && ecdsa_c[i][0] == 0) -@@ -2042,7 +1961,7 @@ int speed_main(int argc, char **argv) +@@ -2040,7 +1959,7 @@ int speed_main(int argc, char **argv) } } } @@ -146,7 +146,7 @@ diff -up openssl-1.1.1c/apps/speed.c.curves openssl-1.1.1c/apps/speed.c ecdsa_c[R_EC_K163][0] = count / 1000; ecdsa_c[R_EC_K163][1] = count / 1000 / 2; for (i = R_EC_K233; i <= R_EC_K571; i++) { -@@ -2073,8 +1992,8 @@ int speed_main(int argc, char **argv) +@@ -2071,8 +1990,8 @@ int speed_main(int argc, char **argv) } # endif @@ -157,7 +157,7 @@ diff -up openssl-1.1.1c/apps/speed.c.curves openssl-1.1.1c/apps/speed.c ecdh_c[i][0] = ecdh_c[i - 1][0] / 2; if (ecdh_doit[i] <= 1 && ecdh_c[i][0] == 0) ecdh_doit[i] = 0; -@@ -2084,7 +2003,7 @@ int speed_main(int argc, char **argv) +@@ -2082,7 +2001,7 @@ int speed_main(int argc, char **argv) } } } @@ -166,9 +166,9 @@ diff -up openssl-1.1.1c/apps/speed.c.curves openssl-1.1.1c/apps/speed.c ecdh_c[R_EC_K163][0] = count / 1000; for (i = R_EC_K233; i <= R_EC_K571; i++) { ecdh_c[i][0] = ecdh_c[i - 1][0] / 2; -diff -up openssl-1.1.1c/crypto/ec/ecp_smpl.c.curves openssl-1.1.1c/crypto/ec/ecp_smpl.c ---- openssl-1.1.1c/crypto/ec/ecp_smpl.c.curves 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/ec/ecp_smpl.c 2019-05-29 15:30:09.071349520 +0200 +diff -up openssl-1.1.1h/crypto/ec/ecp_smpl.c.curves openssl-1.1.1h/crypto/ec/ecp_smpl.c +--- openssl-1.1.1h/crypto/ec/ecp_smpl.c.curves 2020-09-22 14:55:07.000000000 +0200 ++++ openssl-1.1.1h/crypto/ec/ecp_smpl.c 2020-11-06 13:27:15.659288431 +0100 @@ -145,6 +145,11 @@ int ec_GFp_simple_group_set_curve(EC_GRO return 0; } @@ -181,9 +181,9 @@ diff -up openssl-1.1.1c/crypto/ec/ecp_smpl.c.curves openssl-1.1.1c/crypto/ec/ecp if (ctx == NULL) { ctx = new_ctx = BN_CTX_new(); if (ctx == NULL) -diff -up openssl-1.1.1c/test/ecdsatest.h.curves openssl-1.1.1c/test/ecdsatest.h ---- openssl-1.1.1c/test/ecdsatest.h.curves 2019-05-29 15:30:09.010350595 +0200 -+++ openssl-1.1.1c/test/ecdsatest.h 2019-05-29 15:41:24.586444294 +0200 +diff -up openssl-1.1.1h/test/ecdsatest.h.curves openssl-1.1.1h/test/ecdsatest.h +--- openssl-1.1.1h/test/ecdsatest.h.curves 2020-11-06 13:27:15.627288114 +0100 ++++ openssl-1.1.1h/test/ecdsatest.h 2020-11-06 13:27:15.660288441 +0100 @@ -32,23 +32,6 @@ typedef struct { } ecdsa_cavs_kat_t; @@ -208,3 +208,59 @@ diff -up openssl-1.1.1c/test/ecdsatest.h.curves openssl-1.1.1c/test/ecdsatest.h /* prime KATs from NIST CAVP */ {NID_secp224r1, NID_sha224, "699325d6fc8fbbb4981a6ded3c3a54ad2e4e3db8a5669201912064c64e700c139248cdc1" +--- openssl-1.1.1h/test/recipes/15-test_genec.t.ec-curves 2020-11-06 13:58:36.402895540 +0100 ++++ openssl-1.1.1h/test/recipes/15-test_genec.t 2020-11-06 13:59:38.508484498 +0100 +@@ -20,45 +20,11 @@ plan skip_all => "This test is unsupport + if disabled("ec"); + + my @prime_curves = qw( +- secp112r1 +- secp112r2 +- secp128r1 +- secp128r2 +- secp160k1 +- secp160r1 +- secp160r2 +- secp192k1 +- secp224k1 + secp224r1 + secp256k1 + secp384r1 + secp521r1 +- prime192v1 +- prime192v2 +- prime192v3 +- prime239v1 +- prime239v2 +- prime239v3 + prime256v1 +- wap-wsg-idm-ecid-wtls6 +- wap-wsg-idm-ecid-wtls7 +- wap-wsg-idm-ecid-wtls8 +- wap-wsg-idm-ecid-wtls9 +- wap-wsg-idm-ecid-wtls12 +- brainpoolP160r1 +- brainpoolP160t1 +- brainpoolP192r1 +- brainpoolP192t1 +- brainpoolP224r1 +- brainpoolP224t1 +- brainpoolP256r1 +- brainpoolP256t1 +- brainpoolP320r1 +- brainpoolP320t1 +- brainpoolP384r1 +- brainpoolP384t1 +- brainpoolP512r1 +- brainpoolP512t1 + ); + + my @binary_curves = qw( +@@ -115,7 +81,6 @@ push(@other_curves, 'SM2') + if !disabled("sm2"); + + my @curve_aliases = qw( +- P-192 + P-224 + P-256 + P-384 diff --git a/openssl-1.1.1-fips-dh.patch b/openssl-1.1.1-fips-dh.patch index d98372e..ff895d5 100644 --- a/openssl-1.1.1-fips-dh.patch +++ b/openssl-1.1.1-fips-dh.patch @@ -2716,91 +2716,16 @@ diff -up openssl-1.1.1g/ssl/s3_lib.c.fips-dh openssl-1.1.1g/ssl/s3_lib.c return ret; } #endif -diff -up openssl-1.1.1g/ssl/t1_lib.c.fips-dh openssl-1.1.1g/ssl/t1_lib.c ---- openssl-1.1.1g/ssl/t1_lib.c.fips-dh 2020-07-17 10:36:29.243788425 +0200 -+++ openssl-1.1.1g/ssl/t1_lib.c 2020-07-17 10:36:29.249788474 +0200 -@@ -2511,46 +2511,48 @@ int SSL_check_chain(SSL *s, X509 *x, EVP - #ifndef OPENSSL_NO_DH - DH *ssl_get_auto_dh(SSL *s) - { -+ DH *dhp = NULL; -+ BIGNUM *p = NULL, *g = NULL; - int dh_secbits = 80; -- if (s->cert->dh_tmp_auto == 2) -- return DH_get_1024_160(); -- if (s->s3->tmp.new_cipher->algorithm_auth & (SSL_aNULL | SSL_aPSK)) { -- if (s->s3->tmp.new_cipher->strength_bits == 256) -- dh_secbits = 128; -- else -- dh_secbits = 80; -- } else { -- if (s->s3->tmp.cert == NULL) -- return NULL; -- dh_secbits = EVP_PKEY_security_bits(s->s3->tmp.cert->privatekey); -+ if (s->cert->dh_tmp_auto != 2) { -+ if (s->s3->tmp.new_cipher->algorithm_auth & (SSL_aNULL | SSL_aPSK)) { -+ if (s->s3->tmp.new_cipher->strength_bits == 256) -+ dh_secbits = 128; -+ else -+ dh_secbits = 80; -+ } else { -+ if (s->s3->tmp.cert == NULL) -+ return NULL; -+ dh_secbits = EVP_PKEY_security_bits(s->s3->tmp.cert->privatekey); -+ } - } -- if (dh_secbits >= 128) { -- DH *dhp = DH_new(); -- BIGNUM *p, *g; -- if (dhp == NULL) -- return NULL; -- g = BN_new(); -- if (g == NULL || !BN_set_word(g, 2)) { -- DH_free(dhp); -- BN_free(g); -- return NULL; -- } -- if (dh_secbits >= 192) -- p = BN_get_rfc3526_prime_8192(NULL); -- else -- p = BN_get_rfc3526_prime_3072(NULL); -- if (p == NULL || !DH_set0_pqg(dhp, p, NULL, g)) { -- DH_free(dhp); -- BN_free(p); -- BN_free(g); -- return NULL; -- } -- return dhp; -+ dhp = DH_new(); -+ if (dhp == NULL) -+ return NULL; -+ g = BN_new(); -+ if (g == NULL || !BN_set_word(g, 2)) { -+ DH_free(dhp); -+ BN_free(g); -+ return NULL; -+ } -+ if (dh_secbits >= 192) -+ p = BN_get_rfc3526_prime_8192(NULL); -+ else if (dh_secbits >= 152) -+ p = BN_get_rfc3526_prime_4096(NULL); -+ else if (dh_secbits >= 128) -+ p = BN_get_rfc3526_prime_3072(NULL); +diff -up openssl-1.1.1h/ssl/t1_lib.c.fips-dh openssl-1.1.1h/ssl/t1_lib.c +--- openssl-1.1.1h/ssl/t1_lib.c.fips-dh 2020-11-04 14:04:41.851711629 +0100 ++++ openssl-1.1.1h/ssl/t1_lib.c 2020-11-04 14:06:06.506431652 +0100 +@@ -2470,7 +2470,7 @@ + p = BN_get_rfc3526_prime_4096(NULL); + else if (dh_secbits >= 128) + p = BN_get_rfc3526_prime_3072(NULL); +- else if (dh_secbits >= 112) + else if (dh_secbits >= 112 || FIPS_mode()) -+ p = BN_get_rfc3526_prime_2048(NULL); -+ else -+ p = BN_get_rfc2409_prime_1024(NULL); -+ if (p == NULL || !DH_set0_pqg(dhp, p, NULL, g)) { -+ DH_free(dhp); -+ BN_free(p); -+ BN_free(g); -+ return NULL; - } -- if (dh_secbits >= 112) -- return DH_get_2048_224(); -- return DH_get_1024_160(); -+ return dhp; - } - #endif - + p = BN_get_rfc3526_prime_2048(NULL); + else + p = BN_get_rfc2409_prime_1024(NULL); diff --git a/openssl-1.1.1-fips-post-rand.patch b/openssl-1.1.1-fips-post-rand.patch index 18a01fe..027dc55 100644 --- a/openssl-1.1.1-fips-post-rand.patch +++ b/openssl-1.1.1-fips-post-rand.patch @@ -1,6 +1,6 @@ -diff -up openssl-1.1.1e/crypto/fips/fips.c.fips-post-rand openssl-1.1.1e/crypto/fips/fips.c ---- openssl-1.1.1e/crypto/fips/fips.c.fips-post-rand 2020-03-17 18:06:16.822418854 +0100 -+++ openssl-1.1.1e/crypto/fips/fips.c 2020-03-17 18:06:16.861418172 +0100 +diff -up openssl-1.1.1i/crypto/fips/fips.c.fips-post-rand openssl-1.1.1i/crypto/fips/fips.c +--- openssl-1.1.1i/crypto/fips/fips.c.fips-post-rand 2020-12-09 10:26:41.634106328 +0100 ++++ openssl-1.1.1i/crypto/fips/fips.c 2020-12-09 10:26:41.652106475 +0100 @@ -68,6 +68,7 @@ # include @@ -51,10 +51,10 @@ diff -up openssl-1.1.1e/crypto/fips/fips.c.fips-post-rand openssl-1.1.1e/crypto/ ret = 1; goto end; } -diff -up openssl-1.1.1e/crypto/rand/drbg_lib.c.fips-post-rand openssl-1.1.1e/crypto/rand/drbg_lib.c ---- openssl-1.1.1e/crypto/rand/drbg_lib.c.fips-post-rand 2020-03-17 15:31:17.000000000 +0100 -+++ openssl-1.1.1e/crypto/rand/drbg_lib.c 2020-03-17 18:07:35.305045521 +0100 -@@ -1009,6 +1009,20 @@ size_t rand_drbg_seedlen(RAND_DRBG *drbg +diff -up openssl-1.1.1i/crypto/rand/drbg_lib.c.fips-post-rand openssl-1.1.1i/crypto/rand/drbg_lib.c +--- openssl-1.1.1i/crypto/rand/drbg_lib.c.fips-post-rand 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/rand/drbg_lib.c 2020-12-09 10:26:41.652106475 +0100 +@@ -1005,6 +1005,20 @@ size_t rand_drbg_seedlen(RAND_DRBG *drbg return min_entropy > min_entropylen ? min_entropy : min_entropylen; } @@ -75,9 +75,9 @@ diff -up openssl-1.1.1e/crypto/rand/drbg_lib.c.fips-post-rand openssl-1.1.1e/cry /* Implements the default OpenSSL RAND_add() method */ static int drbg_add(const void *buf, int num, double randomness) { -diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/crypto/rand/rand_unix.c ---- openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand 2020-03-17 15:31:17.000000000 +0100 -+++ openssl-1.1.1e/crypto/rand/rand_unix.c 2020-03-17 18:09:01.503537189 +0100 +diff -up openssl-1.1.1i/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1i/crypto/rand/rand_unix.c +--- openssl-1.1.1i/crypto/rand/rand_unix.c.fips-post-rand 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/rand/rand_unix.c 2020-12-09 10:36:59.531221903 +0100 @@ -17,10 +17,12 @@ #include #include "rand_local.h" @@ -91,7 +91,7 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr # ifdef DEVRANDOM_WAIT # include # include -@@ -342,7 +344,7 @@ static ssize_t sysctl_random(char *buf, +@@ -344,7 +346,7 @@ static ssize_t sysctl_random(char *buf, * syscall_random(): Try to get random data using a system call * returns the number of bytes returned in buf, or < 0 on error. */ @@ -100,15 +100,15 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr { /* * Note: 'buflen' equals the size of the buffer which is used by the -@@ -364,6 +366,7 @@ static ssize_t syscall_random(void *buf, - * - Linux since 3.17 with glibc 2.25 - * - FreeBSD since 12.0 (1200061) +@@ -369,6 +371,7 @@ static ssize_t syscall_random(void *buf, + * Note: Sometimes getentropy() can be provided but not implemented + * internally. So we need to check errno for ENOSYS */ +# if 0 # if defined(__GNUC__) && __GNUC__>=2 && defined(__ELF__) && !defined(__hpux) extern int getentropy(void *buffer, size_t length) __attribute__((weak)); -@@ -385,10 +388,10 @@ static ssize_t syscall_random(void *buf, +@@ -394,10 +397,10 @@ static ssize_t syscall_random(void *buf, if (p_getentropy.p != NULL) return p_getentropy.f(buf, buflen) == 0 ? (ssize_t)buflen : -1; # endif @@ -122,7 +122,7 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr # elif (defined(__FreeBSD__) || defined(__NetBSD__)) && defined(KERN_ARND) return sysctl_random(buf, buflen); # else -@@ -623,6 +626,9 @@ size_t rand_pool_acquire_entropy(RAND_PO +@@ -633,6 +636,9 @@ size_t rand_pool_acquire_entropy(RAND_PO size_t entropy_available; # if defined(OPENSSL_RAND_SEED_GETRANDOM) @@ -132,7 +132,7 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr { size_t bytes_needed; unsigned char *buffer; -@@ -633,7 +639,7 @@ size_t rand_pool_acquire_entropy(RAND_PO +@@ -643,7 +649,7 @@ size_t rand_pool_acquire_entropy(RAND_PO bytes_needed = rand_pool_bytes_needed(pool, 1 /*entropy_factor*/); while (bytes_needed != 0 && attempts-- > 0) { buffer = rand_pool_add_begin(pool, bytes_needed); @@ -141,7 +141,7 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr if (bytes > 0) { rand_pool_add_end(pool, bytes, 8 * bytes); bytes_needed -= bytes; -@@ -668,8 +674,10 @@ size_t rand_pool_acquire_entropy(RAND_PO +@@ -678,8 +684,10 @@ size_t rand_pool_acquire_entropy(RAND_PO int attempts = 3; const int fd = get_random_device(i); @@ -153,7 +153,7 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr while (bytes_needed != 0 && attempts-- > 0) { buffer = rand_pool_add_begin(pool, bytes_needed); -@@ -732,7 +740,9 @@ size_t rand_pool_acquire_entropy(RAND_PO +@@ -742,7 +750,9 @@ size_t rand_pool_acquire_entropy(RAND_PO return entropy_available; } # endif @@ -164,9 +164,9 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr return rand_pool_entropy_available(pool); # endif } -diff -up openssl-1.1.1e/include/crypto/fips.h.fips-post-rand openssl-1.1.1e/include/crypto/fips.h ---- openssl-1.1.1e/include/crypto/fips.h.fips-post-rand 2020-03-17 18:06:16.831418696 +0100 -+++ openssl-1.1.1e/include/crypto/fips.h 2020-03-17 18:06:16.861418172 +0100 +diff -up openssl-1.1.1i/include/crypto/fips.h.fips-post-rand openssl-1.1.1i/include/crypto/fips.h +--- openssl-1.1.1i/include/crypto/fips.h.fips-post-rand 2020-12-09 10:26:41.639106369 +0100 ++++ openssl-1.1.1i/include/crypto/fips.h 2020-12-09 10:26:41.657106516 +0100 @@ -77,6 +77,8 @@ int FIPS_selftest_hmac(void); int FIPS_selftest_drbg(void); int FIPS_selftest_cmac(void); @@ -176,9 +176,9 @@ diff -up openssl-1.1.1e/include/crypto/fips.h.fips-post-rand openssl-1.1.1e/incl int fips_pkey_signature_test(EVP_PKEY *pkey, const unsigned char *tbs, int tbslen, const unsigned char *kat, -diff -up openssl-1.1.1e/include/crypto/rand.h.fips-post-rand openssl-1.1.1e/include/crypto/rand.h ---- openssl-1.1.1e/include/crypto/rand.h.fips-post-rand 2020-03-17 15:31:17.000000000 +0100 -+++ openssl-1.1.1e/include/crypto/rand.h 2020-03-17 18:07:35.303045555 +0100 +diff -up openssl-1.1.1i/include/crypto/rand.h.fips-post-rand openssl-1.1.1i/include/crypto/rand.h +--- openssl-1.1.1i/include/crypto/rand.h.fips-post-rand 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/include/crypto/rand.h 2020-12-09 10:26:41.657106516 +0100 @@ -24,6 +24,7 @@ typedef struct rand_pool_st RAND_POOL; diff --git a/openssl-1.1.1-fips.patch b/openssl-1.1.1-fips.patch index ad295a4..c9137ca 100644 --- a/openssl-1.1.1-fips.patch +++ b/openssl-1.1.1-fips.patch @@ -11614,10 +11614,10 @@ diff -up openssl-1.1.1e/test/recipes/30-test_evp_data/evpciph.txt.fips openssl-1 diff -up openssl-1.1.1e/util/libcrypto.num.fips openssl-1.1.1e/util/libcrypto.num --- openssl-1.1.1e/util/libcrypto.num.fips 2020-03-17 17:31:10.744241038 +0100 +++ openssl-1.1.1e/util/libcrypto.num 2020-03-17 17:32:37.851722261 +0100 -@@ -4587,3 +4587,38 @@ EVP_PKEY_meth_set_digestverify - EVP_PKEY_meth_get_digestverify 4541 1_1_1e EXIST::FUNCTION: - EVP_PKEY_meth_get_digestsign 4542 1_1_1e EXIST::FUNCTION: - RSA_get0_pss_params 4543 1_1_1e EXIST::FUNCTION:RSA +@@ -4590,3 +4590,38 @@ X509_ALGOR_copy + X509_REQ_set0_signature 4545 1_1_1h EXIST::FUNCTION: + X509_REQ_set1_signature_algo 4546 1_1_1h EXIST::FUNCTION: + EC_KEY_decoded_from_explicit_params 4547 1_1_1h EXIST::FUNCTION:EC +FIPS_drbg_reseed 6348 1_1_0g EXIST::FUNCTION: +FIPS_selftest_check 6349 1_1_0g EXIST::FUNCTION: +FIPS_rand_set_method 6350 1_1_0g EXIST::FUNCTION: diff --git a/openssl-1.1.1-ignore-bound.patch b/openssl-1.1.1-ignore-bound.patch deleted file mode 100644 index 4838f3d..0000000 --- a/openssl-1.1.1-ignore-bound.patch +++ /dev/null @@ -1,14 +0,0 @@ -Do not return failure when setting version bound on fixed protocol -version method. -diff -up openssl-1.1.1-pre8/ssl/statem/statem_lib.c.ignore-bound openssl-1.1.1-pre8/ssl/statem/statem_lib.c ---- openssl-1.1.1-pre8/ssl/statem/statem_lib.c.ignore-bound 2018-06-20 16:48:13.000000000 +0200 -+++ openssl-1.1.1-pre8/ssl/statem/statem_lib.c 2018-08-13 11:07:52.826304045 +0200 -@@ -1595,7 +1595,7 @@ int ssl_set_version_bound(int method_ver - * methods are not subject to controls that disable individual protocol - * versions. - */ -- return 0; -+ return 1; - - case TLS_ANY_VERSION: - if (version < SSL3_VERSION || version > TLS_MAX_VERSION) diff --git a/openssl-1.1.1-reneg-no-extms.patch b/openssl-1.1.1-reneg-no-extms.patch deleted file mode 100644 index 76adef7..0000000 --- a/openssl-1.1.1-reneg-no-extms.patch +++ /dev/null @@ -1,44 +0,0 @@ -diff -up openssl-1.1.1g/include/openssl/ssl3.h.reneg-no-extms openssl-1.1.1g/include/openssl/ssl3.h ---- openssl-1.1.1g/include/openssl/ssl3.h.reneg-no-extms 2020-04-21 14:22:39.000000000 +0200 -+++ openssl-1.1.1g/include/openssl/ssl3.h 2020-06-05 15:20:22.090682776 +0200 -@@ -292,6 +292,9 @@ extern "C" { - - # define TLS1_FLAGS_STATELESS 0x0800 - -+/* Set if extended master secret extension required on renegotiation */ -+# define TLS1_FLAGS_REQUIRED_EXTMS 0x1000 -+ - # define SSL3_MT_HELLO_REQUEST 0 - # define SSL3_MT_CLIENT_HELLO 1 - # define SSL3_MT_SERVER_HELLO 2 -diff -up openssl-1.1.1g/ssl/statem/extensions.c.reneg-no-extms openssl-1.1.1g/ssl/statem/extensions.c ---- openssl-1.1.1g/ssl/statem/extensions.c.reneg-no-extms 2020-04-21 14:22:39.000000000 +0200 -+++ openssl-1.1.1g/ssl/statem/extensions.c 2020-06-05 15:22:19.677653437 +0200 -@@ -1168,14 +1168,26 @@ static int init_etm(SSL *s, unsigned int - - static int init_ems(SSL *s, unsigned int context) - { -- if (!s->server) -+ if (s->s3->flags & TLS1_FLAGS_RECEIVED_EXTMS) { - s->s3->flags &= ~TLS1_FLAGS_RECEIVED_EXTMS; -+ s->s3->flags |= TLS1_FLAGS_REQUIRED_EXTMS; -+ } - - return 1; - } - - static int final_ems(SSL *s, unsigned int context, int sent) - { -+ /* -+ * Check extended master secret extension is not dropped on -+ * renegotiation. -+ */ -+ if (!(s->s3->flags & TLS1_FLAGS_RECEIVED_EXTMS) -+ && (s->s3->flags & TLS1_FLAGS_REQUIRED_EXTMS)) { -+ SSLfatal(s, SSL_AD_HANDSHAKE_FAILURE, SSL_F_FINAL_EMS, -+ SSL_R_INCONSISTENT_EXTMS); -+ return 0; -+ } - if (!s->server && s->hit) { - /* - * Check extended master secret extension is consistent with diff --git a/openssl-1.1.1-ts-sha256-default.patch b/openssl-1.1.1-ts-sha256-default.patch index d99dc47..2a1dd6c 100644 --- a/openssl-1.1.1-ts-sha256-default.patch +++ b/openssl-1.1.1-ts-sha256-default.patch @@ -1,8 +1,17 @@ -diff --git a/apps/ts.c b/apps/ts.c -index 63c5210183..4ef8a72eef 100644 ---- a/apps/ts.c -+++ b/apps/ts.c -@@ -425,7 +425,7 @@ static TS_REQ *create_query(BIO *data_bio, const char *digest, const EVP_MD *md, +diff -up openssl-1.1.1h/apps/openssl.cnf.ts-sha256-default openssl-1.1.1h/apps/openssl.cnf +--- openssl-1.1.1h/apps/openssl.cnf.ts-sha256-default 2020-11-06 11:07:28.850100899 +0100 ++++ openssl-1.1.1h/apps/openssl.cnf 2020-11-06 11:11:28.042913791 +0100 +@@ -364,5 +348,5 @@ tsa_name = yes # Must the TSA name be i + # (optional, default: no) + ess_cert_id_chain = no # Must the ESS cert id chain be included? + # (optional, default: no) +-ess_cert_id_alg = sha1 # algorithm to compute certificate ++ess_cert_id_alg = sha256 # algorithm to compute certificate + # identifier (optional, default: sha1) +diff -up openssl-1.1.1h/apps/ts.c.ts-sha256-default openssl-1.1.1h/apps/ts.c +--- openssl-1.1.1h/apps/ts.c.ts-sha256-default 2020-09-22 14:55:07.000000000 +0200 ++++ openssl-1.1.1h/apps/ts.c 2020-11-06 11:07:28.883101220 +0100 +@@ -423,7 +423,7 @@ static TS_REQ *create_query(BIO *data_bi ASN1_OBJECT *policy_obj = NULL; ASN1_INTEGER *nonce_asn1 = NULL; @@ -11,11 +20,22 @@ index 63c5210183..4ef8a72eef 100644 goto err; if ((ts_req = TS_REQ_new()) == NULL) goto err; -diff --git a/doc/man1/ts.pod b/doc/man1/ts.pod -index 078905a845..83b8fe4350 100644 ---- a/doc/man1/ts.pod -+++ b/doc/man1/ts.pod -@@ -517,7 +517,7 @@ included. Default is no. (Optional) +diff -up openssl-1.1.1h/crypto/ts/ts_conf.c.ts-sha256-default openssl-1.1.1h/crypto/ts/ts_conf.c +--- openssl-1.1.1h/crypto/ts/ts_conf.c.ts-sha256-default 2020-11-06 12:03:51.226372867 +0100 ++++ openssl-1.1.1h/crypto/ts/ts_conf.c 2020-11-06 12:04:01.713488990 +0100 +@@ -476,7 +476,7 @@ int TS_CONF_set_ess_cert_id_digest(CONF + const char *md = NCONF_get_string(conf, section, ENV_ESS_CERT_ID_ALG); + + if (md == NULL) +- md = "sha1"; ++ md = "sha256"; + + cert_md = EVP_get_digestbyname(md); + if (cert_md == NULL) { +diff -up openssl-1.1.1h/doc/man1/ts.pod.ts-sha256-default openssl-1.1.1h/doc/man1/ts.pod +--- openssl-1.1.1h/doc/man1/ts.pod.ts-sha256-default 2020-09-22 14:55:07.000000000 +0200 ++++ openssl-1.1.1h/doc/man1/ts.pod 2020-11-06 11:07:28.883101220 +0100 +@@ -518,7 +518,7 @@ included. Default is no. (Optional) =item B This option specifies the hash function to be used to calculate the TSA's @@ -24,21 +44,21 @@ index 078905a845..83b8fe4350 100644 =back -@@ -529,7 +529,7 @@ openssl/apps/openssl.cnf will do. +@@ -530,7 +530,7 @@ openssl/apps/openssl.cnf will do. =head2 Time Stamp Request --To create a time stamp request for design1.txt with SHA-1 -+To create a time stamp request for design1.txt with SHA-256 +-To create a timestamp request for design1.txt with SHA-1 ++To create a timestamp request for design1.txt with SHA-256 without nonce and policy and no certificate is required in the response: openssl ts -query -data design1.txt -no_nonce \ -@@ -545,12 +545,12 @@ To print the content of the previous request in human readable format: +@@ -546,12 +546,12 @@ To print the content of the previous req openssl ts -query -in design1.tsq -text --To create a time stamp request which includes the MD-5 digest -+To create a time stamp request which includes the SHA-512 digest +-To create a timestamp request which includes the MD-5 digest ++To create a timestamp request which includes the SHA-512 digest of design2.txt, requests the signer certificate and nonce, specifies a policy id (assuming the tsa_policy1 name is defined in the OID section of the config file): diff --git a/openssl-1.1.1-version-override.patch b/openssl-1.1.1-version-override.patch index a6975fa..727cc26 100644 --- a/openssl-1.1.1-version-override.patch +++ b/openssl-1.1.1-version-override.patch @@ -1,12 +1,12 @@ -diff -up openssl-1.1.1g/include/openssl/opensslv.h.version-override openssl-1.1.1g/include/openssl/opensslv.h ---- openssl-1.1.1g/include/openssl/opensslv.h.version-override 2020-04-23 13:29:37.802673513 +0200 -+++ openssl-1.1.1g/include/openssl/opensslv.h 2020-04-23 13:30:13.064008458 +0200 +diff -up openssl-1.1.1i/include/openssl/opensslv.h.version-override openssl-1.1.1i/include/openssl/opensslv.h +--- openssl-1.1.1i/include/openssl/opensslv.h.version-override 2020-12-09 10:25:12.042374409 +0100 ++++ openssl-1.1.1i/include/openssl/opensslv.h 2020-12-09 10:26:00.362769170 +0100 @@ -40,7 +40,7 @@ extern "C" { * major minor fix final patch/beta) */ - # define OPENSSL_VERSION_NUMBER 0x1010107fL --# define OPENSSL_VERSION_TEXT "OpenSSL 1.1.1g 21 Apr 2020" -+# define OPENSSL_VERSION_TEXT "OpenSSL 1.1.1g FIPS 21 Apr 2020" + # define OPENSSL_VERSION_NUMBER 0x1010109fL +-# define OPENSSL_VERSION_TEXT "OpenSSL 1.1.1i 8 Dec 2020" ++# define OPENSSL_VERSION_TEXT "OpenSSL 1.1.1i FIPS 8 Dec 2020" /*- * The macros below are to be used for shared library (.so, .dll, ...) diff --git a/openssl1.1.spec b/openssl1.1.spec index 99ca895..06db079 100644 --- a/openssl1.1.spec +++ b/openssl1.1.spec @@ -25,8 +25,8 @@ Summary: Compatibility version of the OpenSSL library Name: openssl1.1 -Version: 1.1.1g -Release: 3%{?dist} +Version: 1.1.1i +Release: 1%{?dist} Epoch: 1 # We have to remove certain patented algorithms from the openssl source # tarball with the hobble-openssl script which is included below. @@ -54,7 +54,6 @@ Patch38: openssl-1.1.1-no-weak-verify.patch Patch40: openssl-1.1.1-disable-ssl3.patch Patch41: openssl-1.1.1-system-cipherlist.patch Patch42: openssl-1.1.1-fips.patch -Patch43: openssl-1.1.1-ignore-bound.patch Patch44: openssl-1.1.1-version-override.patch Patch45: openssl-1.1.1-weak-ciphers.patch Patch46: openssl-1.1.1-seclevel.patch @@ -69,7 +68,6 @@ Patch62: openssl-1.1.1-fips-curves.patch Patch65: openssl-1.1.1-fips-drbg-selftest.patch Patch66: openssl-1.1.1-fips-dh.patch Patch67: openssl-1.1.1-kdf-selftest.patch -Patch68: openssl-1.1.1-reneg-no-extms.patch Patch69: openssl-1.1.1-alpn-cb.patch Patch70: openssl-1.1.1-rewire-fips-drbg.patch # Backported fixes including security fixes @@ -141,7 +139,6 @@ cp %{SOURCE13} test/ %patch40 -p1 -b .disable-ssl3 %patch41 -p1 -b .system-cipherlist %patch42 -p1 -b .fips -%patch43 -p1 -b .ignore-bound %patch44 -p1 -b .version-override %patch45 -p1 -b .weak-ciphers %patch46 -p1 -b .seclevel @@ -160,7 +157,6 @@ cp %{SOURCE13} test/ %patch65 -p1 -b .drbg-selftest %patch66 -p1 -b .fips-dh %patch67 -p1 -b .kdf-selftest -%patch68 -p1 -b .reneg-no-extms %patch69 -p1 -b .alpn-cb %patch70 -p1 -b .rewire-fips-drbg @@ -392,6 +388,9 @@ rm -rf $RPM_BUILD_ROOT%{_libdir}/pkgconfig %ldconfig_scriptlets %changelog +* Wed Dec 9 2020 Tomáš Mráz 1.1.1i-1 +- Update to the 1.1.1i release fixing CVE-2020-1971 + * Fri Oct 30 2020 Tomáš Mráz 1.1.1g-3 - Corrected wrong requires in the devel package diff --git a/sources b/sources index 50e115e..4c1e648 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (openssl-1.1.1g-hobbled.tar.xz) = 7cd351d8fd4a028edcdc6804d8b73af7ff5693ab96cafd4f9252534d4e8e9000e22aefa45f51db490da52d89f4e5b41d02452be0b516fbb0fe84e36d5ca54971 +SHA512 (openssl-1.1.1i-hobbled.tar.xz) = e131a05e88690a7be7c3d74cbb26620130498ced2ce3d7fd55979aab5ea736ec8b268ba92268bd5bc347989325a3950a066883007cb20c2dd9739fd1eafc513f