lib/libc/aarch64/string: add timingsafe_memcmp() assembly implementation

A port of the amd64 implementation with some slight changes due to
differences in instructions provided by aarch64.

No ASIMD for the same reason as the amd64 code: it's just not particularly
suitable for this application.

Event:		EuroBSDcon 2024
Approved by:	security (cperciva)
Reviewed by:	getz, cperciva
Differential Revision:	https://reviews.freebsd.org/D46758
This commit is contained in:
Robert Clausecker 2024-12-09 10:50:00 +01:00
parent f2c98669fc
commit 3f224333af
2 changed files with 118 additions and 0 deletions

View file

@ -32,6 +32,7 @@ MDSRCS+= \
strlcat.c \
strlen.S \
timingsafe_bcmp.S \
timingsafe_memcmp.S \
bcopy.c \
bzero.c

View file

@ -0,0 +1,117 @@
/*
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 Robert Clausecker
*/
#include <machine/asm.h>
/*
 * int timingsafe_memcmp(const void *b1, const void *b2, size_t len)
 *
 * Lexicographic comparison of two buffers as unsigned bytes.
 *
 * ABI:   AAPCS64, leaf function, no stack usage
 * In:    x0 = b1, x1 = b2, x2 = len
 * Out:   w0 < 0 if b1 < b2, w0 == 0 if equal, w0 > 0 if b1 > b2
 *        (byte difference for len <= 2, otherwise exactly -1, 0 or 1)
 * Clobb: x0-x8, flags
 *
 * Invariant: every branch below tests only the length in x2.  Buffer
 * contents are only ever fed into cmp/csel/csetm/csinc/sub, so control
 * flow does not depend on the data being compared.
 *
 * Multi-byte loads are treated as little endian (see the bfi joins), so
 * rev is applied before the final compare to make the byte at the lowest
 * address the most significant one.
 */
ENTRY(timingsafe_memcmp)
	cmp	x2, #16			// at least 17 bytes to process?
	bhi	.Lgt16

	cmp	x2, #8			// at least 9 bytes to process?
	bhi	.L0916

	cmp	x2, #4			// at least 5 bytes to process?
	bhi	.L0508

	cmp	x2, #2			// at least 3 bytes to process?
	bhi	.L0304

	cbnz	x2, .L0102		// buffer empty?

	mov	w0, #0			// empty buffer always matches
	ret

	/* 1--2 bytes: first and last byte (the same byte if len == 1) */
.L0102:	ldrb	w3, [x0]		// load first bytes
	ldrb	w4, [x1]
	sub	x2, x2, #1		// x2 = offset of last byte
	ldrb	w5, [x0, x2]		// load last bytes
	ldrb	w6, [x1, x2]
	bfi	w5, w3, #8, #8		// join bytes in big endian
	bfi	w6, w4, #8, #8
	sub	w0, w5, w6		// 16 bit values: difference cannot overflow
	ret

	/* 3--4 bytes: first and last halfword, possibly overlapping */
.L0304:	ldrh	w3, [x0]		// load first halfwords
	ldrh	w4, [x1]
	sub	x2, x2, #2		// x2 = offset of last halfword
	ldrh	w5, [x0, x2]		// load last halfwords
	ldrh	w6, [x1, x2]
	bfi	w3, w5, #16, #16	// join halfwords in little endian
	bfi	w4, w6, #16, #16
	rev	w3, w3			// swap byte order
	rev	w4, w4
	cmp	w3, w4
	csetm	w0, lo			// w0 = w3 < w4 ? -1 : 0
	csinc	w0, w0, wzr, ls		// w0 = w3 > w4 ? 1 : w0
	ret

	/* 5--8 bytes: first and last word, possibly overlapping */
.L0508:	ldr	w3, [x0]		// load first words
	ldr	w4, [x1]
	sub	x2, x2, #4		// x2 = offset of last word
	ldr	w5, [x0, x2]		// load last words
	ldr	w6, [x1, x2]
	bfi	x3, x5, #32, #32	// join words in little endian
	bfi	x4, x6, #32, #32
	rev	x3, x3			// swap byte order
	rev	x4, x4
	cmp	x3, x4
	csetm	w0, lo			// w0 = x3 < x4 ? -1 : 0
	csinc	w0, w0, wzr, ls		// w0 = x3 > x4 ? 1 : w0
	ret

	/* 9--16 bytes: first and last doubleword, possibly overlapping */
.L0916:	ldr	x3, [x0]		// load first doublewords
	ldr	x4, [x1]
	sub	x2, x2, #8		// x2 = offset of last doubleword
	ldr	x5, [x0, x2]		// load last doublewords
	ldr	x6, [x1, x2]
	cmp	x3, x4			// mismatch in first pair?
	csel	x3, x3, x5, ne		// use second pair if first pair equal
	csel	x4, x4, x6, ne
	rev	x3, x3			// swap byte order
	rev	x4, x4
	cmp	x3, x4
	csetm	w0, lo			// w0 = x3 < x4 ? -1 : 0
	csinc	w0, w0, wzr, ls		// w0 = x3 > x4 ? 1 : w0
	ret

	/*
	 * More than 16 bytes: process buffer in a loop.
	 * x3/x5 carry the first mismatching doubleword pair seen so far,
	 * or the most recently loaded (equal) pair if there was none yet.
	 */
.Lgt16:	ldp	x3, x4, [x0], #16
	ldp	x5, x6, [x1], #16
	cmp	x3, x5			// mismatch in first pair?
	csel	x3, x3, x4, ne		// use second pair if first pair equal
	csel	x5, x5, x6, ne
	subs	x2, x2, #32		// x2 = len - 32
	bls	.Ltail			// at most 16 bytes left?  (length only)

0:	ldp	x4, x7, [x0], #16
	ldp	x6, x8, [x1], #16
	cmp	x4, x6			// mismatch in first pair?
	csel	x4, x4, x7, ne		// if not, try second pair
	csel	x6, x6, x8, ne
	cmp	x3, x5			// was there a mismatch previously?
	csel	x3, x3, x4, ne		// apply new pair if there was not
	csel	x5, x5, x6, ne
	subs	x2, x2, #16
	bhi	0b			// more than 16 bytes left?  (length only)

	/*
	 * x2 = (bytes left) - 16 <= 0 here: step the pointers back so that
	 * the final 16 byte load ends exactly at the end of the buffers.
	 * Bytes compared twice are harmless: they can only be selected if
	 * no earlier mismatch was recorded, in which case they are equal.
	 */
.Ltail:	add	x0, x0, x2
	add	x1, x1, x2
	ldp	x4, x7, [x0]
	ldp	x6, x8, [x1]
	cmp	x4, x6			// mismatch in first pair?
	csel	x4, x4, x7, ne		// if not, try second pair
	csel	x6, x6, x8, ne
	cmp	x3, x5			// was there a mismatch previously?
	csel	x3, x3, x4, ne		// apply new pair if there was not
	csel	x5, x5, x6, ne
	rev	x3, x3			// swap byte order
	rev	x5, x5
	cmp	x3, x5
	csetm	w0, lo			// w0 = x3 < x5 ? -1 : 0
	csinc	w0, w0, wzr, ls		// w0 = x3 > x5 ? 1 : w0
	ret
END(timingsafe_memcmp)