mirror of
https://github.com/opnsense/src.git
synced 2026-06-05 06:42:56 -04:00
lib/libc/amd64/string: add timingsafe_bcmp(3) scalar, baseline implementations
Very straightforward and similar to memcmp(3). The code has been written to use only instructions that Intel specifies as having data-operand-independent timing. Sponsored by: The FreeBSD Foundation. Approved by: security (cperciva). Differential Revision: https://reviews.freebsd.org/D41673
This commit is contained in:
parent
c08ba4a588
commit
76c2b331bc
2 changed files with 234 additions and 1 deletions
|
|
@ -15,4 +15,5 @@ MDSRCS+= \
|
|||
strcspn.S \
|
||||
strlen.S \
|
||||
strnlen.c \
|
||||
strspn.S
|
||||
strspn.S \
|
||||
timingsafe_bcmp.S
|
||||
|
|
|
|||
232
lib/libc/amd64/string/timingsafe_bcmp.S
Normal file
232
lib/libc/amd64/string/timingsafe_bcmp.S
Normal file
|
|
@ -0,0 +1,232 @@
|
|||
/*-
|
||||
* Copyright (c) 2023 The FreeBSD Foundation
|
||||
*
|
||||
* This software was developed by Robert Clausecker <fuz@FreeBSD.org>
|
||||
* under sponsorship from the FreeBSD Foundation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE
|
||||
*/
|
||||
|
||||
#include <machine/asm.h>
|
||||
|
||||
#include "amd64_archlevel.h"
|
||||
|
||||
/* Alignment directive placed in front of the main-loop heads of both variants below. */
#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
|
||||
|
||||
/*
 * Variant table for timingsafe_bcmp.  It names the two implementations
 * defined further down in this file: "scalar" and "baseline".
 *
 * NOTE(review): ARCHFUNCS, ARCHFUNC and ENDARCHFUNCS are macros from
 * amd64_archlevel.h, which is not visible here.  Presumably they build the
 * table used to pick one variant per CPU; confirm against that header.
 */
ARCHFUNCS(timingsafe_bcmp)
|
||||
ARCHFUNC(timingsafe_bcmp, scalar)
|
||||
ARCHFUNC(timingsafe_bcmp, baseline)
|
||||
ENDARCHFUNCS(timingsafe_bcmp)
|
||||
|
||||
/*
 * timingsafe_bcmp, scalar variant (general-purpose registers only).
 *
 * In:    %rdi, %rsi = base addresses of the two buffers
 *        %rdx       = number of bytes to compare
 * Out:   %eax       = 0 if all compared bytes are equal, nonzero otherwise
 * Uses:  %rax, %rcx, %r8, %r9, flags
 *
 * Every conditional branch tests only the length (%rdx/%edx) or the loop
 * offset in %rcx; no branch depends on buffer contents.  Differences are
 * gathered by XORing corresponding bytes and ORing the results together.
 *
 * Short inputs are handled without a loop by loading one chunk from the
 * start of the buffer and one chunk ending at the last byte.  For the
 * smallest length of each size class the two chunks overlap or coincide,
 * which is harmless because the results are only ORed.
 */
ARCHENTRY(timingsafe_bcmp, scalar)
|
||||
/* Length dispatch: pick the size class for %rdx. */
cmp $16, %rdx # at least 17 bytes to process?
|
||||
ja .Lgt16
|
||||
|
||||
/* From here on %rdx <= 16, so testing the low 32 bits (%edx) suffices. */
cmp $8, %edx # at least 9 bytes to process?
|
||||
ja .L0916
|
||||
|
||||
cmp $4, %edx # at least 5 bytes to process?
|
||||
ja .L0508
|
||||
|
||||
cmp $2, %edx # at least 3 bytes to process?
|
||||
ja .L0304
|
||||
|
||||
test %edx, %edx # buffer empty?
|
||||
jnz .L0102
|
||||
|
||||
xor %eax, %eax # empty buffer always matches
|
||||
ret
|
||||
|
||||
/* 1--2 bytes: compare byte 0 and byte len-1 (the same byte when len is 1). */
.L0102: movzbl (%rdi), %eax # load 1--2 bytes from first buffer
|
||||
movzbl -1(%rdi, %rdx, 1), %ecx
|
||||
xor (%rsi), %al # xor in second buffer
|
||||
xor -1(%rsi, %rdx, 1), %cl
|
||||
or %ecx, %eax # mismatch in any of the two?
|
||||
ret
|
||||
|
||||
/* 3--4 bytes: compare the first 2 bytes and the last 2 bytes. */
.L0304: movzwl (%rdi), %eax
|
||||
movzwl -2(%rdi, %rdx, 1), %ecx
|
||||
xor (%rsi), %ax
|
||||
xor -2(%rsi, %rdx, 1), %cx
|
||||
or %ecx, %eax
|
||||
ret
|
||||
|
||||
/* 5--8 bytes: compare the first 4 bytes and the last 4 bytes. */
.L0508: mov (%rdi), %eax
|
||||
mov -4(%rdi, %rdx, 1), %ecx
|
||||
xor (%rsi), %eax
|
||||
xor -4(%rsi, %rdx, 1), %ecx
|
||||
or %ecx, %eax
|
||||
ret
|
||||
|
||||
/*
 * 9--16 bytes: compare the first 8 bytes and the last 8 bytes.  The
 * difference is 64 bits wide but the result is returned in %eax, so the
 * zero flag left by the final OR is folded into %al.
 */
.L0916: mov (%rdi), %rax
|
||||
mov -8(%rdi, %rdx, 1), %rcx
|
||||
xor (%rsi), %rax
|
||||
xor -8(%rsi, %rdx, 1), %rcx
|
||||
or %rcx, %rax
|
||||
setnz %al # ensure EAX nonzero even if only
|
||||
ret # high bits of RAX were set
|
||||
|
||||
/* more than 16 bytes: process buffer in a loop */
|
||||
/*
 * %rax accumulates the ORed differences.  %rcx is the end offset of the
 * next 16-byte chunk; it starts at 32 because bytes [0, 16) are handled
 * right here.  Writing %ecx also clears the upper half of %rcx.
 */
.Lgt16: mov (%rdi), %rax # process first 16 bytes
|
||||
mov 8(%rdi), %r9
|
||||
mov $32, %ecx
|
||||
xor (%rsi), %rax
|
||||
xor 8(%rsi), %r9
|
||||
or %r9, %rax
|
||||
|
||||
/* With 32 bytes or fewer the tail code below covers everything that is left. */
cmp %rdx, %rcx # enough left for a full iteration?
|
||||
jae .Ltail
|
||||
|
||||
/* main loop processing 16 bytes per iteration */
|
||||
ALIGN_TEXT
|
||||
/*
 * Each pass compares bytes [rcx-16, rcx).  The loop is entered and repeated
 * only while rcx < len, so the chunk always lies inside the buffers.
 */
0: mov -16(%rdi, %rcx, 1), %r8
|
||||
mov -8(%rdi, %rcx, 1), %r9
|
||||
xor -16(%rsi, %rcx, 1), %r8
|
||||
xor -8(%rsi, %rcx, 1), %r9
|
||||
add $16, %rcx
|
||||
or %r9, %r8
|
||||
or %r8, %rax
|
||||
|
||||
cmp %rdx, %rcx
|
||||
jb 0b
|
||||
|
||||
/* process last 16 bytes */
|
||||
/*
 * Compares bytes [len-16, len).  This may re-read bytes already folded into
 * %rax; ORing the same difference twice does not change the result.
 */
.Ltail: mov -16(%rdi, %rdx, 1), %r8
|
||||
mov -8(%rdi, %rdx, 1), %r9
|
||||
xor -16(%rsi, %rdx, 1), %r8
|
||||
xor -8(%rsi, %rdx, 1), %r9
|
||||
or %r9, %r8
|
||||
or %r8, %rax
|
||||
/* Fold the 64-bit accumulator into %eax, as in the 9--16 byte case. */
setnz %al
|
||||
ret
|
||||
ARCHEND(timingsafe_bcmp, scalar)
|
||||
|
||||
/*
 * timingsafe_bcmp, baseline variant (16-byte SSE loads and compares for
 * lengths above 16; general-purpose registers for shorter inputs).
 *
 * In:    %rdi, %rsi = base addresses of the two buffers
 *        %rdx       = number of bytes to compare
 * Out:   %eax       = 0 if all compared bytes are equal, nonzero otherwise
 * Uses:  %rax, %rcx, %xmm0-%xmm4, flags
 *
 * Every conditional branch tests only the length (%rdx/%edx) or the loop
 * offset in %rcx; no branch depends on buffer contents.
 *
 * The vector paths track equality rather than difference: pcmpeqb yields an
 * all-ones byte where the inputs match, the per-chunk masks are ANDed
 * together, and the final 16-bit byte mask is inverted so that 0 means
 * "no mismatch".
 */
ARCHENTRY(timingsafe_bcmp, baseline)
|
||||
/* Length dispatch: pick the size class for %rdx. */
cmp $32, %rdx # at least 33 bytes to process?
|
||||
ja .Lgt32b
|
||||
|
||||
/* From here on %rdx <= 32, so testing the low 32 bits (%edx) suffices. */
cmp $16, %edx # at least 17 bytes to process?
|
||||
ja .L1732b
|
||||
|
||||
cmp $8, %edx # at least 9 bytes to process?
|
||||
ja .L0916b
|
||||
|
||||
cmp $4, %edx # at least 5 bytes to process?
|
||||
ja .L0508b
|
||||
|
||||
cmp $2, %edx # at least 3 bytes to process?
|
||||
ja .L0304b
|
||||
|
||||
test %edx, %edx # buffer empty?
|
||||
jnz .L0102b
|
||||
|
||||
xor %eax, %eax # empty buffer always matches
|
||||
ret
|
||||
|
||||
/* 1--2 bytes: compare byte 0 and byte len-1 (the same byte when len is 1). */
.L0102b:
|
||||
movzbl (%rdi), %eax # load 1--2 bytes from first buffer
|
||||
movzbl -1(%rdi, %rdx, 1), %ecx
|
||||
xor (%rsi), %al # xor in second buffer
|
||||
xor -1(%rsi, %rdx, 1), %cl
|
||||
or %ecx, %eax # mismatch in any of the two?
|
||||
ret
|
||||
|
||||
/* 3--4 bytes: compare the first 2 bytes and the last 2 bytes. */
.L0304b:
|
||||
movzwl (%rdi), %eax
|
||||
movzwl -2(%rdi, %rdx, 1), %ecx
|
||||
xor (%rsi), %ax
|
||||
xor -2(%rsi, %rdx, 1), %cx
|
||||
or %ecx, %eax
|
||||
ret
|
||||
|
||||
/* 5--8 bytes: compare the first 4 bytes and the last 4 bytes. */
.L0508b:
|
||||
mov (%rdi), %eax
|
||||
mov -4(%rdi, %rdx, 1), %ecx
|
||||
xor (%rsi), %eax
|
||||
xor -4(%rsi, %rdx, 1), %ecx
|
||||
or %ecx, %eax
|
||||
ret
|
||||
|
||||
/*
 * 9--16 bytes: compare the first 8 bytes and the last 8 bytes.  The
 * difference is 64 bits wide but the result is returned in %eax, so the
 * zero flag left by the final OR is folded into %al.
 */
.L0916b:
|
||||
mov (%rdi), %rax
|
||||
mov -8(%rdi, %rdx, 1), %rcx
|
||||
xor (%rsi), %rax
|
||||
xor -8(%rsi, %rdx, 1), %rcx
|
||||
or %rcx, %rax
|
||||
setnz %al # ensure EAX nonzero even if only
|
||||
ret # high bits of RAX were set
|
||||
|
||||
/*
 * 17--32 bytes: compare the first 16 bytes and the last 16 bytes using
 * unaligned 16-byte loads.  The two chunks overlap when len < 32.
 */
.L1732b:
|
||||
movdqu (%rdi), %xmm0
|
||||
movdqu (%rsi), %xmm2
|
||||
movdqu -16(%rdi, %rdx, 1), %xmm1
|
||||
movdqu -16(%rsi, %rdx, 1), %xmm3
|
||||
pcmpeqb %xmm2, %xmm0
|
||||
pcmpeqb %xmm3, %xmm1
|
||||
/* A byte lane stays all-ones only if it matched in both chunks. */
pand %xmm1, %xmm0
|
||||
pmovmskb %xmm0, %eax # 1 where equal
|
||||
xor $0xffff, %eax # 1 where not equal
|
||||
ret
|
||||
|
||||
/* more than 32 bytes: process buffer in a loop */
|
||||
/*
 * %xmm4 accumulates the ANDed equality masks.  %rcx is the end offset of
 * the next 32-byte chunk; it starts at 64 because bytes [0, 32) are handled
 * right here.  Writing %ecx also clears the upper half of %rcx.
 */
.Lgt32b:
|
||||
movdqu (%rdi), %xmm4
|
||||
movdqu (%rsi), %xmm2
|
||||
movdqu 16(%rdi), %xmm1
|
||||
movdqu 16(%rsi), %xmm3
|
||||
mov $64, %ecx
|
||||
pcmpeqb %xmm2, %xmm4
|
||||
pcmpeqb %xmm3, %xmm1
|
||||
pand %xmm1, %xmm4
|
||||
/* With 64 bytes or fewer the tail code below covers everything that is left. */
cmp %rdx, %rcx # enough left for a full iteration?
|
||||
jae .Ltailb
|
||||
|
||||
/* main loop processing 32 bytes per iteration */
|
||||
ALIGN_TEXT
|
||||
/*
 * Each pass compares bytes [rcx-32, rcx).  The loop is entered and repeated
 * only while rcx < len, so the chunk always lies inside the buffers.
 */
0: movdqu -32(%rdi, %rcx, 1), %xmm0
|
||||
movdqu -32(%rsi, %rcx, 1), %xmm2
|
||||
movdqu -16(%rdi, %rcx, 1), %xmm1
|
||||
movdqu -16(%rsi, %rcx, 1), %xmm3
|
||||
add $32, %rcx
|
||||
pcmpeqb %xmm2, %xmm0
|
||||
pcmpeqb %xmm3, %xmm1
|
||||
pand %xmm1, %xmm0
|
||||
pand %xmm0, %xmm4
|
||||
cmp %rdx, %rcx
|
||||
jb 0b
|
||||
|
||||
/* process last 32 bytes */
|
||||
/*
 * Compares bytes [len-32, len).  This may re-read bytes already folded into
 * %xmm4; ANDing the same equality mask twice does not change the result.
 */
.Ltailb:
|
||||
movdqu -32(%rdi, %rdx, 1), %xmm0
|
||||
movdqu -32(%rsi, %rdx, 1), %xmm2
|
||||
movdqu -16(%rdi, %rdx, 1), %xmm1
|
||||
movdqu -16(%rsi, %rdx, 1), %xmm3
|
||||
pcmpeqb %xmm2, %xmm0
|
||||
pcmpeqb %xmm3, %xmm1
|
||||
pand %xmm1, %xmm0
|
||||
/* Merge in the mask accumulated over the head and the main loop. */
pand %xmm4, %xmm0
|
||||
/* As in the 17--32 byte case: extract the byte mask, then invert it. */
pmovmskb %xmm0, %eax
|
||||
xor $0xffff, %eax
|
||||
ret
|
||||
ARCHEND(timingsafe_bcmp, baseline)
|
||||
|
||||
/*
 * Empty .note.GNU-stack section: the GNU toolchain convention by which an
 * object file declares that it does not need an executable stack.
 */
.section .note.GNU-stack,"",%progbits
|
||||
Loading…
Reference in a new issue