LibC: Write memchr, memcmp and strlen with SSE2

This commit is contained in:
Bananymous 2026-04-02 15:30:31 +03:00
parent d168492462
commit 82d5d9ba58
2 changed files with 148 additions and 73 deletions

View File

@ -1,35 +1,3 @@
.global memchr
/*
 * void *memchr(const void *s, int c, size_t n)
 *
 * Arguments are read from the stack: s at 4(%esp), c at 8(%esp), n at 12(%esp).
 * Returns in %eax the address of the first byte equal to (unsigned char)c
 * within the first n bytes of s, or 0 when there is none.
 * %edi is preserved by parking it in the first argument slot during the scan.
 * Relies on the direction flag being clear on entry.
 */
memchr:
    movl    12(%esp), %ecx          # ecx = n, the repne iteration budget
    movl    8(%esp), %eax           # al = byte to look for
    xchgl   4(%esp), %edi           # edi = s, old edi now lives in the argument slot
    movl    $1, %edx                # default result + 1, so the final -1 yields NULL
    cmpl    $1, %ecx                # n == 0 runs no scasb, leave ZF clear for that case
    repne   scasb
    cmovel  %edi, %edx              # on a match edi is one past the matching byte
    movl    4(%esp), %edi           # give the caller its edi back
    leal    -1(%edx), %eax
    ret
.global memcmp
/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 *
 * Arguments are read from the stack: s1 at 4(%esp), s2 at 8(%esp), n at 12(%esp).
 * Returns 0 in %eax when the first n bytes match, otherwise the difference
 * (s1 byte - s2 byte) of the first mismatching pair, both taken as unsigned.
 * %edi and %esi are preserved by parking them in the argument slots.
 * Relies on the direction flag being clear on entry.
 */
memcmp:
    movl    12(%esp), %ecx          # ecx = n
    xchgl   8(%esp), %esi           # esi = s2, old esi kept in the argument slot
    xchgl   4(%esp), %edi           # edi = s1, old edi kept in the argument slot
    xorl    %eax, %eax              # result for the all-equal case
    testl   %ecx, %ecx              # n == 0 runs no cmpsb, so force ZF set
    repe    cmpsb
    je      .memcmp_restore
    movzbl  -1(%edi), %eax          # both pointers are one past the mismatch
    movzbl  -1(%esi), %ecx
    subl    %ecx, %eax
.memcmp_restore:
    movl    4(%esp), %edi
    movl    8(%esp), %esi
    ret
.global memcpy
memcpy:
xchgl 4(%esp), %edi
@ -74,14 +42,3 @@ memset:
movl 4(%esp), %edi
movl %edx, %eax
ret
.global strlen
/*
 * size_t strlen(const char *s)
 *
 * The argument is read from the stack at 4(%esp).
 * Returns in %eax the number of bytes before the first zero byte of s.
 * %edi is preserved by parking it in the argument slot during the scan.
 * Relies on the direction flag being clear on entry.
 */
strlen:
    xchgl   4(%esp), %edi           # edi = s, old edi kept in the argument slot
    movl    $-1, %ecx               # effectively unbounded scan
    xorb    %al, %al                # search for the zero byte
    repne   scasb                   # leaves ecx = -(length + 2)
    movl    $-2, %eax
    subl    %ecx, %eax              # -2 - ecx = length
    movl    4(%esp), %edi           # give the caller its edi back
    ret

View File

@ -1,28 +1,3 @@
.global memchr
/*
 * void *memchr(const void *s, int c, size_t n)
 *
 * In:  rdi = s, esi = c, rdx = n
 * Out: rax = address of the first byte equal to (unsigned char)c within the
 *      first n bytes of s, or 0 when there is none
 * Modifies rcx, rdx, rdi and flags. Relies on the direction flag being clear.
 */
memchr:
    movq    %rdx, %rcx              # rcx = n, the repne iteration budget
    movl    %esi, %eax              # scasb compares against al only
    movl    $1, %edx                # default result + 1, so the final -1 yields NULL
    cmpq    $1, %rcx                # n == 0 runs no scasb, leave ZF clear for that case
    repne   scasb
    cmoveq  %rdi, %rdx              # on a match rdi is one past the matching byte
    leaq    -1(%rdx), %rax
    ret
.global memcmp
/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 *
 * In:  rdi = s1, rsi = s2, rdx = n
 * Out: eax = 0 when the first n bytes match, otherwise the difference
 *      (s1 byte - s2 byte) of the first mismatching pair, both unsigned
 * Modifies rcx, rsi, rdi and flags. Relies on the direction flag being clear.
 */
memcmp:
    movq    %rdx, %rcx              # rcx = n
    testq   %rdx, %rdx              # n == 0 runs no cmpsb, so force ZF set
    repe    cmpsb
    je      .memcmp_same
    movzbl  -1(%rdi), %eax          # both pointers are one past the mismatch
    movzbl  -1(%rsi), %ecx
    subq    %rcx, %rax
    ret
.memcmp_same:
    xorl    %eax, %eax              # writing eax also clears the upper half of rax
    ret
.global memcpy
memcpy:
movq %rdi, %rax
@ -52,11 +27,154 @@ memset:
movq %r8, %rax
ret
#if defined(__SSE2__)
.global memchr
/*
 * void *memchr(const void *s, int c, size_t n)
 *
 * In:  rdi = s, esi = c, rdx = n
 * Out: rax = address of the first byte equal to (unsigned char)c within the
 *      first n bytes of s, or 0 when there is none
 * Modifies rcx, rdx, rsi, rdi, xmm0, xmm1 and flags.
 *
 * Only aligned 16-byte loads are issued, and each loaded block contains at
 * least one byte that is inside [s, s + n). Bytes of a block that lie outside
 * that range are filtered out: leading ones by shifting the match mask,
 * trailing ones by comparing the match index against the remaining count.
 */
memchr:
    testq   %rdx, %rdx
    jz      .memchr_no_match

    movd    %esi, %xmm0             # broadcast the low byte of c to all 16 lanes
    punpcklbw %xmm0, %xmm0
    punpcklwd %xmm0, %xmm0
    pshufd  $0, %xmm0, %xmm0

    movq    %rdi, %rcx
    andq    $15, %rcx               # rcx = misalignment of s
    jz      .memchr_loop

    movq    %rdi, %rsi
    subq    %rcx, %rsi              # rsi = aligned block that contains s
    movdqa  (%rsi), %xmm1
    pcmpeqb %xmm0, %xmm1
    pmovmskb %xmm1, %eax
    shrl    %cl, %eax               # drop bytes before s, bit 0 now maps to s[0]
    jnz     .memchr_match           # rdi and rdx still describe the original range

    leaq    16(%rsi), %rdi          # continue at the next aligned block
    movl    $16, %eax
    subq    %rcx, %rax              # rax = bytes of the range covered by the head block
    subq    %rax, %rdx              # subtracting cannot wrap, unlike n + misalignment - 16
    jbe     .memchr_no_match        # range ended inside the head block

.memchr_loop:                       # rdi is 16-byte aligned, rdx > 0 bytes remain
    movdqa  (%rdi), %xmm1
    pcmpeqb %xmm0, %xmm1
    pmovmskb %xmm1, %eax
    testl   %eax, %eax
    jnz     .memchr_match
    addq    $16, %rdi
    subq    $16, %rdx
    ja      .memchr_loop

.memchr_no_match:
    xorq    %rax, %rax
    ret

.memchr_match:                      # eax = match mask relative to rdi, rdx = bytes left from rdi
    bsfl    %eax, %eax
    cmpq    %rdx, %rax
    jae     .memchr_no_match        # first match lies beyond the requested range
    addq    %rdi, %rax
    ret
.global memcmp
/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 *
 * In:  rdi = s1, rsi = s2, rdx = n
 * Out: eax = 0 when the first n bytes match, otherwise the difference
 *      (s1 byte - s2 byte) of the first mismatching pair, both unsigned
 * Modifies rcx, rdx, rsi, rdi, r8, xmm0, xmm1 and flags.
 *
 * Layout: a byte-wise head that advances until the more misaligned of the
 * two pointers reaches a 16-byte boundary, a 16-byte vector loop, and a
 * byte-wise tail. The vector loop only runs while at least 16 bytes remain,
 * so no load ever touches memory outside [s1, s1 + n) or [s2, s2 + n).
 */
memcmp:
    testq   %rdx, %rdx
    jz      .memcmp_equal

    movq    %rdi, %rax
    movq    %rsi, %rcx
    andq    $15, %rax
    andq    $15, %rcx
    cmpq    %rax, %rcx
    cmovaq  %rcx, %rax              # rax = larger of the two misalignments
    testq   %rax, %rax
    jz      .memcmp_loop

    movq    $16, %rcx
    subq    %rax, %rcx              # bytes until that pointer is aligned
    cmpq    %rcx, %rdx
    cmovbq  %rdx, %rcx              # never more than n
    subq    %rcx, %rdx              # rdx = bytes left after the head

.memcmp_align_loop:                 # rcx > 0 head bytes to compare
    movzbl  (%rdi), %eax
    movzbl  (%rsi), %r8d
    subl    %r8d, %eax
    jnz     .memcmp_return
    incq    %rdi
    incq    %rsi
    decq    %rcx
    jnz     .memcmp_align_loop

.memcmp_loop:
    cmpq    $16, %rdx
    jb      .memcmp_tail            # a full 16-byte load would overrun the buffers
    movdqu  (%rdi), %xmm0
    movdqu  (%rsi), %xmm1
    pcmpeqb %xmm0, %xmm1
    pmovmskb %xmm1, %eax
    xorl    $0xFFFF, %eax           # set bits now mark differing bytes
    jnz     .memcmp_differ
    addq    $16, %rdi
    addq    $16, %rsi
    subq    $16, %rdx
    jmp     .memcmp_loop

.memcmp_tail:                       # fewer than 16 bytes remain
    testq   %rdx, %rdx
    jz      .memcmp_equal
.memcmp_tail_loop:
    movzbl  (%rdi), %eax
    movzbl  (%rsi), %ecx
    subl    %ecx, %eax
    jnz     .memcmp_return
    incq    %rdi
    incq    %rsi
    decq    %rdx
    jnz     .memcmp_tail_loop

.memcmp_equal:
    xorl    %eax, %eax
.memcmp_return:
    ret

.memcmp_differ:                     # all 16 compared bytes are inside the range
    bsfl    %eax, %ecx              # index of the first differing byte
    movzbl  (%rdi, %rcx), %eax
    movzbl  (%rsi, %rcx), %edx
    subl    %edx, %eax
    ret
.global strlen
/*
 * size_t strlen(const char *s)
 *
 * In:  rdi = s
 * Out: rax = number of bytes before the first zero byte of s
 * Modifies rcx, rdx, rsi, xmm0, xmm1 and flags. rdi is left untouched and
 * serves as the base for the final length computation.
 *
 * Only aligned 16-byte loads are issued. The first block may start before s;
 * those leading bytes are removed by shifting the match mask.
 */
strlen:
    movq    %rdi, %rsi              # rsi = scan cursor
    pxor    %xmm0, %xmm0            # sixteen zero bytes to compare against

    movq    %rsi, %rcx
    andq    $15, %rcx               # rcx = misalignment of s
    jz      .strlen_loop

    movq    %rsi, %rdx
    subq    %rcx, %rdx              # rdx = aligned block that contains s
    movdqa  (%rdx), %xmm1
    pcmpeqb %xmm0, %xmm1
    pmovmskb %xmm1, %eax
    shrl    %cl, %eax               # drop bytes before s, bit 0 now maps to s[0]
    jnz     .strlen_null_found      # rsi still equals s here
    leaq    16(%rdx), %rsi          # continue at the next aligned block

.strlen_loop:                       # rsi is 16-byte aligned
    movdqa  (%rsi), %xmm1
    pcmpeqb %xmm0, %xmm1
    pmovmskb %xmm1, %eax
    testl   %eax, %eax
    jnz     .strlen_null_found
    addq    $16, %rsi
    jmp     .strlen_loop

.strlen_null_found:                 # eax = zero-byte mask relative to rsi
    bsfl    %eax, %eax
    addq    %rsi, %rax              # address of the terminator
    subq    %rdi, %rax              # minus the start of the string
    ret
#endif