From ebecbb69ec0e1f7fe603f8680a05f5943b35933f Mon Sep 17 00:00:00 2001
From: Bananymous
Date: Tue, 14 Jan 2025 22:50:46 +0200
Subject: [PATCH] LibC: Implement some mem* and str* functions in assembly

This made them a lot faster on modern cpus with optimized rep stos and
rep movs
---
 kernel/CMakeLists.txt                         |  4 +
 userspace/libraries/LibC/CMakeLists.txt       |  1 +
 userspace/libraries/LibC/arch/i686/string.S   | 97 +++++++++++++++++++
 userspace/libraries/LibC/arch/x86_64/string.S | 72 ++++++++++++++
 4 files changed, 174 insertions(+)
 create mode 100644 userspace/libraries/LibC/arch/i686/string.S
 create mode 100644 userspace/libraries/LibC/arch/x86_64/string.S

diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt
index 2272ec78..7b0b4a16 100644
--- a/kernel/CMakeLists.txt
+++ b/kernel/CMakeLists.txt
@@ -148,6 +148,10 @@ set(BAN_SOURCES
 set(KLIBC_SOURCES
 	klibc/ctype.cpp
 	klibc/string.cpp
+
+	# Ehhh don't do this but for now libc uses the same stuff kernel can use
+	# This won't work after libc starts using sse implementations tho
+	../userspace/libraries/LibC/arch/${BANAN_ARCH}/string.S
 )
 
 set(LIBFONT_SOURCES
diff --git a/userspace/libraries/LibC/CMakeLists.txt b/userspace/libraries/LibC/CMakeLists.txt
index be012951..0c8c01bc 100644
--- a/userspace/libraries/LibC/CMakeLists.txt
+++ b/userspace/libraries/LibC/CMakeLists.txt
@@ -45,6 +45,7 @@ set(LIBC_SOURCES
 	icxxabi.cpp
 
 	arch/${BANAN_ARCH}/setjmp.S
+	arch/${BANAN_ARCH}/string.S
 
 	../../../BAN/BAN/Assert.cpp
 )
diff --git a/userspace/libraries/LibC/arch/i686/string.S b/userspace/libraries/LibC/arch/i686/string.S
new file mode 100644
index 00000000..07a5ee6d
--- /dev/null
+++ b/userspace/libraries/LibC/arch/i686/string.S
@@ -0,0 +1,97 @@
+/* Assembly implementations of mem* and strlen for i686 (cdecl: arguments on
+ * the stack, result in %eax). %edi and %esi must be preserved, so each routine
+ * swaps them with its own argument slots and restores them before returning.
+ * The direction flag is assumed clear on entry. */
+
+/* void* memchr(const void* s, int c, size_t n) */
+.global memchr
+memchr:
+	xchgl 4(%esp), %edi
+	movl 8(%esp), %eax
+	movl 12(%esp), %ecx
+	xorl %edx, %edx /* result defaults to NULL */
+	jecxz .memchr_done /* n == 0: repne would run zero times and leave ZF stale */
+	repne scasb
+	jne .memchr_done /* ZF, not %ecx, tells whether the last byte matched */
+	leal -1(%edi), %edx /* scasb stepped one past the match */
+.memchr_done:
+	movl 4(%esp), %edi
+	movl %edx, %eax
+	ret
+
+/* int memcmp(const void* s1, const void* s2, size_t n) */
+.global memcmp
+memcmp:
+	xchgl 4(%esp), %edi
+	xchgl 8(%esp), %esi
+	movl 12(%esp), %ecx
+	xorl %eax, %eax /* result 0 and ZF set, so n == 0 compares equal */
+	repe cmpsb
+	je .memcmp_done
+	movzbl -1(%edi), %eax /* differing bytes are one behind the pointers */
+	movzbl -1(%esi), %ecx
+	subl %ecx, %eax
+.memcmp_done:
+	movl 4(%esp), %edi
+	movl 8(%esp), %esi
+	ret
+
+/* void* memcpy(void* dest, const void* src, size_t n) */
+.global memcpy
+memcpy:
+	xchgl 4(%esp), %edi
+	xchgl 8(%esp), %esi
+	movl 12(%esp), %ecx
+	movl %edi, %edx /* keep dest for the return value */
+	rep movsb
+	movl 4(%esp), %edi
+	movl 8(%esp), %esi
+	movl %edx, %eax
+	ret
+
+/* void* memmove(void* dest, const void* src, size_t n) */
+.global memmove
+memmove:
+	xchgl 4(%esp), %edi
+	xchgl 8(%esp), %esi
+	movl 12(%esp), %ecx
+	movl %edi, %edx /* keep dest for the return value */
+	cmpl %edi, %esi
+	jb .memmove_slow /* src below dest: copy backwards to survive overlap */
+	rep movsb
+.memmove_done:
+	movl 4(%esp), %edi
+	movl 8(%esp), %esi
+	movl %edx, %eax
+	ret
+.memmove_slow:
+	leal -1(%edi, %ecx), %edi /* point both at their last byte */
+	leal -1(%esi, %ecx), %esi
+	std
+	rep movsb
+	cld /* restore the forward direction before returning */
+	jmp .memmove_done
+
+/* void* memset(void* s, int c, size_t n) */
+.global memset
+memset:
+	xchgl 4(%esp), %edi
+	movl 8(%esp), %eax
+	movl 12(%esp), %ecx
+	movl %edi, %edx /* keep s for the return value */
+	rep stosb
+	movl 4(%esp), %edi
+	movl %edx, %eax
+	ret
+
+/* size_t strlen(const char* s) */
+.global strlen
+strlen:
+	xchgl 4(%esp), %edi
+	xorb %al, %al
+	movl $-1, %ecx
+	repne scasb /* scans len + 1 bytes, leaving %ecx = -len - 2 */
+	movl 4(%esp), %edi
+	movl $-2, %eax
+	subl %ecx, %eax
+	ret
diff --git a/userspace/libraries/LibC/arch/x86_64/string.S b/userspace/libraries/LibC/arch/x86_64/string.S
new file mode 100644
index 00000000..96c07bc8
--- /dev/null
+++ b/userspace/libraries/LibC/arch/x86_64/string.S
@@ -0,0 +1,72 @@
+/* Assembly implementations of mem* and strlen for x86_64 (arguments in %rdi,
+ * %rsi, %rdx and result in %rax). The direction flag is assumed clear on
+ * entry. */
+
+/* void* memchr(const void* s, int c, size_t n) */
+.global memchr
+memchr:
+	movb %sil, %al
+	movq %rdx, %rcx
+	xorl %edx, %edx /* result defaults to NULL */
+	jrcxz .memchr_done /* n == 0: repne would run zero times and leave ZF stale */
+	repne scasb
+	jne .memchr_done /* ZF, not %rcx, tells whether the last byte matched */
+	leaq -1(%rdi), %rdx /* scasb stepped one past the match */
+.memchr_done:
+	movq %rdx, %rax
+	ret
+
+/* int memcmp(const void* s1, const void* s2, size_t n) */
+.global memcmp
+memcmp:
+	movq %rdx, %rcx
+	xorl %eax, %eax /* result 0 and ZF set, so n == 0 compares equal */
+	repe cmpsb
+	je .memcmp_done
+	movzbl -1(%rdi), %eax /* differing bytes are one behind the pointers */
+	movzbl -1(%rsi), %ecx
+	subl %ecx, %eax
+.memcmp_done:
+	ret
+
+/* void* memcpy(void* dest, const void* src, size_t n) */
+.global memcpy
+memcpy:
+	movq %rdi, %rax /* rep movsb leaves %rax alone, so set the result now */
+	movq %rdx, %rcx
+	rep movsb
+	ret
+
+/* void* memmove(void* dest, const void* src, size_t n) */
+.global memmove
+memmove:
+	cmpq %rdi, %rsi
+	jae memcpy /* src at or above dest: a forward copy is overlap-safe */
+	leaq -1(%rdi, %rdx), %rdi /* point both at their last byte */
+	leaq -1(%rsi, %rdx), %rsi
+	movq %rdx, %rcx
+	std
+	rep movsb
+	cld /* restore the forward direction before returning */
+	leaq 1(%rdi), %rax /* %rdi ended one below dest */
+	ret
+
+/* void* memset(void* s, int c, size_t n) */
+.global memset
+memset:
+	movq %rdi, %r8 /* keep s for the return value */
+	movb %sil, %al
+	movq %rdx, %rcx
+	rep stosb
+	movq %r8, %rax
+	ret
+
+/* size_t strlen(const char* s) */
+.global strlen
+strlen:
+	xorb %al, %al
+	movq $-1, %rcx
+	repne scasb /* scans len + 1 bytes, leaving %rcx = -len - 2 */
+	movq $-2, %rax
+	subq %rcx, %rax
+	ret