anolis-glibc/glibc-2.28-Refactor-code-of-st-r-p-functions.patch


From b720fd44df475685ea164491d76c42e127aab3ea Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Wed, 21 Jun 2023 10:49:39 +0800
Subject: [PATCH 07/14] glibc-2.28: Refactor code of st{r,p}* functions.
Change-Id: Ife977373e9ba071b284ee19ca4ba121bc27d5834
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
.../loongarch/lp64/multiarch/stpcpy-aligned.S | 179 +++++++++++-
.../loongarch/lp64/multiarch/strchr-aligned.S | 91 ++++++-
.../lp64/multiarch/strchrnul-aligned.S | 94 ++++++-
.../loongarch/lp64/multiarch/strcmp-aligned.S | 225 ++++++++++++++-
.../loongarch/lp64/multiarch/strcpy-aligned.S | 173 +++++++++++-
.../loongarch/lp64/multiarch/strlen-aligned.S | 85 +++++-
.../lp64/multiarch/strncmp-aligned.S | 256 +++++++++++++++++-
.../lp64/multiarch/strnlen-aligned.S | 82 +++++-
.../lp64/multiarch/strrchr-aligned.S | 105 ++++++-
sysdeps/loongarch/lp64/stpcpy.S | 179 ------------
sysdeps/loongarch/lp64/strchr.S | 89 ------
sysdeps/loongarch/lp64/strchrnul.S | 94 -------
sysdeps/loongarch/lp64/strcmp.S | 227 ----------------
sysdeps/loongarch/lp64/strcpy.S | 173 ------------
sysdeps/loongarch/lp64/strlen.S | 85 ------
sysdeps/loongarch/lp64/strncmp.S | 256 ------------------
sysdeps/loongarch/lp64/strnlen.S | 82 ------
sysdeps/loongarch/lp64/strrchr.S | 105 -------
18 files changed, 1264 insertions(+), 1316 deletions(-)
delete mode 100644 sysdeps/loongarch/lp64/stpcpy.S
delete mode 100644 sysdeps/loongarch/lp64/strchr.S
delete mode 100644 sysdeps/loongarch/lp64/strchrnul.S
delete mode 100644 sysdeps/loongarch/lp64/strcmp.S
delete mode 100644 sysdeps/loongarch/lp64/strcpy.S
delete mode 100644 sysdeps/loongarch/lp64/strlen.S
delete mode 100644 sysdeps/loongarch/lp64/strncmp.S
delete mode 100644 sysdeps/loongarch/lp64/strnlen.S
delete mode 100644 sysdeps/loongarch/lp64/strrchr.S
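
Reviewer's note (not part of the patch): every routine below leans on the same word-at-a-time (SWAR) zero-byte test, built from the two constants the assembly materializes into t5 and t6 with lu12i.w/ori/bstrins.d. A minimal C sketch of that predicate, assuming 64-bit little-endian words as on LoongArch:

```c
#include <stdint.h>

/* Nonzero iff the 64-bit word v contains a zero byte.  The patch's
   t5 is `ones` and t6 is `highs` (t5 shifted left by 7); strcmp and
   strncmp instead compute the ~v & highs term as
   ~(v | 0x7f7f7f7f7f7f7f7f) via nor with t6 = 0x7f7f7f7f7f7f7f7f.  */
static inline uint64_t
has_zero_byte (uint64_t v)
{
  const uint64_t ones  = 0x0101010101010101ULL;  /* t5 */
  const uint64_t highs = 0x8080808080808080ULL;  /* t6 = t5 << 7 */
  return (v - ones) & ~v & highs;
}
```

When the result is nonzero, bit 7 of each zero byte is set, so ctz.d of the pattern shifted right by 3 yields the byte index of the first '\0'; every L(*_end) block below performs that conversion.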
diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S
index 3d134e3f..7109b0f0 100644
--- a/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S
@@ -1,8 +1,181 @@
+#ifdef _LIBC
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+#else
+#include <sys/asm.h>
+#include <sys/regdef.h>
+#endif
#if IS_IN (libc)
-
#define STPCPY_NAME __stpcpy_aligned
-
+#else
+#define STPCPY_NAME __stpcpy
#endif
-#include "../stpcpy.S"
+LEAF(STPCPY_NAME, 6)
+ andi a3, a0, 0x7
+ beqz a3, L(dest_align)
+ sub.d a5, a1, a3
+ addi.d a5, a5, 8
+
+L(make_dest_align):
+ ld.b t0, a1, 0
+ addi.d a1, a1, 1
+ st.b t0, a0, 0
+ addi.d a0, a0, 1
+
+ beqz t0, L(al_out)
+ bne a1, a5, L(make_dest_align)
+
+L(dest_align):
+ andi a4, a1, 7
+ bstrins.d a1, zero, 2, 0
+
+ lu12i.w t5, 0x1010
+ ld.d t0, a1, 0
+ ori t5, t5, 0x101
+ bstrins.d t5, t5, 63, 32
+
+ slli.d t6, t5, 0x7
+ bnez a4, L(unalign)
+ sub.d t1, t0, t5
+ andn t2, t6, t0
+
+ and t3, t1, t2
+ bnez t3, L(al_end)
+
+L(al_loop):
+ st.d t0, a0, 0
+ ld.d t0, a1, 8
+
+ addi.d a1, a1, 8
+ addi.d a0, a0, 8
+ sub.d t1, t0, t5
+ andn t2, t6, t0
+
+ and t3, t1, t2
+ beqz t3, L(al_loop)
+
+L(al_end):
+ ctz.d t1, t3
+ srli.d t1, t1, 3
+ addi.d t1, t1, 1 # add 1, since '\0' needs to be copied to dest
+
+ andi a3, t1, 8
+ andi a4, t1, 4
+ andi a5, t1, 2
+ andi a6, t1, 1
+
+L(al_end_8):
+ beqz a3, L(al_end_4)
+ st.d t0, a0, 0
+ addi.d a0, a0, 7
+ jr ra
+L(al_end_4):
+ beqz a4, L(al_end_2)
+ st.w t0, a0, 0
+ addi.d a0, a0, 4
+ srli.d t0, t0, 32
+L(al_end_2):
+ beqz a5, L(al_end_1)
+ st.h t0, a0, 0
+ addi.d a0, a0, 2
+ srli.d t0, t0, 16
+L(al_end_1):
+ beqz a6, L(al_out)
+ st.b t0, a0, 0
+ addi.d a0, a0, 1
+L(al_out):
+ addi.d a0, a0, -1
+ jr ra
+
+L(unalign):
+ slli.d a5, a4, 3
+ li.d t1, -1
+ sub.d a6, zero, a5
+
+ srl.d a7, t0, a5
+ sll.d t7, t1, a6
+
+ or t0, a7, t7
+ sub.d t1, t0, t5
+ andn t2, t6, t0
+ and t3, t1, t2
+
+ bnez t3, L(un_end)
+
+ ld.d t4, a1, 8
+ addi.d a1, a1, 8
+
+ sub.d t1, t4, t5
+ andn t2, t6, t4
+ sll.d t0, t4, a6
+ and t3, t1, t2
+
+ or t0, t0, a7
+ bnez t3, L(un_end_with_remaining)
+
+L(un_loop):
+ srl.d a7, t4, a5
+
+ ld.d t4, a1, 8
+ addi.d a1, a1, 8
+
+ st.d t0, a0, 0
+ addi.d a0, a0, 8
+
+ sub.d t1, t4, t5
+ andn t2, t6, t4
+ sll.d t0, t4, a6
+ and t3, t1, t2
+
+ or t0, t0, a7
+ beqz t3, L(un_loop)
+
+L(un_end_with_remaining):
+ ctz.d t1, t3
+ srli.d t1, t1, 3
+ addi.d t1, t1, 1
+ sub.d t1, t1, a4
+
+ blt t1, zero, L(un_end_less_8)
+ st.d t0, a0, 0
+ addi.d a0, a0, 8
+ beqz t1, L(un_out)
+ srl.d t0, t4, a5 # get the remaining part
+ b L(un_end_less_8)
+
+L(un_end):
+ ctz.d t1, t3
+ srli.d t1, t1, 3
+ addi.d t1, t1, 1
+
+L(un_end_less_8):
+ andi a4, t1, 4
+ andi a5, t1, 2
+ andi a6, t1, 1
+L(un_end_4):
+ beqz a4, L(un_end_2)
+ st.w t0, a0, 0
+ addi.d a0, a0, 4
+ srli.d t0, t0, 32
+L(un_end_2):
+ beqz a5, L(un_end_1)
+ st.h t0, a0, 0
+ addi.d a0, a0, 2
+ srli.d t0, t0, 16
+L(un_end_1):
+ beqz a6, L(un_out)
+ st.b t0, a0, 0
+ addi.d a0, a0, 1
+L(un_out):
+ addi.d a0, a0, -1
+ jr ra
+
+END(STPCPY_NAME)
+
+#ifdef _LIBC
+weak_alias (STPCPY_NAME, stpcpy)
+libc_hidden_builtin_def (STPCPY_NAME)
+#endif
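
Reviewer's sketch of the tail above: L(al_end) turns the detection pattern into t1, the count of bytes still to copy including the terminator, and L(al_end_8) onward stores the word in 8/4/2/1-byte pieces selected by t1's bits, finally stepping back one byte so stpcpy returns a pointer to the stored '\0'. A hypothetical C equivalent (little-endian, matching the srli.d narrowing):

```c
#include <stdint.h>
#include <string.h>

/* Illustrative stand-in for L(al_end_8)..L(al_out): store the low
   nbytes (1..8, '\0' included) of word at dst and return a pointer
   to the copied terminator.  */
static char *
store_tail (char *dst, uint64_t word, unsigned nbytes)
{
  if (nbytes & 8)                       /* L(al_end_8) */
    { memcpy (dst, &word, 8); return dst + 7; }
  if (nbytes & 4)                       /* L(al_end_4) */
    { memcpy (dst, &word, 4); dst += 4; word >>= 32; }
  if (nbytes & 2)                       /* L(al_end_2) */
    { memcpy (dst, &word, 2); dst += 2; word >>= 16; }
  if (nbytes & 1)                       /* L(al_end_1) */
    *dst++ = (char) word;
  return dst - 1;                       /* L(al_out): point at '\0' */
}
```

strcpy-aligned.S below shares this tail almost verbatim; only the pointer bookkeeping differs, since strcpy returns the original destination instead.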
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
index 92365658..d9bd4587 100644
--- a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
@@ -1,10 +1,95 @@
-#if IS_IN (libc)
-#define STRCHR_NAME __strchr_aligned
+#ifdef _LIBC
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+#else
+#include <sys/asm.h>
+#include <sys/regdef.h>
+#endif
+#if IS_IN (libc)
+#define STRCHR_NAME __strchr_aligned
+#else
+#define STRCHR_NAME strchr
#endif
-#include "../strchr.S"
+/* char * strchr (const char *s1, int c); */
+
+LEAF(STRCHR_NAME, 6)
+ slli.d t1, a0, 3
+ bstrins.d a0, zero, 2, 0
+ lu12i.w a2, 0x01010
+ ld.d t2, a0, 0
+
+ ori a2, a2, 0x101
+ andi a1, a1, 0xff
+ bstrins.d a2, a2, 63, 32
+ li.w t0, -1
+
+ mul.d a1, a1, a2 # "cccccccc"
+ sll.d t0, t0, t1
+ slli.d a3, a2, 7 # 0x8080808080808080
+ orn t2, t2, t0
+
+ sll.d t3, a1, t1
+ xor t4, t2, t3
+ sub.d a7, t2, a2
+ andn a6, a3, t2
+
+
+ sub.d a5, t4, a2
+ andn a4, a3, t4
+ and a6, a7, a6
+ and a5, a5, a4
+
+ or t0, a6, a5
+ bnez t0, L(_mc8_a)
+ addi.d a0, a0, 8
+L(_aloop):
+ ld.d t4, a0, 0
+
+ xor t2, t4, a1
+ sub.d a7, t4, a2
+ andn a6, a3, t4
+ sub.d a5, t2, a2
+
+ andn a4, a3, t2
+ and a6, a7, a6
+ and a5, a5, a4
+ or a7, a6, a5
+
+
+ bnez a7, L(_mc8_a)
+ ld.d t4, a0, 8
+ addi.d a0, a0, 16
+ xor t2, t4, a1
+
+ sub.d a7, t4, a2
+ andn a6, a3, t4
+ sub.d a5, t2, a2
+ andn a4, a3, t2
+
+ and a6, a7, a6
+ and a5, a5, a4
+ or a7, a6, a5
+ beqz a7, L(_aloop)
+
+ addi.d a0, a0, -8
+
+L(_mc8_a):
+ ctz.d t0, a5
+ ctz.d t2, a6
+ srli.w t0, t0, 3
+
+
+ srli.w t2, t2, 3
+ sltu t1, t2, t0
+ add.d a0, a0, t0
+ masknez a0, a0, t1
+
+ jr ra
+END(STRCHR_NAME)
weak_alias (STRCHR_NAME, index)
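
Reviewer's sketch: the loop above runs the zero-byte test twice per word, once on the word itself (pattern a6, finds '\0') and once on the word XORed with a1 * 0x0101010101010101, the character broadcast to all eight lanes (pattern a5, finds c). In illustrative C, with the first-word orn/sll.d masking omitted:

```c
#include <stdint.h>

/* One word of L(_aloop) plus the L(_mc8_a) select: returns the byte
   index of c, -1 when '\0' comes first (strchr returns NULL), or 8
   when the word holds neither and the scan continues.  */
static int
probe_word (uint64_t v, uint64_t cccc /* c broadcast, as mul.d builds */)
{
  const uint64_t ones  = 0x0101010101010101ULL;
  const uint64_t highs = 0x8080808080808080ULL;
  uint64_t zero  = (v - ones) & ~v & highs;     /* a6 */
  uint64_t x     = v ^ cccc;                    /* t2 */
  uint64_t match = (x - ones) & ~x & highs;     /* a5 */
  if ((zero | match) == 0)
    return 8;
  /* ctz.d is defined for an all-zero operand (the assembly relies
     on that); the C builtin is not, hence the explicit guards.  */
  int zi = zero  ? __builtin_ctzll (zero)  >> 3 : 8;
  int mi = match ? __builtin_ctzll (match) >> 3 : 8;
  return zi < mi ? -1 : mi;   /* sltu t1, t2, t0 + masknez */
}
```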
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
index 4fa63ecc..f18b01a3 100644
--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
@@ -1,8 +1,96 @@
+#ifdef _LIBC
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+#else
+#include <sys/asm.h>
+#include <sys/regdef.h>
+#endif
#if IS_IN (libc)
-
#define STRCHRNUL_NAME __strchrnul_aligned
-
+#else
+#define STRCHRNUL_NAME __strchrnul
#endif
-#include "../strchrnul.S"
+/* char * strchrnul (const char *s1, int c); */
+
+LEAF(STRCHRNUL_NAME, 6)
+ slli.d t1, a0, 3
+ bstrins.d a0, zero, 2, 0
+ lu12i.w a2, 0x01010
+ ld.d t2, a0, 0
+
+ ori a2, a2, 0x101
+ andi a1, a1, 0xff
+ bstrins.d a2, a2, 63, 32
+ li.w t0, -1
+
+ mul.d a1, a1, a2 # "cccccccc"
+ sll.d t0, t0, t1
+ slli.d a3, a2, 7 # 0x8080808080808080
+ orn t2, t2, t0
+
+ sll.d t3, a1, t1
+ xor t4, t2, t3
+ sub.d a7, t2, a2
+ andn a6, a3, t2
+
+
+ sub.d a5, t4, a2
+ andn a4, a3, t4
+ and a6, a7, a6
+ and a5, a5, a4
+
+ or t0, a6, a5
+ bnez t0, L(_mc8_a)
+ addi.d a0, a0, 8
+L(_aloop):
+ ld.d t4, a0, 0
+
+ xor t2, t4, a1
+ sub.d a7, t4, a2
+ andn a6, a3, t4
+ sub.d a5, t2, a2
+
+ andn a4, a3, t2
+ and a6, a7, a6
+ and a5, a5, a4
+ or a7, a6, a5
+
+
+ bnez a7, L(_mc8_a)
+ ld.d t4, a0, 8
+ addi.d a0, a0, 16
+ xor t2, t4, a1
+
+ sub.d a7, t4, a2
+ andn a6, a3, t4
+ sub.d a5, t2, a2
+ andn a4, a3, t2
+
+ and a6, a7, a6
+ and a5, a5, a4
+ or a7, a6, a5
+ beqz a7, L(_aloop)
+
+ addi.d a0, a0, -8
+L(_mc8_a):
+ ctz.d t0, a5
+ ctz.d t2, a6
+ srli.w t0, t0, 3
+
+ srli.w t2, t2, 3
+ slt t1, t0, t2
+ masknez t3, t2, t1
+ maskeqz t4, t0, t1
+
+ or t0, t3, t4
+ add.d a0, a0, t0
+ jr ra
+END(STRCHRNUL_NAME)
+
+#ifdef _LIBC
+weak_alias(STRCHRNUL_NAME, strchrnul)
+libc_hidden_builtin_def (STRCHRNUL_NAME)
+#endif
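
The only difference from strchr is the ending: instead of NULL-ing the result when '\0' wins, L(_mc8_a) here takes the smaller of the two byte indices with slt/masknez/maskeqz/or. The same branchless select in C, for illustration:

```c
/* min(match_idx, nul_idx), mirroring the four-instruction select at
   the end of STRCHRNUL_NAME; lt plays the role of slt t1, t0, t2.  */
static unsigned
select_min (unsigned match_idx, unsigned nul_idx)
{
  unsigned lt = match_idx < nul_idx;                /* slt     */
  return (match_idx & -lt) | (nul_idx & (lt - 1));  /* mask+or */
}
```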
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
index f84f52b8..a9b74b0c 100644
--- a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
@@ -1,8 +1,229 @@
+/* 2022\06\15 loongarch64 author: chenxiaolong. */
-#if IS_IN (libc)
+#ifdef _LIBC
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+#else
+#include <sys/asm.h>
+#include <sys/regdef.h>
+#endif
+#if IS_IN (libc)
#define STRCMP_NAME __strcmp_aligned
+#else
+#define STRCMP_NAME strcmp
+#endif
+
+/* int strcmp (const char *s1, const char *s2); */
+
+/* Parameters and Results */
+#define src1 a0
+#define src2 a1
+#define result v0
+LEAF(STRCMP_NAME, 6)
+ xor a4, src1, src2
+ lu12i.w t5, 0x01010
+ lu12i.w t6, 0x7f7f7
+ andi a2, src1, 0x7
+
+ ori t5, t5, 0x101
+ andi a4, a4, 0x7
+ ori t6, t6, 0xf7f
+ bstrins.d t5, t5, 63, 32
+ bstrins.d t6, t6, 63, 32
+
+ bnez a4, 3f // unaligned
+ beqz a2, 1f // loop aligned
+
+// mutual aligned
+ bstrins.d src1, zero, 2, 0
+ bstrins.d src2, zero, 2, 0
+ slli.d a4, a2, 0x3
+ ld.d t0, src1, 0
+
+ sub.d a4, zero, a4
+ ld.d t1, src2, 0
+ addi.d src1, src1, 8
+ addi.d src2, src2, 8
+
+ nor a5, zero, zero
+ srl.d a5, a5, a4
+ or t0, t0, a5
+
+ or t1, t1, a5
+ b 2f //start realigned
+
+// loop aligned
+1:
+ ld.d t0, src1, 0
+ addi.d src1, src1, 8
+ ld.d t1, src2, 0
+ addi.d src2, src2, 8
+
+// start realigned:
+2:
+ sub.d t2, t0, t5
+ nor t3, t0, t6
+ and t2, t2, t3
+
+ xor t3, t0, t1
+ or t2, t2, t3
+ beqz t2, 1b
+
+ ctz.d t7, t2
+ bstrins.d t7, zero, 2, 0
+ srl.d t0, t0, t7
+ srl.d t1, t1, t7
+
+ andi t0, t0, 0xff
+ andi t1, t1, 0xff
+ sub.d v0, t0, t1
+ jr ra
+
+// unaligned
+3:
+ andi a3, src2, 0x7
+ slt a5, a2, a3
+ masknez t8, a2, a5
+ xor a6, src1, src2
+ maskeqz a6, a6, t8
+ xor src1, src1, a6
+ xor src2, src2, a6
+
+ andi a2, src1, 0x7
+ beqz a2, 4f // src1 is aligned
+
+//strcmp_unaligned:
+ andi a3, src2, 0x7
+ bstrins.d src1, zero, 2, 0
+ bstrins.d src2, zero, 2, 0
+ nor t3, zero, zero
+
+ ld.d t0, src1, 0
+ ld.d t1, src2, 0
+ sub.d a2, a3, a2
+ addi.d t2, zero, 8
+
+ sub.d a5, t2, a2
+ sub.d a6, t2, a3
+ slli.d a5, a5, 0x3
+ slli.d a6, a6, 0x3
+
+ srl.d t4, t3, a6
+ srl.d a4, t3, a5
+ rotr.d a7, t0, a5
+
+ addi.d src2, src2, 8
+ addi.d src1, src1, 8
+ or t1, t1, t4
+ or t0, a7, t4
+
+ sub.d t2, t0, t5
+ nor t3, t0, t6
+ and t2, t2, t3
+ xor t3, t0, t1
+ or t2, t2, t3
+ bnez t2, 7f
+
+ and a7, a7, a4
+ slli.d a6, a2, 0x3
+ nor a4, zero, a4
+ b 5f
+
+// src1 is aligned
+4:
+ andi a3, src2, 0x7
+ ld.d t0, src1, 0
+
+ bstrins.d src2, zero, 2, 0
+ nor t2, zero, zero
+ ld.d t1, src2, 0
+
+ addi.d t3, zero, 0x8
+ sub.d a5, t3, a3
+ slli.d a5, a5, 0x3
+ srl.d a4, t2, a5
+ rotr.d t4, t0, a5
+
+ addi.d src2, src2, 8
+ addi.d src1, src1, 8
+ or t1, t1, a4
+ or t0, t4, a4
+
+ sub.d t2, t0, t5
+ nor t3, t0, t6
+ and t2, t2, t3
+ xor t3, t0, t1
+ or t2, t2, t3
+
+ bnez t2, 7f
+
+ and a7, t4, a4
+ slli.d a6, a3, 0x3
+ nor a4, zero, a4
+
+// unaligned loop
+// a7: remaining number
+// a6: shift left number
+// a5: shift right number
+// a4: mask for checking remaining number
+5:
+ or t0, a7, a4
+ sub.d t2, t0, t5
+ nor t3, t0, t6
+ and t2, t2, t3
+ bnez t2, 6f
+
+ ld.d t0, src1, 0
+ addi.d src1, src1, 8
+ ld.d t1, src2, 0
+ addi.d src2, src2, 8
+
+ srl.d t7, t0, a5
+ sll.d t0, t0, a6
+ or t0, a7, t0
+
+ sub.d t2, t0, t5
+ nor t3, t0, t6
+ and t2, t2, t3
+ xor t3, t0, t1
+ or t2, t2, t3
+ bnez t2, 7f
+
+ or a7, t7, zero
+ b 5b
+
+6:
+ ld.bu t1, src2, 0
+ andi t0, a7, 0xff
+ xor t2, t0, t1
+ srli.d a7, a7, 0x8
+ masknez t2, t0, t2
+ addi.d src2, src2, 1
+ beqz t2, 8f
+ b 6b
+
+7:
+ ctz.d t7, t2
+ bstrins.d t7, zero, 2, 0
+ srl.d t0, t0, t7
+ srl.d t1, t1, t7
+
+ andi t0, t0, 0xff
+ andi t1, t1, 0xff
+
+8:
+ sub.d a4, t0, t1
+ sub.d a5, t1, t0
+ maskeqz a6, a5, t8
+ masknez result, a4, t8
+ or result, result, a6
+ jr ra
+
+END(STRCMP_NAME)
+#ifdef _LIBC
+libc_hidden_builtin_def (STRCMP_NAME)
#endif
-#include "../strcmp.S"
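
Reviewer's sketch: the aligned loop folds both exit conditions into one pattern, t2 = has_zero_byte(t0) | (t0 ^ t1), and label 2 spins while it is zero. Labels 7 and 8 then round the first set bit down to a byte boundary and compare that single byte; label 8 also undoes the pointer swap from label 3 by selecting between t0 - t1 and t1 - t0 with the t8 flag. The byte extraction in C, with a compiler builtin standing in for ctz.d:

```c
#include <stdint.h>

/* Labels 7/8 of STRCMP_NAME.  pattern != 0 marks the first byte that
   is '\0' in s1 or differs between the words; little-endian order
   means the lowest set bit belongs to the earliest byte.  */
static int
strcmp_tail (uint64_t w1, uint64_t w2, uint64_t pattern)
{
  unsigned shift = __builtin_ctzll (pattern) & ~7u; /* ctz.d + bstrins.d */
  return (int) ((w1 >> shift) & 0xff) - (int) ((w2 >> shift) & 0xff);
}
```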
diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S
index 4860398b..80954912 100644
--- a/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S
@@ -1,8 +1,175 @@
+#ifdef _LIBC
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+#else
+#include <sys/asm.h>
+#include <sys/regdef.h>
+#endif
#if IS_IN (libc)
-
#define STRCPY __strcpy_aligned
-
+#else
+#define STRCPY strcpy
#endif
-#include "../strcpy.S"
+LEAF(STRCPY, 6)
+ andi a3, a0, 0x7
+ move a2, a0
+ beqz a3, L(dest_align)
+ sub.d a5, a1, a3
+ addi.d a5, a5, 8
+
+L(make_dest_align):
+ ld.b t0, a1, 0
+ addi.d a1, a1, 1
+ st.b t0, a2, 0
+ beqz t0, L(al_out)
+
+ addi.d a2, a2, 1
+ bne a1, a5, L(make_dest_align)
+
+L(dest_align):
+ andi a4, a1, 7
+ bstrins.d a1, zero, 2, 0
+
+ lu12i.w t5, 0x1010
+ ld.d t0, a1, 0
+ ori t5, t5, 0x101
+ bstrins.d t5, t5, 63, 32
+
+ slli.d t6, t5, 0x7
+ bnez a4, L(unalign)
+ sub.d t1, t0, t5
+ andn t2, t6, t0
+
+ and t3, t1, t2
+ bnez t3, L(al_end)
+
+L(al_loop):
+ st.d t0, a2, 0
+ ld.d t0, a1, 8
+
+ addi.d a1, a1, 8
+ addi.d a2, a2, 8
+ sub.d t1, t0, t5
+ andn t2, t6, t0
+
+ and t3, t1, t2
+ beqz t3, L(al_loop)
+
+L(al_end):
+ ctz.d t1, t3
+ srli.d t1, t1, 3
+ addi.d t1, t1, 1 # add 1, since '\0' needs to be copied to dest
+
+ andi a3, t1, 8
+ andi a4, t1, 4
+ andi a5, t1, 2
+ andi a6, t1, 1
+
+L(al_end_8):
+ beqz a3, L(al_end_4)
+ st.d t0, a2, 0
+ jr ra
+L(al_end_4):
+ beqz a4, L(al_end_2)
+ st.w t0, a2, 0
+ addi.d a2, a2, 4
+ srli.d t0, t0, 32
+L(al_end_2):
+ beqz a5, L(al_end_1)
+ st.h t0, a2, 0
+ addi.d a2, a2, 2
+ srli.d t0, t0, 16
+L(al_end_1):
+ beqz a6, L(al_out)
+ st.b t0, a2, 0
+L(al_out):
+ jr ra
+
+L(unalign):
+ slli.d a5, a4, 3
+ li.d t1, -1
+ sub.d a6, zero, a5
+
+ srl.d a7, t0, a5
+ sll.d t7, t1, a6
+
+ or t0, a7, t7
+ sub.d t1, t0, t5
+ andn t2, t6, t0
+ and t3, t1, t2
+
+ bnez t3, L(un_end)
+
+ ld.d t4, a1, 8
+
+ sub.d t1, t4, t5
+ andn t2, t6, t4
+ sll.d t0, t4, a6
+ and t3, t1, t2
+
+ or t0, t0, a7
+ bnez t3, L(un_end_with_remaining)
+
+L(un_loop):
+ srl.d a7, t4, a5
+
+ ld.d t4, a1, 16
+ addi.d a1, a1, 8
+
+ st.d t0, a2, 0
+ addi.d a2, a2, 8
+
+ sub.d t1, t4, t5
+ andn t2, t6, t4
+ sll.d t0, t4, a6
+ and t3, t1, t2
+
+ or t0, t0, a7
+ beqz t3, L(un_loop)
+
+L(un_end_with_remaining):
+ ctz.d t1, t3
+ srli.d t1, t1, 3
+ addi.d t1, t1, 1
+ sub.d t1, t1, a4
+
+ blt t1, zero, L(un_end_less_8)
+ st.d t0, a2, 0
+ addi.d a2, a2, 8
+ beqz t1, L(un_out)
+ srl.d t0, t4, a5 # get the remaining part
+ b L(un_end_less_8)
+
+L(un_end):
+ ctz.d t1, t3
+ srli.d t1, t1, 3
+ addi.d t1, t1, 1
+
+L(un_end_less_8):
+ andi a4, t1, 4
+ andi a5, t1, 2
+ andi a6, t1, 1
+L(un_end_4):
+ beqz a4, L(un_end_2)
+ st.w t0, a2, 0
+ addi.d a2, a2, 4
+ srli.d t0, t0, 32
+L(un_end_2):
+ beqz a5, L(un_end_1)
+ st.h t0, a2, 0
+ addi.d a2, a2, 2
+ srli.d t0, t0, 16
+L(un_end_1):
+ beqz a6, L(un_out)
+ st.b t0, a2, 0
+L(un_out):
+ jr ra
+
+END(STRCPY)
+
+#ifdef _LIBC
+libc_hidden_builtin_def (STRCPY)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
index d31875fd..fcbc4f6a 100644
--- a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
@@ -1,8 +1,87 @@
+#ifdef _LIBC
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+#else
+#include <sys/asm.h>
+#include <sys/regdef.h>
+#endif
#if IS_IN (libc)
-
#define STRLEN __strlen_aligned
-
+#else
+#define STRLEN strlen
#endif
-#include "../strlen.S"
+LEAF(STRLEN, 6)
+ move a1, a0
+ bstrins.d a0, zero, 2, 0
+ lu12i.w a2, 0x01010
+ li.w t0, -1
+
+ ld.d t2, a0, 0
+ andi t1, a1, 0x7
+ ori a2, a2, 0x101
+ slli.d t1, t1, 3
+
+ bstrins.d a2, a2, 63, 32
+ sll.d t1, t0, t1
+ slli.d t3, a2, 7
+ nor a3, zero, t3
+
+ orn t2, t2, t1
+ sub.d t0, t2, a2
+ nor t1, t2, a3
+ and t0, t0, t1
+
+
+ bnez t0, L(count_pos)
+ addi.d a0, a0, 8
+L(loop_16_7bit):
+ ld.d t2, a0, 0
+ sub.d t1, t2, a2
+
+ and t0, t1, t3
+ bnez t0, L(more_check)
+ ld.d t2, a0, 8
+ addi.d a0, a0, 16
+
+ sub.d t1, t2, a2
+ and t0, t1, t3
+ beqz t0, L(loop_16_7bit)
+ addi.d a0, a0, -8
+L(more_check):
+ nor t0, t2, a3
+
+ and t0, t1, t0
+ bnez t0, L(count_pos)
+ addi.d a0, a0, 8
+L(loop_16_8bit):
+ ld.d t2, a0, 0
+
+ sub.d t1, t2, a2
+ nor t0, t2, a3
+ and t0, t0, t1
+ bnez t0, L(count_pos)
+
+ ld.d t2, a0, 8
+ addi.d a0, a0, 16
+ sub.d t1, t2, a2
+ nor t0, t2, a3
+
+ and t0, t0, t1
+ beqz t0, L(loop_16_8bit)
+ addi.d a0, a0, -8
+L(count_pos):
+ ctz.d t1, t0
+ sub.d a0, a0, a1
+
+ srli.d t1, t1, 3
+ add.d a0, a0, t1
+ jr ra
+
+END(STRLEN)
+
+#ifdef _LIBC
+libc_hidden_builtin_def (STRLEN)
+#endif
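
Reviewer's sketch: the function alternates two unrolled loops. L(loop_16_7bit) uses the cheaper filter (v - ones) & highs, which can fire spuriously on bytes of 0x81 and above; only a hit there falls through to the exact ~v-qualified test in L(more_check)/L(loop_16_8bit). The overall shape in C, with the unrolling and the cheap pre-filter dropped:

```c
#include <stdint.h>

/* Word-at-a-time strlen.  Assumes s can be read in aligned 8-byte
   units; the prologue's bstrins.d/orn masking makes that safe for
   the first, possibly partial, word.  */
static unsigned long
strlen_sketch (const char *s)
{
  const uint64_t ones  = 0x0101010101010101ULL;
  const uint64_t highs = 0x8080808080808080ULL;
  const uint64_t *p = (const uint64_t *) s;  /* assume 8-aligned */
  for (;; p++)
    {
      uint64_t v = *p;
      uint64_t zero = (v - ones) & ~v & highs;
      if (zero)                              /* L(count_pos) */
        return (const char *) p - s + (__builtin_ctzll (zero) >> 3);
    }
}
```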
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
index f371b19e..2cd56c44 100644
--- a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
@@ -1,8 +1,258 @@
+#ifdef _LIBC
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+#else
+#include <sys/asm.h>
+#include <sys/regdef.h>
+#endif
#if IS_IN (libc)
-
#define STRNCMP __strncmp_aligned
-
+#else
+#define STRNCMP strncmp
#endif
-#include "../strncmp.S"
+/* int strncmp (const char *s1, const char *s2, size_t n); */
+
+LEAF(STRNCMP, 6)
+ beqz a2, L(ret0)
+ xor a4, a0, a1
+ lu12i.w t5, 0x01010
+ lu12i.w t6, 0x7f7f7
+
+ andi a3, a0, 0x7
+ ori t5, t5, 0x101
+ andi a4, a4, 0x7
+ ori t6, t6, 0xf7f
+
+ bstrins.d t5, t5, 63, 32
+ bstrins.d t6, t6, 63, 32
+
+ bnez a4, L(unalign)
+ bnez a3, L(mutual_align)
+
+L(a_loop):
+ ld.d t0, a0, 0
+ ld.d t1, a1, 0
+ addi.d a0, a0, 8
+ addi.d a1, a1, 8
+
+
+ sltui t7, a2, 9
+
+L(start_realign):
+ sub.d t2, t0, t5
+ nor t3, t0, t6
+ xor t4, t0, t1
+
+ and t2, t2, t3
+ addi.d a2, a2, -8
+
+ or t2, t2, t4
+ or t3, t2, t7
+ beqz t3, L(a_loop)
+
+L(end):
+ bge zero, t7, L(out)
+ andi t4, a2, 7
+ li.d t3, -1
+ addi.d t4, t4, -1
+ slli.d t4, t4, 3
+ sll.d t3, t3, t4
+ or t2, t2, t3
+
+
+L(out):
+ ctz.d t3, t2
+ bstrins.d t3, zero, 2, 0
+ srl.d t0, t0, t3
+ srl.d t1, t1, t3
+
+ andi t0, t0, 0xff
+ andi t1, t1, 0xff
+ sub.d a0, t0, t1
+ jr ra
+
+L(mutual_align):
+ bstrins.d a0, zero, 2, 0
+ bstrins.d a1, zero, 2, 0
+ slli.d a5, a3, 0x3
+ li.d t2, -1
+
+ ld.d t0, a0, 0
+ ld.d t1, a1, 0
+
+ li.d t3, 9
+ sll.d t2, t2, a5
+
+ sub.d t3, t3, a3
+ addi.d a0, a0, 8
+
+ sltu t7, a2, t3
+ addi.d a1, a1, 8
+
+ add.d a2, a2, a3
+ orn t0, t0, t2
+ orn t1, t1, t2
+ b L(start_realign)
+
+L(ret0):
+ move a0, zero
+ jr ra
+
+L(unalign):
+ li.d t8, 8
+ blt a2, t8, L(short_cmp)
+
+ # swap a0 and a1 in case a3 > a4
+ andi a4, a1, 0x7
+ sltu t8, a4, a3
+ xor a6, a0, a1
+ maskeqz a6, a6, t8
+ xor a0, a0, a6
+ xor a1, a1, a6
+
+ andi a3, a0, 0x7
+ andi a4, a1, 0x7
+
+ bstrins.d a0, zero, 2, 0
+ bstrins.d a1, zero, 2, 0
+
+ li.d t2, -1
+ li.d t3, 9
+
+ ld.d t0, a0, 0
+ ld.d t1, a1, 0
+
+ sub.d t3, t3, a4
+ sub.d a3, a4, a3
+
+ slli.d t4, a4, 3
+ slli.d a6, a3, 3
+
+ sub.d a5, zero, a6
+ sltu t7, a2, t3
+
+ rotr.d a7, t0, a5
+ sll.d t4, t2, t4 # mask for first num
+
+ add.d a2, a2, a4
+ sll.d a4, t2, a6 # mask for a7
+
+ orn t0, a7, t4
+ orn t1, t1, t4
+
+ sub.d t2, t0, t5
+ nor t4, t0, t6
+ and t2, t2, t4
+
+ xor t3, t0, t1
+ or t2, t2, t3
+
+ or t3, t2, t7
+ bnez t3, L(un_end)
+
+ andn a7, a7, a4
+ addi.d a3, a3, 1
+
+L(un_loop):
+ addi.d a2, a2, -8
+ # in case remaining part has '\0', no more load instructions should be executed on a0 address
+ or t0, a7, a4
+ sltu t7, a2, a3
+
+ sub.d t2, t0, t5
+ nor t3, t0, t6
+ and t2, t2, t3
+
+ or t3, t2, t7
+ bnez t3, L(check_remaining)
+
+ ld.d t7, a0, 8
+ ld.d t1, a1, 8
+ addi.d a0, a0, 8
+ addi.d a1, a1, 8
+
+ sll.d t4, t7, a6
+ sub.d t2, t1, t5
+ nor t3, t1, t6
+
+ or t0, t4, a7
+ srl.d a7, t7, a5
+
+ and t2, t2, t3
+ xor t3, t0, t1
+
+ sltui t7, a2, 9
+ or t2, t2, t3
+
+ or t3, t2, t7
+ beqz t3, L(un_loop)
+ b L(un_end)
+
+L(check_remaining):
+ ld.d t1, a1, 8
+ xor t3, t1, a7
+ or t2, t2, t3
+
+L(un_end):
+ bge zero, t7, L(un_out)
+ andi t4, a2, 7
+ li.d t3, -1
+
+ addi.d t4, t4, -1
+ slli.d t4, t4, 3
+ sll.d t3, t3, t4
+ or t2, t2, t3
+
+L(un_out):
+ ctz.d t3, t2
+ bstrins.d t3, zero, 2, 0
+ srl.d t0, t0, t3
+ srl.d t1, t1, t3
+
+ andi t0, t0, 0xff
+ andi t1, t1, 0xff
+
+ sub.d a4, t0, t1
+ sub.d a5, t1, t0
+
+ maskeqz a6, a5, t8
+ masknez a0, a4, t8
+
+ or a0, a0, a6
+ jr ra
+
+L(short_cmp):
+ ld.bu t0, a0, 0
+ ld.bu t1, a1, 0
+ addi.d a2, a2, -1
+
+ xor t2, t0, t1
+ masknez t2, t0, t2
+ maskeqz t2, a2, t2
+
+ beqz t2, L(short_out)
+
+ ld.bu t0, a0, 1
+ ld.bu t1, a1, 1
+
+ addi.d a2, a2, -1
+ addi.d a0, a0, 2
+
+ addi.d a1, a1, 2
+ xor t2, t0, t1
+ masknez t2, t0, t2
+ maskeqz t2, a2, t2
+
+ bnez t2, L(short_cmp)
+
+L(short_out):
+ sub.d a0, t0, t1
+ jr ra
+
+END(STRNCMP)
+#ifdef _LIBC
+libc_hidden_builtin_def (STRNCMP)
+#endif
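
Reviewer's sketch: what strncmp adds over strcmp is the running limit. t7 (the sltui against 9) flags the word covering the last n bytes, and L(end)/L(un_end) then OR a synthetic stop bit into the pattern at byte (n - 1) mod 8, so the ctz-based tail never looks past the limit; comparing that byte still yields 0 when the strings agree on it. The masking step in C, noting that a2 has already been decremented by 8 when L(end) runs:

```c
#include <stdint.h>

/* Mirrors L(end): rem is a2 after the final addi.d a2, a2, -8, so
   rem & 7 equals n mod 8 for the last word (0 meaning all 8 bytes
   are in range).  The & 63 reproduces sll.d's use of only the low
   six bits of the shift amount.  */
static uint64_t
force_stop_at_limit (uint64_t pattern, uint64_t rem)
{
  unsigned shift = (unsigned) (((rem & 7) - 1) * 8) & 63;
  return pattern | (~0ULL << shift);
}
```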
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
index 503442b3..78c8fd5d 100644
--- a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
@@ -1,8 +1,84 @@
+#ifdef _LIBC
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+#else
+#include <sys/asm.h>
+#include <sys/regdef.h>
+#endif
#if IS_IN (libc)
-
#define STRNLEN __strnlen_aligned
-
+#else
+#define STRNLEN __strnlen
#endif
-#include "../strnlen.S"
+#. before every load, the remaining count a1 (tracked in t5) must be > 0;
+#. the first load covers t1 bytes, so t5 must be adjusted by that amount;
+#. return the lesser of strlen(s) and a1;
+
+LEAF(STRNLEN, 6)
+ beqz a1, L(out)
+ lu12i.w a2, 0x01010
+ andi t1, a0, 0x7
+ move t4, a0
+
+ bstrins.d a0, zero, 2, 0
+ ori a2, a2, 0x101
+ li.w t0, -1
+ ld.d t2, a0, 0
+
+ slli.d t3, t1, 3
+ bstrins.d a2, a2, 63, 32
+ li.w t5, 8
+ slli.d a3, a2, 7
+
+ sub.w t1, t5, t1
+ sll.d t0, t0, t3
+ nor a3, zero, a3
+ orn t2, t2, t0
+
+
+ sub.d t0, t2, a2
+ nor t3, t2, a3
+ and t0, t0, t3
+ bnez t0, L(count_pos)
+
+ sub.d t5, a1, t1
+ bgeu t1, a1, L(out)
+L(loop_8bytes):
+ ld.d t2, a0, 8
+ addi.d a0, a0, 8
+
+ sub.d t0, t2, a2
+ nor t1, t2, a3
+ sltui t6, t5, 9
+ and t0, t0, t1
+
+ addi.d t5, t5, -8
+ or t7, t0, t6
+ beqz t7, L(loop_8bytes)
+L(count_pos):
+ ctz.d t1, t0
+
+
+ sub.d a0, a0, t4
+ srli.d t1, t1, 3
+ add.d a0, t1, a0
+ sltu t0, a0, a1
+
+ masknez t1, a1, t0
+ maskeqz a0, a0, t0
+ or a0, a0, t1
+ jr ra
+
+L(out):
+ move a0, a1
+ jr ra
+
+END(STRNLEN)
+
+#ifdef _LIBC
+weak_alias (STRNLEN, strnlen)
+libc_hidden_builtin_def (STRNLEN)
+#endif
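
Because the loop consumes eight bytes at a time, the computed length can overshoot maxlen by up to seven; the epilogue clamps it with a masknez/maskeqz pair rather than a branch. The same branchless select in C, for illustration:

```c
/* sltu t0 / masknez t1 / maskeqz a0 / or a0 from the epilogue of
   STRNLEN: min(found, maxlen) without a branch.  */
static unsigned long
clamp_len (unsigned long found, unsigned long maxlen)
{
  unsigned long lt = found < maxlen;     /* sltu t0, a0, a1 */
  return (found & -lt) | (maxlen & (lt - 1));
}
```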
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
index a58ddde8..6931045b 100644
--- a/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
@@ -1,11 +1,110 @@
+#ifdef _LIBC
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+#else
+#include <sys/asm.h>
+#include <sys/regdef.h>
+#endif
#if IS_IN (libc)
-
#define STRRCHR_NAME __strrchr_aligned
-
+#else
+#define STRRCHR_NAME strrchr
#endif
-#include "../strrchr.S"
+LEAF(STRRCHR_NAME, 6)
+ slli.d t1, a0, 3
+ bstrins.d a0, zero, 2, 0
+ lu12i.w a2, 0x01010
+ ld.d t2, a0, 0 // t2 = "5ZZ21abc"
+
+ ori a2, a2, 0x101
+ andi a1, a1, 0xff // a1 = "0000000Z"
+ li.d a5, -1
+ bstrins.d a2, a2, 63, 32 // a2 = 0x0101010101010101
+
+ sll.d t1, a5, t1 // t1 = 0xffffffffff000000
+ mul.d a1, a1, a2 // a1 = "ZZZZZZZZ"
+ orn t2, t2, t1 // t2 = "5ZZ21YYY"
+ slli.d a3, a2, 7 // a3 = 0x8080808080808080
+
+ sub.d a4, t2, a2
+ andn t0, a3, t2
+ move t3, zero
+ and t0, a4, t0
+
+
+ xor a4, t2, a1
+ move t5, zero
+ orn a4, a4, t1
+ bnez t0, L(found_end)
+
+ sub.d t1, a4, a2
+ andn t0, a3, a4
+ and t1, t1, t0
+
+L(loop_8bytes):
+ masknez t4, t3, t1
+
+ maskeqz t3, t2, t1
+ ld.d t2, a0, 8
+ masknez t0, t5, t1
+ maskeqz t5, a0, t1
+
+ or t3, t3, t4
+ or t5, t0, t5
+ sub.d t0, t2, a2
+ andn t1, a3, t2
+
+
+ xor a4, t2, a1
+ and t0, t0, t1 //t0 hold diff pattern for '\0'
+ sub.d t1, a4, a2
+ andn t4, a3, a4
+
+ and t1, t1, t4 //t1 hold diff pattern for 'a1'
+ addi.d a0, a0, 8
+ beqz t0, L(loop_8bytes) //ok, neither \0 nor found
+L(found_end):
+ ctz.d t1, t0
+
+ xor t3, t3, a1
+ orn t1, zero, t1
+ revb.d t3, t3
+ srl.d t1, a5, t1 // mask for '\0'
+
+ sub.d t4, t3, a2
+ orn a4, a4, t1
+ andn t3, a3, t3
+ revb.d t2, a4
+
+ sub.d t0, t2, a2
+ andn t1, a3, t2
+ and t3, t3, t4
+ and t1, t0, t1
+
+ li.d t7, 7
+ masknez t4, t3, t1
+ maskeqz t3, t1, t1
+ masknez t5, t5, t1
+
+ or t3, t3, t4
+ maskeqz t6, a0, t1
+ ctz.d t0, t3
+ or t5, t6, t5
+
+ srli.d t0, t0, 3
+ sub.d t0, t7, t0
+ add.d a0, t5, t0
+ maskeqz a0, a0, t3
+
+ jr ra
+END(STRRCHR_NAME)
+
+#ifdef _LIBC
+libc_hidden_builtin_def(STRRCHR_NAME)
+#endif
#undef rindex
weak_alias(STRRCHR_NAME, rindex)
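
Reviewer's sketch: strrchr cannot stop at the first hit, so the loop carries the last word that contained one (t3) and its address (t5) through maskeqz/masknez conditional moves. Once '\0' appears, L(found_end) byte-reverses the candidate patterns with revb.d so that the last match before the terminator becomes the first set bit a forward ctz.d can find. The index arithmetic of that final step, with GCC/Clang builtins standing in:

```c
#include <stdint.h>

/* match has bit 7 set in every byte equal to c, already masked so
   nothing at or after '\0' survives.  Mirrors the revb.d + ctz.d +
   (7 - n) fixup at the tail of STRRCHR_NAME.  */
static int
last_match_byte (uint64_t match)
{
  if (match == 0)
    return -1;     /* the final maskeqz turns this case into NULL */
  uint64_t rev = __builtin_bswap64 (match);      /* revb.d */
  return 7 - (int) (__builtin_ctzll (rev) >> 3); /* li.d t7, 7; sub.d */
}
```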
diff --git a/sysdeps/loongarch/lp64/stpcpy.S b/sysdeps/loongarch/lp64/stpcpy.S
deleted file mode 100644
index b6a367dc..00000000
--- a/sysdeps/loongarch/lp64/stpcpy.S
+++ /dev/null
@@ -1,179 +0,0 @@
-#ifdef _LIBC
-#include <sysdep.h>
-#include <sys/regdef.h>
-#include <sys/asm.h>
-#else
-#include <sys/asm.h>
-#include <sys/regdef.h>
-#endif
-
-#ifndef STPCPY_NAME
-#define STPCPY_NAME __stpcpy
-#endif
-
-LEAF(STPCPY_NAME, 6)
- andi a3, a0, 0x7
- beqz a3, L(dest_align)
- sub.d a5, a1, a3
- addi.d a5, a5, 8
-
-L(make_dest_align):
- ld.b t0, a1, 0
- addi.d a1, a1, 1
- st.b t0, a0, 0
- addi.d a0, a0, 1
-
- beqz t0, L(al_out)
- bne a1, a5, L(make_dest_align)
-
-L(dest_align):
- andi a4, a1, 7
- bstrins.d a1, zero, 2, 0
-
- lu12i.w t5, 0x1010
- ld.d t0, a1, 0
- ori t5, t5, 0x101
- bstrins.d t5, t5, 63, 32
-
- slli.d t6, t5, 0x7
- bnez a4, L(unalign)
- sub.d t1, t0, t5
- andn t2, t6, t0
-
- and t3, t1, t2
- bnez t3, L(al_end)
-
-L(al_loop):
- st.d t0, a0, 0
- ld.d t0, a1, 8
-
- addi.d a1, a1, 8
- addi.d a0, a0, 8
- sub.d t1, t0, t5
- andn t2, t6, t0
-
- and t3, t1, t2
- beqz t3, L(al_loop)
-
-L(al_end):
- ctz.d t1, t3
- srli.d t1, t1, 3
- addi.d t1, t1, 1 # add 1, since '\0' needs to be copied to dest
-
- andi a3, t1, 8
- andi a4, t1, 4
- andi a5, t1, 2
- andi a6, t1, 1
-
-L(al_end_8):
- beqz a3, L(al_end_4)
- st.d t0, a0, 0
- addi.d a0, a0, 7
- jr ra
-L(al_end_4):
- beqz a4, L(al_end_2)
- st.w t0, a0, 0
- addi.d a0, a0, 4
- srli.d t0, t0, 32
-L(al_end_2):
- beqz a5, L(al_end_1)
- st.h t0, a0, 0
- addi.d a0, a0, 2
- srli.d t0, t0, 16
-L(al_end_1):
- beqz a6, L(al_out)
- st.b t0, a0, 0
- addi.d a0, a0, 1
-L(al_out):
- addi.d a0, a0, -1
- jr ra
-
-L(unalign):
- slli.d a5, a4, 3
- li.d t1, -1
- sub.d a6, zero, a5
-
- srl.d a7, t0, a5
- sll.d t7, t1, a6
-
- or t0, a7, t7
- sub.d t1, t0, t5
- andn t2, t6, t0
- and t3, t1, t2
-
- bnez t3, L(un_end)
-
- ld.d t4, a1, 8
- addi.d a1, a1, 8
-
- sub.d t1, t4, t5
- andn t2, t6, t4
- sll.d t0, t4, a6
- and t3, t1, t2
-
- or t0, t0, a7
- bnez t3, L(un_end_with_remaining)
-
-L(un_loop):
- srl.d a7, t4, a5
-
- ld.d t4, a1, 8
- addi.d a1, a1, 8
-
- st.d t0, a0, 0
- addi.d a0, a0, 8
-
- sub.d t1, t4, t5
- andn t2, t6, t4
- sll.d t0, t4, a6
- and t3, t1, t2
-
- or t0, t0, a7
- beqz t3, L(un_loop)
-
-L(un_end_with_remaining):
- ctz.d t1, t3
- srli.d t1, t1, 3
- addi.d t1, t1, 1
- sub.d t1, t1, a4
-
- blt t1, zero, L(un_end_less_8)
- st.d t0, a0, 0
- addi.d a0, a0, 8
- beqz t1, L(un_out)
- srl.d t0, t4, a5 # get the remaining part
- b L(un_end_less_8)
-
-L(un_end):
- ctz.d t1, t3
- srli.d t1, t1, 3
- addi.d t1, t1, 1
-
-L(un_end_less_8):
- andi a4, t1, 4
- andi a5, t1, 2
- andi a6, t1, 1
-L(un_end_4):
- beqz a4, L(un_end_2)
- st.w t0, a0, 0
- addi.d a0, a0, 4
- srli.d t0, t0, 32
-L(un_end_2):
- beqz a5, L(un_end_1)
- st.h t0, a0, 0
- addi.d a0, a0, 2
- srli.d t0, t0, 16
-L(un_end_1):
- beqz a6, L(un_out)
- st.b t0, a0, 0
- addi.d a0, a0, 1
-L(un_out):
- addi.d a0, a0, -1
- jr ra
-
-END(STPCPY_NAME)
-
-#ifdef _LIBC
-weak_alias (STPCPY_NAME, stpcpy)
-libc_hidden_builtin_def (STPCPY_NAME)
-#endif
diff --git a/sysdeps/loongarch/lp64/strchr.S b/sysdeps/loongarch/lp64/strchr.S
deleted file mode 100644
index fde53a30..00000000
--- a/sysdeps/loongarch/lp64/strchr.S
+++ /dev/null
@@ -1,89 +0,0 @@
-#ifdef _LIBC
-#include <sysdep.h>
-#include <sys/regdef.h>
-#include <sys/asm.h>
-#else
-#include <sys/asm.h>
-#include <sys/regdef.h>
-#endif
-
-#ifndef STRCHR_NAME
-#define STRCHR_NAME strchr
-#endif
-
-/* char * strchr (const char *s1, int c); */
-
-LEAF(STRCHR_NAME, 6)
- slli.d t1, a0, 3
- bstrins.d a0, zero, 2, 0
- lu12i.w a2, 0x01010
- ld.d t2, a0, 0
-
- ori a2, a2, 0x101
- andi a1, a1, 0xff
- bstrins.d a2, a2, 63, 32
- li.w t0, -1
-
- mul.d a1, a1, a2 # "cccccccc"
- sll.d t0, t0, t1
- slli.d a3, a2, 7 # 0x8080808080808080
- orn t2, t2, t0
-
- sll.d t3, a1, t1
- xor t4, t2, t3
- sub.d a7, t2, a2
- andn a6, a3, t2
-
-
- sub.d a5, t4, a2
- andn a4, a3, t4
- and a6, a7, a6
- and a5, a5, a4
-
- or t0, a6, a5
- bnez t0, L(_mc8_a)
- addi.d a0, a0, 8
-L(_aloop):
- ld.d t4, a0, 0
-
- xor t2, t4, a1
- sub.d a7, t4, a2
- andn a6, a3, t4
- sub.d a5, t2, a2
-
- andn a4, a3, t2
- and a6, a7, a6
- and a5, a5, a4
- or a7, a6, a5
-
-
- bnez a7, L(_mc8_a)
- ld.d t4, a0, 8
- addi.d a0, a0, 16
- xor t2, t4, a1
-
- sub.d a7, t4, a2
- andn a6, a3, t4
- sub.d a5, t2, a2
- andn a4, a3, t2
-
- and a6, a7, a6
- and a5, a5, a4
- or a7, a6, a5
- beqz a7, L(_aloop)
-
- addi.d a0, a0, -8
-
-L(_mc8_a):
- ctz.d t0, a5
- ctz.d t2, a6
- srli.w t0, t0, 3
-
-
- srli.w t2, t2, 3
- sltu t1, t2, t0
- add.d a0, a0, t0
- masknez a0, a0, t1
-
- jr ra
-END(STRCHR_NAME)
diff --git a/sysdeps/loongarch/lp64/strchrnul.S b/sysdeps/loongarch/lp64/strchrnul.S
deleted file mode 100644
index a5ee09a3..00000000
--- a/sysdeps/loongarch/lp64/strchrnul.S
+++ /dev/null
@@ -1,94 +0,0 @@
-#ifdef _LIBC
-#include <sysdep.h>
-#include <sys/regdef.h>
-#include <sys/asm.h>
-#else
-#include <sys/asm.h>
-#include <sys/regdef.h>
-#endif
-
-#ifndef STRCHRNUL_NAME
-#define STRCHRNUL_NAME __strchrnul
-#endif
-
-/* char * strchrnul (const char *s1, int c); */
-
-LEAF(STRCHRNUL_NAME, 6)
- slli.d t1, a0, 3
- bstrins.d a0, zero, 2, 0
- lu12i.w a2, 0x01010
- ld.d t2, a0, 0
-
- ori a2, a2, 0x101
- andi a1, a1, 0xff
- bstrins.d a2, a2, 63, 32
- li.w t0, -1
-
- mul.d a1, a1, a2 # "cccccccc"
- sll.d t0, t0, t1
- slli.d a3, a2, 7 # 0x8080808080808080
- orn t2, t2, t0
-
- sll.d t3, a1, t1
- xor t4, t2, t3
- sub.d a7, t2, a2
- andn a6, a3, t2
-
-
- sub.d a5, t4, a2
- andn a4, a3, t4
- and a6, a7, a6
- and a5, a5, a4
-
- or t0, a6, a5
- bnez t0, L(_mc8_a)
- addi.d a0, a0, 8
-L(_aloop):
- ld.d t4, a0, 0
-
- xor t2, t4, a1
- sub.d a7, t4, a2
- andn a6, a3, t4
- sub.d a5, t2, a2
-
- andn a4, a3, t2
- and a6, a7, a6
- and a5, a5, a4
- or a7, a6, a5
-
-
- bnez a7, L(_mc8_a)
- ld.d t4, a0, 8
- addi.d a0, a0, 16
- xor t2, t4, a1
-
- sub.d a7, t4, a2
- andn a6, a3, t4
- sub.d a5, t2, a2
- andn a4, a3, t2
-
- and a6, a7, a6
- and a5, a5, a4
- or a7, a6, a5
- beqz a7, L(_aloop)
-
- addi.d a0, a0, -8
-L(_mc8_a):
- ctz.d t0, a5
- ctz.d t2, a6
- srli.w t0, t0, 3
-
- srli.w t2, t2, 3
- slt t1, t0, t2
- masknez t3, t2, t1
- maskeqz t4, t0, t1
-
- or t0, t3, t4
- add.d a0, a0, t0
- jr ra
-END(STRCHRNUL_NAME)
-
-#ifdef _LIBC
-weak_alias(STRCHRNUL_NAME, strchrnul)
-libc_hidden_builtin_def (STRCHRNUL_NAME)
-#endif
diff --git a/sysdeps/loongarch/lp64/strcmp.S b/sysdeps/loongarch/lp64/strcmp.S
deleted file mode 100644
index 3a863992..00000000
--- a/sysdeps/loongarch/lp64/strcmp.S
+++ /dev/null
@@ -1,227 +0,0 @@
-/* 2022\06\15 loongarch64 author: chenxiaolong. */
-
-#ifdef _LIBC
-#include <sysdep.h>
-#include <sys/regdef.h>
-#include <sys/asm.h>
-#else
-#include <sys/asm.h>
-#include <sys/regdef.h>
-#endif
-
-#ifndef STRCMP_NAME
-#define STRCMP_NAME strcmp
-#endif
-
-/* int strcmp (const char *s1, const char *s2); */
-
-/* Parameters and Results */
-#define src1 a0
-#define src2 a1
-#define result v0
-LEAF(STRCMP_NAME, 6)
- xor a4, src1, src2
- lu12i.w t5, 0x01010
- lu12i.w t6, 0x7f7f7
- andi a2, src1, 0x7
-
- ori t5, t5, 0x101
- andi a4, a4, 0x7
- ori t6, t6, 0xf7f
- bstrins.d t5, t5, 63, 32
- bstrins.d t6, t6, 63, 32
-
- bnez a4, 3f // unaligned
- beqz a2, 1f // loop aligned
-
-// mutual aligned
- bstrins.d src1, zero, 2, 0
- bstrins.d src2, zero, 2, 0
- slli.d a4, a2, 0x3
- ld.d t0, src1, 0
-
- sub.d a4, zero, a4
- ld.d t1, src2, 0
- addi.d src1, src1, 8
- addi.d src2, src2, 8
-
- nor a5, zero, zero
- srl.d a5, a5, a4
- or t0, t0, a5
-
- or t1, t1, a5
- b 2f //start realigned
-
-// loop aligned
-1:
- ld.d t0, src1, 0
- addi.d src1, src1, 8
- ld.d t1, src2, 0
- addi.d src2, src2, 8
-
-// start realigned:
-2:
- sub.d t2, t0, t5
- nor t3, t0, t6
- and t2, t2, t3
-
- xor t3, t0, t1
- or t2, t2, t3
- beqz t2, 1b
-
- ctz.d t7, t2
- bstrins.d t7, zero, 2, 0
- srl.d t0, t0, t7
- srl.d t1, t1, t7
-
- andi t0, t0, 0xff
- andi t1, t1, 0xff
- sub.d v0, t0, t1
- jr ra
-
-// unaligned
-3:
- andi a3, src2, 0x7
- slt a5, a2, a3
- masknez t8, a2, a5
- xor a6, src1, src2
- maskeqz a6, a6, t8
- xor src1, src1, a6
- xor src2, src2, a6
-
- andi a2, src1, 0x7
- beqz a2, 4f // src1 is aligned
-
-//strcmp_unaligned:
- andi a3, src2, 0x7
- bstrins.d src1, zero, 2, 0
- bstrins.d src2, zero, 2, 0
- nor t3, zero, zero
-
- ld.d t0, src1, 0
- ld.d t1, src2, 0
- sub.d a2, a3, a2
- addi.d t2, zero, 8
-
- sub.d a5, t2, a2
- sub.d a6, t2, a3
- slli.d a5, a5, 0x3
- slli.d a6, a6, 0x3
-
- srl.d t4, t3, a6
- srl.d a4, t3, a5
- rotr.d a7, t0, a5
-
- addi.d src2, src2, 8
- addi.d src1, src1, 8
- or t1, t1, t4
- or t0, a7, t4
-
- sub.d t2, t0, t5
- nor t3, t0, t6
- and t2, t2, t3
- xor t3, t0, t1
- or t2, t2, t3
- bnez t2, 7f
-
- and a7, a7, a4
- slli.d a6, a2, 0x3
- nor a4, zero, a4
- b 5f
-
-// src1 is aligned
-4:
- andi a3, src2, 0x7
- ld.d t0, src1, 0
-
- bstrins.d src2, zero, 2, 0
- nor t2, zero, zero
- ld.d t1, src2, 0
-
- addi.d t3, zero, 0x8
- sub.d a5, t3, a3
- slli.d a5, a5, 0x3
- srl.d a4, t2, a5
- rotr.d t4, t0, a5
-
- addi.d src2, src2, 8
- addi.d src1, src1, 8
- or t1, t1, a4
- or t0, t4, a4
-
- sub.d t2, t0, t5
- nor t3, t0, t6
- and t2, t2, t3
- xor t3, t0, t1
- or t2, t2, t3
-
- bnez t2, 7f
-
- and a7, t4, a4
- slli.d a6, a3, 0x3
- nor a4, zero, a4
-
-// unaligned loop
-// a7: remaining number
-// a6: shift left number
-// a5: shift right number
-// a4: mask for checking remaining number
-5:
- or t0, a7, a4
- sub.d t2, t0, t5
- nor t3, t0, t6
- and t2, t2, t3
- bnez t2, 6f
-
- ld.d t0, src1, 0
- addi.d src1, src1, 8
- ld.d t1, src2, 0
- addi.d src2, src2, 8
-
- srl.d t7, t0, a5
- sll.d t0, t0, a6
- or t0, a7, t0
-
- sub.d t2, t0, t5
- nor t3, t0, t6
- and t2, t2, t3
- xor t3, t0, t1
- or t2, t2, t3
- bnez t2, 7f
-
- or a7, t7, zero
- b 5b
-
-6:
- ld.bu t1, src2, 0
- andi t0, a7, 0xff
- xor t2, t0, t1
- srli.d a7, a7, 0x8
- masknez t2, t0, t2
- addi.d src2, src2, 1
- beqz t2, 8f
- b 6b
-
-7:
- ctz.d t7, t2
- bstrins.d t7, zero, 2, 0
- srl.d t0, t0, t7
- srl.d t1, t1, t7
-
- andi t0, t0, 0xff
- andi t1, t1, 0xff
-
-8:
- sub.d a4, t0, t1
- sub.d a5, t1, t0
- maskeqz a6, a5, t8
- masknez result, a4, t8
- or result, result, a6
- jr ra
-
-END(STRCMP_NAME)
-
-#ifdef _LIBC
-libc_hidden_builtin_def (STRCMP_NAME)
-#endif
-
diff --git a/sysdeps/loongarch/lp64/strcpy.S b/sysdeps/loongarch/lp64/strcpy.S
deleted file mode 100644
index 08505192..00000000
--- a/sysdeps/loongarch/lp64/strcpy.S
+++ /dev/null
@@ -1,173 +0,0 @@
-#ifdef _LIBC
-#include <sysdep.h>
-#include <sys/regdef.h>
-#include <sys/asm.h>
-#else
-#include <sys/asm.h>
-#include <sys/regdef.h>
-#endif
-
-#ifndef STRCPY
-#define STRCPY strcpy
-#endif
-
-LEAF(STRCPY, 6)
- andi a3, a0, 0x7
- move a2, a0
- beqz a3, L(dest_align)
- sub.d a5, a1, a3
- addi.d a5, a5, 8
-
-L(make_dest_align):
- ld.b t0, a1, 0
- addi.d a1, a1, 1
- st.b t0, a2, 0
- beqz t0, L(al_out)
-
- addi.d a2, a2, 1
- bne a1, a5, L(make_dest_align)
-
-L(dest_align):
- andi a4, a1, 7
- bstrins.d a1, zero, 2, 0
-
- lu12i.w t5, 0x1010
- ld.d t0, a1, 0
- ori t5, t5, 0x101
- bstrins.d t5, t5, 63, 32
-
- slli.d t6, t5, 0x7
- bnez a4, L(unalign)
- sub.d t1, t0, t5
- andn t2, t6, t0
-
- and t3, t1, t2
- bnez t3, L(al_end)
-
-L(al_loop):
- st.d t0, a2, 0
- ld.d t0, a1, 8
-
- addi.d a1, a1, 8
- addi.d a2, a2, 8
- sub.d t1, t0, t5
- andn t2, t6, t0
-
- and t3, t1, t2
- beqz t3, L(al_loop)
-
-L(al_end):
- ctz.d t1, t3
- srli.d t1, t1, 3
- addi.d t1, t1, 1 # add 1, since '\0' needs to be copied to dest
-
- andi a3, t1, 8
- andi a4, t1, 4
- andi a5, t1, 2
- andi a6, t1, 1
-
-L(al_end_8):
- beqz a3, L(al_end_4)
- st.d t0, a2, 0
- jr ra
-L(al_end_4):
- beqz a4, L(al_end_2)
- st.w t0, a2, 0
- addi.d a2, a2, 4
- srli.d t0, t0, 32
-L(al_end_2):
- beqz a5, L(al_end_1)
- st.h t0, a2, 0
- addi.d a2, a2, 2
- srli.d t0, t0, 16
-L(al_end_1):
- beqz a6, L(al_out)
- st.b t0, a2, 0
-L(al_out):
- jr ra
-
-L(unalign):
- slli.d a5, a4, 3
- li.d t1, -1
- sub.d a6, zero, a5
-
- srl.d a7, t0, a5
- sll.d t7, t1, a6
-
- or t0, a7, t7
- sub.d t1, t0, t5
- andn t2, t6, t0
- and t3, t1, t2
-
- bnez t3, L(un_end)
-
- ld.d t4, a1, 8
-
- sub.d t1, t4, t5
- andn t2, t6, t4
- sll.d t0, t4, a6
- and t3, t1, t2
-
- or t0, t0, a7
- bnez t3, L(un_end_with_remaining)
-
-L(un_loop):
- srl.d a7, t4, a5
-
- ld.d t4, a1, 16
- addi.d a1, a1, 8
-
- st.d t0, a2, 0
- addi.d a2, a2, 8
-
- sub.d t1, t4, t5
- andn t2, t6, t4
- sll.d t0, t4, a6
- and t3, t1, t2
-
- or t0, t0, a7
- beqz t3, L(un_loop)
-
-L(un_end_with_remaining):
- ctz.d t1, t3
- srli.d t1, t1, 3
- addi.d t1, t1, 1
- sub.d t1, t1, a4
-
- blt t1, zero, L(un_end_less_8)
- st.d t0, a2, 0
- addi.d a2, a2, 8
- beqz t1, L(un_out)
- srl.d t0, t4, a5 # get the remaining part
- b L(un_end_less_8)
-
-L(un_end):
- ctz.d t1, t3
- srli.d t1, t1, 3
- addi.d t1, t1, 1
-
-L(un_end_less_8):
- andi a4, t1, 4
- andi a5, t1, 2
- andi a6, t1, 1
-L(un_end_4):
- beqz a4, L(un_end_2)
- st.w t0, a2, 0
- addi.d a2, a2, 4
- srli.d t0, t0, 32
-L(un_end_2):
- beqz a5, L(un_end_1)
- st.h t0, a2, 0
- addi.d a2, a2, 2
- srli.d t0, t0, 16
-L(un_end_1):
- beqz a6, L(un_out)
- st.b t0, a2, 0
-L(un_out):
- jr ra
-
-END(STRCPY)
-
-#ifdef _LIBC
-libc_hidden_builtin_def (STRCPY)
-#endif
diff --git a/sysdeps/loongarch/lp64/strlen.S b/sysdeps/loongarch/lp64/strlen.S
deleted file mode 100644
index 71431ce2..00000000
--- a/sysdeps/loongarch/lp64/strlen.S
+++ /dev/null
@@ -1,85 +0,0 @@
-#ifdef _LIBC
-#include <sysdep.h>
-#include <sys/regdef.h>
-#include <sys/asm.h>
-#else
-#include <sys/asm.h>
-#include <sys/regdef.h>
-#endif
-
-#ifndef STRLEN
-#define STRLEN strlen
-#endif
-
-LEAF(STRLEN, 6)
- move a1, a0
- bstrins.d a0, zero, 2, 0
- lu12i.w a2, 0x01010
- li.w t0, -1
-
- ld.d t2, a0, 0
- andi t1, a1, 0x7
- ori a2, a2, 0x101
- slli.d t1, t1, 3
-
- bstrins.d a2, a2, 63, 32
- sll.d t1, t0, t1
- slli.d t3, a2, 7
- nor a3, zero, t3
-
- orn t2, t2, t1
- sub.d t0, t2, a2
- nor t1, t2, a3
- and t0, t0, t1
-
-
- bnez t0, L(count_pos)
- addi.d a0, a0, 8
-L(loop_16_7bit):
- ld.d t2, a0, 0
- sub.d t1, t2, a2
-
- and t0, t1, t3
- bnez t0, L(more_check)
- ld.d t2, a0, 8
- addi.d a0, a0, 16
-
- sub.d t1, t2, a2
- and t0, t1, t3
- beqz t0, L(loop_16_7bit)
- addi.d a0, a0, -8
-L(more_check):
- nor t0, t2, a3
-
- and t0, t1, t0
- bnez t0, L(count_pos)
- addi.d a0, a0, 8
-L(loop_16_8bit):
- ld.d t2, a0, 0
-
- sub.d t1, t2, a2
- nor t0, t2, a3
- and t0, t0, t1
- bnez t0, L(count_pos)
-
- ld.d t2, a0, 8
- addi.d a0, a0, 16
- sub.d t1, t2, a2
- nor t0, t2, a3
-
- and t0, t0, t1
- beqz t0, L(loop_16_8bit)
- addi.d a0, a0, -8
-L(count_pos):
- ctz.d t1, t0
- sub.d a0, a0, a1
-
- srli.d t1, t1, 3
- add.d a0, a0, t1
- jr ra
-
-END(STRLEN)
-
-#ifdef _LIBC
-libc_hidden_builtin_def (STRLEN)
-#endif
diff --git a/sysdeps/loongarch/lp64/strncmp.S b/sysdeps/loongarch/lp64/strncmp.S
deleted file mode 100644
index 55450e55..00000000
--- a/sysdeps/loongarch/lp64/strncmp.S
+++ /dev/null
@@ -1,256 +0,0 @@
-#ifdef _LIBC
-#include <sysdep.h>
-#include <sys/regdef.h>
-#include <sys/asm.h>
-#else
-#include <sys/asm.h>
-#include <sys/regdef.h>
-#endif
-
-#ifndef STRNCMP
-#define STRNCMP strncmp
-#endif
-
-/* int strncmp (const char *s1, const char *s2); */
-
-LEAF(STRNCMP, 6)
- beqz a2, L(ret0)
- xor a4, a0, a1
- lu12i.w t5, 0x01010
- lu12i.w t6, 0x7f7f7
-
- andi a3, a0, 0x7
- ori t5, t5, 0x101
- andi a4, a4, 0x7
- ori t6, t6, 0xf7f
-
- bstrins.d t5, t5, 63, 32
- bstrins.d t6, t6, 63, 32
-
- bnez a4, L(unalign)
- bnez a3, L(mutual_align)
-
-L(a_loop):
- ld.d t0, a0, 0
- ld.d t1, a1, 0
- addi.d a0, a0, 8
- addi.d a1, a1, 8
-
-
- sltui t7, a2, 9
-
-L(start_realign):
- sub.d t2, t0, t5
- nor t3, t0, t6
- xor t4, t0, t1
-
- and t2, t2, t3
- addi.d a2, a2, -8
-
- or t2, t2, t4
- or t3, t2, t7
- beqz t3, L(a_loop)
-
-L(end):
- bge zero, t7, L(out)
- andi t4, a2, 7
- li.d t3, -1
- addi.d t4, t4, -1
- slli.d t4, t4, 3
- sll.d t3, t3, t4
- or t2, t2, t3
-
-
-L(out):
- ctz.d t3, t2
- bstrins.d t3, zero, 2, 0
- srl.d t0, t0, t3
- srl.d t1, t1, t3
-
- andi t0, t0, 0xff
- andi t1, t1, 0xff
- sub.d a0, t0, t1
- jr ra
-
-L(mutual_align):
- bstrins.d a0, zero, 2, 0
- bstrins.d a1, zero, 2, 0
- slli.d a5, a3, 0x3
- li.d t2, -1
-
- ld.d t0, a0, 0
- ld.d t1, a1, 0
-
- li.d t3, 9
- sll.d t2, t2, a5
-
- sub.d t3, t3, a3
- addi.d a0, a0, 8
-
- sltu t7, a2, t3
- addi.d a1, a1, 8
-
- add.d a2, a2, a3
- orn t0, t0, t2
- orn t1, t1, t2
- b L(start_realign)
-
-L(ret0):
- move a0, zero
- jr ra
-
-L(unalign):
- li.d t8, 8
- blt a2, t8, L(short_cmp)
-
- # swap a0 and a1 in case a3 > a4
- andi a4, a1, 0x7
- sltu t8, a4, a3
- xor a6, a0, a1
- maskeqz a6, a6, t8
- xor a0, a0, a6
- xor a1, a1, a6
-
- andi a3, a0, 0x7
- andi a4, a1, 0x7
-
- bstrins.d a0, zero, 2, 0
- bstrins.d a1, zero, 2, 0
-
- li.d t2, -1
- li.d t3, 9
-
- ld.d t0, a0, 0
- ld.d t1, a1, 0
-
- sub.d t3, t3, a4
- sub.d a3, a4, a3
-
- slli.d t4, a4, 3
- slli.d a6, a3, 3
-
- sub.d a5, zero, a6
- sltu t7, a2, t3
-
- rotr.d a7, t0, a5
- sll.d t4, t2, t4 # mask for first num
-
- add.d a2, a2, a4
- sll.d a4, t2, a6 # mask for a7
-
- orn t0, a7, t4
- orn t1, t1, t4
-
- sub.d t2, t0, t5
- nor t4, t0, t6
- and t2, t2, t4
-
- xor t3, t0, t1
- or t2, t2, t3
-
- or t3, t2, t7
- bnez t3, L(un_end)
-
- andn a7, a7, a4
- addi.d a3, a3, 1
-
-L(un_loop):
- addi.d a2, a2, -8
- # in case remaining part has '\0', no more load instructions should be executed on a0 address
- or t0, a7, a4
- sltu t7, a2, a3
-
- sub.d t2, t0, t5
- nor t3, t0, t6
- and t2, t2, t3
-
- or t3, t2, t7
- bnez t3, L(check_remaining)
-
- ld.d t7, a0, 8
- ld.d t1, a1, 8
- addi.d a0, a0, 8
- addi.d a1, a1, 8
-
- sll.d t4, t7, a6
- sub.d t2, t1, t5
- nor t3, t1, t6
-
- or t0, t4, a7
- srl.d a7, t7, a5
-
- and t2, t2, t3
- xor t3, t0, t1
-
- sltui t7, a2, 9
- or t2, t2, t3
-
- or t3, t2, t7
- beqz t3, L(un_loop)
- b L(un_end)
-
-L(check_remaining):
- ld.d t1, a1, 8
- xor t3, t1, a7
- or t2, t2, t3
-
-L(un_end):
- bge zero, t7, L(un_out)
- andi t4, a2, 7
- li.d t3, -1
-
- addi.d t4, t4, -1
- slli.d t4, t4, 3
- sll.d t3, t3, t4
- or t2, t2, t3
-
-L(un_out):
- ctz.d t3, t2
- bstrins.d t3, zero, 2, 0
- srl.d t0, t0, t3
- srl.d t1, t1, t3
-
- andi t0, t0, 0xff
- andi t1, t1, 0xff
-
- sub.d a4, t0, t1
- sub.d a5, t1, t0
-
- maskeqz a6, a5, t8
- masknez a0, a4, t8
-
- or a0, a0, a6
- jr ra
-
-L(short_cmp):
- ld.bu t0, a0, 0
- ld.bu t1, a1, 0
- addi.d a2, a2, -1
-
- xor t2, t0, t1
- masknez t2, t0, t2
- maskeqz t2, a2, t2
-
- beqz t2, L(short_out)
-
- ld.bu t0, a0, 1
- ld.bu t1, a1, 1
-
- addi.d a2, a2, -1
- addi.d a0, a0, 2
-
- addi.d a1, a1, 2
- xor t2, t0, t1
- masknez t2, t0, t2
- maskeqz t2, a2, t2
-
- bnez t2, L(short_cmp)
-
-L(short_out):
- sub.d a0, t0, t1
- jr ra
-
-END(STRNCMP)
-#ifdef _LIBC
-libc_hidden_builtin_def (STRNCMP)
-#endif
diff --git a/sysdeps/loongarch/lp64/strnlen.S b/sysdeps/loongarch/lp64/strnlen.S
deleted file mode 100644
index 5b5ab585..00000000
--- a/sysdeps/loongarch/lp64/strnlen.S
+++ /dev/null
@@ -1,82 +0,0 @@
-#ifdef _LIBC
-#include <sysdep.h>
-#include <sys/regdef.h>
-#include <sys/asm.h>
-#else
-#include <sys/asm.h>
-#include <sys/regdef.h>
-#endif
-
-#ifndef STRNLEN
-#define STRNLEN __strnlen
-#endif
-
-#. before every load, a1(t5) must > 0;
-#. first load with t1 != 0, need to adjust t5;
-#. return the less one of both strlen(s) and a1;
-
-LEAF(STRNLEN, 6)
- beqz a1, L(out)
- lu12i.w a2, 0x01010
- andi t1, a0, 0x7
- move t4, a0
-
- bstrins.d a0, zero, 2, 0
- ori a2, a2, 0x101
- li.w t0, -1
- ld.d t2, a0, 0
-
- slli.d t3, t1, 3
- bstrins.d a2, a2, 63, 32
- li.w t5, 8
- slli.d a3, a2, 7
-
- sub.w t1, t5, t1
- sll.d t0, t0, t3
- nor a3, zero, a3
- orn t2, t2, t0
-
-
- sub.d t0, t2, a2
- nor t3, t2, a3
- and t0, t0, t3
- bnez t0, L(count_pos)
-
- sub.d t5, a1, t1
- bgeu t1, a1, L(out)
-L(loop_8bytes):
- ld.d t2, a0, 8
- addi.d a0, a0, 8
-
- sub.d t0, t2, a2
- nor t1, t2, a3
- sltui t6, t5, 9
- and t0, t0, t1
-
- addi.d t5, t5, -8
- or t7, t0, t6
- beqz t7, L(loop_8bytes)
-L(count_pos):
- ctz.d t1, t0
-
-
- sub.d a0, a0, t4
- srli.d t1, t1, 3
- add.d a0, t1, a0
- sltu t0, a0, a1
-
- masknez t1, a1, t0
- maskeqz a0, a0, t0
- or a0, a0, t1
- jr ra
-
-L(out):
- move a0, a1
- jr ra
-
-END(STRNLEN)
-
-#ifdef _LIBC
-weak_alias (STRNLEN, strnlen)
-libc_hidden_builtin_def (STRNLEN)
-#endif
diff --git a/sysdeps/loongarch/lp64/strrchr.S b/sysdeps/loongarch/lp64/strrchr.S
deleted file mode 100644
index df7fcb6b..00000000
--- a/sysdeps/loongarch/lp64/strrchr.S
+++ /dev/null
@@ -1,105 +0,0 @@
-#ifdef _LIBC
-#include <sysdep.h>
-#include <sys/regdef.h>
-#include <sys/asm.h>
-#else
-#include <sys/asm.h>
-#include <sys/regdef.h>
-#endif
-
-#ifndef STRRCHR_NAME
-#define STRRCHR_NAME strrchr
-#endif
-
-LEAF(STRRCHR_NAME, 6)
- slli.d t1, a0, 3
- bstrins.d a0, zero, 2, 0
- lu12i.w a2, 0x01010
- ld.d t2, a0, 0 // t2 = "5ZZ21abc"
-
- ori a2, a2, 0x101
- andi a1, a1, 0xff // a1 = "0000000Z"
- li.d a5, -1
- bstrins.d a2, a2, 63, 32 // a2 = 0x0101010101010101
-
- sll.d t1, a5, t1 // t1 = 0xffffffffff000000
- mul.d a1, a1, a2 // a1 = "ZZZZZZZZ"
- orn t2, t2, t1 // t2 = "5ZZ21YYY"
- slli.d a3, a2, 7 // a3 = 0x8080808080808080
-
- sub.d a4, t2, a2
- andn t0, a3, t2
- move t3, zero
- and t0, a4, t0
-
-
- xor a4, t2, a1
- move t5, zero
- orn a4, a4, t1
- bnez t0, L(found_end)
-
- sub.d t1, a4, a2
- andn t0, a3, a4
- and t1, t1, t0
-
-L(loop_8bytes):
- masknez t4, t3, t1
-
- maskeqz t3, t2, t1
- ld.d t2, a0, 8
- masknez t0, t5, t1
- maskeqz t5, a0, t1
-
- or t3, t3, t4
- or t5, t0, t5
- sub.d t0, t2, a2
- andn t1, a3, t2
-
-
- xor a4, t2, a1
- and t0, t0, t1 //t0 hold diff pattern for '\0'
- sub.d t1, a4, a2
- andn t4, a3, a4
-
- and t1, t1, t4 //t1 hold diff pattern for 'a1'
- addi.d a0, a0, 8
- beqz t0, L(loop_8bytes) //ok, neither \0 nor found
-L(found_end):
- ctz.d t1, t0
-
- xor t3, t3, a1
- orn t1, zero, t1
- revb.d t3, t3
- srl.d t1, a5, t1 // mask for '\0'
-
- sub.d t4, t3, a2
- orn a4, a4, t1
- andn t3, a3, t3
- revb.d t2, a4
-
- sub.d t0, t2, a2
- andn t1, a3, t2
- and t3, t3, t4
- and t1, t0, t1
-
- li.d t7, 7
- masknez t4, t3, t1
- maskeqz t3, t1, t1
- masknez t5, t5, t1
-
- or t3, t3, t4
- maskeqz t6, a0, t1
- ctz.d t0, t3
- or t5, t6, t5
-
- srli.d t0, t0, 3
- sub.d t0, t7, t0
- add.d a0, t5, t0
- maskeqz a0, a0, t3
-
- jr ra
-END(STRRCHR_NAME)
-
-#ifdef _LIBC
-libc_hidden_builtin_def(STRRCHR_NAME)
-#endif
--
2.33.0