From b720fd44df475685ea164491d76c42e127aab3ea Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Wed, 21 Jun 2023 10:49:39 +0800
Subject: [PATCH 07/14] glibc-2.28: Refactor code of st{r,p}* functions.

Change-Id: Ife977373e9ba071b284ee19ca4ba121bc27d5834
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
 .../loongarch/lp64/multiarch/stpcpy-aligned.S | 179 +++++++++++-
 .../loongarch/lp64/multiarch/strchr-aligned.S | 91 ++++++-
 .../lp64/multiarch/strchrnul-aligned.S | 94 ++++++-
 .../loongarch/lp64/multiarch/strcmp-aligned.S | 225 ++++++++++++++-
 .../loongarch/lp64/multiarch/strcpy-aligned.S | 173 +++++++++++-
 .../loongarch/lp64/multiarch/strlen-aligned.S | 85 +++++-
 .../lp64/multiarch/strncmp-aligned.S | 256 +++++++++++++++++-
 .../lp64/multiarch/strnlen-aligned.S | 82 +++++-
 .../lp64/multiarch/strrchr-aligned.S | 105 ++++++-
 sysdeps/loongarch/lp64/stpcpy.S | 179 ------------
 sysdeps/loongarch/lp64/strchr.S | 89 ------
 sysdeps/loongarch/lp64/strchrnul.S | 94 -------
 sysdeps/loongarch/lp64/strcmp.S | 227 ----------------
 sysdeps/loongarch/lp64/strcpy.S | 173 ------------
 sysdeps/loongarch/lp64/strlen.S | 85 ------
 sysdeps/loongarch/lp64/strncmp.S | 256 ------------------
 sysdeps/loongarch/lp64/strnlen.S | 82 ------
 sysdeps/loongarch/lp64/strrchr.S | 105 -------
 18 files changed, 1264 insertions(+), 1316 deletions(-)
 delete mode 100644 sysdeps/loongarch/lp64/stpcpy.S
 delete mode 100644 sysdeps/loongarch/lp64/strchr.S
 delete mode 100644 sysdeps/loongarch/lp64/strchrnul.S
 delete mode 100644 sysdeps/loongarch/lp64/strcmp.S
 delete mode 100644 sysdeps/loongarch/lp64/strcpy.S
 delete mode 100644 sysdeps/loongarch/lp64/strlen.S
 delete mode 100644 sysdeps/loongarch/lp64/strncmp.S
 delete mode 100644 sysdeps/loongarch/lp64/strnlen.S
 delete mode 100644 sysdeps/loongarch/lp64/strrchr.S

diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S
|
|
index 3d134e3f..7109b0f0 100644
|
|
--- a/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S
|
|
+++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S
|
|
@@ -1,8 +1,181 @@
|
|
+#ifdef _LIBC
|
|
+#include <sysdep.h>
|
|
+#include <sys/regdef.h>
|
|
+#include <sys/asm.h>
|
|
+#else
|
|
+#include <sys/asm.h>
|
|
+#include <sys/regdef.h>
|
|
+#endif
|
|
|
|
#if IS_IN (libc)
|
|
-
|
|
#define STPCPY_NAME __stpcpy_aligned
|
|
-
|
|
+#else
|
|
+#define STPCPY_NAME __stpcpy
|
|
#endif
|
|
|
|
-#include "../stpcpy.S"
|
|
+LEAF(STPCPY_NAME, 6)
|
|
+ andi a3, a0, 0x7
|
|
+ beqz a3, L(dest_align)
|
|
+ sub.d a5, a1, a3
|
|
+ addi.d a5, a5, 8
|
|
+
|
|
+L(make_dest_align):
|
|
+ ld.b t0, a1, 0
|
|
+ addi.d a1, a1, 1
|
|
+ st.b t0, a0, 0
|
|
+ addi.d a0, a0, 1
|
|
+
|
|
+ beqz t0, L(al_out)
|
|
+ bne a1, a5, L(make_dest_align)
|
|
+
|
|
+L(dest_align):
|
|
+ andi a4, a1, 7
|
|
+ bstrins.d a1, zero, 2, 0
|
|
+
|
|
+ lu12i.w t5, 0x1010
|
|
+ ld.d t0, a1, 0
|
|
+ ori t5, t5, 0x101
|
|
+ bstrins.d t5, t5, 63, 32
|
|
+
|
|
+ slli.d t6, t5, 0x7
|
|
+ bnez a4, L(unalign)
|
|
+ sub.d t1, t0, t5
|
|
+ andn t2, t6, t0
|
|
+
|
|
+ and t3, t1, t2
|
|
+ bnez t3, L(al_end)
|
|
+
|
|
+L(al_loop):
|
|
+ st.d t0, a0, 0
|
|
+ ld.d t0, a1, 8
|
|
+
|
|
+ addi.d a1, a1, 8
|
|
+ addi.d a0, a0, 8
|
|
+ sub.d t1, t0, t5
|
|
+ andn t2, t6, t0
|
|
+
|
|
+ and t3, t1, t2
|
|
+ beqz t3, L(al_loop)
|
|
+
|
|
+L(al_end):
|
|
+ ctz.d t1, t3
|
|
+ srli.d t1, t1, 3
|
|
+ addi.d t1, t1, 1 # add 1, since '\0' needs to be copied to dest
|
|
+
|
|
+ andi a3, t1, 8
|
|
+ andi a4, t1, 4
|
|
+ andi a5, t1, 2
|
|
+ andi a6, t1, 1
|
|
+
|
|
+L(al_end_8):
|
|
+ beqz a3, L(al_end_4)
|
|
+ st.d t0, a0, 0
|
|
+ addi.d a0, a0, 7
|
|
+ jr ra
|
|
+L(al_end_4):
|
|
+ beqz a4, L(al_end_2)
|
|
+ st.w t0, a0, 0
|
|
+ addi.d a0, a0, 4
|
|
+ srli.d t0, t0, 32
|
|
+L(al_end_2):
|
|
+ beqz a5, L(al_end_1)
|
|
+ st.h t0, a0, 0
|
|
+ addi.d a0, a0, 2
|
|
+ srli.d t0, t0, 16
|
|
+L(al_end_1):
|
|
+ beqz a6, L(al_out)
|
|
+ st.b t0, a0, 0
|
|
+ addi.d a0, a0, 1
|
|
+L(al_out):
|
|
+ addi.d a0, a0, -1
|
|
+ jr ra
|
|
+
|
|
+L(unalign):
|
|
+ slli.d a5, a4, 3
|
|
+ li.d t1, -1
|
|
+ sub.d a6, zero, a5
|
|
+
|
|
+ srl.d a7, t0, a5
|
|
+ sll.d t7, t1, a6
|
|
+
|
|
+ or t0, a7, t7
|
|
+ sub.d t1, t0, t5
|
|
+ andn t2, t6, t0
|
|
+ and t3, t1, t2
|
|
+
|
|
+ bnez t3, L(un_end)
|
|
+
|
|
+ ld.d t4, a1, 8
|
|
+ addi.d a1, a1, 8
|
|
+
|
|
+ sub.d t1, t4, t5
|
|
+ andn t2, t6, t4
|
|
+ sll.d t0, t4, a6
|
|
+ and t3, t1, t2
|
|
+
|
|
+ or t0, t0, a7
|
|
+ bnez t3, L(un_end_with_remaining)
|
|
+
|
|
+L(un_loop):
|
|
+ srl.d a7, t4, a5
|
|
+
|
|
+ ld.d t4, a1, 8
|
|
+ addi.d a1, a1, 8
|
|
+
|
|
+ st.d t0, a0, 0
|
|
+ addi.d a0, a0, 8
|
|
+
|
|
+ sub.d t1, t4, t5
|
|
+ andn t2, t6, t4
|
|
+ sll.d t0, t4, a6
|
|
+ and t3, t1, t2
|
|
+
|
|
+ or t0, t0, a7
|
|
+ beqz t3, L(un_loop)
|
|
+
|
|
+L(un_end_with_remaining):
|
|
+ ctz.d t1, t3
|
|
+ srli.d t1, t1, 3
|
|
+ addi.d t1, t1, 1
|
|
+ sub.d t1, t1, a4
|
|
+
|
|
+ blt t1, zero, L(un_end_less_8)
|
|
+ st.d t0, a0, 0
|
|
+ addi.d a0, a0, 8
|
|
+ beqz t1, L(un_out)
|
|
+ srl.d t0, t4, a5 # get the remaining part
|
|
+ b L(un_end_less_8)
|
|
+
|
|
+L(un_end):
|
|
+ ctz.d t1, t3
|
|
+ srli.d t1, t1, 3
|
|
+ addi.d t1, t1, 1
|
|
+
|
|
+L(un_end_less_8):
|
|
+ andi a4, t1, 4
|
|
+ andi a5, t1, 2
|
|
+ andi a6, t1, 1
|
|
+L(un_end_4):
|
|
+ beqz a4, L(un_end_2)
|
|
+ st.w t0, a0, 0
|
|
+ addi.d a0, a0, 4
|
|
+ srli.d t0, t0, 32
|
|
+L(un_end_2):
|
|
+ beqz a5, L(un_end_1)
|
|
+ st.h t0, a0, 0
|
|
+ addi.d a0, a0, 2
|
|
+ srli.d t0, t0, 16
|
|
+L(un_end_1):
|
|
+ beqz a6, L(un_out)
|
|
+ st.b t0, a0, 0
|
|
+ addi.d a0, a0, 1
|
|
+L(un_out):
|
|
+ addi.d a0, a0, -1
|
|
+ jr ra
|
|
+
|
|
+END(STPCPY_NAME)
|
|
+
|
|
+#ifdef _LIBC
|
|
+weak_alias (STPCPY_NAME, stpcpy)
|
|
+libc_hidden_builtin_def (STPCPY_NAME)
|
|
+#endif
|
|
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
|
|
index 92365658..d9bd4587 100644
|
|
--- a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
|
|
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
|
|
@@ -1,10 +1,95 @@
|
|
|
|
-#if IS_IN (libc)
|
|
|
|
-#define STRCHR_NAME __strchr_aligned
|
|
+#ifdef _LIBC
|
|
+#include <sysdep.h>
|
|
+#include <sys/regdef.h>
|
|
+#include <sys/asm.h>
|
|
+#else
|
|
+#include <sys/asm.h>
|
|
+#include <sys/regdef.h>
|
|
+#endif
|
|
|
|
+#if IS_IN (libc)
|
|
+#define STRCHR_NAME __strchr_aligned
|
|
+#else
|
|
+#define STRCHR_NAME strchr
|
|
#endif
|
|
|
|
-#include "../strchr.S"
|
|
+/* char * strchr (const char *s1, int c); */
|
|
+
|
|
+LEAF(STRCHR_NAME, 6)
|
|
+ slli.d t1, a0, 3
|
|
+ bstrins.d a0, zero, 2, 0
|
|
+ lu12i.w a2, 0x01010
|
|
+ ld.d t2, a0, 0
|
|
+
|
|
+ ori a2, a2, 0x101
|
|
+ andi a1, a1, 0xff
|
|
+ bstrins.d a2, a2, 63, 32
|
|
+ li.w t0, -1
|
|
+
|
|
+ mul.d a1, a1, a2 # "cccccccc"
|
|
+ sll.d t0, t0, t1
|
|
+ slli.d a3, a2, 7 # 0x8080808080808080
|
|
+ orn t2, t2, t0
|
|
+
|
|
+ sll.d t3, a1, t1
|
|
+ xor t4, t2, t3
|
|
+ sub.d a7, t2, a2
|
|
+ andn a6, a3, t2
|
|
+
|
|
+
|
|
+ sub.d a5, t4, a2
|
|
+ andn a4, a3, t4
|
|
+ and a6, a7, a6
|
|
+ and a5, a5, a4
|
|
+
|
|
+ or t0, a6, a5
|
|
+ bnez t0, L(_mc8_a)
|
|
+ addi.d a0, a0, 8
|
|
+L(_aloop):
|
|
+ ld.d t4, a0, 0
|
|
+
|
|
+ xor t2, t4, a1
|
|
+ sub.d a7, t4, a2
|
|
+ andn a6, a3, t4
|
|
+ sub.d a5, t2, a2
|
|
+
|
|
+ andn a4, a3, t2
|
|
+ and a6, a7, a6
|
|
+ and a5, a5, a4
|
|
+ or a7, a6, a5
|
|
+
|
|
+
|
|
+ bnez a7, L(_mc8_a)
|
|
+ ld.d t4, a0, 8
|
|
+ addi.d a0, a0, 16
|
|
+ xor t2, t4, a1
|
|
+
|
|
+ sub.d a7, t4, a2
|
|
+ andn a6, a3, t4
|
|
+ sub.d a5, t2, a2
|
|
+ andn a4, a3, t2
|
|
+
|
|
+ and a6, a7, a6
|
|
+ and a5, a5, a4
|
|
+ or a7, a6, a5
|
|
+ beqz a7, L(_aloop)
|
|
+
|
|
+ addi.d a0, a0, -8
|
|
+
|
|
+L(_mc8_a):
|
|
+ ctz.d t0, a5
|
|
+ ctz.d t2, a6
|
|
+ srli.w t0, t0, 3
|
|
+
|
|
+
|
|
+ srli.w t2, t2, 3
|
|
+ sltu t1, t2, t0
|
|
+ add.d a0, a0, t0
|
|
+ masknez a0, a0, t1
|
|
+
|
|
+ jr ra
|
|
+END(STRCHR_NAME)
|
|
|
|
weak_alias (STRCHR_NAME, index)
|
|
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
|
|
index 4fa63ecc..f18b01a3 100644
|
|
--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
|
|
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
|
|
@@ -1,8 +1,96 @@
|
|
+#ifdef _LIBC
|
|
+#include <sysdep.h>
|
|
+#include <sys/regdef.h>
|
|
+#include <sys/asm.h>
|
|
+#else
|
|
+#include <sys/asm.h>
|
|
+#include <sys/regdef.h>
|
|
+#endif
|
|
|
|
#if IS_IN (libc)
|
|
-
|
|
#define STRCHRNUL_NAME __strchrnul_aligned
|
|
-
|
|
+#else
|
|
+#define STRCHRNUL_NAME __strchrnul
|
|
#endif
|
|
|
|
-#include "../strchrnul.S"
|
|
+/* char * strchrnul (const char *s1, int c); */
|
|
+
|
|
+LEAF(STRCHRNUL_NAME, 6)
|
|
+ slli.d t1, a0, 3
|
|
+ bstrins.d a0, zero, 2, 0
|
|
+ lu12i.w a2, 0x01010
|
|
+ ld.d t2, a0, 0
|
|
+
|
|
+ ori a2, a2, 0x101
|
|
+ andi a1, a1, 0xff
|
|
+ bstrins.d a2, a2, 63, 32
|
|
+ li.w t0, -1
|
|
+
|
|
+ mul.d a1, a1, a2 # "cccccccc"
|
|
+ sll.d t0, t0, t1
|
|
+ slli.d a3, a2, 7 # 0x8080808080808080
|
|
+ orn t2, t2, t0
|
|
+
|
|
+ sll.d t3, a1, t1
|
|
+ xor t4, t2, t3
|
|
+ sub.d a7, t2, a2
|
|
+ andn a6, a3, t2
|
|
+
|
|
+
|
|
+ sub.d a5, t4, a2
|
|
+ andn a4, a3, t4
|
|
+ and a6, a7, a6
|
|
+ and a5, a5, a4
|
|
+
|
|
+ or t0, a6, a5
|
|
+ bnez t0, L(_mc8_a)
|
|
+ addi.d a0, a0, 8
|
|
+L(_aloop):
|
|
+ ld.d t4, a0, 0
|
|
+
|
|
+ xor t2, t4, a1
|
|
+ sub.d a7, t4, a2
|
|
+ andn a6, a3, t4
|
|
+ sub.d a5, t2, a2
|
|
+
|
|
+ andn a4, a3, t2
|
|
+ and a6, a7, a6
|
|
+ and a5, a5, a4
|
|
+ or a7, a6, a5
|
|
+
|
|
+
|
|
+ bnez a7, L(_mc8_a)
|
|
+ ld.d t4, a0, 8
|
|
+ addi.d a0, a0, 16
|
|
+ xor t2, t4, a1
|
|
+
|
|
+ sub.d a7, t4, a2
|
|
+ andn a6, a3, t4
|
|
+ sub.d a5, t2, a2
|
|
+ andn a4, a3, t2
|
|
+
|
|
+ and a6, a7, a6
|
|
+ and a5, a5, a4
|
|
+ or a7, a6, a5
|
|
+ beqz a7, L(_aloop)
|
|
+
|
|
+ addi.d a0, a0, -8
|
|
+L(_mc8_a):
|
|
+ ctz.d t0, a5
|
|
+ ctz.d t2, a6
|
|
+ srli.w t0, t0, 3
|
|
+
|
|
+ srli.w t2, t2, 3
|
|
+ slt t1, t0, t2
|
|
+ masknez t3, t2, t1
|
|
+ maskeqz t4, t0, t1
|
|
+
|
|
+ or t0, t3, t4
|
|
+ add.d a0, a0, t0
|
|
+ jr ra
|
|
+END(STRCHRNUL_NAME)
|
|
+
|
|
+#ifdef _LIBC
|
|
+weak_alias(STRCHRNUL_NAME, strchrnul)
|
|
+libc_hidden_builtin_def (STRCHRNUL_NAME)
|
|
+#endif
|
|
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
|
|
index f84f52b8..a9b74b0c 100644
|
|
--- a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
|
|
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
|
|
@@ -1,8 +1,229 @@
|
|
+/* 2022\06\15 loongarch64 author: chenxiaolong. */
|
|
|
|
-#if IS_IN (libc)
|
|
+#ifdef _LIBC
|
|
+#include <sysdep.h>
|
|
+#include <sys/regdef.h>
|
|
+#include <sys/asm.h>
|
|
+#else
|
|
+#include <sys/asm.h>
|
|
+#include <sys/regdef.h>
|
|
+#endif
|
|
|
|
+#if IS_IN (libc)
|
|
#define STRCMP_NAME __strcmp_aligned
|
|
+#else
|
|
+#define STRCMP_NAME strcmp
|
|
+#endif
|
|
+
|
|
+/* int strcmp (const char *s1, const char *s2); */
|
|
+
|
|
+/* Parameters and Results */
|
|
+#define src1 a0
|
|
+#define src2 a1
|
|
+#define result v0
|
|
+LEAF(STRCMP_NAME, 6)
|
|
+ xor a4, src1, src2
|
|
+ lu12i.w t5, 0x01010
|
|
+ lu12i.w t6, 0x7f7f7
|
|
+ andi a2, src1, 0x7
|
|
+
|
|
+ ori t5, t5, 0x101
|
|
+ andi a4, a4, 0x7
|
|
+ ori t6, t6, 0xf7f
|
|
+ bstrins.d t5, t5, 63, 32
|
|
+ bstrins.d t6, t6, 63, 32
|
|
+
|
|
+ bnez a4, 3f // unaligned
|
|
+ beqz a2, 1f // loop aligned
|
|
+
|
|
+// mutual aligned
|
|
+ bstrins.d src1, zero, 2, 0
|
|
+ bstrins.d src2, zero, 2, 0
|
|
+ slli.d a4, a2, 0x3
|
|
+ ld.d t0, src1, 0
|
|
+
|
|
+ sub.d a4, zero, a4
|
|
+ ld.d t1, src2, 0
|
|
+ addi.d src1, src1, 8
|
|
+ addi.d src2, src2, 8
|
|
+
|
|
+ nor a5, zero, zero
|
|
+ srl.d a5, a5, a4
|
|
+ or t0, t0, a5
|
|
+
|
|
+ or t1, t1, a5
|
|
+ b 2f //start realigned
|
|
+
|
|
+// loop aligned
|
|
+1:
|
|
+ ld.d t0, src1, 0
|
|
+ addi.d src1, src1, 8
|
|
+ ld.d t1, src2, 0
|
|
+ addi.d src2, src2, 8
|
|
+
|
|
+// start realigned:
|
|
+2:
|
|
+ sub.d t2, t0, t5
|
|
+ nor t3, t0, t6
|
|
+ and t2, t2, t3
|
|
+
|
|
+ xor t3, t0, t1
|
|
+ or t2, t2, t3
|
|
+ beqz t2, 1b
|
|
+
|
|
+ ctz.d t7, t2
|
|
+ bstrins.d t7, zero, 2, 0
|
|
+ srl.d t0, t0, t7
|
|
+ srl.d t1, t1, t7
|
|
+
|
|
+ andi t0, t0, 0xff
|
|
+ andi t1, t1, 0xff
|
|
+ sub.d v0, t0, t1
|
|
+ jr ra
|
|
+
|
|
+// unaligned
|
|
+3:
|
|
+ andi a3, src2, 0x7
|
|
+ slt a5, a2, a3
|
|
+ masknez t8, a2, a5
|
|
+ xor a6, src1, src2
|
|
+ maskeqz a6, a6, t8
|
|
+ xor src1, src1, a6
|
|
+ xor src2, src2, a6
|
|
+
|
|
+ andi a2, src1, 0x7
|
|
+ beqz a2, 4f // src1 is aligned
|
|
+
|
|
+//strcmp_unaligned:
|
|
+ andi a3, src2, 0x7
|
|
+ bstrins.d src1, zero, 2, 0
|
|
+ bstrins.d src2, zero, 2, 0
|
|
+ nor t3, zero, zero
|
|
+
|
|
+ ld.d t0, src1, 0
|
|
+ ld.d t1, src2, 0
|
|
+ sub.d a2, a3, a2
|
|
+ addi.d t2, zero, 8
|
|
+
|
|
+ sub.d a5, t2, a2
|
|
+ sub.d a6, t2, a3
|
|
+ slli.d a5, a5, 0x3
|
|
+ slli.d a6, a6, 0x3
|
|
+
|
|
+ srl.d t4, t3, a6
|
|
+ srl.d a4, t3, a5
|
|
+ rotr.d a7, t0, a5
|
|
+
|
|
+ addi.d src2, src2, 8
|
|
+ addi.d src1, src1, 8
|
|
+ or t1, t1, t4
|
|
+ or t0, a7, t4
|
|
+
|
|
+ sub.d t2, t0, t5
|
|
+ nor t3, t0, t6
|
|
+ and t2, t2, t3
|
|
+ xor t3, t0, t1
|
|
+ or t2, t2, t3
|
|
+ bnez t2, 7f
|
|
+
|
|
+ and a7, a7, a4
|
|
+ slli.d a6, a2, 0x3
|
|
+ nor a4, zero, a4
|
|
+ b 5f
|
|
+
|
|
+// src1 is aligned
|
|
+4:
|
|
+ andi a3, src2, 0x7
|
|
+ ld.d t0, src1, 0
|
|
+
|
|
+ bstrins.d src2, zero, 2, 0
|
|
+ nor t2, zero, zero
|
|
+ ld.d t1, src2, 0
|
|
+
|
|
+ addi.d t3, zero, 0x8
|
|
+ sub.d a5, t3, a3
|
|
+ slli.d a5, a5, 0x3
|
|
+ srl.d a4, t2, a5
|
|
+ rotr.d t4, t0, a5
|
|
+
|
|
+ addi.d src2, src2, 8
|
|
+ addi.d src1, src1, 8
|
|
+ or t1, t1, a4
|
|
+ or t0, t4, a4
|
|
+
|
|
+ sub.d t2, t0, t5
|
|
+ nor t3, t0, t6
|
|
+ and t2, t2, t3
|
|
+ xor t3, t0, t1
|
|
+ or t2, t2, t3
|
|
+
|
|
+ bnez t2, 7f
|
|
+
|
|
+ and a7, t4, a4
|
|
+ slli.d a6, a3, 0x3
|
|
+ nor a4, zero, a4
|
|
+
|
|
+// unaligned loop
|
|
+// a7: remaining number
|
|
+// a6: shift left number
|
|
+// a5: shift right number
|
|
+// a4: mask for checking remaining number
|
|
+5:
|
|
+ or t0, a7, a4
|
|
+ sub.d t2, t0, t5
|
|
+ nor t3, t0, t6
|
|
+ and t2, t2, t3
|
|
+ bnez t2, 6f
|
|
+
|
|
+ ld.d t0, src1, 0
|
|
+ addi.d src1, src1, 8
|
|
+ ld.d t1, src2, 0
|
|
+ addi.d src2, src2, 8
|
|
+
|
|
+ srl.d t7, t0, a5
|
|
+ sll.d t0, t0, a6
|
|
+ or t0, a7, t0
|
|
+
|
|
+ sub.d t2, t0, t5
|
|
+ nor t3, t0, t6
|
|
+ and t2, t2, t3
|
|
+ xor t3, t0, t1
|
|
+ or t2, t2, t3
|
|
+ bnez t2, 7f
|
|
+
|
|
+ or a7, t7, zero
|
|
+ b 5b
|
|
+
|
|
+6:
|
|
+ ld.bu t1, src2, 0
|
|
+ andi t0, a7, 0xff
|
|
+ xor t2, t0, t1
|
|
+ srli.d a7, a7, 0x8
|
|
+ masknez t2, t0, t2
|
|
+ addi.d src2, src2, 1
|
|
+ beqz t2, 8f
|
|
+ b 6b
|
|
+
|
|
+7:
|
|
+ ctz.d t7, t2
|
|
+ bstrins.d t7, zero, 2, 0
|
|
+ srl.d t0, t0, t7
|
|
+ srl.d t1, t1, t7
|
|
+
|
|
+ andi t0, t0, 0xff
|
|
+ andi t1, t1, 0xff
|
|
+
|
|
+8:
|
|
+ sub.d a4, t0, t1
|
|
+ sub.d a5, t1, t0
|
|
+ maskeqz a6, a5, t8
|
|
+ masknez result, a4, t8
|
|
+ or result, result, a6
|
|
+ jr ra
|
|
+
|
|
+END(STRCMP_NAME)
|
|
|
|
+#ifdef _LIBC
|
|
+libc_hidden_builtin_def (STRCMP_NAME)
|
|
#endif
|
|
|
|
-#include "../strcmp.S"
|
|
diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S
|
|
index 4860398b..80954912 100644
|
|
--- a/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S
|
|
+++ b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S
|
|
@@ -1,8 +1,175 @@
|
|
+#ifdef _LIBC
|
|
+#include <sysdep.h>
|
|
+#include <sys/regdef.h>
|
|
+#include <sys/asm.h>
|
|
+#else
|
|
+#include <sys/asm.h>
|
|
+#include <sys/regdef.h>
|
|
+#endif
|
|
|
|
#if IS_IN (libc)
|
|
-
|
|
#define STRCPY __strcpy_aligned
|
|
-
|
|
+#else
|
|
+#define STRCPY strcpy
|
|
#endif
|
|
|
|
-#include "../strcpy.S"
|
|
+LEAF(STRCPY, 6)
|
|
+ andi a3, a0, 0x7
|
|
+ move a2, a0
|
|
+ beqz a3, L(dest_align)
|
|
+ sub.d a5, a1, a3
|
|
+ addi.d a5, a5, 8
|
|
+
|
|
+L(make_dest_align):
|
|
+ ld.b t0, a1, 0
|
|
+ addi.d a1, a1, 1
|
|
+ st.b t0, a2, 0
|
|
+ beqz t0, L(al_out)
|
|
+
|
|
+ addi.d a2, a2, 1
|
|
+ bne a1, a5, L(make_dest_align)
|
|
+
|
|
+L(dest_align):
|
|
+ andi a4, a1, 7
|
|
+ bstrins.d a1, zero, 2, 0
|
|
+
|
|
+ lu12i.w t5, 0x1010
|
|
+ ld.d t0, a1, 0
|
|
+ ori t5, t5, 0x101
|
|
+ bstrins.d t5, t5, 63, 32
|
|
+
|
|
+ slli.d t6, t5, 0x7
|
|
+ bnez a4, L(unalign)
|
|
+ sub.d t1, t0, t5
|
|
+ andn t2, t6, t0
|
|
+
|
|
+ and t3, t1, t2
|
|
+ bnez t3, L(al_end)
|
|
+
|
|
+L(al_loop):
|
|
+ st.d t0, a2, 0
|
|
+ ld.d t0, a1, 8
|
|
+
|
|
+ addi.d a1, a1, 8
|
|
+ addi.d a2, a2, 8
|
|
+ sub.d t1, t0, t5
|
|
+ andn t2, t6, t0
|
|
+
|
|
+ and t3, t1, t2
|
|
+ beqz t3, L(al_loop)
|
|
+
|
|
+L(al_end):
|
|
+ ctz.d t1, t3
|
|
+ srli.d t1, t1, 3
|
|
+ addi.d t1, t1, 1 # add 1, since '\0' needs to be copied to dest
|
|
+
|
|
+ andi a3, t1, 8
|
|
+ andi a4, t1, 4
|
|
+ andi a5, t1, 2
|
|
+ andi a6, t1, 1
|
|
+
|
|
+L(al_end_8):
|
|
+ beqz a3, L(al_end_4)
|
|
+ st.d t0, a2, 0
|
|
+ jr ra
|
|
+L(al_end_4):
|
|
+ beqz a4, L(al_end_2)
|
|
+ st.w t0, a2, 0
|
|
+ addi.d a2, a2, 4
|
|
+ srli.d t0, t0, 32
|
|
+L(al_end_2):
|
|
+ beqz a5, L(al_end_1)
|
|
+ st.h t0, a2, 0
|
|
+ addi.d a2, a2, 2
|
|
+ srli.d t0, t0, 16
|
|
+L(al_end_1):
|
|
+ beqz a6, L(al_out)
|
|
+ st.b t0, a2, 0
|
|
+L(al_out):
|
|
+ jr ra
|
|
+
|
|
+L(unalign):
|
|
+ slli.d a5, a4, 3
|
|
+ li.d t1, -1
|
|
+ sub.d a6, zero, a5
|
|
+
|
|
+ srl.d a7, t0, a5
|
|
+ sll.d t7, t1, a6
|
|
+
|
|
+ or t0, a7, t7
|
|
+ sub.d t1, t0, t5
|
|
+ andn t2, t6, t0
|
|
+ and t3, t1, t2
|
|
+
|
|
+ bnez t3, L(un_end)
|
|
+
|
|
+ ld.d t4, a1, 8
|
|
+
|
|
+ sub.d t1, t4, t5
|
|
+ andn t2, t6, t4
|
|
+ sll.d t0, t4, a6
|
|
+ and t3, t1, t2
|
|
+
|
|
+ or t0, t0, a7
|
|
+ bnez t3, L(un_end_with_remaining)
|
|
+
|
|
+L(un_loop):
|
|
+ srl.d a7, t4, a5
|
|
+
|
|
+ ld.d t4, a1, 16
|
|
+ addi.d a1, a1, 8
|
|
+
|
|
+ st.d t0, a2, 0
|
|
+ addi.d a2, a2, 8
|
|
+
|
|
+ sub.d t1, t4, t5
|
|
+ andn t2, t6, t4
|
|
+ sll.d t0, t4, a6
|
|
+ and t3, t1, t2
|
|
+
|
|
+ or t0, t0, a7
|
|
+ beqz t3, L(un_loop)
|
|
+
|
|
+L(un_end_with_remaining):
|
|
+ ctz.d t1, t3
|
|
+ srli.d t1, t1, 3
|
|
+ addi.d t1, t1, 1
|
|
+ sub.d t1, t1, a4
|
|
+
|
|
+ blt t1, zero, L(un_end_less_8)
|
|
+ st.d t0, a2, 0
|
|
+ addi.d a2, a2, 8
|
|
+ beqz t1, L(un_out)
|
|
+ srl.d t0, t4, a5 # get the remaining part
|
|
+ b L(un_end_less_8)
|
|
+
|
|
+L(un_end):
|
|
+ ctz.d t1, t3
|
|
+ srli.d t1, t1, 3
|
|
+ addi.d t1, t1, 1
|
|
+
|
|
+L(un_end_less_8):
|
|
+ andi a4, t1, 4
|
|
+ andi a5, t1, 2
|
|
+ andi a6, t1, 1
|
|
+L(un_end_4):
|
|
+ beqz a4, L(un_end_2)
|
|
+ st.w t0, a2, 0
|
|
+ addi.d a2, a2, 4
|
|
+ srli.d t0, t0, 32
|
|
+L(un_end_2):
|
|
+ beqz a5, L(un_end_1)
|
|
+ st.h t0, a2, 0
|
|
+ addi.d a2, a2, 2
|
|
+ srli.d t0, t0, 16
|
|
+L(un_end_1):
|
|
+ beqz a6, L(un_out)
|
|
+ st.b t0, a2, 0
|
|
+L(un_out):
|
|
+ jr ra
|
|
+
|
|
+END(STRCPY)
|
|
+
|
|
+#ifdef _LIBC
|
|
+libc_hidden_builtin_def (STRCPY)
|
|
+#endif
|
|
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
|
|
index d31875fd..fcbc4f6a 100644
|
|
--- a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
|
|
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
|
|
@@ -1,8 +1,87 @@
|
|
+#ifdef _LIBC
|
|
+#include <sysdep.h>
|
|
+#include <sys/regdef.h>
|
|
+#include <sys/asm.h>
|
|
+#else
|
|
+#include <sys/asm.h>
|
|
+#include <sys/regdef.h>
|
|
+#endif
|
|
|
|
#if IS_IN (libc)
|
|
-
|
|
#define STRLEN __strlen_aligned
|
|
-
|
|
+#else
|
|
+#define STRLEN strlen
|
|
#endif
|
|
|
|
-#include "../strlen.S"
|
|
+LEAF(STRLEN, 6)
|
|
+ move a1, a0
|
|
+ bstrins.d a0, zero, 2, 0
|
|
+ lu12i.w a2, 0x01010
|
|
+ li.w t0, -1
|
|
+
|
|
+ ld.d t2, a0, 0
|
|
+ andi t1, a1, 0x7
|
|
+ ori a2, a2, 0x101
|
|
+ slli.d t1, t1, 3
|
|
+
|
|
+ bstrins.d a2, a2, 63, 32
|
|
+ sll.d t1, t0, t1
|
|
+ slli.d t3, a2, 7
|
|
+ nor a3, zero, t3
|
|
+
|
|
+ orn t2, t2, t1
|
|
+ sub.d t0, t2, a2
|
|
+ nor t1, t2, a3
|
|
+ and t0, t0, t1
|
|
+
|
|
+
|
|
+ bnez t0, L(count_pos)
|
|
+ addi.d a0, a0, 8
|
|
+L(loop_16_7bit):
|
|
+ ld.d t2, a0, 0
|
|
+ sub.d t1, t2, a2
|
|
+
|
|
+ and t0, t1, t3
|
|
+ bnez t0, L(more_check)
|
|
+ ld.d t2, a0, 8
|
|
+ addi.d a0, a0, 16
|
|
+
|
|
+ sub.d t1, t2, a2
|
|
+ and t0, t1, t3
|
|
+ beqz t0, L(loop_16_7bit)
|
|
+ addi.d a0, a0, -8
|
|
+L(more_check):
|
|
+ nor t0, t2, a3
|
|
+
|
|
+ and t0, t1, t0
|
|
+ bnez t0, L(count_pos)
|
|
+ addi.d a0, a0, 8
|
|
+L(loop_16_8bit):
|
|
+ ld.d t2, a0, 0
|
|
+
|
|
+ sub.d t1, t2, a2
|
|
+ nor t0, t2, a3
|
|
+ and t0, t0, t1
|
|
+ bnez t0, L(count_pos)
|
|
+
|
|
+ ld.d t2, a0, 8
|
|
+ addi.d a0, a0, 16
|
|
+ sub.d t1, t2, a2
|
|
+ nor t0, t2, a3
|
|
+
|
|
+ and t0, t0, t1
|
|
+ beqz t0, L(loop_16_8bit)
|
|
+ addi.d a0, a0, -8
|
|
+L(count_pos):
|
|
+ ctz.d t1, t0
|
|
+ sub.d a0, a0, a1
|
|
+
|
|
+ srli.d t1, t1, 3
|
|
+ add.d a0, a0, t1
|
|
+ jr ra
|
|
+
|
|
+END(STRLEN)
|
|
+
|
|
+#ifdef _LIBC
|
|
+libc_hidden_builtin_def (STRLEN)
|
|
+#endif
|
|
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
|
|
index f371b19e..2cd56c44 100644
|
|
--- a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
|
|
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
|
|
@@ -1,8 +1,258 @@
|
|
+#ifdef _LIBC
|
|
+#include <sysdep.h>
|
|
+#include <sys/regdef.h>
|
|
+#include <sys/asm.h>
|
|
+#else
|
|
+#include <sys/asm.h>
|
|
+#include <sys/regdef.h>
|
|
+#endif
|
|
|
|
#if IS_IN (libc)
|
|
-
|
|
#define STRNCMP __strncmp_aligned
|
|
-
|
|
+#else
|
|
+#define STRNCMP strncmp
|
|
#endif
|
|
|
|
-#include "../strncmp.S"
|
|
+/* int strncmp (const char *s1, const char *s2, size_t n); */
|
|
+
|
|
+LEAF(STRNCMP, 6)
|
|
+ beqz a2, L(ret0)
|
|
+ xor a4, a0, a1
|
|
+ lu12i.w t5, 0x01010
|
|
+ lu12i.w t6, 0x7f7f7
|
|
+
|
|
+ andi a3, a0, 0x7
|
|
+ ori t5, t5, 0x101
|
|
+ andi a4, a4, 0x7
|
|
+ ori t6, t6, 0xf7f
|
|
+
|
|
+ bstrins.d t5, t5, 63, 32
|
|
+ bstrins.d t6, t6, 63, 32
|
|
+
|
|
+ bnez a4, L(unalign)
|
|
+ bnez a3, L(mutual_align)
|
|
+
|
|
+L(a_loop):
|
|
+ ld.d t0, a0, 0
|
|
+ ld.d t1, a1, 0
|
|
+ addi.d a0, a0, 8
|
|
+ addi.d a1, a1, 8
|
|
+
|
|
+
|
|
+ sltui t7, a2, 9
|
|
+
|
|
+L(start_realign):
|
|
+ sub.d t2, t0, t5
|
|
+ nor t3, t0, t6
|
|
+ xor t4, t0, t1
|
|
+
|
|
+ and t2, t2, t3
|
|
+ addi.d a2, a2, -8
|
|
+
|
|
+ or t2, t2, t4
|
|
+ or t3, t2, t7
|
|
+ beqz t3, L(a_loop)
|
|
+
|
|
+L(end):
|
|
+ bge zero, t7, L(out)
|
|
+ andi t4, a2, 7
|
|
+ li.d t3, -1
|
|
+ addi.d t4, t4, -1
|
|
+ slli.d t4, t4, 3
|
|
+ sll.d t3, t3, t4
|
|
+ or t2, t2, t3
|
|
+
|
|
+
|
|
+L(out):
|
|
+ ctz.d t3, t2
|
|
+ bstrins.d t3, zero, 2, 0
|
|
+ srl.d t0, t0, t3
|
|
+ srl.d t1, t1, t3
|
|
+
|
|
+ andi t0, t0, 0xff
|
|
+ andi t1, t1, 0xff
|
|
+ sub.d a0, t0, t1
|
|
+ jr ra
|
|
+
|
|
+L(mutual_align):
|
|
+ bstrins.d a0, zero, 2, 0
|
|
+ bstrins.d a1, zero, 2, 0
|
|
+ slli.d a5, a3, 0x3
|
|
+ li.d t2, -1
|
|
+
|
|
+ ld.d t0, a0, 0
|
|
+ ld.d t1, a1, 0
|
|
+
|
|
+ li.d t3, 9
|
|
+ sll.d t2, t2, a5
|
|
+
|
|
+ sub.d t3, t3, a3
|
|
+ addi.d a0, a0, 8
|
|
+
|
|
+ sltu t7, a2, t3
|
|
+ addi.d a1, a1, 8
|
|
+
|
|
+ add.d a2, a2, a3
|
|
+ orn t0, t0, t2
|
|
+ orn t1, t1, t2
|
|
+ b L(start_realign)
|
|
+
|
|
+L(ret0):
|
|
+ move a0, zero
|
|
+ jr ra
|
|
+
|
|
+L(unalign):
|
|
+ li.d t8, 8
|
|
+ blt a2, t8, L(short_cmp)
|
|
+
|
|
+ # swap a0 and a1 in case a3 > a4
|
|
+ andi a4, a1, 0x7
|
|
+ sltu t8, a4, a3
|
|
+ xor a6, a0, a1
|
|
+ maskeqz a6, a6, t8
|
|
+ xor a0, a0, a6
|
|
+ xor a1, a1, a6
|
|
+
|
|
+ andi a3, a0, 0x7
|
|
+ andi a4, a1, 0x7
|
|
+
|
|
+ bstrins.d a0, zero, 2, 0
|
|
+ bstrins.d a1, zero, 2, 0
|
|
+
|
|
+ li.d t2, -1
|
|
+ li.d t3, 9
|
|
+
|
|
+ ld.d t0, a0, 0
|
|
+ ld.d t1, a1, 0
|
|
+
|
|
+ sub.d t3, t3, a4
|
|
+ sub.d a3, a4, a3
|
|
+
|
|
+ slli.d t4, a4, 3
|
|
+ slli.d a6, a3, 3
|
|
+
|
|
+ sub.d a5, zero, a6
|
|
+ sltu t7, a2, t3
|
|
+
|
|
+ rotr.d a7, t0, a5
|
|
+ sll.d t4, t2, t4 # mask for first num
|
|
+
|
|
+ add.d a2, a2, a4
|
|
+ sll.d a4, t2, a6 # mask for a7
|
|
+
|
|
+ orn t0, a7, t4
|
|
+ orn t1, t1, t4
|
|
+
|
|
+ sub.d t2, t0, t5
|
|
+ nor t4, t0, t6
|
|
+ and t2, t2, t4
|
|
+
|
|
+ xor t3, t0, t1
|
|
+ or t2, t2, t3
|
|
+
|
|
+ or t3, t2, t7
|
|
+ bnez t3, L(un_end)
|
|
+
|
|
+ andn a7, a7, a4
|
|
+ addi.d a3, a3, 1
|
|
+
|
|
+L(un_loop):
|
|
+ addi.d a2, a2, -8
|
|
+ # in case remaining part has '\0', no more load instructions should be executed on a0 address
|
|
+ or t0, a7, a4
|
|
+ sltu t7, a2, a3
|
|
+
|
|
+ sub.d t2, t0, t5
|
|
+ nor t3, t0, t6
|
|
+ and t2, t2, t3
|
|
+
|
|
+ or t3, t2, t7
|
|
+ bnez t3, L(check_remaining)
|
|
+
|
|
+ ld.d t7, a0, 8
|
|
+ ld.d t1, a1, 8
|
|
+ addi.d a0, a0, 8
|
|
+ addi.d a1, a1, 8
|
|
+
|
|
+ sll.d t4, t7, a6
|
|
+ sub.d t2, t1, t5
|
|
+ nor t3, t1, t6
|
|
+
|
|
+ or t0, t4, a7
|
|
+ srl.d a7, t7, a5
|
|
+
|
|
+ and t2, t2, t3
|
|
+ xor t3, t0, t1
|
|
+
|
|
+ sltui t7, a2, 9
|
|
+ or t2, t2, t3
|
|
+
|
|
+ or t3, t2, t7
|
|
+ beqz t3, L(un_loop)
|
|
+ b L(un_end)
|
|
+
|
|
+L(check_remaining):
|
|
+ ld.d t1, a1, 8
|
|
+ xor t3, t1, a7
|
|
+ or t2, t2, t3
|
|
+
|
|
+L(un_end):
|
|
+ bge zero, t7, L(un_out)
|
|
+ andi t4, a2, 7
|
|
+ li.d t3, -1
|
|
+
|
|
+ addi.d t4, t4, -1
|
|
+ slli.d t4, t4, 3
|
|
+ sll.d t3, t3, t4
|
|
+ or t2, t2, t3
|
|
+
|
|
+L(un_out):
|
|
+ ctz.d t3, t2
|
|
+ bstrins.d t3, zero, 2, 0
|
|
+ srl.d t0, t0, t3
|
|
+ srl.d t1, t1, t3
|
|
+
|
|
+ andi t0, t0, 0xff
|
|
+ andi t1, t1, 0xff
|
|
+
|
|
+ sub.d a4, t0, t1
|
|
+ sub.d a5, t1, t0
|
|
+
|
|
+ maskeqz a6, a5, t8
|
|
+ masknez a0, a4, t8
|
|
+
|
|
+ or a0, a0, a6
|
|
+ jr ra
|
|
+
|
|
+L(short_cmp):
|
|
+ ld.bu t0, a0, 0
|
|
+ ld.bu t1, a1, 0
|
|
+ addi.d a2, a2, -1
|
|
+
|
|
+ xor t2, t0, t1
|
|
+ masknez t2, t0, t2
|
|
+ maskeqz t2, a2, t2
|
|
+
|
|
+ beqz t2, L(short_out)
|
|
+
|
|
+ ld.bu t0, a0, 1
|
|
+ ld.bu t1, a1, 1
|
|
+
|
|
+ addi.d a2, a2, -1
|
|
+ addi.d a0, a0, 2
|
|
+
|
|
+ addi.d a1, a1, 2
|
|
+ xor t2, t0, t1
|
|
+ masknez t2, t0, t2
|
|
+ maskeqz t2, a2, t2
|
|
+
|
|
+ bnez t2, L(short_cmp)
|
|
+
|
|
+L(short_out):
|
|
+ sub.d a0, t0, t1
|
|
+ jr ra
|
|
+
|
|
+END(STRNCMP)
|
|
+#ifdef _LIBC
|
|
+libc_hidden_builtin_def (STRNCMP)
|
|
+#endif
|
|
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
|
|
index 503442b3..78c8fd5d 100644
|
|
--- a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
|
|
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
|
|
@@ -1,8 +1,84 @@
|
|
+#ifdef _LIBC
|
|
+#include <sysdep.h>
|
|
+#include <sys/regdef.h>
|
|
+#include <sys/asm.h>
|
|
+#else
|
|
+#include <sys/asm.h>
|
|
+#include <sys/regdef.h>
|
|
+#endif
|
|
|
|
#if IS_IN (libc)
|
|
-
|
|
#define STRNLEN __strnlen_aligned
|
|
-
|
|
+#else
|
|
+#define STRNLEN __strnlen
|
|
#endif
|
|
|
|
-#include "../strnlen.S"
|
|
+#. before every load, a1(t5) must be > 0;
+#. on the first load, if t1 != 0, t5 needs to be adjusted;
+#. return the lesser of strlen(s) and a1;
|
|
+
|
|
+LEAF(STRNLEN, 6)
|
|
+ beqz a1, L(out)
|
|
+ lu12i.w a2, 0x01010
|
|
+ andi t1, a0, 0x7
|
|
+ move t4, a0
|
|
+
|
|
+ bstrins.d a0, zero, 2, 0
|
|
+ ori a2, a2, 0x101
|
|
+ li.w t0, -1
|
|
+ ld.d t2, a0, 0
|
|
+
|
|
+ slli.d t3, t1, 3
|
|
+ bstrins.d a2, a2, 63, 32
|
|
+ li.w t5, 8
|
|
+ slli.d a3, a2, 7
|
|
+
|
|
+ sub.w t1, t5, t1
|
|
+ sll.d t0, t0, t3
|
|
+ nor a3, zero, a3
|
|
+ orn t2, t2, t0
|
|
+
|
|
+
|
|
+ sub.d t0, t2, a2
|
|
+ nor t3, t2, a3
|
|
+ and t0, t0, t3
|
|
+ bnez t0, L(count_pos)
|
|
+
|
|
+ sub.d t5, a1, t1
|
|
+ bgeu t1, a1, L(out)
|
|
+L(loop_8bytes):
|
|
+ ld.d t2, a0, 8
|
|
+ addi.d a0, a0, 8
|
|
+
|
|
+ sub.d t0, t2, a2
|
|
+ nor t1, t2, a3
|
|
+ sltui t6, t5, 9
|
|
+ and t0, t0, t1
|
|
+
|
|
+ addi.d t5, t5, -8
|
|
+ or t7, t0, t6
|
|
+ beqz t7, L(loop_8bytes)
|
|
+L(count_pos):
|
|
+ ctz.d t1, t0
|
|
+
|
|
+
|
|
+ sub.d a0, a0, t4
|
|
+ srli.d t1, t1, 3
|
|
+ add.d a0, t1, a0
|
|
+ sltu t0, a0, a1
|
|
+
|
|
+ masknez t1, a1, t0
|
|
+ maskeqz a0, a0, t0
|
|
+ or a0, a0, t1
|
|
+ jr ra
|
|
+
|
|
+L(out):
|
|
+ move a0, a1
|
|
+ jr ra
|
|
+
|
|
+END(STRNLEN)
|
|
+
|
|
+#ifdef _LIBC
|
|
+weak_alias (STRNLEN, strnlen)
|
|
+libc_hidden_builtin_def (STRNLEN)
|
|
+#endif
|
|
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
|
|
index a58ddde8..6931045b 100644
|
|
--- a/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
|
|
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
|
|
@@ -1,11 +1,110 @@
|
|
+#ifdef _LIBC
|
|
+#include <sysdep.h>
|
|
+#include <sys/regdef.h>
|
|
+#include <sys/asm.h>
|
|
+#else
|
|
+#include <sys/asm.h>
|
|
+#include <sys/regdef.h>
|
|
+#endif
|
|
|
|
#if IS_IN (libc)
|
|
-
|
|
#define STRRCHR_NAME __strrchr_aligned
|
|
-
|
|
+#else
|
|
+#define STRRCHR_NAME strrchr
|
|
#endif
|
|
|
|
-#include "../strrchr.S"
|
|
+LEAF(STRRCHR_NAME, 6)
|
|
+ slli.d t1, a0, 3
|
|
+ bstrins.d a0, zero, 2, 0
|
|
+ lu12i.w a2, 0x01010
|
|
+ ld.d t2, a0, 0 // t2 = "5ZZ21abc"
|
|
+
|
|
+ ori a2, a2, 0x101
|
|
+ andi a1, a1, 0xff // a1 = "0000000Z"
|
|
+ li.d a5, -1
|
|
+ bstrins.d a2, a2, 63, 32 // a2 = 0x0101010101010101
|
|
+
|
|
+ sll.d t1, a5, t1 // t1 = 0xffffffffff000000
|
|
+ mul.d a1, a1, a2 // a1 = "ZZZZZZZZ"
|
|
+ orn t2, t2, t1 // t2 = "5ZZ21YYY"
|
|
+ slli.d a3, a2, 7 // a3 = 0x8080808080808080
|
|
+
|
|
+ sub.d a4, t2, a2
|
|
+ andn t0, a3, t2
|
|
+ move t3, zero
|
|
+ and t0, a4, t0
|
|
+
|
|
+
|
|
+ xor a4, t2, a1
|
|
+ move t5, zero
|
|
+ orn a4, a4, t1
|
|
+ bnez t0, L(found_end)
|
|
+
|
|
+ sub.d t1, a4, a2
|
|
+ andn t0, a3, a4
|
|
+ and t1, t1, t0
|
|
+
|
|
+L(loop_8bytes):
|
|
+ masknez t4, t3, t1
|
|
+
|
|
+ maskeqz t3, t2, t1
|
|
+ ld.d t2, a0, 8
|
|
+ masknez t0, t5, t1
|
|
+ maskeqz t5, a0, t1
|
|
+
|
|
+ or t3, t3, t4
|
|
+ or t5, t0, t5
|
|
+ sub.d t0, t2, a2
|
|
+ andn t1, a3, t2
|
|
+
|
|
+
|
|
+ xor a4, t2, a1
|
|
+ and t0, t0, t1 //t0 hold diff pattern for '\0'
|
|
+ sub.d t1, a4, a2
|
|
+ andn t4, a3, a4
|
|
+
|
|
+ and t1, t1, t4 //t1 hold diff pattern for 'a1'
|
|
+ addi.d a0, a0, 8
|
|
+ beqz t0, L(loop_8bytes) //ok, neither \0 nor found
|
|
+L(found_end):
|
|
+ ctz.d t1, t0
|
|
+
|
|
+ xor t3, t3, a1
|
|
+ orn t1, zero, t1
|
|
+ revb.d t3, t3
|
|
+ srl.d t1, a5, t1 // mask for '\0'
|
|
+
|
|
+ sub.d t4, t3, a2
|
|
+ orn a4, a4, t1
|
|
+ andn t3, a3, t3
|
|
+ revb.d t2, a4
|
|
+
|
|
+ sub.d t0, t2, a2
|
|
+ andn t1, a3, t2
|
|
+ and t3, t3, t4
|
|
+ and t1, t0, t1
|
|
+
|
|
+ li.d t7, 7
|
|
+ masknez t4, t3, t1
|
|
+ maskeqz t3, t1, t1
|
|
+ masknez t5, t5, t1
|
|
+
|
|
+ or t3, t3, t4
|
|
+ maskeqz t6, a0, t1
|
|
+ ctz.d t0, t3
|
|
+ or t5, t6, t5
|
|
+
|
|
+ srli.d t0, t0, 3
|
|
+ sub.d t0, t7, t0
|
|
+ add.d a0, t5, t0
|
|
+ maskeqz a0, a0, t3
|
|
+
|
|
+ jr ra
|
|
+END(STRRCHR_NAME)
|
|
+
|
|
+#ifdef _LIBC
|
|
+libc_hidden_builtin_def(STRRCHR_NAME)
|
|
+#endif
|
|
|
|
#undef rindex
|
|
weak_alias(STRRCHR_NAME, rindex)
|
|
diff --git a/sysdeps/loongarch/lp64/stpcpy.S b/sysdeps/loongarch/lp64/stpcpy.S
|
|
deleted file mode 100644
|
|
index b6a367dc..00000000
|
|
--- a/sysdeps/loongarch/lp64/stpcpy.S
|
|
+++ /dev/null
|
|
@@ -1,179 +0,0 @@
|
|
-#ifdef _LIBC
|
|
-#include <sysdep.h>
|
|
-#include <sys/regdef.h>
|
|
-#include <sys/asm.h>
|
|
-#else
|
|
-#include <sys/asm.h>
|
|
-#include <sys/regdef.h>
|
|
-#endif
|
|
-
|
|
-#ifndef STPCPY_NAME
|
|
-#define STPCPY_NAME __stpcpy
|
|
-#endif
|
|
-
|
|
-LEAF(STPCPY_NAME, 6)
|
|
- andi a3, a0, 0x7
|
|
- beqz a3, L(dest_align)
|
|
- sub.d a5, a1, a3
|
|
- addi.d a5, a5, 8
|
|
-
|
|
-L(make_dest_align):
|
|
- ld.b t0, a1, 0
|
|
- addi.d a1, a1, 1
|
|
- st.b t0, a0, 0
|
|
- addi.d a0, a0, 1
|
|
-
|
|
- beqz t0, L(al_out)
|
|
- bne a1, a5, L(make_dest_align)
|
|
-
|
|
-L(dest_align):
|
|
- andi a4, a1, 7
|
|
- bstrins.d a1, zero, 2, 0
|
|
-
|
|
- lu12i.w t5, 0x1010
|
|
- ld.d t0, a1, 0
|
|
- ori t5, t5, 0x101
|
|
- bstrins.d t5, t5, 63, 32
|
|
-
|
|
- slli.d t6, t5, 0x7
|
|
- bnez a4, L(unalign)
|
|
- sub.d t1, t0, t5
|
|
- andn t2, t6, t0
|
|
-
|
|
- and t3, t1, t2
|
|
- bnez t3, L(al_end)
|
|
-
|
|
-L(al_loop):
|
|
- st.d t0, a0, 0
|
|
- ld.d t0, a1, 8
|
|
-
|
|
- addi.d a1, a1, 8
|
|
- addi.d a0, a0, 8
|
|
- sub.d t1, t0, t5
|
|
- andn t2, t6, t0
|
|
-
|
|
- and t3, t1, t2
|
|
- beqz t3, L(al_loop)
|
|
-
|
|
-L(al_end):
|
|
- ctz.d t1, t3
|
|
- srli.d t1, t1, 3
|
|
- addi.d t1, t1, 1 # add 1, since '\0' needs to be copied to dest
|
|
-
|
|
- andi a3, t1, 8
|
|
- andi a4, t1, 4
|
|
- andi a5, t1, 2
|
|
- andi a6, t1, 1
|
|
-
|
|
-L(al_end_8):
|
|
- beqz a3, L(al_end_4)
|
|
- st.d t0, a0, 0
|
|
- addi.d a0, a0, 7
|
|
- jr ra
|
|
-L(al_end_4):
|
|
- beqz a4, L(al_end_2)
|
|
- st.w t0, a0, 0
|
|
- addi.d a0, a0, 4
|
|
- srli.d t0, t0, 32
|
|
-L(al_end_2):
|
|
- beqz a5, L(al_end_1)
|
|
- st.h t0, a0, 0
|
|
- addi.d a0, a0, 2
|
|
- srli.d t0, t0, 16
|
|
-L(al_end_1):
|
|
- beqz a6, L(al_out)
|
|
- st.b t0, a0, 0
|
|
- addi.d a0, a0, 1
|
|
-L(al_out):
|
|
- addi.d a0, a0, -1
|
|
- jr ra
|
|
-
|
|
-L(unalign):
|
|
- slli.d a5, a4, 3
|
|
- li.d t1, -1
|
|
- sub.d a6, zero, a5
|
|
-
|
|
- srl.d a7, t0, a5
|
|
- sll.d t7, t1, a6
|
|
-
|
|
- or t0, a7, t7
|
|
- sub.d t1, t0, t5
|
|
- andn t2, t6, t0
|
|
- and t3, t1, t2
|
|
-
|
|
- bnez t3, L(un_end)
|
|
-
|
|
- ld.d t4, a1, 8
|
|
- addi.d a1, a1, 8
|
|
-
|
|
- sub.d t1, t4, t5
|
|
- andn t2, t6, t4
|
|
- sll.d t0, t4, a6
|
|
- and t3, t1, t2
|
|
-
|
|
- or t0, t0, a7
|
|
- bnez t3, L(un_end_with_remaining)
|
|
-
|
|
-L(un_loop):
|
|
- srl.d a7, t4, a5
|
|
-
|
|
- ld.d t4, a1, 8
|
|
- addi.d a1, a1, 8
|
|
-
|
|
- st.d t0, a0, 0
|
|
- addi.d a0, a0, 8
|
|
-
|
|
- sub.d t1, t4, t5
|
|
- andn t2, t6, t4
|
|
- sll.d t0, t4, a6
|
|
- and t3, t1, t2
|
|
-
|
|
- or t0, t0, a7
|
|
- beqz t3, L(un_loop)
|
|
-
|
|
-L(un_end_with_remaining):
|
|
- ctz.d t1, t3
|
|
- srli.d t1, t1, 3
|
|
- addi.d t1, t1, 1
|
|
- sub.d t1, t1, a4
|
|
-
|
|
- blt t1, zero, L(un_end_less_8)
|
|
- st.d t0, a0, 0
|
|
- addi.d a0, a0, 8
|
|
- beqz t1, L(un_out)
|
|
- srl.d t0, t4, a5 # get the remaining part
|
|
- b L(un_end_less_8)
|
|
-
|
|
-L(un_end):
|
|
- ctz.d t1, t3
|
|
- srli.d t1, t1, 3
|
|
- addi.d t1, t1, 1
|
|
-
|
|
-L(un_end_less_8):
|
|
- andi a4, t1, 4
|
|
- andi a5, t1, 2
|
|
- andi a6, t1, 1
|
|
-L(un_end_4):
|
|
- beqz a4, L(un_end_2)
|
|
- st.w t0, a0, 0
|
|
- addi.d a0, a0, 4
|
|
- srli.d t0, t0, 32
|
|
-L(un_end_2):
|
|
- beqz a5, L(un_end_1)
|
|
- st.h t0, a0, 0
|
|
- addi.d a0, a0, 2
|
|
- srli.d t0, t0, 16
|
|
-L(un_end_1):
|
|
- beqz a6, L(un_out)
|
|
- st.b t0, a0, 0
|
|
- addi.d a0, a0, 1
|
|
-L(un_out):
|
|
- addi.d a0, a0, -1
|
|
- jr ra
|
|
-
|
|
-END(STPCPY_NAME)
|
|
-
|
|
-#ifdef _LIBC
|
|
-weak_alias (STPCPY_NAME, stpcpy)
|
|
-libc_hidden_builtin_def (STPCPY_NAME)
|
|
-#endif
|
|
diff --git a/sysdeps/loongarch/lp64/strchr.S b/sysdeps/loongarch/lp64/strchr.S
|
|
deleted file mode 100644
|
|
index fde53a30..00000000
|
|
--- a/sysdeps/loongarch/lp64/strchr.S
|
|
+++ /dev/null
|
|
@@ -1,89 +0,0 @@
|
|
-#ifdef _LIBC
|
|
-#include <sysdep.h>
|
|
-#include <sys/regdef.h>
|
|
-#include <sys/asm.h>
|
|
-#else
|
|
-#include <sys/asm.h>
|
|
-#include <sys/regdef.h>
|
|
-#endif
|
|
-
|
|
-#ifndef STRCHR_NAME
|
|
-#define STRCHR_NAME strchr
|
|
-#endif
|
|
-
|
|
-/* char * strchr (const char *s1, int c); */
|
|
-
|
|
-LEAF(STRCHR_NAME, 6)
|
|
- slli.d t1, a0, 3
|
|
- bstrins.d a0, zero, 2, 0
|
|
- lu12i.w a2, 0x01010
|
|
- ld.d t2, a0, 0
|
|
-
|
|
- ori a2, a2, 0x101
|
|
- andi a1, a1, 0xff
|
|
- bstrins.d a2, a2, 63, 32
|
|
- li.w t0, -1
|
|
-
|
|
- mul.d a1, a1, a2 # "cccccccc"
|
|
- sll.d t0, t0, t1
|
|
- slli.d a3, a2, 7 # 0x8080808080808080
|
|
- orn t2, t2, t0
|
|
-
|
|
- sll.d t3, a1, t1
|
|
- xor t4, t2, t3
|
|
- sub.d a7, t2, a2
|
|
- andn a6, a3, t2
|
|
-
|
|
-
|
|
- sub.d a5, t4, a2
|
|
- andn a4, a3, t4
|
|
- and a6, a7, a6
|
|
- and a5, a5, a4
|
|
-
|
|
- or t0, a6, a5
|
|
- bnez t0, L(_mc8_a)
|
|
- addi.d a0, a0, 8
|
|
-L(_aloop):
|
|
- ld.d t4, a0, 0
|
|
-
|
|
- xor t2, t4, a1
|
|
- sub.d a7, t4, a2
|
|
- andn a6, a3, t4
|
|
- sub.d a5, t2, a2
|
|
-
|
|
- andn a4, a3, t2
|
|
- and a6, a7, a6
|
|
- and a5, a5, a4
|
|
- or a7, a6, a5
|
|
-
|
|
-
|
|
- bnez a7, L(_mc8_a)
|
|
- ld.d t4, a0, 8
|
|
- addi.d a0, a0, 16
|
|
- xor t2, t4, a1
|
|
-
|
|
- sub.d a7, t4, a2
|
|
- andn a6, a3, t4
|
|
- sub.d a5, t2, a2
|
|
- andn a4, a3, t2
|
|
-
|
|
- and a6, a7, a6
|
|
- and a5, a5, a4
|
|
- or a7, a6, a5
|
|
- beqz a7, L(_aloop)
|
|
-
|
|
- addi.d a0, a0, -8
|
|
-
|
|
-L(_mc8_a):
|
|
- ctz.d t0, a5
|
|
- ctz.d t2, a6
|
|
- srli.w t0, t0, 3
|
|
-
|
|
-
|
|
- srli.w t2, t2, 3
|
|
- sltu t1, t2, t0
|
|
- add.d a0, a0, t0
|
|
- masknez a0, a0, t1
|
|
-
|
|
- jr ra
|
|
-END(STRCHR_NAME)
|
|
diff --git a/sysdeps/loongarch/lp64/strchrnul.S b/sysdeps/loongarch/lp64/strchrnul.S
|
|
deleted file mode 100644
|
|
index a5ee09a3..00000000
|
|
--- a/sysdeps/loongarch/lp64/strchrnul.S
|
|
+++ /dev/null
|
|
@@ -1,94 +0,0 @@
|
|
-#ifdef _LIBC
|
|
-#include <sysdep.h>
|
|
-#include <sys/regdef.h>
|
|
-#include <sys/asm.h>
|
|
-#else
|
|
-#include <sys/asm.h>
|
|
-#include <sys/regdef.h>
|
|
-#endif
|
|
-
|
|
-#ifndef STRCHRNUL_NAME
|
|
-#define STRCHRNUL_NAME __strchrnul
|
|
-#endif
|
|
-
|
|
-/* char * strchrnul (const char *s1, int c); */
|
|
-
|
|
-LEAF(STRCHRNUL_NAME, 6)
|
|
- slli.d t1, a0, 3
|
|
- bstrins.d a0, zero, 2, 0
|
|
- lu12i.w a2, 0x01010
|
|
- ld.d t2, a0, 0
|
|
-
|
|
- ori a2, a2, 0x101
|
|
- andi a1, a1, 0xff
|
|
- bstrins.d a2, a2, 63, 32
|
|
- li.w t0, -1
|
|
-
|
|
- mul.d a1, a1, a2 # "cccccccc"
|
|
- sll.d t0, t0, t1
|
|
- slli.d a3, a2, 7 # 0x8080808080808080
|
|
- orn t2, t2, t0
|
|
-
|
|
- sll.d t3, a1, t1
|
|
- xor t4, t2, t3
|
|
- sub.d a7, t2, a2
|
|
- andn a6, a3, t2
|
|
-
|
|
-
|
|
- sub.d a5, t4, a2
|
|
- andn a4, a3, t4
|
|
- and a6, a7, a6
|
|
- and a5, a5, a4
|
|
-
|
|
- or t0, a6, a5
|
|
- bnez t0, L(_mc8_a)
|
|
- addi.d a0, a0, 8
|
|
-L(_aloop):
|
|
- ld.d t4, a0, 0
|
|
-
|
|
- xor t2, t4, a1
|
|
- sub.d a7, t4, a2
|
|
- andn a6, a3, t4
|
|
- sub.d a5, t2, a2
|
|
-
|
|
- andn a4, a3, t2
|
|
- and a6, a7, a6
|
|
- and a5, a5, a4
|
|
- or a7, a6, a5
|
|
-
|
|
-
|
|
- bnez a7, L(_mc8_a)
|
|
- ld.d t4, a0, 8
|
|
- addi.d a0, a0, 16
|
|
- xor t2, t4, a1
|
|
-
|
|
- sub.d a7, t4, a2
|
|
- andn a6, a3, t4
|
|
- sub.d a5, t2, a2
|
|
- andn a4, a3, t2
|
|
-
|
|
- and a6, a7, a6
|
|
- and a5, a5, a4
|
|
- or a7, a6, a5
|
|
- beqz a7, L(_aloop)
|
|
-
|
|
- addi.d a0, a0, -8
|
|
-L(_mc8_a):
|
|
- ctz.d t0, a5
|
|
- ctz.d t2, a6
|
|
- srli.w t0, t0, 3
|
|
-
|
|
- srli.w t2, t2, 3
|
|
- slt t1, t0, t2
|
|
- masknez t3, t2, t1
|
|
- maskeqz t4, t0, t1
|
|
-
|
|
- or t0, t3, t4
|
|
- add.d a0, a0, t0
|
|
- jr ra
|
|
-END(STRCHRNUL_NAME)
|
|
-
|
|
-#ifdef _LIBC
|
|
-weak_alias(STRCHRNUL_NAME, strchrnul)
|
|
-libc_hidden_builtin_def (STRCHRNUL_NAME)
|
|
-#endif
|
|
diff --git a/sysdeps/loongarch/lp64/strcmp.S b/sysdeps/loongarch/lp64/strcmp.S
|
|
deleted file mode 100644
|
|
index 3a863992..00000000
|
|
--- a/sysdeps/loongarch/lp64/strcmp.S
|
|
+++ /dev/null
|
|
@@ -1,227 +0,0 @@
|
|
-/* 2022\06\15 loongarch64 author: chenxiaolong. */
|
|
-
|
|
-#ifdef _LIBC
|
|
-#include <sysdep.h>
|
|
-#include <sys/regdef.h>
|
|
-#include <sys/asm.h>
|
|
-#else
|
|
-#include <sys/asm.h>
|
|
-#include <sys/regdef.h>
|
|
-#endif
|
|
-
|
|
-#ifndef STRCMP_NAME
|
|
-#define STRCMP_NAME strcmp
|
|
-#endif
|
|
-
|
|
-/* int strcmp (const char *s1, const char *s2); */
|
|
-
|
|
-/* Parameters and Results */
|
|
-#define src1 a0
|
|
-#define src2 a1
|
|
-#define result v0
|
|
-LEAF(STRCMP_NAME, 6)
|
|
- xor a4, src1, src2
|
|
- lu12i.w t5, 0x01010
|
|
- lu12i.w t6, 0x7f7f7
|
|
- andi a2, src1, 0x7
|
|
-
|
|
- ori t5, t5, 0x101
|
|
- andi a4, a4, 0x7
|
|
- ori t6, t6, 0xf7f
|
|
- bstrins.d t5, t5, 63, 32
|
|
- bstrins.d t6, t6, 63, 32
|
|
-
|
|
- bnez a4, 3f // unaligned
|
|
- beqz a2, 1f // loop aligned
|
|
-
|
|
-// mutual aligned
|
|
- bstrins.d src1, zero, 2, 0
|
|
- bstrins.d src2, zero, 2, 0
|
|
- slli.d a4, a2, 0x3
|
|
- ld.d t0, src1, 0
|
|
-
|
|
- sub.d a4, zero, a4
|
|
- ld.d t1, src2, 0
|
|
- addi.d src1, src1, 8
|
|
- addi.d src2, src2, 8
|
|
-
|
|
- nor a5, zero, zero
|
|
- srl.d a5, a5, a4
|
|
- or t0, t0, a5
|
|
-
|
|
- or t1, t1, a5
|
|
- b 2f //start realigned
|
|
-
|
|
-// loop aligned
|
|
-1:
|
|
- ld.d t0, src1, 0
|
|
- addi.d src1, src1, 8
|
|
- ld.d t1, src2, 0
|
|
- addi.d src2, src2, 8
|
|
-
|
|
-// start realigned:
|
|
-2:
|
|
- sub.d t2, t0, t5
|
|
- nor t3, t0, t6
|
|
- and t2, t2, t3
|
|
-
|
|
- xor t3, t0, t1
|
|
- or t2, t2, t3
|
|
- beqz t2, 1b
|
|
-
|
|
- ctz.d t7, t2
|
|
- bstrins.d t7, zero, 2, 0
|
|
- srl.d t0, t0, t7
|
|
- srl.d t1, t1, t7
|
|
-
|
|
- andi t0, t0, 0xff
|
|
- andi t1, t1, 0xff
|
|
- sub.d v0, t0, t1
|
|
- jr ra
|
|
-
|
|
-// unaligned
|
|
-3:
|
|
- andi a3, src2, 0x7
|
|
- slt a5, a2, a3
|
|
- masknez t8, a2, a5
|
|
- xor a6, src1, src2
|
|
- maskeqz a6, a6, t8
|
|
- xor src1, src1, a6
|
|
- xor src2, src2, a6
|
|
-
|
|
- andi a2, src1, 0x7
|
|
- beqz a2, 4f // src1 is aligned
|
|
-
|
|
-//strcmp_unaligned:
|
|
- andi a3, src2, 0x7
|
|
- bstrins.d src1, zero, 2, 0
|
|
- bstrins.d src2, zero, 2, 0
|
|
- nor t3, zero, zero
|
|
-
|
|
- ld.d t0, src1, 0
|
|
- ld.d t1, src2, 0
|
|
- sub.d a2, a3, a2
|
|
- addi.d t2, zero, 8
|
|
-
|
|
- sub.d a5, t2, a2
|
|
- sub.d a6, t2, a3
|
|
- slli.d a5, a5, 0x3
|
|
- slli.d a6, a6, 0x3
|
|
-
|
|
- srl.d t4, t3, a6
|
|
- srl.d a4, t3, a5
|
|
- rotr.d a7, t0, a5
|
|
-
|
|
- addi.d src2, src2, 8
|
|
- addi.d src1, src1, 8
|
|
- or t1, t1, t4
|
|
- or t0, a7, t4
|
|
-
|
|
- sub.d t2, t0, t5
|
|
- nor t3, t0, t6
|
|
- and t2, t2, t3
|
|
- xor t3, t0, t1
|
|
- or t2, t2, t3
|
|
- bnez t2, 7f
|
|
-
|
|
- and a7, a7, a4
|
|
- slli.d a6, a2, 0x3
|
|
- nor a4, zero, a4
|
|
- b 5f
|
|
-
|
|
-// src1 is aligned
|
|
-4:
|
|
- andi a3, src2, 0x7
|
|
- ld.d t0, src1, 0
|
|
-
|
|
- bstrins.d src2, zero, 2, 0
|
|
- nor t2, zero, zero
|
|
- ld.d t1, src2, 0
|
|
-
|
|
- addi.d t3, zero, 0x8
|
|
- sub.d a5, t3, a3
|
|
- slli.d a5, a5, 0x3
|
|
- srl.d a4, t2, a5
|
|
- rotr.d t4, t0, a5
|
|
-
|
|
- addi.d src2, src2, 8
|
|
- addi.d src1, src1, 8
|
|
- or t1, t1, a4
|
|
- or t0, t4, a4
|
|
-
|
|
- sub.d t2, t0, t5
|
|
- nor t3, t0, t6
|
|
- and t2, t2, t3
|
|
- xor t3, t0, t1
|
|
- or t2, t2, t3
|
|
-
|
|
- bnez t2, 7f
|
|
-
|
|
- and a7, t4, a4
|
|
- slli.d a6, a3, 0x3
|
|
- nor a4, zero, a4
|
|
-
|
|
-// unaligned loop
|
|
-// a7: remaining number
|
|
-// a6: shift left number
|
|
-// a5: shift right number
|
|
-// a4: mask for checking remaining number
|
|
-5:
|
|
- or t0, a7, a4
|
|
- sub.d t2, t0, t5
|
|
- nor t3, t0, t6
|
|
- and t2, t2, t3
|
|
- bnez t2, 6f
|
|
-
|
|
- ld.d t0, src1, 0
|
|
- addi.d src1, src1, 8
|
|
- ld.d t1, src2, 0
|
|
- addi.d src2, src2, 8
|
|
-
|
|
- srl.d t7, t0, a5
|
|
- sll.d t0, t0, a6
|
|
- or t0, a7, t0
|
|
-
|
|
- sub.d t2, t0, t5
|
|
- nor t3, t0, t6
|
|
- and t2, t2, t3
|
|
- xor t3, t0, t1
|
|
- or t2, t2, t3
|
|
- bnez t2, 7f
|
|
-
|
|
- or a7, t7, zero
|
|
- b 5b
|
|
-
|
|
-6:
|
|
- ld.bu t1, src2, 0
|
|
- andi t0, a7, 0xff
|
|
- xor t2, t0, t1
|
|
- srli.d a7, a7, 0x8
|
|
- masknez t2, t0, t2
|
|
- addi.d src2, src2, 1
|
|
- beqz t2, 8f
|
|
- b 6b
|
|
-
|
|
-7:
|
|
- ctz.d t7, t2
|
|
- bstrins.d t7, zero, 2, 0
|
|
- srl.d t0, t0, t7
|
|
- srl.d t1, t1, t7
|
|
-
|
|
- andi t0, t0, 0xff
|
|
- andi t1, t1, 0xff
|
|
-
|
|
-8:
|
|
- sub.d a4, t0, t1
|
|
- sub.d a5, t1, t0
|
|
- maskeqz a6, a5, t8
|
|
- masknez result, a4, t8
|
|
- or result, result, a6
|
|
- jr ra
|
|
-
|
|
-END(STRCMP_NAME)
|
|
-
|
|
-#ifdef _LIBC
|
|
-libc_hidden_builtin_def (STRCMP_NAME)
|
|
-#endif
|
|
-
|
|
diff --git a/sysdeps/loongarch/lp64/strcpy.S b/sysdeps/loongarch/lp64/strcpy.S
|
|
deleted file mode 100644
|
|
index 08505192..00000000
|
|
--- a/sysdeps/loongarch/lp64/strcpy.S
|
|
+++ /dev/null
|
|
@@ -1,173 +0,0 @@
|
|
-#ifdef _LIBC
|
|
-#include <sysdep.h>
|
|
-#include <sys/regdef.h>
|
|
-#include <sys/asm.h>
|
|
-#else
|
|
-#include <sys/asm.h>
|
|
-#include <sys/regdef.h>
|
|
-#endif
|
|
-
|
|
-#ifndef STRCPY
|
|
-#define STRCPY strcpy
|
|
-#endif
|
|
-
|
|
-LEAF(STRCPY, 6)
|
|
- andi a3, a0, 0x7
|
|
- move a2, a0
|
|
- beqz a3, L(dest_align)
|
|
- sub.d a5, a1, a3
|
|
- addi.d a5, a5, 8
|
|
-
|
|
-L(make_dest_align):
|
|
- ld.b t0, a1, 0
|
|
- addi.d a1, a1, 1
|
|
- st.b t0, a2, 0
|
|
- beqz t0, L(al_out)
|
|
-
|
|
- addi.d a2, a2, 1
|
|
- bne a1, a5, L(make_dest_align)
|
|
-
|
|
-L(dest_align):
|
|
- andi a4, a1, 7
|
|
- bstrins.d a1, zero, 2, 0
|
|
-
|
|
- lu12i.w t5, 0x1010
|
|
- ld.d t0, a1, 0
|
|
- ori t5, t5, 0x101
|
|
- bstrins.d t5, t5, 63, 32
|
|
-
|
|
- slli.d t6, t5, 0x7
|
|
- bnez a4, L(unalign)
|
|
- sub.d t1, t0, t5
|
|
- andn t2, t6, t0
|
|
-
|
|
- and t3, t1, t2
|
|
- bnez t3, L(al_end)
|
|
-
|
|
-L(al_loop):
|
|
- st.d t0, a2, 0
|
|
- ld.d t0, a1, 8
|
|
-
|
|
- addi.d a1, a1, 8
|
|
- addi.d a2, a2, 8
|
|
- sub.d t1, t0, t5
|
|
- andn t2, t6, t0
|
|
-
|
|
- and t3, t1, t2
|
|
- beqz t3, L(al_loop)
|
|
-
|
|
-L(al_end):
|
|
- ctz.d t1, t3
|
|
- srli.d t1, t1, 3
|
|
- addi.d t1, t1, 1 # add 1, since '\0' needs to be copied to dest
|
|
-
|
|
- andi a3, t1, 8
|
|
- andi a4, t1, 4
|
|
- andi a5, t1, 2
|
|
- andi a6, t1, 1
|
|
-
|
|
-L(al_end_8):
|
|
- beqz a3, L(al_end_4)
|
|
- st.d t0, a2, 0
|
|
- jr ra
|
|
-L(al_end_4):
|
|
- beqz a4, L(al_end_2)
|
|
- st.w t0, a2, 0
|
|
- addi.d a2, a2, 4
|
|
- srli.d t0, t0, 32
|
|
-L(al_end_2):
|
|
- beqz a5, L(al_end_1)
|
|
- st.h t0, a2, 0
|
|
- addi.d a2, a2, 2
|
|
- srli.d t0, t0, 16
|
|
-L(al_end_1):
|
|
- beqz a6, L(al_out)
|
|
- st.b t0, a2, 0
|
|
-L(al_out):
|
|
- jr ra
|
|
-
|
|
-L(unalign):
|
|
- slli.d a5, a4, 3
|
|
- li.d t1, -1
|
|
- sub.d a6, zero, a5
|
|
-
|
|
- srl.d a7, t0, a5
|
|
- sll.d t7, t1, a6
|
|
-
|
|
- or t0, a7, t7
|
|
- sub.d t1, t0, t5
|
|
- andn t2, t6, t0
|
|
- and t3, t1, t2
|
|
-
|
|
- bnez t3, L(un_end)
|
|
-
|
|
- ld.d t4, a1, 8
|
|
-
|
|
- sub.d t1, t4, t5
|
|
- andn t2, t6, t4
|
|
- sll.d t0, t4, a6
|
|
- and t3, t1, t2
|
|
-
|
|
- or t0, t0, a7
|
|
- bnez t3, L(un_end_with_remaining)
|
|
-
|
|
-L(un_loop):
|
|
- srl.d a7, t4, a5
|
|
-
|
|
- ld.d t4, a1, 16
|
|
- addi.d a1, a1, 8
|
|
-
|
|
- st.d t0, a2, 0
|
|
- addi.d a2, a2, 8
|
|
-
|
|
- sub.d t1, t4, t5
|
|
- andn t2, t6, t4
|
|
- sll.d t0, t4, a6
|
|
- and t3, t1, t2
|
|
-
|
|
- or t0, t0, a7
|
|
- beqz t3, L(un_loop)
|
|
-
|
|
-L(un_end_with_remaining):
|
|
- ctz.d t1, t3
|
|
- srli.d t1, t1, 3
|
|
- addi.d t1, t1, 1
|
|
- sub.d t1, t1, a4
|
|
-
|
|
- blt t1, zero, L(un_end_less_8)
|
|
- st.d t0, a2, 0
|
|
- addi.d a2, a2, 8
|
|
- beqz t1, L(un_out)
|
|
- srl.d t0, t4, a5 # get the remaining part
|
|
- b L(un_end_less_8)
|
|
-
|
|
-L(un_end):
|
|
- ctz.d t1, t3
|
|
- srli.d t1, t1, 3
|
|
- addi.d t1, t1, 1
|
|
-
|
|
-L(un_end_less_8):
|
|
- andi a4, t1, 4
|
|
- andi a5, t1, 2
|
|
- andi a6, t1, 1
|
|
-L(un_end_4):
|
|
- beqz a4, L(un_end_2)
|
|
- st.w t0, a2, 0
|
|
- addi.d a2, a2, 4
|
|
- srli.d t0, t0, 32
|
|
-L(un_end_2):
|
|
- beqz a5, L(un_end_1)
|
|
- st.h t0, a2, 0
|
|
- addi.d a2, a2, 2
|
|
- srli.d t0, t0, 16
|
|
-L(un_end_1):
|
|
- beqz a6, L(un_out)
|
|
- st.b t0, a2, 0
|
|
-L(un_out):
|
|
- jr ra
|
|
-
|
|
-END(STRCPY)
|
|
-
|
|
-#ifdef _LIBC
|
|
-libc_hidden_builtin_def (STRCPY)
|
|
-#endif
|
|
diff --git a/sysdeps/loongarch/lp64/strlen.S b/sysdeps/loongarch/lp64/strlen.S
|
|
deleted file mode 100644
|
|
index 71431ce2..00000000
|
|
--- a/sysdeps/loongarch/lp64/strlen.S
|
|
+++ /dev/null
|
|
@@ -1,85 +0,0 @@
|
|
-#ifdef _LIBC
|
|
-#include <sysdep.h>
|
|
-#include <sys/regdef.h>
|
|
-#include <sys/asm.h>
|
|
-#else
|
|
-#include <sys/asm.h>
|
|
-#include <sys/regdef.h>
|
|
-#endif
|
|
-
|
|
-#ifndef STRLEN
|
|
-#define STRLEN strlen
|
|
-#endif
|
|
-
|
|
-LEAF(STRLEN, 6)
|
|
- move a1, a0
|
|
- bstrins.d a0, zero, 2, 0
|
|
- lu12i.w a2, 0x01010
|
|
- li.w t0, -1
|
|
-
|
|
- ld.d t2, a0, 0
|
|
- andi t1, a1, 0x7
|
|
- ori a2, a2, 0x101
|
|
- slli.d t1, t1, 3
|
|
-
|
|
- bstrins.d a2, a2, 63, 32
|
|
- sll.d t1, t0, t1
|
|
- slli.d t3, a2, 7
|
|
- nor a3, zero, t3
|
|
-
|
|
- orn t2, t2, t1
|
|
- sub.d t0, t2, a2
|
|
- nor t1, t2, a3
|
|
- and t0, t0, t1
|
|
-
|
|
-
|
|
- bnez t0, L(count_pos)
|
|
- addi.d a0, a0, 8
|
|
-L(loop_16_7bit):
|
|
- ld.d t2, a0, 0
|
|
- sub.d t1, t2, a2
|
|
-
|
|
- and t0, t1, t3
|
|
- bnez t0, L(more_check)
|
|
- ld.d t2, a0, 8
|
|
- addi.d a0, a0, 16
|
|
-
|
|
- sub.d t1, t2, a2
|
|
- and t0, t1, t3
|
|
- beqz t0, L(loop_16_7bit)
|
|
- addi.d a0, a0, -8
|
|
-L(more_check):
|
|
- nor t0, t2, a3
|
|
-
|
|
- and t0, t1, t0
|
|
- bnez t0, L(count_pos)
|
|
- addi.d a0, a0, 8
|
|
-L(loop_16_8bit):
|
|
- ld.d t2, a0, 0
|
|
-
|
|
- sub.d t1, t2, a2
|
|
- nor t0, t2, a3
|
|
- and t0, t0, t1
|
|
- bnez t0, L(count_pos)
|
|
-
|
|
- ld.d t2, a0, 8
|
|
- addi.d a0, a0, 16
|
|
- sub.d t1, t2, a2
|
|
- nor t0, t2, a3
|
|
-
|
|
- and t0, t0, t1
|
|
- beqz t0, L(loop_16_8bit)
|
|
- addi.d a0, a0, -8
|
|
-L(count_pos):
|
|
- ctz.d t1, t0
|
|
- sub.d a0, a0, a1
|
|
-
|
|
- srli.d t1, t1, 3
|
|
- add.d a0, a0, t1
|
|
- jr ra
|
|
-
|
|
-END(STRLEN)
|
|
-
|
|
-#ifdef _LIBC
|
|
-libc_hidden_builtin_def (STRLEN)
|
|
-#endif
|
|
diff --git a/sysdeps/loongarch/lp64/strncmp.S b/sysdeps/loongarch/lp64/strncmp.S
|
|
deleted file mode 100644
|
|
index 55450e55..00000000
|
|
--- a/sysdeps/loongarch/lp64/strncmp.S
|
|
+++ /dev/null
|
|
@@ -1,256 +0,0 @@
|
|
-#ifdef _LIBC
|
|
-#include <sysdep.h>
|
|
-#include <sys/regdef.h>
|
|
-#include <sys/asm.h>
|
|
-#else
|
|
-#include <sys/asm.h>
|
|
-#include <sys/regdef.h>
|
|
-#endif
|
|
-
|
|
-#ifndef STRNCMP
|
|
-#define STRNCMP strncmp
|
|
-#endif
|
|
-
|
|
-/* int strncmp (const char *s1, const char *s2); */
|
|
-
|
|
-LEAF(STRNCMP, 6)
|
|
- beqz a2, L(ret0)
|
|
- xor a4, a0, a1
|
|
- lu12i.w t5, 0x01010
|
|
- lu12i.w t6, 0x7f7f7
|
|
-
|
|
- andi a3, a0, 0x7
|
|
- ori t5, t5, 0x101
|
|
- andi a4, a4, 0x7
|
|
- ori t6, t6, 0xf7f
|
|
-
|
|
- bstrins.d t5, t5, 63, 32
|
|
- bstrins.d t6, t6, 63, 32
|
|
-
|
|
- bnez a4, L(unalign)
|
|
- bnez a3, L(mutual_align)
|
|
-
|
|
-L(a_loop):
|
|
- ld.d t0, a0, 0
|
|
- ld.d t1, a1, 0
|
|
- addi.d a0, a0, 8
|
|
- addi.d a1, a1, 8
|
|
-
|
|
-
|
|
- sltui t7, a2, 9
|
|
-
|
|
-L(start_realign):
|
|
- sub.d t2, t0, t5
|
|
- nor t3, t0, t6
|
|
- xor t4, t0, t1
|
|
-
|
|
- and t2, t2, t3
|
|
- addi.d a2, a2, -8
|
|
-
|
|
- or t2, t2, t4
|
|
- or t3, t2, t7
|
|
- beqz t3, L(a_loop)
|
|
-
|
|
-L(end):
|
|
- bge zero, t7, L(out)
|
|
- andi t4, a2, 7
|
|
- li.d t3, -1
|
|
- addi.d t4, t4, -1
|
|
- slli.d t4, t4, 3
|
|
- sll.d t3, t3, t4
|
|
- or t2, t2, t3
|
|
-
|
|
-
|
|
-L(out):
|
|
- ctz.d t3, t2
|
|
- bstrins.d t3, zero, 2, 0
|
|
- srl.d t0, t0, t3
|
|
- srl.d t1, t1, t3
|
|
-
|
|
- andi t0, t0, 0xff
|
|
- andi t1, t1, 0xff
|
|
- sub.d a0, t0, t1
|
|
- jr ra
|
|
-
|
|
-L(mutual_align):
|
|
- bstrins.d a0, zero, 2, 0
|
|
- bstrins.d a1, zero, 2, 0
|
|
- slli.d a5, a3, 0x3
|
|
- li.d t2, -1
|
|
-
|
|
- ld.d t0, a0, 0
|
|
- ld.d t1, a1, 0
|
|
-
|
|
- li.d t3, 9
|
|
- sll.d t2, t2, a5
|
|
-
|
|
- sub.d t3, t3, a3
|
|
- addi.d a0, a0, 8
|
|
-
|
|
- sltu t7, a2, t3
|
|
- addi.d a1, a1, 8
|
|
-
|
|
- add.d a2, a2, a3
|
|
- orn t0, t0, t2
|
|
- orn t1, t1, t2
|
|
- b L(start_realign)
|
|
-
|
|
-L(ret0):
|
|
- move a0, zero
|
|
- jr ra
|
|
-
|
|
-L(unalign):
|
|
- li.d t8, 8
|
|
- blt a2, t8, L(short_cmp)
|
|
-
|
|
- # swap a0 and a1 in case a3 > a4
|
|
- andi a4, a1, 0x7
|
|
- sltu t8, a4, a3
|
|
- xor a6, a0, a1
|
|
- maskeqz a6, a6, t8
|
|
- xor a0, a0, a6
|
|
- xor a1, a1, a6
|
|
-
|
|
- andi a3, a0, 0x7
|
|
- andi a4, a1, 0x7
|
|
-
|
|
- bstrins.d a0, zero, 2, 0
|
|
- bstrins.d a1, zero, 2, 0
|
|
-
|
|
- li.d t2, -1
|
|
- li.d t3, 9
|
|
-
|
|
- ld.d t0, a0, 0
|
|
- ld.d t1, a1, 0
|
|
-
|
|
- sub.d t3, t3, a4
|
|
- sub.d a3, a4, a3
|
|
-
|
|
- slli.d t4, a4, 3
|
|
- slli.d a6, a3, 3
|
|
-
|
|
- sub.d a5, zero, a6
|
|
- sltu t7, a2, t3
|
|
-
|
|
- rotr.d a7, t0, a5
|
|
- sll.d t4, t2, t4 # mask for first num
|
|
-
|
|
- add.d a2, a2, a4
|
|
- sll.d a4, t2, a6 # mask for a7
|
|
-
|
|
- orn t0, a7, t4
|
|
- orn t1, t1, t4
|
|
-
|
|
- sub.d t2, t0, t5
|
|
- nor t4, t0, t6
|
|
- and t2, t2, t4
|
|
-
|
|
- xor t3, t0, t1
|
|
- or t2, t2, t3
|
|
-
|
|
- or t3, t2, t7
|
|
- bnez t3, L(un_end)
|
|
-
|
|
- andn a7, a7, a4
|
|
- addi.d a3, a3, 1
|
|
-
|
|
-L(un_loop):
|
|
- addi.d a2, a2, -8
|
|
- # in case remaining part has '\0', no more load instructions should be executed on a0 address
|
|
- or t0, a7, a4
|
|
- sltu t7, a2, a3
|
|
-
|
|
- sub.d t2, t0, t5
|
|
- nor t3, t0, t6
|
|
- and t2, t2, t3
|
|
-
|
|
- or t3, t2, t7
|
|
- bnez t3, L(check_remaining)
|
|
-
|
|
- ld.d t7, a0, 8
|
|
- ld.d t1, a1, 8
|
|
- addi.d a0, a0, 8
|
|
- addi.d a1, a1, 8
|
|
-
|
|
- sll.d t4, t7, a6
|
|
- sub.d t2, t1, t5
|
|
- nor t3, t1, t6
|
|
-
|
|
- or t0, t4, a7
|
|
- srl.d a7, t7, a5
|
|
-
|
|
- and t2, t2, t3
|
|
- xor t3, t0, t1
|
|
-
|
|
- sltui t7, a2, 9
|
|
- or t2, t2, t3
|
|
-
|
|
- or t3, t2, t7
|
|
- beqz t3, L(un_loop)
|
|
- b L(un_end)
|
|
-
|
|
-L(check_remaining):
|
|
- ld.d t1, a1, 8
|
|
- xor t3, t1, a7
|
|
- or t2, t2, t3
|
|
-
|
|
-L(un_end):
|
|
- bge zero, t7, L(un_out)
|
|
- andi t4, a2, 7
|
|
- li.d t3, -1
|
|
-
|
|
- addi.d t4, t4, -1
|
|
- slli.d t4, t4, 3
|
|
- sll.d t3, t3, t4
|
|
- or t2, t2, t3
|
|
-
|
|
-L(un_out):
|
|
- ctz.d t3, t2
|
|
- bstrins.d t3, zero, 2, 0
|
|
- srl.d t0, t0, t3
|
|
- srl.d t1, t1, t3
|
|
-
|
|
- andi t0, t0, 0xff
|
|
- andi t1, t1, 0xff
|
|
-
|
|
- sub.d a4, t0, t1
|
|
- sub.d a5, t1, t0
|
|
-
|
|
- maskeqz a6, a5, t8
|
|
- masknez a0, a4, t8
|
|
-
|
|
- or a0, a0, a6
|
|
- jr ra
|
|
-
|
|
-L(short_cmp):
|
|
- ld.bu t0, a0, 0
|
|
- ld.bu t1, a1, 0
|
|
- addi.d a2, a2, -1
|
|
-
|
|
- xor t2, t0, t1
|
|
- masknez t2, t0, t2
|
|
- maskeqz t2, a2, t2
|
|
-
|
|
- beqz t2, L(short_out)
|
|
-
|
|
- ld.bu t0, a0, 1
|
|
- ld.bu t1, a1, 1
|
|
-
|
|
- addi.d a2, a2, -1
|
|
- addi.d a0, a0, 2
|
|
-
|
|
- addi.d a1, a1, 2
|
|
- xor t2, t0, t1
|
|
- masknez t2, t0, t2
|
|
- maskeqz t2, a2, t2
|
|
-
|
|
- bnez t2, L(short_cmp)
|
|
-
|
|
-L(short_out):
|
|
- sub.d a0, t0, t1
|
|
- jr ra
|
|
-
|
|
-END(STRNCMP)
|
|
-#ifdef _LIBC
|
|
-libc_hidden_builtin_def (STRNCMP)
|
|
-#endif
|
|
diff --git a/sysdeps/loongarch/lp64/strnlen.S b/sysdeps/loongarch/lp64/strnlen.S
|
|
deleted file mode 100644
|
|
index 5b5ab585..00000000
|
|
--- a/sysdeps/loongarch/lp64/strnlen.S
|
|
+++ /dev/null
|
|
@@ -1,82 +0,0 @@
|
|
-#ifdef _LIBC
|
|
-#include <sysdep.h>
|
|
-#include <sys/regdef.h>
|
|
-#include <sys/asm.h>
|
|
-#else
|
|
-#include <sys/asm.h>
|
|
-#include <sys/regdef.h>
|
|
-#endif
|
|
-
|
|
-#ifndef STRNLEN
|
|
-#define STRNLEN __strnlen
|
|
-#endif
|
|
-
|
|
-#. before every load, a1(t5) must > 0;
|
|
-#. first load with t1 != 0, need to adjust t5;
|
|
-#. return the less one of both strlen(s) and a1;
|
|
-
|
|
-LEAF(STRNLEN, 6)
|
|
- beqz a1, L(out)
|
|
- lu12i.w a2, 0x01010
|
|
- andi t1, a0, 0x7
|
|
- move t4, a0
|
|
-
|
|
- bstrins.d a0, zero, 2, 0
|
|
- ori a2, a2, 0x101
|
|
- li.w t0, -1
|
|
- ld.d t2, a0, 0
|
|
-
|
|
- slli.d t3, t1, 3
|
|
- bstrins.d a2, a2, 63, 32
|
|
- li.w t5, 8
|
|
- slli.d a3, a2, 7
|
|
-
|
|
- sub.w t1, t5, t1
|
|
- sll.d t0, t0, t3
|
|
- nor a3, zero, a3
|
|
- orn t2, t2, t0
|
|
-
|
|
-
|
|
- sub.d t0, t2, a2
|
|
- nor t3, t2, a3
|
|
- and t0, t0, t3
|
|
- bnez t0, L(count_pos)
|
|
-
|
|
- sub.d t5, a1, t1
|
|
- bgeu t1, a1, L(out)
|
|
-L(loop_8bytes):
|
|
- ld.d t2, a0, 8
|
|
- addi.d a0, a0, 8
|
|
-
|
|
- sub.d t0, t2, a2
|
|
- nor t1, t2, a3
|
|
- sltui t6, t5, 9
|
|
- and t0, t0, t1
|
|
-
|
|
- addi.d t5, t5, -8
|
|
- or t7, t0, t6
|
|
- beqz t7, L(loop_8bytes)
|
|
-L(count_pos):
|
|
- ctz.d t1, t0
|
|
-
|
|
-
|
|
- sub.d a0, a0, t4
|
|
- srli.d t1, t1, 3
|
|
- add.d a0, t1, a0
|
|
- sltu t0, a0, a1
|
|
-
|
|
- masknez t1, a1, t0
|
|
- maskeqz a0, a0, t0
|
|
- or a0, a0, t1
|
|
- jr ra
|
|
-
|
|
-L(out):
|
|
- move a0, a1
|
|
- jr ra
|
|
-
|
|
-END(STRNLEN)
|
|
-
|
|
-#ifdef _LIBC
|
|
-weak_alias (STRNLEN, strnlen)
|
|
-libc_hidden_builtin_def (STRNLEN)
|
|
-#endif
|
|
diff --git a/sysdeps/loongarch/lp64/strrchr.S b/sysdeps/loongarch/lp64/strrchr.S
|
|
deleted file mode 100644
|
|
index df7fcb6b..00000000
|
|
--- a/sysdeps/loongarch/lp64/strrchr.S
|
|
+++ /dev/null
|
|
@@ -1,105 +0,0 @@
|
|
-#ifdef _LIBC
|
|
-#include <sysdep.h>
|
|
-#include <sys/regdef.h>
|
|
-#include <sys/asm.h>
|
|
-#else
|
|
-#include <sys/asm.h>
|
|
-#include <sys/regdef.h>
|
|
-#endif
|
|
-
|
|
-#ifndef STRRCHR_NAME
|
|
-#define STRRCHR_NAME strrchr
|
|
-#endif
|
|
-
|
|
-LEAF(STRRCHR_NAME, 6)
|
|
- slli.d t1, a0, 3
|
|
- bstrins.d a0, zero, 2, 0
|
|
- lu12i.w a2, 0x01010
|
|
- ld.d t2, a0, 0 // t2 = "5ZZ21abc"
|
|
-
|
|
- ori a2, a2, 0x101
|
|
- andi a1, a1, 0xff // a1 = "0000000Z"
|
|
- li.d a5, -1
|
|
- bstrins.d a2, a2, 63, 32 // a2 = 0x0101010101010101
|
|
-
|
|
- sll.d t1, a5, t1 // t1 = 0xffffffffff000000
|
|
- mul.d a1, a1, a2 // a1 = "ZZZZZZZZ"
|
|
- orn t2, t2, t1 // t2 = "5ZZ21YYY"
|
|
- slli.d a3, a2, 7 // a3 = 0x8080808080808080
|
|
-
|
|
- sub.d a4, t2, a2
|
|
- andn t0, a3, t2
|
|
- move t3, zero
|
|
- and t0, a4, t0
|
|
-
|
|
-
|
|
- xor a4, t2, a1
|
|
- move t5, zero
|
|
- orn a4, a4, t1
|
|
- bnez t0, L(found_end)
|
|
-
|
|
- sub.d t1, a4, a2
|
|
- andn t0, a3, a4
|
|
- and t1, t1, t0
|
|
-
|
|
-L(loop_8bytes):
|
|
- masknez t4, t3, t1
|
|
-
|
|
- maskeqz t3, t2, t1
|
|
- ld.d t2, a0, 8
|
|
- masknez t0, t5, t1
|
|
- maskeqz t5, a0, t1
|
|
-
|
|
- or t3, t3, t4
|
|
- or t5, t0, t5
|
|
- sub.d t0, t2, a2
|
|
- andn t1, a3, t2
|
|
-
|
|
-
|
|
- xor a4, t2, a1
|
|
- and t0, t0, t1 //t0 hold diff pattern for '\0'
|
|
- sub.d t1, a4, a2
|
|
- andn t4, a3, a4
|
|
-
|
|
- and t1, t1, t4 //t1 hold diff pattern for 'a1'
|
|
- addi.d a0, a0, 8
|
|
- beqz t0, L(loop_8bytes) //ok, neither \0 nor found
|
|
-L(found_end):
|
|
- ctz.d t1, t0
|
|
-
|
|
- xor t3, t3, a1
|
|
- orn t1, zero, t1
|
|
- revb.d t3, t3
|
|
- srl.d t1, a5, t1 // mask for '\0'
|
|
-
|
|
- sub.d t4, t3, a2
|
|
- orn a4, a4, t1
|
|
- andn t3, a3, t3
|
|
- revb.d t2, a4
|
|
-
|
|
- sub.d t0, t2, a2
|
|
- andn t1, a3, t2
|
|
- and t3, t3, t4
|
|
- and t1, t0, t1
|
|
-
|
|
- li.d t7, 7
|
|
- masknez t4, t3, t1
|
|
- maskeqz t3, t1, t1
|
|
- masknez t5, t5, t1
|
|
-
|
|
- or t3, t3, t4
|
|
- maskeqz t6, a0, t1
|
|
- ctz.d t0, t3
|
|
- or t5, t6, t5
|
|
-
|
|
- srli.d t0, t0, 3
|
|
- sub.d t0, t7, t0
|
|
- add.d a0, t5, t0
|
|
- maskeqz a0, a0, t3
|
|
-
|
|
- jr ra
|
|
-END(STRRCHR_NAME)
|
|
-
|
|
-#ifdef _LIBC
|
|
-libc_hidden_builtin_def(STRRCHR_NAME)
|
|
-#endif
|
|
--
2.33.0