LoongArch: Sync from glibc upstream
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
This commit is contained in:
parent
8f862e2d80
commit
62fe06b740
30 changed files with 12740 additions and 1 deletions
40
Decrease-value-of-arch_minimum_kernel-with-LoongArch.patch
Normal file
40
Decrease-value-of-arch_minimum_kernel-with-LoongArch.patch
Normal file
|
@ -0,0 +1,40 @@
|
|||
From 2c8dfc45a8009e5110a9d2148b62d802e989fde7 Mon Sep 17 00:00:00 2001
|
||||
From: ticat_fp <fanpeng@loongson.cn>
|
||||
Date: Thu, 29 Feb 2024 15:58:31 +0800
|
||||
Subject: [PATCH] Decrease value of arch_minimum_kernel with LoongArch
|
||||
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/unix/sysv/linux/loongarch/configure | 2 +-
|
||||
sysdeps/unix/sysv/linux/loongarch/configure.ac | 2 +-
|
||||
2 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/sysdeps/unix/sysv/linux/loongarch/configure b/sysdeps/unix/sysv/linux/loongarch/configure
|
||||
index 0d1159e9..851b2285 100644
|
||||
--- a/sysdeps/unix/sysv/linux/loongarch/configure
|
||||
+++ b/sysdeps/unix/sysv/linux/loongarch/configure
|
||||
@@ -1,7 +1,7 @@
|
||||
# This file is generated from configure.ac by Autoconf. DO NOT EDIT!
|
||||
# Local configure fragment for sysdeps/unix/sysv/linux/loongarch.
|
||||
|
||||
-arch_minimum_kernel=5.19.0
|
||||
+arch_minimum_kernel=4.19.0
|
||||
|
||||
libc_cv_loongarch_int_abi=no
|
||||
|
||||
diff --git a/sysdeps/unix/sysv/linux/loongarch/configure.ac b/sysdeps/unix/sysv/linux/loongarch/configure.ac
|
||||
index 04e9150a..00815c2f 100644
|
||||
--- a/sysdeps/unix/sysv/linux/loongarch/configure.ac
|
||||
+++ b/sysdeps/unix/sysv/linux/loongarch/configure.ac
|
||||
@@ -2,7 +2,7 @@ sinclude(./aclocal.m4)dnl Autoconf lossage
|
||||
GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
|
||||
# Local configure fragment for sysdeps/unix/sysv/linux/loongarch.
|
||||
|
||||
-arch_minimum_kernel=5.19.0
|
||||
+arch_minimum_kernel=4.19.0
|
||||
|
||||
libc_cv_loongarch_int_abi=no
|
||||
AC_EGREP_CPP(4 8 8, [__SIZEOF_INT__ __SIZEOF_LONG__ __SIZEOF_POINTER__
|
||||
--
|
||||
2.33.0
|
||||
|
499
LoongArch-Add-glibc.cpu.hwcap-support.patch
Normal file
499
LoongArch-Add-glibc.cpu.hwcap-support.patch
Normal file
|
@ -0,0 +1,499 @@
|
|||
From 8923e4e9c79e672fd6b3b89aba598a60d5c01211 Mon Sep 17 00:00:00 2001
|
||||
From: caiyinyu <caiyinyu@loongson.cn>
|
||||
Date: Fri, 15 Sep 2023 17:35:19 +0800
|
||||
Subject: [PATCH 25/29] LoongArch: Add glibc.cpu.hwcap support.
|
||||
|
||||
Key Points:
|
||||
1. On lasx & lsx platforms, we must use _dl_runtime_{profile, resolve}_{lsx, lasx}
|
||||
to save vector registers.
|
||||
2. Via "tunables", users can choose str/mem_{lasx,lsx,unaligned} functions with
|
||||
`export GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,...`.
|
||||
Note: glibc.cpu.hwcaps doesn't affect _dl_runtime_{profile, resolve}_{lsx, lasx}
|
||||
selection.
|
||||
|
||||
Usage Notes:
|
||||
1. Only valid inputs: LASX, LSX, UAL. Case-sensitive, comma-separated, no spaces.
|
||||
2. Example: `export GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,UAL` turns on LASX & UAL.
|
||||
Unmentioned features are turned off. With default ifunc: lasx > lsx > unaligned >
|
||||
aligned > generic, effect is: lasx > unaligned > aligned > generic; lsx off.
|
||||
3. Incorrect GLIBC_TUNABLES settings will show error messages.
|
||||
For example: On lsx platforms, you cannot enable lasx features. If you do
|
||||
that, you will get error messages.
|
||||
4. Valid input examples:
|
||||
- GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX: lasx > aligned > generic.
|
||||
- GLIBC_TUNABLES=glibc.cpu.hwcaps=LSX,UAL: lsx > unaligned > aligned > generic.
|
||||
- GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,UAL,LASX,UAL,LSX,LASX,UAL: Repetitions
|
||||
allowed but not recommended. Results in: lasx > lsx > unaligned > aligned >
|
||||
generic.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/Makefile | 4 +
|
||||
sysdeps/loongarch/Versions | 5 ++
|
||||
sysdeps/loongarch/cpu-tunables.c | 89 +++++++++++++++++++
|
||||
sysdeps/loongarch/dl-get-cpu-features.c | 25 ++++++
|
||||
sysdeps/loongarch/dl-machine.h | 27 +++++-
|
||||
sysdeps/loongarch/dl-tunables.list | 25 ++++++
|
||||
.../unix/sysv/linux/loongarch/cpu-features.c | 29 ++++++
|
||||
.../unix/sysv/linux/loongarch/cpu-features.h | 18 +++-
|
||||
.../unix/sysv/linux/loongarch/dl-procinfo.c | 60 +++++++++++++
|
||||
sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c | 21 +++++
|
||||
.../unix/sysv/linux/loongarch/libc-start.c | 34 +++++++
|
||||
11 files changed, 329 insertions(+), 8 deletions(-)
|
||||
create mode 100644 sysdeps/loongarch/Versions
|
||||
create mode 100644 sysdeps/loongarch/cpu-tunables.c
|
||||
create mode 100644 sysdeps/loongarch/dl-get-cpu-features.c
|
||||
create mode 100644 sysdeps/loongarch/dl-tunables.list
|
||||
create mode 100644 sysdeps/unix/sysv/linux/loongarch/cpu-features.c
|
||||
create mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
|
||||
create mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
|
||||
create mode 100644 sysdeps/unix/sysv/linux/loongarch/libc-start.c
|
||||
|
||||
diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
|
||||
index 43d2f583..30a1f4a8 100644
|
||||
--- a/sysdeps/loongarch/Makefile
|
||||
+++ b/sysdeps/loongarch/Makefile
|
||||
@@ -6,6 +6,10 @@ ifeq ($(subdir),elf)
|
||||
gen-as-const-headers += dl-link.sym
|
||||
endif
|
||||
|
||||
+ifeq ($(subdir),elf)
|
||||
+ sysdep-dl-routines += dl-get-cpu-features
|
||||
+endif
|
||||
+
|
||||
# LoongArch's assembler also needs to know about PIC as it changes the
|
||||
# definition of some assembler macros.
|
||||
ASFLAGS-.os += $(pic-ccflag)
|
||||
diff --git a/sysdeps/loongarch/Versions b/sysdeps/loongarch/Versions
|
||||
new file mode 100644
|
||||
index 00000000..33ae2cc0
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/Versions
|
||||
@@ -0,0 +1,5 @@
|
||||
+ld {
|
||||
+ GLIBC_PRIVATE {
|
||||
+ _dl_larch_get_cpu_features;
|
||||
+ }
|
||||
+}
|
||||
diff --git a/sysdeps/loongarch/cpu-tunables.c b/sysdeps/loongarch/cpu-tunables.c
|
||||
new file mode 100644
|
||||
index 00000000..8e9fab93
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/cpu-tunables.c
|
||||
@@ -0,0 +1,89 @@
|
||||
+/* LoongArch CPU feature tuning.
|
||||
+ This file is part of the GNU C Library.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+# include <stdbool.h>
|
||||
+# include <stdint.h>
|
||||
+# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. */
|
||||
+# include <elf/dl-tunables.h>
|
||||
+# include <string.h>
|
||||
+# include <cpu-features.h>
|
||||
+# include <ldsodefs.h>
|
||||
+# include <sys/auxv.h>
|
||||
+
|
||||
+# define HWCAP_LOONGARCH_IFUNC \
|
||||
+ (HWCAP_LOONGARCH_UAL | HWCAP_LOONGARCH_LSX | HWCAP_LOONGARCH_LASX)
|
||||
+
|
||||
+# define CHECK_GLIBC_IFUNC_CPU_OFF(f, name, len) /* Accept token F when it spells NAME and GLRO (dl_hwcap) has that bit set. */ \
|
||||
+ _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); /* LEN must equal the length of NAME's spelling. */ \
|
||||
+ if (!memcmp (f, #name, len) && \
|
||||
+ (GLRO (dl_hwcap) & HWCAP_LOONGARCH_##name)) \
|
||||
+ { \
|
||||
+ hwcap |= (HWCAP_LOONGARCH_##name | (~HWCAP_LOONGARCH_IFUNC)); /* Keep this bit and every bit outside the ifunc mask. */ \
|
||||
+ break; /* Leaves the enclosing switch at the use site. */ \
|
||||
+ } \
|
||||
+
|
||||
+attribute_hidden /* Tunable callback: parse the comma-separated string in VALP and mask dl_larch_cpu_features.hwcap accordingly. */
|
||||
+void
|
||||
+TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
|
||||
+{
|
||||
+ const char *p = valp->strval; /* Start of the current token. */
|
||||
+ size_t len;
|
||||
+ unsigned long hwcap = 0; /* Mask of bits to keep, built up token by token. */
|
||||
+ const char *c; /* Scans forward to the ',' or NUL that ends the token. */
|
||||
+
|
||||
+ do {
|
||||
+ for (c = p; *c != ','; c++)
|
||||
+ if (*c == '\0')
|
||||
+ break;
|
||||
+
|
||||
+ len = c - p; /* Token length, delimiter excluded. */
|
||||
+
|
||||
+ switch(len)
|
||||
+ {
|
||||
+ default: /* Token is neither 3 nor 4 characters long. */
|
||||
+ _dl_fatal_printf (
|
||||
+ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
|
||||
+ );
|
||||
+ break;
|
||||
+ case 3:
|
||||
+ {
|
||||
+ CHECK_GLIBC_IFUNC_CPU_OFF (p, LSX, 3);
|
||||
+ CHECK_GLIBC_IFUNC_CPU_OFF (p, UAL, 3);
|
||||
+ _dl_fatal_printf ( /* Reached only when neither check above took its break. */
|
||||
+ "Some features are invalid or not supported on this machine!!\n"
|
||||
+ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
|
||||
+ );
|
||||
+ }
|
||||
+ break;
|
||||
+ case 4:
|
||||
+ {
|
||||
+ CHECK_GLIBC_IFUNC_CPU_OFF (p, LASX, 4);
|
||||
+ _dl_fatal_printf ( /* Reached only when the check above did not take its break. */
|
||||
+ "Some features are invalid or not supported on this machine!!\n"
|
||||
+ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
|
||||
+ );
|
||||
+ }
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ p += len + 1; /* Step past the token and its delimiter. */
|
||||
+ }
|
||||
+ while (*c != '\0'); /* C still points at the delimiter that ended this token. */
|
||||
+
|
||||
+ GLRO (dl_larch_cpu_features).hwcap &= hwcap; /* Clear every ifunc feature bit that was not named. */
|
||||
+}
|
||||
diff --git a/sysdeps/loongarch/dl-get-cpu-features.c b/sysdeps/loongarch/dl-get-cpu-features.c
|
||||
new file mode 100644
|
||||
index 00000000..7cd9bc15
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/dl-get-cpu-features.c
|
||||
@@ -0,0 +1,25 @@
|
||||
+/* Define _dl_larch_get_cpu_features.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+
|
||||
+#include <ldsodefs.h>
|
||||
+
|
||||
+const struct cpu_features * /* Return the address of the dl_larch_cpu_features object accessed through GLRO. */
|
||||
+_dl_larch_get_cpu_features (void)
|
||||
+{
|
||||
+ return &GLRO(dl_larch_cpu_features);
|
||||
+}
|
||||
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
|
||||
index 57913cef..b395a928 100644
|
||||
--- a/sysdeps/loongarch/dl-machine.h
|
||||
+++ b/sysdeps/loongarch/dl-machine.h
|
||||
@@ -29,6 +29,8 @@
|
||||
#include <dl-static-tls.h>
|
||||
#include <dl-machine-rel.h>
|
||||
|
||||
+#include <cpu-features.c>
|
||||
+
|
||||
#ifndef _RTLD_PROLOGUE
|
||||
# define _RTLD_PROLOGUE(entry) \
|
||||
".globl\t" __STRING (entry) "\n\t" \
|
||||
@@ -53,6 +55,23 @@
|
||||
#define ELF_MACHINE_NO_REL 1
|
||||
#define ELF_MACHINE_NO_RELA 0
|
||||
|
||||
+#define DL_PLATFORM_INIT dl_platform_init ()
|
||||
+
|
||||
+static inline void __attribute__ ((unused))
|
||||
+dl_platform_init (void)
|
||||
+{
|
||||
+ if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
|
||||
+ /* An empty platform string is treated the same as no platform. */
|
||||
+ GLRO(dl_platform) = NULL;
|
||||
+
|
||||
+#ifdef SHARED
|
||||
+ /* A static executable gets init_cpu_features called early from
|
||||
+ __libc_start_main, so only the shared build calls it here. */
|
||||
+ init_cpu_features (&GLRO(dl_larch_cpu_features));
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
+
|
||||
/* Return nonzero iff ELF header is compatible with the running host. */
|
||||
static inline int
|
||||
elf_machine_matches_host (const ElfW (Ehdr) *ehdr)
|
||||
@@ -290,9 +309,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
|
||||
if (profile != 0)
|
||||
{
|
||||
#if !defined __loongarch_soft_float
|
||||
- if (SUPPORT_LASX)
|
||||
+ if (RTLD_SUPPORT_LASX)
|
||||
gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx;
|
||||
- else if (SUPPORT_LSX)
|
||||
+ else if (RTLD_SUPPORT_LSX)
|
||||
gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx;
|
||||
else
|
||||
#endif
|
||||
@@ -310,9 +329,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
|
||||
indicated by the offset on the stack, and then jump to
|
||||
the resolved address. */
|
||||
#if !defined __loongarch_soft_float
|
||||
- if (SUPPORT_LASX)
|
||||
+ if (RTLD_SUPPORT_LASX)
|
||||
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
|
||||
- else if (SUPPORT_LSX)
|
||||
+ else if (RTLD_SUPPORT_LSX)
|
||||
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lsx;
|
||||
else
|
||||
#endif
|
||||
diff --git a/sysdeps/loongarch/dl-tunables.list b/sysdeps/loongarch/dl-tunables.list
|
||||
new file mode 100644
|
||||
index 00000000..66b34275
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/dl-tunables.list
|
||||
@@ -0,0 +1,25 @@
|
||||
+# LoongArch specific tunables.
|
||||
+# Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+# This file is part of the GNU C Library.
|
||||
+
|
||||
+# The GNU C Library is free software; you can redistribute it and/or
|
||||
+# modify it under the terms of the GNU Lesser General Public
|
||||
+# License as published by the Free Software Foundation; either
|
||||
+# version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+# The GNU C Library is distributed in the hope that it will be useful,
|
||||
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+# Lesser General Public License for more details.
|
||||
+
|
||||
+# You should have received a copy of the GNU Lesser General Public
|
||||
+# License along with the GNU C Library; if not, see
|
||||
+# <http://www.gnu.org/licenses/>.
|
||||
+
|
||||
+glibc {
|
||||
+ cpu {
|
||||
+ hwcaps {
|
||||
+ type: STRING
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
|
||||
new file mode 100644
|
||||
index 00000000..1290c4ce
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
|
||||
@@ -0,0 +1,29 @@
|
||||
+/* Initialize CPU feature data. LoongArch64 version.
|
||||
+ This file is part of the GNU C Library.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <cpu-features.h>
|
||||
+#include <elf/dl-hwcaps.h>
|
||||
+#include <elf/dl-tunables.h>
|
||||
+extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) attribute_hidden;
|
||||
+
|
||||
+static inline void /* Seed dl_larch_cpu_features.hwcap from dl_hwcap, then let the glibc.cpu.hwcaps tunable adjust it. */
|
||||
+init_cpu_features (struct cpu_features *cpu_features) /* NOTE(review): CPU_FEATURES is not referenced in the body; the GLRO object is written directly. */
|
||||
+{
|
||||
+ GLRO (dl_larch_cpu_features).hwcap = GLRO (dl_hwcap); /* Start from the unfiltered dl_hwcap value. */
|
||||
+ TUNABLE_GET (glibc, cpu, hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps)); /* set_hwcaps is passed as the callback for this tunable. */
|
||||
+}
|
||||
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
|
||||
index d1a280a5..450963ce 100644
|
||||
--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
|
||||
+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
|
||||
@@ -19,13 +19,23 @@
|
||||
#ifndef _CPU_FEATURES_LOONGARCH64_H
|
||||
#define _CPU_FEATURES_LOONGARCH64_H
|
||||
|
||||
+#include <stdint.h>
|
||||
#include <sys/auxv.h>
|
||||
|
||||
-#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL)
|
||||
-#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
|
||||
-#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
|
||||
+struct cpu_features
|
||||
+ {
|
||||
+ uint64_t hwcap; /* Bit mask tested by the SUPPORT_UAL, SUPPORT_LSX and SUPPORT_LASX macros. */
|
||||
+ };
|
||||
|
||||
+/* Get a pointer to the CPU features structure. */
|
||||
+extern const struct cpu_features *_dl_larch_get_cpu_features (void)
|
||||
+ __attribute__ ((pure));
|
||||
+
|
||||
+#define SUPPORT_UAL (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_UAL)
|
||||
+#define SUPPORT_LSX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LSX)
|
||||
+#define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LASX)
|
||||
+#define RTLD_SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
|
||||
+#define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
|
||||
#define INIT_ARCH()
|
||||
|
||||
#endif /* _CPU_FEATURES_LOONGARCH64_H */
|
||||
-
|
||||
diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
|
||||
new file mode 100644
|
||||
index 00000000..6217fda9
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
|
||||
@@ -0,0 +1,60 @@
|
||||
+/* Data for LoongArch64 version of processor capability information.
|
||||
+ Linux version.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* If anything should be added here check whether the size of each string
|
||||
+ is still ok with the given array size.
|
||||
+
|
||||
+ All the #ifdefs in the definitions are quite irritating but
|
||||
+ necessary if we want to avoid duplicating the information. There
|
||||
+ are three different modes:
|
||||
+
|
||||
+ - PROCINFO_DECL is defined. This means we are only interested in
|
||||
+ declarations.
|
||||
+
|
||||
+ - PROCINFO_DECL is not defined:
|
||||
+
|
||||
+ + if SHARED is defined the file is included in an array
|
||||
+ initializer. The .element = { ... } syntax is needed.
|
||||
+
|
||||
+ + if SHARED is not defined a normal array initialization is
|
||||
+ needed.
|
||||
+ */
|
||||
+
|
||||
+#ifndef PROCINFO_CLASS
|
||||
+# define PROCINFO_CLASS
|
||||
+#endif
|
||||
+
|
||||
+#if !IS_IN (ldconfig)
|
||||
+# if !defined PROCINFO_DECL && defined SHARED
|
||||
+ ._dl_larch_cpu_features
|
||||
+# else
|
||||
+PROCINFO_CLASS struct cpu_features _dl_larch_cpu_features
|
||||
+# endif
|
||||
+# ifndef PROCINFO_DECL
|
||||
+= { }
|
||||
+# endif
|
||||
+# if !defined SHARED || defined PROCINFO_DECL
|
||||
+;
|
||||
+# else
|
||||
+,
|
||||
+# endif
|
||||
+#endif
|
||||
+
|
||||
+#undef PROCINFO_DECL
|
||||
+#undef PROCINFO_CLASS
|
||||
diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
|
||||
new file mode 100644
|
||||
index 00000000..455fd71a
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
|
||||
@@ -0,0 +1,21 @@
|
||||
+/* Operating system support for run-time dynamic linker. LoongArch version.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <config.h>
|
||||
+#include <sysdeps/loongarch/cpu-tunables.c>
|
||||
+#include <sysdeps/unix/sysv/linux/dl-sysdep.c>
|
||||
diff --git a/sysdeps/unix/sysv/linux/loongarch/libc-start.c b/sysdeps/unix/sysv/linux/loongarch/libc-start.c
|
||||
new file mode 100644
|
||||
index 00000000..f1346ece
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/unix/sysv/linux/loongarch/libc-start.c
|
||||
@@ -0,0 +1,34 @@
|
||||
+/* Override csu/libc-start.c on LoongArch64.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#ifndef SHARED
|
||||
+
|
||||
+/* Mark symbols hidden in static PIE for early self relocation to work. */
|
||||
+# if BUILD_PIE_DEFAULT
|
||||
+# pragma GCC visibility push(hidden)
|
||||
+# endif
|
||||
+
|
||||
+# include <ldsodefs.h>
|
||||
+# include <cpu-features.c>
|
||||
+
|
||||
+extern struct cpu_features _dl_larch_cpu_features;
|
||||
+
|
||||
+# define ARCH_INIT_CPU_FEATURES() init_cpu_features (&_dl_larch_cpu_features)
|
||||
+
|
||||
+#endif
|
||||
+#include <csu/libc-start.c>
|
||||
--
|
||||
2.33.0
|
||||
|
485
LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch
Normal file
485
LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch
Normal file
|
@ -0,0 +1,485 @@
|
|||
From 3ee56bbc56faa7b85a6513340db4a4fdd6ce709d Mon Sep 17 00:00:00 2001
|
||||
From: dengjianbo <dengjianbo@loongson.cn>
|
||||
Date: Mon, 28 Aug 2023 10:08:36 +0800
|
||||
Subject: [PATCH 15/29] LoongArch: Add ifunc support for memchr{aligned, lsx,
|
||||
lasx}
|
||||
|
||||
According to glibc memchr microbenchmark, this implementation could reduce
|
||||
the runtime as follows:
|
||||
|
||||
Name Percent of runtime reduced
|
||||
memchr-lasx 37%-83%
|
||||
memchr-lsx 30%-66%
|
||||
memchr-aligned 0%-15%
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
|
||||
.../lp64/multiarch/ifunc-impl-list.c | 7 ++
|
||||
.../loongarch/lp64/multiarch/ifunc-memchr.h | 40 ++++++
|
||||
.../loongarch/lp64/multiarch/memchr-aligned.S | 95 ++++++++++++++
|
||||
.../loongarch/lp64/multiarch/memchr-lasx.S | 117 ++++++++++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/memchr-lsx.S | 102 +++++++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/memchr.c | 37 ++++++
|
||||
7 files changed, 401 insertions(+)
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-aligned.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lasx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lsx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr.c
|
||||
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
index 64416b02..2f4802cf 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
@@ -24,5 +24,8 @@ sysdep_routines += \
|
||||
rawmemchr-aligned \
|
||||
rawmemchr-lsx \
|
||||
rawmemchr-lasx \
|
||||
+ memchr-aligned \
|
||||
+ memchr-lsx \
|
||||
+ memchr-lasx \
|
||||
# sysdep_routines
|
||||
endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
index 3db9af14..a567b9cf 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
@@ -102,5 +102,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned)
|
||||
)
|
||||
|
||||
+ IFUNC_IMPL (i, name, memchr,
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LASX, __memchr_lasx)
|
||||
+ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LSX, __memchr_lsx)
|
||||
+#endif
|
||||
+ IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned)
|
||||
+ )
|
||||
return i;
|
||||
}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h
|
||||
new file mode 100644
|
||||
index 00000000..9060ccd5
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h
|
||||
@@ -0,0 +1,40 @@
|
||||
+/* Common definition for memchr ifunc selections.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <ldsodefs.h>
|
||||
+#include <ifunc-init.h>
|
||||
+
|
||||
+#if !defined __loongarch_soft_float
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
|
||||
+#endif
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
|
||||
+
|
||||
+static inline void * /* Choose the variant to use: lasx first, then lsx, otherwise aligned. */
|
||||
+IFUNC_SELECTOR (void)
|
||||
+{
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ if (SUPPORT_LASX)
|
||||
+ return OPTIMIZE (lasx);
|
||||
+ else if (SUPPORT_LSX)
|
||||
+ return OPTIMIZE (lsx);
|
||||
+ else
|
||||
+#endif
|
||||
+ return OPTIMIZE (aligned); /* The only candidate when __loongarch_soft_float is defined. */
|
||||
+}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S
|
||||
new file mode 100644
|
||||
index 00000000..81d0d004
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S
|
||||
@@ -0,0 +1,95 @@
|
||||
+/* Optimized memchr implementation using basic LoongArch instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc)
|
||||
+# define MEMCHR_NAME __memchr_aligned
|
||||
+#else
|
||||
+# define MEMCHR_NAME memchr
|
||||
+#endif
|
||||
+
|
||||
+LEAF(MEMCHR_NAME, 6) /* Presumably a0 = buffer, a1 = byte to find, a2 = length (memchr arguments) -- confirm against the calling convention. */
|
||||
+ beqz a2, L(out) /* a2 == 0: return 0. */
|
||||
+ andi t1, a0, 0x7 /* t1 = offset of a0 within its 8-byte word. */
|
||||
+ add.d a5, a0, a2 /* a5 = a0 + a2, one past the last byte. */
|
||||
+ bstrins.d a0, zero, 2, 0 /* Round a0 down to an 8-byte boundary. */
|
||||
+
|
||||
+ ld.d t0, a0, 0
|
||||
+ bstrins.d a1, a1, 15, 8 /* Start replicating the low byte of a1. */
|
||||
+ lu12i.w a3, 0x01010
|
||||
+ slli.d t2, t1, 03 /* Offset in bits. */
|
||||
+
|
||||
+ bstrins.d a1, a1, 31, 16
|
||||
+ ori a3, a3, 0x101
|
||||
+ li.d t7, -1
|
||||
+ li.d t8, 8
|
||||
+
|
||||
+ bstrins.d a1, a1, 63, 32 /* a1 now holds its low byte in all eight bytes. */
|
||||
+ bstrins.d a3, a3, 63, 32 /* a3 = 0x0101010101010101. */
|
||||
+ sll.d t2, t7, t2 /* Ones from the starting byte upward. */
|
||||
+ xor t0, t0, a1 /* Matching bytes become zero. */
|
||||
+
|
||||
+
|
||||
+ addi.d a6, a5, -1
|
||||
+ slli.d a4, a3, 7 /* a4 = 0x8080808080808080. */
|
||||
+ sub.d t1, t8, t1 /* Bytes from the start to the end of the first word. */
|
||||
+ orn t0, t0, t2 /* Force bytes below the start to be nonzero. */
|
||||
+
|
||||
+ sub.d t2, t0, a3
|
||||
+ andn t3, a4, t0
|
||||
+ bstrins.d a6, zero, 2, 0 /* a6 = aligned address of the word holding the last byte. */
|
||||
+ and t0, t2, t3 /* Nonzero iff some byte of t0 was zero. */
|
||||
+
|
||||
+ bgeu t1, a2, L(end) /* The whole range lies in the first word. */
|
||||
+L(loop):
|
||||
+ bnez t0, L(found)
|
||||
+ ld.d t1, a0, 8
|
||||
+ xor t0, t1, a1
|
||||
+
|
||||
+ addi.d a0, a0, 8
|
||||
+ sub.d t2, t0, a3
|
||||
+ andn t3, a4, t0
|
||||
+ and t0, t2, t3
|
||||
+
|
||||
+
|
||||
+ bne a0, a6, L(loop) /* Stop once a0 reaches the last word. */
|
||||
+L(end):
|
||||
+ sub.d t1, a5, a6 /* Count of in-range bytes measured from a6. */
|
||||
+ ctz.d t0, t0
|
||||
+ srli.d t0, t0, 3 /* Bit index to byte index. */
|
||||
+
|
||||
+ sltu t1, t0, t1 /* t1 = 1 when the hit lies inside the range. */
|
||||
+ add.d a0, a0, t0
|
||||
+ maskeqz a0, a0, t1 /* Result becomes 0 when t1 is 0. */
|
||||
+ jr ra
|
||||
+
|
||||
+L(found):
|
||||
+ ctz.d t0, t0
|
||||
+ srli.d t0, t0, 3
|
||||
+ add.d a0, a0, t0 /* Address of the first matching byte. */
|
||||
+ jr ra
|
||||
+
|
||||
+L(out):
|
||||
+ move a0, zero
|
||||
+ jr ra
|
||||
+END(MEMCHR_NAME)
|
||||
+
|
||||
+libc_hidden_builtin_def (MEMCHR_NAME)
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S
|
||||
new file mode 100644
|
||||
index 00000000..a26cdf48
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S
|
||||
@@ -0,0 +1,117 @@
|
||||
+/* Optimized memchr implementation using LoongArch LASX instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+# define MEMCHR __memchr_lasx
|
||||
+
|
||||
+LEAF(MEMCHR, 6)
|
||||
+ beqz a2, L(ret0)
|
||||
+ add.d a3, a0, a2
|
||||
+ andi t0, a0, 0x3f
|
||||
+ bstrins.d a0, zero, 5, 0
|
||||
+
|
||||
+ xvld xr0, a0, 0
|
||||
+ xvld xr1, a0, 32
|
||||
+ li.d t1, -1
|
||||
+ li.d t2, 64
|
||||
+
|
||||
+ xvreplgr2vr.b xr2, a1
|
||||
+ sll.d t3, t1, t0
|
||||
+ sub.d t2, t2, t0
|
||||
+ xvseq.b xr0, xr0, xr2
|
||||
+
|
||||
+ xvseq.b xr1, xr1, xr2
|
||||
+ xvmsknz.b xr0, xr0
|
||||
+ xvmsknz.b xr1, xr1
|
||||
+ xvpickve.w xr3, xr0, 4
|
||||
+
|
||||
+
|
||||
+ xvpickve.w xr4, xr1, 4
|
||||
+ vilvl.h vr0, vr3, vr0
|
||||
+ vilvl.h vr1, vr4, vr1
|
||||
+ vilvl.w vr0, vr1, vr0
|
||||
+
|
||||
+ movfr2gr.d t0, fa0
|
||||
+ and t0, t0, t3
|
||||
+ bgeu t2, a2, L(end)
|
||||
+ bnez t0, L(found)
|
||||
+
|
||||
+ addi.d a4, a3, -1
|
||||
+ bstrins.d a4, zero, 5, 0
|
||||
+L(loop):
|
||||
+ xvld xr0, a0, 64
|
||||
+ xvld xr1, a0, 96
|
||||
+
|
||||
+ addi.d a0, a0, 64
|
||||
+ xvseq.b xr0, xr0, xr2
|
||||
+ xvseq.b xr1, xr1, xr2
|
||||
+ beq a0, a4, L(out)
|
||||
+
|
||||
+
|
||||
+ xvmax.bu xr3, xr0, xr1
|
||||
+ xvseteqz.v fcc0, xr3
|
||||
+ bcnez fcc0, L(loop)
|
||||
+ xvmsknz.b xr0, xr0
|
||||
+
|
||||
+ xvmsknz.b xr1, xr1
|
||||
+ xvpickve.w xr3, xr0, 4
|
||||
+ xvpickve.w xr4, xr1, 4
|
||||
+ vilvl.h vr0, vr3, vr0
|
||||
+
|
||||
+ vilvl.h vr1, vr4, vr1
|
||||
+ vilvl.w vr0, vr1, vr0
|
||||
+ movfr2gr.d t0, fa0
|
||||
+L(found):
|
||||
+ ctz.d t1, t0
|
||||
+
|
||||
+ add.d a0, a0, t1
|
||||
+ jr ra
|
||||
+L(ret0):
|
||||
+ move a0, zero
|
||||
+ jr ra
|
||||
+
|
||||
+
|
||||
+L(out):
|
||||
+ xvmsknz.b xr0, xr0
|
||||
+ xvmsknz.b xr1, xr1
|
||||
+ xvpickve.w xr3, xr0, 4
|
||||
+ xvpickve.w xr4, xr1, 4
|
||||
+
|
||||
+ vilvl.h vr0, vr3, vr0
|
||||
+ vilvl.h vr1, vr4, vr1
|
||||
+ vilvl.w vr0, vr1, vr0
|
||||
+ movfr2gr.d t0, fa0
|
||||
+
|
||||
+L(end):
|
||||
+ sub.d t2, zero, a3
|
||||
+ srl.d t1, t1, t2
|
||||
+ and t0, t0, t1
|
||||
+ ctz.d t1, t0
|
||||
+
|
||||
+ add.d a0, a0, t1
|
||||
+ maskeqz a0, a0, t0
|
||||
+ jr ra
|
||||
+END(MEMCHR)
|
||||
+
|
||||
+libc_hidden_builtin_def (MEMCHR)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S
|
||||
new file mode 100644
|
||||
index 00000000..a73ecd25
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S
|
||||
@@ -0,0 +1,102 @@
|
||||
+/* Optimized memchr implementation using LoongArch LSX instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+# define MEMCHR __memchr_lsx
|
||||
+
|
||||
+LEAF(MEMCHR, 6)
|
||||
+ beqz a2, L(ret0)
|
||||
+ add.d a3, a0, a2
|
||||
+ andi t0, a0, 0x1f
|
||||
+ bstrins.d a0, zero, 4, 0
|
||||
+
|
||||
+ vld vr0, a0, 0
|
||||
+ vld vr1, a0, 16
|
||||
+ li.d t1, -1
|
||||
+ li.d t2, 32
|
||||
+
|
||||
+ vreplgr2vr.b vr2, a1
|
||||
+ sll.d t3, t1, t0
|
||||
+ sub.d t2, t2, t0
|
||||
+ vseq.b vr0, vr0, vr2
|
||||
+
|
||||
+ vseq.b vr1, vr1, vr2
|
||||
+ vmsknz.b vr0, vr0
|
||||
+ vmsknz.b vr1, vr1
|
||||
+ vilvl.h vr0, vr1, vr0
|
||||
+
|
||||
+
|
||||
+ movfr2gr.s t0, fa0
|
||||
+ and t0, t0, t3
|
||||
+ bgeu t2, a2, L(end)
|
||||
+ bnez t0, L(found)
|
||||
+
|
||||
+ addi.d a4, a3, -1
|
||||
+ bstrins.d a4, zero, 4, 0
|
||||
+L(loop):
|
||||
+ vld vr0, a0, 32
|
||||
+ vld vr1, a0, 48
|
||||
+
|
||||
+ addi.d a0, a0, 32
|
||||
+ vseq.b vr0, vr0, vr2
|
||||
+ vseq.b vr1, vr1, vr2
|
||||
+ beq a0, a4, L(out)
|
||||
+
|
||||
+ vmax.bu vr3, vr0, vr1
|
||||
+ vseteqz.v fcc0, vr3
|
||||
+ bcnez fcc0, L(loop)
|
||||
+ vmsknz.b vr0, vr0
|
||||
+
|
||||
+
|
||||
+ vmsknz.b vr1, vr1
|
||||
+ vilvl.h vr0, vr1, vr0
|
||||
+ movfr2gr.s t0, fa0
|
||||
+L(found):
|
||||
+ ctz.w t0, t0
|
||||
+
|
||||
+ add.d a0, a0, t0
|
||||
+ jr ra
|
||||
+L(ret0):
|
||||
+ move a0, zero
|
||||
+ jr ra
|
||||
+
|
||||
+L(out):
|
||||
+ vmsknz.b vr0, vr0
|
||||
+ vmsknz.b vr1, vr1
|
||||
+ vilvl.h vr0, vr1, vr0
|
||||
+ movfr2gr.s t0, fa0
|
||||
+
|
||||
+L(end):
|
||||
+ sub.d t2, zero, a3
|
||||
+ srl.w t1, t1, t2
|
||||
+ and t0, t0, t1
|
||||
+ ctz.w t1, t0
|
||||
+
|
||||
+
|
||||
+ add.d a0, a0, t1
|
||||
+ maskeqz a0, a0, t0
|
||||
+ jr ra
|
||||
+END(MEMCHR)
|
||||
+
|
||||
+libc_hidden_builtin_def (MEMCHR)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memchr.c b/sysdeps/loongarch/lp64/multiarch/memchr.c
|
||||
new file mode 100644
|
||||
index 00000000..059479c0
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memchr.c
|
||||
@@ -0,0 +1,37 @@
|
||||
+/* Multiple versions of memchr.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* Define multiple versions only for the definition in libc. */
|
||||
+#if IS_IN (libc)
|
||||
+# define memchr __redirect_memchr
|
||||
+# include <string.h>
|
||||
+# undef memchr
|
||||
+
|
||||
+# define SYMBOL_NAME memchr
|
||||
+# include "ifunc-memchr.h"
|
||||
+
|
||||
+libc_ifunc_redirected (__redirect_memchr, memchr,
|
||||
+ IFUNC_SELECTOR ());
|
||||
+
|
||||
+# ifdef SHARED
|
||||
+__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
|
||||
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
|
||||
+# endif
|
||||
+
|
||||
+#endif
|
||||
--
|
||||
2.33.0
|
||||
|
946
LoongArch-Add-ifunc-support-for-memcmp-aligned-lsx-l.patch
Normal file
946
LoongArch-Add-ifunc-support-for-memcmp-aligned-lsx-l.patch
Normal file
|
@ -0,0 +1,946 @@
|
|||
From 60f4bbd1eec528ba8df044ae6b3091f6337a7fcc Mon Sep 17 00:00:00 2001
|
||||
From: dengjianbo <dengjianbo@loongson.cn>
|
||||
Date: Mon, 28 Aug 2023 10:08:39 +0800
|
||||
Subject: [PATCH 18/29] LoongArch: Add ifunc support for memcmp{aligned, lsx,
|
||||
lasx}
|
||||
|
||||
According to glibc memcmp microbenchmark test results (Add generic
|
||||
memcmp), this implementation improves performance
|
||||
except when the length is less than 3; details are below:
|
||||
|
||||
Name Percent of time reduced
|
||||
memcmp-lasx 16%-74%
|
||||
memcmp-lsx 20%-50%
|
||||
memcmp-aligned 5%-20%
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
|
||||
.../lp64/multiarch/ifunc-impl-list.c | 7 +
|
||||
.../loongarch/lp64/multiarch/ifunc-memcmp.h | 40 +++
|
||||
.../loongarch/lp64/multiarch/memcmp-aligned.S | 292 ++++++++++++++++++
|
||||
.../loongarch/lp64/multiarch/memcmp-lasx.S | 207 +++++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S | 269 ++++++++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/memcmp.c | 43 +++
|
||||
7 files changed, 861 insertions(+)
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp.c
|
||||
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
index 216886c5..360a6718 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
@@ -34,5 +34,8 @@ sysdep_routines += \
|
||||
memset-unaligned \
|
||||
memset-lsx \
|
||||
memset-lasx \
|
||||
+ memcmp-aligned \
|
||||
+ memcmp-lsx \
|
||||
+ memcmp-lasx \
|
||||
# sysdep_routines
|
||||
endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
index 37f60dde..e397d58c 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
@@ -127,5 +127,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_aligned)
|
||||
)
|
||||
|
||||
+ IFUNC_IMPL (i, name, memcmp,
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LASX, __memcmp_lasx)
|
||||
+ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LSX, __memcmp_lsx)
|
||||
+#endif
|
||||
+ IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_aligned)
|
||||
+ )
|
||||
return i;
|
||||
}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h
|
||||
new file mode 100644
|
||||
index 00000000..04adc2e5
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h
|
||||
@@ -0,0 +1,40 @@
|
||||
+/* Common definition for memcmp ifunc selections.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <ldsodefs.h>
|
||||
+#include <ifunc-init.h>
|
||||
+
|
||||
+#if !defined __loongarch_soft_float
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
|
||||
+#endif
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
|
||||
+
|
||||
+static inline void *
|
||||
+IFUNC_SELECTOR (void)
|
||||
+{
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ if (SUPPORT_LASX)
|
||||
+ return OPTIMIZE (lasx);
|
||||
+ else if (SUPPORT_LSX)
|
||||
+ return OPTIMIZE (lsx);
|
||||
+ else
|
||||
+#endif
|
||||
+ return OPTIMIZE (aligned);
|
||||
+}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S
|
||||
new file mode 100644
|
||||
index 00000000..14a7caa9
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S
|
||||
@@ -0,0 +1,292 @@
|
||||
+/* Optimized memcmp implementation using basic LoongArch instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc)
|
||||
+# define MEMCMP_NAME __memcmp_aligned
|
||||
+#else
|
||||
+# define MEMCMP_NAME memcmp
|
||||
+#endif
|
||||
+
|
||||
+LEAF(MEMCMP_NAME, 6)
|
||||
+ beqz a2, L(ret)
|
||||
+ andi a4, a1, 0x7
|
||||
+ andi a3, a0, 0x7
|
||||
+ sltu a5, a4, a3
|
||||
+
|
||||
+ xor t0, a0, a1
|
||||
+ li.w t8, 8
|
||||
+ maskeqz t0, t0, a5
|
||||
+ li.w t7, -1
|
||||
+
|
||||
+ xor a0, a0, t0
|
||||
+ xor a1, a1, t0
|
||||
+ andi a3, a0, 0x7
|
||||
+ andi a4, a1, 0x7
|
||||
+
|
||||
+ xor a0, a0, a3
|
||||
+ xor a1, a1, a4
|
||||
+ ld.d t2, a0, 0
|
||||
+ ld.d t1, a1, 0
|
||||
+
|
||||
+ slli.d t3, a3, 3
|
||||
+ slli.d t4, a4, 3
|
||||
+ sub.d a6, t3, t4
|
||||
+ srl.d t1, t1, t4
|
||||
+
|
||||
+ srl.d t0, t2, t3
|
||||
+ srl.d t5, t7, t4
|
||||
+ sub.d t6, t0, t1
|
||||
+ and t6, t6, t5
|
||||
+
|
||||
+ sub.d t5, t8, a4
|
||||
+ bnez t6, L(first_out)
|
||||
+ bgeu t5, a2, L(ret)
|
||||
+ sub.d a2, a2, t5
|
||||
+
|
||||
+ bnez a6, L(unaligned)
|
||||
+ blt a2, t8, L(al_less_8bytes)
|
||||
+ andi t1, a2, 31
|
||||
+ beq t1, a2, L(al_less_32bytes)
|
||||
+
|
||||
+ sub.d t2, a2, t1
|
||||
+ add.d a4, a0, t2
|
||||
+ move a2, t1
|
||||
+
|
||||
+L(al_loop):
|
||||
+ ld.d t0, a0, 8
|
||||
+
|
||||
+ ld.d t1, a1, 8
|
||||
+ ld.d t2, a0, 16
|
||||
+ ld.d t3, a1, 16
|
||||
+ ld.d t4, a0, 24
|
||||
+
|
||||
+ ld.d t5, a1, 24
|
||||
+ ld.d t6, a0, 32
|
||||
+ ld.d t7, a1, 32
|
||||
+ addi.d a0, a0, 32
|
||||
+
|
||||
+ addi.d a1, a1, 32
|
||||
+ bne t0, t1, L(out1)
|
||||
+ bne t2, t3, L(out2)
|
||||
+ bne t4, t5, L(out3)
|
||||
+
|
||||
+ bne t6, t7, L(out4)
|
||||
+ bne a0, a4, L(al_loop)
|
||||
+
|
||||
+L(al_less_32bytes):
|
||||
+ srai.d a4, a2, 4
|
||||
+ beqz a4, L(al_less_16bytes)
|
||||
+
|
||||
+ ld.d t0, a0, 8
|
||||
+ ld.d t1, a1, 8
|
||||
+ ld.d t2, a0, 16
|
||||
+ ld.d t3, a1, 16
|
||||
+
|
||||
+ addi.d a0, a0, 16
|
||||
+ addi.d a1, a1, 16
|
||||
+ addi.d a2, a2, -16
|
||||
+ bne t0, t1, L(out1)
|
||||
+
|
||||
+ bne t2, t3, L(out2)
|
||||
+
|
||||
+L(al_less_16bytes):
|
||||
+ srai.d a4, a2, 3
|
||||
+ beqz a4, L(al_less_8bytes)
|
||||
+ ld.d t0, a0, 8
|
||||
+
|
||||
+ ld.d t1, a1, 8
|
||||
+ addi.d a0, a0, 8
|
||||
+ addi.d a1, a1, 8
|
||||
+ addi.d a2, a2, -8
|
||||
+
|
||||
+ bne t0, t1, L(out1)
|
||||
+
|
||||
+L(al_less_8bytes):
|
||||
+ beqz a2, L(ret)
|
||||
+ ld.d t0, a0, 8
|
||||
+ ld.d t1, a1, 8
|
||||
+
|
||||
+ li.d t7, -1
|
||||
+ slli.d t2, a2, 3
|
||||
+ sll.d t2, t7, t2
|
||||
+ sub.d t3, t0, t1
|
||||
+
|
||||
+ andn t6, t3, t2
|
||||
+ bnez t6, L(count_diff)
|
||||
+
|
||||
+L(ret):
|
||||
+ move a0, zero
|
||||
+ jr ra
|
||||
+
|
||||
+L(out4):
|
||||
+ move t0, t6
|
||||
+ move t1, t7
|
||||
+ sub.d t6, t6, t7
|
||||
+ b L(count_diff)
|
||||
+
|
||||
+L(out3):
|
||||
+ move t0, t4
|
||||
+ move t1, t5
|
||||
+ sub.d t6, t4, t5
|
||||
+ b L(count_diff)
|
||||
+
|
||||
+L(out2):
|
||||
+ move t0, t2
|
||||
+ move t1, t3
|
||||
+L(out1):
|
||||
+ sub.d t6, t0, t1
|
||||
+ b L(count_diff)
|
||||
+
|
||||
+L(first_out):
|
||||
+ slli.d t4, a2, 3
|
||||
+ slt t3, a2, t5
|
||||
+ sll.d t4, t7, t4
|
||||
+ maskeqz t4, t4, t3
|
||||
+
|
||||
+ andn t6, t6, t4
|
||||
+
|
||||
+L(count_diff):
|
||||
+ ctz.d t2, t6
|
||||
+ bstrins.d t2, zero, 2, 0
|
||||
+ srl.d t0, t0, t2
|
||||
+
|
||||
+ srl.d t1, t1, t2
|
||||
+ andi t0, t0, 0xff
|
||||
+ andi t1, t1, 0xff
|
||||
+ sub.d t2, t0, t1
|
||||
+
|
||||
+ sub.d t3, t1, t0
|
||||
+ masknez t2, t2, a5
|
||||
+ maskeqz t3, t3, a5
|
||||
+ or a0, t2, t3
|
||||
+
|
||||
+ jr ra
|
||||
+
|
||||
+L(unaligned):
|
||||
+ sub.d a7, zero, a6
|
||||
+ srl.d t0, t2, a6
|
||||
+ blt a2, t8, L(un_less_8bytes)
|
||||
+
|
||||
+ andi t1, a2, 31
|
||||
+ beq t1, a2, L(un_less_32bytes)
|
||||
+ sub.d t2, a2, t1
|
||||
+ add.d a4, a0, t2
|
||||
+
|
||||
+ move a2, t1
|
||||
+
|
||||
+L(un_loop):
|
||||
+ ld.d t2, a0, 8
|
||||
+ ld.d t1, a1, 8
|
||||
+ ld.d t4, a0, 16
|
||||
+
|
||||
+ ld.d t3, a1, 16
|
||||
+ ld.d t6, a0, 24
|
||||
+ ld.d t5, a1, 24
|
||||
+ ld.d t8, a0, 32
|
||||
+
|
||||
+ ld.d t7, a1, 32
|
||||
+ addi.d a0, a0, 32
|
||||
+ addi.d a1, a1, 32
|
||||
+ sll.d a3, t2, a7
|
||||
+
|
||||
+ or t0, a3, t0
|
||||
+ bne t0, t1, L(out1)
|
||||
+ srl.d t0, t2, a6
|
||||
+ sll.d a3, t4, a7
|
||||
+
|
||||
+ or t2, a3, t0
|
||||
+ bne t2, t3, L(out2)
|
||||
+ srl.d t0, t4, a6
|
||||
+ sll.d a3, t6, a7
|
||||
+
|
||||
+ or t4, a3, t0
|
||||
+ bne t4, t5, L(out3)
|
||||
+ srl.d t0, t6, a6
|
||||
+ sll.d a3, t8, a7
|
||||
+
|
||||
+ or t6, t0, a3
|
||||
+ bne t6, t7, L(out4)
|
||||
+ srl.d t0, t8, a6
|
||||
+ bne a0, a4, L(un_loop)
|
||||
+
|
||||
+L(un_less_32bytes):
|
||||
+ srai.d a4, a2, 4
|
||||
+ beqz a4, L(un_less_16bytes)
|
||||
+ ld.d t2, a0, 8
|
||||
+ ld.d t1, a1, 8
|
||||
+
|
||||
+ ld.d t4, a0, 16
|
||||
+ ld.d t3, a1, 16
|
||||
+ addi.d a0, a0, 16
|
||||
+ addi.d a1, a1, 16
|
||||
+
|
||||
+ addi.d a2, a2, -16
|
||||
+ sll.d a3, t2, a7
|
||||
+ or t0, a3, t0
|
||||
+ bne t0, t1, L(out1)
|
||||
+
|
||||
+ srl.d t0, t2, a6
|
||||
+ sll.d a3, t4, a7
|
||||
+ or t2, a3, t0
|
||||
+ bne t2, t3, L(out2)
|
||||
+
|
||||
+ srl.d t0, t4, a6
|
||||
+
|
||||
+L(un_less_16bytes):
|
||||
+ srai.d a4, a2, 3
|
||||
+ beqz a4, L(un_less_8bytes)
|
||||
+ ld.d t2, a0, 8
|
||||
+
|
||||
+ ld.d t1, a1, 8
|
||||
+ addi.d a0, a0, 8
|
||||
+ addi.d a1, a1, 8
|
||||
+ addi.d a2, a2, -8
|
||||
+
|
||||
+ sll.d a3, t2, a7
|
||||
+ or t0, a3, t0
|
||||
+ bne t0, t1, L(out1)
|
||||
+ srl.d t0, t2, a6
|
||||
+
|
||||
+L(un_less_8bytes):
|
||||
+ beqz a2, L(ret)
|
||||
+ andi a7, a7, 63
|
||||
+ slli.d a4, a2, 3
|
||||
+ bgeu a7, a4, L(last_cmp)
|
||||
+
|
||||
+ ld.d t2, a0, 8
|
||||
+ sll.d a3, t2, a7
|
||||
+ or t0, a3, t0
|
||||
+
|
||||
+L(last_cmp):
|
||||
+ ld.d t1, a1, 8
|
||||
+
|
||||
+ li.d t7, -1
|
||||
+ sll.d t2, t7, a4
|
||||
+ sub.d t3, t0, t1
|
||||
+ andn t6, t3, t2
|
||||
+
|
||||
+ bnez t6, L(count_diff)
|
||||
+ move a0, zero
|
||||
+ jr ra
|
||||
+END(MEMCMP_NAME)
|
||||
+
|
||||
+libc_hidden_builtin_def (MEMCMP_NAME)
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S
|
||||
new file mode 100644
|
||||
index 00000000..3151a179
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S
|
||||
@@ -0,0 +1,207 @@
|
||||
+/* Optimized memcmp implementation using LoongArch LASX instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+# define MEMCMP __memcmp_lasx
|
||||
+
|
||||
+LEAF(MEMCMP, 6)
|
||||
+ li.d t2, 32
|
||||
+ add.d a3, a0, a2
|
||||
+ add.d a4, a1, a2
|
||||
+ bgeu t2, a2, L(less32)
|
||||
+
|
||||
+ li.d t1, 160
|
||||
+ bgeu a2, t1, L(make_aligned)
|
||||
+L(loop32):
|
||||
+ xvld xr0, a0, 0
|
||||
+ xvld xr1, a1, 0
|
||||
+
|
||||
+ addi.d a0, a0, 32
|
||||
+ addi.d a1, a1, 32
|
||||
+ addi.d a2, a2, -32
|
||||
+ xvseq.b xr2, xr0, xr1
|
||||
+
|
||||
+ xvsetanyeqz.b fcc0, xr2
|
||||
+ bcnez fcc0, L(end)
|
||||
+L(last_bytes):
|
||||
+ bltu t2, a2, L(loop32)
|
||||
+ xvld xr0, a3, -32
|
||||
+
|
||||
+
|
||||
+ xvld xr1, a4, -32
|
||||
+ xvseq.b xr2, xr0, xr1
|
||||
+L(end):
|
||||
+ xvmsknz.b xr2, xr2
|
||||
+ xvpermi.q xr4, xr0, 1
|
||||
+
|
||||
+ xvpickve.w xr3, xr2, 4
|
||||
+ xvpermi.q xr5, xr1, 1
|
||||
+ vilvl.h vr2, vr3, vr2
|
||||
+ movfr2gr.s t0, fa2
|
||||
+
|
||||
+ cto.w t0, t0
|
||||
+ vreplgr2vr.b vr2, t0
|
||||
+ vshuf.b vr0, vr4, vr0, vr2
|
||||
+ vshuf.b vr1, vr5, vr1, vr2
|
||||
+
|
||||
+ vpickve2gr.bu t0, vr0, 0
|
||||
+ vpickve2gr.bu t1, vr1, 0
|
||||
+ sub.d a0, t0, t1
|
||||
+ jr ra
|
||||
+
|
||||
+
|
||||
+L(less32):
|
||||
+ srli.d t0, a2, 4
|
||||
+ beqz t0, L(less16)
|
||||
+ vld vr0, a0, 0
|
||||
+ vld vr1, a1, 0
|
||||
+
|
||||
+ vld vr2, a3, -16
|
||||
+ vld vr3, a4, -16
|
||||
+L(short_ret):
|
||||
+ vseq.b vr4, vr0, vr1
|
||||
+ vseq.b vr5, vr2, vr3
|
||||
+
|
||||
+ vmsknz.b vr4, vr4
|
||||
+ vmsknz.b vr5, vr5
|
||||
+ vilvl.h vr4, vr5, vr4
|
||||
+ movfr2gr.s t0, fa4
|
||||
+
|
||||
+ cto.w t0, t0
|
||||
+ vreplgr2vr.b vr4, t0
|
||||
+ vshuf.b vr0, vr2, vr0, vr4
|
||||
+ vshuf.b vr1, vr3, vr1, vr4
|
||||
+
|
||||
+
|
||||
+ vpickve2gr.bu t0, vr0, 0
|
||||
+ vpickve2gr.bu t1, vr1, 0
|
||||
+ sub.d a0, t0, t1
|
||||
+ jr ra
|
||||
+
|
||||
+L(less16):
|
||||
+ srli.d t0, a2, 3
|
||||
+ beqz t0, L(less8)
|
||||
+ vldrepl.d vr0, a0, 0
|
||||
+ vldrepl.d vr1, a1, 0
|
||||
+
|
||||
+ vldrepl.d vr2, a3, -8
|
||||
+ vldrepl.d vr3, a4, -8
|
||||
+ b L(short_ret)
|
||||
+ nop
|
||||
+
|
||||
+L(less8):
|
||||
+ srli.d t0, a2, 2
|
||||
+ beqz t0, L(less4)
|
||||
+ vldrepl.w vr0, a0, 0
|
||||
+ vldrepl.w vr1, a1, 0
|
||||
+
|
||||
+
|
||||
+ vldrepl.w vr2, a3, -4
|
||||
+ vldrepl.w vr3, a4, -4
|
||||
+ b L(short_ret)
|
||||
+ nop
|
||||
+
|
||||
+L(less4):
|
||||
+ srli.d t0, a2, 1
|
||||
+ beqz t0, L(less2)
|
||||
+ vldrepl.h vr0, a0, 0
|
||||
+ vldrepl.h vr1, a1, 0
|
||||
+
|
||||
+ vldrepl.h vr2, a3, -2
|
||||
+ vldrepl.h vr3, a4, -2
|
||||
+ b L(short_ret)
|
||||
+ nop
|
||||
+
|
||||
+L(less2):
|
||||
+ beqz a2, L(ret0)
|
||||
+ ld.bu t0, a0, 0
|
||||
+ ld.bu t1, a1, 0
|
||||
+ sub.d a0, t0, t1
|
||||
+
|
||||
+ jr ra
|
||||
+L(ret0):
|
||||
+ move a0, zero
|
||||
+ jr ra
|
||||
+
|
||||
+L(make_aligned):
|
||||
+ xvld xr0, a0, 0
|
||||
+
|
||||
+ xvld xr1, a1, 0
|
||||
+ xvseq.b xr2, xr0, xr1
|
||||
+ xvsetanyeqz.b fcc0, xr2
|
||||
+ bcnez fcc0, L(end)
|
||||
+
|
||||
+ andi t0, a0, 0x1f
|
||||
+ sub.d t0, t2, t0
|
||||
+ sub.d t1, a2, t0
|
||||
+ add.d a0, a0, t0
|
||||
+
|
||||
+ add.d a1, a1, t0
|
||||
+ andi a2, t1, 0x3f
|
||||
+ sub.d t0, t1, a2
|
||||
+ add.d a5, a0, t0
|
||||
+
|
||||
+
|
||||
+L(loop_align):
|
||||
+ xvld xr0, a0, 0
|
||||
+ xvld xr1, a1, 0
|
||||
+ xvld xr2, a0, 32
|
||||
+ xvld xr3, a1, 32
|
||||
+
|
||||
+ xvseq.b xr0, xr0, xr1
|
||||
+ xvseq.b xr1, xr2, xr3
|
||||
+ xvmin.bu xr2, xr1, xr0
|
||||
+ xvsetanyeqz.b fcc0, xr2
|
||||
+
|
||||
+ bcnez fcc0, L(pair_end)
|
||||
+ addi.d a0, a0, 64
|
||||
+ addi.d a1, a1, 64
|
||||
+ bne a0, a5, L(loop_align)
|
||||
+
|
||||
+ bnez a2, L(last_bytes)
|
||||
+ move a0, zero
|
||||
+ jr ra
|
||||
+ nop
|
||||
+
|
||||
+
|
||||
+L(pair_end):
|
||||
+ xvmsknz.b xr0, xr0
|
||||
+ xvmsknz.b xr1, xr1
|
||||
+ xvpickve.w xr2, xr0, 4
|
||||
+ xvpickve.w xr3, xr1, 4
|
||||
+
|
||||
+ vilvl.h vr0, vr2, vr0
|
||||
+ vilvl.h vr1, vr3, vr1
|
||||
+ vilvl.w vr0, vr1, vr0
|
||||
+ movfr2gr.d t0, fa0
|
||||
+
|
||||
+ cto.d t0, t0
|
||||
+ ldx.bu t1, a0, t0
|
||||
+ ldx.bu t2, a1, t0
|
||||
+ sub.d a0, t1, t2
|
||||
+
|
||||
+ jr ra
|
||||
+END(MEMCMP)
|
||||
+
|
||||
+libc_hidden_builtin_def (MEMCMP)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S
|
||||
new file mode 100644
|
||||
index 00000000..38a50a4c
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S
|
||||
@@ -0,0 +1,269 @@
|
||||
+/* Optimized memcmp implementation using LoongArch LSX instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+#define MEMCMP __memcmp_lsx
|
||||
+
|
||||
+LEAF(MEMCMP, 6)
|
||||
+ beqz a2, L(out)
|
||||
+ pcalau12i t0, %pc_hi20(L(INDEX))
|
||||
+ andi a3, a0, 0xf
|
||||
+ vld vr5, t0, %pc_lo12(L(INDEX))
|
||||
+
|
||||
+ andi a4, a1, 0xf
|
||||
+ bne a3, a4, L(unaligned)
|
||||
+ bstrins.d a0, zero, 3, 0
|
||||
+ xor a1, a1, a4
|
||||
+
|
||||
+ vld vr0, a0, 0
|
||||
+ vld vr1, a1, 0
|
||||
+ li.d t0, 16
|
||||
+ vreplgr2vr.b vr3, a3
|
||||
+
|
||||
+ sub.d t1, t0, a3
|
||||
+ vadd.b vr3, vr3, vr5
|
||||
+ vshuf.b vr0, vr3, vr0, vr3
|
||||
+ vshuf.b vr1, vr3, vr1, vr3
|
||||
+
|
||||
+
|
||||
+ vseq.b vr4, vr0, vr1
|
||||
+ bgeu t1, a2, L(al_end)
|
||||
+ vsetanyeqz.b fcc0, vr4
|
||||
+ bcnez fcc0, L(al_found)
|
||||
+
|
||||
+ sub.d t1, a2, t1
|
||||
+ andi a2, t1, 31
|
||||
+ beq a2, t1, L(al_less_32bytes)
|
||||
+ sub.d t2, t1, a2
|
||||
+
|
||||
+ add.d a4, a0, t2
|
||||
+L(al_loop):
|
||||
+ vld vr0, a0, 16
|
||||
+ vld vr1, a1, 16
|
||||
+ vld vr2, a0, 32
|
||||
+
|
||||
+ vld vr3, a1, 32
|
||||
+ addi.d a0, a0, 32
|
||||
+ addi.d a1, a1, 32
|
||||
+ vseq.b vr4, vr0, vr1
|
||||
+
|
||||
+
|
||||
+ vseq.b vr6, vr2, vr3
|
||||
+ vand.v vr6, vr4, vr6
|
||||
+ vsetanyeqz.b fcc0, vr6
|
||||
+ bcnez fcc0, L(al_pair_end)
|
||||
+
|
||||
+ bne a0, a4, L(al_loop)
|
||||
+L(al_less_32bytes):
|
||||
+ bgeu t0, a2, L(al_less_16bytes)
|
||||
+ vld vr0, a0, 16
|
||||
+ vld vr1, a1, 16
|
||||
+
|
||||
+ vld vr2, a0, 32
|
||||
+ vld vr3, a1, 32
|
||||
+ addi.d a2, a2, -16
|
||||
+ vreplgr2vr.b vr6, a2
|
||||
+
|
||||
+ vslt.b vr5, vr5, vr6
|
||||
+ vseq.b vr4, vr0, vr1
|
||||
+ vseq.b vr6, vr2, vr3
|
||||
+ vorn.v vr6, vr6, vr5
|
||||
+
|
||||
+
|
||||
+L(al_pair_end):
|
||||
+ vsetanyeqz.b fcc0, vr4
|
||||
+ bcnez fcc0, L(al_found)
|
||||
+ vnori.b vr4, vr6, 0
|
||||
+ vfrstpi.b vr4, vr4, 0
|
||||
+
|
||||
+ vshuf.b vr0, vr2, vr2, vr4
|
||||
+ vshuf.b vr1, vr3, vr3, vr4
|
||||
+ vpickve2gr.bu t0, vr0, 0
|
||||
+ vpickve2gr.bu t1, vr1, 0
|
||||
+
|
||||
+ sub.d a0, t0, t1
|
||||
+ jr ra
|
||||
+ nop
|
||||
+ nop
|
||||
+
|
||||
+L(al_less_16bytes):
|
||||
+ beqz a2, L(out)
|
||||
+ vld vr0, a0, 16
|
||||
+ vld vr1, a1, 16
|
||||
+ vseq.b vr4, vr0, vr1
|
||||
+
|
||||
+
|
||||
+L(al_end):
|
||||
+ vreplgr2vr.b vr6, a2
|
||||
+ vslt.b vr5, vr5, vr6
|
||||
+ vorn.v vr4, vr4, vr5
|
||||
+ nop
|
||||
+
|
||||
+L(al_found):
|
||||
+ vnori.b vr4, vr4, 0
|
||||
+ vfrstpi.b vr4, vr4, 0
|
||||
+ vshuf.b vr0, vr0, vr0, vr4
|
||||
+ vshuf.b vr1, vr1, vr1, vr4
|
||||
+
|
||||
+ vpickve2gr.bu t0, vr0, 0
|
||||
+ vpickve2gr.bu t1, vr1, 0
|
||||
+ sub.d a0, t0, t1
|
||||
+ jr ra
|
||||
+
|
||||
+L(out):
|
||||
+ move a0, zero
|
||||
+ jr ra
|
||||
+ nop
|
||||
+ nop
|
||||
+
|
||||
+
|
||||
+L(unaligned):
|
||||
+ xor t2, a0, a1
|
||||
+ sltu a5, a3, a4
|
||||
+ masknez t2, t2, a5
|
||||
+ xor a0, a0, t2
|
||||
+
|
||||
+ xor a1, a1, t2
|
||||
+ andi a3, a0, 0xf
|
||||
+ andi a4, a1, 0xf
|
||||
+ bstrins.d a0, zero, 3, 0
|
||||
+
|
||||
+ xor a1, a1, a4
|
||||
+ vld vr4, a0, 0
|
||||
+ vld vr1, a1, 0
|
||||
+ li.d t0, 16
|
||||
+
|
||||
+ vreplgr2vr.b vr2, a4
|
||||
+ sub.d a6, a4, a3
|
||||
+ sub.d t1, t0, a4
|
||||
+ sub.d t2, t0, a6
|
||||
+
|
||||
+
|
||||
+ vadd.b vr2, vr2, vr5
|
||||
+ vreplgr2vr.b vr6, t2
|
||||
+ vadd.b vr6, vr6, vr5
|
||||
+ vshuf.b vr0, vr4, vr4, vr6
|
||||
+
|
||||
+ vshuf.b vr1, vr2, vr1, vr2
|
||||
+ vshuf.b vr0, vr2, vr0, vr2
|
||||
+ vseq.b vr7, vr0, vr1
|
||||
+ bgeu t1, a2, L(un_end)
|
||||
+
|
||||
+ vsetanyeqz.b fcc0, vr7
|
||||
+ bcnez fcc0, L(un_found)
|
||||
+ sub.d a2, a2, t1
|
||||
+ andi t1, a2, 31
|
||||
+
|
||||
+ beq a2, t1, L(un_less_32bytes)
|
||||
+ sub.d t2, a2, t1
|
||||
+ move a2, t1
|
||||
+ add.d a4, a1, t2
|
||||
+
|
||||
+
|
||||
+L(un_loop):
|
||||
+ vld vr2, a0, 16
|
||||
+ vld vr1, a1, 16
|
||||
+ vld vr3, a1, 32
|
||||
+ addi.d a1, a1, 32
|
||||
+
|
||||
+ addi.d a0, a0, 32
|
||||
+ vshuf.b vr0, vr2, vr4, vr6
|
||||
+ vld vr4, a0, 0
|
||||
+ vseq.b vr7, vr0, vr1
|
||||
+
|
||||
+ vshuf.b vr2, vr4, vr2, vr6
|
||||
+ vseq.b vr8, vr2, vr3
|
||||
+ vand.v vr8, vr7, vr8
|
||||
+ vsetanyeqz.b fcc0, vr8
|
||||
+
|
||||
+ bcnez fcc0, L(un_pair_end)
|
||||
+ bne a1, a4, L(un_loop)
|
||||
+
|
||||
+L(un_less_32bytes):
|
||||
+ bltu a2, t0, L(un_less_16bytes)
|
||||
+ vld vr2, a0, 16
|
||||
+ vld vr1, a1, 16
|
||||
+ addi.d a0, a0, 16
|
||||
+
|
||||
+ addi.d a1, a1, 16
|
||||
+ addi.d a2, a2, -16
|
||||
+ vshuf.b vr0, vr2, vr4, vr6
|
||||
+ vor.v vr4, vr2, vr2
|
||||
+
|
||||
+ vseq.b vr7, vr0, vr1
|
||||
+ vsetanyeqz.b fcc0, vr7
|
||||
+ bcnez fcc0, L(un_found)
|
||||
+L(un_less_16bytes):
|
||||
+ beqz a2, L(out)
|
||||
+ vld vr1, a1, 16
|
||||
+ bgeu a6, a2, 1f
|
||||
+
|
||||
+ vld vr2, a0, 16
|
||||
+1:
|
||||
+ vshuf.b vr0, vr2, vr4, vr6
|
||||
+ vseq.b vr7, vr0, vr1
|
||||
+L(un_end):
|
||||
+ vreplgr2vr.b vr3, a2
|
||||
+
|
||||
+
|
||||
+ vslt.b vr3, vr5, vr3
|
||||
+ vorn.v vr7, vr7, vr3
|
||||
+
|
||||
+L(un_found):
|
||||
+ vnori.b vr7, vr7, 0
|
||||
+ vfrstpi.b vr7, vr7, 0
|
||||
+
|
||||
+ vshuf.b vr0, vr0, vr0, vr7
|
||||
+ vshuf.b vr1, vr1, vr1, vr7
|
||||
+L(calc_result):
|
||||
+ vpickve2gr.bu t0, vr0, 0
|
||||
+ vpickve2gr.bu t1, vr1, 0
|
||||
+
|
||||
+ sub.d t2, t0, t1
|
||||
+ sub.d t3, t1, t0
|
||||
+ masknez t0, t3, a5
|
||||
+ maskeqz t1, t2, a5
|
||||
+
|
||||
+ or a0, t0, t1
|
||||
+ jr ra
|
||||
+L(un_pair_end):
|
||||
+ vsetanyeqz.b fcc0, vr7
|
||||
+ bcnez fcc0, L(un_found)
|
||||
+
|
||||
+
|
||||
+ vnori.b vr7, vr8, 0
|
||||
+ vfrstpi.b vr7, vr7, 0
|
||||
+ vshuf.b vr0, vr2, vr2, vr7
|
||||
+ vshuf.b vr1, vr3, vr3, vr7
|
||||
+
|
||||
+ b L(calc_result)
|
||||
+END(MEMCMP)
|
||||
+
|
||||
+ .section .rodata.cst16,"M",@progbits,16
|
||||
+ .align 4
|
||||
+L(INDEX):
|
||||
+ .dword 0x0706050403020100
|
||||
+ .dword 0x0f0e0d0c0b0a0908
|
||||
+
|
||||
+libc_hidden_builtin_def (MEMCMP)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp.c b/sysdeps/loongarch/lp64/multiarch/memcmp.c
|
||||
new file mode 100644
|
||||
index 00000000..32eccac2
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memcmp.c
|
||||
@@ -0,0 +1,43 @@
|
||||
+/* Multiple versions of memcmp.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* Define multiple versions only for the definition in libc. */
|
||||
+#if IS_IN (libc) /* everything below is compiled only when building libc itself */
|
||||
+# define memcmp __redirect_memcmp /* while <string.h> is read, every memcmp token becomes __redirect_memcmp */
|
||||
+# include <string.h>
|
||||
+# undef memcmp /* restore the real name so it can be defined as the ifunc symbol below */
|
||||
+
|
||||
+# define SYMBOL_NAME memcmp /* set before including the selector header, which presumably derives its names from it */
|
||||
+# include "ifunc-memcmp.h"
|
||||
+
|
||||
+libc_ifunc_redirected (__redirect_memcmp, memcmp, /* define memcmp as an ifunc with the prototype of __redirect_memcmp */
|
||||
+ IFUNC_SELECTOR ()); /* resolver expression: the implementation chosen by the selector */
|
||||
+# undef bcmp /* drop any macro definition of bcmp before naming the alias */
|
||||
+weak_alias (memcmp, bcmp) /* bcmp is exported as a weak alias of the same ifunc */
|
||||
+
|
||||
+# undef __memcmpeq
|
||||
+strong_alias (memcmp, __memcmpeq) /* __memcmpeq shares the memcmp implementation */
|
||||
+libc_hidden_def (__memcmpeq)
|
||||
+
|
||||
+# ifdef SHARED /* the hidden __GI_memcmp alias is only emitted for the shared library build */
|
||||
+__hidden_ver1 (memcmp, __GI_memcmp, __redirect_memcmp)
|
||||
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memcmp); /* hidden visibility, attributes copied from memcmp */
|
||||
+# endif
|
||||
+
|
||||
+#endif
|
||||
--
|
||||
2.33.0
|
||||
|
417
LoongArch-Add-ifunc-support-for-memrchr-lsx-lasx.patch
Normal file
417
LoongArch-Add-ifunc-support-for-memrchr-lsx-lasx.patch
Normal file
|
@ -0,0 +1,417 @@
|
|||
From c4c272fb8067364530a2a78df92c37403acc963f Mon Sep 17 00:00:00 2001
|
||||
From: dengjianbo <dengjianbo@loongson.cn>
|
||||
Date: Mon, 28 Aug 2023 10:08:37 +0800
|
||||
Subject: [PATCH 16/29] LoongArch: Add ifunc support for memrchr{lsx, lasx}
|
||||
|
||||
According to glibc memrchr microbenchmark, this implementation could reduce
|
||||
the runtime as follows:
|
||||
|
||||
Name Percent of runtime reduced
|
||||
memrchr-lasx 20%-83%
|
||||
memrchr-lsx 20%-64%
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
|
||||
.../lp64/multiarch/ifunc-impl-list.c | 8 ++
|
||||
.../loongarch/lp64/multiarch/ifunc-memrchr.h | 40 ++++++
|
||||
.../lp64/multiarch/memrchr-generic.c | 23 ++++
|
||||
.../loongarch/lp64/multiarch/memrchr-lasx.S | 123 ++++++++++++++++++
|
||||
.../loongarch/lp64/multiarch/memrchr-lsx.S | 105 +++++++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/memrchr.c | 33 +++++
|
||||
7 files changed, 335 insertions(+)
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-generic.c
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr.c
|
||||
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
index 2f4802cf..7b87bc90 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
@@ -27,5 +27,8 @@ sysdep_routines += \
|
||||
memchr-aligned \
|
||||
memchr-lsx \
|
||||
memchr-lasx \
|
||||
+ memrchr-generic \
|
||||
+ memrchr-lsx \
|
||||
+ memrchr-lasx \
|
||||
# sysdep_routines
|
||||
endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
index a567b9cf..8bd5489e 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
@@ -109,5 +109,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
#endif
|
||||
IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned)
|
||||
)
|
||||
+
|
||||
+ IFUNC_IMPL (i, name, memrchr,
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LASX, __memrchr_lasx)
|
||||
+ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LSX, __memrchr_lsx)
|
||||
+#endif
|
||||
+ IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic)
|
||||
+ )
|
||||
return i;
|
||||
}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h
|
||||
new file mode 100644
|
||||
index 00000000..8215f9ad
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h
|
||||
@@ -0,0 +1,40 @@
|
||||
+/* Common definition for memrchr implementation.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <ldsodefs.h>
|
||||
+#include <ifunc-init.h>
|
||||
+
|
||||
+#if !defined __loongarch_soft_float
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
|
||||
+#endif
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
|
||||
+
|
||||
+static inline void * /* returns the address of the memrchr variant to bind to */
|
||||
+IFUNC_SELECTOR (void) /* takes no arguments; decides purely from the SUPPORT_* feature tests */
|
||||
+{
|
||||
+#if !defined __loongarch_soft_float /* the lasx/lsx variants are only declared for non-soft-float builds */
|
||||
+ if (SUPPORT_LASX) /* tested first, so LASX wins when both vector extensions are present */
|
||||
+ return OPTIMIZE (lasx);
|
||||
+ else if (SUPPORT_LSX)
|
||||
+ return OPTIMIZE (lsx);
|
||||
+ else /* dangling else: it binds to the return statement after the #endif */
|
||||
+#endif
|
||||
+ return OPTIMIZE (generic); /* fallback, and the only statement left in soft-float builds */
|
||||
+}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c b/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c
|
||||
new file mode 100644
|
||||
index 00000000..ced61ebc
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c
|
||||
@@ -0,0 +1,23 @@
|
||||
+/* Generic implementation of memrchr.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#if IS_IN (libc) /* rename only inside libc; other builds keep the default name */
|
||||
+# define MEMRCHR __memrchr_generic /* presumably the name string/memrchr.c gives its function when MEMRCHR is set - TODO confirm */
|
||||
+#endif
|
||||
+
|
||||
+#include <string/memrchr.c> /* reuse the portable C implementation as the generic ifunc candidate */
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S
|
||||
new file mode 100644
|
||||
index 00000000..5f3e0d06
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S
|
||||
@@ -0,0 +1,123 @@
|
||||
+/* Optimized memrchr implementation using LoongArch LASX instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+#ifndef MEMRCHR
|
||||
+# define MEMRCHR __memrchr_lasx
|
||||
+#endif
|
||||
+
|
||||
+LEAF(MEMRCHR, 6) /* in: a0 = s, a1 = c, a2 = n; out: a0 = address of the last byte equal to c, or 0 */
|
||||
+ beqz a2, L(ret0) /* n == 0: nothing to search */
|
||||
+ addi.d a2, a2, -1 /* a2 = n - 1 */
|
||||
+ add.d a3, a0, a2 /* a3 = address of the last byte of the buffer */
|
||||
+ andi t1, a3, 0x3f /* t1 = offset of that byte inside its 64-byte chunk */
|
||||
+
|
||||
+ bstrins.d a3, zero, 5, 0 /* round a3 down to a 64-byte boundary */
|
||||
+ addi.d t1, t1, 1 /* t1 = count of chunk bytes at or below the last byte (1..64) */
|
||||
+ xvld xr0, a3, 0 /* load the whole aligned chunk: low 32 bytes ... */
|
||||
+ xvld xr1, a3, 32 /* ... and high 32 bytes */
|
||||
+
|
||||
+ sub.d t2, zero, t1 /* t2 = -t1, used just below as a shift count */
|
||||
+ li.d t3, -1 /* t3 = all ones, source of both edge masks */
|
||||
+ xvreplgr2vr.b xr2, a1 /* broadcast the low byte of c into every byte of xr2 */
|
||||
+ andi t4, a0, 0x3f /* t4 = offset of s inside its 64-byte chunk */
|
||||
+
|
||||
+ srl.d t2, t3, t2 /* t2 = mask with the low t1 bits set (shift count is taken mod 64) */
|
||||
+ xvseq.b xr0, xr0, xr2 /* bytes equal to c become all ones, others zero */
|
||||
+ xvseq.b xr1, xr1, xr2
|
||||
+ xvmsknz.b xr0, xr0 /* collapse the byte matches into bit masks (presumably one 16-bit mask per 128-bit half) */
|
||||
+
|
||||
+
|
||||
+ xvmsknz.b xr1, xr1
|
||||
+ xvpickve.w xr3, xr0, 4 /* bring word 4 (the upper half's mask) down so it can be merged */
|
||||
+ xvpickve.w xr4, xr1, 4
|
||||
+ vilvl.h vr0, vr3, vr0 /* merged mask for the low 32 bytes */
|
||||
+
|
||||
+ vilvl.h vr1, vr4, vr1 /* merged mask for the high 32 bytes */
|
||||
+ vilvl.w vr0, vr1, vr0 /* 64-bit mask; the code below treats bit i as "byte i of the chunk matches" */
|
||||
+ movfr2gr.d t0, fa0 /* move the mask into a general register */
|
||||
+ and t0, t0, t2 /* discard matches that lie past the end of the buffer */
|
||||
+
|
||||
+ bltu a2, t1, L(end) /* n <= t1: the buffer also starts inside this chunk */
|
||||
+ bnez t0, L(found) /* a match in the last chunk is already the answer */
|
||||
+ bstrins.d a0, zero, 5, 0 /* a0 = start of the chunk containing s, used as the loop bound */
|
||||
+L(loop): /* walk backwards, one 64-byte chunk per iteration */
|
||||
+ xvld xr0, a3, -64
|
||||
+
|
||||
+ xvld xr1, a3, -32
|
||||
+ addi.d a3, a3, -64 /* a3 = base of the chunk just loaded */
|
||||
+ xvseq.b xr0, xr0, xr2
|
||||
+ xvseq.b xr1, xr1, xr2
|
||||
+
|
||||
+
|
||||
+ beq a0, a3, L(out) /* this is the chunk holding s: finish with head masking */
|
||||
+ xvmax.bu xr3, xr0, xr1 /* non-zero iff either half contains a match */
|
||||
+ xvseteqz.v fcc0, xr3 /* fcc0 set when there is no match in the chunk */
|
||||
+ bcnez fcc0, L(loop)
|
||||
+
|
||||
+ xvmsknz.b xr0, xr0 /* match found: rebuild the 64-bit mask as in the prologue */
|
||||
+ xvmsknz.b xr1, xr1
|
||||
+ xvpickve.w xr3, xr0, 4
|
||||
+ xvpickve.w xr4, xr1, 4
|
||||
+
|
||||
+ vilvl.h vr0, vr3, vr0
|
||||
+ vilvl.h vr1, vr4, vr1
|
||||
+ vilvl.w vr0, vr1, vr0
|
||||
+ movfr2gr.d t0, fa0
|
||||
+
|
||||
+L(found): /* t0 is non-zero here; its highest set bit marks the last match */
|
||||
+ addi.d a0, a3, 63 /* address of the top byte of the chunk */
|
||||
+ clz.d t1, t0 /* distance from the top byte down to the match */
|
||||
+ sub.d a0, a0, t1
|
||||
+ jr ra
|
||||
+
|
||||
+
|
||||
+L(out): /* first chunk of the buffer: build the mask, then fall into L(end) */
|
||||
+ xvmsknz.b xr0, xr0
|
||||
+ xvmsknz.b xr1, xr1
|
||||
+ xvpickve.w xr3, xr0, 4
|
||||
+ xvpickve.w xr4, xr1, 4
|
||||
+
|
||||
+ vilvl.h vr0, vr3, vr0
|
||||
+ vilvl.h vr1, vr4, vr1
|
||||
+ vilvl.w vr0, vr1, vr0
|
||||
+ movfr2gr.d t0, fa0
|
||||
+
|
||||
+L(end):
|
||||
+ sll.d t2, t3, t4 /* mask that clears the bits below the offset of s */
|
||||
+ and t0, t0, t2 /* discard matches that lie before the start of the buffer */
|
||||
+ addi.d a0, a3, 63
|
||||
+ clz.d t1, t0
|
||||
+
|
||||
+ sub.d a0, a0, t1
|
||||
+ maskeqz a0, a0, t0 /* result becomes 0 when no match bit survived the masking */
|
||||
+ jr ra
|
||||
+L(ret0):
|
||||
+ move a0, zero /* return a null pointer */
|
||||
+
|
||||
+
|
||||
+ jr ra
|
||||
+END(MEMRCHR)
|
||||
+
|
||||
+libc_hidden_builtin_def (MEMRCHR)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S
|
||||
new file mode 100644
|
||||
index 00000000..39a7c8b0
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S
|
||||
@@ -0,0 +1,105 @@
|
||||
+/* Optimized memrchr implementation using LoongArch LSX instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+# define MEMRCHR __memrchr_lsx
|
||||
+
|
||||
+LEAF(MEMRCHR, 6) /* in: a0 = s, a1 = c, a2 = n; out: a0 = address of the last byte equal to c, or 0 */
|
||||
+ beqz a2, L(ret0) /* n == 0: nothing to search */
|
||||
+ addi.d a2, a2, -1 /* a2 = n - 1 */
|
||||
+ add.d a3, a0, a2 /* a3 = address of the last byte of the buffer */
|
||||
+ andi t1, a3, 0x1f /* t1 = offset of that byte inside its 32-byte chunk */
|
||||
+
|
||||
+ bstrins.d a3, zero, 4, 0 /* round a3 down to a 32-byte boundary */
|
||||
+ addi.d t1, t1, 1 /* t1 = count of chunk bytes at or below the last byte (1..32) */
|
||||
+ vld vr0, a3, 0 /* load the whole aligned chunk: low 16 bytes ... */
|
||||
+ vld vr1, a3, 16 /* ... and high 16 bytes */
|
||||
+
|
||||
+ sub.d t2, zero, t1 /* t2 = -t1, used just below as a shift count */
|
||||
+ li.d t3, -1 /* t3 = all ones, source of both edge masks */
|
||||
+ vreplgr2vr.b vr2, a1 /* broadcast the low byte of c into every byte of vr2 */
|
||||
+ andi t4, a0, 0x1f /* t4 = offset of s inside its 32-byte chunk */
|
||||
+
|
||||
+ srl.d t2, t3, t2 /* t2 = mask with the low t1 bits set (shift count is taken mod 64) */
|
||||
+ vseq.b vr0, vr0, vr2 /* bytes equal to c become all ones, others zero */
|
||||
+ vseq.b vr1, vr1, vr2
|
||||
+ vmsknz.b vr0, vr0 /* collapse the byte matches of each vector into a 16-bit mask */
|
||||
+
|
||||
+
|
||||
+ vmsknz.b vr1, vr1
|
||||
+ vilvl.h vr0, vr1, vr0 /* 32-bit mask; the code below treats bit i as "byte i of the chunk matches" */
|
||||
+ movfr2gr.s t0, fa0 /* move the mask into a general register */
|
||||
+ and t0, t0, t2 /* discard matches that lie past the end of the buffer */
|
||||
+
|
||||
+ bltu a2, t1, L(end) /* n <= t1: the buffer also starts inside this chunk */
|
||||
+ bnez t0, L(found) /* a match in the last chunk is already the answer */
|
||||
+ bstrins.d a0, zero, 4, 0 /* a0 = start of the chunk containing s, used as the loop bound */
|
||||
+L(loop): /* walk backwards, one 32-byte chunk per iteration */
|
||||
+ vld vr0, a3, -32
|
||||
+
|
||||
+ vld vr1, a3, -16
|
||||
+ addi.d a3, a3, -32 /* a3 = base of the chunk just loaded */
|
||||
+ vseq.b vr0, vr0, vr2
|
||||
+ vseq.b vr1, vr1, vr2
|
||||
+
|
||||
+ beq a0, a3, L(out) /* this is the chunk holding s: finish with head masking */
|
||||
+ vmax.bu vr3, vr0, vr1 /* non-zero iff either half contains a match */
|
||||
+ vseteqz.v fcc0, vr3 /* fcc0 set when there is no match in the chunk */
|
||||
+ bcnez fcc0, L(loop)
|
||||
+
|
||||
+
|
||||
+ vmsknz.b vr0, vr0 /* match found: rebuild the 32-bit mask as in the prologue */
|
||||
+ vmsknz.b vr1, vr1
|
||||
+ vilvl.h vr0, vr1, vr0
|
||||
+ movfr2gr.s t0, fa0
|
||||
+
|
||||
+L(found): /* low 32 bits of t0 are non-zero here; the highest set bit marks the last match */
|
||||
+ addi.d a0, a3, 31 /* address of the top byte of the chunk */
|
||||
+ clz.w t1, t0 /* distance from the top byte down to the match */
|
||||
+ sub.d a0, a0, t1
|
||||
+ jr ra
|
||||
+
|
||||
+L(out): /* first chunk of the buffer: build the mask, then fall into L(end) */
|
||||
+ vmsknz.b vr0, vr0
|
||||
+ vmsknz.b vr1, vr1
|
||||
+ vilvl.h vr0, vr1, vr0
|
||||
+ movfr2gr.s t0, fa0
|
||||
+
|
||||
+L(end):
|
||||
+ sll.d t2, t3, t4 /* mask that clears the bits below the offset of s */
|
||||
+ and t0, t0, t2 /* discard matches that lie before the start of the buffer */
|
||||
+ addi.d a0, a3, 31
|
||||
+ clz.w t1, t0
|
||||
+
|
||||
+
|
||||
+ sub.d a0, a0, t1
|
||||
+ maskeqz a0, a0, t0 /* result becomes 0 when no match bit survived the masking */
|
||||
+ jr ra
|
||||
+L(ret0):
|
||||
+ move a0, zero /* return a null pointer */
|
||||
+
|
||||
+ jr ra
|
||||
+END(MEMRCHR)
|
||||
+
|
||||
+libc_hidden_builtin_def (MEMRCHR)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr.c b/sysdeps/loongarch/lp64/multiarch/memrchr.c
|
||||
new file mode 100644
|
||||
index 00000000..8baba9ab
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memrchr.c
|
||||
@@ -0,0 +1,33 @@
|
||||
+/* Multiple versions of memrchr.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* Define multiple versions only for the definition in libc. */
|
||||
+#if IS_IN (libc) /* everything below is compiled only when building libc itself */
|
||||
+# define memrchr __redirect_memrchr /* while <string.h> is read, every memrchr token becomes __redirect_memrchr */
|
||||
+# include <string.h>
|
||||
+# undef memrchr /* restore the real name for the alias defined below */
|
||||
+
|
||||
+# define SYMBOL_NAME memrchr /* set before including the selector header, which presumably derives its names from it */
|
||||
+# include "ifunc-memrchr.h"
|
||||
+
|
||||
+libc_ifunc_redirected (__redirect_memrchr, __memrchr, IFUNC_SELECTOR ()); /* __memrchr is the ifunc; its resolver returns the selector's choice */
|
||||
+libc_hidden_def (__memrchr)
|
||||
+weak_alias (__memrchr, memrchr) /* the public name memrchr is a weak alias of __memrchr */
|
||||
+
|
||||
+#endif
|
||||
--
|
||||
2.33.0
|
||||
|
784
LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch
Normal file
784
LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch
Normal file
|
@ -0,0 +1,784 @@
|
|||
From 14032f7bbe18443af8492f5d0365f72b76701673 Mon Sep 17 00:00:00 2001
|
||||
From: dengjianbo <dengjianbo@loongson.cn>
|
||||
Date: Mon, 28 Aug 2023 10:08:38 +0800
|
||||
Subject: [PATCH 17/29] LoongArch: Add ifunc support for memset{aligned,
|
||||
unaligned, lsx, lasx}
|
||||
|
||||
According to glibc memset microbenchmark test results, for LSX and LASX
|
||||
versions, a few cases with length less than 8 experience performance
|
||||
degradation, overall, the LASX version could reduce the runtime about
|
||||
15% - 75%, LSX version could reduce the runtime about 15%-50%.
|
||||
|
||||
The unaligned version uses unaligned memory access to set data which
|
||||
length is less than 64 and make address aligned with 8. For this part,
|
||||
the performance is better than aligned version. Comparing with the generic
|
||||
version, the performance is close when the length is larger than 128. When
|
||||
the length is 8-128, the unaligned version could reduce the runtime about
|
||||
30%-70%, the aligned version could reduce the runtime about 20%-50%.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/lp64/multiarch/Makefile | 4 +
|
||||
.../lp64/multiarch/dl-symbol-redir-ifunc.h | 24 +++
|
||||
.../lp64/multiarch/ifunc-impl-list.c | 10 +
|
||||
.../loongarch/lp64/multiarch/memset-aligned.S | 174 ++++++++++++++++++
|
||||
.../loongarch/lp64/multiarch/memset-lasx.S | 142 ++++++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/memset-lsx.S | 135 ++++++++++++++
|
||||
.../lp64/multiarch/memset-unaligned.S | 162 ++++++++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/memset.c | 37 ++++
|
||||
8 files changed, 688 insertions(+)
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-aligned.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lasx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lsx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-unaligned.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/memset.c
|
||||
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
index 7b87bc90..216886c5 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
@@ -30,5 +30,9 @@ sysdep_routines += \
|
||||
memrchr-generic \
|
||||
memrchr-lsx \
|
||||
memrchr-lasx \
|
||||
+ memset-aligned \
|
||||
+ memset-unaligned \
|
||||
+ memset-lsx \
|
||||
+ memset-lasx \
|
||||
# sysdep_routines
|
||||
endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h
|
||||
new file mode 100644
|
||||
index 00000000..e2723873
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h
|
||||
@@ -0,0 +1,24 @@
|
||||
+/* Symbol redirection for loader/static initialization code.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#ifndef _DL_IFUNC_GENERIC_H /* include guard; NOTE(review): the macro name does not match the file name dl-symbol-redir-ifunc.h */
|
||||
+#define _DL_IFUNC_GENERIC_H
|
||||
+
|
||||
+asm ("memset = __memset_aligned"); /* assembler symbol assignment: memset references in the including unit use __memset_aligned; presumably to keep early loader code off the ifunc - TODO confirm */
|
||||
+
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
index 8bd5489e..37f60dde 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
@@ -117,5 +117,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
#endif
|
||||
IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic)
|
||||
)
|
||||
+
|
||||
+ IFUNC_IMPL (i, name, memset,
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LASX, __memset_lasx)
|
||||
+ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LSX, __memset_lsx)
|
||||
+#endif
|
||||
+ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_UAL, __memset_unaligned)
|
||||
+ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_aligned)
|
||||
+ )
|
||||
+
|
||||
return i;
|
||||
}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memset-aligned.S b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S
|
||||
new file mode 100644
|
||||
index 00000000..1fce95b7
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S
|
||||
@@ -0,0 +1,174 @@
|
||||
+/* Optimized memset aligned implementation using basic LoongArch instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc)
|
||||
+# define MEMSET_NAME __memset_aligned
|
||||
+#else
|
||||
+# define MEMSET_NAME memset
|
||||
+#endif
|
||||
+
|
||||
+LEAF(MEMSET_NAME, 6) /* in: a0 = dest, a1 = fill byte, a2 = length; a0 is never written, so dest is what is returned */
|
||||
+ move t0, a0 /* t0 = running store pointer */
|
||||
+ andi a3, a0, 0x7 /* a3 = misalignment of dest within 8 bytes */
|
||||
+ li.w t6, 16
|
||||
+ beqz a3, L(align) /* dest is already 8-byte aligned */
|
||||
+ bltu a2, t6, L(short_data) /* unaligned and fewer than 16 bytes: byte stores only */
|
||||
+
|
||||
+L(make_align):
|
||||
+ li.w t8, 8
|
||||
+ sub.d t2, t8, a3 /* t2 = bytes needed to reach 8-byte alignment (1..7) */
|
||||
+ pcaddi t1, 11 /* t1 = address 11 instructions ahead, which is L(al0); layout below must not change */
|
||||
+ slli.d t3, t2, 2 /* scale by 4: one instruction per byte store */
|
||||
+ sub.d t1, t1, t3 /* back up t2 stores from L(al0) */
|
||||
+ jr t1 /* computed jump: executes exactly t2 of the st.b below */
|
||||
+
|
||||
+L(al7):
|
||||
+ st.b a1, t0, 6
|
||||
+L(al6):
|
||||
+ st.b a1, t0, 5
|
||||
+L(al5):
|
||||
+ st.b a1, t0, 4
|
||||
+L(al4):
|
||||
+ st.b a1, t0, 3
|
||||
+L(al3):
|
||||
+ st.b a1, t0, 2
|
||||
+L(al2):
|
||||
+ st.b a1, t0, 1
|
||||
+L(al1):
|
||||
+ st.b a1, t0, 0
|
||||
+L(al0):
|
||||
+ add.d t0, t0, t2 /* t0 is now 8-byte aligned */
|
||||
+ sub.d a2, a2, t2 /* bytes still to set */
|
||||
+
|
||||
+L(align):
|
||||
+ bstrins.d a1, a1, 15, 8 /* replicate the fill byte: 1 -> 2 bytes ... */
|
||||
+ bstrins.d a1, a1, 31, 16 /* ... 2 -> 4 bytes ... */
|
||||
+ bstrins.d a1, a1, 63, 32 /* ... 4 -> 8 bytes */
|
||||
+ bltu a2, t6, L(less_16bytes)
|
||||
+
|
||||
+ andi a4, a2, 0x3f /* a4 = length mod 64 */
|
||||
+ beq a4, a2, L(less_64bytes) /* length < 64: skip the bulk loop */
|
||||
+
|
||||
+ sub.d t1, a2, a4 /* t1 = bytes covered by the 64-byte loop */
|
||||
+ move a2, a4 /* a2 = tail left after the loop */
|
||||
+ add.d a5, t0, t1 /* a5 = address at which the loop stops */
|
||||
+
|
||||
+L(loop_64bytes): /* eight 8-byte stores per iteration */
|
||||
+ addi.d t0, t0, 64
|
||||
+ st.d a1, t0, -64
|
||||
+ st.d a1, t0, -56
|
||||
+ st.d a1, t0, -48
|
||||
+ st.d a1, t0, -40
|
||||
+
|
||||
+ st.d a1, t0, -32
|
||||
+ st.d a1, t0, -24
|
||||
+ st.d a1, t0, -16
|
||||
+ st.d a1, t0, -8
|
||||
+ bne t0, a5, L(loop_64bytes)
|
||||
+
|
||||
+L(less_64bytes): /* a2 < 64 from here on */
|
||||
+ srai.d a4, a2, 5 /* non-zero iff at least 32 bytes remain */
|
||||
+ beqz a4, L(less_32bytes)
|
||||
+ addi.d a2, a2, -32
|
||||
+ st.d a1, t0, 0
|
||||
+
|
||||
+ st.d a1, t0, 8
|
||||
+ st.d a1, t0, 16
|
||||
+ st.d a1, t0, 24
|
||||
+ addi.d t0, t0, 32
|
||||
+
|
||||
+L(less_32bytes):
|
||||
+ bltu a2, t6, L(less_16bytes) /* t6 still holds 16 */
|
||||
+ addi.d a2, a2, -16
|
||||
+ st.d a1, t0, 0
|
||||
+ st.d a1, t0, 8
|
||||
+ addi.d t0, t0, 16
|
||||
+
|
||||
+L(less_16bytes):
|
||||
+ srai.d a4, a2, 3 /* non-zero iff at least 8 bytes remain */
|
||||
+ beqz a4, L(less_8bytes)
|
||||
+ addi.d a2, a2, -8
|
||||
+ st.d a1, t0, 0
|
||||
+ addi.d t0, t0, 8
|
||||
+
|
||||
+L(less_8bytes):
|
||||
+ beqz a2, L(less_1byte) /* nothing left */
|
||||
+ srai.d a4, a2, 2 /* non-zero iff at least 4 bytes remain */
|
||||
+ beqz a4, L(less_4bytes)
|
||||
+ addi.d a2, a2, -4
|
||||
+ st.w a1, t0, 0
|
||||
+ addi.d t0, t0, 4
|
||||
+
|
||||
+L(less_4bytes):
|
||||
+ srai.d a3, a2, 1 /* non-zero iff at least 2 bytes remain */
|
||||
+ beqz a3, L(less_2bytes)
|
||||
+ addi.d a2, a2, -2
|
||||
+ st.h a1, t0, 0
|
||||
+ addi.d t0, t0, 2
|
||||
+
|
||||
+L(less_2bytes):
|
||||
+ beqz a2, L(less_1byte)
|
||||
+ st.b a1, t0, 0 /* final odd byte */
|
||||
+L(less_1byte):
|
||||
+ jr ra
|
||||
+
|
||||
+L(short_data): /* reached with dest unaligned and length in 0..15 */
|
||||
+ pcaddi t1, 19 /* t1 = address 19 instructions ahead, which is L(short_0); layout below must not change */
|
||||
+ slli.d t3, a2, 2 /* scale by 4: one instruction per byte store */
|
||||
+ sub.d t1, t1, t3 /* back up one st.b per byte to set */
|
||||
+ jr t1 /* computed jump: executes exactly a2 of the st.b below */
|
||||
+L(short_15):
|
||||
+ st.b a1, a0, 14
|
||||
+L(short_14):
|
||||
+ st.b a1, a0, 13
|
||||
+L(short_13):
|
||||
+ st.b a1, a0, 12
|
||||
+L(short_12):
|
||||
+ st.b a1, a0, 11
|
||||
+L(short_11):
|
||||
+ st.b a1, a0, 10
|
||||
+L(short_10):
|
||||
+ st.b a1, a0, 9
|
||||
+L(short_9):
|
||||
+ st.b a1, a0, 8
|
||||
+L(short_8):
|
||||
+ st.b a1, a0, 7
|
||||
+L(short_7):
|
||||
+ st.b a1, a0, 6
|
||||
+L(short_6):
|
||||
+ st.b a1, a0, 5
|
||||
+L(short_5):
|
||||
+ st.b a1, a0, 4
|
||||
+L(short_4):
|
||||
+ st.b a1, a0, 3
|
||||
+L(short_3):
|
||||
+ st.b a1, a0, 2
|
||||
+L(short_2):
|
||||
+ st.b a1, a0, 1
|
||||
+L(short_1):
|
||||
+ st.b a1, a0, 0
|
||||
+L(short_0):
|
||||
+ jr ra
|
||||
+END(MEMSET_NAME)
|
||||
+
|
||||
+libc_hidden_builtin_def (MEMSET_NAME)
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lasx.S b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S
|
||||
new file mode 100644
|
||||
index 00000000..041abbac
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S
|
||||
@@ -0,0 +1,142 @@
|
||||
+/* Optimized memset implementation using LoongArch LASX instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+# define MEMSET __memset_lasx
|
||||
+
|
||||
+LEAF(MEMSET, 6) /* in: a0 = dest, a1 = fill byte, a2 = length; a0 is never written, so dest is what is returned */
|
||||
+ li.d t1, 32
|
||||
+ move a3, a0 /* a3 = running store pointer */
|
||||
+ xvreplgr2vr.b xr0, a1 /* broadcast the fill byte into every byte of xr0 */
|
||||
+ add.d a4, a0, a2 /* a4 = one past the last byte to set */
|
||||
+
|
||||
+ bgeu t1, a2, L(less_32bytes) /* length <= 32 */
|
||||
+ li.d t3, 128
|
||||
+ li.d t2, 64
|
||||
+ blt t3, a2, L(long_bytes) /* length > 128; NOTE(review): signed compare, unlike the unsigned branches around it - differs only for lengths >= 2^63 */
|
||||
+
|
||||
+L(less_128bytes):
|
||||
+ bgeu t2, a2, L(less_64bytes) /* length <= 64 */
|
||||
+ xvst xr0, a3, 0 /* 65..128 bytes: 64 from the start and 64 ending at the end, overlapping in the middle */
|
||||
+ xvst xr0, a3, 32
|
||||
+ xvst xr0, a4, -32
|
||||
+
|
||||
+ xvst xr0, a4, -64
|
||||
+ jr ra
|
||||
+L(less_64bytes):
|
||||
+ xvst xr0, a3, 0 /* 33..64 bytes: one vector from each end */
|
||||
+ xvst xr0, a4, -32
|
||||
+
|
||||
+
|
||||
+ jr ra
|
||||
+L(less_32bytes):
|
||||
+ srli.d t0, a2, 4 /* non-zero iff length >= 16 */
|
||||
+ beqz t0, L(less_16bytes)
|
||||
+ vst vr0, a3, 0 /* 16..32 bytes: one 16-byte store from each end */
|
||||
+
|
||||
+ vst vr0, a4, -16
|
||||
+ jr ra
|
||||
+L(less_16bytes):
|
||||
+ srli.d t0, a2, 3 /* non-zero iff length >= 8 */
|
||||
+ beqz t0, L(less_8bytes)
|
||||
+
|
||||
+ vstelm.d vr0, a3, 0, 0 /* 8..15 bytes: one 8-byte element from each end */
|
||||
+ vstelm.d vr0, a4, -8, 0
|
||||
+ jr ra
|
||||
+L(less_8bytes):
|
||||
+ srli.d t0, a2, 2 /* non-zero iff length >= 4 */
|
||||
+
|
||||
+ beqz t0, L(less_4bytes)
|
||||
+ vstelm.w vr0, a3, 0, 0 /* 4..7 bytes: one 4-byte element from each end */
|
||||
+ vstelm.w vr0, a4, -4, 0
|
||||
+ jr ra
|
||||
+
|
||||
+
|
||||
+L(less_4bytes):
|
||||
+ srli.d t0, a2, 1 /* non-zero iff length >= 2 */
|
||||
+ beqz t0, L(less_2bytes)
|
||||
+ vstelm.h vr0, a3, 0, 0 /* 2..3 bytes: one 2-byte element from each end */
|
||||
+ vstelm.h vr0, a4, -2, 0
|
||||
+
|
||||
+ jr ra
|
||||
+L(less_2bytes):
|
||||
+ beqz a2, L(less_1bytes) /* length == 0: store nothing */
|
||||
+ st.b a1, a3, 0 /* length == 1 */
|
||||
+L(less_1bytes):
|
||||
+ jr ra
|
||||
+
|
||||
+L(long_bytes):
|
||||
+ xvst xr0, a3, 0 /* unaligned store covering the first 32 bytes */
|
||||
+ bstrins.d a3, zero, 4, 0 /* round a3 down to a 32-byte boundary ... */
|
||||
+ addi.d a3, a3, 32 /* ... then step to the next one, inside the bytes just stored */
|
||||
+ sub.d a2, a4, a3 /* a2 = bytes from the aligned pointer to the end */
|
||||
+
|
||||
+ andi t0, a2, 0xff /* t0 = remaining mod 256 */
|
||||
+ beq t0, a2, L(long_end) /* fewer than 256 bytes remain: skip the bulk loop */
|
||||
+ move a2, t0 /* a2 = tail left after the loop */
|
||||
+ sub.d t0, a4, t0 /* t0 = address at which the loop stops */
|
||||
+
|
||||
+
|
||||
+L(loop_256): /* eight aligned 32-byte stores per iteration */
|
||||
+ xvst xr0, a3, 0
|
||||
+ xvst xr0, a3, 32
|
||||
+ xvst xr0, a3, 64
|
||||
+ xvst xr0, a3, 96
|
||||
+
|
||||
+ xvst xr0, a3, 128
|
||||
+ xvst xr0, a3, 160
|
||||
+ xvst xr0, a3, 192
|
||||
+ xvst xr0, a3, 224
|
||||
+
|
||||
+ addi.d a3, a3, 256
|
||||
+ bne a3, t0, L(loop_256)
|
||||
+L(long_end): /* a2 < 256 here */
|
||||
+ bltu a2, t3, L(end_less_128) /* t3 still holds 128 */
|
||||
+ addi.d a2, a2, -128
|
||||
+
|
||||
+ xvst xr0, a3, 0
|
||||
+ xvst xr0, a3, 32
|
||||
+ xvst xr0, a3, 64
|
||||
+ xvst xr0, a3, 96
|
||||
+
|
||||
+
|
||||
+ addi.d a3, a3, 128
|
||||
+L(end_less_128):
|
||||
+ bltu a2, t2, L(end_less_64) /* t2 still holds 64 */
|
||||
+ addi.d a2, a2, -64
|
||||
+ xvst xr0, a3, 0
|
||||
+
|
||||
+ xvst xr0, a3, 32
|
||||
+ addi.d a3, a3, 64
|
||||
+L(end_less_64):
|
||||
+ bltu a2, t1, L(end_less_32) /* t1 still holds 32 */
|
||||
+ xvst xr0, a3, 0 /* a2 is not updated here; the final store below covers the rest */
|
||||
+
|
||||
+L(end_less_32):
|
||||
+ xvst xr0, a4, -32 /* unaligned store of the last 32 bytes, ending exactly at the end */
|
||||
+ jr ra
|
||||
+END(MEMSET)
|
||||
+
|
||||
+libc_hidden_builtin_def (MEMSET)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lsx.S b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S
|
||||
new file mode 100644
|
||||
index 00000000..3d3982aa
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S
|
||||
@@ -0,0 +1,135 @@
|
||||
+/* Optimized memset implementation using LoongArch LSX instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+# define MEMSET __memset_lsx
|
||||
+
|
||||
+LEAF(MEMSET, 6) /* memset using 16-byte LSX stores: a0 = dest, a1 = fill byte, a2 = length.  a0 is never written. */
|
||||
+ li.d t1, 16
|
||||
+ move a3, a0 /* a3 = write cursor */
|
||||
+ vreplgr2vr.b vr0, a1 /* vr0 = fill byte replicated into every byte lane */
|
||||
+ add.d a4, a0, a2 /* a4 = one past the last byte to set */
|
||||
+
|
||||
+ bgeu t1, a2, L(less_16bytes) /* length <= 16 */
|
||||
+ li.d t3, 64
|
||||
+ li.d t2, 32
|
||||
+ bgeu a2, t3, L(long_bytes) /* length >= 64 */
|
||||
+
|
||||
+L(less_64bytes): /* 17 <= length <= 63 */
|
||||
+ bgeu t2, a2, L(less_32bytes) /* length <= 32 */
|
||||
+ vst vr0, a3, 0 /* 33..63: first 32 and last 32 bytes; the two halves overlap */
|
||||
+ vst vr0, a3, 16
|
||||
+ vst vr0, a4, -32
|
||||
+
|
||||
+ vst vr0, a4, -16
|
||||
+ jr ra
|
||||
+L(less_32bytes): /* 17..32: first 16 and last 16 bytes, overlapping */
|
||||
+ vst vr0, a3, 0
|
||||
+ vst vr0, a4, -16
|
||||
+
|
||||
+
|
||||
+ jr ra
|
||||
+L(less_16bytes): /* 0..16 */
|
||||
+ srli.d t0, a2, 3
|
||||
+ beqz t0, L(less_8bytes) /* length < 8 */
|
||||
+ vstelm.d vr0, a3, 0, 0 /* 8..16: first and last doubleword, overlapping */
|
||||
+
|
||||
+ vstelm.d vr0, a4, -8, 0
|
||||
+ jr ra
|
||||
+L(less_8bytes):
|
||||
+ srli.d t0, a2, 2
|
||||
+ beqz t0, L(less_4bytes) /* length < 4 */
|
||||
+
|
||||
+ vstelm.w vr0, a3, 0, 0 /* 4..7: first and last word, overlapping */
|
||||
+ vstelm.w vr0, a4, -4, 0
|
||||
+ jr ra
|
||||
+L(less_4bytes):
|
||||
+ srli.d t0, a2, 1
|
||||
+
|
||||
+ beqz t0, L(less_2bytes) /* length < 2 */
|
||||
+ vstelm.h vr0, a3, 0, 0 /* 2..3: first and last halfword, overlapping */
|
||||
+ vstelm.h vr0, a4, -2, 0
|
||||
+ jr ra
|
||||
+
|
||||
+
|
||||
+L(less_2bytes):
|
||||
+ beqz a2, L(less_1bytes) /* length == 0: store nothing */
|
||||
+ vstelm.b vr0, a3, 0, 0 /* length == 1 */
|
||||
+L(less_1bytes):
|
||||
+ jr ra
|
||||
+L(long_bytes): /* length >= 64 */
|
||||
+ vst vr0, a3, 0 /* head: 16 bytes at the possibly unaligned start */
|
||||
+
|
||||
+ bstrins.d a3, zero, 3, 0 /* clear the low 4 address bits ... */
|
||||
+ addi.d a3, a3, 16 /* ... then step to the next 16-byte boundary */
|
||||
+ sub.d a2, a4, a3 /* a2 = bytes from the aligned cursor to the end */
|
||||
+ andi t0, a2, 0x7f /* t0 = a2 mod 128 */
|
||||
+
|
||||
+ beq t0, a2, L(long_end) /* fewer than 128 bytes left: skip the loop */
|
||||
+ move a2, t0 /* a2 = tail length left after the loop */
|
||||
+ sub.d t0, a4, t0 /* t0 = address at which the loop stops */
|
||||
+
|
||||
+L(loop_128): /* 128 bytes per iteration */
|
||||
+ vst vr0, a3, 0
|
||||
+
|
||||
+ vst vr0, a3, 16
|
||||
+ vst vr0, a3, 32
|
||||
+ vst vr0, a3, 48
|
||||
+ vst vr0, a3, 64
|
||||
+
|
||||
+
|
||||
+ vst vr0, a3, 80
|
||||
+ vst vr0, a3, 96
|
||||
+ vst vr0, a3, 112
|
||||
+ addi.d a3, a3, 128
|
||||
+
|
||||
+ bne a3, t0, L(loop_128)
|
||||
+L(long_end): /* a2 (< 128) bytes remain at a3 */
|
||||
+ bltu a2, t3, L(end_less_64) /* t3 = 64 */
|
||||
+ addi.d a2, a2, -64
|
||||
+ vst vr0, a3, 0
|
||||
+
|
||||
+ vst vr0, a3, 16
|
||||
+ vst vr0, a3, 32
|
||||
+ vst vr0, a3, 48
|
||||
+ addi.d a3, a3, 64
|
||||
+
|
||||
+L(end_less_64):
|
||||
+ bltu a2, t2, L(end_less_32) /* t2 = 32 */
|
||||
+ addi.d a2, a2, -32
|
||||
+ vst vr0, a3, 0
|
||||
+ vst vr0, a3, 16
|
||||
+
|
||||
+ addi.d a3, a3, 32
|
||||
+L(end_less_32):
|
||||
+ bltu a2, t1, L(end_less_16) /* t1 = 16 */
|
||||
+ vst vr0, a3, 0
|
||||
+
|
||||
+L(end_less_16):
|
||||
+ vst vr0, a4, -16 /* last 16 bytes, addressed from the end, cover whatever is left */
|
||||
+ jr ra
|
||||
+END(MEMSET)
|
||||
+
|
||||
+libc_hidden_builtin_def (MEMSET)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S
|
||||
new file mode 100644
|
||||
index 00000000..f7d32039
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S
|
||||
@@ -0,0 +1,162 @@
|
||||
+/* Optimized memset unaligned implementation using basic LoongArch instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc)
|
||||
+
|
||||
+# define MEMSET_NAME __memset_unaligned
|
||||
+
|
||||
+#define ST_128(n) /* Store a1 to the 16 doublewords at a0+n .. a0+n+127. */ \
|
||||
+ st.d a1, a0, n; \
|
||||
+ st.d a1, a0, n+8 ; \
|
||||
+ st.d a1, a0, n+16 ; \
|
||||
+ st.d a1, a0, n+24 ; \
|
||||
+ st.d a1, a0, n+32 ; \
|
||||
+ st.d a1, a0, n+40 ; \
|
||||
+ st.d a1, a0, n+48 ; \
|
||||
+ st.d a1, a0, n+56 ; \
|
||||
+ st.d a1, a0, n+64 ; \
|
||||
+ st.d a1, a0, n+72 ; \
|
||||
+ st.d a1, a0, n+80 ; \
|
||||
+ st.d a1, a0, n+88 ; \
|
||||
+ st.d a1, a0, n+96 ; \
|
||||
+ st.d a1, a0, n+104; \
|
||||
+ st.d a1, a0, n+112; \
|
||||
+ st.d a1, a0, n+120;
|
||||
+
|
||||
+LEAF(MEMSET_NAME, 6) /* memset using only general-purpose stores: a0 = dest, a1 = fill byte, a2 = length. */
|
||||
+ bstrins.d a1, a1, 15, 8 /* copy byte 0 of a1 into byte 1 */
|
||||
+ add.d t7, a0, a2 /* t7 = one past the last byte to set */
|
||||
+ bstrins.d a1, a1, 31, 16 /* copy the low halfword into bits 31:16 */
|
||||
+ move t0, a0 /* t0 keeps the original dest */
|
||||
+
|
||||
+ bstrins.d a1, a1, 63, 32 /* a1 now holds the fill byte in all 8 bytes */
|
||||
+ srai.d t8, a2, 4
|
||||
+ beqz t8, L(less_16bytes) /* length < 16 */
|
||||
+ srai.d t8, a2, 6
|
||||
+
|
||||
+ bnez t8, L(more_64bytes) /* length >= 64 */
|
||||
+ srai.d t8, a2, 5
|
||||
+ beqz t8, L(less_32bytes) /* 16 <= length < 32 */
|
||||
+
|
||||
+ st.d a1, a0, 0 /* 32..63: first 32 and last 32 bytes; the two halves may overlap */
|
||||
+ st.d a1, a0, 8
|
||||
+ st.d a1, a0, 16
|
||||
+ st.d a1, a0, 24
|
||||
+
|
||||
+ st.d a1, t7, -32
|
||||
+ st.d a1, t7, -24
|
||||
+ st.d a1, t7, -16
|
||||
+ st.d a1, t7, -8
|
||||
+
|
||||
+ jr ra
|
||||
+
|
||||
+L(less_32bytes): /* 16..31: first 16 and last 16 bytes */
|
||||
+ st.d a1, a0, 0
|
||||
+ st.d a1, a0, 8
|
||||
+ st.d a1, t7, -16
|
||||
+ st.d a1, t7, -8
|
||||
+
|
||||
+ jr ra
|
||||
+
|
||||
+L(less_16bytes):
|
||||
+ srai.d t8, a2, 3
|
||||
+ beqz t8, L(less_8bytes) /* length < 8 */
|
||||
+ st.d a1, a0, 0 /* 8..15: first and last doubleword */
|
||||
+ st.d a1, t7, -8
|
||||
+
|
||||
+ jr ra
|
||||
+
|
||||
+L(less_8bytes):
|
||||
+ srai.d t8, a2, 2
|
||||
+ beqz t8, L(less_4bytes) /* length < 4 */
|
||||
+ st.w a1, a0, 0 /* 4..7: first and last word */
|
||||
+ st.w a1, t7, -4
|
||||
+
|
||||
+ jr ra
|
||||
+
|
||||
+L(less_4bytes):
|
||||
+ srai.d t8, a2, 1
|
||||
+ beqz t8, L(less_2bytes) /* length < 2 */
|
||||
+ st.h a1, a0, 0 /* 2..3: first and last halfword */
|
||||
+ st.h a1, t7, -2
|
||||
+
|
||||
+ jr ra
|
||||
+
|
||||
+L(less_2bytes):
|
||||
+ beqz a2, L(less_1bytes) /* length == 0: store nothing */
|
||||
+ st.b a1, a0, 0 /* length == 1 */
|
||||
+
|
||||
+ jr ra
|
||||
+
|
||||
+L(less_1bytes):
|
||||
+ jr ra
|
||||
+
|
||||
+L(more_64bytes): /* length >= 64; a0 becomes the cursor and is restored from t0 before returning */
|
||||
+ srli.d a0, a0, 3
|
||||
+ slli.d a0, a0, 3 /* round a0 down to a multiple of 8 ... */
|
||||
+ addi.d a0, a0, 0x8 /* ... then step to the next 8-byte boundary */
|
||||
+ st.d a1, t0, 0 /* head: 8 bytes at the original, possibly unaligned, dest */
|
||||
+
|
||||
+ sub.d t2, t0, a0 /* t2 = original dest - aligned cursor (in -8..-1) */
|
||||
+ add.d a2, t2, a2 /* a2 = bytes from the aligned cursor to the end */
|
||||
+ addi.d a2, a2, -0x80 /* pre-subtract 128 so the sign tells whether a full chunk fits */
|
||||
+ blt a2, zero, L(end_unalign_proc)
|
||||
+
|
||||
+L(loop_less): /* 128 bytes per iteration */
|
||||
+ ST_128(0)
|
||||
+ addi.d a0, a0, 0x80
|
||||
+ addi.d a2, a2, -0x80
|
||||
+ bge a2, zero, L(loop_less)
|
||||
+
|
||||
+L(end_unalign_proc):
|
||||
+ addi.d a2, a2, 0x80 /* undo the bias: a2 = bytes still to set (0..127) */
|
||||
+ pcaddi t1, 20 /* t1 = address 20 instructions ahead, i.e. the final st.d a1, t7, -8 */
|
||||
+ andi t5, a2, 0x78 /* t5 = 8 * (whole doublewords remaining) */
|
||||
+ srli.d t5, t5, 1 /* 4 bytes of code per doubleword store */
|
||||
+
|
||||
+ sub.d t1, t1, t5 /* back up one store instruction per remaining doubleword */
|
||||
+ jr t1 /* computed jump into the store ladder below; do not insert or remove instructions here */
|
||||
+
|
||||
+ st.d a1, a0, 112
|
||||
+ st.d a1, a0, 104
|
||||
+ st.d a1, a0, 96
|
||||
+ st.d a1, a0, 88
|
||||
+ st.d a1, a0, 80
|
||||
+ st.d a1, a0, 72
|
||||
+ st.d a1, a0, 64
|
||||
+ st.d a1, a0, 56
|
||||
+ st.d a1, a0, 48
|
||||
+ st.d a1, a0, 40
|
||||
+ st.d a1, a0, 32
|
||||
+ st.d a1, a0, 24
|
||||
+ st.d a1, a0, 16
|
||||
+ st.d a1, a0, 8
|
||||
+ st.d a1, a0, 0
|
||||
+ st.d a1, t7, -8 /* last 8 bytes, addressed from the end, cover the sub-doubleword tail */
|
||||
+
|
||||
+ move a0, t0 /* restore the original dest into a0 */
|
||||
+ jr ra
|
||||
+END(MEMSET_NAME)
|
||||
+
|
||||
+libc_hidden_builtin_def (MEMSET_NAME)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memset.c b/sysdeps/loongarch/lp64/multiarch/memset.c
|
||||
new file mode 100644
|
||||
index 00000000..3ff60d8a
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memset.c
|
||||
@@ -0,0 +1,37 @@
|
||||
+/* Multiple versions of memset.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* Define multiple versions only for the definition in libc. */
|
||||
+#if IS_IN (libc)
|
||||
+# define memset __redirect_memset /* rename while <string.h> is read, so its prototype declares __redirect_memset */
|
||||
+# include <string.h>
|
||||
+# undef memset
|
||||
+
|
||||
+# define SYMBOL_NAME memset /* defined before the selector header is included; presumably consumed there - TODO confirm */
|
||||
+# include "ifunc-lasx.h" /* expected to supply the IFUNC_SELECTOR used below */
|
||||
+
|
||||
+libc_ifunc_redirected (__redirect_memset, memset, /* memset becomes an ifunc resolved to whatever IFUNC_SELECTOR () returns */
|
||||
+ IFUNC_SELECTOR ());
|
||||
+
|
||||
+# ifdef SHARED /* shared builds only: hidden-visibility __GI_memset declaration */
|
||||
+__hidden_ver1 (memset, __GI_memset, __redirect_memset)
|
||||
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memset);
|
||||
+# endif
|
||||
+
|
||||
+#endif
|
||||
--
|
||||
2.33.0
|
||||
|
448
LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch
Normal file
448
LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch
Normal file
|
@ -0,0 +1,448 @@
|
|||
From b412bcb2cf4914a664bcd24924d670a2e37394b3 Mon Sep 17 00:00:00 2001
|
||||
From: dengjianbo <dengjianbo@loongson.cn>
|
||||
Date: Mon, 28 Aug 2023 10:08:35 +0800
|
||||
Subject: [PATCH 14/29] LoongArch: Add ifunc support for rawmemchr{aligned,
|
||||
lsx, lasx}
|
||||
|
||||
According to the glibc rawmemchr microbenchmark, a few cases tested with
|
||||
char '\0' experience performance degradation because the lasx and lsx
|
||||
versions don't handle the '\0' separately. Overall, rawmemchr-lasx
|
||||
implementation could reduce the runtime about 40%-80%, rawmemchr-lsx
|
||||
implementation could reduce the runtime about 40%-66%, rawmemchr-aligned
|
||||
implementation could reduce the runtime about 20%-40%.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
|
||||
.../lp64/multiarch/ifunc-impl-list.c | 8 ++
|
||||
.../lp64/multiarch/ifunc-rawmemchr.h | 40 ++++++
|
||||
.../lp64/multiarch/rawmemchr-aligned.S | 124 ++++++++++++++++++
|
||||
.../loongarch/lp64/multiarch/rawmemchr-lasx.S | 82 ++++++++++++
|
||||
.../loongarch/lp64/multiarch/rawmemchr-lsx.S | 71 ++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/rawmemchr.c | 37 ++++++
|
||||
7 files changed, 365 insertions(+)
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr.c
|
||||
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
index 5d7ae7ae..64416b02 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
@@ -21,5 +21,8 @@ sysdep_routines += \
|
||||
memmove-unaligned \
|
||||
memmove-lsx \
|
||||
memmove-lasx \
|
||||
+ rawmemchr-aligned \
|
||||
+ rawmemchr-lsx \
|
||||
+ rawmemchr-lasx \
|
||||
# sysdep_routines
|
||||
endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
index c8ba87bd..3db9af14 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
@@ -94,5 +94,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_aligned)
|
||||
)
|
||||
|
||||
+ IFUNC_IMPL (i, name, rawmemchr,
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LASX, __rawmemchr_lasx)
|
||||
+ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LSX, __rawmemchr_lsx)
|
||||
+#endif
|
||||
+ IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned)
|
||||
+ )
|
||||
+
|
||||
return i;
|
||||
}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h
|
||||
new file mode 100644
|
||||
index 00000000..a7bb4cf9
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h
|
||||
@@ -0,0 +1,40 @@
|
||||
+/* Common definition for rawmemchr ifunc selections.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <ldsodefs.h>
|
||||
+#include <ifunc-init.h>
|
||||
+
|
||||
+#if !defined __loongarch_soft_float
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
|
||||
+#endif
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
|
||||
+
|
||||
+static inline void * /* Pick the rawmemchr variant: lasx, then lsx, then aligned. */
|
||||
+IFUNC_SELECTOR (void)
|
||||
+{
|
||||
+#if !defined __loongarch_soft_float /* the vector variants are only considered when not built for soft-float */
|
||||
+ if (SUPPORT_LASX)
|
||||
+ return OPTIMIZE (lasx);
|
||||
+ else if (SUPPORT_LSX)
|
||||
+ return OPTIMIZE (lsx);
|
||||
+ else /* this else binds to the return after the #endif */
|
||||
+#endif
|
||||
+ return OPTIMIZE (aligned); /* fallback, and the only choice for soft-float builds */
|
||||
+}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S
|
||||
new file mode 100644
|
||||
index 00000000..9c7155ae
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S
|
||||
@@ -0,0 +1,124 @@
|
||||
+/* Optimized rawmemchr implementation using basic LoongArch instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc)
|
||||
+# define RAWMEMCHR_NAME __rawmemchr_aligned
|
||||
+#else
|
||||
+# define RAWMEMCHR_NAME __rawmemchr
|
||||
+#endif
|
||||
+
|
||||
+LEAF(RAWMEMCHR_NAME, 6) /* rawmemchr, 8 bytes at a time: a0 = s, a1 = c; returns the address of the first match in a0. */
|
||||
+ andi t1, a0, 0x7 /* t1 = offset of s inside its 8-byte word */
|
||||
+ bstrins.d a0, zero, 2, 0 /* round a0 down to an 8-byte boundary */
|
||||
+ lu12i.w a2, 0x01010 /* a2 = 0x01010000 */
|
||||
+ bstrins.d a1, a1, 15, 8 /* start replicating c across a1 */
|
||||
+
|
||||
+ ld.d t0, a0, 0 /* first aligned word; it may include bytes before s */
|
||||
+ slli.d t1, t1, 3 /* offset in bits */
|
||||
+ ori a2, a2, 0x101 /* a2 = 0x01010101 */
|
||||
+ bstrins.d a1, a1, 31, 16
|
||||
+
|
||||
+ li.w t8, -1
|
||||
+ bstrins.d a1, a1, 63, 32 /* a1 = c in all 8 bytes */
|
||||
+ bstrins.d a2, a2, 63, 32 /* a2 = 0x0101010101010101 */
|
||||
+ sll.d t2, t8, t1 /* t2 = all-ones from the byte of s upward */
|
||||
+
|
||||
+ sll.d t3, a1, t1 /* t3 = c pattern with the bytes before s zeroed */
|
||||
+ orn t0, t0, t2 /* force the bytes before s to 0xff */
|
||||
+ slli.d a3, a2, 7 /* a3 = 0x8080808080808080 */
|
||||
+ beqz a1, L(find_zero) /* c == 0 takes the dedicated path below */
|
||||
+
|
||||
+ xor t0, t0, t3 /* bytes equal to c become 0; the forced 0xff bytes stay 0xff */
|
||||
+ sub.d t1, t0, a2
|
||||
+ andn t2, a3, t0
|
||||
+ and t3, t1, t2 /* t3 = (x - 0x01..) & ~x & 0x80..: nonzero iff x has a zero byte */
|
||||
+
|
||||
+ bnez t3, L(count_pos)
|
||||
+ addi.d a0, a0, 8
|
||||
+
|
||||
+L(loop): /* c != 0: two words per iteration */
|
||||
+ ld.d t0, a0, 0
|
||||
+ xor t0, t0, a1
|
||||
+
|
||||
+ sub.d t1, t0, a2
|
||||
+ andn t2, a3, t0
|
||||
+ and t3, t1, t2
|
||||
+ bnez t3, L(count_pos) /* match in the word at a0 */
|
||||
+
|
||||
+ ld.d t0, a0, 8
|
||||
+ addi.d a0, a0, 16
|
||||
+ xor t0, t0, a1
|
||||
+ sub.d t1, t0, a2
|
||||
+
|
||||
+ andn t2, a3, t0
|
||||
+ and t3, t1, t2
|
||||
+ beqz t3, L(loop)
|
||||
+ addi.d a0, a0, -8 /* match was in the second word; point a0 back at it */
|
||||
+L(count_pos): /* a0 = address of the word holding the match, t3 = its match mask */
|
||||
+ ctz.d t0, t3
|
||||
+ srli.d t0, t0, 3 /* bit index -> byte index */
|
||||
+ add.d a0, a0, t0
|
||||
+ jr ra
|
||||
+
|
||||
+L(loop_7bit): /* c == 0: loop using the cheaper two-instruction filter */
|
||||
+ ld.d t0, a0, 0
|
||||
+L(find_zero):
|
||||
+ sub.d t1, t0, a2
|
||||
+ and t2, t1, a3 /* (x - 0x01..) & 0x80.. without the ~x term, so it can also fire on bytes with bit 7 set */
|
||||
+ bnez t2, L(more_check)
|
||||
+
|
||||
+ ld.d t0, a0, 8
|
||||
+ addi.d a0, a0, 16
|
||||
+ sub.d t1, t0, a2
|
||||
+ and t2, t1, a3
|
||||
+
|
||||
+ beqz t2, L(loop_7bit)
|
||||
+ addi.d a0, a0, -8 /* candidate was in the second word */
|
||||
+
|
||||
+L(more_check): /* confirm the candidate with the exact zero-byte test */
|
||||
+ andn t2, a3, t0
|
||||
+ and t3, t1, t2
|
||||
+ bnez t3, L(count_pos)
|
||||
+ addi.d a0, a0, 8 /* false positive: continue with the exact-test loop */
|
||||
+
|
||||
+L(loop_8bit): /* c == 0: exact test on every word, two words per iteration */
|
||||
+ ld.d t0, a0, 0
|
||||
+
|
||||
+ sub.d t1, t0, a2
|
||||
+ andn t2, a3, t0
|
||||
+ and t3, t1, t2
|
||||
+ bnez t3, L(count_pos)
|
||||
+
|
||||
+ ld.d t0, a0, 8
|
||||
+ addi.d a0, a0, 16
|
||||
+ sub.d t1, t0, a2
|
||||
+
|
||||
+ andn t2, a3, t0
|
||||
+ and t3, t1, t2
|
||||
+ beqz t3, L(loop_8bit)
|
||||
+
|
||||
+ addi.d a0, a0, -8 /* match was in the second word */
|
||||
+ b L(count_pos)
|
||||
+
|
||||
+
|
||||
+END(RAWMEMCHR_NAME)
|
||||
+
|
||||
+libc_hidden_builtin_def (__rawmemchr)
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S
|
||||
new file mode 100644
|
||||
index 00000000..be2eb59d
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S
|
||||
@@ -0,0 +1,82 @@
|
||||
+/* Optimized rawmemchr implementation using LoongArch LASX instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/asm.h>
|
||||
+#include <sys/regdef.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+# define RAWMEMCHR __rawmemchr_lasx
|
||||
+
|
||||
+LEAF(RAWMEMCHR, 6) /* rawmemchr, 64 bytes at a time with LASX: a0 = s, a1 = c; returns the match address in a0. */
|
||||
+ move a2, a0 /* a2 keeps the original s */
|
||||
+ bstrins.d a0, zero, 5, 0 /* round a0 down to a 64-byte boundary */
|
||||
+ xvld xr0, a0, 0
|
||||
+ xvld xr1, a0, 32
|
||||
+
|
||||
+ xvreplgr2vr.b xr2, a1 /* xr2 = c replicated into every byte lane */
|
||||
+ xvseq.b xr0, xr0, xr2 /* per-byte equality with c */
|
||||
+ xvseq.b xr1, xr1, xr2
|
||||
+ xvmsknz.b xr0, xr0 /* collapse each 128-bit half into a bitmask of its nonzero bytes */
|
||||
+
|
||||
+ xvmsknz.b xr1, xr1
|
||||
+ xvpickve.w xr3, xr0, 4 /* word 4 = first word of the upper 128-bit half */
|
||||
+ xvpickve.w xr4, xr1, 4
|
||||
+ vilvl.h vr0, vr3, vr0 /* join the low-half and high-half masks of the first 32 bytes */
|
||||
+
|
||||
+ vilvl.h vr1, vr4, vr1 /* same for the second 32 bytes */
|
||||
+ vilvl.w vr0, vr1, vr0 /* presumably one bit per byte of the 64-byte block - TODO confirm lane order */
|
||||
+ movfr2gr.d t0, fa0 /* move the low 64 bits of the mask to a GPR */
|
||||
+ sra.d t0, t0, a2 /* shift by the low bits of s, discarding matches located before s */
|
||||
+
|
||||
+
|
||||
+ beqz t0, L(loop) /* no match in the first block */
|
||||
+ ctz.d t0, t0 /* distance from s to the first match */
|
||||
+ add.d a0, a2, t0
|
||||
+ jr ra
|
||||
+
|
||||
+L(loop): /* scan the following aligned 64-byte blocks */
|
||||
+ xvld xr0, a0, 64
|
||||
+ xvld xr1, a0, 96
|
||||
+ addi.d a0, a0, 64 /* a0 = address of the block just loaded */
|
||||
+ xvseq.b xr0, xr0, xr2
|
||||
+
|
||||
+ xvseq.b xr1, xr1, xr2
|
||||
+ xvmax.bu xr3, xr0, xr1 /* merge both compare results */
|
||||
+ xvseteqz.v fcc0, xr3 /* fcc0 set when no byte matched */
|
||||
+ bcnez fcc0, L(loop)
|
||||
+
|
||||
+ xvmsknz.b xr0, xr0 /* rebuild the 64-bit match mask as in the prologue */
|
||||
+ xvmsknz.b xr1, xr1
|
||||
+ xvpickve.w xr3, xr0, 4
|
||||
+ xvpickve.w xr4, xr1, 4
|
||||
+
|
||||
+
|
||||
+ vilvl.h vr0, vr3, vr0
|
||||
+ vilvl.h vr1, vr4, vr1
|
||||
+ vilvl.w vr0, vr1, vr0
|
||||
+ movfr2gr.d t0, fa0
|
||||
+
|
||||
+ ctz.d t0, t0 /* byte offset of the first match inside the block */
|
||||
+ add.d a0, a0, t0
|
||||
+ jr ra
|
||||
+END(RAWMEMCHR)
|
||||
+
|
||||
+libc_hidden_builtin_def (RAWMEMCHR)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S
|
||||
new file mode 100644
|
||||
index 00000000..2f6fe024
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S
|
||||
@@ -0,0 +1,71 @@
|
||||
+/* Optimized rawmemchr implementation using LoongArch LSX instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+# define RAWMEMCHR __rawmemchr_lsx
|
||||
+
|
||||
+LEAF(RAWMEMCHR, 6) /* rawmemchr, 32 bytes at a time with LSX: a0 = s, a1 = c; returns the match address in a0. */
|
||||
+ move a2, a0 /* a2 keeps the original s */
|
||||
+ bstrins.d a0, zero, 4, 0 /* round a0 down to a 32-byte boundary */
|
||||
+ vld vr0, a0, 0
|
||||
+ vld vr1, a0, 16
|
||||
+
|
||||
+ vreplgr2vr.b vr2, a1 /* vr2 = c replicated into every byte lane */
|
||||
+ vseq.b vr0, vr0, vr2 /* per-byte equality with c */
|
||||
+ vseq.b vr1, vr1, vr2
|
||||
+ vmsknz.b vr0, vr0 /* bitmask of the nonzero (matching) bytes */
|
||||
+
|
||||
+ vmsknz.b vr1, vr1
|
||||
+ vilvl.h vr0, vr1, vr0 /* join both 16-bit masks into one 32-bit mask */
|
||||
+ movfr2gr.s t0, fa0
|
||||
+ sra.w t0, t0, a2 /* shift by the low bits of s, discarding matches located before s */
|
||||
+
|
||||
+ beqz t0, L(loop) /* no match in the first block */
|
||||
+ ctz.w t0, t0 /* distance from s to the first match */
|
||||
+ add.d a0, a2, t0
|
||||
+ jr ra
|
||||
+
|
||||
+
|
||||
+L(loop): /* scan the following aligned 32-byte blocks */
|
||||
+ vld vr0, a0, 32
|
||||
+ vld vr1, a0, 48
|
||||
+ addi.d a0, a0, 32 /* a0 = address of the block just loaded */
|
||||
+ vseq.b vr0, vr0, vr2
|
||||
+
|
||||
+ vseq.b vr1, vr1, vr2
|
||||
+ vmax.bu vr3, vr0, vr1 /* merge both compare results */
|
||||
+ vseteqz.v fcc0, vr3 /* fcc0 set when no byte matched */
|
||||
+ bcnez fcc0, L(loop)
|
||||
+
|
||||
+ vmsknz.b vr0, vr0 /* rebuild the 32-bit match mask as in the prologue */
|
||||
+ vmsknz.b vr1, vr1
|
||||
+ vilvl.h vr0, vr1, vr0
|
||||
+ movfr2gr.s t0, fa0
|
||||
+
|
||||
+ ctz.w t0, t0 /* byte offset of the first match inside the block */
|
||||
+ add.d a0, a0, t0
|
||||
+ jr ra
|
||||
+END(RAWMEMCHR)
|
||||
+
|
||||
+libc_hidden_builtin_def (RAWMEMCHR)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr.c b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c
|
||||
new file mode 100644
|
||||
index 00000000..89c7ffff
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c
|
||||
@@ -0,0 +1,37 @@
|
||||
+/* Multiple versions of rawmemchr.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#if IS_IN (libc)
|
||||
+# define rawmemchr __redirect_rawmemchr /* rename both names while <string.h> is read, so its prototypes declare the __redirect_ names */
|
||||
+# define __rawmemchr __redirect___rawmemchr
|
||||
+# include <string.h>
|
||||
+# undef rawmemchr
|
||||
+# undef __rawmemchr
|
||||
+
|
||||
+# define SYMBOL_NAME rawmemchr /* defined before the selector header is included; presumably consumed there - TODO confirm */
|
||||
+# include "ifunc-rawmemchr.h" /* supplies the IFUNC_SELECTOR used below */
|
||||
+
|
||||
+libc_ifunc_redirected (__redirect_rawmemchr, __rawmemchr, /* __rawmemchr becomes an ifunc resolved to whatever IFUNC_SELECTOR () returns */
|
||||
+ IFUNC_SELECTOR ());
|
||||
+weak_alias (__rawmemchr, rawmemchr) /* public rawmemchr is a weak alias of __rawmemchr */
|
||||
+# ifdef SHARED /* shared builds only: hidden-visibility __GI___rawmemchr declaration */
|
||||
+__hidden_ver1 (__rawmemchr, __GI___rawmemchr, __redirect___rawmemchr)
|
||||
+ __attribute__((visibility ("hidden")));
|
||||
+# endif
|
||||
+#endif
|
||||
--
|
||||
2.33.0
|
||||
|
499
LoongArch-Add-ifunc-support-for-strcmp-aligned-lsx.patch
Normal file
499
LoongArch-Add-ifunc-support-for-strcmp-aligned-lsx.patch
Normal file
|
@ -0,0 +1,499 @@
|
|||
From e258cfcf92f5e31e902fa045b41652f00fcf2521 Mon Sep 17 00:00:00 2001
|
||||
From: dengjianbo <dengjianbo@loongson.cn>
|
||||
Date: Thu, 24 Aug 2023 16:50:18 +0800
|
||||
Subject: [PATCH 09/29] LoongArch: Add ifunc support for strcmp{aligned, lsx}
|
||||
|
||||
Based on the glibc microbenchmark, strcmp-aligned implementation could
|
||||
reduce the runtime 0%-10% for aligned comparison, 10%-20% for unaligned
|
||||
comparison, strcmp-lsx implementation could reduce the runtime 0%-50%.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/lp64/multiarch/Makefile | 2 +
|
||||
.../lp64/multiarch/ifunc-impl-list.c | 7 +
|
||||
.../loongarch/lp64/multiarch/ifunc-strcmp.h | 38 ++++
|
||||
.../loongarch/lp64/multiarch/strcmp-aligned.S | 179 ++++++++++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S | 165 ++++++++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/strcmp.c | 35 ++++
|
||||
6 files changed, 426 insertions(+)
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp.c
|
||||
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
index c4dd3143..d5a500de 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
@@ -12,6 +12,8 @@ sysdep_routines += \
|
||||
strchrnul-aligned \
|
||||
strchrnul-lsx \
|
||||
strchrnul-lasx \
|
||||
+ strcmp-aligned \
|
||||
+ strcmp-lsx \
|
||||
memcpy-aligned \
|
||||
memcpy-unaligned \
|
||||
memmove-unaligned \
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
index 7cec0b77..9183b7da 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
@@ -62,6 +62,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned)
|
||||
)
|
||||
|
||||
+ IFUNC_IMPL (i, name, strcmp,
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ IFUNC_IMPL_ADD (array, i, strcmp, SUPPORT_LSX, __strcmp_lsx)
|
||||
+#endif
|
||||
+ IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_aligned)
|
||||
+ )
|
||||
+
|
||||
IFUNC_IMPL (i, name, memcpy,
|
||||
#if !defined __loongarch_soft_float
|
||||
IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx)
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h
|
||||
new file mode 100644
|
||||
index 00000000..ca26352b
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h
|
||||
@@ -0,0 +1,38 @@
|
||||
+/* Common definition for strcmp ifunc selection.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <ldsodefs.h>
|
||||
+#include <ifunc-init.h>
|
||||
+
|
||||
+#if !defined __loongarch_soft_float
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
|
||||
+#endif
|
||||
+
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
|
||||
+
|
||||
+static inline void * /* Pick the strcmp variant: lsx when available, otherwise aligned. */
|
||||
+IFUNC_SELECTOR (void)
|
||||
+{
|
||||
+#if !defined __loongarch_soft_float /* the lsx variant is only considered when not built for soft-float */
|
||||
+ if (SUPPORT_LSX)
|
||||
+ return OPTIMIZE (lsx);
|
||||
+ else /* this else binds to the return after the #endif */
|
||||
+#endif
|
||||
+ return OPTIMIZE (aligned); /* fallback, and the only choice for soft-float builds */
|
||||
+}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
|
||||
new file mode 100644
|
||||
index 00000000..f5f4f336
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
|
||||
@@ -0,0 +1,179 @@
|
||||
+/* Optimized strcmp implementation using basic Loongarch instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc)
|
||||
+# define STRCMP_NAME __strcmp_aligned
|
||||
+#else
|
||||
+# define STRCMP_NAME strcmp
|
||||
+#endif
|
||||
+
|
||||
+LEAF(STRCMP_NAME, 6)
|
||||
+ lu12i.w a4, 0x01010
|
||||
+ andi a2, a0, 0x7
|
||||
+ ori a4, a4, 0x101
|
||||
+ andi a3, a1, 0x7
|
||||
+
|
||||
+ bstrins.d a4, a4, 63, 32
|
||||
+ li.d t7, -1
|
||||
+ li.d t8, 8
|
||||
+ slli.d a5, a4, 7
|
||||
+
|
||||
+ bne a2, a3, L(unaligned)
|
||||
+ bstrins.d a0, zero, 2, 0
|
||||
+ bstrins.d a1, zero, 2, 0
|
||||
+ ld.d t0, a0, 0
|
||||
+
|
||||
+ ld.d t1, a1, 0
|
||||
+ slli.d t3, a2, 3
|
||||
+ sll.d t2, t7, t3
|
||||
+ orn t0, t0, t2
|
||||
+
|
||||
+
|
||||
+ orn t1, t1, t2
|
||||
+ sub.d t2, t0, a4
|
||||
+ andn t3, a5, t0
|
||||
+ and t2, t2, t3
|
||||
+
|
||||
+ bne t0, t1, L(al_end)
|
||||
+L(al_loop):
|
||||
+ bnez t2, L(ret0)
|
||||
+ ldx.d t0, a0, t8
|
||||
+ ldx.d t1, a1, t8
|
||||
+
|
||||
+ addi.d t8, t8, 8
|
||||
+ sub.d t2, t0, a4
|
||||
+ andn t3, a5, t0
|
||||
+ and t2, t2, t3
|
||||
+
|
||||
+ beq t0, t1, L(al_loop)
|
||||
+L(al_end):
|
||||
+ xor t3, t0, t1
|
||||
+ or t2, t2, t3
|
||||
+ ctz.d t3, t2
|
||||
+
|
||||
+
|
||||
+ bstrins.d t3, zero, 2, 0
|
||||
+ srl.d t0, t0, t3
|
||||
+ srl.d t1, t1, t3
|
||||
+ andi t0, t0, 0xff
|
||||
+
|
||||
+ andi t1, t1, 0xff
|
||||
+ sub.d a0, t0, t1
|
||||
+ jr ra
|
||||
+ nop
|
||||
+
|
||||
+L(ret0):
|
||||
+ move a0, zero
|
||||
+ jr ra
|
||||
+ nop
|
||||
+ nop
|
||||
+
|
||||
+L(unaligned):
|
||||
+ slt a6, a3, a2
|
||||
+ xor t0, a0, a1
|
||||
+ maskeqz t0, t0, a6
|
||||
+ xor a0, a0, t0
|
||||
+
|
||||
+
|
||||
+ xor a1, a1, t0
|
||||
+ andi a2, a0, 0x7
|
||||
+ andi a3, a1, 0x7
|
||||
+ bstrins.d a0, zero, 2, 0
|
||||
+
|
||||
+ bstrins.d a1, zero, 2, 0
|
||||
+ ld.d t4, a0, 0
|
||||
+ ld.d t1, a1, 0
|
||||
+ slli.d a2, a2, 3
|
||||
+
|
||||
+ slli.d a3, a3, 3
|
||||
+ srl.d t0, t4, a2
|
||||
+ srl.d t1, t1, a3
|
||||
+ srl.d t5, t7, a3
|
||||
+
|
||||
+ orn t0, t0, t5
|
||||
+ orn t1, t1, t5
|
||||
+ bne t0, t1, L(not_equal)
|
||||
+ sll.d t5, t7, a2
|
||||
+
|
||||
+
|
||||
+ sub.d a3, a2, a3
|
||||
+ orn t4, t4, t5
|
||||
+ sub.d a2, zero, a3
|
||||
+ sub.d t2, t4, a4
|
||||
+
|
||||
+ andn t3, a5, t4
|
||||
+ and t2, t2, t3
|
||||
+ bnez t2, L(find_zero)
|
||||
+L(un_loop):
|
||||
+ srl.d t5, t4, a3
|
||||
+
|
||||
+ ldx.d t4, a0, t8
|
||||
+ ldx.d t1, a1, t8
|
||||
+ addi.d t8, t8, 8
|
||||
+ sll.d t0, t4, a2
|
||||
+
|
||||
+ or t0, t0, t5
|
||||
+ bne t0, t1, L(not_equal)
|
||||
+ sub.d t2, t4, a4
|
||||
+ andn t3, a5, t4
|
||||
+
|
||||
+
|
||||
+ and t2, t2, t3
|
||||
+ beqz t2, L(un_loop)
|
||||
+L(find_zero):
|
||||
+ sub.d t2, t0, a4
|
||||
+ andn t3, a5, t0
|
||||
+
|
||||
+ and t2, t2, t3
|
||||
+ bnez t2, L(ret0)
|
||||
+ ldx.d t1, a1, t8
|
||||
+ srl.d t0, t4, a3
|
||||
+
|
||||
+L(not_equal):
|
||||
+ sub.d t2, t0, a4
|
||||
+ andn t3, a5, t0
|
||||
+ and t2, t2, t3
|
||||
+ xor t3, t0, t1
|
||||
+
|
||||
+ or t2, t2, t3
|
||||
+L(un_end):
|
||||
+ ctz.d t3, t2
|
||||
+ bstrins.d t3, zero, 2, 0
|
||||
+ srl.d t0, t0, t3
|
||||
+
|
||||
+
|
||||
+ srl.d t1, t1, t3
|
||||
+ andi t0, t0, 0xff
|
||||
+ andi t1, t1, 0xff
|
||||
+ sub.d t2, t0, t1
|
||||
+
|
||||
+
|
||||
+ sub.d t3, t1, t0
|
||||
+ masknez t0, t2, a6
|
||||
+ maskeqz t1, t3, a6
|
||||
+ or a0, t0, t1
|
||||
+
|
||||
+ jr ra
|
||||
+END(STRCMP_NAME)
|
||||
+
|
||||
+libc_hidden_builtin_def (STRCMP_NAME)
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
|
||||
new file mode 100644
|
||||
index 00000000..2e177a38
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
|
||||
@@ -0,0 +1,165 @@
|
||||
+/* Optimized strcmp implementation using Loongarch LSX instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+# define STRCMP __strcmp_lsx
|
||||
+
|
||||
+LEAF(STRCMP, 6)
|
||||
+ pcalau12i t0, %pc_hi20(L(INDEX))
|
||||
+ andi a2, a0, 0xf
|
||||
+ vld vr2, t0, %pc_lo12(L(INDEX))
|
||||
+ andi a3, a1, 0xf
|
||||
+
|
||||
+ bne a2, a3, L(unaligned)
|
||||
+ bstrins.d a0, zero, 3, 0
|
||||
+ bstrins.d a1, zero, 3, 0
|
||||
+ vld vr0, a0, 0
|
||||
+
|
||||
+ vld vr1, a1, 0
|
||||
+ vreplgr2vr.b vr3, a2
|
||||
+ vslt.b vr2, vr2, vr3
|
||||
+ vseq.b vr3, vr0, vr1
|
||||
+
|
||||
+ vmin.bu vr3, vr0, vr3
|
||||
+ vor.v vr3, vr3, vr2
|
||||
+ vsetanyeqz.b fcc0, vr3
|
||||
+ bcnez fcc0, L(al_out)
|
||||
+
|
||||
+
|
||||
+L(al_loop):
|
||||
+ vld vr0, a0, 16
|
||||
+ vld vr1, a1, 16
|
||||
+ addi.d a0, a0, 16
|
||||
+ addi.d a1, a1, 16
|
||||
+
|
||||
+ vseq.b vr3, vr0, vr1
|
||||
+ vmin.bu vr3, vr0, vr3
|
||||
+ vsetanyeqz.b fcc0, vr3
|
||||
+ bceqz fcc0, L(al_loop)
|
||||
+
|
||||
+L(al_out):
|
||||
+ vseqi.b vr3, vr3, 0
|
||||
+ vfrstpi.b vr3, vr3, 0
|
||||
+ vshuf.b vr0, vr0, vr0, vr3
|
||||
+ vshuf.b vr1, vr1, vr1, vr3
|
||||
+
|
||||
+ vpickve2gr.bu t0, vr0, 0
|
||||
+ vpickve2gr.bu t1, vr1, 0
|
||||
+ sub.d a0, t0, t1
|
||||
+ jr ra
|
||||
+
|
||||
+
|
||||
+L(unaligned):
|
||||
+ slt a4, a3, a2
|
||||
+ xor t0, a0, a1
|
||||
+ maskeqz t0, t0, a4
|
||||
+ xor a0, a0, t0
|
||||
+
|
||||
+ xor a1, a1, t0
|
||||
+ andi a2, a0, 0xf
|
||||
+ andi a3, a1, 0xf
|
||||
+ bstrins.d a0, zero, 3, 0
|
||||
+
|
||||
+ bstrins.d a1, zero, 3, 0
|
||||
+ vld vr3, a0, 0
|
||||
+ vld vr1, a1, 0
|
||||
+ vreplgr2vr.b vr4, a2
|
||||
+
|
||||
+ vreplgr2vr.b vr5, a3
|
||||
+ vslt.b vr7, vr2, vr5
|
||||
+ vsub.b vr5, vr5, vr4
|
||||
+ vaddi.bu vr6, vr2, 16
|
||||
+
|
||||
+
|
||||
+ vsub.b vr6, vr6, vr5
|
||||
+ vshuf.b vr0, vr3, vr3, vr6
|
||||
+ vor.v vr0, vr0, vr7
|
||||
+ vor.v vr1, vr1, vr7
|
||||
+
|
||||
+ vseq.b vr5, vr0, vr1
|
||||
+ vsetanyeqz.b fcc0, vr5
|
||||
+ bcnez fcc0, L(not_equal)
|
||||
+ vslt.b vr4, vr2, vr4
|
||||
+
|
||||
+ vor.v vr0, vr3, vr4
|
||||
+ vsetanyeqz.b fcc0, vr0
|
||||
+ bcnez fcc0, L(find_zero)
|
||||
+ nop
|
||||
+
|
||||
+L(un_loop):
|
||||
+ vld vr3, a0, 16
|
||||
+ vld vr1, a1, 16
|
||||
+ addi.d a0, a0, 16
|
||||
+ addi.d a1, a1, 16
|
||||
+
|
||||
+
|
||||
+ vshuf.b vr0, vr3, vr0, vr6
|
||||
+ vseq.b vr5, vr0, vr1
|
||||
+ vsetanyeqz.b fcc0, vr5
|
||||
+ bcnez fcc0, L(not_equal)
|
||||
+
|
||||
+ vsetanyeqz.b fcc0, vr3
|
||||
+ vor.v vr0, vr3, vr3
|
||||
+ bceqz fcc0, L(un_loop)
|
||||
+L(find_zero):
|
||||
+ vmin.bu vr5, vr1, vr5
|
||||
+
|
||||
+ vsetanyeqz.b fcc0, vr5
|
||||
+ bcnez fcc0, L(ret0)
|
||||
+ vld vr1, a1, 16
|
||||
+ vshuf.b vr0, vr3, vr3, vr6
|
||||
+
|
||||
+ vseq.b vr5, vr0, vr1
|
||||
+L(not_equal):
|
||||
+ vmin.bu vr5, vr0, vr5
|
||||
+L(un_end):
|
||||
+ vseqi.b vr5, vr5, 0
|
||||
+ vfrstpi.b vr5, vr5, 0
|
||||
+
|
||||
+
|
||||
+ vshuf.b vr0, vr0, vr0, vr5
|
||||
+ vshuf.b vr1, vr1, vr1, vr5
|
||||
+ vpickve2gr.bu t0, vr0, 0
|
||||
+ vpickve2gr.bu t1, vr1, 0
|
||||
+
|
||||
+ sub.d t3, t0, t1
|
||||
+ sub.d t4, t1, t0
|
||||
+ masknez t0, t3, a4
|
||||
+ maskeqz t1, t4, a4
|
||||
+
|
||||
+ or a0, t0, t1
|
||||
+ jr ra
|
||||
+L(ret0):
|
||||
+ move a0, zero
|
||||
+ jr ra
|
||||
+END(STRCMP)
|
||||
+
|
||||
+ .section .rodata.cst16,"M",@progbits,16
|
||||
+ .align 4
|
||||
+L(INDEX):
|
||||
+ .dword 0x0706050403020100
|
||||
+ .dword 0x0f0e0d0c0b0a0908
|
||||
+
|
||||
+libc_hidden_builtin_def (STRCMP)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp.c b/sysdeps/loongarch/lp64/multiarch/strcmp.c
|
||||
new file mode 100644
|
||||
index 00000000..6f249c0b
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp.c
|
||||
@@ -0,0 +1,35 @@
|
||||
+/* Multiple versions of strcmp.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* Define multiple versions only for the definition in libc. */
|
||||
+#if IS_IN (libc)
|
||||
+# define strcmp __redirect_strcmp
|
||||
+# include <string.h>
|
||||
+# undef strcmp
|
||||
+
|
||||
+# define SYMBOL_NAME strcmp
|
||||
+# include "ifunc-strcmp.h"
|
||||
+
|
||||
+libc_ifunc_redirected (__redirect_strcmp, strcmp, IFUNC_SELECTOR ());
|
||||
+
|
||||
+# ifdef SHARED
|
||||
+__hidden_ver1 (strcmp, __GI_strcmp, __redirect_strcmp)
|
||||
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strcmp);
|
||||
+# endif
|
||||
+#endif
|
||||
--
|
||||
2.33.0
|
||||
|
1099
LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch
Normal file
1099
LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch
Normal file
File diff suppressed because it is too large
Load diff
583
LoongArch-Add-ifunc-support-for-strncmp-aligned-lsx.patch
Normal file
583
LoongArch-Add-ifunc-support-for-strncmp-aligned-lsx.patch
Normal file
|
@ -0,0 +1,583 @@
|
|||
From 6f03da2d7ef218c0f78375cf706dada59c3fee63 Mon Sep 17 00:00:00 2001
|
||||
From: dengjianbo <dengjianbo@loongson.cn>
|
||||
Date: Thu, 24 Aug 2023 16:50:19 +0800
|
||||
Subject: [PATCH 10/29] LoongArch: Add ifunc support for strncmp{aligned, lsx}
|
||||
|
||||
Based on the glibc microbenchmark, only a few short inputs with this
|
||||
strncmp-aligned and strncmp-lsx implementation experience performance
|
||||
degradation; overall, strncmp-aligned could reduce the runtime 0%-10%
|
||||
for aligned comparison, 10%-25% for unaligned comparison, strncmp-lsx
|
||||
could reduce the runtime about 0%-60%.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/lp64/multiarch/Makefile | 2 +
|
||||
.../lp64/multiarch/ifunc-impl-list.c | 7 +
|
||||
.../loongarch/lp64/multiarch/ifunc-strncmp.h | 38 +++
|
||||
.../lp64/multiarch/strncmp-aligned.S | 218 ++++++++++++++++++
|
||||
.../loongarch/lp64/multiarch/strncmp-lsx.S | 208 +++++++++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/strncmp.c | 35 +++
|
||||
6 files changed, 508 insertions(+)
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp.c
|
||||
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
index d5a500de..5d7ae7ae 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
@@ -14,6 +14,8 @@ sysdep_routines += \
|
||||
strchrnul-lasx \
|
||||
strcmp-aligned \
|
||||
strcmp-lsx \
|
||||
+ strncmp-aligned \
|
||||
+ strncmp-lsx \
|
||||
memcpy-aligned \
|
||||
memcpy-unaligned \
|
||||
memmove-unaligned \
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
index 9183b7da..c8ba87bd 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
@@ -69,6 +69,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_aligned)
|
||||
)
|
||||
|
||||
+ IFUNC_IMPL (i, name, strncmp,
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ IFUNC_IMPL_ADD (array, i, strncmp, SUPPORT_LSX, __strncmp_lsx)
|
||||
+#endif
|
||||
+ IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_aligned)
|
||||
+ )
|
||||
+
|
||||
IFUNC_IMPL (i, name, memcpy,
|
||||
#if !defined __loongarch_soft_float
|
||||
IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx)
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h
|
||||
new file mode 100644
|
||||
index 00000000..1a7dc36b
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h
|
||||
@@ -0,0 +1,38 @@
|
||||
+/* Common definition for strncmp ifunc selection.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <ldsodefs.h>
|
||||
+#include <ifunc-init.h>
|
||||
+
|
||||
+#if !defined __loongarch_soft_float
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
|
||||
+#endif
|
||||
+
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
|
||||
+
|
||||
+static inline void *
|
||||
+IFUNC_SELECTOR (void)
|
||||
+{
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ if (SUPPORT_LSX)
|
||||
+ return OPTIMIZE (lsx);
|
||||
+ else
|
||||
+#endif
|
||||
+ return OPTIMIZE (aligned);
|
||||
+}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
|
||||
new file mode 100644
|
||||
index 00000000..e2687fa7
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
|
||||
@@ -0,0 +1,218 @@
|
||||
+/* Optimized strncmp implementation using basic Loongarch instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc)
|
||||
+# define STRNCMP __strncmp_aligned
|
||||
+#else
|
||||
+# define STRNCMP strncmp
|
||||
+#endif
|
||||
+
|
||||
+LEAF(STRNCMP, 6)
|
||||
+ beqz a2, L(ret0)
|
||||
+ lu12i.w a5, 0x01010
|
||||
+ andi a3, a0, 0x7
|
||||
+ ori a5, a5, 0x101
|
||||
+
|
||||
+ andi a4, a1, 0x7
|
||||
+ bstrins.d a5, a5, 63, 32
|
||||
+ li.d t7, -1
|
||||
+ li.d t8, 8
|
||||
+
|
||||
+ addi.d a2, a2, -1
|
||||
+ slli.d a6, a5, 7
|
||||
+ bne a3, a4, L(unaligned)
|
||||
+ bstrins.d a0, zero, 2, 0
|
||||
+
|
||||
+ bstrins.d a1, zero, 2, 0
|
||||
+ ld.d t0, a0, 0
|
||||
+ ld.d t1, a1, 0
|
||||
+ slli.d t2, a3, 3
|
||||
+
|
||||
+
|
||||
+ sub.d t5, t8, a3
|
||||
+ srl.d t3, t7, t2
|
||||
+ srl.d t0, t0, t2
|
||||
+ srl.d t1, t1, t2
|
||||
+
|
||||
+ orn t0, t0, t3
|
||||
+ orn t1, t1, t3
|
||||
+ sub.d t2, t0, a5
|
||||
+ andn t3, a6, t0
|
||||
+
|
||||
+ and t2, t2, t3
|
||||
+ bne t0, t1, L(al_end)
|
||||
+ sltu t4, a2, t5
|
||||
+ sub.d a2, a2, t5
|
||||
+
|
||||
+L(al_loop):
|
||||
+ or t4, t2, t4
|
||||
+ bnez t4, L(ret0)
|
||||
+ ldx.d t0, a0, t8
|
||||
+ ldx.d t1, a1, t8
|
||||
+
|
||||
+
|
||||
+ addi.d t8, t8, 8
|
||||
+ sltui t4, a2, 8
|
||||
+ addi.d a2, a2, -8
|
||||
+ sub.d t2, t0, a5
|
||||
+
|
||||
+ andn t3, a6, t0
|
||||
+ and t2, t2, t3
|
||||
+ beq t0, t1, L(al_loop)
|
||||
+ addi.d a2, a2, 8
|
||||
+
|
||||
+L(al_end):
|
||||
+ xor t3, t0, t1
|
||||
+ or t2, t2, t3
|
||||
+ ctz.d t2, t2
|
||||
+ srli.d t4, t2, 3
|
||||
+
|
||||
+ bstrins.d t2, zero, 2, 0
|
||||
+ srl.d t0, t0, t2
|
||||
+ srl.d t1, t1, t2
|
||||
+ andi t0, t0, 0xff
|
||||
+
|
||||
+
|
||||
+ andi t1, t1, 0xff
|
||||
+ sltu t2, a2, t4
|
||||
+ sub.d a0, t0, t1
|
||||
+ masknez a0, a0, t2
|
||||
+
|
||||
+ jr ra
|
||||
+L(ret0):
|
||||
+ move a0, zero
|
||||
+ jr ra
|
||||
+ nop
|
||||
+
|
||||
+L(unaligned):
|
||||
+ slt a7, a4, a3
|
||||
+ xor t0, a0, a1
|
||||
+ maskeqz t0, t0, a7
|
||||
+ xor a0, a0, t0
|
||||
+
|
||||
+ xor a1, a1, t0
|
||||
+ andi a3, a0, 0x7
|
||||
+ andi a4, a1, 0x7
|
||||
+ bstrins.d a0, zero, 2, 0
|
||||
+
|
||||
+
|
||||
+ bstrins.d a1, zero, 2, 0
|
||||
+ ld.d t4, a0, 0
|
||||
+ ld.d t1, a1, 0
|
||||
+ slli.d t2, a3, 3
|
||||
+
|
||||
+ slli.d t3, a4, 3
|
||||
+ srl.d t5, t7, t3
|
||||
+ srl.d t0, t4, t2
|
||||
+ srl.d t1, t1, t3
|
||||
+
|
||||
+ orn t0, t0, t5
|
||||
+ orn t1, t1, t5
|
||||
+ bne t0, t1, L(not_equal)
|
||||
+ sub.d t6, t8, a4
|
||||
+
|
||||
+ sub.d a4, t2, t3
|
||||
+ sll.d t2, t7, t2
|
||||
+ sub.d t5, t8, a3
|
||||
+ orn t4, t4, t2
|
||||
+
|
||||
+
|
||||
+ sub.d t2, t4, a5
|
||||
+ andn t3, a6, t4
|
||||
+ sltu t7, a2, t5
|
||||
+ and t2, t2, t3
|
||||
+
|
||||
+ sub.d a3, zero, a4
|
||||
+ or t2, t2, t7
|
||||
+ bnez t2, L(un_end)
|
||||
+ sub.d t7, t5, t6
|
||||
+
|
||||
+ sub.d a2, a2, t5
|
||||
+ sub.d t6, t8, t7
|
||||
+L(un_loop):
|
||||
+ srl.d t5, t4, a4
|
||||
+ ldx.d t4, a0, t8
|
||||
+
|
||||
+ ldx.d t1, a1, t8
|
||||
+ addi.d t8, t8, 8
|
||||
+ sll.d t0, t4, a3
|
||||
+ or t0, t0, t5
|
||||
+
|
||||
+
|
||||
+ bne t0, t1, L(loop_not_equal)
|
||||
+ sub.d t2, t4, a5
|
||||
+ andn t3, a6, t4
|
||||
+ sltui t5, a2, 8
|
||||
+
|
||||
+ and t2, t2, t3
|
||||
+ addi.d a2, a2, -8
|
||||
+ or t3, t2, t5
|
||||
+ beqz t3, L(un_loop)
|
||||
+
|
||||
+ addi.d a2, a2, 8
|
||||
+L(un_end):
|
||||
+ sub.d t2, t0, a5
|
||||
+ andn t3, a6, t0
|
||||
+ sltu t5, a2, t6
|
||||
+
|
||||
+ and t2, t2, t3
|
||||
+ or t2, t2, t5
|
||||
+ bnez t2, L(ret0)
|
||||
+ ldx.d t1, a1, t8
|
||||
+
|
||||
+
|
||||
+ srl.d t0, t4, a4
|
||||
+ sub.d a2, a2, t6
|
||||
+L(not_equal):
|
||||
+ sub.d t2, t0, a5
|
||||
+ andn t3, a6, t0
|
||||
+
|
||||
+ xor t4, t0, t1
|
||||
+ and t2, t2, t3
|
||||
+ or t2, t2, t4
|
||||
+ ctz.d t2, t2
|
||||
+
|
||||
+ bstrins.d t2, zero, 2, 0
|
||||
+ srli.d t4, t2, 3
|
||||
+ srl.d t0, t0, t2
|
||||
+ srl.d t1, t1, t2
|
||||
+
|
||||
+ andi t0, t0, 0xff
|
||||
+ andi t1, t1, 0xff
|
||||
+ sub.d t2, t0, t1
|
||||
+ sub.d t3, t1, t0
|
||||
+
|
||||
+
|
||||
+ masknez t0, t2, a7
|
||||
+ maskeqz t1, t3, a7
|
||||
+ sltu t2, a2, t4
|
||||
+ or a0, t0, t1
|
||||
+
|
||||
+ masknez a0, a0, t2
|
||||
+ jr ra
|
||||
+L(loop_not_equal):
|
||||
+ add.d a2, a2, t7
|
||||
+ b L(not_equal)
|
||||
+END(STRNCMP)
|
||||
+
|
||||
+libc_hidden_builtin_def (STRNCMP)
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
|
||||
new file mode 100644
|
||||
index 00000000..0b4eee2a
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
|
||||
@@ -0,0 +1,208 @@
|
||||
+/* Optimized strncmp implementation using Loongarch LSX instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+# define STRNCMP __strncmp_lsx
|
||||
+
|
||||
+LEAF(STRNCMP, 6)
|
||||
+ beqz a2, L(ret0)
|
||||
+ pcalau12i t0, %pc_hi20(L(INDEX))
|
||||
+ andi a3, a0, 0xf
|
||||
+ vld vr2, t0, %pc_lo12(L(INDEX))
|
||||
+
|
||||
+ andi a4, a1, 0xf
|
||||
+ li.d t2, 16
|
||||
+ bne a3, a4, L(unaligned)
|
||||
+ xor t0, a0, a3
|
||||
+
|
||||
+ xor t1, a1, a4
|
||||
+ vld vr0, t0, 0
|
||||
+ vld vr1, t1, 0
|
||||
+ vreplgr2vr.b vr3, a3
|
||||
+
|
||||
+
|
||||
+ sub.d t2, t2, a3
|
||||
+ vadd.b vr3, vr3, vr2
|
||||
+ vshuf.b vr0, vr3, vr0, vr3
|
||||
+ vshuf.b vr1, vr3, vr1, vr3
|
||||
+
|
||||
+ vseq.b vr3, vr0, vr1
|
||||
+ vmin.bu vr3, vr0, vr3
|
||||
+ bgeu t2, a2, L(al_early_end)
|
||||
+ vsetanyeqz.b fcc0, vr3
|
||||
+
|
||||
+ bcnez fcc0, L(al_end)
|
||||
+ add.d a3, a0, a2
|
||||
+ addi.d a4, a3, -1
|
||||
+ bstrins.d a4, zero, 3, 0
|
||||
+
|
||||
+ sub.d a2, a3, a4
|
||||
+L(al_loop):
|
||||
+ vld vr0, t0, 16
|
||||
+ vld vr1, t1, 16
|
||||
+ addi.d t0, t0, 16
|
||||
+
|
||||
+
|
||||
+ addi.d t1, t1, 16
|
||||
+ vseq.b vr3, vr0, vr1
|
||||
+ vmin.bu vr3, vr0, vr3
|
||||
+ beq t0, a4, L(al_early_end)
|
||||
+
|
||||
+ vsetanyeqz.b fcc0, vr3
|
||||
+ bceqz fcc0, L(al_loop)
|
||||
+L(al_end):
|
||||
+ vseqi.b vr3, vr3, 0
|
||||
+ vfrstpi.b vr3, vr3, 0
|
||||
+
|
||||
+ vshuf.b vr0, vr0, vr0, vr3
|
||||
+ vshuf.b vr1, vr1, vr1, vr3
|
||||
+ vpickve2gr.bu t0, vr0, 0
|
||||
+ vpickve2gr.bu t1, vr1, 0
|
||||
+
|
||||
+ sub.d a0, t0, t1
|
||||
+ jr ra
|
||||
+L(al_early_end):
|
||||
+ vreplgr2vr.b vr4, a2
|
||||
+ vslt.b vr4, vr2, vr4
|
||||
+
|
||||
+
|
||||
+ vorn.v vr3, vr3, vr4
|
||||
+ b L(al_end)
|
||||
+L(unaligned):
|
||||
+ slt a5, a3, a4
|
||||
+ xor t0, a0, a1
|
||||
+
|
||||
+ maskeqz t0, t0, a5
|
||||
+ xor a0, a0, t0
|
||||
+ xor a1, a1, t0
|
||||
+ andi a3, a0, 0xf
|
||||
+
|
||||
+ andi a4, a1, 0xf
|
||||
+ xor t0, a0, a3
|
||||
+ xor t1, a1, a4
|
||||
+ vld vr0, t0, 0
|
||||
+
|
||||
+ vld vr3, t1, 0
|
||||
+ sub.d t2, t2, a3
|
||||
+ vreplgr2vr.b vr4, a3
|
||||
+ vreplgr2vr.b vr5, a4
|
||||
+
|
||||
+
|
||||
+ vaddi.bu vr6, vr2, 16
|
||||
+ vsub.b vr7, vr4, vr5
|
||||
+ vsub.b vr6, vr6, vr7
|
||||
+ vadd.b vr4, vr2, vr4
|
||||
+
|
||||
+ vshuf.b vr1, vr3, vr3, vr6
|
||||
+ vshuf.b vr0, vr7, vr0, vr4
|
||||
+ vshuf.b vr1, vr7, vr1, vr4
|
||||
+ vseq.b vr4, vr0, vr1
|
||||
+
|
||||
+ vmin.bu vr4, vr0, vr4
|
||||
+ bgeu t2, a2, L(un_early_end)
|
||||
+ vsetanyeqz.b fcc0, vr4
|
||||
+ bcnez fcc0, L(un_end)
|
||||
+
|
||||
+ add.d a6, a0, a2
|
||||
+ vslt.b vr5, vr2, vr5
|
||||
+ addi.d a7, a6, -1
|
||||
+ vor.v vr3, vr3, vr5
|
||||
+
|
||||
+
|
||||
+ bstrins.d a7, zero, 3, 0
|
||||
+ sub.d a2, a6, a7
|
||||
+L(un_loop):
|
||||
+ vld vr0, t0, 16
|
||||
+ addi.d t0, t0, 16
|
||||
+
|
||||
+ vsetanyeqz.b fcc0, vr3
|
||||
+ bcnez fcc0, L(has_zero)
|
||||
+ beq t0, a7, L(end_with_len)
|
||||
+ vor.v vr1, vr3, vr3
|
||||
+
|
||||
+ vld vr3, t1, 16
|
||||
+ addi.d t1, t1, 16
|
||||
+ vshuf.b vr1, vr3, vr1, vr6
|
||||
+ vseq.b vr4, vr0, vr1
|
||||
+
|
||||
+ vmin.bu vr4, vr0, vr4
|
||||
+ vsetanyeqz.b fcc0, vr4
|
||||
+ bceqz fcc0, L(un_loop)
|
||||
+L(un_end):
|
||||
+ vseqi.b vr4, vr4, 0
|
||||
+
|
||||
+
|
||||
+ vfrstpi.b vr4, vr4, 0
|
||||
+ vshuf.b vr0, vr0, vr0, vr4
|
||||
+ vshuf.b vr1, vr1, vr1, vr4
|
||||
+ vpickve2gr.bu t0, vr0, 0
|
||||
+
|
||||
+ vpickve2gr.bu t1, vr1, 0
|
||||
+ sub.d t2, t0, t1
|
||||
+ sub.d t3, t1, t0
|
||||
+ masknez t0, t2, a5
|
||||
+
|
||||
+ maskeqz t1, t3, a5
|
||||
+ or a0, t0, t1
|
||||
+ jr ra
|
||||
+L(has_zero):
|
||||
+ vshuf.b vr1, vr3, vr3, vr6
|
||||
+
|
||||
+ vseq.b vr4, vr0, vr1
|
||||
+ vmin.bu vr4, vr0, vr4
|
||||
+ bne t0, a7, L(un_end)
|
||||
+L(un_early_end):
|
||||
+ vreplgr2vr.b vr5, a2
|
||||
+
|
||||
+ vslt.b vr5, vr2, vr5
|
||||
+ vorn.v vr4, vr4, vr5
|
||||
+ b L(un_end)
|
||||
+L(end_with_len):
|
||||
+ sub.d a6, a3, a4
|
||||
+
|
||||
+ bgeu a6, a2, 1f
|
||||
+ vld vr4, t1, 16
|
||||
+1:
|
||||
+ vshuf.b vr1, vr4, vr3, vr6
|
||||
+ vseq.b vr4, vr0, vr1
|
||||
+
|
||||
+ vmin.bu vr4, vr0, vr4
|
||||
+ vreplgr2vr.b vr5, a2
|
||||
+ vslt.b vr5, vr2, vr5
|
||||
+ vorn.v vr4, vr4, vr5
|
||||
+
|
||||
+ b L(un_end)
|
||||
+L(ret0):
|
||||
+ move a0, zero
|
||||
+ jr ra
|
||||
+END(STRNCMP)
|
||||
+
|
||||
+ .section .rodata.cst16,"M",@progbits,16
|
||||
+ .align 4
|
||||
+L(INDEX):
|
||||
+ .dword 0x0706050403020100
|
||||
+ .dword 0x0f0e0d0c0b0a0908
|
||||
+
|
||||
+libc_hidden_builtin_def (STRNCMP)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp.c b/sysdeps/loongarch/lp64/multiarch/strncmp.c
|
||||
new file mode 100644
|
||||
index 00000000..af6d0bc4
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp.c
|
||||
@@ -0,0 +1,35 @@
|
||||
+/* Multiple versions of strncmp.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* Define multiple versions only for the definition in libc. */
|
||||
+#if IS_IN (libc)
|
||||
+# define strncmp __redirect_strncmp
|
||||
+# include <string.h>
|
||||
+# undef strncmp
|
||||
+
|
||||
+# define SYMBOL_NAME strncmp
|
||||
+# include "ifunc-strncmp.h"
|
||||
+
|
||||
+libc_ifunc_redirected (__redirect_strncmp, strncmp, IFUNC_SELECTOR ());
|
||||
+
|
||||
+# ifdef SHARED
|
||||
+__hidden_ver1 (strncmp, __GI_strncmp, __redirect_strncmp)
|
||||
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strncmp);
|
||||
+# endif
|
||||
+#endif
|
||||
--
|
||||
2.33.0
|
||||
|
465
LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch
Normal file
465
LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch
Normal file
|
@ -0,0 +1,465 @@
|
|||
From e494d32d3b76eee0d59cfab37789a356459b517a Mon Sep 17 00:00:00 2001
|
||||
From: dengjianbo <dengjianbo@loongson.cn>
|
||||
Date: Thu, 24 Aug 2023 16:50:17 +0800
|
||||
Subject: [PATCH 08/29] LoongArch: Add ifunc support for strnlen{aligned, lsx,
|
||||
lasx}
|
||||
|
||||
Based on the glibc microbenchmark, strnlen-aligned implementation could
|
||||
reduce the runtime more than 10%, strnlen-lsx implementation could reduce
|
||||
the runtime about 50%-78%, strnlen-lasx implementation could reduce the
|
||||
runtime about 50%-88%.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
|
||||
.../lp64/multiarch/ifunc-impl-list.c | 8 ++
|
||||
.../loongarch/lp64/multiarch/ifunc-strnlen.h | 41 +++++++
|
||||
.../lp64/multiarch/strnlen-aligned.S | 102 ++++++++++++++++++
|
||||
.../loongarch/lp64/multiarch/strnlen-lasx.S | 100 +++++++++++++++++
|
||||
.../loongarch/lp64/multiarch/strnlen-lsx.S | 89 +++++++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/strnlen.c | 39 +++++++
|
||||
7 files changed, 382 insertions(+)
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen.c
|
||||
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
index afa51041..c4dd3143 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
@@ -3,6 +3,9 @@ sysdep_routines += \
|
||||
strlen-aligned \
|
||||
strlen-lsx \
|
||||
strlen-lasx \
|
||||
+ strnlen-aligned \
|
||||
+ strnlen-lsx \
|
||||
+ strnlen-lasx \
|
||||
strchr-aligned \
|
||||
strchr-lsx \
|
||||
strchr-lasx \
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
index 25eb96b0..7cec0b77 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
@@ -38,6 +38,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned)
|
||||
)
|
||||
|
||||
+ IFUNC_IMPL (i, name, strnlen,
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LASX, __strnlen_lasx)
|
||||
+ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LSX, __strnlen_lsx)
|
||||
+#endif
|
||||
+ IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_aligned)
|
||||
+ )
|
||||
+
|
||||
IFUNC_IMPL (i, name, strchr,
|
||||
#if !defined __loongarch_soft_float
|
||||
IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx)
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h
|
||||
new file mode 100644
|
||||
index 00000000..5cf89810
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h
|
||||
@@ -0,0 +1,41 @@
|
||||
+/* Common definition for strnlen ifunc selections.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <ldsodefs.h>
|
||||
+#include <ifunc-init.h>
|
||||
+
|
||||
+#if !defined __loongarch_soft_float
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
|
||||
+#endif
|
||||
+
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
|
||||
+
|
||||
+static inline void *
|
||||
+IFUNC_SELECTOR (void)
|
||||
+{
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ if (SUPPORT_LASX)
|
||||
+ return OPTIMIZE (lasx);
|
||||
+ else if (SUPPORT_LSX)
|
||||
+ return OPTIMIZE (lsx);
|
||||
+ else
|
||||
+#endif
|
||||
+ return OPTIMIZE (aligned);
|
||||
+}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
|
||||
new file mode 100644
|
||||
index 00000000..b900430a
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
|
||||
@@ -0,0 +1,102 @@
|
||||
+/* Optimized strnlen implementation using basic LoongArch instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc)
|
||||
+# define STRNLEN __strnlen_aligned
|
||||
+#else
|
||||
+# define STRNLEN __strnlen
|
||||
+#endif
|
||||
+
|
||||
+LEAF(STRNLEN, 6)
|
||||
+ beqz a1, L(out)
|
||||
+ lu12i.w a2, 0x01010
|
||||
+ andi t1, a0, 0x7
|
||||
+ move t4, a0
|
||||
+
|
||||
+ bstrins.d a0, zero, 2, 0
|
||||
+ ori a2, a2, 0x101
|
||||
+ li.w t0, -1
|
||||
+ ld.d t2, a0, 0
|
||||
+
|
||||
+ slli.d t3, t1, 3
|
||||
+ bstrins.d a2, a2, 63, 32
|
||||
+ li.w t5, 8
|
||||
+ slli.d a3, a2, 7
|
||||
+
|
||||
+ sub.w t1, t5, t1
|
||||
+ sll.d t0, t0, t3
|
||||
+ orn t2, t2, t0
|
||||
+ sub.d t0, t2, a2
|
||||
+
|
||||
+
|
||||
+ andn t3, a3, t2
|
||||
+ and t0, t0, t3
|
||||
+ bnez t0, L(count_pos)
|
||||
+ sub.d t5, a1, t1
|
||||
+
|
||||
+ bgeu t1, a1, L(out)
|
||||
+ addi.d a0, a0, 8
|
||||
+L(loop):
|
||||
+ ld.d t2, a0, 0
|
||||
+ sub.d t0, t2, a2
|
||||
+
|
||||
+ andn t1, a3, t2
|
||||
+ sltui t6, t5, 9
|
||||
+ and t0, t0, t1
|
||||
+ or t7, t0, t6
|
||||
+
|
||||
+ bnez t7, L(count_pos)
|
||||
+ ld.d t2, a0, 8
|
||||
+ addi.d a0, a0, 16
|
||||
+ sub.d t0, t2, a2
|
||||
+
|
||||
+
|
||||
+ andn t1, a3, t2
|
||||
+ sltui t6, t5, 17
|
||||
+ and t0, t0, t1
|
||||
+ addi.d t5, t5, -16
|
||||
+
|
||||
+ or t7, t0, t6
|
||||
+ beqz t7, L(loop)
|
||||
+ addi.d a0, a0, -8
|
||||
+L(count_pos):
|
||||
+ ctz.d t1, t0
|
||||
+
|
||||
+ sub.d a0, a0, t4
|
||||
+ srli.d t1, t1, 3
|
||||
+ add.d a0, t1, a0
|
||||
+ sltu t0, a0, a1
|
||||
+
|
||||
+ masknez t1, a1, t0
|
||||
+ maskeqz a0, a0, t0
|
||||
+ or a0, a0, t1
|
||||
+ jr ra
|
||||
+
|
||||
+
|
||||
+L(out):
|
||||
+ move a0, a1
|
||||
+ jr ra
|
||||
+END(STRNLEN)
|
||||
+
|
||||
+weak_alias (STRNLEN, strnlen)
|
||||
+libc_hidden_builtin_def (STRNLEN)
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
|
||||
new file mode 100644
|
||||
index 00000000..2c03d3d9
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
|
||||
@@ -0,0 +1,100 @@
|
||||
+/* Optimized strnlen implementation using LoongArch LASX instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+# define STRNLEN __strnlen_lasx
|
||||
+
|
||||
+LEAF(STRNLEN, 6)
|
||||
+ beqz a1, L(ret0)
|
||||
+ andi t1, a0, 0x3f
|
||||
+ li.d t3, 65
|
||||
+ sub.d a2, a0, t1
|
||||
+
|
||||
+ xvld xr0, a2, 0
|
||||
+ xvld xr1, a2, 32
|
||||
+ sub.d t1, t3, t1
|
||||
+ move a3, a0
|
||||
+
|
||||
+ sltu t1, a1, t1
|
||||
+ xvmsknz.b xr0, xr0
|
||||
+ xvmsknz.b xr1, xr1
|
||||
+ xvpickve.w xr2, xr0, 4
|
||||
+
|
||||
+ xvpickve.w xr3, xr1, 4
|
||||
+ vilvl.h vr0, vr2, vr0
|
||||
+ vilvl.h vr1, vr3, vr1
|
||||
+ vilvl.w vr0, vr1, vr0
|
||||
+
|
||||
+
|
||||
+ movfr2gr.d t0, fa0
|
||||
+ sra.d t0, t0, a0
|
||||
+ orn t1, t1, t0
|
||||
+ bnez t1, L(end)
|
||||
+
|
||||
+ add.d a4, a0, a1
|
||||
+ move a0, a2
|
||||
+ addi.d a4, a4, -1
|
||||
+ bstrins.d a4, zero, 5, 0
|
||||
+
|
||||
+L(loop):
|
||||
+ xvld xr0, a0, 64
|
||||
+ xvld xr1, a0, 96
|
||||
+ addi.d a0, a0, 64
|
||||
+ beq a0, a4, L(out)
|
||||
+
|
||||
+ xvmin.bu xr2, xr0, xr1
|
||||
+ xvsetanyeqz.b fcc0, xr2
|
||||
+ bceqz fcc0, L(loop)
|
||||
+L(out):
|
||||
+ xvmsknz.b xr0, xr0
|
||||
+
|
||||
+
|
||||
+ xvmsknz.b xr1, xr1
|
||||
+ xvpickve.w xr2, xr0, 4
|
||||
+ xvpickve.w xr3, xr1, 4
|
||||
+ vilvl.h vr0, vr2, vr0
|
||||
+
|
||||
+ vilvl.h vr1, vr3, vr1
|
||||
+ vilvl.w vr0, vr1, vr0
|
||||
+ movfr2gr.d t0, fa0
|
||||
+L(end):
|
||||
+ sub.d a0, a0, a3
|
||||
+
|
||||
+ cto.d t0, t0
|
||||
+ add.d a0, a0, t0
|
||||
+ sltu t1, a0, a1
|
||||
+ masknez t0, a1, t1
|
||||
+
|
||||
+ maskeqz t1, a0, t1
|
||||
+ or a0, t0, t1
|
||||
+ jr ra
|
||||
+L(ret0):
|
||||
+ move a0, zero
|
||||
+
|
||||
+
|
||||
+ jr ra
|
||||
+END(STRNLEN)
|
||||
+
|
||||
+libc_hidden_def (STRNLEN)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
|
||||
new file mode 100644
|
||||
index 00000000..b769a895
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
|
||||
@@ -0,0 +1,89 @@
|
||||
+/* Optimized strnlen implementation using LoongArch LSX instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+# define STRNLEN __strnlen_lsx
|
||||
+
|
||||
+LEAF(STRNLEN, 6)
|
||||
+ beqz a1, L(ret0)
|
||||
+ andi t1, a0, 0x1f
|
||||
+ li.d t3, 33
|
||||
+ sub.d a2, a0, t1
|
||||
+
|
||||
+ vld vr0, a2, 0
|
||||
+ vld vr1, a2, 16
|
||||
+ sub.d t1, t3, t1
|
||||
+ move a3, a0
|
||||
+
|
||||
+ sltu t1, a1, t1
|
||||
+ vmsknz.b vr0, vr0
|
||||
+ vmsknz.b vr1, vr1
|
||||
+ vilvl.h vr0, vr1, vr0
|
||||
+
|
||||
+ movfr2gr.s t0, fa0
|
||||
+ sra.w t0, t0, a0
|
||||
+ orn t1, t1, t0
|
||||
+ bnez t1, L(end)
|
||||
+
|
||||
+
|
||||
+ add.d a4, a0, a1
|
||||
+ move a0, a2
|
||||
+ addi.d a4, a4, -1
|
||||
+ bstrins.d a4, zero, 4, 0
|
||||
+
|
||||
+L(loop):
|
||||
+ vld vr0, a0, 32
|
||||
+ vld vr1, a0, 48
|
||||
+ addi.d a0, a0, 32
|
||||
+ beq a0, a4, L(out)
|
||||
+
|
||||
+ vmin.bu vr2, vr0, vr1
|
||||
+ vsetanyeqz.b fcc0, vr2
|
||||
+ bceqz fcc0, L(loop)
|
||||
+L(out):
|
||||
+ vmsknz.b vr0, vr0
|
||||
+
|
||||
+ vmsknz.b vr1, vr1
|
||||
+ vilvl.h vr0, vr1, vr0
|
||||
+ movfr2gr.s t0, fa0
|
||||
+L(end):
|
||||
+ sub.d a0, a0, a3
|
||||
+
|
||||
+
|
||||
+ cto.w t0, t0
|
||||
+ add.d a0, a0, t0
|
||||
+ sltu t1, a0, a1
|
||||
+ masknez t0, a1, t1
|
||||
+
|
||||
+ maskeqz t1, a0, t1
|
||||
+ or a0, t0, t1
|
||||
+ jr ra
|
||||
+L(ret0):
|
||||
+ move a0, zero
|
||||
+
|
||||
+ jr ra
|
||||
+END(STRNLEN)
|
||||
+
|
||||
+libc_hidden_builtin_def (STRNLEN)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen.c b/sysdeps/loongarch/lp64/multiarch/strnlen.c
|
||||
new file mode 100644
|
||||
index 00000000..38b7a25a
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen.c
|
||||
@@ -0,0 +1,39 @@
|
||||
+/* Multiple versions of strnlen.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* Define multiple versions only for the definition in libc. */
|
||||
+#if IS_IN (libc)
|
||||
+# define strnlen __redirect_strnlen
|
||||
+# define __strnlen __redirect___strnlen
|
||||
+# include <string.h>
|
||||
+# undef __strnlen
|
||||
+# undef strnlen
|
||||
+
|
||||
+# define SYMBOL_NAME strnlen
|
||||
+# include "ifunc-strnlen.h"
|
||||
+
|
||||
+libc_ifunc_redirected (__redirect_strnlen, __strnlen, IFUNC_SELECTOR ());
|
||||
+weak_alias (__strnlen, strnlen);
|
||||
+# ifdef SHARED
|
||||
+__hidden_ver1 (__strnlen, __GI___strnlen, __redirect___strnlen)
|
||||
+ __attribute__((visibility ("hidden"))) __attribute_copy__ (strnlen);
|
||||
+__hidden_ver1 (strnlen, __GI_strnlen, __redirect_strnlen)
|
||||
+ __attribute__((weak, visibility ("hidden"))) __attribute_copy__ (strnlen);
|
||||
+# endif
|
||||
+#endif
|
||||
--
|
||||
2.33.0
|
||||
|
670
LoongArch-Add-ifunc-support-for-strrchr-aligned-lsx-.patch
Normal file
670
LoongArch-Add-ifunc-support-for-strrchr-aligned-lsx-.patch
Normal file
|
@ -0,0 +1,670 @@
|
|||
From d537d0ab45a55048c8da483e73be4448ddb45525 Mon Sep 17 00:00:00 2001
|
||||
From: dengjianbo <dengjianbo@loongson.cn>
|
||||
Date: Wed, 13 Sep 2023 15:35:00 +0800
|
||||
Subject: [PATCH 23/29] LoongArch: Add ifunc support for strrchr{aligned, lsx,
|
||||
lasx}
|
||||
|
||||
According to glibc strrchr microbenchmark test results, this implementation
|
||||
could reduce the runtime as follows:
|
||||
|
||||
Name Percent of runtime reduced
|
||||
strrchr-lasx 10%-50%
|
||||
strrchr-lsx 0%-50%
|
||||
strrchr-aligned 5%-50%
|
||||
|
||||
Generic strrchr is implemented by function strlen + memrchr, the lasx version
|
||||
will compare with generic strrchr implemented by strlen-lasx + memrchr-lasx,
|
||||
the lsx version will compare with generic strrchr implemented by strlen-lsx +
|
||||
memrchr-lsx, the aligned version will compare with generic strrchr implemented
|
||||
by strlen-aligned + memrchr-generic.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
|
||||
.../lp64/multiarch/ifunc-impl-list.c | 8 +
|
||||
.../loongarch/lp64/multiarch/ifunc-strrchr.h | 41 ++++
|
||||
.../lp64/multiarch/strrchr-aligned.S | 170 +++++++++++++++++
|
||||
.../loongarch/lp64/multiarch/strrchr-lasx.S | 176 ++++++++++++++++++
|
||||
.../loongarch/lp64/multiarch/strrchr-lsx.S | 144 ++++++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/strrchr.c | 36 ++++
|
||||
7 files changed, 578 insertions(+)
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr.c
|
||||
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
index 39550bea..fe863e1b 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
@@ -9,6 +9,9 @@ sysdep_routines += \
|
||||
strchr-aligned \
|
||||
strchr-lsx \
|
||||
strchr-lasx \
|
||||
+ strrchr-aligned \
|
||||
+ strrchr-lsx \
|
||||
+ strrchr-lasx \
|
||||
strchrnul-aligned \
|
||||
strchrnul-lsx \
|
||||
strchrnul-lasx \
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
index 39a14f1d..529e2369 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
@@ -94,6 +94,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned)
|
||||
)
|
||||
|
||||
+ IFUNC_IMPL (i, name, strrchr,
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LASX, __strrchr_lasx)
|
||||
+ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LSX, __strrchr_lsx)
|
||||
+#endif
|
||||
+ IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_aligned)
|
||||
+ )
|
||||
+
|
||||
IFUNC_IMPL (i, name, memcpy,
|
||||
#if !defined __loongarch_soft_float
|
||||
IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx)
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h
|
||||
new file mode 100644
|
||||
index 00000000..bbb34089
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h
|
||||
@@ -0,0 +1,41 @@
|
||||
+/* Common definition for strrchr ifunc selections.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <ldsodefs.h>
|
||||
+#include <ifunc-init.h>
|
||||
+
|
||||
+#if !defined __loongarch_soft_float
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
|
||||
+#endif
|
||||
+
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
|
||||
+
|
||||
+static inline void *
|
||||
+IFUNC_SELECTOR (void)
|
||||
+{
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ if (SUPPORT_LASX)
|
||||
+ return OPTIMIZE (lasx);
|
||||
+ else if (SUPPORT_LSX)
|
||||
+ return OPTIMIZE (lsx);
|
||||
+ else
|
||||
+#endif
|
||||
+ return OPTIMIZE (aligned);
|
||||
+}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
|
||||
new file mode 100644
|
||||
index 00000000..a73deb78
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
|
||||
@@ -0,0 +1,170 @@
|
||||
+/* Optimized strrchr implementation using basic LoongArch instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc)
|
||||
+# define STRRCHR __strrchr_aligned
|
||||
+#else
|
||||
+# define STRRCHR strrchr
|
||||
+#endif
|
||||
+
|
||||
+LEAF(STRRCHR, 6)
|
||||
+ slli.d t0, a0, 3
|
||||
+ bstrins.d a0, zero, 2, 0
|
||||
+ lu12i.w a2, 0x01010
|
||||
+ ld.d t2, a0, 0
|
||||
+
|
||||
+ andi a1, a1, 0xff
|
||||
+ ori a2, a2, 0x101
|
||||
+ li.d t3, -1
|
||||
+ bstrins.d a2, a2, 63, 32
|
||||
+
|
||||
+ sll.d t5, t3, t0
|
||||
+ slli.d a3, a2, 7
|
||||
+ orn t4, t2, t5
|
||||
+ mul.d a1, a1, a2
|
||||
+
|
||||
+ sub.d t0, t4, a2
|
||||
+ andn t1, a3, t4
|
||||
+ and t1, t0, t1
|
||||
+ beqz t1, L(find_tail)
|
||||
+
|
||||
+
|
||||
+ ctz.d t0, t1
|
||||
+ orn t0, zero, t0
|
||||
+ xor t2, t4, a1
|
||||
+ srl.d t0, t3, t0
|
||||
+
|
||||
+ orn t2, t2, t0
|
||||
+ orn t2, t2, t5
|
||||
+ revb.d t2, t2
|
||||
+ sub.d t1, t2, a2
|
||||
+
|
||||
+ andn t0, a3, t2
|
||||
+ and t1, t0, t1
|
||||
+ ctz.d t0, t1
|
||||
+ srli.d t0, t0, 3
|
||||
+
|
||||
+ addi.d a0, a0, 7
|
||||
+ sub.d a0, a0, t0
|
||||
+ maskeqz a0, a0, t1
|
||||
+ jr ra
|
||||
+
|
||||
+
|
||||
+L(find_tail):
|
||||
+ addi.d a4, a0, 8
|
||||
+ addi.d a0, a0, 8
|
||||
+L(loop_ascii):
|
||||
+ ld.d t2, a0, 0
|
||||
+ sub.d t1, t2, a2
|
||||
+
|
||||
+ and t0, t1, a3
|
||||
+ bnez t0, L(more_check)
|
||||
+ ld.d t2, a0, 8
|
||||
+ sub.d t1, t2, a2
|
||||
+
|
||||
+ and t0, t1, a3
|
||||
+ addi.d a0, a0, 16
|
||||
+ beqz t0, L(loop_ascii)
|
||||
+ addi.d a0, a0, -8
|
||||
+
|
||||
+L(more_check):
|
||||
+ andn t0, a3, t2
|
||||
+ and t1, t1, t0
|
||||
+ bnez t1, L(tail)
|
||||
+ addi.d a0, a0, 8
|
||||
+
|
||||
+
|
||||
+L(loop_nonascii):
|
||||
+ ld.d t2, a0, 0
|
||||
+ sub.d t1, t2, a2
|
||||
+ andn t0, a3, t2
|
||||
+ and t1, t0, t1
|
||||
+
|
||||
+ bnez t1, L(tail)
|
||||
+ ld.d t2, a0, 8
|
||||
+ addi.d a0, a0, 16
|
||||
+ sub.d t1, t2, a2
|
||||
+
|
||||
+ andn t0, a3, t2
|
||||
+ and t1, t0, t1
|
||||
+ beqz t1, L(loop_nonascii)
|
||||
+ addi.d a0, a0, -8
|
||||
+
|
||||
+L(tail):
|
||||
+ ctz.d t0, t1
|
||||
+ orn t0, zero, t0
|
||||
+ xor t2, t2, a1
|
||||
+ srl.d t0, t3, t0
|
||||
+
|
||||
+
|
||||
+ orn t2, t2, t0
|
||||
+ revb.d t2, t2
|
||||
+ sub.d t1, t2, a2
|
||||
+ andn t0, a3, t2
|
||||
+
|
||||
+ and t1, t0, t1
|
||||
+ bnez t1, L(count_pos)
|
||||
+L(find_loop):
|
||||
+ beq a0, a4, L(find_end)
|
||||
+ ld.d t2, a0, -8
|
||||
+
|
||||
+ addi.d a0, a0, -8
|
||||
+ xor t2, t2, a1
|
||||
+ sub.d t1, t2, a2
|
||||
+ andn t0, a3, t2
|
||||
+
|
||||
+ and t1, t0, t1
|
||||
+ beqz t1, L(find_loop)
|
||||
+ revb.d t2, t2
|
||||
+ sub.d t1, t2, a2
|
||||
+
|
||||
+
|
||||
+ andn t0, a3, t2
|
||||
+ and t1, t0, t1
|
||||
+L(count_pos):
|
||||
+ ctz.d t0, t1
|
||||
+ addi.d a0, a0, 7
|
||||
+
|
||||
+ srli.d t0, t0, 3
|
||||
+ sub.d a0, a0, t0
|
||||
+ jr ra
|
||||
+ nop
|
||||
+
|
||||
+L(find_end):
|
||||
+ xor t2, t4, a1
|
||||
+ orn t2, t2, t5
|
||||
+ revb.d t2, t2
|
||||
+ sub.d t1, t2, a2
|
||||
+
|
||||
+
|
||||
+ andn t0, a3, t2
|
||||
+ and t1, t0, t1
|
||||
+ ctz.d t0, t1
|
||||
+ srli.d t0, t0, 3
|
||||
+
|
||||
+ addi.d a0, a4, -1
|
||||
+ sub.d a0, a0, t0
|
||||
+ maskeqz a0, a0, t1
|
||||
+ jr ra
|
||||
+END(STRRCHR)
|
||||
+
|
||||
+libc_hidden_builtin_def(STRRCHR)
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S
|
||||
new file mode 100644
|
||||
index 00000000..5a6e2297
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S
|
||||
@@ -0,0 +1,176 @@
|
||||
+/* Optimized strrchr implementation using LoongArch LASX instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+#define STRRCHR __strrchr_lasx
|
||||
+
|
||||
+LEAF(STRRCHR, 6)
|
||||
+ move a2, a0
|
||||
+ bstrins.d a0, zero, 5, 0
|
||||
+ xvld xr0, a0, 0
|
||||
+ xvld xr1, a0, 32
|
||||
+
|
||||
+ li.d t2, -1
|
||||
+ xvreplgr2vr.b xr4, a1
|
||||
+ xvmsknz.b xr2, xr0
|
||||
+ xvmsknz.b xr3, xr1
|
||||
+
|
||||
+ xvpickve.w xr5, xr2, 4
|
||||
+ xvpickve.w xr6, xr3, 4
|
||||
+ vilvl.h vr2, vr5, vr2
|
||||
+ vilvl.h vr3, vr6, vr3
|
||||
+
|
||||
+ vilvl.w vr2, vr3, vr2
|
||||
+ movfr2gr.d t0, fa2
|
||||
+ sra.d t0, t0, a2
|
||||
+ beq t0, t2, L(find_tail)
|
||||
+
|
||||
+
|
||||
+ xvseq.b xr2, xr0, xr4
|
||||
+ xvseq.b xr3, xr1, xr4
|
||||
+ xvmsknz.b xr2, xr2
|
||||
+ xvmsknz.b xr3, xr3
|
||||
+
|
||||
+ xvpickve.w xr4, xr2, 4
|
||||
+ xvpickve.w xr5, xr3, 4
|
||||
+ vilvl.h vr2, vr4, vr2
|
||||
+ vilvl.h vr3, vr5, vr3
|
||||
+
|
||||
+ vilvl.w vr1, vr3, vr2
|
||||
+ slli.d t3, t2, 1
|
||||
+ movfr2gr.d t1, fa1
|
||||
+ cto.d t0, t0
|
||||
+
|
||||
+ srl.d t1, t1, a2
|
||||
+ sll.d t3, t3, t0
|
||||
+ addi.d a0, a2, 63
|
||||
+ andn t1, t1, t3
|
||||
+
|
||||
+
|
||||
+ clz.d t0, t1
|
||||
+ sub.d a0, a0, t0
|
||||
+ maskeqz a0, a0, t1
|
||||
+ jr ra
|
||||
+
|
||||
+ .align 5
|
||||
+L(find_tail):
|
||||
+ addi.d a3, a0, 64
|
||||
+L(loop):
|
||||
+ xvld xr2, a0, 64
|
||||
+ xvld xr3, a0, 96
|
||||
+ addi.d a0, a0, 64
|
||||
+
|
||||
+ xvmin.bu xr5, xr2, xr3
|
||||
+ xvsetanyeqz.b fcc0, xr5
|
||||
+ bceqz fcc0, L(loop)
|
||||
+ xvmsknz.b xr5, xr2
|
||||
+
|
||||
+
|
||||
+ xvmsknz.b xr6, xr3
|
||||
+ xvpickve.w xr7, xr5, 4
|
||||
+ xvpickve.w xr8, xr6, 4
|
||||
+ vilvl.h vr5, vr7, vr5
|
||||
+
|
||||
+ vilvl.h vr6, vr8, vr6
|
||||
+ xvseq.b xr2, xr2, xr4
|
||||
+ xvseq.b xr3, xr3, xr4
|
||||
+ xvmsknz.b xr2, xr2
|
||||
+
|
||||
+ xvmsknz.b xr3, xr3
|
||||
+ xvpickve.w xr7, xr2, 4
|
||||
+ xvpickve.w xr8, xr3, 4
|
||||
+ vilvl.h vr2, vr7, vr2
|
||||
+
|
||||
+ vilvl.h vr3, vr8, vr3
|
||||
+ vilvl.w vr5, vr6, vr5
|
||||
+ vilvl.w vr2, vr3, vr2
|
||||
+ movfr2gr.d t0, fa5
|
||||
+
|
||||
+
|
||||
+ movfr2gr.d t1, fa2
|
||||
+ slli.d t3, t2, 1
|
||||
+ cto.d t0, t0
|
||||
+ sll.d t3, t3, t0
|
||||
+
|
||||
+ andn t1, t1, t3
|
||||
+ beqz t1, L(find_loop)
|
||||
+ clz.d t0, t1
|
||||
+ addi.d a0, a0, 63
|
||||
+
|
||||
+ sub.d a0, a0, t0
|
||||
+ jr ra
|
||||
+L(find_loop):
|
||||
+ beq a0, a3, L(find_end)
|
||||
+ xvld xr2, a0, -64
|
||||
+
|
||||
+ xvld xr3, a0, -32
|
||||
+ addi.d a0, a0, -64
|
||||
+ xvseq.b xr2, xr2, xr4
|
||||
+ xvseq.b xr3, xr3, xr4
|
||||
+
|
||||
+
|
||||
+ xvmax.bu xr5, xr2, xr3
|
||||
+ xvseteqz.v fcc0, xr5
|
||||
+ bcnez fcc0, L(find_loop)
|
||||
+ xvmsknz.b xr0, xr2
|
||||
+
|
||||
+ xvmsknz.b xr1, xr3
|
||||
+ xvpickve.w xr2, xr0, 4
|
||||
+ xvpickve.w xr3, xr1, 4
|
||||
+ vilvl.h vr0, vr2, vr0
|
||||
+
|
||||
+ vilvl.h vr1, vr3, vr1
|
||||
+ vilvl.w vr0, vr1, vr0
|
||||
+ movfr2gr.d t0, fa0
|
||||
+ addi.d a0, a0, 63
|
||||
+
|
||||
+ clz.d t0, t0
|
||||
+ sub.d a0, a0, t0
|
||||
+ jr ra
|
||||
+ nop
|
||||
+
|
||||
+
|
||||
+L(find_end):
|
||||
+ xvseq.b xr2, xr0, xr4
|
||||
+ xvseq.b xr3, xr1, xr4
|
||||
+ xvmsknz.b xr2, xr2
|
||||
+ xvmsknz.b xr3, xr3
|
||||
+
|
||||
+ xvpickve.w xr4, xr2, 4
|
||||
+ xvpickve.w xr5, xr3, 4
|
||||
+ vilvl.h vr2, vr4, vr2
|
||||
+ vilvl.h vr3, vr5, vr3
|
||||
+
|
||||
+ vilvl.w vr1, vr3, vr2
|
||||
+ movfr2gr.d t1, fa1
|
||||
+ addi.d a0, a2, 63
|
||||
+ srl.d t1, t1, a2
|
||||
+
|
||||
+ clz.d t0, t1
|
||||
+ sub.d a0, a0, t0
|
||||
+ maskeqz a0, a0, t1
|
||||
+ jr ra
|
||||
+END(STRRCHR)
|
||||
+
|
||||
+libc_hidden_builtin_def(STRRCHR)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S
|
||||
new file mode 100644
|
||||
index 00000000..8f2fd22e
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S
|
||||
@@ -0,0 +1,144 @@
|
||||
+/* Optimized strrchr implementation using LoongArch LSX instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+#define STRRCHR __strrchr_lsx
|
||||
+
|
||||
+LEAF(STRRCHR, 6)
|
||||
+ move a2, a0
|
||||
+ bstrins.d a0, zero, 4, 0
|
||||
+ vld vr0, a0, 0
|
||||
+ vld vr1, a0, 16
|
||||
+
|
||||
+ li.d t2, -1
|
||||
+ vreplgr2vr.b vr4, a1
|
||||
+ vmsknz.b vr2, vr0
|
||||
+ vmsknz.b vr3, vr1
|
||||
+
|
||||
+ vilvl.h vr2, vr3, vr2
|
||||
+ movfr2gr.s t0, fa2
|
||||
+ sra.w t0, t0, a2
|
||||
+ beq t0, t2, L(find_tail)
|
||||
+
|
||||
+ vseq.b vr2, vr0, vr4
|
||||
+ vseq.b vr3, vr1, vr4
|
||||
+ vmsknz.b vr2, vr2
|
||||
+ vmsknz.b vr3, vr3
|
||||
+
|
||||
+
|
||||
+ vilvl.h vr1, vr3, vr2
|
||||
+ slli.d t3, t2, 1
|
||||
+ movfr2gr.s t1, fa1
|
||||
+ cto.w t0, t0
|
||||
+
|
||||
+ srl.w t1, t1, a2
|
||||
+ sll.d t3, t3, t0
|
||||
+ addi.d a0, a2, 31
|
||||
+ andn t1, t1, t3
|
||||
+
|
||||
+ clz.w t0, t1
|
||||
+ sub.d a0, a0, t0
|
||||
+ maskeqz a0, a0, t1
|
||||
+ jr ra
|
||||
+
|
||||
+ .align 5
|
||||
+L(find_tail):
|
||||
+ addi.d a3, a0, 32
|
||||
+L(loop):
|
||||
+ vld vr2, a0, 32
|
||||
+ vld vr3, a0, 48
|
||||
+ addi.d a0, a0, 32
|
||||
+
|
||||
+ vmin.bu vr5, vr2, vr3
|
||||
+ vsetanyeqz.b fcc0, vr5
|
||||
+ bceqz fcc0, L(loop)
|
||||
+ vmsknz.b vr5, vr2
|
||||
+
|
||||
+ vmsknz.b vr6, vr3
|
||||
+ vilvl.h vr5, vr6, vr5
|
||||
+ vseq.b vr2, vr2, vr4
|
||||
+ vseq.b vr3, vr3, vr4
|
||||
+
|
||||
+ vmsknz.b vr2, vr2
|
||||
+ vmsknz.b vr3, vr3
|
||||
+ vilvl.h vr2, vr3, vr2
|
||||
+ movfr2gr.s t0, fa5
|
||||
+
|
||||
+
|
||||
+ movfr2gr.s t1, fa2
|
||||
+ slli.d t3, t2, 1
|
||||
+ cto.w t0, t0
|
||||
+ sll.d t3, t3, t0
|
||||
+
|
||||
+ andn t1, t1, t3
|
||||
+ beqz t1, L(find_loop)
|
||||
+ clz.w t0, t1
|
||||
+ addi.d a0, a0, 31
|
||||
+
|
||||
+ sub.d a0, a0, t0
|
||||
+ jr ra
|
||||
+L(find_loop):
|
||||
+ beq a0, a3, L(find_end)
|
||||
+ vld vr2, a0, -32
|
||||
+
|
||||
+ vld vr3, a0, -16
|
||||
+ addi.d a0, a0, -32
|
||||
+ vseq.b vr2, vr2, vr4
|
||||
+ vseq.b vr3, vr3, vr4
|
||||
+
|
||||
+
|
||||
+ vmax.bu vr5, vr2, vr3
|
||||
+ vseteqz.v fcc0, vr5
|
||||
+ bcnez fcc0, L(find_loop)
|
||||
+ vmsknz.b vr0, vr2
|
||||
+
|
||||
+ vmsknz.b vr1, vr3
|
||||
+ vilvl.h vr0, vr1, vr0
|
||||
+ movfr2gr.s t0, fa0
|
||||
+ addi.d a0, a0, 31
|
||||
+
|
||||
+ clz.w t0, t0
|
||||
+ sub.d a0, a0, t0
|
||||
+ jr ra
|
||||
+ nop
|
||||
+
|
||||
+L(find_end):
|
||||
+ vseq.b vr2, vr0, vr4
|
||||
+ vseq.b vr3, vr1, vr4
|
||||
+ vmsknz.b vr2, vr2
|
||||
+ vmsknz.b vr3, vr3
|
||||
+
|
||||
+
|
||||
+ vilvl.h vr1, vr3, vr2
|
||||
+ movfr2gr.s t1, fa1
|
||||
+ addi.d a0, a2, 31
|
||||
+ srl.w t1, t1, a2
|
||||
+
|
||||
+ clz.w t0, t1
|
||||
+ sub.d a0, a0, t0
|
||||
+ maskeqz a0, a0, t1
|
||||
+ jr ra
|
||||
+END(STRRCHR)
|
||||
+
|
||||
+libc_hidden_builtin_def(STRRCHR)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr.c b/sysdeps/loongarch/lp64/multiarch/strrchr.c
|
||||
new file mode 100644
|
||||
index 00000000..d9c9f660
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr.c
|
||||
@@ -0,0 +1,36 @@
|
||||
+/* Multiple versions of strrchr.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* Define multiple versions only for the definition in libc. */
|
||||
+#if IS_IN (libc)
|
||||
+# define strrchr __redirect_strrchr
|
||||
+# include <string.h>
|
||||
+# undef strrchr
|
||||
+
|
||||
+# define SYMBOL_NAME strrchr
|
||||
+# include "ifunc-strrchr.h"
|
||||
+
|
||||
+libc_ifunc_redirected (__redirect_strrchr, strrchr, IFUNC_SELECTOR ());
|
||||
+weak_alias (strrchr, rindex)
|
||||
+# ifdef SHARED
|
||||
+__hidden_ver1 (strrchr, __GI_strrchr, __redirect_strrchr)
|
||||
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strrchr);
|
||||
+# endif
|
||||
+
|
||||
+#endif
|
||||
--
|
||||
2.33.0
|
||||
|
626
LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch
Normal file
626
LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch
Normal file
|
@ -0,0 +1,626 @@
|
|||
From b5979df8ad07823c79a934c1fa0a91ec0abffb61 Mon Sep 17 00:00:00 2001
|
||||
From: caiyinyu <caiyinyu@loongson.cn>
|
||||
Date: Fri, 8 Sep 2023 14:10:55 +0800
|
||||
Subject: [PATCH 20/29] LoongArch: Add lasx/lsx support for
|
||||
_dl_runtime_profile.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/bits/link.h | 24 ++-
|
||||
sysdeps/loongarch/bits/link_lavcurrent.h | 25 +++
|
||||
sysdeps/loongarch/dl-audit-check.h | 23 +++
|
||||
sysdeps/loongarch/dl-link.sym | 8 +-
|
||||
sysdeps/loongarch/dl-machine.h | 11 +-
|
||||
sysdeps/loongarch/dl-trampoline.S | 177 +----------------
|
||||
sysdeps/loongarch/dl-trampoline.h | 242 +++++++++++++++++++++++
|
||||
7 files changed, 331 insertions(+), 179 deletions(-)
|
||||
create mode 100644 sysdeps/loongarch/bits/link_lavcurrent.h
|
||||
create mode 100644 sysdeps/loongarch/dl-audit-check.h
|
||||
|
||||
diff --git a/sysdeps/loongarch/bits/link.h b/sysdeps/loongarch/bits/link.h
|
||||
index 7fa61312..00f6f25f 100644
|
||||
--- a/sysdeps/loongarch/bits/link.h
|
||||
+++ b/sysdeps/loongarch/bits/link.h
|
||||
@@ -20,10 +20,26 @@
|
||||
#error "Never include <bits/link.h> directly; use <link.h> instead."
|
||||
#endif
|
||||
|
||||
+#ifndef __loongarch_soft_float
|
||||
+typedef float La_loongarch_vr
|
||||
+ __attribute__ ((__vector_size__ (16), __aligned__ (16)));
|
||||
+typedef float La_loongarch_xr
|
||||
+ __attribute__ ((__vector_size__ (32), __aligned__ (16)));
|
||||
+
|
||||
+typedef union
|
||||
+{
|
||||
+ double fpreg[4];
|
||||
+ La_loongarch_vr vr[2];
|
||||
+ La_loongarch_xr xr[1];
|
||||
+} La_loongarch_vector __attribute__ ((__aligned__ (16)));
|
||||
+#endif
|
||||
+
|
||||
typedef struct La_loongarch_regs
|
||||
{
|
||||
unsigned long int lr_reg[8]; /* a0 - a7 */
|
||||
- double lr_fpreg[8]; /* fa0 - fa7 */
|
||||
+#ifndef __loongarch_soft_float
|
||||
+ La_loongarch_vector lr_vec[8]; /* fa0 - fa7 or vr0 - vr7 or xr0 - xr7 */
|
||||
+#endif
|
||||
unsigned long int lr_ra;
|
||||
unsigned long int lr_sp;
|
||||
} La_loongarch_regs;
|
||||
@@ -33,8 +49,10 @@ typedef struct La_loongarch_retval
|
||||
{
|
||||
unsigned long int lrv_a0;
|
||||
unsigned long int lrv_a1;
|
||||
- double lrv_fa0;
|
||||
- double lrv_fa1;
|
||||
+#ifndef __loongarch_soft_float
|
||||
+ La_loongarch_vector lrv_vec0;
|
||||
+ La_loongarch_vector lrv_vec1;
|
||||
+#endif
|
||||
} La_loongarch_retval;
|
||||
|
||||
__BEGIN_DECLS
|
||||
diff --git a/sysdeps/loongarch/bits/link_lavcurrent.h b/sysdeps/loongarch/bits/link_lavcurrent.h
|
||||
new file mode 100644
|
||||
index 00000000..15f1eb84
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/bits/link_lavcurrent.h
|
||||
@@ -0,0 +1,25 @@
|
||||
+/* Data structure for communication from the run-time dynamic linker for
|
||||
+ loaded ELF shared objects. LAV_CURRENT definition.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#ifndef _LINK_H
|
||||
+# error "Never include <bits/link_lavcurrent.h> directly; use <link.h> instead."
|
||||
+#endif
|
||||
+
|
||||
+/* Version numbers for la_version handshake interface. */
|
||||
+#define LAV_CURRENT 3
|
||||
diff --git a/sysdeps/loongarch/dl-audit-check.h b/sysdeps/loongarch/dl-audit-check.h
|
||||
new file mode 100644
|
||||
index 00000000..a139c939
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/dl-audit-check.h
|
||||
@@ -0,0 +1,23 @@
|
||||
+/* rtld-audit version check. LoongArch version.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+static inline bool
|
||||
+_dl_audit_check_version (unsigned int lav)
|
||||
+{
|
||||
+ return lav == LAV_CURRENT;
|
||||
+}
|
||||
diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
|
||||
index 868ab7c6..b534968e 100644
|
||||
--- a/sysdeps/loongarch/dl-link.sym
|
||||
+++ b/sysdeps/loongarch/dl-link.sym
|
||||
@@ -6,9 +6,13 @@ DL_SIZEOF_RG sizeof(struct La_loongarch_regs)
|
||||
DL_SIZEOF_RV sizeof(struct La_loongarch_retval)
|
||||
|
||||
DL_OFFSET_RG_A0 offsetof(struct La_loongarch_regs, lr_reg)
|
||||
-DL_OFFSET_RG_FA0 offsetof(struct La_loongarch_regs, lr_fpreg)
|
||||
+#ifndef __loongarch_soft_float
|
||||
+DL_OFFSET_RG_VEC0 offsetof(struct La_loongarch_regs, lr_vec)
|
||||
+#endif
|
||||
DL_OFFSET_RG_RA offsetof(struct La_loongarch_regs, lr_ra)
|
||||
DL_OFFSET_RG_SP offsetof(struct La_loongarch_regs, lr_sp)
|
||||
|
||||
DL_OFFSET_RV_A0 offsetof(struct La_loongarch_retval, lrv_a0)
|
||||
-DL_OFFSET_RV_FA0 offsetof(struct La_loongarch_retval, lrv_a1)
|
||||
+#ifndef __loongarch_soft_float
|
||||
+DL_OFFSET_RV_VEC0 offsetof(struct La_loongarch_retval, lrv_vec0)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
|
||||
index 066bb233..8a2db9de 100644
|
||||
--- a/sysdeps/loongarch/dl-machine.h
|
||||
+++ b/sysdeps/loongarch/dl-machine.h
|
||||
@@ -273,6 +273,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
|
||||
#if !defined __loongarch_soft_float
|
||||
extern void _dl_runtime_resolve_lasx (void) attribute_hidden;
|
||||
extern void _dl_runtime_resolve_lsx (void) attribute_hidden;
|
||||
+ extern void _dl_runtime_profile_lasx (void) attribute_hidden;
|
||||
+ extern void _dl_runtime_profile_lsx (void) attribute_hidden;
|
||||
#endif
|
||||
extern void _dl_runtime_resolve (void) attribute_hidden;
|
||||
extern void _dl_runtime_profile (void) attribute_hidden;
|
||||
@@ -287,7 +289,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
|
||||
end in this function. */
|
||||
if (profile != 0)
|
||||
{
|
||||
- gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile;
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ if (SUPPORT_LASX)
|
||||
+ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx;
|
||||
+ else if (SUPPORT_LSX)
|
||||
+ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx;
|
||||
+ else
|
||||
+#endif
|
||||
+ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile;
|
||||
|
||||
if (GLRO(dl_profile) != NULL
|
||||
&& _dl_name_match_p (GLRO(dl_profile), l))
|
||||
diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S
|
||||
index 8fd91469..bb449ecf 100644
|
||||
--- a/sysdeps/loongarch/dl-trampoline.S
|
||||
+++ b/sysdeps/loongarch/dl-trampoline.S
|
||||
@@ -22,190 +22,21 @@
|
||||
#if !defined __loongarch_soft_float
|
||||
#define USE_LASX
|
||||
#define _dl_runtime_resolve _dl_runtime_resolve_lasx
|
||||
+#define _dl_runtime_profile _dl_runtime_profile_lasx
|
||||
#include "dl-trampoline.h"
|
||||
#undef FRAME_SIZE
|
||||
#undef USE_LASX
|
||||
#undef _dl_runtime_resolve
|
||||
+#undef _dl_runtime_profile
|
||||
|
||||
#define USE_LSX
|
||||
#define _dl_runtime_resolve _dl_runtime_resolve_lsx
|
||||
+#define _dl_runtime_profile _dl_runtime_profile_lsx
|
||||
#include "dl-trampoline.h"
|
||||
#undef FRAME_SIZE
|
||||
#undef USE_LSX
|
||||
#undef _dl_runtime_resolve
|
||||
+#undef _dl_runtime_profile
|
||||
#endif
|
||||
|
||||
#include "dl-trampoline.h"
|
||||
-
|
||||
-#include "dl-link.h"
|
||||
-
|
||||
-ENTRY (_dl_runtime_profile)
|
||||
- /* LoongArch we get called with:
|
||||
- t0 linkr_map pointer
|
||||
- t1 the scaled offset stored in t0, which can be used
|
||||
- to calculate the offset of the current symbol in .rela.plt
|
||||
- t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function
|
||||
- t3 dl resolver entry point, no use in this function
|
||||
-
|
||||
- Stack frame layout:
|
||||
- [sp, #96] La_loongarch_regs
|
||||
- [sp, #48] La_loongarch_retval
|
||||
- [sp, #40] frame size return from pltenter
|
||||
- [sp, #32] dl_profile_call saved a1
|
||||
- [sp, #24] dl_profile_call saved a0
|
||||
- [sp, #16] T1
|
||||
- [sp, #0] ra, fp <- fp
|
||||
- */
|
||||
-
|
||||
-# define OFFSET_T1 16
|
||||
-# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8
|
||||
-# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16
|
||||
-# define OFFSET_RV OFFSET_FS + 8
|
||||
-# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV
|
||||
-
|
||||
-# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK))
|
||||
-
|
||||
- /* Save arguments to stack. */
|
||||
- ADDI sp, sp, -SF_SIZE
|
||||
- REG_S ra, sp, 0
|
||||
- REG_S fp, sp, 8
|
||||
-
|
||||
- or fp, sp, zero
|
||||
-
|
||||
- REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
|
||||
- REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
|
||||
- REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
|
||||
- REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
|
||||
- REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
|
||||
- REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
|
||||
- REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
|
||||
- REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
|
||||
-
|
||||
-#ifndef __loongarch_soft_float
|
||||
- FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
|
||||
- FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
|
||||
- FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
|
||||
- FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
|
||||
- FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
|
||||
- FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
|
||||
- FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
|
||||
- FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
|
||||
-#endif
|
||||
-
|
||||
- /* Update .got.plt and obtain runtime address of callee. */
|
||||
- SLLI a1, t1, 1
|
||||
- or a0, t0, zero
|
||||
- ADD a1, a1, t1
|
||||
- or a2, ra, zero /* return addr */
|
||||
- ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */
|
||||
- ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */
|
||||
-
|
||||
- REG_S a0, fp, OFFSET_SAVED_CALL_A0
|
||||
- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
|
||||
-
|
||||
- la t2, _dl_profile_fixup
|
||||
- jirl ra, t2, 0
|
||||
-
|
||||
- REG_L t3, fp, OFFSET_FS
|
||||
- bge t3, zero, 1f
|
||||
-
|
||||
- /* Save the return. */
|
||||
- or t4, v0, zero
|
||||
-
|
||||
- /* Restore arguments from stack. */
|
||||
- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
|
||||
- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
|
||||
- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
|
||||
- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
|
||||
- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
|
||||
- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
|
||||
- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
|
||||
- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
|
||||
-
|
||||
-#ifndef __loongarch_soft_float
|
||||
- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
|
||||
- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
|
||||
- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
|
||||
- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
|
||||
- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
|
||||
- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
|
||||
- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
|
||||
- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
|
||||
-#endif
|
||||
-
|
||||
- REG_L ra, fp, 0
|
||||
- REG_L fp, fp, SZREG
|
||||
-
|
||||
- ADDI sp, sp, SF_SIZE
|
||||
- jirl zero, t4, 0
|
||||
-
|
||||
-1:
|
||||
- /* The new frame size is in t3. */
|
||||
- SUB sp, fp, t3
|
||||
- BSTRINS sp, zero, 3, 0
|
||||
-
|
||||
- REG_S a0, fp, OFFSET_T1
|
||||
-
|
||||
- or a0, sp, zero
|
||||
- ADDI a1, fp, SF_SIZE
|
||||
- or a2, t3, zero
|
||||
- la t5, memcpy
|
||||
- jirl ra, t5, 0
|
||||
-
|
||||
- REG_L t6, fp, OFFSET_T1
|
||||
-
|
||||
- /* Call the function. */
|
||||
- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
|
||||
- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
|
||||
- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
|
||||
- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
|
||||
- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
|
||||
- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
|
||||
- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
|
||||
- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
|
||||
-
|
||||
-#ifndef __loongarch_soft_float
|
||||
- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
|
||||
- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
|
||||
- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
|
||||
- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
|
||||
- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
|
||||
- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
|
||||
- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
|
||||
- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
|
||||
-#endif
|
||||
- jirl ra, t6, 0
|
||||
-
|
||||
- REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0
|
||||
- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG
|
||||
-
|
||||
-#ifndef __loongarch_soft_float
|
||||
- FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0
|
||||
- FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 + SZFREG
|
||||
-#endif
|
||||
-
|
||||
- /* Setup call to pltexit. */
|
||||
- REG_L a0, fp, OFFSET_SAVED_CALL_A0
|
||||
- REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
|
||||
- ADDI a2, fp, OFFSET_RG
|
||||
- ADDI a3, fp, OFFSET_RV
|
||||
- la t7, _dl_audit_pltexit
|
||||
- jirl ra, t7, 0
|
||||
-
|
||||
- REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0
|
||||
- REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG
|
||||
-
|
||||
-#ifndef __loongarch_soft_float
|
||||
- FREG_L fa0, fp, OFFSET_RV + DL_OFFSET_RV_FA0
|
||||
- FREG_L fa1, fp, OFFSET_RV + DL_OFFSET_RV_FA0 + SZFREG
|
||||
-#endif
|
||||
-
|
||||
- /* RA from within La_loongarch_reg. */
|
||||
- REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA
|
||||
- or sp, fp, zero
|
||||
- ADDI sp, sp, SF_SIZE
|
||||
- REG_S fp, fp, SZREG
|
||||
-
|
||||
- jirl zero, ra, 0
|
||||
-
|
||||
-END (_dl_runtime_profile)
|
||||
diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h
|
||||
index 99fcacab..e298439d 100644
|
||||
--- a/sysdeps/loongarch/dl-trampoline.h
|
||||
+++ b/sysdeps/loongarch/dl-trampoline.h
|
||||
@@ -125,3 +125,245 @@ ENTRY (_dl_runtime_resolve)
|
||||
/* Invoke the callee. */
|
||||
jirl zero, t1, 0
|
||||
END (_dl_runtime_resolve)
|
||||
+
|
||||
+#include "dl-link.h"
|
||||
+
|
||||
+ENTRY (_dl_runtime_profile)
|
||||
+ /* On LoongArch we get called with:
|
||||
+ t0 link_map pointer
|
||||
+ t1 the scaled offset, which can be used
|
||||
+ to calculate the offset of the current symbol in .rela.plt
|
||||
+ t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function
|
||||
+ t3 dl resolver entry point, no use in this function
|
||||
+
|
||||
+ Stack frame layout:
|
||||
+ [sp, #208] La_loongarch_regs
|
||||
+ [sp, #128] La_loongarch_retval // align: 16
|
||||
+ [sp, #112] frame size return from pltenter
|
||||
+ [sp, #80 ] dl_profile_call saved vec1
|
||||
+ [sp, #48 ] dl_profile_call saved vec0 // align: 16
|
||||
+ [sp, #32 ] dl_profile_call saved a1
|
||||
+ [sp, #24 ] dl_profile_call saved a0
|
||||
+ [sp, #16 ] T1
|
||||
+ [sp, #0 ] ra, fp <- fp
|
||||
+ */
|
||||
+
|
||||
+# define OFFSET_T1 16
|
||||
+# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8
|
||||
+# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 + 8 + 64
|
||||
+# define OFFSET_RV OFFSET_FS + 8 + 8
|
||||
+# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV
|
||||
+
|
||||
+# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK))
|
||||
+
|
||||
+ /* Save arguments to stack. */
|
||||
+ ADDI sp, sp, -SF_SIZE
|
||||
+ REG_S ra, sp, 0
|
||||
+ REG_S fp, sp, 8
|
||||
+
|
||||
+ or fp, sp, zero
|
||||
+
|
||||
+ REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
|
||||
+ REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
|
||||
+ REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
|
||||
+ REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
|
||||
+ REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
|
||||
+ REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
|
||||
+ REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
|
||||
+ REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
|
||||
+
|
||||
+#ifdef USE_LASX
|
||||
+ xvst xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
|
||||
+ xvst xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
|
||||
+ xvst xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
|
||||
+ xvst xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
|
||||
+ xvst xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
|
||||
+ xvst xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
|
||||
+ xvst xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
|
||||
+ xvst xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
|
||||
+#elif defined USE_LSX
|
||||
+ vst vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
|
||||
+ vst vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
|
||||
+ vst vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
|
||||
+ vst vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
|
||||
+ vst vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
|
||||
+ vst vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
|
||||
+ vst vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
|
||||
+ vst vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
|
||||
+#elif !defined __loongarch_soft_float
|
||||
+ FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
|
||||
+ FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
|
||||
+ FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
|
||||
+ FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
|
||||
+ FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
|
||||
+ FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
|
||||
+ FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
|
||||
+ FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
|
||||
+#endif
|
||||
+
|
||||
+ /* Update .got.plt and obtain runtime address of callee. */
|
||||
+ SLLI a1, t1, 1
|
||||
+ or a0, t0, zero
|
||||
+ ADD a1, a1, t1
|
||||
+ or a2, ra, zero /* return addr */
|
||||
+ ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */
|
||||
+ ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */
|
||||
+
|
||||
+ REG_S a0, fp, OFFSET_SAVED_CALL_A0
|
||||
+ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
|
||||
+
|
||||
+ la t2, _dl_profile_fixup
|
||||
+ jirl ra, t2, 0
|
||||
+
|
||||
+ REG_L t3, fp, OFFSET_FS
|
||||
+ bge t3, zero, 1f
|
||||
+
|
||||
+ /* Save the return. */
|
||||
+ or t4, v0, zero
|
||||
+
|
||||
+ /* Restore arguments from stack. */
|
||||
+ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
|
||||
+ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
|
||||
+ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
|
||||
+ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
|
||||
+ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
|
||||
+ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
|
||||
+ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
|
||||
+ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
|
||||
+
|
||||
+#ifdef USE_LASX
|
||||
+ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
|
||||
+ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
|
||||
+ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
|
||||
+ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
|
||||
+ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
|
||||
+ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
|
||||
+ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
|
||||
+ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
|
||||
+#elif defined USE_LSX
|
||||
+ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
|
||||
+ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
|
||||
+ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
|
||||
+ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
|
||||
+ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
|
||||
+ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
|
||||
+ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
|
||||
+ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
|
||||
+#elif !defined __loongarch_soft_float
|
||||
+ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
|
||||
+ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
|
||||
+ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
|
||||
+ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
|
||||
+ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
|
||||
+ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
|
||||
+ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
|
||||
+ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
|
||||
+#endif
|
||||
+
|
||||
+ REG_L ra, fp, 0
|
||||
+ REG_L fp, fp, SZREG
|
||||
+
|
||||
+ ADDI sp, sp, SF_SIZE
|
||||
+ jirl zero, t4, 0
|
||||
+
|
||||
+1:
|
||||
+ /* The new frame size is in t3. */
|
||||
+ SUB sp, fp, t3
|
||||
+ BSTRINS sp, zero, 3, 0
|
||||
+
|
||||
+ REG_S a0, fp, OFFSET_T1
|
||||
+
|
||||
+ or a0, sp, zero
|
||||
+ ADDI a1, fp, SF_SIZE
|
||||
+ or a2, t3, zero
|
||||
+ la t5, memcpy
|
||||
+ jirl ra, t5, 0
|
||||
+
|
||||
+ REG_L t6, fp, OFFSET_T1
|
||||
+
|
||||
+ /* Call the function. */
|
||||
+ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
|
||||
+ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
|
||||
+ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
|
||||
+ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
|
||||
+ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
|
||||
+ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
|
||||
+ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
|
||||
+ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
|
||||
+
|
||||
+#ifdef USE_LASX
|
||||
+ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
|
||||
+ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
|
||||
+ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
|
||||
+ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
|
||||
+ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
|
||||
+ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
|
||||
+ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
|
||||
+ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
|
||||
+#elif defined USE_LSX
|
||||
+ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
|
||||
+ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
|
||||
+ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
|
||||
+ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
|
||||
+ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
|
||||
+ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
|
||||
+ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
|
||||
+ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
|
||||
+#elif !defined __loongarch_soft_float
|
||||
+ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
|
||||
+ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
|
||||
+ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
|
||||
+ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
|
||||
+ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
|
||||
+ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
|
||||
+ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
|
||||
+ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
|
||||
+#endif
|
||||
+
|
||||
+ jirl ra, t6, 0
|
||||
+
|
||||
+ REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0
|
||||
+ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG
|
||||
+
|
||||
+#ifdef USE_LASX
|
||||
+ xvst xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
|
||||
+ xvst xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG
|
||||
+#elif defined USE_LSX
|
||||
+ vst vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
|
||||
+ vst vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG
|
||||
+#elif !defined __loongarch_soft_float
|
||||
+ FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
|
||||
+ FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG
|
||||
+#endif
|
||||
+
|
||||
+ /* Setup call to pltexit. */
|
||||
+ REG_L a0, fp, OFFSET_SAVED_CALL_A0
|
||||
+ REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
|
||||
+ ADDI a2, fp, OFFSET_RG
|
||||
+ ADDI a3, fp, OFFSET_RV
|
||||
+ la t7, _dl_audit_pltexit
|
||||
+ jirl ra, t7, 0
|
||||
+
|
||||
+ REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0
|
||||
+ REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG
|
||||
+
|
||||
+#ifdef USE_LASX
|
||||
+ xvld xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
|
||||
+ xvld xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG
|
||||
+#elif defined USE_LSX
|
||||
+ vld vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
|
||||
+ vld vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG
|
||||
+#elif !defined __loongarch_soft_float
|
||||
+ FREG_L fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
|
||||
+ FREG_L fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG
|
||||
+#endif
|
||||
+
|
||||
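+ /* RA from within La_loongarch_regs. NOTE(review): the REG_S of fp below does not reload the caller's fp; REG_L looks intended -- confirm. */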
|
||||
+ REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA
|
||||
+ or sp, fp, zero
|
||||
+ ADDI sp, sp, SF_SIZE
|
||||
+ REG_S fp, fp, SZREG
|
||||
+
|
||||
+ jirl zero, ra, 0
|
||||
+
|
||||
+END (_dl_runtime_profile)
|
||||
--
|
||||
2.33.0
|
||||
|
102
LoongArch-Add-minuimum-binutils-required-version.patch
Normal file
102
LoongArch-Add-minuimum-binutils-required-version.patch
Normal file
|
@ -0,0 +1,102 @@
|
|||
From 7353f21f6ed1754b67e455e2b80123787efa9e91 Mon Sep 17 00:00:00 2001
|
||||
From: dengjianbo <dengjianbo@loongson.cn>
|
||||
Date: Tue, 8 Aug 2023 14:15:43 +0800
|
||||
Subject: [PATCH 02/29] LoongArch: Add minuimum binutils required version
|
||||
|
||||
LoongArch glibc can add some LASX/LSX vector instructions codes,
|
||||
change the required minimum binutils version to 2.41 which could
|
||||
support vector instructions. HAVE_LOONGARCH_VEC_ASM is removed
|
||||
accordingly.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
config.h.in | 5 -----
|
||||
sysdeps/loongarch/configure | 5 ++---
|
||||
sysdeps/loongarch/configure.ac | 4 ++--
|
||||
sysdeps/loongarch/dl-machine.h | 4 ++--
|
||||
sysdeps/loongarch/dl-trampoline.S | 2 +-
|
||||
5 files changed, 7 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/config.h.in b/config.h.in
|
||||
index 0dedc124..44a34072 100644
|
||||
--- a/config.h.in
|
||||
+++ b/config.h.in
|
||||
@@ -141,11 +141,6 @@
|
||||
/* LOONGARCH floating-point ABI for ld.so. */
|
||||
#undef LOONGARCH_ABI_FRLEN
|
||||
|
||||
-/* Assembler support LoongArch LASX/LSX vector instructions.
|
||||
- This macro becomes obsolete when glibc increased the minimum
|
||||
- required version of GNU 'binutils' to 2.41 or later. */
|
||||
-#define HAVE_LOONGARCH_VEC_ASM 0
|
||||
-
|
||||
/* Linux specific: minimum supported kernel version. */
|
||||
#undef __LINUX_KERNEL_VERSION
|
||||
|
||||
diff --git a/sysdeps/loongarch/configure b/sysdeps/loongarch/configure
|
||||
index 5843c7cf..395ddc92 100644
|
||||
--- a/sysdeps/loongarch/configure
|
||||
+++ b/sysdeps/loongarch/configure
|
||||
@@ -128,8 +128,7 @@ rm -f conftest*
|
||||
fi
|
||||
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_loongarch_vec_asm" >&5
|
||||
printf "%s\n" "$libc_cv_loongarch_vec_asm" >&6; }
|
||||
-if test $libc_cv_loongarch_vec_asm = yes; then
|
||||
- printf "%s\n" "#define HAVE_LOONGARCH_VEC_ASM 1" >>confdefs.h
|
||||
-
|
||||
+if test $libc_cv_loongarch_vec_asm = no; then
|
||||
+ as_fn_error $? "binutils version is too old, use 2.41 or newer version" "$LINENO" 5
|
||||
fi
|
||||
|
||||
diff --git a/sysdeps/loongarch/configure.ac b/sysdeps/loongarch/configure.ac
|
||||
index ba89d834..989287c6 100644
|
||||
--- a/sysdeps/loongarch/configure.ac
|
||||
+++ b/sysdeps/loongarch/configure.ac
|
||||
@@ -74,6 +74,6 @@ else
|
||||
libc_cv_loongarch_vec_asm=no
|
||||
fi
|
||||
rm -f conftest*])
|
||||
-if test $libc_cv_loongarch_vec_asm = yes; then
|
||||
- AC_DEFINE(HAVE_LOONGARCH_VEC_ASM)
|
||||
+if test $libc_cv_loongarch_vec_asm = no; then
|
||||
+ AC_MSG_ERROR([binutils version is too old, use 2.41 or newer version])
|
||||
fi
|
||||
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
|
||||
index 51ce9af8..066bb233 100644
|
||||
--- a/sysdeps/loongarch/dl-machine.h
|
||||
+++ b/sysdeps/loongarch/dl-machine.h
|
||||
@@ -270,7 +270,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
|
||||
/* If using PLTs, fill in the first two entries of .got.plt. */
|
||||
if (l->l_info[DT_JMPREL])
|
||||
{
|
||||
-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float
|
||||
+#if !defined __loongarch_soft_float
|
||||
extern void _dl_runtime_resolve_lasx (void) attribute_hidden;
|
||||
extern void _dl_runtime_resolve_lsx (void) attribute_hidden;
|
||||
#endif
|
||||
@@ -300,7 +300,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
|
||||
/* This function will get called to fix up the GOT entry
|
||||
indicated by the offset on the stack, and then jump to
|
||||
the resolved address. */
|
||||
-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float
|
||||
+#if !defined __loongarch_soft_float
|
||||
if (SUPPORT_LASX)
|
||||
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
|
||||
else if (SUPPORT_LSX)
|
||||
diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S
|
||||
index f6ba5e44..8fd91469 100644
|
||||
--- a/sysdeps/loongarch/dl-trampoline.S
|
||||
+++ b/sysdeps/loongarch/dl-trampoline.S
|
||||
@@ -19,7 +19,7 @@
|
||||
#include <sysdep.h>
|
||||
#include <sys/asm.h>
|
||||
|
||||
-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float
|
||||
+#if !defined __loongarch_soft_float
|
||||
#define USE_LASX
|
||||
#define _dl_runtime_resolve _dl_runtime_resolve_lasx
|
||||
#include "dl-trampoline.h"
|
||||
--
|
||||
2.33.0
|
||||
|
277
LoongArch-Change-loongarch-to-LoongArch-in-comments.patch
Normal file
277
LoongArch-Change-loongarch-to-LoongArch-in-comments.patch
Normal file
|
@ -0,0 +1,277 @@
|
|||
From e5ccd79e81de7ad5821fde83875973e878d85d4b Mon Sep 17 00:00:00 2001
|
||||
From: dengjianbo <dengjianbo@loongson.cn>
|
||||
Date: Mon, 28 Aug 2023 10:08:40 +0800
|
||||
Subject: [PATCH 19/29] LoongArch: Change loongarch to LoongArch in comments
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/memmove-aligned.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/memmove-lasx.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/memmove-lsx.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/strchr-aligned.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/strchr-lasx.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/strchr-lsx.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/strlen-aligned.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/strlen-lasx.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/strlen-lsx.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S | 2 +-
|
||||
sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S | 2 +-
|
||||
24 files changed, 24 insertions(+), 24 deletions(-)
|
||||
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S
|
||||
index 299dd49c..7eb34395 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized memcpy_aligned implementation using basic Loongarch instructions.
|
||||
+/* Optimized memcpy_aligned implementation using basic LoongArch instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S
|
||||
index 4aae5bf8..ae148df5 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized memcpy implementation using Loongarch LASX instructions.
|
||||
+/* Optimized memcpy implementation using LoongArch LASX instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S
|
||||
index 6ebbe7a2..feb2bb0e 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized memcpy implementation using Loongarch LSX instructions.
|
||||
+/* Optimized memcpy implementation using LoongArch LSX instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S
|
||||
index 8e60a22d..31019b13 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized unaligned memcpy implementation using basic Loongarch instructions.
|
||||
+/* Optimized unaligned memcpy implementation using basic LoongArch instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S
|
||||
index 5354f383..a02114c0 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized memmove_aligned implementation using basic Loongarch instructions.
|
||||
+/* Optimized memmove_aligned implementation using basic LoongArch instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S
|
||||
index ff68e7a2..95d8ee7b 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized memmove implementation using Loongarch LASX instructions.
|
||||
+/* Optimized memmove implementation using LoongArch LASX instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
|
||||
index 9e1502a7..8a936770 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized memmove implementation using Loongarch LSX instructions.
|
||||
+/* Optimized memmove implementation using LoongArch LSX instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S
|
||||
index 90a64b6b..3284ce25 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized memmove_unaligned implementation using basic Loongarch instructions.
|
||||
+/* Optimized memmove_unaligned implementation using basic LoongArch instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
|
||||
index 5fb01806..62020054 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized strchr implementation using basic Loongarch instructions.
|
||||
+/* Optimized strchr implementation using basic LoongArch instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
|
||||
index 254402da..4d3cc588 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized strchr implementation using loongarch LASX SIMD instructions.
|
||||
+/* Optimized strchr implementation using LoongArch LASX instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
|
||||
index dae98b0a..8b78c35c 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized strlen implementation using loongarch LSX SIMD instructions.
|
||||
+/* Optimized strlen implementation using LoongArch LSX instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
|
||||
index 1c01a023..20856a06 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized strchrnul implementation using basic Loongarch instructions.
|
||||
+/* Optimized strchrnul implementation using basic LoongArch instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
|
||||
index d45495e4..4753d4ce 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized strchrnul implementation using loongarch LASX SIMD instructions.
|
||||
+/* Optimized strchrnul implementation using LoongArch LASX instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
|
||||
index 07d793ae..671e740c 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized strchrnul implementation using loongarch LSX SIMD instructions.
|
||||
+/* Optimized strchrnul implementation using LoongArch LSX instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
|
||||
index f5f4f336..ba1f9667 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized strcmp implementation using basic Loongarch instructions.
|
||||
+/* Optimized strcmp implementation using basic LoongArch instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
|
||||
index 2e177a38..091c8c9e 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized strcmp implementation using Loongarch LSX instructions.
|
||||
+/* Optimized strcmp implementation using LoongArch LSX instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
|
||||
index e9e1d2fc..ed0548e4 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized strlen implementation using basic Loongarch instructions.
|
||||
+/* Optimized strlen implementation using basic LoongArch instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
|
||||
index 258c47ce..91342f34 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized strlen implementation using loongarch LASX SIMD instructions.
|
||||
+/* Optimized strlen implementation using LoongArch LASX instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
|
||||
index b194355e..b09c12e0 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized strlen implementation using Loongarch LSX SIMD instructions.
|
||||
+/* Optimized strlen implementation using LoongArch LSX instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
|
||||
index e2687fa7..f63de872 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized strncmp implementation using basic Loongarch instructions.
|
||||
+/* Optimized strncmp implementation using basic LoongArch instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
|
||||
index 0b4eee2a..83cb801d 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized strncmp implementation using Loongarch LSX instructions.
|
||||
+/* Optimized strncmp implementation using LoongArch LSX instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
|
||||
index b900430a..a8296a1b 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized strnlen implementation using basic Loongarch instructions.
|
||||
+/* Optimized strnlen implementation using basic LoongArch instructions.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
|
||||
index 2c03d3d9..aa6c812d 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized strnlen implementation using loongarch LASX instructions
|
||||
+/* Optimized strnlen implementation using LoongArch LASX instructions
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
|
||||
index b769a895..d0febe3e 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
|
||||
@@ -1,4 +1,4 @@
|
||||
-/* Optimized strnlen implementation using loongarch LSX instructions
|
||||
+/* Optimized strnlen implementation using LoongArch LSX instructions
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
--
|
||||
2.33.0
|
||||
|
67
LoongArch-Change-to-put-magic-number-to-.rodata-sect.patch
Normal file
67
LoongArch-Change-to-put-magic-number-to-.rodata-sect.patch
Normal file
|
@ -0,0 +1,67 @@
|
|||
From fb72c81f9894b23797f6e2e066532c0963f5155f Mon Sep 17 00:00:00 2001
|
||||
From: dengjianbo <dengjianbo@loongson.cn>
|
||||
Date: Wed, 13 Sep 2023 15:35:01 +0800
|
||||
Subject: [PATCH 24/29] LoongArch: Change to put magic number to .rodata
|
||||
section
|
||||
|
||||
Change to put magic number to .rodata section in memmove-lsx, and use
|
||||
pcalau12i and %pc_lo12 with vld to get the data.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
.../loongarch/lp64/multiarch/memmove-lsx.S | 20 +++++++++----------
|
||||
1 file changed, 10 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
|
||||
index 8a936770..5eb819ef 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
|
||||
@@ -209,13 +209,10 @@ L(al_less_16):
|
||||
nop
|
||||
|
||||
|
||||
-L(magic_num):
|
||||
- .dword 0x0706050403020100
|
||||
- .dword 0x0f0e0d0c0b0a0908
|
||||
L(unaligned):
|
||||
- pcaddi t2, -4
|
||||
+ pcalau12i t2, %pc_hi20(L(INDEX))
|
||||
bstrins.d a1, zero, 3, 0
|
||||
- vld vr8, t2, 0
|
||||
+ vld vr8, t2, %pc_lo12(L(INDEX))
|
||||
vld vr0, a1, 0
|
||||
|
||||
vld vr1, a1, 16
|
||||
@@ -413,13 +410,10 @@ L(back_al_less_16):
|
||||
vst vr1, a0, 0
|
||||
jr ra
|
||||
|
||||
-L(magic_num_2):
|
||||
- .dword 0x0706050403020100
|
||||
- .dword 0x0f0e0d0c0b0a0908
|
||||
L(back_unaligned):
|
||||
- pcaddi t2, -4
|
||||
+ pcalau12i t2, %pc_hi20(L(INDEX))
|
||||
bstrins.d a4, zero, 3, 0
|
||||
- vld vr8, t2, 0
|
||||
+ vld vr8, t2, %pc_lo12(L(INDEX))
|
||||
vld vr0, a4, 0
|
||||
|
||||
vld vr1, a4, -16
|
||||
@@ -529,6 +523,12 @@ L(back_un_less_16):
|
||||
jr ra
|
||||
END(MEMMOVE_NAME)
|
||||
|
||||
+ .section .rodata.cst16,"M",@progbits,16
|
||||
+ .align 4
|
||||
+L(INDEX):
|
||||
+ .dword 0x0706050403020100
|
||||
+ .dword 0x0f0e0d0c0b0a0908
|
||||
+
|
||||
libc_hidden_builtin_def (MEMCPY_NAME)
|
||||
libc_hidden_builtin_def (MEMMOVE_NAME)
|
||||
#endif
|
||||
--
|
||||
2.33.0
|
||||
|
44
LoongArch-Micro-optimize-LD_PCREL.patch
Normal file
44
LoongArch-Micro-optimize-LD_PCREL.patch
Normal file
|
@ -0,0 +1,44 @@
|
|||
From 7f703cf758c4f185dd62f2a4f463002bb514af16 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Sun, 27 Aug 2023 00:36:51 +0800
|
||||
Subject: [PATCH 13/29] LoongArch: Micro-optimize LD_PCREL
|
||||
|
||||
We are requiring Binutils >= 2.41, so explicit relocation syntax is
|
||||
always supported by the assembler. Use it to reduce one instruction.
|
||||
|
||||
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/unix/sysv/linux/loongarch/pointer_guard.h | 10 ++++------
|
||||
1 file changed, 4 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h b/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h
|
||||
index b25e353b..d6c78687 100644
|
||||
--- a/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h
|
||||
+++ b/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h
|
||||
@@ -19,17 +19,15 @@
|
||||
#ifndef POINTER_GUARD_H
|
||||
#define POINTER_GUARD_H
|
||||
|
||||
-/* Load a got-relative EXPR into G, using T.
|
||||
- Note G and T are register names. */
|
||||
+/* Load a got-relative EXPR into register G. */
|
||||
#define LD_GLOBAL(G, EXPR) \
|
||||
la.global G, EXPR; \
|
||||
REG_L G, G, 0;
|
||||
|
||||
-/* Load a pc-relative EXPR into G, using T.
|
||||
- Note G and T are register names. */
|
||||
+/* Load a pc-relative EXPR into register G. */
|
||||
#define LD_PCREL(G, EXPR) \
|
||||
- la.pcrel G, EXPR; \
|
||||
- REG_L G, G, 0;
|
||||
+ pcalau12i G, %pc_hi20(EXPR); \
|
||||
+ REG_L G, G, %pc_lo12(EXPR);
|
||||
|
||||
#if (IS_IN (rtld) \
|
||||
|| (!defined SHARED && (IS_IN (libc) \
|
||||
--
|
||||
2.33.0
|
||||
|
65
LoongArch-Redefine-macro-LEAF-ENTRY.patch
Normal file
65
LoongArch-Redefine-macro-LEAF-ENTRY.patch
Normal file
|
@ -0,0 +1,65 @@
|
|||
From 8dcd8c837df2e3cf81675522487697522f1542f8 Mon Sep 17 00:00:00 2001
|
||||
From: dengjianbo <dengjianbo@loongson.cn>
|
||||
Date: Tue, 8 Aug 2023 14:15:42 +0800
|
||||
Subject: [PATCH 01/29] LoongArch: Redefine macro LEAF/ENTRY.
|
||||
|
||||
The following usages of macro LEAF/ENTRY are all feasible:
|
||||
1. LEAF(fcn) -- the align value of fcn is .align 3(default value)
|
||||
2. LEAF(fcn, 6) -- the align value of fcn is .align 6
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/sys/asm.h | 36 ++++++++++++++++++++++++++----------
|
||||
1 file changed, 26 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
|
||||
index d1a279b8..c5eb8afa 100644
|
||||
--- a/sysdeps/loongarch/sys/asm.h
|
||||
+++ b/sysdeps/loongarch/sys/asm.h
|
||||
@@ -39,16 +39,32 @@
|
||||
#define FREG_L fld.d
|
||||
#define FREG_S fst.d
|
||||
|
||||
-/* Declare leaf routine. */
|
||||
-#define LEAF(symbol) \
|
||||
- .text; \
|
||||
- .globl symbol; \
|
||||
- .align 3; \
|
||||
- cfi_startproc; \
|
||||
- .type symbol, @function; \
|
||||
- symbol:
|
||||
-
|
||||
-#define ENTRY(symbol) LEAF (symbol)
|
||||
+/* Declare leaf routine.
|
||||
+ The usage of macro LEAF/ENTRY is as follows:
|
||||
+ 1. LEAF(fcn) -- the align value of fcn is .align 3 (default value)
|
||||
+ 2. LEAF(fcn, 6) -- the align value of fcn is .align 6
|
||||
+*/
|
||||
+#define LEAF_IMPL(symbol, aln, ...) \
|
||||
+ .text; \
|
||||
+ .globl symbol; \
|
||||
+ .align aln; \
|
||||
+ .type symbol, @function; \
|
||||
+symbol: \
|
||||
+ cfi_startproc;
|
||||
+
|
||||
+
|
||||
+#define LEAF(...) LEAF_IMPL(__VA_ARGS__, 3)
|
||||
+#define ENTRY(...) LEAF(__VA_ARGS__)
|
||||
+
|
||||
+#define LEAF_NO_ALIGN(symbol) \
|
||||
+ .text; \
|
||||
+ .globl symbol; \
|
||||
+ .type symbol, @function; \
|
||||
+symbol: \
|
||||
+ cfi_startproc;
|
||||
+
|
||||
+#define ENTRY_NO_ALIGN(symbol) LEAF_NO_ALIGN(symbol)
|
||||
+
|
||||
|
||||
/* Mark end of function. */
|
||||
#undef END
|
||||
--
|
||||
2.33.0
|
||||
|
56
LoongArch-Remove-support-code-for-old-linker-in-star.patch
Normal file
56
LoongArch-Remove-support-code-for-old-linker-in-star.patch
Normal file
|
@ -0,0 +1,56 @@
|
|||
From f8d66a269cb6f1a7087afadf3375bdf0553abf53 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Sun, 27 Aug 2023 00:36:50 +0800
|
||||
Subject: [PATCH 12/29] LoongArch: Remove support code for old linker in
|
||||
start.S
|
||||
|
||||
We are requiring Binutils >= 2.41, so la.pcrel always works here.
|
||||
|
||||
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/start.S | 19 +++----------------
|
||||
1 file changed, 3 insertions(+), 16 deletions(-)
|
||||
|
||||
diff --git a/sysdeps/loongarch/start.S b/sysdeps/loongarch/start.S
|
||||
index e9d82033..bf6bfc9e 100644
|
||||
--- a/sysdeps/loongarch/start.S
|
||||
+++ b/sysdeps/loongarch/start.S
|
||||
@@ -60,20 +60,7 @@ ENTRY (ENTRY_POINT)
|
||||
cfi_undefined (1)
|
||||
or a5, a0, zero /* rtld_fini */
|
||||
|
||||
-#if ENABLE_STATIC_PIE
|
||||
-/* For static PIE, the GOT cannot be used in _start because the GOT entries are
|
||||
- offsets instead of real addresses before __libc_start_main.
|
||||
- __libc_start_main and/or main may be not local, so we rely on the linker to
|
||||
- produce PLT entries for them. GNU ld >= 2.40 supports this. */
|
||||
-# define LA la.pcrel
|
||||
-#else
|
||||
-/* Old GNU ld (< 2.40) cannot handle PC relative address against a non-local
|
||||
- function correctly. We deem these old linkers failing to support static PIE
|
||||
- and load the addresses from GOT. */
|
||||
-# define LA la.got
|
||||
-#endif
|
||||
-
|
||||
- LA a0, t0, main
|
||||
+ la.pcrel a0, t0, main
|
||||
REG_L a1, sp, 0
|
||||
ADDI a2, sp, SZREG
|
||||
|
||||
@@ -84,9 +71,9 @@ ENTRY (ENTRY_POINT)
|
||||
move a4, zero /* used to be fini */
|
||||
or a6, sp, zero /* stack_end */
|
||||
|
||||
- LA ra, t0, __libc_start_main
|
||||
+ la.pcrel ra, t0, __libc_start_main
|
||||
jirl ra, ra, 0
|
||||
|
||||
- LA ra, t0, abort
|
||||
+ la.pcrel ra, t0, abort
|
||||
jirl ra, ra, 0
|
||||
END (ENTRY_POINT)
|
||||
--
|
||||
2.33.0
|
||||
|
28
LoongArch-Replace-deprecated-v0-with-a0-to-eliminate.patch
Normal file
28
LoongArch-Replace-deprecated-v0-with-a0-to-eliminate.patch
Normal file
|
@ -0,0 +1,28 @@
|
|||
From b4b4bb7c9220a0bbdf5aec0ac8c1de1d22329280 Mon Sep 17 00:00:00 2001
|
||||
From: caiyinyu <caiyinyu@loongson.cn>
|
||||
Date: Thu, 14 Sep 2023 19:48:24 +0800
|
||||
Subject: [PATCH 21/29] LoongArch: Replace deprecated $v0 with $a0 to eliminate
|
||||
'as' Warnings.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/dl-machine.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
|
||||
index 8a2db9de..57913cef 100644
|
||||
--- a/sysdeps/loongarch/dl-machine.h
|
||||
+++ b/sysdeps/loongarch/dl-machine.h
|
||||
@@ -90,7 +90,7 @@ static inline ElfW (Addr) elf_machine_dynamic (void)
|
||||
or $a0, $sp, $zero \n\
|
||||
bl _dl_start \n\
|
||||
# Stash user entry point in s0. \n\
|
||||
- or $s0, $v0, $zero \n\
|
||||
+ or $s0, $a0, $zero \n\
|
||||
# Load the original argument count. \n\
|
||||
ld.d $a1, $sp, 0 \n\
|
||||
# Call _dl_init (struct link_map *main_map, int argc, \
|
||||
--
|
||||
2.33.0
|
||||
|
81
LoongArch-Unify-Register-Names.patch
Normal file
81
LoongArch-Unify-Register-Names.patch
Normal file
|
@ -0,0 +1,81 @@
|
|||
From 458ab6d5f39cca1cabd83abd2022f67491f6f5ed Mon Sep 17 00:00:00 2001
|
||||
From: caiyinyu <caiyinyu@loongson.cn>
|
||||
Date: Fri, 20 Oct 2023 09:20:02 +0800
|
||||
Subject: [PATCH 27/29] LoongArch: Unify Register Names.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/__longjmp.S | 20 ++++++++++----------
|
||||
sysdeps/loongarch/setjmp.S | 18 +++++++++---------
|
||||
2 files changed, 19 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/sysdeps/loongarch/__longjmp.S b/sysdeps/loongarch/__longjmp.S
|
||||
index cbde1946..e87ce311 100644
|
||||
--- a/sysdeps/loongarch/__longjmp.S
|
||||
+++ b/sysdeps/loongarch/__longjmp.S
|
||||
@@ -43,18 +43,18 @@ ENTRY (__longjmp)
|
||||
REG_L s8, a0, 12*SZREG
|
||||
|
||||
#ifndef __loongarch_soft_float
|
||||
- FREG_L $f24, a0, 13*SZREG + 0*SZFREG
|
||||
- FREG_L $f25, a0, 13*SZREG + 1*SZFREG
|
||||
- FREG_L $f26, a0, 13*SZREG + 2*SZFREG
|
||||
- FREG_L $f27, a0, 13*SZREG + 3*SZFREG
|
||||
- FREG_L $f28, a0, 13*SZREG + 4*SZFREG
|
||||
- FREG_L $f29, a0, 13*SZREG + 5*SZFREG
|
||||
- FREG_L $f30, a0, 13*SZREG + 6*SZFREG
|
||||
- FREG_L $f31, a0, 13*SZREG + 7*SZFREG
|
||||
+ FREG_L fs0, a0, 13*SZREG + 0*SZFREG
|
||||
+ FREG_L fs1, a0, 13*SZREG + 1*SZFREG
|
||||
+ FREG_L fs2, a0, 13*SZREG + 2*SZFREG
|
||||
+ FREG_L fs3, a0, 13*SZREG + 3*SZFREG
|
||||
+ FREG_L fs4, a0, 13*SZREG + 4*SZFREG
|
||||
+ FREG_L fs5, a0, 13*SZREG + 5*SZFREG
|
||||
+ FREG_L fs6, a0, 13*SZREG + 6*SZFREG
|
||||
+ FREG_L fs7, a0, 13*SZREG + 7*SZFREG
|
||||
#endif
|
||||
|
||||
- sltui a0,a1,1
|
||||
+ sltui a0, a1, 1
|
||||
ADD a0, a0, a1 # a0 = (a1 == 0) ? 1 : a1
|
||||
- jirl zero,ra,0
|
||||
+ jirl zero, ra, 0
|
||||
|
||||
END (__longjmp)
|
||||
diff --git a/sysdeps/loongarch/setjmp.S b/sysdeps/loongarch/setjmp.S
|
||||
index 6c7065cd..b6e4f727 100644
|
||||
--- a/sysdeps/loongarch/setjmp.S
|
||||
+++ b/sysdeps/loongarch/setjmp.S
|
||||
@@ -52,19 +52,19 @@ ENTRY (__sigsetjmp)
|
||||
REG_S s8, a0, 12*SZREG
|
||||
|
||||
#ifndef __loongarch_soft_float
|
||||
- FREG_S $f24, a0, 13*SZREG + 0*SZFREG
|
||||
- FREG_S $f25, a0, 13*SZREG + 1*SZFREG
|
||||
- FREG_S $f26, a0, 13*SZREG + 2*SZFREG
|
||||
- FREG_S $f27, a0, 13*SZREG + 3*SZFREG
|
||||
- FREG_S $f28, a0, 13*SZREG + 4*SZFREG
|
||||
- FREG_S $f29, a0, 13*SZREG + 5*SZFREG
|
||||
- FREG_S $f30, a0, 13*SZREG + 6*SZFREG
|
||||
- FREG_S $f31, a0, 13*SZREG + 7*SZFREG
|
||||
+ FREG_S fs0, a0, 13*SZREG + 0*SZFREG
|
||||
+ FREG_S fs1, a0, 13*SZREG + 1*SZFREG
|
||||
+ FREG_S fs2, a0, 13*SZREG + 2*SZFREG
|
||||
+ FREG_S fs3, a0, 13*SZREG + 3*SZFREG
|
||||
+ FREG_S fs4, a0, 13*SZREG + 4*SZFREG
|
||||
+ FREG_S fs5, a0, 13*SZREG + 5*SZFREG
|
||||
+ FREG_S fs6, a0, 13*SZREG + 6*SZFREG
|
||||
+ FREG_S fs7, a0, 13*SZREG + 7*SZFREG
|
||||
#endif
|
||||
|
||||
#if !IS_IN (libc) && IS_IN(rtld)
|
||||
li.w v0, 0
|
||||
- jirl zero,ra,0
|
||||
+ jirl zero, ra, 0
|
||||
#else
|
||||
b __sigjmp_save
|
||||
#endif
|
||||
--
|
||||
2.33.0
|
||||
|
24
LoongArch-Update-hwcap.h-to-sync-with-LoongArch-kern.patch
Normal file
24
LoongArch-Update-hwcap.h-to-sync-with-LoongArch-kern.patch
Normal file
|
@ -0,0 +1,24 @@
|
|||
From 4828d1aa0028e819a5fb336d962e8f7cbfedf8b4 Mon Sep 17 00:00:00 2001
|
||||
From: caiyinyu <caiyinyu@loongson.cn>
|
||||
Date: Mon, 23 Oct 2023 15:53:38 +0800
|
||||
Subject: [PATCH 28/29] LoongArch: Update hwcap.h to sync with LoongArch
|
||||
kernel.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h
|
||||
index 5104b69c..7acec23d 100644
|
||||
--- a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h
|
||||
+++ b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h
|
||||
@@ -35,3 +35,4 @@
|
||||
#define HWCAP_LOONGARCH_LBT_X86 (1 << 10)
|
||||
#define HWCAP_LOONGARCH_LBT_ARM (1 << 11)
|
||||
#define HWCAP_LOONGARCH_LBT_MIPS (1 << 12)
|
||||
+#define HWCAP_LOONGARCH_PTW (1 << 13)
|
||||
--
|
||||
2.33.0
|
||||
|
30
LoongArch-elf-Add-new-LoongArch-reloc-types-109-into.patch
Normal file
30
LoongArch-elf-Add-new-LoongArch-reloc-types-109-into.patch
Normal file
|
@ -0,0 +1,30 @@
|
|||
From 4938840b15ff9734fdcc63cc0744ce3f3bbb0b16 Mon Sep 17 00:00:00 2001
|
||||
From: caiyinyu <caiyinyu@loongson.cn>
|
||||
Date: Mon, 14 Aug 2023 15:34:08 +0800
|
||||
Subject: [PATCH 05/29] LoongArch: elf: Add new LoongArch reloc types 109 into
|
||||
elf.h
|
||||
|
||||
These reloc types are generated by GNU assembler >= 2.41 for relaxation
|
||||
support.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
elf/elf.h | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/elf/elf.h b/elf/elf.h
|
||||
index d623bdeb..9c51073f 100644
|
||||
--- a/elf/elf.h
|
||||
+++ b/elf/elf.h
|
||||
@@ -4213,6 +4213,7 @@ enum
|
||||
#define R_LARCH_SUB6 106
|
||||
#define R_LARCH_ADD_ULEB128 107
|
||||
#define R_LARCH_SUB_ULEB128 108
|
||||
+#define R_LARCH_64_PCREL 109
|
||||
|
||||
/* ARC specific declarations. */
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
528
Loongarch-Add-ifunc-support-and-add-different-versio.patch
Normal file
528
Loongarch-Add-ifunc-support-and-add-different-versio.patch
Normal file
|
@ -0,0 +1,528 @@
|
|||
From 43abd8772a143cd96688c081500397dd712e631b Mon Sep 17 00:00:00 2001
|
||||
From: dengjianbo <dengjianbo@loongson.cn>
|
||||
Date: Tue, 8 Aug 2023 14:15:44 +0800
|
||||
Subject: [PATCH 03/29] Loongarch: Add ifunc support and add different versions
|
||||
of strlen
|
||||
|
||||
strlen-lasx is implemented by LASX simd instructions(256bit)
|
||||
strlen-lsx is implemented by LSX simd instructions(128bit)
|
||||
strlen-aligned is implemented by LA basic instructions and never uses unaligned memory access
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/lp64/multiarch/Makefile | 7 ++
|
||||
.../lp64/multiarch/ifunc-impl-list.c | 41 +++++++
|
||||
.../loongarch/lp64/multiarch/ifunc-strlen.h | 40 +++++++
|
||||
.../loongarch/lp64/multiarch/strlen-aligned.S | 100 ++++++++++++++++++
|
||||
.../loongarch/lp64/multiarch/strlen-lasx.S | 63 +++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/strlen-lsx.S | 71 +++++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/strlen.c | 37 +++++++
|
||||
sysdeps/loongarch/sys/regdef.h | 57 ++++++++++
|
||||
.../unix/sysv/linux/loongarch/cpu-features.h | 2 +
|
||||
9 files changed, 418 insertions(+)
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen.c
|
||||
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
new file mode 100644
|
||||
index 00000000..76c506c9
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
@@ -0,0 +1,7 @@
|
||||
+ifeq ($(subdir),string)
|
||||
+sysdep_routines += \
|
||||
+ strlen-aligned \
|
||||
+ strlen-lsx \
|
||||
+ strlen-lasx \
|
||||
+# sysdep_routines
|
||||
+endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
new file mode 100644
|
||||
index 00000000..1a2a576f
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
@@ -0,0 +1,41 @@
|
||||
+/* Enumerate available IFUNC implementations of a function LoongArch64 version.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <assert.h>
|
||||
+#include <string.h>
|
||||
+#include <wchar.h>
|
||||
+#include <ldsodefs.h>
|
||||
+#include <ifunc-impl-list.h>
|
||||
+#include <stdio.h>
|
||||
+
|
||||
+size_t
|
||||
+__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
+ size_t max)
|
||||
+{
|
||||
+
|
||||
+ size_t i = max;
|
||||
+
|
||||
+ IFUNC_IMPL (i, name, strlen,
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LASX, __strlen_lasx)
|
||||
+ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LSX, __strlen_lsx)
|
||||
+#endif
|
||||
+ IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned)
|
||||
+ )
|
||||
+ return i;
|
||||
+}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h
|
||||
new file mode 100644
|
||||
index 00000000..6258bb76
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h
|
||||
@@ -0,0 +1,40 @@
|
||||
+/* Common definition for strlen ifunc selections.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <ldsodefs.h>
|
||||
+#include <ifunc-init.h>
|
||||
+
|
||||
+#if !defined __loongarch_soft_float
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
|
||||
+#endif
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
|
||||
+
|
||||
+static inline void *
|
||||
+IFUNC_SELECTOR (void)
|
||||
+{
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ if (SUPPORT_LASX)
|
||||
+ return OPTIMIZE (lasx);
|
||||
+ else if (SUPPORT_LSX)
|
||||
+ return OPTIMIZE (lsx);
|
||||
+ else
|
||||
+#endif
|
||||
+ return OPTIMIZE (aligned);
|
||||
+}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
|
||||
new file mode 100644
|
||||
index 00000000..e9e1d2fc
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
|
||||
@@ -0,0 +1,100 @@
|
||||
+/* Optimized strlen implementation using basic Loongarch instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc)
|
||||
+# define STRLEN __strlen_aligned
|
||||
+#else
|
||||
+# define STRLEN strlen
|
||||
+#endif
|
||||
+
|
||||
+LEAF(STRLEN, 6)
|
||||
+ move a1, a0
|
||||
+ bstrins.d a0, zero, 2, 0
|
||||
+ lu12i.w a2, 0x01010
|
||||
+ li.w t0, -1
|
||||
+
|
||||
+ ld.d t2, a0, 0
|
||||
+ andi t1, a1, 0x7
|
||||
+ ori a2, a2, 0x101
|
||||
+ slli.d t1, t1, 3
|
||||
+
|
||||
+ bstrins.d a2, a2, 63, 32
|
||||
+ sll.d t1, t0, t1
|
||||
+ slli.d t3, a2, 7
|
||||
+ nor a3, zero, t3
|
||||
+
|
||||
+ orn t2, t2, t1
|
||||
+ sub.d t0, t2, a2
|
||||
+ nor t1, t2, a3
|
||||
+ and t0, t0, t1
|
||||
+
|
||||
+
|
||||
+ bnez t0, L(count_pos)
|
||||
+ addi.d a0, a0, 8
|
||||
+L(loop_16_7bit):
|
||||
+ ld.d t2, a0, 0
|
||||
+ sub.d t1, t2, a2
|
||||
+
|
||||
+ and t0, t1, t3
|
||||
+ bnez t0, L(more_check)
|
||||
+ ld.d t2, a0, 8
|
||||
+ sub.d t1, t2, a2
|
||||
+
|
||||
+ and t0, t1, t3
|
||||
+ addi.d a0, a0, 16
|
||||
+ beqz t0, L(loop_16_7bit)
|
||||
+ addi.d a0, a0, -8
|
||||
+
|
||||
+L(more_check):
|
||||
+ nor t0, t2, a3
|
||||
+ and t0, t1, t0
|
||||
+ bnez t0, L(count_pos)
|
||||
+ addi.d a0, a0, 8
|
||||
+
|
||||
+
|
||||
+L(loop_16_8bit):
|
||||
+ ld.d t2, a0, 0
|
||||
+ sub.d t1, t2, a2
|
||||
+ nor t0, t2, a3
|
||||
+ and t0, t0, t1
|
||||
+
|
||||
+ bnez t0, L(count_pos)
|
||||
+ ld.d t2, a0, 8
|
||||
+ addi.d a0, a0, 16
|
||||
+ sub.d t1, t2, a2
|
||||
+
|
||||
+ nor t0, t2, a3
|
||||
+ and t0, t0, t1
|
||||
+ beqz t0, L(loop_16_8bit)
|
||||
+ addi.d a0, a0, -8
|
||||
+
|
||||
+L(count_pos):
|
||||
+ ctz.d t1, t0
|
||||
+ sub.d a0, a0, a1
|
||||
+ srli.d t1, t1, 3
|
||||
+ add.d a0, a0, t1
|
||||
+
|
||||
+ jr ra
|
||||
+END(STRLEN)
|
||||
+
|
||||
+libc_hidden_builtin_def (STRLEN)
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
|
||||
new file mode 100644
|
||||
index 00000000..258c47ce
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
|
||||
@@ -0,0 +1,63 @@
|
||||
+/* Optimized strlen implementation using loongarch LASX SIMD instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+# define STRLEN __strlen_lasx
|
||||
+
|
||||
+LEAF(STRLEN, 6)
|
||||
+ move a1, a0
|
||||
+ bstrins.d a0, zero, 4, 0
|
||||
+ li.d t1, -1
|
||||
+ xvld xr0, a0, 0
|
||||
+
|
||||
+ xvmsknz.b xr0, xr0
|
||||
+ xvpickve.w xr1, xr0, 4
|
||||
+ vilvl.h vr0, vr1, vr0
|
||||
+ movfr2gr.s t0, fa0 # sign extend
|
||||
+
|
||||
+ sra.w t0, t0, a1
|
||||
+ beq t0, t1, L(loop)
|
||||
+ cto.w a0, t0
|
||||
+ jr ra
|
||||
+
|
||||
+L(loop):
|
||||
+ xvld xr0, a0, 32
|
||||
+ addi.d a0, a0, 32
|
||||
+ xvsetanyeqz.b fcc0, xr0
|
||||
+ bceqz fcc0, L(loop)
|
||||
+
|
||||
+
|
||||
+ xvmsknz.b xr0, xr0
|
||||
+ sub.d a0, a0, a1
|
||||
+ xvpickve.w xr1, xr0, 4
|
||||
+ vilvl.h vr0, vr1, vr0
|
||||
+
|
||||
+ movfr2gr.s t0, fa0
|
||||
+ cto.w t0, t0
|
||||
+ add.d a0, a0, t0
|
||||
+ jr ra
|
||||
+END(STRLEN)
|
||||
+
|
||||
+libc_hidden_builtin_def (STRLEN)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
|
||||
new file mode 100644
|
||||
index 00000000..b194355e
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
|
||||
@@ -0,0 +1,71 @@
|
||||
+/* Optimized strlen implementation using Loongarch LSX SIMD instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+# define STRLEN __strlen_lsx
|
||||
+
|
||||
+LEAF(STRLEN, 6)
|
||||
+ move a1, a0
|
||||
+ bstrins.d a0, zero, 4, 0
|
||||
+ vld vr0, a0, 0
|
||||
+ vld vr1, a0, 16
|
||||
+
|
||||
+ li.d t1, -1
|
||||
+ vmsknz.b vr0, vr0
|
||||
+ vmsknz.b vr1, vr1
|
||||
+ vilvl.h vr0, vr1, vr0
|
||||
+
|
||||
+ movfr2gr.s t0, fa0
|
||||
+ sra.w t0, t0, a1
|
||||
+ beq t0, t1, L(loop)
|
||||
+ cto.w a0, t0
|
||||
+
|
||||
+ jr ra
|
||||
+ nop
|
||||
+ nop
|
||||
+ nop
|
||||
+
|
||||
+
|
||||
+L(loop):
|
||||
+ vld vr0, a0, 32
|
||||
+ vld vr1, a0, 48
|
||||
+ addi.d a0, a0, 32
|
||||
+ vmin.bu vr2, vr0, vr1
|
||||
+
|
||||
+ vsetanyeqz.b fcc0, vr2
|
||||
+ bceqz fcc0, L(loop)
|
||||
+ vmsknz.b vr0, vr0
|
||||
+ vmsknz.b vr1, vr1
|
||||
+
|
||||
+ vilvl.h vr0, vr1, vr0
|
||||
+ sub.d a0, a0, a1
|
||||
+ movfr2gr.s t0, fa0
|
||||
+ cto.w t0, t0
|
||||
+
|
||||
+ add.d a0, a0, t0
|
||||
+ jr ra
|
||||
+END(STRLEN)
|
||||
+
|
||||
+libc_hidden_builtin_def (STRLEN)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen.c b/sysdeps/loongarch/lp64/multiarch/strlen.c
|
||||
new file mode 100644
|
||||
index 00000000..381c2daa
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strlen.c
|
||||
@@ -0,0 +1,37 @@
|
||||
+/* Multiple versions of strlen.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* Define multiple versions only for the definition in libc. */
|
||||
+
|
||||
+#if IS_IN (libc)
|
||||
+# define strlen __redirect_strlen
|
||||
+# include <string.h>
|
||||
+# undef strlen
|
||||
+
|
||||
+# define SYMBOL_NAME strlen
|
||||
+# include "ifunc-strlen.h"
|
||||
+
|
||||
+libc_ifunc_redirected (__redirect_strlen, strlen, IFUNC_SELECTOR ());
|
||||
+
|
||||
+# ifdef SHARED
|
||||
+__hidden_ver1 (strlen, __GI_strlen, __redirect_strlen)
|
||||
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strlen);
|
||||
+# endif
|
||||
+
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
|
||||
index 5100f36d..524d2e32 100644
|
||||
--- a/sysdeps/loongarch/sys/regdef.h
|
||||
+++ b/sysdeps/loongarch/sys/regdef.h
|
||||
@@ -89,6 +89,14 @@
|
||||
#define fs5 $f29
|
||||
#define fs6 $f30
|
||||
#define fs7 $f31
|
||||
+#define fcc0 $fcc0
|
||||
+#define fcc1 $fcc1
|
||||
+#define fcc2 $fcc2
|
||||
+#define fcc3 $fcc3
|
||||
+#define fcc4 $fcc4
|
||||
+#define fcc5 $fcc5
|
||||
+#define fcc6 $fcc6
|
||||
+#define fcc7 $fcc7
|
||||
|
||||
#define vr0 $vr0
|
||||
#define vr1 $vr1
|
||||
@@ -98,6 +106,30 @@
|
||||
#define vr5 $vr5
|
||||
#define vr6 $vr6
|
||||
#define vr7 $vr7
|
||||
+#define vr8 $vr8
|
||||
+#define vr9 $vr9
|
||||
+#define vr10 $vr10
|
||||
+#define vr11 $vr11
|
||||
+#define vr12 $vr12
|
||||
+#define vr13 $vr13
|
||||
+#define vr14 $vr14
|
||||
+#define vr15 $vr15
|
||||
+#define vr16 $vr16
|
||||
+#define vr17 $vr17
|
||||
+#define vr18 $vr18
|
||||
+#define vr19 $vr19
|
||||
+#define vr20 $vr20
|
||||
+#define vr21 $vr21
|
||||
+#define vr22 $vr22
|
||||
+#define vr23 $vr23
|
||||
+#define vr24 $vr24
|
||||
+#define vr25 $vr25
|
||||
+#define vr26 $vr26
|
||||
+#define vr27 $vr27
|
||||
+#define vr28 $vr28
|
||||
+#define vr29 $vr29
|
||||
+#define vr30 $vr30
|
||||
+#define vr31 $vr31
|
||||
|
||||
#define xr0 $xr0
|
||||
#define xr1 $xr1
|
||||
@@ -107,5 +139,30 @@
|
||||
#define xr5 $xr5
|
||||
#define xr6 $xr6
|
||||
#define xr7 $xr7
|
||||
+#define xr7 $xr7
|
||||
+#define xr8 $xr8
|
||||
+#define xr9 $xr9
|
||||
+#define xr10 $xr10
|
||||
+#define xr11 $xr11
|
||||
+#define xr12 $xr12
|
||||
+#define xr13 $xr13
|
||||
+#define xr14 $xr14
|
||||
+#define xr15 $xr15
|
||||
+#define xr16 $xr16
|
||||
+#define xr17 $xr17
|
||||
+#define xr18 $xr18
|
||||
+#define xr19 $xr19
|
||||
+#define xr20 $xr20
|
||||
+#define xr21 $xr21
|
||||
+#define xr22 $xr22
|
||||
+#define xr23 $xr23
|
||||
+#define xr24 $xr24
|
||||
+#define xr25 $xr25
|
||||
+#define xr26 $xr26
|
||||
+#define xr27 $xr27
|
||||
+#define xr28 $xr28
|
||||
+#define xr29 $xr29
|
||||
+#define xr30 $xr30
|
||||
+#define xr31 $xr31
|
||||
|
||||
#endif /* _SYS_REGDEF_H */
|
||||
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
|
||||
index e371e13b..d1a280a5 100644
|
||||
--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
|
||||
+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
|
||||
@@ -25,5 +25,7 @@
|
||||
#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
|
||||
#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
|
||||
|
||||
+#define INIT_ARCH()
|
||||
+
|
||||
#endif /* _CPU_FEATURES_LOONGARCH64_H */
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
2570
Loongarch-Add-ifunc-support-for-memcpy-aligned-unali.patch
Normal file
2570
Loongarch-Add-ifunc-support-for-memcpy-aligned-unali.patch
Normal file
File diff suppressed because it is too large
Load diff
706
Loongarch-Add-ifunc-support-for-strchr-aligned-lsx-l.patch
Normal file
706
Loongarch-Add-ifunc-support-for-strchr-aligned-lsx-l.patch
Normal file
|
@ -0,0 +1,706 @@
|
|||
From aca7d7f0dde5f56344e8e58e5f6648c96bb1f1cc Mon Sep 17 00:00:00 2001
|
||||
From: dengjianbo <dengjianbo@loongson.cn>
|
||||
Date: Tue, 15 Aug 2023 09:08:11 +0800
|
||||
Subject: [PATCH 06/29] Loongarch: Add ifunc support for strchr{aligned, lsx,
|
||||
lasx} and strchrnul{aligned, lsx, lasx}
|
||||
|
||||
These implementations improve the time to run strchr{nul}
|
||||
microbenchmark in glibc as below:
|
||||
strchr-lasx reduces the runtime about 50%-83%
|
||||
strchr-lsx reduces the runtime about 30%-67%
|
||||
strchr-aligned reduces the runtime about 10%-20%
|
||||
strchrnul-lasx reduces the runtime about 50%-83%
|
||||
strchrnul-lsx reduces the runtime about 36%-65%
|
||||
strchrnul-aligned reduces the runtime about 6%-10%
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/lp64/multiarch/Makefile | 6 ++
|
||||
.../lp64/multiarch/ifunc-impl-list.c | 16 +++
|
||||
.../loongarch/lp64/multiarch/ifunc-strchr.h | 41 ++++++++
|
||||
.../lp64/multiarch/ifunc-strchrnul.h | 41 ++++++++
|
||||
.../loongarch/lp64/multiarch/strchr-aligned.S | 99 +++++++++++++++++++
|
||||
.../loongarch/lp64/multiarch/strchr-lasx.S | 91 +++++++++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/strchr-lsx.S | 73 ++++++++++++++
|
||||
sysdeps/loongarch/lp64/multiarch/strchr.c | 36 +++++++
|
||||
.../lp64/multiarch/strchrnul-aligned.S | 95 ++++++++++++++++++
|
||||
.../loongarch/lp64/multiarch/strchrnul-lasx.S | 22 +++++
|
||||
.../loongarch/lp64/multiarch/strchrnul-lsx.S | 22 +++++
|
||||
sysdeps/loongarch/lp64/multiarch/strchrnul.c | 39 ++++++++
|
||||
12 files changed, 581 insertions(+)
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr.c
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
|
||||
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul.c
|
||||
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
index 76c506c9..110a8c5c 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
|
||||
@@ -3,5 +3,11 @@ sysdep_routines += \
|
||||
strlen-aligned \
|
||||
strlen-lsx \
|
||||
strlen-lasx \
|
||||
+ strchr-aligned \
|
||||
+ strchr-lsx \
|
||||
+ strchr-lasx \
|
||||
+ strchrnul-aligned \
|
||||
+ strchrnul-lsx \
|
||||
+ strchrnul-lasx \
|
||||
# sysdep_routines
|
||||
endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
index 1a2a576f..c7164b45 100644
|
||||
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
|
||||
@@ -37,5 +37,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
#endif
|
||||
IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned)
|
||||
)
|
||||
+
|
||||
+ IFUNC_IMPL (i, name, strchr,
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx)
|
||||
+ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LSX, __strchr_lsx)
|
||||
+#endif
|
||||
+ IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_aligned)
|
||||
+ )
|
||||
+
|
||||
+ IFUNC_IMPL (i, name, strchrnul,
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LASX, __strchrnul_lasx)
|
||||
+ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LSX, __strchrnul_lsx)
|
||||
+#endif
|
||||
+ IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned)
|
||||
+ )
|
||||
return i;
|
||||
}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h
|
||||
new file mode 100644
|
||||
index 00000000..4494db79
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h
|
||||
@@ -0,0 +1,41 @@
|
||||
+/* Common definition for strchr ifunc selections.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <ldsodefs.h>
|
||||
+#include <ifunc-init.h>
|
||||
+
|
||||
+#if !defined __loongarch_soft_float
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
|
||||
+#endif
|
||||
+
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
|
||||
+
|
||||
+static inline void *
|
||||
+IFUNC_SELECTOR (void)
|
||||
+{
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ if (SUPPORT_LASX)
|
||||
+ return OPTIMIZE (lasx);
|
||||
+ else if (SUPPORT_LSX)
|
||||
+ return OPTIMIZE (lsx);
|
||||
+ else
|
||||
+#endif
|
||||
+ return OPTIMIZE (aligned);
|
||||
+}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h
|
||||
new file mode 100644
|
||||
index 00000000..8a925120
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h
|
||||
@@ -0,0 +1,41 @@
|
||||
+/* Common definition for strchrnul ifunc selections.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <ldsodefs.h>
|
||||
+#include <ifunc-init.h>
|
||||
+
|
||||
+#if !defined __loongarch_soft_float
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
|
||||
+#endif
|
||||
+
|
||||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
|
||||
+
|
||||
+static inline void *
|
||||
+IFUNC_SELECTOR (void)
|
||||
+{
|
||||
+#if !defined __loongarch_soft_float
|
||||
+ if (SUPPORT_LASX)
|
||||
+ return OPTIMIZE (lasx);
|
||||
+ else if (SUPPORT_LSX)
|
||||
+ return OPTIMIZE (lsx);
|
||||
+ else
|
||||
+#endif
|
||||
+ return OPTIMIZE (aligned);
|
||||
+}
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
|
||||
new file mode 100644
|
||||
index 00000000..5fb01806
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
|
||||
@@ -0,0 +1,99 @@
|
||||
+/* Optimized strchr implementation using basic Loongarch instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc)
|
||||
+# define STRCHR_NAME __strchr_aligned
|
||||
+#else
|
||||
+# define STRCHR_NAME strchr
|
||||
+#endif
|
||||
+
|
||||
+LEAF(STRCHR_NAME, 6)
|
||||
+ slli.d t1, a0, 3
|
||||
+ bstrins.d a0, zero, 2, 0
|
||||
+ lu12i.w a2, 0x01010
|
||||
+ ld.d t2, a0, 0
|
||||
+
|
||||
+ ori a2, a2, 0x101
|
||||
+ andi a1, a1, 0xff
|
||||
+ bstrins.d a2, a2, 63, 32
|
||||
+ li.w t0, -1
|
||||
+
|
||||
+ mul.d a1, a1, a2
|
||||
+ sll.d t0, t0, t1
|
||||
+ slli.d a3, a2, 7
|
||||
+ orn t2, t2, t0
|
||||
+
|
||||
+ sll.d t3, a1, t1
|
||||
+ xor t4, t2, t3
|
||||
+ sub.d a4, t2, a2
|
||||
+ sub.d a5, t4, a2
|
||||
+
|
||||
+
|
||||
+ andn a4, a4, t2
|
||||
+ andn a5, a5, t4
|
||||
+ or t0, a4, a5
|
||||
+ and t0, t0, a3
|
||||
+
|
||||
+ bnez t0, L(end)
|
||||
+ addi.d a0, a0, 8
|
||||
+L(loop):
|
||||
+ ld.d t4, a0, 0
|
||||
+ xor t2, t4, a1
|
||||
+
|
||||
+ sub.d a4, t4, a2
|
||||
+ sub.d a5, t2, a2
|
||||
+ andn a4, a4, t4
|
||||
+ andn a5, a5, t2
|
||||
+
|
||||
+ or t0, a4, a5
|
||||
+ and t0, t0, a3
|
||||
+ bnez t0, L(end)
|
||||
+ ld.d t4, a0, 8
|
||||
+
|
||||
+
|
||||
+ addi.d a0, a0, 16
|
||||
+ xor t2, t4, a1
|
||||
+ sub.d a4, t4, a2
|
||||
+ sub.d a5, t2, a2
|
||||
+
|
||||
+ andn a4, a4, t4
|
||||
+ andn a5, a5, t2
|
||||
+ or t0, a4, a5
|
||||
+ and t0, t0, a3
|
||||
+
|
||||
+ beqz t0, L(loop)
|
||||
+ addi.d a0, a0, -8
|
||||
+L(end):
|
||||
+ and t0, a5, a3
|
||||
+ and t1, a4, a3
|
||||
+
|
||||
+ ctz.d t0, t0
|
||||
+ ctz.d t1, t1
|
||||
+ srli.w t2, t0, 3
|
||||
+ sltu t3, t1, t0
|
||||
+
|
||||
+
|
||||
+ add.d a0, a0, t2
|
||||
+ masknez a0, a0, t3
|
||||
+ jr ra
|
||||
+END(STRCHR_NAME)
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
|
||||
new file mode 100644
|
||||
index 00000000..254402da
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
|
||||
@@ -0,0 +1,91 @@
|
||||
+/* Optimized strchr implementation using loongarch LASX SIMD instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+#ifndef AS_STRCHRNUL
|
||||
+# define STRCHR __strchr_lasx
|
||||
+#endif
|
||||
+
|
||||
+LEAF(STRCHR, 6) /* 256-bit vector strchr (strchrnul when AS_STRCHRNUL is defined): a0 is used as the string pointer, a1 as the character; result is left in a0. */
|
||||
+ andi t1, a0, 0x1f /* t1 = offset of the string start inside its 32-byte block */
|
||||
+ bstrins.d a0, zero, 4, 0 /* clear bits 4..0: round a0 down to a 32-byte boundary */
|
||||
+ xvld xr0, a0, 0 /* load the aligned 32-byte block containing the first byte */
|
||||
+ li.d t2, -1 /* t2 = all ones (also the "no hit" value compared against below) */
|
||||
|
|
||||
+
|
||||
+ xvreplgr2vr.b xr1, a1 /* xr1 = character replicated into every byte lane */
|
||||
+ sll.d t1, t2, t1 /* t1 = ones from the start offset upwards; ~t1 covers the bytes before the string */
|
||||
+ xvxor.v xr2, xr0, xr1 /* xr2 = data ^ character: 0 where a byte matches */
|
||||
+ xvmin.bu xr0, xr0, xr2 /* unsigned min(data, data ^ c): 0 exactly where a byte is NUL or matches */
|
||||
|
|
||||
+
|
||||
+ xvmsknz.b xr0, xr0 /* presumably one bit per non-zero byte, produced separately for each 128-bit half - confirm against the LASX manual */
|
||||
+ xvpickve.w xr3, xr0, 4 /* fetch word element 4 (start of the upper half) into xr3 */
|
||||
+ vilvl.h vr0, vr3, vr0 /* interleave low halfwords: presumably yields one 32-bit mask (upper-half bits above lower-half bits) */
|
||||
+ movfr2gr.s t0, fa0 /* move the low 32 bits to t0 (fa0 is presumably the FP view of vector register 0) */
|
||||
|
|
||||
+
|
||||
+ orn t0, t0, t1 /* t0 |= ~t1: treat the bytes before the string as non-zero */
|
||||
+ bne t0, t2, L(end) /* some bit clear: a NUL or a match lies in the first block */
|
||||
+ addi.d a0, a0, 32 /* advance to the next 32-byte block */
|
||||
+ nop /* NOTE(review): looks like alignment padding for L(loop) - confirm */
|
||||
|
|
||||
+
|
||||
|
|
||||
+
|
||||
+L(loop): /* main loop: examines two 32-byte blocks per iteration */
|
||||
+ xvld xr0, a0, 0 /* first block */
|
||||
+ xvxor.v xr2, xr0, xr1 /* 0 where a byte matches */
|
||||
+ xvmin.bu xr0, xr0, xr2 /* 0 where a byte is NUL or matches */
|
||||
+ xvsetanyeqz.b fcc0, xr0 /* fcc0 set when any byte of xr0 is zero (per the mnemonic - confirm) */
|
||||
|
|
||||
+
|
||||
+ bcnez fcc0, L(loop_end) /* hit in the first block; a0 still points at it */
|
||||
+ xvld xr0, a0, 32 /* second block */
|
||||
+ addi.d a0, a0, 64 /* pre-advance past both blocks */
|
||||
+ xvxor.v xr2, xr0, xr1 /* 0 where a byte matches */
|
||||
|
|
||||
+
|
||||
+ xvmin.bu xr0, xr0, xr2 /* 0 where a byte is NUL or matches */
|
||||
+ xvsetanyeqz.b fcc0, xr0 /* any zero byte? */
|
||||
+ bceqz fcc0, L(loop) /* none: keep scanning */
|
||||
+ addi.d a0, a0, -32 /* hit was in the second block: undo half of the pre-advance */
|
||||
|
|
||||
+
|
||||
|
|
||||
+L(loop_end): /* build the 32-bit non-zero mask for the block with the hit (same sequence as the prologue) */
|
||||
+ xvmsknz.b xr0, xr0 /* per-half non-zero byte masks */
|
||||
+ xvpickve.w xr1, xr0, 4 /* upper-half mask; xr1 (the replicated character) is no longer needed here */
|
||||
+ vilvl.h vr0, vr1, vr0 /* merge both halves into one 32-bit mask */
|
||||
+ movfr2gr.s t0, fa0 /* mask to general register */
|
||||
|
|
||||
+
|
||||
|
|
||||
+
|
||||
+L(end): /* t0 = mask with a 0 bit at each NUL/match byte; a0 = address of that block */
|
||||
+ cto.w t0, t0 /* count trailing ones = index of the first NUL-or-match byte */
|
||||
+ add.d a0, a0, t0 /* a0 = address of that byte (this is the strchrnul result) */
|
||||
|
|
||||
+#ifndef AS_STRCHRNUL /* strchr only: return 0 when the byte found is the terminator rather than the character */
|
||||
+ vreplgr2vr.b vr0, t0 /* replicate the byte index into a shuffle-control vector */
|
||||
+ xvpermi.q xr3, xr2, 1 /* presumably moves the upper 128 bits of xr2 (data ^ c) into the low half of xr3 - confirm */
|
||||
|
|
||||
+
|
||||
+ vshuf.b vr0, vr3, vr2, vr0 /* presumably selects byte [t0] of the 32-byte data ^ c value split across vr3:vr2 - confirm */
|
||||
+ vpickve2gr.bu t0, vr0, 0 /* t0 = that byte, zero-extended: 0 when the byte equals the character */
|
||||
+ masknez a0, a0, t0 /* result = 0 when t0 != 0 (stopped at NUL, not at the character) */
|
||||
|
|
||||
+#endif
|
||||
+ jr ra /* return */
|
||||
|
|
||||
+
|
||||
+END(STRCHR)
|
||||
+
|
||||
+libc_hidden_builtin_def(STRCHR)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
|
||||
new file mode 100644
|
||||
index 00000000..dae98b0a
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
|
||||
@@ -0,0 +1,73 @@
|
||||
+/* Optimized strchr implementation using loongarch LSX SIMD instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||
+
|
||||
+#ifndef AS_STRCHRNUL
|
||||
+# define STRCHR __strchr_lsx
|
||||
+#endif
|
||||
+
|
||||
+LEAF(STRCHR, 6) /* 128-bit vector strchr (strchrnul when AS_STRCHRNUL is defined): a0 is used as the string pointer, a1 as the character; result is left in a0. */
|
||||
+ andi t1, a0, 0xf /* t1 = offset of the string start inside its 16-byte block */
|
||||
+ bstrins.d a0, zero, 3, 0 /* clear bits 3..0: round a0 down to a 16-byte boundary */
|
||||
+ vld vr0, a0, 0 /* load the aligned 16-byte block containing the first byte */
|
||||
+ li.d t2, -1 /* t2 = all ones (also the "no hit" value compared against below) */
|
||||
|
|
||||
+
|
||||
+ vreplgr2vr.b vr1, a1 /* vr1 = character replicated into every byte lane */
|
||||
+ sll.d t3, t2, t1 /* t3 = ones from the start offset upwards; ~t3 covers the bytes before the string */
|
||||
+ vxor.v vr2, vr0, vr1 /* vr2 = data ^ character: 0 where a byte matches */
|
||||
+ vmin.bu vr0, vr0, vr2 /* unsigned min(data, data ^ c): 0 exactly where a byte is NUL or matches */
|
||||
|
|
||||
+
|
||||
+ vmsknz.b vr0, vr0 /* presumably a 16-bit mask with one bit per non-zero byte - confirm against the LSX manual */
|
||||
+ movfr2gr.s t0, fa0 /* move the low 32 bits to t0 (fa0 is presumably the FP view of vector register 0) */
|
||||
+ ext.w.h t0, t0 /* sign-extend the 16-bit mask so that "all 16 bytes non-zero" becomes -1 */
|
||||
+ orn t0, t0, t3 /* t0 |= ~t3: treat the bytes before the string as non-zero */
|
||||
|
|
||||
+
|
||||
+ beq t0, t2, L(loop) /* mask is all ones: nothing in the first block, enter the loop */
|
||||
|
|
||||
+L(found): /* t0 = mask with a 0 bit at each NUL/match byte; a0 = address of that block */
|
||||
+ cto.w t0, t0 /* count trailing ones = index of the first NUL-or-match byte */
|
||||
+ add.d a0, a0, t0 /* a0 = address of that byte (this is the strchrnul result) */
|
||||
|
|
||||
+#ifndef AS_STRCHRNUL /* strchr only: return 0 when the byte found is the terminator rather than the character */
|
||||
+ vreplve.b vr2, vr2, t0 /* broadcast byte [t0] of data ^ c into every lane */
|
||||
+ vpickve2gr.bu t1, vr2, 0 /* t1 = that byte, zero-extended: 0 when the byte equals the character */
|
||||
+ masknez a0, a0, t1 /* result = 0 when t1 != 0 (stopped at NUL, not at the character) */
|
||||
|
|
||||
+#endif
|
||||
+ jr ra /* return */
|
||||
|
|
||||
+
|
||||
|
|
||||
+
|
||||
+L(loop): /* main loop: one 16-byte block per iteration */
|
||||
+ vld vr0, a0, 16 /* load the next block */
|
||||
+ addi.d a0, a0, 16 /* a0 now points at the block just loaded */
|
||||
+ vxor.v vr2, vr0, vr1 /* 0 where a byte matches */
|
||||
+ vmin.bu vr0, vr0, vr2 /* 0 where a byte is NUL or matches */
|
||||
|
|
||||
+
|
||||
+ vsetanyeqz.b fcc0, vr0 /* fcc0 set when any byte of vr0 is zero (per the mnemonic - confirm) */
|
||||
+ bceqz fcc0, L(loop) /* none: keep scanning */
|
||||
+ vmsknz.b vr0, vr0 /* non-zero byte mask for the block with the hit */
|
||||
+ movfr2gr.s t0, fa0 /* mask to general register; no sign extension needed since at least one bit is clear */
|
||||
|
|
||||
+
|
||||
+ b L(found) /* share the result computation with the first-block path */
|
||||
|
|
||||
+END(STRCHR)
|
||||
+
|
||||
+libc_hidden_builtin_def (STRCHR)
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr.c b/sysdeps/loongarch/lp64/multiarch/strchr.c
|
||||
new file mode 100644
|
||||
index 00000000..404e97bd
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strchr.c
|
||||
@@ -0,0 +1,36 @@
|
||||
+/* Multiple versions of strchr.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* Define multiple versions only for the definition in libc. */
|
||||
+#if IS_IN (libc) /* everything below is compiled only for libc itself */
|
||||
+# define strchr __redirect_strchr /* while <string.h> is read, its strchr declaration is emitted under the name __redirect_strchr */
|
||||
+# include <string.h>
|
||||
+# undef strchr /* from here on, strchr is free to be defined by this file */
|
||||
|
|
||||
+
|
||||
+# define SYMBOL_NAME strchr /* consumed by the header included next - presumably to name the per-CPU variants; confirm in ifunc-strchr.h */
|
||||
+# include "ifunc-strchr.h" /* presumably supplies IFUNC_SELECTOR used below - not visible here */
|
||||
|
|
||||
+
|
||||
+libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ()); /* defines strchr via the selector; macro defined elsewhere - presumably creates an IFUNC symbol typed like __redirect_strchr */
|
||||
+weak_alias(strchr, index) /* also exports the same symbol under the name index (macro defined elsewhere) */
|
||||
|
|
||||
+# ifdef SHARED /* shared-library builds only */
|
||||
+__hidden_ver1 (strchr, __GI_strchr, __redirect_strchr) /* NOTE(review): presumably binds the internal __GI_strchr name to strchr - confirm against the macro definition */
|
||||
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strchr); /* hidden visibility; other attributes copied from strchr */
|
||||
|
|
||||
+# endif
|
||||
+
|
||||
+#endif
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
|
||||
new file mode 100644
|
||||
index 00000000..1c01a023
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
|
||||
@@ -0,0 +1,95 @@
|
||||
+/* Optimized strchrnul implementation using basic Loongarch instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+#include <sys/regdef.h>
|
||||
+#include <sys/asm.h>
|
||||
+
|
||||
+#if IS_IN (libc)
|
||||
+# define STRCHRNUL_NAME __strchrnul_aligned
|
||||
+#else
|
||||
+# define STRCHRNUL_NAME __strchrnul
|
||||
+#endif
|
||||
+
|
||||
+LEAF(STRCHRNUL_NAME, 6) /* Word-at-a-time strchrnul: a0 is dereferenced as the string pointer, the low byte of a1 is the character sought; a0 returns the address of the first match or of the terminating NUL, whichever comes first. */
|
||||
+ slli.d t1, a0, 3 /* t1 = a0 * 8, used below only as a shift count; presumably sll.d honours just the low 6 bits, giving 8 * (a0 & 7) - confirm against the ISA manual */
|
||||
+ bstrins.d a0, zero, 2, 0 /* clear bits 2..0: round a0 down to an 8-byte boundary */
|
||||
+ lu12i.w a2, 0x01010 /* a2 = 0x01010000 */
|
||||
+ ld.d t2, a0, 0 /* t2 = aligned doubleword that contains the first string byte */
|
||||
+
|
||||
+ ori a2, a2, 0x101 /* a2 = 0x01010101 */
|
||||
+ andi a1, a1, 0xff /* keep only the low byte of the character argument */
|
||||
+ bstrins.d a2, a2, 63, 32 /* copy the low half into the high half: a2 = 0x0101010101010101 */
|
||||
+ li.w t0, -1 /* t0 = all ones */
|
||||
|
|
||||
+
|
||||
+ mul.d a1, a1, a2 /* a1 = the character replicated into all 8 bytes */
|
||||
+ sll.d t0, t0, t1 /* t0 = ones from the start byte upwards, zeros in the bytes that precede the string */
|
||||
+ slli.d a3, a2, 7 /* a3 = 0x8080808080808080 (per-byte flag mask) */
|
||||
+ orn t2, t2, t0 /* t2 |= ~t0: force the bytes before the string to 0xff so they cannot look like NUL */
|
||||
|
|
||||
+
|
||||
+ sll.d t3, a1, t1 /* replicated character with the pre-string bytes zeroed, so the xor below leaves those bytes 0xff (no false match) */
|
||||
+ xor t4, t2, t3 /* t4: bytes equal to the character become 0 */
|
||||
+ sub.d a4, t2, a2 /* a4 = data - 0x01.. (first half of the zero-byte test for NUL) */
|
||||
+ sub.d a5, t4, a2 /* a5 = xored - 0x01.. (first half of the zero-byte test for a match) */
|
||||
|
|
||||
+
|
||||
|
|
||||
+
|
||||
+ andn a4, a4, t2 /* a4 = (data - 0x01..) & ~data: NUL candidates */
|
||||
+ andn a5, a5, t4 /* a5 = (xored - 0x01..) & ~xored: match candidates */
|
||||
+ or t0, a4, a5 /* combine both candidate sets */
|
||||
+ and t0, t0, a3 /* keep only the per-byte 0x80 flags; nonzero when this doubleword holds a NUL or a match */
|
||||
|
|
||||
+
|
||||
+ bnez t0, L(end) /* hit in the first (possibly partial) doubleword */
|
||||
+ addi.d a0, a0, 8 /* advance to the next aligned doubleword */
|
||||
|
|
||||
+L(loop): /* main loop: examines two aligned doublewords per iteration */
|
||||
+ ld.d t4, a0, 0 /* t4 = raw data (note: t2/t4 swap roles relative to the prologue) */
|
||||
+ xor t2, t4, a1 /* t2: bytes equal to the character become 0 */
|
||||
|
|
||||
+
|
||||
+ sub.d a4, t4, a2 /* NUL test, part 1 */
|
||||
+ sub.d a5, t2, a2 /* match test, part 1 */
|
||||
+ andn a4, a4, t4 /* NUL candidates */
|
||||
+ andn a5, a5, t2 /* match candidates */
|
||||
|
|
||||
+
|
||||
+ or t0, a4, a5 /* combine */
|
||||
+ and t0, t0, a3 /* keep the 0x80 flags */
|
||||
+ bnez t0, L(end) /* hit in the first doubleword; a0 still points at it */
|
||||
+ ld.d t4, a0, 8 /* second doubleword of this iteration */
|
||||
|
|
||||
+
|
||||
|
|
||||
+
|
||||
+ addi.d a0, a0, 16 /* pre-advance past both doublewords */
|
||||
+ xor t2, t4, a1 /* bytes equal to the character become 0 */
|
||||
+ sub.d a4, t4, a2 /* NUL test, part 1 */
|
||||
+ sub.d a5, t2, a2 /* match test, part 1 */
|
||||
|
|
||||
+
|
||||
+ andn a4, a4, t4 /* NUL candidates */
|
||||
+ andn a5, a5, t2 /* match candidates */
|
||||
+ or t0, a4, a5 /* combine */
|
||||
+ and t0, t0, a3 /* keep the 0x80 flags */
|
||||
|
|
||||
+
|
||||
+ beqz t0, L(loop) /* nothing found in either doubleword: keep scanning */
|
||||
+ addi.d a0, a0, -8 /* hit was in the second doubleword: undo half of the pre-advance */
|
||||
|
|
||||
+L(end): /* t0 = combined NUL/match flags; a0 = address of the doubleword with the hit */
|
||||
+ ctz.d t0, t0 /* bit position of the lowest flag, i.e. of the first NUL-or-match byte */
|
||||
+ srli.w t0, t0, 3 /* convert the bit position to a byte index */
|
||||
|
|
||||
+
|
||||
|
|
||||
+
|
||||
+ add.d a0, a0, t0 /* a0 = address of that byte */
|
||||
+ jr ra /* return */
|
||||
|
|
||||
+END(STRCHRNUL_NAME)
|
||||
+
|
||||
+libc_hidden_builtin_def (STRCHRNUL_NAME)
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
|
||||
new file mode 100644
|
||||
index 00000000..d45495e4
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
|
||||
@@ -0,0 +1,22 @@
|
||||
+/* Optimized strchrnul implementation using loongarch LASX SIMD instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#define STRCHR __strchrnul_lasx
|
||||
+#define AS_STRCHRNUL
|
||||
+#include "strchr-lasx.S"
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
|
||||
new file mode 100644
|
||||
index 00000000..07d793ae
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
|
||||
@@ -0,0 +1,22 @@
|
||||
+/* Optimized strchrnul implementation using loongarch LSX SIMD instructions.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#define STRCHR __strchrnul_lsx
|
||||
+#define AS_STRCHRNUL
|
||||
+#include "strchr-lsx.S"
|
||||
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul.c b/sysdeps/loongarch/lp64/multiarch/strchrnul.c
|
||||
new file mode 100644
|
||||
index 00000000..f3b8296e
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul.c
|
||||
@@ -0,0 +1,39 @@
|
||||
+/* Multiple versions of strchrnul.
|
||||
+ All versions must be listed in ifunc-impl-list.c.
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* Define multiple versions only for the definition in libc. */
|
||||
+
|
||||
+#if IS_IN (libc) /* everything below is compiled only for libc itself */
|
||||
+# define strchrnul __redirect_strchrnul /* while <string.h> is read, its strchrnul declaration is emitted under the name __redirect_strchrnul */
|
||||
+# define __strchrnul __redirect___strchrnul /* likewise for the internal __strchrnul name */
|
||||
+# include <string.h>
|
||||
+# undef __strchrnul /* from here on, both names are free to be defined by this file */
|
||||
+# undef strchrnul
|
||||
|
|
||||
+
|
||||
+# define SYMBOL_NAME strchrnul /* consumed by the header included next - presumably to name the per-CPU variants; confirm in ifunc-strchrnul.h */
|
||||
+# include "ifunc-strchrnul.h" /* presumably supplies IFUNC_SELECTOR used below - not visible here */
|
||||
|
|
||||
+
|
||||
+libc_ifunc_redirected (__redirect_strchrnul, __strchrnul,
|
||||
+ IFUNC_SELECTOR ()); /* defines __strchrnul via the selector; macro defined elsewhere - presumably creates an IFUNC symbol typed like __redirect_strchrnul */
|
||||
+weak_alias (__strchrnul, strchrnul) /* also exports the same symbol under the public name strchrnul (macro defined elsewhere) */
|
||||
|
|
||||
+# ifdef SHARED /* shared-library builds only */
|
||||
+__hidden_ver1 (__strchrnul, __GI___strchrnul, __redirect_strchrnul) /* NOTE(review): presumably binds the internal __GI___strchrnul name to __strchrnul - confirm against the macro definition */
|
||||
+ __attribute__((visibility ("hidden"))) __attribute_copy__ (strchrnul); /* hidden visibility; other attributes copied from strchrnul */
|
||||
|
|
||||
+# endif
|
||||
+#endif
|
||||
--
|
||||
2.33.0
|
||||
|
478
Revert-LoongArch-Add-glibc.cpu.hwcap-support.patch
Normal file
478
Revert-LoongArch-Add-glibc.cpu.hwcap-support.patch
Normal file
|
@ -0,0 +1,478 @@
|
|||
From c0f3b0a8c71c26d5351e8ddabe3e8a323803e683 Mon Sep 17 00:00:00 2001
|
||||
From: caiyinyu <caiyinyu@loongson.cn>
|
||||
Date: Thu, 21 Sep 2023 09:10:11 +0800
|
||||
Subject: [PATCH 26/29] Revert "LoongArch: Add glibc.cpu.hwcap support."
|
||||
|
||||
This reverts commit a53451559dc9cce765ea5bcbb92c4007e058e92b.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
sysdeps/loongarch/Makefile | 4 -
|
||||
sysdeps/loongarch/Versions | 5 --
|
||||
sysdeps/loongarch/cpu-tunables.c | 89 -------------------
|
||||
sysdeps/loongarch/dl-get-cpu-features.c | 25 ------
|
||||
sysdeps/loongarch/dl-machine.h | 27 +-----
|
||||
sysdeps/loongarch/dl-tunables.list | 25 ------
|
||||
.../unix/sysv/linux/loongarch/cpu-features.c | 29 ------
|
||||
.../unix/sysv/linux/loongarch/cpu-features.h | 18 +---
|
||||
.../unix/sysv/linux/loongarch/dl-procinfo.c | 60 -------------
|
||||
sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c | 21 -----
|
||||
.../unix/sysv/linux/loongarch/libc-start.c | 34 -------
|
||||
11 files changed, 8 insertions(+), 329 deletions(-)
|
||||
delete mode 100644 sysdeps/loongarch/Versions
|
||||
delete mode 100644 sysdeps/loongarch/cpu-tunables.c
|
||||
delete mode 100644 sysdeps/loongarch/dl-get-cpu-features.c
|
||||
delete mode 100644 sysdeps/loongarch/dl-tunables.list
|
||||
delete mode 100644 sysdeps/unix/sysv/linux/loongarch/cpu-features.c
|
||||
delete mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
|
||||
delete mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
|
||||
delete mode 100644 sysdeps/unix/sysv/linux/loongarch/libc-start.c
|
||||
|
||||
diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
|
||||
index 30a1f4a8..43d2f583 100644
|
||||
--- a/sysdeps/loongarch/Makefile
|
||||
+++ b/sysdeps/loongarch/Makefile
|
||||
@@ -6,10 +6,6 @@ ifeq ($(subdir),elf)
|
||||
gen-as-const-headers += dl-link.sym
|
||||
endif
|
||||
|
||||
-ifeq ($(subdir),elf)
|
||||
- sysdep-dl-routines += dl-get-cpu-features
|
||||
-endif
|
||||
-
|
||||
# LoongArch's assembler also needs to know about PIC as it changes the
|
||||
# definition of some assembler macros.
|
||||
ASFLAGS-.os += $(pic-ccflag)
|
||||
diff --git a/sysdeps/loongarch/Versions b/sysdeps/loongarch/Versions
|
||||
deleted file mode 100644
|
||||
index 33ae2cc0..00000000
|
||||
--- a/sysdeps/loongarch/Versions
|
||||
+++ /dev/null
|
||||
@@ -1,5 +0,0 @@
|
||||
-ld {
|
||||
- GLIBC_PRIVATE {
|
||||
- _dl_larch_get_cpu_features;
|
||||
- }
|
||||
-}
|
||||
diff --git a/sysdeps/loongarch/cpu-tunables.c b/sysdeps/loongarch/cpu-tunables.c
|
||||
deleted file mode 100644
|
||||
index 8e9fab93..00000000
|
||||
--- a/sysdeps/loongarch/cpu-tunables.c
|
||||
+++ /dev/null
|
||||
@@ -1,89 +0,0 @@
|
||||
-/* LoongArch CPU feature tuning.
|
||||
- This file is part of the GNU C Library.
|
||||
- Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
-
|
||||
- The GNU C Library is free software; you can redistribute it and/or
|
||||
- modify it under the terms of the GNU Lesser General Public
|
||||
- License as published by the Free Software Foundation; either
|
||||
- version 2.1 of the License, or (at your option) any later version.
|
||||
-
|
||||
- The GNU C Library is distributed in the hope that it will be useful,
|
||||
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
- Lesser General Public License for more details.
|
||||
-
|
||||
- You should have received a copy of the GNU Lesser General Public
|
||||
- License along with the GNU C Library; if not, see
|
||||
- <http://www.gnu.org/licenses/>. */
|
||||
-
|
||||
-# include <stdbool.h>
|
||||
-# include <stdint.h>
|
||||
-# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. */
|
||||
-# include <elf/dl-tunables.h>
|
||||
-# include <string.h>
|
||||
-# include <cpu-features.h>
|
||||
-# include <ldsodefs.h>
|
||||
-# include <sys/auxv.h>
|
||||
-
|
||||
-# define HWCAP_LOONGARCH_IFUNC \
|
||||
- (HWCAP_LOONGARCH_UAL | HWCAP_LOONGARCH_LSX | HWCAP_LOONGARCH_LASX)
|
||||
-
|
||||
-# define CHECK_GLIBC_IFUNC_CPU_OFF(f, name, len) \
|
||||
- _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \
|
||||
- if (!memcmp (f, #name, len) && \
|
||||
- (GLRO (dl_hwcap) & HWCAP_LOONGARCH_##name)) \
|
||||
- { \
|
||||
- hwcap |= (HWCAP_LOONGARCH_##name | (~HWCAP_LOONGARCH_IFUNC)); \
|
||||
- break; \
|
||||
- } \
|
||||
-
|
||||
-attribute_hidden
|
||||
-void
|
||||
-TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
|
||||
-{
|
||||
- const char *p = valp->strval;
|
||||
- size_t len;
|
||||
- unsigned long hwcap = 0;
|
||||
- const char *c;
|
||||
-
|
||||
- do {
|
||||
- for (c = p; *c != ','; c++)
|
||||
- if (*c == '\0')
|
||||
- break;
|
||||
-
|
||||
- len = c - p;
|
||||
-
|
||||
- switch(len)
|
||||
- {
|
||||
- default:
|
||||
- _dl_fatal_printf (
|
||||
- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
|
||||
- );
|
||||
- break;
|
||||
- case 3:
|
||||
- {
|
||||
- CHECK_GLIBC_IFUNC_CPU_OFF (p, LSX, 3);
|
||||
- CHECK_GLIBC_IFUNC_CPU_OFF (p, UAL, 3);
|
||||
- _dl_fatal_printf (
|
||||
- "Some features are invalid or not supported on this machine!!\n"
|
||||
- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
|
||||
- );
|
||||
- }
|
||||
- break;
|
||||
- case 4:
|
||||
- {
|
||||
- CHECK_GLIBC_IFUNC_CPU_OFF (p, LASX, 4);
|
||||
- _dl_fatal_printf (
|
||||
- "Some features are invalid or not supported on this machine!!\n"
|
||||
- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
|
||||
- );
|
||||
- }
|
||||
- break;
|
||||
- }
|
||||
-
|
||||
- p += len + 1;
|
||||
- }
|
||||
- while (*c != '\0');
|
||||
-
|
||||
- GLRO (dl_larch_cpu_features).hwcap &= hwcap;
|
||||
-}
|
||||
diff --git a/sysdeps/loongarch/dl-get-cpu-features.c b/sysdeps/loongarch/dl-get-cpu-features.c
|
||||
deleted file mode 100644
|
||||
index 7cd9bc15..00000000
|
||||
--- a/sysdeps/loongarch/dl-get-cpu-features.c
|
||||
+++ /dev/null
|
||||
@@ -1,25 +0,0 @@
|
||||
-/* Define _dl_larch_get_cpu_features.
|
||||
- Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
-
|
||||
- The GNU C Library is free software; you can redistribute it and/or
|
||||
- modify it under the terms of the GNU Lesser General Public
|
||||
- License as published by the Free Software Foundation; either
|
||||
- version 2.1 of the License, or (at your option) any later version.
|
||||
-
|
||||
- The GNU C Library is distributed in the hope that it will be useful,
|
||||
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
- Lesser General Public License for more details.
|
||||
-
|
||||
- You should have received a copy of the GNU Lesser General Public
|
||||
- License along with the GNU C Library; if not, see
|
||||
- <https://www.gnu.org/licenses/>. */
|
||||
-
|
||||
-
|
||||
-#include <ldsodefs.h>
|
||||
-
|
||||
-const struct cpu_features *
|
||||
-_dl_larch_get_cpu_features (void)
|
||||
-{
|
||||
- return &GLRO(dl_larch_cpu_features);
|
||||
-}
|
||||
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
|
||||
index b395a928..57913cef 100644
|
||||
--- a/sysdeps/loongarch/dl-machine.h
|
||||
+++ b/sysdeps/loongarch/dl-machine.h
|
||||
@@ -29,8 +29,6 @@
|
||||
#include <dl-static-tls.h>
|
||||
#include <dl-machine-rel.h>
|
||||
|
||||
-#include <cpu-features.c>
|
||||
-
|
||||
#ifndef _RTLD_PROLOGUE
|
||||
# define _RTLD_PROLOGUE(entry) \
|
||||
".globl\t" __STRING (entry) "\n\t" \
|
||||
@@ -55,23 +53,6 @@
|
||||
#define ELF_MACHINE_NO_REL 1
|
||||
#define ELF_MACHINE_NO_RELA 0
|
||||
|
||||
-#define DL_PLATFORM_INIT dl_platform_init ()
|
||||
-
|
||||
-static inline void __attribute__ ((unused))
|
||||
-dl_platform_init (void)
|
||||
-{
|
||||
- if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
|
||||
- /* Avoid an empty string which would disturb us. */
|
||||
- GLRO(dl_platform) = NULL;
|
||||
-
|
||||
-#ifdef SHARED
|
||||
- /* init_cpu_features has been called early from __libc_start_main in
|
||||
- static executable. */
|
||||
- init_cpu_features (&GLRO(dl_larch_cpu_features));
|
||||
-#endif
|
||||
-}
|
||||
-
|
||||
-
|
||||
/* Return nonzero iff ELF header is compatible with the running host. */
|
||||
static inline int
|
||||
elf_machine_matches_host (const ElfW (Ehdr) *ehdr)
|
||||
@@ -309,9 +290,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
|
||||
if (profile != 0)
|
||||
{
|
||||
#if !defined __loongarch_soft_float
|
||||
- if (RTLD_SUPPORT_LASX)
|
||||
+ if (SUPPORT_LASX)
|
||||
gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx;
|
||||
- else if (RTLD_SUPPORT_LSX)
|
||||
+ else if (SUPPORT_LSX)
|
||||
gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx;
|
||||
else
|
||||
#endif
|
||||
@@ -329,9 +310,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
|
||||
indicated by the offset on the stack, and then jump to
|
||||
the resolved address. */
|
||||
#if !defined __loongarch_soft_float
|
||||
- if (RTLD_SUPPORT_LASX)
|
||||
+ if (SUPPORT_LASX)
|
||||
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
|
||||
- else if (RTLD_SUPPORT_LSX)
|
||||
+ else if (SUPPORT_LSX)
|
||||
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lsx;
|
||||
else
|
||||
#endif
|
||||
diff --git a/sysdeps/loongarch/dl-tunables.list b/sysdeps/loongarch/dl-tunables.list
|
||||
deleted file mode 100644
|
||||
index 66b34275..00000000
|
||||
--- a/sysdeps/loongarch/dl-tunables.list
|
||||
+++ /dev/null
|
||||
@@ -1,25 +0,0 @@
|
||||
-# LoongArch specific tunables.
|
||||
-# Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
-# This file is part of the GNU C Library.
|
||||
-
|
||||
-# The GNU C Library is free software; you can redistribute it and/or
|
||||
-# modify it under the terms of the GNU Lesser General Public
|
||||
-# License as published by the Free Software Foundation; either
|
||||
-# version 2.1 of the License, or (at your option) any later version.
|
||||
-
|
||||
-# The GNU C Library is distributed in the hope that it will be useful,
|
||||
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
-# Lesser General Public License for more details.
|
||||
-
|
||||
-# You should have received a copy of the GNU Lesser General Public
|
||||
-# License along with the GNU C Library; if not, see
|
||||
-# <http://www.gnu.org/licenses/>.
|
||||
-
|
||||
-glibc {
|
||||
- cpu {
|
||||
- hwcaps {
|
||||
- type: STRING
|
||||
- }
|
||||
- }
|
||||
-}
|
||||
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
|
||||
deleted file mode 100644
|
||||
index 1290c4ce..00000000
|
||||
--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
|
||||
+++ /dev/null
|
||||
@@ -1,29 +0,0 @@
|
||||
-/* Initialize CPU feature data. LoongArch64 version.
|
||||
- This file is part of the GNU C Library.
|
||||
- Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
-
|
||||
- The GNU C Library is free software; you can redistribute it and/or
|
||||
- modify it under the terms of the GNU Lesser General Public
|
||||
- License as published by the Free Software Foundation; either
|
||||
- version 2.1 of the License, or (at your option) any later version.
|
||||
-
|
||||
- The GNU C Library is distributed in the hope that it will be useful,
|
||||
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
- Lesser General Public License for more details.
|
||||
-
|
||||
- You should have received a copy of the GNU Lesser General Public
|
||||
- License along with the GNU C Library; if not, see
|
||||
- <http://www.gnu.org/licenses/>. */
|
||||
-
|
||||
-#include <cpu-features.h>
|
||||
-#include <elf/dl-hwcaps.h>
|
||||
-#include <elf/dl-tunables.h>
|
||||
-extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) attribute_hidden;
|
||||
-
|
||||
-static inline void
|
||||
-init_cpu_features (struct cpu_features *cpu_features)
|
||||
-{
|
||||
- GLRO (dl_larch_cpu_features).hwcap = GLRO (dl_hwcap);
|
||||
- TUNABLE_GET (glibc, cpu, hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
|
||||
-}
|
||||
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
|
||||
index 450963ce..d1a280a5 100644
|
||||
--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
|
||||
+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
|
||||
@@ -19,23 +19,13 @@
|
||||
#ifndef _CPU_FEATURES_LOONGARCH64_H
|
||||
#define _CPU_FEATURES_LOONGARCH64_H
|
||||
|
||||
-#include <stdint.h>
|
||||
#include <sys/auxv.h>
|
||||
|
||||
-struct cpu_features
|
||||
- {
|
||||
- uint64_t hwcap;
|
||||
- };
|
||||
+#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL)
|
||||
+#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
|
||||
+#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
|
||||
|
||||
-/* Get a pointer to the CPU features structure. */
|
||||
-extern const struct cpu_features *_dl_larch_get_cpu_features (void)
|
||||
- __attribute__ ((pure));
|
||||
-
|
||||
-#define SUPPORT_UAL (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_UAL)
|
||||
-#define SUPPORT_LSX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LSX)
|
||||
-#define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LASX)
|
||||
-#define RTLD_SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
|
||||
-#define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
|
||||
#define INIT_ARCH()
|
||||
|
||||
#endif /* _CPU_FEATURES_LOONGARCH64_H */
|
||||
+
|
||||
diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
|
||||
deleted file mode 100644
|
||||
index 6217fda9..00000000
|
||||
--- a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
|
||||
+++ /dev/null
|
||||
@@ -1,60 +0,0 @@
|
||||
-/* Data for LoongArch64 version of processor capability information.
|
||||
- Linux version.
|
||||
- Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
- This file is part of the GNU C Library.
|
||||
-
|
||||
- The GNU C Library is free software; you can redistribute it and/or
|
||||
- modify it under the terms of the GNU Lesser General Public
|
||||
- License as published by the Free Software Foundation; either
|
||||
- version 2.1 of the License, or (at your option) any later version.
|
||||
-
|
||||
- The GNU C Library is distributed in the hope that it will be useful,
|
||||
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
- Lesser General Public License for more details.
|
||||
-
|
||||
- You should have received a copy of the GNU Lesser General Public
|
||||
- License along with the GNU C Library; if not, see
|
||||
- <http://www.gnu.org/licenses/>. */
|
||||
-
|
||||
-/* If anything should be added here check whether the size of each string
|
||||
- is still ok with the given array size.
|
||||
-
|
||||
- All the #ifdefs in the definitions are quite irritating but
|
||||
- necessary if we want to avoid duplicating the information. There
|
||||
- are three different modes:
|
||||
-
|
||||
- - PROCINFO_DECL is defined. This means we are only interested in
|
||||
- declarations.
|
||||
-
|
||||
- - PROCINFO_DECL is not defined:
|
||||
-
|
||||
- + if SHARED is defined the file is included in an array
|
||||
- initializer. The .element = { ... } syntax is needed.
|
||||
-
|
||||
- + if SHARED is not defined a normal array initialization is
|
||||
- needed.
|
||||
- */
|
||||
-
|
||||
-#ifndef PROCINFO_CLASS
|
||||
-# define PROCINFO_CLASS
|
||||
-#endif
|
||||
-
|
||||
-#if !IS_IN (ldconfig)
|
||||
-# if !defined PROCINFO_DECL && defined SHARED
|
||||
- ._dl_larch_cpu_features
|
||||
-# else
|
||||
-PROCINFO_CLASS struct cpu_features _dl_larch_cpu_features
|
||||
-# endif
|
||||
-# ifndef PROCINFO_DECL
|
||||
-= { }
|
||||
-# endif
|
||||
-# if !defined SHARED || defined PROCINFO_DECL
|
||||
-;
|
||||
-# else
|
||||
-,
|
||||
-# endif
|
||||
-#endif
|
||||
-
|
||||
-#undef PROCINFO_DECL
|
||||
-#undef PROCINFO_CLASS
|
||||
diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
|
||||
deleted file mode 100644
|
||||
index 455fd71a..00000000
|
||||
--- a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
|
||||
+++ /dev/null
|
||||
@@ -1,21 +0,0 @@
|
||||
-/* Operating system support for run-time dynamic linker. LoongArch version.
|
||||
- Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
- This file is part of the GNU C Library.
|
||||
-
|
||||
- The GNU C Library is free software; you can redistribute it and/or
|
||||
- modify it under the terms of the GNU Lesser General Public
|
||||
- License as published by the Free Software Foundation; either
|
||||
- version 2.1 of the License, or (at your option) any later version.
|
||||
-
|
||||
- The GNU C Library is distributed in the hope that it will be useful,
|
||||
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
- Lesser General Public License for more details.
|
||||
-
|
||||
- You should have received a copy of the GNU Lesser General Public
|
||||
- License along with the GNU C Library; if not, see
|
||||
- <http://www.gnu.org/licenses/>. */
|
||||
-
|
||||
-#include <config.h>
|
||||
-#include <sysdeps/loongarch/cpu-tunables.c>
|
||||
-#include <sysdeps/unix/sysv/linux/dl-sysdep.c>
|
||||
diff --git a/sysdeps/unix/sysv/linux/loongarch/libc-start.c b/sysdeps/unix/sysv/linux/loongarch/libc-start.c
|
||||
deleted file mode 100644
|
||||
index f1346ece..00000000
|
||||
--- a/sysdeps/unix/sysv/linux/loongarch/libc-start.c
|
||||
+++ /dev/null
|
||||
@@ -1,34 +0,0 @@
|
||||
-/* Override csu/libc-start.c on LoongArch64.
|
||||
- Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
- This file is part of the GNU C Library.
|
||||
-
|
||||
- The GNU C Library is free software; you can redistribute it and/or
|
||||
- modify it under the terms of the GNU Lesser General Public
|
||||
- License as published by the Free Software Foundation; either
|
||||
- version 2.1 of the License, or (at your option) any later version.
|
||||
-
|
||||
- The GNU C Library is distributed in the hope that it will be useful,
|
||||
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
- Lesser General Public License for more details.
|
||||
-
|
||||
- You should have received a copy of the GNU Lesser General Public
|
||||
- License along with the GNU C Library; if not, see
|
||||
- <http://www.gnu.org/licenses/>. */
|
||||
-
|
||||
-#ifndef SHARED
|
||||
-
|
||||
-/* Mark symbols hidden in static PIE for early self relocation to work. */
|
||||
-# if BUILD_PIE_DEFAULT
|
||||
-# pragma GCC visibility push(hidden)
|
||||
-# endif
|
||||
-
|
||||
-# include <ldsodefs.h>
|
||||
-# include <cpu-features.c>
|
||||
-
|
||||
-extern struct cpu_features _dl_larch_cpu_features;
|
||||
-
|
||||
-# define ARCH_INIT_CPU_FEATURES() init_cpu_features (&_dl_larch_cpu_features)
|
||||
-
|
||||
-#endif
|
||||
-#include <csu/libc-start.c>
|
||||
--
|
||||
2.33.0
|
||||
|
39
elf-Add-new-LoongArch-reloc-types-101-to-108-into-el.patch
Normal file
39
elf-Add-new-LoongArch-reloc-types-101-to-108-into-el.patch
Normal file
|
@ -0,0 +1,39 @@
|
|||
From fc60db3cf29ba157d09ba4f4b92e3ab382b0339d Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Wed, 9 Aug 2023 19:12:54 +0800
|
||||
Subject: [PATCH 04/29] elf: Add new LoongArch reloc types (101 to 108) into
|
||||
elf.h
|
||||
|
||||
These reloc types are generated by GNU assembler >= 2.41 for relaxation
|
||||
support.
|
||||
|
||||
Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=57a930e3
|
||||
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
elf/elf.h | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/elf/elf.h b/elf/elf.h
|
||||
index 89fc8021..d623bdeb 100644
|
||||
--- a/elf/elf.h
|
||||
+++ b/elf/elf.h
|
||||
@@ -4205,6 +4205,14 @@ enum
|
||||
#define R_LARCH_TLS_GD_HI20 98
|
||||
#define R_LARCH_32_PCREL 99
|
||||
#define R_LARCH_RELAX 100
|
||||
+#define R_LARCH_DELETE 101
|
||||
+#define R_LARCH_ALIGN 102
|
||||
+#define R_LARCH_PCREL20_S2 103
|
||||
+#define R_LARCH_CFA 104
|
||||
+#define R_LARCH_ADD6 105
|
||||
+#define R_LARCH_SUB6 106
|
||||
+#define R_LARCH_ADD_ULEB128 107
|
||||
+#define R_LARCH_SUB_ULEB128 108
|
||||
|
||||
/* ARC specific declarations. */
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
37
glibc.spec
37
glibc.spec
|
@ -1,4 +1,4 @@
|
|||
%define anolis_release 1
|
||||
%define anolis_release 2
|
||||
|
||||
%bcond_without testsuite
|
||||
%bcond_without benchtests
|
||||
|
@ -103,6 +103,37 @@ Patch0187: 0087-CVE-2023-6246.patch
|
|||
Patch0188: 0088-CVE-2023-6779.patch
|
||||
Patch0189: 0089-CVE-2023-6780.patch
|
||||
|
||||
# Part 3000 ~ 4999
|
||||
Patch3000: LoongArch-Redefine-macro-LEAF-ENTRY.patch
|
||||
Patch3001: LoongArch-Add-minuimum-binutils-required-version.patch
|
||||
Patch3002: Loongarch-Add-ifunc-support-and-add-different-versio.patch
|
||||
Patch3003: elf-Add-new-LoongArch-reloc-types-101-to-108-into-el.patch
|
||||
Patch3004: LoongArch-elf-Add-new-LoongArch-reloc-types-109-into.patch
|
||||
Patch3005: Loongarch-Add-ifunc-support-for-strchr-aligned-lsx-l.patch
|
||||
Patch3006: Loongarch-Add-ifunc-support-for-memcpy-aligned-unali.patch
|
||||
Patch3007: LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch
|
||||
Patch3008: LoongArch-Add-ifunc-support-for-strcmp-aligned-lsx.patch
|
||||
Patch3009: LoongArch-Add-ifunc-support-for-strncmp-aligned-lsx.patch
|
||||
Patch3010: LoongArch-Remove-support-code-for-old-linker-in-star.patch
|
||||
Patch3011: LoongArch-Micro-optimize-LD_PCREL.patch
|
||||
Patch3012: LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch
|
||||
Patch3013: LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch
|
||||
Patch3014: LoongArch-Add-ifunc-support-for-memrchr-lsx-lasx.patch
|
||||
Patch3015: LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch
|
||||
Patch3016: LoongArch-Add-ifunc-support-for-memcmp-aligned-lsx-l.patch
|
||||
Patch3017: LoongArch-Change-loongarch-to-LoongArch-in-comments.patch
|
||||
Patch3018: LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch
|
||||
Patch3019: LoongArch-Replace-deprecated-v0-with-a0-to-eliminate.patch
|
||||
Patch3020: LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch
|
||||
Patch3021: LoongArch-Add-ifunc-support-for-strrchr-aligned-lsx-.patch
|
||||
Patch3022: LoongArch-Change-to-put-magic-number-to-.rodata-sect.patch
|
||||
Patch3023: LoongArch-Add-glibc.cpu.hwcap-support.patch
|
||||
Patch3024: Revert-LoongArch-Add-glibc.cpu.hwcap-support.patch
|
||||
Patch3025: LoongArch-Unify-Register-Names.patch
|
||||
Patch3026: LoongArch-Update-hwcap.h-to-sync-with-LoongArch-kern.patch
|
||||
Patch3027: linux-Sync-Linux-6.6-elf.h.patch
|
||||
Patch3028: Decrease-value-of-arch_minimum_kernel-with-LoongArch.patch
|
||||
|
||||
BuildRequires: audit-libs-devel >= 1.1.3 libcap-devel systemtap-sdt-devel
|
||||
BuildRequires: procps-ng util-linux gawk sed >= 3.95 gettext
|
||||
BuildRequires: python3 python3-devel
|
||||
|
@ -1055,6 +1086,10 @@ update_gconv_modules_cache ()
|
|||
%{_libdir}/libpthread_nonshared.a
|
||||
|
||||
%changelog
|
||||
* Sat Mar 16 2024 Peng Fan <fanpeng@loongson.cn> - 2.38-2
|
||||
- LoongArch: sync patch from glibc upstream
|
||||
- Reduced kernel version requirements
|
||||
|
||||
* Tue Mar 05 2024 mgb01105731 <mgb01105731@alibaba-inc.com> - 2.38-1
|
||||
- update to 2.38
|
||||
|
||||
|
|
48
linux-Sync-Linux-6.6-elf.h.patch
Normal file
48
linux-Sync-Linux-6.6-elf.h.patch
Normal file
|
@ -0,0 +1,48 @@
|
|||
From 6b3d687470b8f91bc6eb87e924fe97d4592b3aa5 Mon Sep 17 00:00:00 2001
|
||||
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||
Date: Tue, 31 Oct 2023 13:32:38 -0300
|
||||
Subject: [PATCH 29/29] linux: Sync Linux 6.6 elf.h
|
||||
|
||||
It adds NT_X86_SHSTK (2fab02b25ae7cf5), NT_RISCV_CSR/NT_RISCV_VECTOR
|
||||
(9300f00439743c4), and NT_LOONGARCH_HW_BREAK/NT_LOONGARCH_HW_WATCH
|
||||
(1a69f7a161a78ae).
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
elf/elf.h | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
diff --git a/elf/elf.h b/elf/elf.h
|
||||
index 9c51073f..51633079 100644
|
||||
--- a/elf/elf.h
|
||||
+++ b/elf/elf.h
|
||||
@@ -794,6 +794,7 @@ typedef struct
|
||||
#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */
|
||||
#define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */
|
||||
#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
|
||||
+#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */
|
||||
#define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */
|
||||
#define NT_S390_TIMER 0x301 /* s390 timer register */
|
||||
#define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */
|
||||
@@ -832,6 +833,8 @@ typedef struct
|
||||
#define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers. */
|
||||
#define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode. */
|
||||
#define NT_MIPS_MSA 0x802 /* MIPS SIMD registers. */
|
||||
+#define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */
|
||||
+#define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */
|
||||
#define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers. */
|
||||
#define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and
|
||||
status registers. */
|
||||
@@ -841,6 +844,8 @@ typedef struct
|
||||
SIMD Extension registers. */
|
||||
#define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary
|
||||
Translation registers. */
|
||||
+#define NT_LOONGARCH_HW_BREAK 0xa05 /* LoongArch hardware breakpoint registers */
|
||||
+#define NT_LOONGARCH_HW_WATCH 0xa06 /* LoongArch hardware watchpoint registers */
|
||||
|
||||
/* Legal values for the note segment descriptor types for object files. */
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
Loading…
Add table
Reference in a new issue