LoongArch: Sync from glibc upstream

Signed-off-by: ticat_fp <fanpeng@loongson.cn>
ticat_fp 2024-03-18 14:24:33 +08:00
parent 8f862e2d80
commit 62fe06b740
30 changed files with 12740 additions and 1 deletion

@@ -0,0 +1,40 @@
From 2c8dfc45a8009e5110a9d2148b62d802e989fde7 Mon Sep 17 00:00:00 2001
From: ticat_fp <fanpeng@loongson.cn>
Date: Thu, 29 Feb 2024 15:58:31 +0800
Subject: [PATCH] Decrease value of arch_minimum_kernel with LoongArch
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/unix/sysv/linux/loongarch/configure | 2 +-
sysdeps/unix/sysv/linux/loongarch/configure.ac | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/sysdeps/unix/sysv/linux/loongarch/configure b/sysdeps/unix/sysv/linux/loongarch/configure
index 0d1159e9..851b2285 100644
--- a/sysdeps/unix/sysv/linux/loongarch/configure
+++ b/sysdeps/unix/sysv/linux/loongarch/configure
@@ -1,7 +1,7 @@
# This file is generated from configure.ac by Autoconf. DO NOT EDIT!
# Local configure fragment for sysdeps/unix/sysv/linux/loongarch.
-arch_minimum_kernel=5.19.0
+arch_minimum_kernel=4.19.0
libc_cv_loongarch_int_abi=no
diff --git a/sysdeps/unix/sysv/linux/loongarch/configure.ac b/sysdeps/unix/sysv/linux/loongarch/configure.ac
index 04e9150a..00815c2f 100644
--- a/sysdeps/unix/sysv/linux/loongarch/configure.ac
+++ b/sysdeps/unix/sysv/linux/loongarch/configure.ac
@@ -2,7 +2,7 @@ sinclude(./aclocal.m4)dnl Autoconf lossage
GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
# Local configure fragment for sysdeps/unix/sysv/linux/loongarch.
-arch_minimum_kernel=5.19.0
+arch_minimum_kernel=4.19.0
libc_cv_loongarch_int_abi=no
AC_EGREP_CPP(4 8 8, [__SIZEOF_INT__ __SIZEOF_LONG__ __SIZEOF_POINTER__
--
2.33.0

@@ -0,0 +1,499 @@
From 8923e4e9c79e672fd6b3b89aba598a60d5c01211 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Fri, 15 Sep 2023 17:35:19 +0800
Subject: [PATCH 25/29] LoongArch: Add glibc.cpu.hwcap support.
Key points:
1. On LASX & LSX platforms, _dl_runtime_{profile,resolve}_{lsx,lasx} must be used
to save the vector registers.
2. Via tunables, users can choose the str/mem_{lasx,lsx,unaligned} functions with
`export GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,...`.
Note: glibc.cpu.hwcaps does not affect the _dl_runtime_{profile,resolve}_{lsx,lasx}
selection.
Usage notes:
1. The only valid inputs are LASX, LSX and UAL. They are case-sensitive and
comma-separated, with no spaces.
2. Example: `export GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,UAL` turns on LASX & UAL.
Features that are not mentioned are turned off. With the default ifunc order
lasx > lsx > unaligned > aligned > generic, the effect is
lasx > unaligned > aligned > generic; lsx is off.
3. Invalid GLIBC_TUNABLES settings produce an error message. For example, on an
LSX-only platform the LASX feature cannot be enabled; attempting to do so
results in an error.
4. Valid input examples (a short hwcap-query sketch follows this patch):
- GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX: lasx > aligned > generic.
- GLIBC_TUNABLES=glibc.cpu.hwcaps=LSX,UAL: lsx > unaligned > aligned > generic.
- GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,UAL,LASX,UAL,LSX,LASX,UAL: repetitions are
allowed but not recommended; the result is lasx > lsx > unaligned > aligned >
generic.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/Makefile | 4 +
sysdeps/loongarch/Versions | 5 ++
sysdeps/loongarch/cpu-tunables.c | 89 +++++++++++++++++++
sysdeps/loongarch/dl-get-cpu-features.c | 25 ++++++
sysdeps/loongarch/dl-machine.h | 27 +++++-
sysdeps/loongarch/dl-tunables.list | 25 ++++++
.../unix/sysv/linux/loongarch/cpu-features.c | 29 ++++++
.../unix/sysv/linux/loongarch/cpu-features.h | 18 +++-
.../unix/sysv/linux/loongarch/dl-procinfo.c | 60 +++++++++++++
sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c | 21 +++++
.../unix/sysv/linux/loongarch/libc-start.c | 34 +++++++
11 files changed, 329 insertions(+), 8 deletions(-)
create mode 100644 sysdeps/loongarch/Versions
create mode 100644 sysdeps/loongarch/cpu-tunables.c
create mode 100644 sysdeps/loongarch/dl-get-cpu-features.c
create mode 100644 sysdeps/loongarch/dl-tunables.list
create mode 100644 sysdeps/unix/sysv/linux/loongarch/cpu-features.c
create mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
create mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
create mode 100644 sysdeps/unix/sysv/linux/loongarch/libc-start.c
diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
index 43d2f583..30a1f4a8 100644
--- a/sysdeps/loongarch/Makefile
+++ b/sysdeps/loongarch/Makefile
@@ -6,6 +6,10 @@ ifeq ($(subdir),elf)
gen-as-const-headers += dl-link.sym
endif
+ifeq ($(subdir),elf)
+ sysdep-dl-routines += dl-get-cpu-features
+endif
+
# LoongArch's assembler also needs to know about PIC as it changes the
# definition of some assembler macros.
ASFLAGS-.os += $(pic-ccflag)
diff --git a/sysdeps/loongarch/Versions b/sysdeps/loongarch/Versions
new file mode 100644
index 00000000..33ae2cc0
--- /dev/null
+++ b/sysdeps/loongarch/Versions
@@ -0,0 +1,5 @@
+ld {
+ GLIBC_PRIVATE {
+ _dl_larch_get_cpu_features;
+ }
+}
diff --git a/sysdeps/loongarch/cpu-tunables.c b/sysdeps/loongarch/cpu-tunables.c
new file mode 100644
index 00000000..8e9fab93
--- /dev/null
+++ b/sysdeps/loongarch/cpu-tunables.c
@@ -0,0 +1,89 @@
+/* LoongArch CPU feature tuning.
+ This file is part of the GNU C Library.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+# include <stdbool.h>
+# include <stdint.h>
+# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. */
+# include <elf/dl-tunables.h>
+# include <string.h>
+# include <cpu-features.h>
+# include <ldsodefs.h>
+# include <sys/auxv.h>
+
+# define HWCAP_LOONGARCH_IFUNC \
+ (HWCAP_LOONGARCH_UAL | HWCAP_LOONGARCH_LSX | HWCAP_LOONGARCH_LASX)
+
+# define CHECK_GLIBC_IFUNC_CPU_OFF(f, name, len) \
+ _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \
+ if (!memcmp (f, #name, len) && \
+ (GLRO (dl_hwcap) & HWCAP_LOONGARCH_##name)) \
+ { \
+ hwcap |= (HWCAP_LOONGARCH_##name | (~HWCAP_LOONGARCH_IFUNC)); \
+ break; \
+ } \
+
+attribute_hidden
+void
+TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
+{
+ const char *p = valp->strval;
+ size_t len;
+ unsigned long hwcap = 0;
+ const char *c;
+
+ do {
+ for (c = p; *c != ','; c++)
+ if (*c == '\0')
+ break;
+
+ len = c - p;
+
+ switch(len)
+ {
+ default:
+ _dl_fatal_printf (
+ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
+ );
+ break;
+ case 3:
+ {
+ CHECK_GLIBC_IFUNC_CPU_OFF (p, LSX, 3);
+ CHECK_GLIBC_IFUNC_CPU_OFF (p, UAL, 3);
+ _dl_fatal_printf (
+ "Some features are invalid or not supported on this machine!!\n"
+ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
+ );
+ }
+ break;
+ case 4:
+ {
+ CHECK_GLIBC_IFUNC_CPU_OFF (p, LASX, 4);
+ _dl_fatal_printf (
+ "Some features are invalid or not supported on this machine!!\n"
+ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
+ );
+ }
+ break;
+ }
+
+ p += len + 1;
+ }
+ while (*c != '\0');
+
+ GLRO (dl_larch_cpu_features).hwcap &= hwcap;
+}
diff --git a/sysdeps/loongarch/dl-get-cpu-features.c b/sysdeps/loongarch/dl-get-cpu-features.c
new file mode 100644
index 00000000..7cd9bc15
--- /dev/null
+++ b/sysdeps/loongarch/dl-get-cpu-features.c
@@ -0,0 +1,25 @@
+/* Define _dl_larch_get_cpu_features.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#include <ldsodefs.h>
+
+const struct cpu_features *
+_dl_larch_get_cpu_features (void)
+{
+ return &GLRO(dl_larch_cpu_features);
+}
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index 57913cef..b395a928 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -29,6 +29,8 @@
#include <dl-static-tls.h>
#include <dl-machine-rel.h>
+#include <cpu-features.c>
+
#ifndef _RTLD_PROLOGUE
# define _RTLD_PROLOGUE(entry) \
".globl\t" __STRING (entry) "\n\t" \
@@ -53,6 +55,23 @@
#define ELF_MACHINE_NO_REL 1
#define ELF_MACHINE_NO_RELA 0
+#define DL_PLATFORM_INIT dl_platform_init ()
+
+static inline void __attribute__ ((unused))
+dl_platform_init (void)
+{
+ if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
+ /* Avoid an empty string which would disturb us. */
+ GLRO(dl_platform) = NULL;
+
+#ifdef SHARED
+ /* init_cpu_features has been called early from __libc_start_main in
+ static executable. */
+ init_cpu_features (&GLRO(dl_larch_cpu_features));
+#endif
+}
+
+
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int
elf_machine_matches_host (const ElfW (Ehdr) *ehdr)
@@ -290,9 +309,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
if (profile != 0)
{
#if !defined __loongarch_soft_float
- if (SUPPORT_LASX)
+ if (RTLD_SUPPORT_LASX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx;
- else if (SUPPORT_LSX)
+ else if (RTLD_SUPPORT_LSX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx;
else
#endif
@@ -310,9 +329,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
indicated by the offset on the stack, and then jump to
the resolved address. */
#if !defined __loongarch_soft_float
- if (SUPPORT_LASX)
+ if (RTLD_SUPPORT_LASX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
- else if (SUPPORT_LSX)
+ else if (RTLD_SUPPORT_LSX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lsx;
else
#endif
diff --git a/sysdeps/loongarch/dl-tunables.list b/sysdeps/loongarch/dl-tunables.list
new file mode 100644
index 00000000..66b34275
--- /dev/null
+++ b/sysdeps/loongarch/dl-tunables.list
@@ -0,0 +1,25 @@
+# LoongArch specific tunables.
+# Copyright (C) 2023 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+glibc {
+ cpu {
+ hwcaps {
+ type: STRING
+ }
+ }
+}
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
new file mode 100644
index 00000000..1290c4ce
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
@@ -0,0 +1,29 @@
+/* Initialize CPU feature data. LoongArch64 version.
+ This file is part of the GNU C Library.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <cpu-features.h>
+#include <elf/dl-hwcaps.h>
+#include <elf/dl-tunables.h>
+extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) attribute_hidden;
+
+static inline void
+init_cpu_features (struct cpu_features *cpu_features)
+{
+ GLRO (dl_larch_cpu_features).hwcap = GLRO (dl_hwcap);
+ TUNABLE_GET (glibc, cpu, hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
+}
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
index d1a280a5..450963ce 100644
--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
@@ -19,13 +19,23 @@
#ifndef _CPU_FEATURES_LOONGARCH64_H
#define _CPU_FEATURES_LOONGARCH64_H
+#include <stdint.h>
#include <sys/auxv.h>
-#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL)
-#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
-#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
+struct cpu_features
+ {
+ uint64_t hwcap;
+ };
+/* Get a pointer to the CPU features structure. */
+extern const struct cpu_features *_dl_larch_get_cpu_features (void)
+ __attribute__ ((pure));
+
+#define SUPPORT_UAL (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_UAL)
+#define SUPPORT_LSX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LSX)
+#define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LASX)
+#define RTLD_SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
+#define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
#define INIT_ARCH()
#endif /* _CPU_FEATURES_LOONGARCH64_H */
-
diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
new file mode 100644
index 00000000..6217fda9
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
@@ -0,0 +1,60 @@
+/* Data for LoongArch64 version of processor capability information.
+ Linux version.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* If anything should be added here check whether the size of each string
+ is still ok with the given array size.
+
+ All the #ifdefs in the definitions are quite irritating but
+ necessary if we want to avoid duplicating the information. There
+ are three different modes:
+
+ - PROCINFO_DECL is defined. This means we are only interested in
+ declarations.
+
+ - PROCINFO_DECL is not defined:
+
+ + if SHARED is defined the file is included in an array
+ initializer. The .element = { ... } syntax is needed.
+
+ + if SHARED is not defined a normal array initialization is
+ needed.
+ */
+
+#ifndef PROCINFO_CLASS
+# define PROCINFO_CLASS
+#endif
+
+#if !IS_IN (ldconfig)
+# if !defined PROCINFO_DECL && defined SHARED
+ ._dl_larch_cpu_features
+# else
+PROCINFO_CLASS struct cpu_features _dl_larch_cpu_features
+# endif
+# ifndef PROCINFO_DECL
+= { }
+# endif
+# if !defined SHARED || defined PROCINFO_DECL
+;
+# else
+,
+# endif
+#endif
+
+#undef PROCINFO_DECL
+#undef PROCINFO_CLASS
diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
new file mode 100644
index 00000000..455fd71a
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
@@ -0,0 +1,21 @@
+/* Operating system support for run-time dynamic linker. LoongArch version.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include <sysdeps/loongarch/cpu-tunables.c>
+#include <sysdeps/unix/sysv/linux/dl-sysdep.c>
diff --git a/sysdeps/unix/sysv/linux/loongarch/libc-start.c b/sysdeps/unix/sysv/linux/loongarch/libc-start.c
new file mode 100644
index 00000000..f1346ece
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/libc-start.c
@@ -0,0 +1,34 @@
+/* Override csu/libc-start.c on LoongArch64.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef SHARED
+
+/* Mark symbols hidden in static PIE for early self relocation to work. */
+# if BUILD_PIE_DEFAULT
+# pragma GCC visibility push(hidden)
+# endif
+
+# include <ldsodefs.h>
+# include <cpu-features.c>
+
+extern struct cpu_features _dl_larch_cpu_features;
+
+# define ARCH_INIT_CPU_FEATURES() init_cpu_features (&_dl_larch_cpu_features)
+
+#endif
+#include <csu/libc-start.c>
--
2.33.0
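The usage notes above describe how glibc.cpu.hwcaps masks the hwcap bits used for ifunc selection. As a quick illustration (not part of the patch), here is a minimal sketch that queries the kernel-reported AT_HWCAP bits; it assumes the HWCAP_LOONGARCH_{LASX,LSX,UAL} constants from the Linux `<asm/hwcap.h>` UAPI header are available on the build system:

```c
/* Hypothetical helper, not part of this commit: report which LoongArch
   hwcap bits the kernel advertises.  The tunable above can only mask
   these bits for glibc's ifunc selection; it cannot add features.  */
#include <stdio.h>
#include <sys/auxv.h>   /* getauxval, AT_HWCAP */
#include <asm/hwcap.h>  /* HWCAP_LOONGARCH_* (assumed available) */

int
main (void)
{
  unsigned long hwcap = getauxval (AT_HWCAP);

  /* Default ifunc priority per the commit message:
     lasx > lsx > unaligned > aligned > generic.  */
  printf ("LASX: %s\n", (hwcap & HWCAP_LOONGARCH_LASX) ? "yes" : "no");
  printf ("LSX:  %s\n", (hwcap & HWCAP_LOONGARCH_LSX)  ? "yes" : "no");
  printf ("UAL:  %s\n", (hwcap & HWCAP_LOONGARCH_UAL)  ? "yes" : "no");
  return 0;
}
```

Running such a program under `GLIBC_TUNABLES=glibc.cpu.hwcaps=LSX,UAL` would still print the unmodified kernel bits: the tunable only narrows which str/mem ifunc variants glibc itself selects, since the cpu-tunables.c callback in this patch masks GLRO(dl_larch_cpu_features).hwcap rather than AT_HWCAP.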

@@ -0,0 +1,485 @@
From 3ee56bbc56faa7b85a6513340db4a4fdd6ce709d Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Mon, 28 Aug 2023 10:08:36 +0800
Subject: [PATCH 15/29] LoongArch: Add ifunc support for memchr{aligned, lsx,
lasx}
According to the glibc memchr microbenchmark, this implementation reduces
the runtime as follows:
Name Percent of runtime reduced
memchr-lasx 37%-83%
memchr-lsx 30%-66%
memchr-aligned 0%-15%
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
.../lp64/multiarch/ifunc-impl-list.c | 7 ++
.../loongarch/lp64/multiarch/ifunc-memchr.h | 40 ++++++
.../loongarch/lp64/multiarch/memchr-aligned.S | 95 ++++++++++++++
.../loongarch/lp64/multiarch/memchr-lasx.S | 117 ++++++++++++++++++
sysdeps/loongarch/lp64/multiarch/memchr-lsx.S | 102 +++++++++++++++
sysdeps/loongarch/lp64/multiarch/memchr.c | 37 ++++++
7 files changed, 401 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 64416b02..2f4802cf 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -24,5 +24,8 @@ sysdep_routines += \
rawmemchr-aligned \
rawmemchr-lsx \
rawmemchr-lasx \
+ memchr-aligned \
+ memchr-lsx \
+ memchr-lasx \
# sysdep_routines
endif
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 3db9af14..a567b9cf 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -102,5 +102,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned)
)
+ IFUNC_IMPL (i, name, memchr,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LASX, __memchr_lasx)
+ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LSX, __memchr_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned)
+ )
return i;
}
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h
new file mode 100644
index 00000000..9060ccd5
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h
@@ -0,0 +1,40 @@
+/* Common definition for memchr ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S
new file mode 100644
index 00000000..81d0d004
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S
@@ -0,0 +1,95 @@
+/* Optimized memchr implementation using basic LoongArch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define MEMCHR_NAME __memchr_aligned
+#else
+# define MEMCHR_NAME memchr
+#endif
+
+LEAF(MEMCHR_NAME, 6)
+ beqz a2, L(out)
+ andi t1, a0, 0x7
+ add.d a5, a0, a2
+ bstrins.d a0, zero, 2, 0
+
+ ld.d t0, a0, 0
+ bstrins.d a1, a1, 15, 8
+ lu12i.w a3, 0x01010
+ slli.d t2, t1, 03
+
+ bstrins.d a1, a1, 31, 16
+ ori a3, a3, 0x101
+ li.d t7, -1
+ li.d t8, 8
+
+ bstrins.d a1, a1, 63, 32
+ bstrins.d a3, a3, 63, 32
+ sll.d t2, t7, t2
+ xor t0, t0, a1
+
+
+ addi.d a6, a5, -1
+ slli.d a4, a3, 7
+ sub.d t1, t8, t1
+ orn t0, t0, t2
+
+ sub.d t2, t0, a3
+ andn t3, a4, t0
+ bstrins.d a6, zero, 2, 0
+ and t0, t2, t3
+
+ bgeu t1, a2, L(end)
+L(loop):
+ bnez t0, L(found)
+ ld.d t1, a0, 8
+ xor t0, t1, a1
+
+ addi.d a0, a0, 8
+ sub.d t2, t0, a3
+ andn t3, a4, t0
+ and t0, t2, t3
+
+
+ bne a0, a6, L(loop)
+L(end):
+ sub.d t1, a5, a6
+ ctz.d t0, t0
+ srli.d t0, t0, 3
+
+ sltu t1, t0, t1
+ add.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+
+L(found):
+ ctz.d t0, t0
+ srli.d t0, t0, 3
+ add.d a0, a0, t0
+ jr ra
+
+L(out):
+ move a0, zero
+ jr ra
+END(MEMCHR_NAME)
+
+libc_hidden_builtin_def (MEMCHR_NAME)
diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S
new file mode 100644
index 00000000..a26cdf48
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S
@@ -0,0 +1,117 @@
+/* Optimized memchr implementation using LoongArch LASX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define MEMCHR __memchr_lasx
+
+LEAF(MEMCHR, 6)
+ beqz a2, L(ret0)
+ add.d a3, a0, a2
+ andi t0, a0, 0x3f
+ bstrins.d a0, zero, 5, 0
+
+ xvld xr0, a0, 0
+ xvld xr1, a0, 32
+ li.d t1, -1
+ li.d t2, 64
+
+ xvreplgr2vr.b xr2, a1
+ sll.d t3, t1, t0
+ sub.d t2, t2, t0
+ xvseq.b xr0, xr0, xr2
+
+ xvseq.b xr1, xr1, xr2
+ xvmsknz.b xr0, xr0
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+
+
+ xvpickve.w xr4, xr1, 4
+ vilvl.h vr0, vr3, vr0
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+
+ movfr2gr.d t0, fa0
+ and t0, t0, t3
+ bgeu t2, a2, L(end)
+ bnez t0, L(found)
+
+ addi.d a4, a3, -1
+ bstrins.d a4, zero, 5, 0
+L(loop):
+ xvld xr0, a0, 64
+ xvld xr1, a0, 96
+
+ addi.d a0, a0, 64
+ xvseq.b xr0, xr0, xr2
+ xvseq.b xr1, xr1, xr2
+ beq a0, a4, L(out)
+
+
+ xvmax.bu xr3, xr0, xr1
+ xvseteqz.v fcc0, xr3
+ bcnez fcc0, L(loop)
+ xvmsknz.b xr0, xr0
+
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+ xvpickve.w xr4, xr1, 4
+ vilvl.h vr0, vr3, vr0
+
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+L(found):
+ ctz.d t1, t0
+
+ add.d a0, a0, t1
+ jr ra
+L(ret0):
+ move a0, zero
+ jr ra
+
+
+L(out):
+ xvmsknz.b xr0, xr0
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+ xvpickve.w xr4, xr1, 4
+
+ vilvl.h vr0, vr3, vr0
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+
+L(end):
+ sub.d t2, zero, a3
+ srl.d t1, t1, t2
+ and t0, t0, t1
+ ctz.d t1, t0
+
+ add.d a0, a0, t1
+ maskeqz a0, a0, t0
+ jr ra
+END(MEMCHR)
+
+libc_hidden_builtin_def (MEMCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S
new file mode 100644
index 00000000..a73ecd25
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S
@@ -0,0 +1,102 @@
+/* Optimized memchr implementation using LoongArch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define MEMCHR __memchr_lsx
+
+LEAF(MEMCHR, 6)
+ beqz a2, L(ret0)
+ add.d a3, a0, a2
+ andi t0, a0, 0x1f
+ bstrins.d a0, zero, 4, 0
+
+ vld vr0, a0, 0
+ vld vr1, a0, 16
+ li.d t1, -1
+ li.d t2, 32
+
+ vreplgr2vr.b vr2, a1
+ sll.d t3, t1, t0
+ sub.d t2, t2, t0
+ vseq.b vr0, vr0, vr2
+
+ vseq.b vr1, vr1, vr2
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+
+
+ movfr2gr.s t0, fa0
+ and t0, t0, t3
+ bgeu t2, a2, L(end)
+ bnez t0, L(found)
+
+ addi.d a4, a3, -1
+ bstrins.d a4, zero, 4, 0
+L(loop):
+ vld vr0, a0, 32
+ vld vr1, a0, 48
+
+ addi.d a0, a0, 32
+ vseq.b vr0, vr0, vr2
+ vseq.b vr1, vr1, vr2
+ beq a0, a4, L(out)
+
+ vmax.bu vr3, vr0, vr1
+ vseteqz.v fcc0, vr3
+ bcnez fcc0, L(loop)
+ vmsknz.b vr0, vr0
+
+
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+L(found):
+ ctz.w t0, t0
+
+ add.d a0, a0, t0
+ jr ra
+L(ret0):
+ move a0, zero
+ jr ra
+
+L(out):
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+
+L(end):
+ sub.d t2, zero, a3
+ srl.w t1, t1, t2
+ and t0, t0, t1
+ ctz.w t1, t0
+
+
+ add.d a0, a0, t1
+ maskeqz a0, a0, t0
+ jr ra
+END(MEMCHR)
+
+libc_hidden_builtin_def (MEMCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memchr.c b/sysdeps/loongarch/lp64/multiarch/memchr.c
new file mode 100644
index 00000000..059479c0
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memchr.c
@@ -0,0 +1,37 @@
+/* Multiple versions of memchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define memchr __redirect_memchr
+# include <string.h>
+# undef memchr
+
+# define SYMBOL_NAME memchr
+# include "ifunc-memchr.h"
+
+libc_ifunc_redirected (__redirect_memchr, memchr,
+ IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
+# endif
+
+#endif
--
2.33.0
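The __memchr_aligned variant in this patch relies on the classic word-at-a-time byte-match test: the target byte is replicated across a register with bstrins.d, each loaded word is XORed with it, and the `(w - 0x01…01) & ~w & 0x80…80` pattern flags the first matching byte, which ctz.d/srli.d then convert into a byte offset. A minimal C sketch of that technique (an illustration of the idea, not the glibc code):

```c
/* Sketch of the aligned-memchr match test: XOR turns bytes equal to C
   into zero bytes, and the "haszero" trick marks them.  The lowest set
   0x80 bit corresponds to the first matching byte of a little-endian word.  */
#include <stdint.h>

static inline uint64_t
word_has_byte (uint64_t word, unsigned char c)
{
  const uint64_t ones  = 0x0101010101010101ULL;
  const uint64_t highs = 0x8080808080808080ULL;
  uint64_t x = word ^ (ones * c);     /* matching bytes become 0x00 */
  return (x - ones) & ~x & highs;     /* nonzero iff some byte matched */
}
```

In the assembly, a3 and a4 hold the 0x0101…01 and 0x8080…80 constants, and the partial first and last words are neutralized by masking before the same test is applied.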

@@ -0,0 +1,946 @@
From 60f4bbd1eec528ba8df044ae6b3091f6337a7fcc Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Mon, 28 Aug 2023 10:08:39 +0800
Subject: [PATCH 18/29] LoongArch: Add ifunc support for memcmp{aligned, lsx,
lasx}
According to the glibc memcmp microbenchmark test results (Add generic
memcmp), this implementation shows a performance improvement
except when the length is less than 3; details as below:
Name Percent of time reduced
memcmp-lasx 16%-74%
memcmp-lsx 20%-50%
memcmp-aligned 5%-20%
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
.../lp64/multiarch/ifunc-impl-list.c | 7 +
.../loongarch/lp64/multiarch/ifunc-memcmp.h | 40 +++
.../loongarch/lp64/multiarch/memcmp-aligned.S | 292 ++++++++++++++++++
.../loongarch/lp64/multiarch/memcmp-lasx.S | 207 +++++++++++++
sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S | 269 ++++++++++++++++
sysdeps/loongarch/lp64/multiarch/memcmp.c | 43 +++
7 files changed, 861 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 216886c5..360a6718 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -34,5 +34,8 @@ sysdep_routines += \
memset-unaligned \
memset-lsx \
memset-lasx \
+ memcmp-aligned \
+ memcmp-lsx \
+ memcmp-lasx \
# sysdep_routines
endif
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 37f60dde..e397d58c 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -127,5 +127,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_aligned)
)
+ IFUNC_IMPL (i, name, memcmp,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LASX, __memcmp_lasx)
+ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LSX, __memcmp_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_aligned)
+ )
return i;
}
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h
new file mode 100644
index 00000000..04adc2e5
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h
@@ -0,0 +1,40 @@
+/* Common definition for memcmp ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S
new file mode 100644
index 00000000..14a7caa9
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S
@@ -0,0 +1,292 @@
+/* Optimized memcmp implementation using basic LoongArch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define MEMCMP_NAME __memcmp_aligned
+#else
+# define MEMCMP_NAME memcmp
+#endif
+
+LEAF(MEMCMP_NAME, 6)
+ beqz a2, L(ret)
+ andi a4, a1, 0x7
+ andi a3, a0, 0x7
+ sltu a5, a4, a3
+
+ xor t0, a0, a1
+ li.w t8, 8
+ maskeqz t0, t0, a5
+ li.w t7, -1
+
+ xor a0, a0, t0
+ xor a1, a1, t0
+ andi a3, a0, 0x7
+ andi a4, a1, 0x7
+
+ xor a0, a0, a3
+ xor a1, a1, a4
+ ld.d t2, a0, 0
+ ld.d t1, a1, 0
+
+ slli.d t3, a3, 3
+ slli.d t4, a4, 3
+ sub.d a6, t3, t4
+ srl.d t1, t1, t4
+
+ srl.d t0, t2, t3
+ srl.d t5, t7, t4
+ sub.d t6, t0, t1
+ and t6, t6, t5
+
+ sub.d t5, t8, a4
+ bnez t6, L(first_out)
+ bgeu t5, a2, L(ret)
+ sub.d a2, a2, t5
+
+ bnez a6, L(unaligned)
+ blt a2, t8, L(al_less_8bytes)
+ andi t1, a2, 31
+ beq t1, a2, L(al_less_32bytes)
+
+ sub.d t2, a2, t1
+ add.d a4, a0, t2
+ move a2, t1
+
+L(al_loop):
+ ld.d t0, a0, 8
+
+ ld.d t1, a1, 8
+ ld.d t2, a0, 16
+ ld.d t3, a1, 16
+ ld.d t4, a0, 24
+
+ ld.d t5, a1, 24
+ ld.d t6, a0, 32
+ ld.d t7, a1, 32
+ addi.d a0, a0, 32
+
+ addi.d a1, a1, 32
+ bne t0, t1, L(out1)
+ bne t2, t3, L(out2)
+ bne t4, t5, L(out3)
+
+ bne t6, t7, L(out4)
+ bne a0, a4, L(al_loop)
+
+L(al_less_32bytes):
+ srai.d a4, a2, 4
+ beqz a4, L(al_less_16bytes)
+
+ ld.d t0, a0, 8
+ ld.d t1, a1, 8
+ ld.d t2, a0, 16
+ ld.d t3, a1, 16
+
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+ addi.d a2, a2, -16
+ bne t0, t1, L(out1)
+
+ bne t2, t3, L(out2)
+
+L(al_less_16bytes):
+ srai.d a4, a2, 3
+ beqz a4, L(al_less_8bytes)
+ ld.d t0, a0, 8
+
+ ld.d t1, a1, 8
+ addi.d a0, a0, 8
+ addi.d a1, a1, 8
+ addi.d a2, a2, -8
+
+ bne t0, t1, L(out1)
+
+L(al_less_8bytes):
+ beqz a2, L(ret)
+ ld.d t0, a0, 8
+ ld.d t1, a1, 8
+
+ li.d t7, -1
+ slli.d t2, a2, 3
+ sll.d t2, t7, t2
+ sub.d t3, t0, t1
+
+ andn t6, t3, t2
+ bnez t6, L(count_diff)
+
+L(ret):
+ move a0, zero
+ jr ra
+
+L(out4):
+ move t0, t6
+ move t1, t7
+ sub.d t6, t6, t7
+ b L(count_diff)
+
+L(out3):
+ move t0, t4
+ move t1, t5
+ sub.d t6, t4, t5
+ b L(count_diff)
+
+L(out2):
+ move t0, t2
+ move t1, t3
+L(out1):
+ sub.d t6, t0, t1
+ b L(count_diff)
+
+L(first_out):
+ slli.d t4, a2, 3
+ slt t3, a2, t5
+ sll.d t4, t7, t4
+ maskeqz t4, t4, t3
+
+ andn t6, t6, t4
+
+L(count_diff):
+ ctz.d t2, t6
+ bstrins.d t2, zero, 2, 0
+ srl.d t0, t0, t2
+
+ srl.d t1, t1, t2
+ andi t0, t0, 0xff
+ andi t1, t1, 0xff
+ sub.d t2, t0, t1
+
+ sub.d t3, t1, t0
+ masknez t2, t2, a5
+ maskeqz t3, t3, a5
+ or a0, t2, t3
+
+ jr ra
+
+L(unaligned):
+ sub.d a7, zero, a6
+ srl.d t0, t2, a6
+ blt a2, t8, L(un_less_8bytes)
+
+ andi t1, a2, 31
+ beq t1, a2, L(un_less_32bytes)
+ sub.d t2, a2, t1
+ add.d a4, a0, t2
+
+ move a2, t1
+
+L(un_loop):
+ ld.d t2, a0, 8
+ ld.d t1, a1, 8
+ ld.d t4, a0, 16
+
+ ld.d t3, a1, 16
+ ld.d t6, a0, 24
+ ld.d t5, a1, 24
+ ld.d t8, a0, 32
+
+ ld.d t7, a1, 32
+ addi.d a0, a0, 32
+ addi.d a1, a1, 32
+ sll.d a3, t2, a7
+
+ or t0, a3, t0
+ bne t0, t1, L(out1)
+ srl.d t0, t2, a6
+ sll.d a3, t4, a7
+
+ or t2, a3, t0
+ bne t2, t3, L(out2)
+ srl.d t0, t4, a6
+ sll.d a3, t6, a7
+
+ or t4, a3, t0
+ bne t4, t5, L(out3)
+ srl.d t0, t6, a6
+ sll.d a3, t8, a7
+
+ or t6, t0, a3
+ bne t6, t7, L(out4)
+ srl.d t0, t8, a6
+ bne a0, a4, L(un_loop)
+
+L(un_less_32bytes):
+ srai.d a4, a2, 4
+ beqz a4, L(un_less_16bytes)
+ ld.d t2, a0, 8
+ ld.d t1, a1, 8
+
+ ld.d t4, a0, 16
+ ld.d t3, a1, 16
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+
+ addi.d a2, a2, -16
+ sll.d a3, t2, a7
+ or t0, a3, t0
+ bne t0, t1, L(out1)
+
+ srl.d t0, t2, a6
+ sll.d a3, t4, a7
+ or t2, a3, t0
+ bne t2, t3, L(out2)
+
+ srl.d t0, t4, a6
+
+L(un_less_16bytes):
+ srai.d a4, a2, 3
+ beqz a4, L(un_less_8bytes)
+ ld.d t2, a0, 8
+
+ ld.d t1, a1, 8
+ addi.d a0, a0, 8
+ addi.d a1, a1, 8
+ addi.d a2, a2, -8
+
+ sll.d a3, t2, a7
+ or t0, a3, t0
+ bne t0, t1, L(out1)
+ srl.d t0, t2, a6
+
+L(un_less_8bytes):
+ beqz a2, L(ret)
+ andi a7, a7, 63
+ slli.d a4, a2, 3
+ bgeu a7, a4, L(last_cmp)
+
+ ld.d t2, a0, 8
+ sll.d a3, t2, a7
+ or t0, a3, t0
+
+L(last_cmp):
+ ld.d t1, a1, 8
+
+ li.d t7, -1
+ sll.d t2, t7, a4
+ sub.d t3, t0, t1
+ andn t6, t3, t2
+
+ bnez t6, L(count_diff)
+ move a0, zero
+ jr ra
+END(MEMCMP_NAME)
+
+libc_hidden_builtin_def (MEMCMP_NAME)
diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S
new file mode 100644
index 00000000..3151a179
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S
@@ -0,0 +1,207 @@
+/* Optimized memcmp implementation using LoongArch LASX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define MEMCMP __memcmp_lasx
+
+LEAF(MEMCMP, 6)
+ li.d t2, 32
+ add.d a3, a0, a2
+ add.d a4, a1, a2
+ bgeu t2, a2, L(less32)
+
+ li.d t1, 160
+ bgeu a2, t1, L(make_aligned)
+L(loop32):
+ xvld xr0, a0, 0
+ xvld xr1, a1, 0
+
+ addi.d a0, a0, 32
+ addi.d a1, a1, 32
+ addi.d a2, a2, -32
+ xvseq.b xr2, xr0, xr1
+
+ xvsetanyeqz.b fcc0, xr2
+ bcnez fcc0, L(end)
+L(last_bytes):
+ bltu t2, a2, L(loop32)
+ xvld xr0, a3, -32
+
+
+ xvld xr1, a4, -32
+ xvseq.b xr2, xr0, xr1
+L(end):
+ xvmsknz.b xr2, xr2
+ xvpermi.q xr4, xr0, 1
+
+ xvpickve.w xr3, xr2, 4
+ xvpermi.q xr5, xr1, 1
+ vilvl.h vr2, vr3, vr2
+ movfr2gr.s t0, fa2
+
+ cto.w t0, t0
+ vreplgr2vr.b vr2, t0
+ vshuf.b vr0, vr4, vr0, vr2
+ vshuf.b vr1, vr5, vr1, vr2
+
+ vpickve2gr.bu t0, vr0, 0
+ vpickve2gr.bu t1, vr1, 0
+ sub.d a0, t0, t1
+ jr ra
+
+
+L(less32):
+ srli.d t0, a2, 4
+ beqz t0, L(less16)
+ vld vr0, a0, 0
+ vld vr1, a1, 0
+
+ vld vr2, a3, -16
+ vld vr3, a4, -16
+L(short_ret):
+ vseq.b vr4, vr0, vr1
+ vseq.b vr5, vr2, vr3
+
+ vmsknz.b vr4, vr4
+ vmsknz.b vr5, vr5
+ vilvl.h vr4, vr5, vr4
+ movfr2gr.s t0, fa4
+
+ cto.w t0, t0
+ vreplgr2vr.b vr4, t0
+ vshuf.b vr0, vr2, vr0, vr4
+ vshuf.b vr1, vr3, vr1, vr4
+
+
+ vpickve2gr.bu t0, vr0, 0
+ vpickve2gr.bu t1, vr1, 0
+ sub.d a0, t0, t1
+ jr ra
+
+L(less16):
+ srli.d t0, a2, 3
+ beqz t0, L(less8)
+ vldrepl.d vr0, a0, 0
+ vldrepl.d vr1, a1, 0
+
+ vldrepl.d vr2, a3, -8
+ vldrepl.d vr3, a4, -8
+ b L(short_ret)
+ nop
+
+L(less8):
+ srli.d t0, a2, 2
+ beqz t0, L(less4)
+ vldrepl.w vr0, a0, 0
+ vldrepl.w vr1, a1, 0
+
+
+ vldrepl.w vr2, a3, -4
+ vldrepl.w vr3, a4, -4
+ b L(short_ret)
+ nop
+
+L(less4):
+ srli.d t0, a2, 1
+ beqz t0, L(less2)
+ vldrepl.h vr0, a0, 0
+ vldrepl.h vr1, a1, 0
+
+ vldrepl.h vr2, a3, -2
+ vldrepl.h vr3, a4, -2
+ b L(short_ret)
+ nop
+
+L(less2):
+ beqz a2, L(ret0)
+ ld.bu t0, a0, 0
+ ld.bu t1, a1, 0
+ sub.d a0, t0, t1
+
+ jr ra
+L(ret0):
+ move a0, zero
+ jr ra
+
+L(make_aligned):
+ xvld xr0, a0, 0
+
+ xvld xr1, a1, 0
+ xvseq.b xr2, xr0, xr1
+ xvsetanyeqz.b fcc0, xr2
+ bcnez fcc0, L(end)
+
+ andi t0, a0, 0x1f
+ sub.d t0, t2, t0
+ sub.d t1, a2, t0
+ add.d a0, a0, t0
+
+ add.d a1, a1, t0
+ andi a2, t1, 0x3f
+ sub.d t0, t1, a2
+ add.d a5, a0, t0
+
+
+L(loop_align):
+ xvld xr0, a0, 0
+ xvld xr1, a1, 0
+ xvld xr2, a0, 32
+ xvld xr3, a1, 32
+
+ xvseq.b xr0, xr0, xr1
+ xvseq.b xr1, xr2, xr3
+ xvmin.bu xr2, xr1, xr0
+ xvsetanyeqz.b fcc0, xr2
+
+ bcnez fcc0, L(pair_end)
+ addi.d a0, a0, 64
+ addi.d a1, a1, 64
+ bne a0, a5, L(loop_align)
+
+ bnez a2, L(last_bytes)
+ move a0, zero
+ jr ra
+ nop
+
+
+L(pair_end):
+ xvmsknz.b xr0, xr0
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr2, xr0, 4
+ xvpickve.w xr3, xr1, 4
+
+ vilvl.h vr0, vr2, vr0
+ vilvl.h vr1, vr3, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+
+ cto.d t0, t0
+ ldx.bu t1, a0, t0
+ ldx.bu t2, a1, t0
+ sub.d a0, t1, t2
+
+ jr ra
+END(MEMCMP)
+
+libc_hidden_builtin_def (MEMCMP)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S
new file mode 100644
index 00000000..38a50a4c
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S
@@ -0,0 +1,269 @@
+/* Optimized memcmp implementation using LoongArch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+#define MEMCMP __memcmp_lsx
+
+LEAF(MEMCMP, 6)
+ beqz a2, L(out)
+ pcalau12i t0, %pc_hi20(L(INDEX))
+ andi a3, a0, 0xf
+ vld vr5, t0, %pc_lo12(L(INDEX))
+
+ andi a4, a1, 0xf
+ bne a3, a4, L(unaligned)
+ bstrins.d a0, zero, 3, 0
+ xor a1, a1, a4
+
+ vld vr0, a0, 0
+ vld vr1, a1, 0
+ li.d t0, 16
+ vreplgr2vr.b vr3, a3
+
+ sub.d t1, t0, a3
+ vadd.b vr3, vr3, vr5
+ vshuf.b vr0, vr3, vr0, vr3
+ vshuf.b vr1, vr3, vr1, vr3
+
+
+ vseq.b vr4, vr0, vr1
+ bgeu t1, a2, L(al_end)
+ vsetanyeqz.b fcc0, vr4
+ bcnez fcc0, L(al_found)
+
+ sub.d t1, a2, t1
+ andi a2, t1, 31
+ beq a2, t1, L(al_less_32bytes)
+ sub.d t2, t1, a2
+
+ add.d a4, a0, t2
+L(al_loop):
+ vld vr0, a0, 16
+ vld vr1, a1, 16
+ vld vr2, a0, 32
+
+ vld vr3, a1, 32
+ addi.d a0, a0, 32
+ addi.d a1, a1, 32
+ vseq.b vr4, vr0, vr1
+
+
+ vseq.b vr6, vr2, vr3
+ vand.v vr6, vr4, vr6
+ vsetanyeqz.b fcc0, vr6
+ bcnez fcc0, L(al_pair_end)
+
+ bne a0, a4, L(al_loop)
+L(al_less_32bytes):
+ bgeu t0, a2, L(al_less_16bytes)
+ vld vr0, a0, 16
+ vld vr1, a1, 16
+
+ vld vr2, a0, 32
+ vld vr3, a1, 32
+ addi.d a2, a2, -16
+ vreplgr2vr.b vr6, a2
+
+ vslt.b vr5, vr5, vr6
+ vseq.b vr4, vr0, vr1
+ vseq.b vr6, vr2, vr3
+ vorn.v vr6, vr6, vr5
+
+
+L(al_pair_end):
+ vsetanyeqz.b fcc0, vr4
+ bcnez fcc0, L(al_found)
+ vnori.b vr4, vr6, 0
+ vfrstpi.b vr4, vr4, 0
+
+ vshuf.b vr0, vr2, vr2, vr4
+ vshuf.b vr1, vr3, vr3, vr4
+ vpickve2gr.bu t0, vr0, 0
+ vpickve2gr.bu t1, vr1, 0
+
+ sub.d a0, t0, t1
+ jr ra
+ nop
+ nop
+
+L(al_less_16bytes):
+ beqz a2, L(out)
+ vld vr0, a0, 16
+ vld vr1, a1, 16
+ vseq.b vr4, vr0, vr1
+
+
+L(al_end):
+ vreplgr2vr.b vr6, a2
+ vslt.b vr5, vr5, vr6
+ vorn.v vr4, vr4, vr5
+ nop
+
+L(al_found):
+ vnori.b vr4, vr4, 0
+ vfrstpi.b vr4, vr4, 0
+ vshuf.b vr0, vr0, vr0, vr4
+ vshuf.b vr1, vr1, vr1, vr4
+
+ vpickve2gr.bu t0, vr0, 0
+ vpickve2gr.bu t1, vr1, 0
+ sub.d a0, t0, t1
+ jr ra
+
+L(out):
+ move a0, zero
+ jr ra
+ nop
+ nop
+
+
+L(unaligned):
+ xor t2, a0, a1
+ sltu a5, a3, a4
+ masknez t2, t2, a5
+ xor a0, a0, t2
+
+ xor a1, a1, t2
+ andi a3, a0, 0xf
+ andi a4, a1, 0xf
+ bstrins.d a0, zero, 3, 0
+
+ xor a1, a1, a4
+ vld vr4, a0, 0
+ vld vr1, a1, 0
+ li.d t0, 16
+
+ vreplgr2vr.b vr2, a4
+ sub.d a6, a4, a3
+ sub.d t1, t0, a4
+ sub.d t2, t0, a6
+
+
+ vadd.b vr2, vr2, vr5
+ vreplgr2vr.b vr6, t2
+ vadd.b vr6, vr6, vr5
+ vshuf.b vr0, vr4, vr4, vr6
+
+ vshuf.b vr1, vr2, vr1, vr2
+ vshuf.b vr0, vr2, vr0, vr2
+ vseq.b vr7, vr0, vr1
+ bgeu t1, a2, L(un_end)
+
+ vsetanyeqz.b fcc0, vr7
+ bcnez fcc0, L(un_found)
+ sub.d a2, a2, t1
+ andi t1, a2, 31
+
+ beq a2, t1, L(un_less_32bytes)
+ sub.d t2, a2, t1
+ move a2, t1
+ add.d a4, a1, t2
+
+
+L(un_loop):
+ vld vr2, a0, 16
+ vld vr1, a1, 16
+ vld vr3, a1, 32
+ addi.d a1, a1, 32
+
+ addi.d a0, a0, 32
+ vshuf.b vr0, vr2, vr4, vr6
+ vld vr4, a0, 0
+ vseq.b vr7, vr0, vr1
+
+ vshuf.b vr2, vr4, vr2, vr6
+ vseq.b vr8, vr2, vr3
+ vand.v vr8, vr7, vr8
+ vsetanyeqz.b fcc0, vr8
+
+ bcnez fcc0, L(un_pair_end)
+ bne a1, a4, L(un_loop)
+
+L(un_less_32bytes):
+ bltu a2, t0, L(un_less_16bytes)
+ vld vr2, a0, 16
+ vld vr1, a1, 16
+ addi.d a0, a0, 16
+
+ addi.d a1, a1, 16
+ addi.d a2, a2, -16
+ vshuf.b vr0, vr2, vr4, vr6
+ vor.v vr4, vr2, vr2
+
+ vseq.b vr7, vr0, vr1
+ vsetanyeqz.b fcc0, vr7
+ bcnez fcc0, L(un_found)
+L(un_less_16bytes):
+ beqz a2, L(out)
+ vld vr1, a1, 16
+ bgeu a6, a2, 1f
+
+ vld vr2, a0, 16
+1:
+ vshuf.b vr0, vr2, vr4, vr6
+ vseq.b vr7, vr0, vr1
+L(un_end):
+ vreplgr2vr.b vr3, a2
+
+
+ vslt.b vr3, vr5, vr3
+ vorn.v vr7, vr7, vr3
+
+L(un_found):
+ vnori.b vr7, vr7, 0
+ vfrstpi.b vr7, vr7, 0
+
+ vshuf.b vr0, vr0, vr0, vr7
+ vshuf.b vr1, vr1, vr1, vr7
+L(calc_result):
+ vpickve2gr.bu t0, vr0, 0
+ vpickve2gr.bu t1, vr1, 0
+
+ sub.d t2, t0, t1
+ sub.d t3, t1, t0
+ masknez t0, t3, a5
+ maskeqz t1, t2, a5
+
+ or a0, t0, t1
+ jr ra
+L(un_pair_end):
+ vsetanyeqz.b fcc0, vr7
+ bcnez fcc0, L(un_found)
+
+
+ vnori.b vr7, vr8, 0
+ vfrstpi.b vr7, vr7, 0
+ vshuf.b vr0, vr2, vr2, vr7
+ vshuf.b vr1, vr3, vr3, vr7
+
+ b L(calc_result)
+END(MEMCMP)
+
+ .section .rodata.cst16,"M",@progbits,16
+ .align 4
+L(INDEX):
+ .dword 0x0706050403020100
+ .dword 0x0f0e0d0c0b0a0908
+
+libc_hidden_builtin_def (MEMCMP)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp.c b/sysdeps/loongarch/lp64/multiarch/memcmp.c
new file mode 100644
index 00000000..32eccac2
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memcmp.c
@@ -0,0 +1,43 @@
+/* Multiple versions of memcmp.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define memcmp __redirect_memcmp
+# include <string.h>
+# undef memcmp
+
+# define SYMBOL_NAME memcmp
+# include "ifunc-memcmp.h"
+
+libc_ifunc_redirected (__redirect_memcmp, memcmp,
+ IFUNC_SELECTOR ());
+# undef bcmp
+weak_alias (memcmp, bcmp)
+
+# undef __memcmpeq
+strong_alias (memcmp, __memcmpeq)
+libc_hidden_def (__memcmpeq)
+
+# ifdef SHARED
+__hidden_ver1 (memcmp, __GI_memcmp, __redirect_memcmp)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memcmp);
+# endif
+
+#endif
--
2.33.0
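Once __memcmp_aligned finds a pair of unequal 8-byte words, the L(count_diff) path locates the lowest differing byte (ctz.d plus rounding the bit index down to a byte boundary with bstrins.d) and returns the difference of those two bytes. A hedged C sketch of the same reduction for little-endian words (illustration only; the assembly derives the difference word by subtraction and also handles the operand-swap case recorded in a5):

```c
/* Sketch: reduce two unequal little-endian 64-bit words to a
   memcmp-style result by comparing their lowest differing byte.  */
#include <stdint.h>

static inline int
cmp_words (uint64_t a, uint64_t b)
{
  uint64_t diff = a ^ b;
  if (diff == 0)
    return 0;                                         /* words are equal */
  unsigned int shift = __builtin_ctzll (diff) & ~7u;  /* first differing byte */
  unsigned int ba = (a >> shift) & 0xff;
  unsigned int bb = (b >> shift) & 0xff;
  return (int) ba - (int) bb;                         /* <0, 0 or >0, like memcmp */
}
```

This is what lets the aligned variant compare 32 bytes per loop iteration with plain integer loads and fall back to byte extraction only once a mismatch is found.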

@@ -0,0 +1,417 @@
From c4c272fb8067364530a2a78df92c37403acc963f Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Mon, 28 Aug 2023 10:08:37 +0800
Subject: [PATCH 16/29] LoongArch: Add ifunc support for memrchr{lsx, lasx}
According to the glibc memrchr microbenchmark, this implementation reduces
the runtime as follows:
Name Percent of runtime reduced
memrchr-lasx 20%-83%
memrchr-lsx 20%-64%
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
.../lp64/multiarch/ifunc-impl-list.c | 8 ++
.../loongarch/lp64/multiarch/ifunc-memrchr.h | 40 ++++++
.../lp64/multiarch/memrchr-generic.c | 23 ++++
.../loongarch/lp64/multiarch/memrchr-lasx.S | 123 ++++++++++++++++++
.../loongarch/lp64/multiarch/memrchr-lsx.S | 105 +++++++++++++++
sysdeps/loongarch/lp64/multiarch/memrchr.c | 33 +++++
7 files changed, 335 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-generic.c
create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 2f4802cf..7b87bc90 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -27,5 +27,8 @@ sysdep_routines += \
memchr-aligned \
memchr-lsx \
memchr-lasx \
+ memrchr-generic \
+ memrchr-lsx \
+ memrchr-lasx \
# sysdep_routines
endif
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index a567b9cf..8bd5489e 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -109,5 +109,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#endif
IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned)
)
+
+ IFUNC_IMPL (i, name, memrchr,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LASX, __memrchr_lasx)
+ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LSX, __memrchr_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic)
+ )
return i;
}
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h
new file mode 100644
index 00000000..8215f9ad
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h
@@ -0,0 +1,40 @@
+/* Common definition for memrchr implementation.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (generic);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c b/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c
new file mode 100644
index 00000000..ced61ebc
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c
@@ -0,0 +1,23 @@
+/* Generic implementation of memrchr.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define MEMRCHR __memrchr_generic
+#endif
+
+#include <string/memrchr.c>
diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S
new file mode 100644
index 00000000..5f3e0d06
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S
@@ -0,0 +1,123 @@
+/* Optimized memrchr implementation using LoongArch LASX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+#ifndef MEMRCHR
+# define MEMRCHR __memrchr_lasx
+#endif
+
+LEAF(MEMRCHR, 6)
+ beqz a2, L(ret0)
+ addi.d a2, a2, -1
+ add.d a3, a0, a2
+ andi t1, a3, 0x3f
+
+ bstrins.d a3, zero, 5, 0
+ addi.d t1, t1, 1
+ xvld xr0, a3, 0
+ xvld xr1, a3, 32
+
+ sub.d t2, zero, t1
+ li.d t3, -1
+ xvreplgr2vr.b xr2, a1
+ andi t4, a0, 0x3f
+
+ srl.d t2, t3, t2
+ xvseq.b xr0, xr0, xr2
+ xvseq.b xr1, xr1, xr2
+ xvmsknz.b xr0, xr0
+
+
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+ xvpickve.w xr4, xr1, 4
+ vilvl.h vr0, vr3, vr0
+
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+ and t0, t0, t2
+
+ bltu a2, t1, L(end)
+ bnez t0, L(found)
+ bstrins.d a0, zero, 5, 0
+L(loop):
+ xvld xr0, a3, -64
+
+ xvld xr1, a3, -32
+ addi.d a3, a3, -64
+ xvseq.b xr0, xr0, xr2
+ xvseq.b xr1, xr1, xr2
+
+
+ beq a0, a3, L(out)
+ xvmax.bu xr3, xr0, xr1
+ xvseteqz.v fcc0, xr3
+ bcnez fcc0, L(loop)
+
+ xvmsknz.b xr0, xr0
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+ xvpickve.w xr4, xr1, 4
+
+ vilvl.h vr0, vr3, vr0
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+
+L(found):
+ addi.d a0, a3, 63
+ clz.d t1, t0
+ sub.d a0, a0, t1
+ jr ra
+
+
+L(out):
+ xvmsknz.b xr0, xr0
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+ xvpickve.w xr4, xr1, 4
+
+ vilvl.h vr0, vr3, vr0
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+
+L(end):
+ sll.d t2, t3, t4
+ and t0, t0, t2
+ addi.d a0, a3, 63
+ clz.d t1, t0
+
+ sub.d a0, a0, t1
+ maskeqz a0, a0, t0
+ jr ra
+L(ret0):
+ move a0, zero
+
+
+ jr ra
+END(MEMRCHR)
+
+libc_hidden_builtin_def (MEMRCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S
new file mode 100644
index 00000000..39a7c8b0
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S
@@ -0,0 +1,105 @@
+/* Optimized memrchr implementation using LoongArch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define MEMRCHR __memrchr_lsx
+
+LEAF(MEMRCHR, 6)
+ beqz a2, L(ret0)
+ addi.d a2, a2, -1
+ add.d a3, a0, a2
+ andi t1, a3, 0x1f
+
+ bstrins.d a3, zero, 4, 0
+ addi.d t1, t1, 1
+ vld vr0, a3, 0
+ vld vr1, a3, 16
+
+ sub.d t2, zero, t1
+ li.d t3, -1
+ vreplgr2vr.b vr2, a1
+ andi t4, a0, 0x1f
+
+ srl.d t2, t3, t2
+ vseq.b vr0, vr0, vr2
+ vseq.b vr1, vr1, vr2
+ vmsknz.b vr0, vr0
+
+
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+ and t0, t0, t2
+
+ bltu a2, t1, L(end)
+ bnez t0, L(found)
+ bstrins.d a0, zero, 4, 0
+L(loop):
+ vld vr0, a3, -32
+
+ vld vr1, a3, -16
+ addi.d a3, a3, -32
+ vseq.b vr0, vr0, vr2
+ vseq.b vr1, vr1, vr2
+
+ beq a0, a3, L(out)
+ vmax.bu vr3, vr0, vr1
+ vseteqz.v fcc0, vr3
+ bcnez fcc0, L(loop)
+
+
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+
+L(found):
+ addi.d a0, a3, 31
+ clz.w t1, t0
+ sub.d a0, a0, t1
+ jr ra
+
+L(out):
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+
+L(end):
+ sll.d t2, t3, t4
+ and t0, t0, t2
+ addi.d a0, a3, 31
+ clz.w t1, t0
+
+
+ sub.d a0, a0, t1
+ maskeqz a0, a0, t0
+ jr ra
+L(ret0):
+ move a0, zero
+
+ jr ra
+END(MEMRCHR)
+
+libc_hidden_builtin_def (MEMRCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr.c b/sysdeps/loongarch/lp64/multiarch/memrchr.c
new file mode 100644
index 00000000..8baba9ab
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memrchr.c
@@ -0,0 +1,33 @@
+/* Multiple versions of memrchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define memrchr __redirect_memrchr
+# include <string.h>
+# undef memrchr
+
+# define SYMBOL_NAME memrchr
+# include "ifunc-memrchr.h"
+
+libc_ifunc_redirected (__redirect_memrchr, __memrchr, IFUNC_SELECTOR ());
+libc_hidden_def (__memrchr)
+weak_alias (__memrchr, memrchr)
+
+#endif
--
2.33.0

View file

@ -0,0 +1,784 @@
From 14032f7bbe18443af8492f5d0365f72b76701673 Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Mon, 28 Aug 2023 10:08:38 +0800
Subject: [PATCH 17/29] LoongArch: Add ifunc support for memset{aligned,
unaligned, lsx, lasx}
According to the glibc memset microbenchmark results, a few cases with length
less than 8 show performance degradation for the LSX and LASX versions;
overall, the LASX version reduces the runtime by about 15%-75% and the LSX
version by about 15%-50%.
The unaligned version uses unaligned memory accesses to set data shorter than
64 bytes and to bring the address to 8-byte alignment; for that part, its
performance is better than the aligned version's. Compared with the generic
version, performance is close when the length is larger than 128 bytes; for
lengths of 8-128 bytes, the unaligned version reduces the runtime by about
30%-70% and the aligned version by about 20%-50%.
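As a rough illustration only, the head/tail idea behind the unaligned version
can be sketched in C as below (a hypothetical sketch with made-up names, not
the glibc code): two overlapping unaligned 8-byte stores cover the ends, and
aligned 8-byte stores fill the middle.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical sketch: memcpy of an 8-byte pattern stands in for the
   unaligned st.d stores used by the assembly; the real code also covers
   sizes up to 64 bytes purely with unaligned stores.  */
static void *
memset_unaligned_sketch (void *dst, int c, size_t n)
{
  uint64_t v = 0x0101010101010101ULL * (unsigned char) c;
  unsigned char *p = dst;

  if (n >= 16)
    {
      /* Unaligned 8-byte store at each end, possibly overlapping the middle.  */
      memcpy (p, &v, 8);
      memcpy (p + n - 8, &v, 8);
      /* Aligned 8-byte stores between the first aligned address and the tail.  */
      unsigned char *a = (unsigned char *) (((uintptr_t) p + 8) & ~(uintptr_t) 7);
      unsigned char *e = p + n - 8;
      for (; a < e; a += 8)
        memcpy (a, &v, 8);
    }
  else
    for (size_t i = 0; i < n; i++)
      p[i] = (unsigned char) c;

  return dst;
}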
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 4 +
.../lp64/multiarch/dl-symbol-redir-ifunc.h | 24 +++
.../lp64/multiarch/ifunc-impl-list.c | 10 +
.../loongarch/lp64/multiarch/memset-aligned.S | 174 ++++++++++++++++++
.../loongarch/lp64/multiarch/memset-lasx.S | 142 ++++++++++++++
sysdeps/loongarch/lp64/multiarch/memset-lsx.S | 135 ++++++++++++++
.../lp64/multiarch/memset-unaligned.S | 162 ++++++++++++++++
sysdeps/loongarch/lp64/multiarch/memset.c | 37 ++++
8 files changed, 688 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-unaligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memset.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 7b87bc90..216886c5 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -30,5 +30,9 @@ sysdep_routines += \
memrchr-generic \
memrchr-lsx \
memrchr-lasx \
+ memset-aligned \
+ memset-unaligned \
+ memset-lsx \
+ memset-lasx \
# sysdep_routines
endif
diff --git a/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h
new file mode 100644
index 00000000..e2723873
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h
@@ -0,0 +1,24 @@
+/* Symbol redirection for loader/static initialization code.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _DL_IFUNC_GENERIC_H
+#define _DL_IFUNC_GENERIC_H
+
+asm ("memset = __memset_aligned");
+
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 8bd5489e..37f60dde 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -117,5 +117,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#endif
IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic)
)
+
+ IFUNC_IMPL (i, name, memset,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LASX, __memset_lasx)
+ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LSX, __memset_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_UAL, __memset_unaligned)
+ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_aligned)
+ )
+
return i;
}
diff --git a/sysdeps/loongarch/lp64/multiarch/memset-aligned.S b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S
new file mode 100644
index 00000000..1fce95b7
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S
@@ -0,0 +1,174 @@
+/* Optimized memset aligned implementation using basic LoongArch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define MEMSET_NAME __memset_aligned
+#else
+# define MEMSET_NAME memset
+#endif
+
+LEAF(MEMSET_NAME, 6)
+ move t0, a0
+ andi a3, a0, 0x7
+ li.w t6, 16
+ beqz a3, L(align)
+ bltu a2, t6, L(short_data)
+
+L(make_align):
+ li.w t8, 8
+ sub.d t2, t8, a3
+ pcaddi t1, 11
+ slli.d t3, t2, 2
+ sub.d t1, t1, t3
+ jr t1
+
+L(al7):
+ st.b a1, t0, 6
+L(al6):
+ st.b a1, t0, 5
+L(al5):
+ st.b a1, t0, 4
+L(al4):
+ st.b a1, t0, 3
+L(al3):
+ st.b a1, t0, 2
+L(al2):
+ st.b a1, t0, 1
+L(al1):
+ st.b a1, t0, 0
+L(al0):
+ add.d t0, t0, t2
+ sub.d a2, a2, t2
+
+L(align):
+ bstrins.d a1, a1, 15, 8
+ bstrins.d a1, a1, 31, 16
+ bstrins.d a1, a1, 63, 32
+ bltu a2, t6, L(less_16bytes)
+
+ andi a4, a2, 0x3f
+ beq a4, a2, L(less_64bytes)
+
+ sub.d t1, a2, a4
+ move a2, a4
+ add.d a5, t0, t1
+
+L(loop_64bytes):
+ addi.d t0, t0, 64
+ st.d a1, t0, -64
+ st.d a1, t0, -56
+ st.d a1, t0, -48
+ st.d a1, t0, -40
+
+ st.d a1, t0, -32
+ st.d a1, t0, -24
+ st.d a1, t0, -16
+ st.d a1, t0, -8
+ bne t0, a5, L(loop_64bytes)
+
+L(less_64bytes):
+ srai.d a4, a2, 5
+ beqz a4, L(less_32bytes)
+ addi.d a2, a2, -32
+ st.d a1, t0, 0
+
+ st.d a1, t0, 8
+ st.d a1, t0, 16
+ st.d a1, t0, 24
+ addi.d t0, t0, 32
+
+L(less_32bytes):
+ bltu a2, t6, L(less_16bytes)
+ addi.d a2, a2, -16
+ st.d a1, t0, 0
+ st.d a1, t0, 8
+ addi.d t0, t0, 16
+
+L(less_16bytes):
+ srai.d a4, a2, 3
+ beqz a4, L(less_8bytes)
+ addi.d a2, a2, -8
+ st.d a1, t0, 0
+ addi.d t0, t0, 8
+
+L(less_8bytes):
+ beqz a2, L(less_1byte)
+ srai.d a4, a2, 2
+ beqz a4, L(less_4bytes)
+ addi.d a2, a2, -4
+ st.w a1, t0, 0
+ addi.d t0, t0, 4
+
+L(less_4bytes):
+ srai.d a3, a2, 1
+ beqz a3, L(less_2bytes)
+ addi.d a2, a2, -2
+ st.h a1, t0, 0
+ addi.d t0, t0, 2
+
+L(less_2bytes):
+ beqz a2, L(less_1byte)
+ st.b a1, t0, 0
+L(less_1byte):
+ jr ra
+
+L(short_data):
+ pcaddi t1, 19
+ slli.d t3, a2, 2
+ sub.d t1, t1, t3
+ jr t1
+L(short_15):
+ st.b a1, a0, 14
+L(short_14):
+ st.b a1, a0, 13
+L(short_13):
+ st.b a1, a0, 12
+L(short_12):
+ st.b a1, a0, 11
+L(short_11):
+ st.b a1, a0, 10
+L(short_10):
+ st.b a1, a0, 9
+L(short_9):
+ st.b a1, a0, 8
+L(short_8):
+ st.b a1, a0, 7
+L(short_7):
+ st.b a1, a0, 6
+L(short_6):
+ st.b a1, a0, 5
+L(short_5):
+ st.b a1, a0, 4
+L(short_4):
+ st.b a1, a0, 3
+L(short_3):
+ st.b a1, a0, 2
+L(short_2):
+ st.b a1, a0, 1
+L(short_1):
+ st.b a1, a0, 0
+L(short_0):
+ jr ra
+END(MEMSET_NAME)
+
+libc_hidden_builtin_def (MEMSET_NAME)
diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lasx.S b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S
new file mode 100644
index 00000000..041abbac
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S
@@ -0,0 +1,142 @@
+/* Optimized memset implementation using LoongArch LASX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define MEMSET __memset_lasx
+
+LEAF(MEMSET, 6)
+ li.d t1, 32
+ move a3, a0
+ xvreplgr2vr.b xr0, a1
+ add.d a4, a0, a2
+
+ bgeu t1, a2, L(less_32bytes)
+ li.d t3, 128
+ li.d t2, 64
+ blt t3, a2, L(long_bytes)
+
+L(less_128bytes):
+ bgeu t2, a2, L(less_64bytes)
+ xvst xr0, a3, 0
+ xvst xr0, a3, 32
+ xvst xr0, a4, -32
+
+ xvst xr0, a4, -64
+ jr ra
+L(less_64bytes):
+ xvst xr0, a3, 0
+ xvst xr0, a4, -32
+
+
+ jr ra
+L(less_32bytes):
+ srli.d t0, a2, 4
+ beqz t0, L(less_16bytes)
+ vst vr0, a3, 0
+
+ vst vr0, a4, -16
+ jr ra
+L(less_16bytes):
+ srli.d t0, a2, 3
+ beqz t0, L(less_8bytes)
+
+ vstelm.d vr0, a3, 0, 0
+ vstelm.d vr0, a4, -8, 0
+ jr ra
+L(less_8bytes):
+ srli.d t0, a2, 2
+
+ beqz t0, L(less_4bytes)
+ vstelm.w vr0, a3, 0, 0
+ vstelm.w vr0, a4, -4, 0
+ jr ra
+
+
+L(less_4bytes):
+ srli.d t0, a2, 1
+ beqz t0, L(less_2bytes)
+ vstelm.h vr0, a3, 0, 0
+ vstelm.h vr0, a4, -2, 0
+
+ jr ra
+L(less_2bytes):
+ beqz a2, L(less_1bytes)
+ st.b a1, a3, 0
+L(less_1bytes):
+ jr ra
+
+L(long_bytes):
+ xvst xr0, a3, 0
+ bstrins.d a3, zero, 4, 0
+ addi.d a3, a3, 32
+ sub.d a2, a4, a3
+
+ andi t0, a2, 0xff
+ beq t0, a2, L(long_end)
+ move a2, t0
+ sub.d t0, a4, t0
+
+
+L(loop_256):
+ xvst xr0, a3, 0
+ xvst xr0, a3, 32
+ xvst xr0, a3, 64
+ xvst xr0, a3, 96
+
+ xvst xr0, a3, 128
+ xvst xr0, a3, 160
+ xvst xr0, a3, 192
+ xvst xr0, a3, 224
+
+ addi.d a3, a3, 256
+ bne a3, t0, L(loop_256)
+L(long_end):
+ bltu a2, t3, L(end_less_128)
+ addi.d a2, a2, -128
+
+ xvst xr0, a3, 0
+ xvst xr0, a3, 32
+ xvst xr0, a3, 64
+ xvst xr0, a3, 96
+
+
+ addi.d a3, a3, 128
+L(end_less_128):
+ bltu a2, t2, L(end_less_64)
+ addi.d a2, a2, -64
+ xvst xr0, a3, 0
+
+ xvst xr0, a3, 32
+ addi.d a3, a3, 64
+L(end_less_64):
+ bltu a2, t1, L(end_less_32)
+ xvst xr0, a3, 0
+
+L(end_less_32):
+ xvst xr0, a4, -32
+ jr ra
+END(MEMSET)
+
+libc_hidden_builtin_def (MEMSET)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lsx.S b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S
new file mode 100644
index 00000000..3d3982aa
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S
@@ -0,0 +1,135 @@
+/* Optimized memset implementation using LoongArch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define MEMSET __memset_lsx
+
+LEAF(MEMSET, 6)
+ li.d t1, 16
+ move a3, a0
+ vreplgr2vr.b vr0, a1
+ add.d a4, a0, a2
+
+ bgeu t1, a2, L(less_16bytes)
+ li.d t3, 64
+ li.d t2, 32
+ bgeu a2, t3, L(long_bytes)
+
+L(less_64bytes):
+ bgeu t2, a2, L(less_32bytes)
+ vst vr0, a3, 0
+ vst vr0, a3, 16
+ vst vr0, a4, -32
+
+ vst vr0, a4, -16
+ jr ra
+L(less_32bytes):
+ vst vr0, a3, 0
+ vst vr0, a4, -16
+
+
+ jr ra
+L(less_16bytes):
+ srli.d t0, a2, 3
+ beqz t0, L(less_8bytes)
+ vstelm.d vr0, a3, 0, 0
+
+ vstelm.d vr0, a4, -8, 0
+ jr ra
+L(less_8bytes):
+ srli.d t0, a2, 2
+ beqz t0, L(less_4bytes)
+
+ vstelm.w vr0, a3, 0, 0
+ vstelm.w vr0, a4, -4, 0
+ jr ra
+L(less_4bytes):
+ srli.d t0, a2, 1
+
+ beqz t0, L(less_2bytes)
+ vstelm.h vr0, a3, 0, 0
+ vstelm.h vr0, a4, -2, 0
+ jr ra
+
+
+L(less_2bytes):
+ beqz a2, L(less_1bytes)
+ vstelm.b vr0, a3, 0, 0
+L(less_1bytes):
+ jr ra
+L(long_bytes):
+ vst vr0, a3, 0
+
+ bstrins.d a3, zero, 3, 0
+ addi.d a3, a3, 16
+ sub.d a2, a4, a3
+ andi t0, a2, 0x7f
+
+ beq t0, a2, L(long_end)
+ move a2, t0
+ sub.d t0, a4, t0
+
+L(loop_128):
+ vst vr0, a3, 0
+
+ vst vr0, a3, 16
+ vst vr0, a3, 32
+ vst vr0, a3, 48
+ vst vr0, a3, 64
+
+
+ vst vr0, a3, 80
+ vst vr0, a3, 96
+ vst vr0, a3, 112
+ addi.d a3, a3, 128
+
+ bne a3, t0, L(loop_128)
+L(long_end):
+ bltu a2, t3, L(end_less_64)
+ addi.d a2, a2, -64
+ vst vr0, a3, 0
+
+ vst vr0, a3, 16
+ vst vr0, a3, 32
+ vst vr0, a3, 48
+ addi.d a3, a3, 64
+
+L(end_less_64):
+ bltu a2, t2, L(end_less_32)
+ addi.d a2, a2, -32
+ vst vr0, a3, 0
+ vst vr0, a3, 16
+
+ addi.d a3, a3, 32
+L(end_less_32):
+ bltu a2, t1, L(end_less_16)
+ vst vr0, a3, 0
+
+L(end_less_16):
+ vst vr0, a4, -16
+ jr ra
+END(MEMSET)
+
+libc_hidden_builtin_def (MEMSET)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S
new file mode 100644
index 00000000..f7d32039
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S
@@ -0,0 +1,162 @@
+/* Optimized memset unaligned implementation using basic LoongArch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+
+# define MEMSET_NAME __memset_unaligned
+
+#define ST_128(n) \
+ st.d a1, a0, n; \
+ st.d a1, a0, n+8 ; \
+ st.d a1, a0, n+16 ; \
+ st.d a1, a0, n+24 ; \
+ st.d a1, a0, n+32 ; \
+ st.d a1, a0, n+40 ; \
+ st.d a1, a0, n+48 ; \
+ st.d a1, a0, n+56 ; \
+ st.d a1, a0, n+64 ; \
+ st.d a1, a0, n+72 ; \
+ st.d a1, a0, n+80 ; \
+ st.d a1, a0, n+88 ; \
+ st.d a1, a0, n+96 ; \
+ st.d a1, a0, n+104; \
+ st.d a1, a0, n+112; \
+ st.d a1, a0, n+120;
+
+LEAF(MEMSET_NAME, 6)
+ bstrins.d a1, a1, 15, 8
+ add.d t7, a0, a2
+ bstrins.d a1, a1, 31, 16
+ move t0, a0
+
+ bstrins.d a1, a1, 63, 32
+ srai.d t8, a2, 4
+ beqz t8, L(less_16bytes)
+ srai.d t8, a2, 6
+
+ bnez t8, L(more_64bytes)
+ srai.d t8, a2, 5
+ beqz t8, L(less_32bytes)
+
+ st.d a1, a0, 0
+ st.d a1, a0, 8
+ st.d a1, a0, 16
+ st.d a1, a0, 24
+
+ st.d a1, t7, -32
+ st.d a1, t7, -24
+ st.d a1, t7, -16
+ st.d a1, t7, -8
+
+ jr ra
+
+L(less_32bytes):
+ st.d a1, a0, 0
+ st.d a1, a0, 8
+ st.d a1, t7, -16
+ st.d a1, t7, -8
+
+ jr ra
+
+L(less_16bytes):
+ srai.d t8, a2, 3
+ beqz t8, L(less_8bytes)
+ st.d a1, a0, 0
+ st.d a1, t7, -8
+
+ jr ra
+
+L(less_8bytes):
+ srai.d t8, a2, 2
+ beqz t8, L(less_4bytes)
+ st.w a1, a0, 0
+ st.w a1, t7, -4
+
+ jr ra
+
+L(less_4bytes):
+ srai.d t8, a2, 1
+ beqz t8, L(less_2bytes)
+ st.h a1, a0, 0
+ st.h a1, t7, -2
+
+ jr ra
+
+L(less_2bytes):
+ beqz a2, L(less_1bytes)
+ st.b a1, a0, 0
+
+ jr ra
+
+L(less_1bytes):
+ jr ra
+
+L(more_64bytes):
+ srli.d a0, a0, 3
+ slli.d a0, a0, 3
+ addi.d a0, a0, 0x8
+ st.d a1, t0, 0
+
+ sub.d t2, t0, a0
+ add.d a2, t2, a2
+ addi.d a2, a2, -0x80
+ blt a2, zero, L(end_unalign_proc)
+
+L(loop_less):
+ ST_128(0)
+ addi.d a0, a0, 0x80
+ addi.d a2, a2, -0x80
+ bge a2, zero, L(loop_less)
+
+L(end_unalign_proc):
+ addi.d a2, a2, 0x80
+ pcaddi t1, 20
+ andi t5, a2, 0x78
+ srli.d t5, t5, 1
+
+ sub.d t1, t1, t5
+ jr t1
+
+ st.d a1, a0, 112
+ st.d a1, a0, 104
+ st.d a1, a0, 96
+ st.d a1, a0, 88
+ st.d a1, a0, 80
+ st.d a1, a0, 72
+ st.d a1, a0, 64
+ st.d a1, a0, 56
+ st.d a1, a0, 48
+ st.d a1, a0, 40
+ st.d a1, a0, 32
+ st.d a1, a0, 24
+ st.d a1, a0, 16
+ st.d a1, a0, 8
+ st.d a1, a0, 0
+ st.d a1, t7, -8
+
+ move a0, t0
+ jr ra
+END(MEMSET_NAME)
+
+libc_hidden_builtin_def (MEMSET_NAME)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memset.c b/sysdeps/loongarch/lp64/multiarch/memset.c
new file mode 100644
index 00000000..3ff60d8a
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memset.c
@@ -0,0 +1,37 @@
+/* Multiple versions of memset.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define memset __redirect_memset
+# include <string.h>
+# undef memset
+
+# define SYMBOL_NAME memset
+# include "ifunc-lasx.h"
+
+libc_ifunc_redirected (__redirect_memset, memset,
+ IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (memset, __GI_memset, __redirect_memset)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memset);
+# endif
+
+#endif
--
2.33.0

View file

@ -0,0 +1,448 @@
From b412bcb2cf4914a664bcd24924d670a2e37394b3 Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Mon, 28 Aug 2023 10:08:35 +0800
Subject: [PATCH 14/29] LoongArch: Add ifunc support for rawmemchr{aligned,
lsx, lasx}
According to the glibc rawmemchr microbenchmark, a few cases tested with the
char '\0' show performance degradation because the LASX and LSX versions do
not handle '\0' separately. Overall, the rawmemchr-lasx implementation reduces
the runtime by about 40%-80%, the rawmemchr-lsx implementation by about
40%-66%, and the rawmemchr-aligned implementation by about 20%-40%.
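For reference, the zero/match detection used by the aligned version is the
classic word-at-a-time bit trick; a hypothetical C sketch (little-endian,
assuming an 8-byte-aligned pointer, not the glibc code) looks like this:

#include <stdint.h>
#include <string.h>

/* (w - 0x0101..) & ~w & 0x8080.. is nonzero iff some byte of w is zero,
   so XORing with the repeated search byte turns "find c" into "find 0".  */
static const char *
rawmemchr_sketch (const char *s, int c)
{
  const uint64_t ones  = 0x0101010101010101ULL;
  const uint64_t highs = 0x8080808080808080ULL;
  uint64_t rep = ones * (unsigned char) c;

  /* The real code first handles the unaligned head; assume alignment here.  */
  for (;; s += 8)
    {
      uint64_t w, x, found;
      memcpy (&w, s, 8);
      x = w ^ rep;                       /* matching bytes become 0 */
      found = (x - ones) & ~x & highs;
      if (found)
        return s + (__builtin_ctzll (found) >> 3);   /* little-endian */
    }
}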
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
.../lp64/multiarch/ifunc-impl-list.c | 8 ++
.../lp64/multiarch/ifunc-rawmemchr.h | 40 ++++++
.../lp64/multiarch/rawmemchr-aligned.S | 124 ++++++++++++++++++
.../loongarch/lp64/multiarch/rawmemchr-lasx.S | 82 ++++++++++++
.../loongarch/lp64/multiarch/rawmemchr-lsx.S | 71 ++++++++++
sysdeps/loongarch/lp64/multiarch/rawmemchr.c | 37 ++++++
7 files changed, 365 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 5d7ae7ae..64416b02 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -21,5 +21,8 @@ sysdep_routines += \
memmove-unaligned \
memmove-lsx \
memmove-lasx \
+ rawmemchr-aligned \
+ rawmemchr-lsx \
+ rawmemchr-lasx \
# sysdep_routines
endif
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index c8ba87bd..3db9af14 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -94,5 +94,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_aligned)
)
+ IFUNC_IMPL (i, name, rawmemchr,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LASX, __rawmemchr_lasx)
+ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LSX, __rawmemchr_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned)
+ )
+
return i;
}
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h
new file mode 100644
index 00000000..a7bb4cf9
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h
@@ -0,0 +1,40 @@
+/* Common definition for rawmemchr ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S
new file mode 100644
index 00000000..9c7155ae
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S
@@ -0,0 +1,124 @@
+/* Optimized rawmemchr implementation using basic LoongArch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define RAWMEMCHR_NAME __rawmemchr_aligned
+#else
+# define RAWMEMCHR_NAME __rawmemchr
+#endif
+
+LEAF(RAWMEMCHR_NAME, 6)
+ andi t1, a0, 0x7
+ bstrins.d a0, zero, 2, 0
+ lu12i.w a2, 0x01010
+ bstrins.d a1, a1, 15, 8
+
+ ld.d t0, a0, 0
+ slli.d t1, t1, 3
+ ori a2, a2, 0x101
+ bstrins.d a1, a1, 31, 16
+
+ li.w t8, -1
+ bstrins.d a1, a1, 63, 32
+ bstrins.d a2, a2, 63, 32
+ sll.d t2, t8, t1
+
+ sll.d t3, a1, t1
+ orn t0, t0, t2
+ slli.d a3, a2, 7
+ beqz a1, L(find_zero)
+
+ xor t0, t0, t3
+ sub.d t1, t0, a2
+ andn t2, a3, t0
+ and t3, t1, t2
+
+ bnez t3, L(count_pos)
+ addi.d a0, a0, 8
+
+L(loop):
+ ld.d t0, a0, 0
+ xor t0, t0, a1
+
+ sub.d t1, t0, a2
+ andn t2, a3, t0
+ and t3, t1, t2
+ bnez t3, L(count_pos)
+
+ ld.d t0, a0, 8
+ addi.d a0, a0, 16
+ xor t0, t0, a1
+ sub.d t1, t0, a2
+
+ andn t2, a3, t0
+ and t3, t1, t2
+ beqz t3, L(loop)
+ addi.d a0, a0, -8
+L(count_pos):
+ ctz.d t0, t3
+ srli.d t0, t0, 3
+ add.d a0, a0, t0
+ jr ra
+
+L(loop_7bit):
+ ld.d t0, a0, 0
+L(find_zero):
+ sub.d t1, t0, a2
+ and t2, t1, a3
+ bnez t2, L(more_check)
+
+ ld.d t0, a0, 8
+ addi.d a0, a0, 16
+ sub.d t1, t0, a2
+ and t2, t1, a3
+
+ beqz t2, L(loop_7bit)
+ addi.d a0, a0, -8
+
+L(more_check):
+ andn t2, a3, t0
+ and t3, t1, t2
+ bnez t3, L(count_pos)
+ addi.d a0, a0, 8
+
+L(loop_8bit):
+ ld.d t0, a0, 0
+
+ sub.d t1, t0, a2
+ andn t2, a3, t0
+ and t3, t1, t2
+ bnez t3, L(count_pos)
+
+ ld.d t0, a0, 8
+ addi.d a0, a0, 16
+ sub.d t1, t0, a2
+
+ andn t2, a3, t0
+ and t3, t1, t2
+ beqz t3, L(loop_8bit)
+
+ addi.d a0, a0, -8
+ b L(count_pos)
+
+END(RAWMEMCHR_NAME)
+
+libc_hidden_builtin_def (__rawmemchr)
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S
new file mode 100644
index 00000000..be2eb59d
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S
@@ -0,0 +1,82 @@
+/* Optimized rawmemchr implementation using LoongArch LASX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+#include <sys/regdef.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define RAWMEMCHR __rawmemchr_lasx
+
+LEAF(RAWMEMCHR, 6)
+ move a2, a0
+ bstrins.d a0, zero, 5, 0
+ xvld xr0, a0, 0
+ xvld xr1, a0, 32
+
+ xvreplgr2vr.b xr2, a1
+ xvseq.b xr0, xr0, xr2
+ xvseq.b xr1, xr1, xr2
+ xvmsknz.b xr0, xr0
+
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+ xvpickve.w xr4, xr1, 4
+ vilvl.h vr0, vr3, vr0
+
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+ sra.d t0, t0, a2
+
+
+ beqz t0, L(loop)
+ ctz.d t0, t0
+ add.d a0, a2, t0
+ jr ra
+
+L(loop):
+ xvld xr0, a0, 64
+ xvld xr1, a0, 96
+ addi.d a0, a0, 64
+ xvseq.b xr0, xr0, xr2
+
+ xvseq.b xr1, xr1, xr2
+ xvmax.bu xr3, xr0, xr1
+ xvseteqz.v fcc0, xr3
+ bcnez fcc0, L(loop)
+
+ xvmsknz.b xr0, xr0
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+ xvpickve.w xr4, xr1, 4
+
+
+ vilvl.h vr0, vr3, vr0
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+
+ ctz.d t0, t0
+ add.d a0, a0, t0
+ jr ra
+END(RAWMEMCHR)
+
+libc_hidden_builtin_def (RAWMEMCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S
new file mode 100644
index 00000000..2f6fe024
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S
@@ -0,0 +1,71 @@
+/* Optimized rawmemchr implementation using LoongArch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define RAWMEMCHR __rawmemchr_lsx
+
+LEAF(RAWMEMCHR, 6)
+ move a2, a0
+ bstrins.d a0, zero, 4, 0
+ vld vr0, a0, 0
+ vld vr1, a0, 16
+
+ vreplgr2vr.b vr2, a1
+ vseq.b vr0, vr0, vr2
+ vseq.b vr1, vr1, vr2
+ vmsknz.b vr0, vr0
+
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+ sra.w t0, t0, a2
+
+ beqz t0, L(loop)
+ ctz.w t0, t0
+ add.d a0, a2, t0
+ jr ra
+
+
+L(loop):
+ vld vr0, a0, 32
+ vld vr1, a0, 48
+ addi.d a0, a0, 32
+ vseq.b vr0, vr0, vr2
+
+ vseq.b vr1, vr1, vr2
+ vmax.bu vr3, vr0, vr1
+ vseteqz.v fcc0, vr3
+ bcnez fcc0, L(loop)
+
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+
+ ctz.w t0, t0
+ add.d a0, a0, t0
+ jr ra
+END(RAWMEMCHR)
+
+libc_hidden_builtin_def (RAWMEMCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr.c b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c
new file mode 100644
index 00000000..89c7ffff
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c
@@ -0,0 +1,37 @@
+/* Multiple versions of rawmemchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define rawmemchr __redirect_rawmemchr
+# define __rawmemchr __redirect___rawmemchr
+# include <string.h>
+# undef rawmemchr
+# undef __rawmemchr
+
+# define SYMBOL_NAME rawmemchr
+# include "ifunc-rawmemchr.h"
+
+libc_ifunc_redirected (__redirect_rawmemchr, __rawmemchr,
+ IFUNC_SELECTOR ());
+weak_alias (__rawmemchr, rawmemchr)
+# ifdef SHARED
+__hidden_ver1 (__rawmemchr, __GI___rawmemchr, __redirect___rawmemchr)
+ __attribute__((visibility ("hidden")));
+# endif
+#endif
--
2.33.0

View file

@ -0,0 +1,499 @@
From e258cfcf92f5e31e902fa045b41652f00fcf2521 Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Thu, 24 Aug 2023 16:50:18 +0800
Subject: [PATCH 09/29] LoongArch: Add ifunc support for strcmp{aligned, lsx}
Based on the glibc microbenchmark, the strcmp-aligned implementation reduces
the runtime by 0%-10% for aligned comparison and by 10%-20% for unaligned
comparison, and the strcmp-lsx implementation reduces the runtime by 0%-50%.
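The aligned version follows the usual word-at-a-time scheme: compare 8 bytes
per iteration and stop at the first differing word or at a word containing
'\0'. A hypothetical C sketch of that scheme (assuming both strings are
already 8-byte aligned; not the glibc code):

#include <stdint.h>
#include <string.h>

static int
strcmp_aligned_sketch (const char *a, const char *b)
{
  const uint64_t ones  = 0x0101010101010101ULL;
  const uint64_t highs = 0x8080808080808080ULL;

  /* The real code first brings both pointers to the same alignment.  */
  for (;; a += 8, b += 8)
    {
      uint64_t wa, wb;
      memcpy (&wa, a, 8);
      memcpy (&wb, b, 8);
      /* Stop on a difference or on a word that contains a zero byte.  */
      if (wa != wb || ((wa - ones) & ~wa & highs))
        break;
    }
  /* Resolve the result byte by byte within the final word.  */
  for (;; a++, b++)
    {
      unsigned char ca = (unsigned char) *a, cb = (unsigned char) *b;
      if (ca != cb || ca == '\0')
        return ca - cb;
    }
}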
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 2 +
.../lp64/multiarch/ifunc-impl-list.c | 7 +
.../loongarch/lp64/multiarch/ifunc-strcmp.h | 38 ++++
.../loongarch/lp64/multiarch/strcmp-aligned.S | 179 ++++++++++++++++++
sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S | 165 ++++++++++++++++
sysdeps/loongarch/lp64/multiarch/strcmp.c | 35 ++++
6 files changed, 426 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index c4dd3143..d5a500de 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -12,6 +12,8 @@ sysdep_routines += \
strchrnul-aligned \
strchrnul-lsx \
strchrnul-lasx \
+ strcmp-aligned \
+ strcmp-lsx \
memcpy-aligned \
memcpy-unaligned \
memmove-unaligned \
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 7cec0b77..9183b7da 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -62,6 +62,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned)
)
+ IFUNC_IMPL (i, name, strcmp,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, strcmp, SUPPORT_LSX, __strcmp_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_aligned)
+ )
+
IFUNC_IMPL (i, name, memcpy,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx)
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h
new file mode 100644
index 00000000..ca26352b
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h
@@ -0,0 +1,38 @@
+/* Common definition for strcmp ifunc selection.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
new file mode 100644
index 00000000..f5f4f336
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
@@ -0,0 +1,179 @@
+/* Optimized strcmp implementation using basic Loongarch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STRCMP_NAME __strcmp_aligned
+#else
+# define STRCMP_NAME strcmp
+#endif
+
+LEAF(STRCMP_NAME, 6)
+ lu12i.w a4, 0x01010
+ andi a2, a0, 0x7
+ ori a4, a4, 0x101
+ andi a3, a1, 0x7
+
+ bstrins.d a4, a4, 63, 32
+ li.d t7, -1
+ li.d t8, 8
+ slli.d a5, a4, 7
+
+ bne a2, a3, L(unaligned)
+ bstrins.d a0, zero, 2, 0
+ bstrins.d a1, zero, 2, 0
+ ld.d t0, a0, 0
+
+ ld.d t1, a1, 0
+ slli.d t3, a2, 3
+ sll.d t2, t7, t3
+ orn t0, t0, t2
+
+
+ orn t1, t1, t2
+ sub.d t2, t0, a4
+ andn t3, a5, t0
+ and t2, t2, t3
+
+ bne t0, t1, L(al_end)
+L(al_loop):
+ bnez t2, L(ret0)
+ ldx.d t0, a0, t8
+ ldx.d t1, a1, t8
+
+ addi.d t8, t8, 8
+ sub.d t2, t0, a4
+ andn t3, a5, t0
+ and t2, t2, t3
+
+ beq t0, t1, L(al_loop)
+L(al_end):
+ xor t3, t0, t1
+ or t2, t2, t3
+ ctz.d t3, t2
+
+
+ bstrins.d t3, zero, 2, 0
+ srl.d t0, t0, t3
+ srl.d t1, t1, t3
+ andi t0, t0, 0xff
+
+ andi t1, t1, 0xff
+ sub.d a0, t0, t1
+ jr ra
+ nop
+
+L(ret0):
+ move a0, zero
+ jr ra
+ nop
+ nop
+
+L(unaligned):
+ slt a6, a3, a2
+ xor t0, a0, a1
+ maskeqz t0, t0, a6
+ xor a0, a0, t0
+
+
+ xor a1, a1, t0
+ andi a2, a0, 0x7
+ andi a3, a1, 0x7
+ bstrins.d a0, zero, 2, 0
+
+ bstrins.d a1, zero, 2, 0
+ ld.d t4, a0, 0
+ ld.d t1, a1, 0
+ slli.d a2, a2, 3
+
+ slli.d a3, a3, 3
+ srl.d t0, t4, a2
+ srl.d t1, t1, a3
+ srl.d t5, t7, a3
+
+ orn t0, t0, t5
+ orn t1, t1, t5
+ bne t0, t1, L(not_equal)
+ sll.d t5, t7, a2
+
+
+ sub.d a3, a2, a3
+ orn t4, t4, t5
+ sub.d a2, zero, a3
+ sub.d t2, t4, a4
+
+ andn t3, a5, t4
+ and t2, t2, t3
+ bnez t2, L(find_zero)
+L(un_loop):
+ srl.d t5, t4, a3
+
+ ldx.d t4, a0, t8
+ ldx.d t1, a1, t8
+ addi.d t8, t8, 8
+ sll.d t0, t4, a2
+
+ or t0, t0, t5
+ bne t0, t1, L(not_equal)
+ sub.d t2, t4, a4
+ andn t3, a5, t4
+
+
+ and t2, t2, t3
+ beqz t2, L(un_loop)
+L(find_zero):
+ sub.d t2, t0, a4
+ andn t3, a5, t0
+
+ and t2, t2, t3
+ bnez t2, L(ret0)
+ ldx.d t1, a1, t8
+ srl.d t0, t4, a3
+
+L(not_equal):
+ sub.d t2, t0, a4
+ andn t3, a5, t0
+ and t2, t2, t3
+ xor t3, t0, t1
+
+ or t2, t2, t3
+L(un_end):
+ ctz.d t3, t2
+ bstrins.d t3, zero, 2, 0
+ srl.d t0, t0, t3
+
+
+ srl.d t1, t1, t3
+ andi t0, t0, 0xff
+ andi t1, t1, 0xff
+ sub.d t2, t0, t1
+
+
+ sub.d t3, t1, t0
+ masknez t0, t2, a6
+ maskeqz t1, t3, a6
+ or a0, t0, t1
+
+ jr ra
+END(STRCMP_NAME)
+
+libc_hidden_builtin_def (STRCMP_NAME)
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
new file mode 100644
index 00000000..2e177a38
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
@@ -0,0 +1,165 @@
+/* Optimized strcmp implementation using Loongarch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define STRCMP __strcmp_lsx
+
+LEAF(STRCMP, 6)
+ pcalau12i t0, %pc_hi20(L(INDEX))
+ andi a2, a0, 0xf
+ vld vr2, t0, %pc_lo12(L(INDEX))
+ andi a3, a1, 0xf
+
+ bne a2, a3, L(unaligned)
+ bstrins.d a0, zero, 3, 0
+ bstrins.d a1, zero, 3, 0
+ vld vr0, a0, 0
+
+ vld vr1, a1, 0
+ vreplgr2vr.b vr3, a2
+ vslt.b vr2, vr2, vr3
+ vseq.b vr3, vr0, vr1
+
+ vmin.bu vr3, vr0, vr3
+ vor.v vr3, vr3, vr2
+ vsetanyeqz.b fcc0, vr3
+ bcnez fcc0, L(al_out)
+
+
+L(al_loop):
+ vld vr0, a0, 16
+ vld vr1, a1, 16
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+
+ vseq.b vr3, vr0, vr1
+ vmin.bu vr3, vr0, vr3
+ vsetanyeqz.b fcc0, vr3
+ bceqz fcc0, L(al_loop)
+
+L(al_out):
+ vseqi.b vr3, vr3, 0
+ vfrstpi.b vr3, vr3, 0
+ vshuf.b vr0, vr0, vr0, vr3
+ vshuf.b vr1, vr1, vr1, vr3
+
+ vpickve2gr.bu t0, vr0, 0
+ vpickve2gr.bu t1, vr1, 0
+ sub.d a0, t0, t1
+ jr ra
+
+
+L(unaligned):
+ slt a4, a3, a2
+ xor t0, a0, a1
+ maskeqz t0, t0, a4
+ xor a0, a0, t0
+
+ xor a1, a1, t0
+ andi a2, a0, 0xf
+ andi a3, a1, 0xf
+ bstrins.d a0, zero, 3, 0
+
+ bstrins.d a1, zero, 3, 0
+ vld vr3, a0, 0
+ vld vr1, a1, 0
+ vreplgr2vr.b vr4, a2
+
+ vreplgr2vr.b vr5, a3
+ vslt.b vr7, vr2, vr5
+ vsub.b vr5, vr5, vr4
+ vaddi.bu vr6, vr2, 16
+
+
+ vsub.b vr6, vr6, vr5
+ vshuf.b vr0, vr3, vr3, vr6
+ vor.v vr0, vr0, vr7
+ vor.v vr1, vr1, vr7
+
+ vseq.b vr5, vr0, vr1
+ vsetanyeqz.b fcc0, vr5
+ bcnez fcc0, L(not_equal)
+ vslt.b vr4, vr2, vr4
+
+ vor.v vr0, vr3, vr4
+ vsetanyeqz.b fcc0, vr0
+ bcnez fcc0, L(find_zero)
+ nop
+
+L(un_loop):
+ vld vr3, a0, 16
+ vld vr1, a1, 16
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+
+
+ vshuf.b vr0, vr3, vr0, vr6
+ vseq.b vr5, vr0, vr1
+ vsetanyeqz.b fcc0, vr5
+ bcnez fcc0, L(not_equal)
+
+ vsetanyeqz.b fcc0, vr3
+ vor.v vr0, vr3, vr3
+ bceqz fcc0, L(un_loop)
+L(find_zero):
+ vmin.bu vr5, vr1, vr5
+
+ vsetanyeqz.b fcc0, vr5
+ bcnez fcc0, L(ret0)
+ vld vr1, a1, 16
+ vshuf.b vr0, vr3, vr3, vr6
+
+ vseq.b vr5, vr0, vr1
+L(not_equal):
+ vmin.bu vr5, vr0, vr5
+L(un_end):
+ vseqi.b vr5, vr5, 0
+ vfrstpi.b vr5, vr5, 0
+
+
+ vshuf.b vr0, vr0, vr0, vr5
+ vshuf.b vr1, vr1, vr1, vr5
+ vpickve2gr.bu t0, vr0, 0
+ vpickve2gr.bu t1, vr1, 0
+
+ sub.d t3, t0, t1
+ sub.d t4, t1, t0
+ masknez t0, t3, a4
+ maskeqz t1, t4, a4
+
+ or a0, t0, t1
+ jr ra
+L(ret0):
+ move a0, zero
+ jr ra
+END(STRCMP)
+
+ .section .rodata.cst16,"M",@progbits,16
+ .align 4
+L(INDEX):
+ .dword 0x0706050403020100
+ .dword 0x0f0e0d0c0b0a0908
+
+libc_hidden_builtin_def (STRCMP)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp.c b/sysdeps/loongarch/lp64/multiarch/strcmp.c
new file mode 100644
index 00000000..6f249c0b
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strcmp.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strcmp __redirect_strcmp
+# include <string.h>
+# undef strcmp
+
+# define SYMBOL_NAME strcmp
+# include "ifunc-strcmp.h"
+
+libc_ifunc_redirected (__redirect_strcmp, strcmp, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strcmp, __GI_strcmp, __redirect_strcmp)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strcmp);
+# endif
+#endif
--
2.33.0

File diff suppressed because it is too large

View file

@ -0,0 +1,583 @@
From 6f03da2d7ef218c0f78375cf706dada59c3fee63 Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Thu, 24 Aug 2023 16:50:19 +0800
Subject: [PATCH 10/29] LoongArch: Add ifunc support for strncmp{aligned, lsx}
Based on the glibc microbenchmark, only a few short inputs show performance
degradation with the strncmp-aligned and strncmp-lsx implementations. Overall,
strncmp-aligned reduces the runtime by 0%-10% for aligned comparison and by
10%-25% for unaligned comparison, and strncmp-lsx reduces the runtime by about
0%-60%.
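The aligned version is essentially the strcmp word loop with the remaining
length folded in; a hypothetical C sketch (assuming 8-byte-aligned inputs;
not the glibc code):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static int
strncmp_aligned_sketch (const char *a, const char *b, size_t n)
{
  const uint64_t ones  = 0x0101010101010101ULL;
  const uint64_t highs = 0x8080808080808080ULL;

  /* Compare whole 8-byte words while at least 8 bytes remain.  */
  while (n >= 8)
    {
      uint64_t wa, wb;
      memcpy (&wa, a, 8);
      memcpy (&wb, b, 8);
      if (wa != wb || ((wa - ones) & ~wa & highs))
        break;
      a += 8;
      b += 8;
      n -= 8;
    }
  /* Finish byte by byte, bounded by the remaining length.  */
  for (; n > 0; a++, b++, n--)
    {
      unsigned char ca = (unsigned char) *a, cb = (unsigned char) *b;
      if (ca != cb || ca == '\0')
        return ca - cb;
    }
  return 0;
}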
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 2 +
.../lp64/multiarch/ifunc-impl-list.c | 7 +
.../loongarch/lp64/multiarch/ifunc-strncmp.h | 38 +++
.../lp64/multiarch/strncmp-aligned.S | 218 ++++++++++++++++++
.../loongarch/lp64/multiarch/strncmp-lsx.S | 208 +++++++++++++++++
sysdeps/loongarch/lp64/multiarch/strncmp.c | 35 +++
6 files changed, 508 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index d5a500de..5d7ae7ae 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -14,6 +14,8 @@ sysdep_routines += \
strchrnul-lasx \
strcmp-aligned \
strcmp-lsx \
+ strncmp-aligned \
+ strncmp-lsx \
memcpy-aligned \
memcpy-unaligned \
memmove-unaligned \
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 9183b7da..c8ba87bd 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -69,6 +69,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_aligned)
)
+ IFUNC_IMPL (i, name, strncmp,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, strncmp, SUPPORT_LSX, __strncmp_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_aligned)
+ )
+
IFUNC_IMPL (i, name, memcpy,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx)
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h
new file mode 100644
index 00000000..1a7dc36b
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h
@@ -0,0 +1,38 @@
+/* Common definition for strncmp ifunc selection.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
new file mode 100644
index 00000000..e2687fa7
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
@@ -0,0 +1,218 @@
+/* Optimized strncmp implementation using basic Loongarch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STRNCMP __strncmp_aligned
+#else
+# define STRNCMP strncmp
+#endif
+
+LEAF(STRNCMP, 6)
+ beqz a2, L(ret0)
+ lu12i.w a5, 0x01010
+ andi a3, a0, 0x7
+ ori a5, a5, 0x101
+
+ andi a4, a1, 0x7
+ bstrins.d a5, a5, 63, 32
+ li.d t7, -1
+ li.d t8, 8
+
+ addi.d a2, a2, -1
+ slli.d a6, a5, 7
+ bne a3, a4, L(unaligned)
+ bstrins.d a0, zero, 2, 0
+
+ bstrins.d a1, zero, 2, 0
+ ld.d t0, a0, 0
+ ld.d t1, a1, 0
+ slli.d t2, a3, 3
+
+
+ sub.d t5, t8, a3
+ srl.d t3, t7, t2
+ srl.d t0, t0, t2
+ srl.d t1, t1, t2
+
+ orn t0, t0, t3
+ orn t1, t1, t3
+ sub.d t2, t0, a5
+ andn t3, a6, t0
+
+ and t2, t2, t3
+ bne t0, t1, L(al_end)
+ sltu t4, a2, t5
+ sub.d a2, a2, t5
+
+L(al_loop):
+ or t4, t2, t4
+ bnez t4, L(ret0)
+ ldx.d t0, a0, t8
+ ldx.d t1, a1, t8
+
+
+ addi.d t8, t8, 8
+ sltui t4, a2, 8
+ addi.d a2, a2, -8
+ sub.d t2, t0, a5
+
+ andn t3, a6, t0
+ and t2, t2, t3
+ beq t0, t1, L(al_loop)
+ addi.d a2, a2, 8
+
+L(al_end):
+ xor t3, t0, t1
+ or t2, t2, t3
+ ctz.d t2, t2
+ srli.d t4, t2, 3
+
+ bstrins.d t2, zero, 2, 0
+ srl.d t0, t0, t2
+ srl.d t1, t1, t2
+ andi t0, t0, 0xff
+
+
+ andi t1, t1, 0xff
+ sltu t2, a2, t4
+ sub.d a0, t0, t1
+ masknez a0, a0, t2
+
+ jr ra
+L(ret0):
+ move a0, zero
+ jr ra
+ nop
+
+L(unaligned):
+ slt a7, a4, a3
+ xor t0, a0, a1
+ maskeqz t0, t0, a7
+ xor a0, a0, t0
+
+ xor a1, a1, t0
+ andi a3, a0, 0x7
+ andi a4, a1, 0x7
+ bstrins.d a0, zero, 2, 0
+
+
+ bstrins.d a1, zero, 2, 0
+ ld.d t4, a0, 0
+ ld.d t1, a1, 0
+ slli.d t2, a3, 3
+
+ slli.d t3, a4, 3
+ srl.d t5, t7, t3
+ srl.d t0, t4, t2
+ srl.d t1, t1, t3
+
+ orn t0, t0, t5
+ orn t1, t1, t5
+ bne t0, t1, L(not_equal)
+ sub.d t6, t8, a4
+
+ sub.d a4, t2, t3
+ sll.d t2, t7, t2
+ sub.d t5, t8, a3
+ orn t4, t4, t2
+
+
+ sub.d t2, t4, a5
+ andn t3, a6, t4
+ sltu t7, a2, t5
+ and t2, t2, t3
+
+ sub.d a3, zero, a4
+ or t2, t2, t7
+ bnez t2, L(un_end)
+ sub.d t7, t5, t6
+
+ sub.d a2, a2, t5
+ sub.d t6, t8, t7
+L(un_loop):
+ srl.d t5, t4, a4
+ ldx.d t4, a0, t8
+
+ ldx.d t1, a1, t8
+ addi.d t8, t8, 8
+ sll.d t0, t4, a3
+ or t0, t0, t5
+
+
+ bne t0, t1, L(loop_not_equal)
+ sub.d t2, t4, a5
+ andn t3, a6, t4
+ sltui t5, a2, 8
+
+ and t2, t2, t3
+ addi.d a2, a2, -8
+ or t3, t2, t5
+ beqz t3, L(un_loop)
+
+ addi.d a2, a2, 8
+L(un_end):
+ sub.d t2, t0, a5
+ andn t3, a6, t0
+ sltu t5, a2, t6
+
+ and t2, t2, t3
+ or t2, t2, t5
+ bnez t2, L(ret0)
+ ldx.d t1, a1, t8
+
+
+ srl.d t0, t4, a4
+ sub.d a2, a2, t6
+L(not_equal):
+ sub.d t2, t0, a5
+ andn t3, a6, t0
+
+ xor t4, t0, t1
+ and t2, t2, t3
+ or t2, t2, t4
+ ctz.d t2, t2
+
+ bstrins.d t2, zero, 2, 0
+ srli.d t4, t2, 3
+ srl.d t0, t0, t2
+ srl.d t1, t1, t2
+
+ andi t0, t0, 0xff
+ andi t1, t1, 0xff
+ sub.d t2, t0, t1
+ sub.d t3, t1, t0
+
+
+ masknez t0, t2, a7
+ maskeqz t1, t3, a7
+ sltu t2, a2, t4
+ or a0, t0, t1
+
+ masknez a0, a0, t2
+ jr ra
+L(loop_not_equal):
+ add.d a2, a2, t7
+ b L(not_equal)
+END(STRNCMP)
+
+libc_hidden_builtin_def (STRNCMP)
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
new file mode 100644
index 00000000..0b4eee2a
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
@@ -0,0 +1,208 @@
+/* Optimized strncmp implementation using Loongarch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define STRNCMP __strncmp_lsx
+
+LEAF(STRNCMP, 6)
+ beqz a2, L(ret0)
+ pcalau12i t0, %pc_hi20(L(INDEX))
+ andi a3, a0, 0xf
+ vld vr2, t0, %pc_lo12(L(INDEX))
+
+ andi a4, a1, 0xf
+ li.d t2, 16
+ bne a3, a4, L(unaligned)
+ xor t0, a0, a3
+
+ xor t1, a1, a4
+ vld vr0, t0, 0
+ vld vr1, t1, 0
+ vreplgr2vr.b vr3, a3
+
+
+ sub.d t2, t2, a3
+ vadd.b vr3, vr3, vr2
+ vshuf.b vr0, vr3, vr0, vr3
+ vshuf.b vr1, vr3, vr1, vr3
+
+ vseq.b vr3, vr0, vr1
+ vmin.bu vr3, vr0, vr3
+ bgeu t2, a2, L(al_early_end)
+ vsetanyeqz.b fcc0, vr3
+
+ bcnez fcc0, L(al_end)
+ add.d a3, a0, a2
+ addi.d a4, a3, -1
+ bstrins.d a4, zero, 3, 0
+
+ sub.d a2, a3, a4
+L(al_loop):
+ vld vr0, t0, 16
+ vld vr1, t1, 16
+ addi.d t0, t0, 16
+
+
+ addi.d t1, t1, 16
+ vseq.b vr3, vr0, vr1
+ vmin.bu vr3, vr0, vr3
+ beq t0, a4, L(al_early_end)
+
+ vsetanyeqz.b fcc0, vr3
+ bceqz fcc0, L(al_loop)
+L(al_end):
+ vseqi.b vr3, vr3, 0
+ vfrstpi.b vr3, vr3, 0
+
+ vshuf.b vr0, vr0, vr0, vr3
+ vshuf.b vr1, vr1, vr1, vr3
+ vpickve2gr.bu t0, vr0, 0
+ vpickve2gr.bu t1, vr1, 0
+
+ sub.d a0, t0, t1
+ jr ra
+L(al_early_end):
+ vreplgr2vr.b vr4, a2
+ vslt.b vr4, vr2, vr4
+
+
+ vorn.v vr3, vr3, vr4
+ b L(al_end)
+L(unaligned):
+ slt a5, a3, a4
+ xor t0, a0, a1
+
+ maskeqz t0, t0, a5
+ xor a0, a0, t0
+ xor a1, a1, t0
+ andi a3, a0, 0xf
+
+ andi a4, a1, 0xf
+ xor t0, a0, a3
+ xor t1, a1, a4
+ vld vr0, t0, 0
+
+ vld vr3, t1, 0
+ sub.d t2, t2, a3
+ vreplgr2vr.b vr4, a3
+ vreplgr2vr.b vr5, a4
+
+
+ vaddi.bu vr6, vr2, 16
+ vsub.b vr7, vr4, vr5
+ vsub.b vr6, vr6, vr7
+ vadd.b vr4, vr2, vr4
+
+ vshuf.b vr1, vr3, vr3, vr6
+ vshuf.b vr0, vr7, vr0, vr4
+ vshuf.b vr1, vr7, vr1, vr4
+ vseq.b vr4, vr0, vr1
+
+ vmin.bu vr4, vr0, vr4
+ bgeu t2, a2, L(un_early_end)
+ vsetanyeqz.b fcc0, vr4
+ bcnez fcc0, L(un_end)
+
+ add.d a6, a0, a2
+ vslt.b vr5, vr2, vr5
+ addi.d a7, a6, -1
+ vor.v vr3, vr3, vr5
+
+
+ bstrins.d a7, zero, 3, 0
+ sub.d a2, a6, a7
+L(un_loop):
+ vld vr0, t0, 16
+ addi.d t0, t0, 16
+
+ vsetanyeqz.b fcc0, vr3
+ bcnez fcc0, L(has_zero)
+ beq t0, a7, L(end_with_len)
+ vor.v vr1, vr3, vr3
+
+ vld vr3, t1, 16
+ addi.d t1, t1, 16
+ vshuf.b vr1, vr3, vr1, vr6
+ vseq.b vr4, vr0, vr1
+
+ vmin.bu vr4, vr0, vr4
+ vsetanyeqz.b fcc0, vr4
+ bceqz fcc0, L(un_loop)
+L(un_end):
+ vseqi.b vr4, vr4, 0
+
+
+ vfrstpi.b vr4, vr4, 0
+ vshuf.b vr0, vr0, vr0, vr4
+ vshuf.b vr1, vr1, vr1, vr4
+ vpickve2gr.bu t0, vr0, 0
+
+ vpickve2gr.bu t1, vr1, 0
+ sub.d t2, t0, t1
+ sub.d t3, t1, t0
+ masknez t0, t2, a5
+
+ maskeqz t1, t3, a5
+ or a0, t0, t1
+ jr ra
+L(has_zero):
+ vshuf.b vr1, vr3, vr3, vr6
+
+ vseq.b vr4, vr0, vr1
+ vmin.bu vr4, vr0, vr4
+ bne t0, a7, L(un_end)
+L(un_early_end):
+ vreplgr2vr.b vr5, a2
+
+ vslt.b vr5, vr2, vr5
+ vorn.v vr4, vr4, vr5
+ b L(un_end)
+L(end_with_len):
+ sub.d a6, a3, a4
+
+ bgeu a6, a2, 1f
+ vld vr4, t1, 16
+1:
+ vshuf.b vr1, vr4, vr3, vr6
+ vseq.b vr4, vr0, vr1
+
+ vmin.bu vr4, vr0, vr4
+ vreplgr2vr.b vr5, a2
+ vslt.b vr5, vr2, vr5
+ vorn.v vr4, vr4, vr5
+
+ b L(un_end)
+L(ret0):
+ move a0, zero
+ jr ra
+END(STRNCMP)
+
+ .section .rodata.cst16,"M",@progbits,16
+ .align 4
+L(INDEX):
+ .dword 0x0706050403020100
+ .dword 0x0f0e0d0c0b0a0908
+
+libc_hidden_builtin_def (STRNCMP)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp.c b/sysdeps/loongarch/lp64/multiarch/strncmp.c
new file mode 100644
index 00000000..af6d0bc4
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strncmp.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strncmp __redirect_strncmp
+# include <string.h>
+# undef strncmp
+
+# define SYMBOL_NAME strncmp
+# include "ifunc-strncmp.h"
+
+libc_ifunc_redirected (__redirect_strncmp, strncmp, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strncmp, __GI_strncmp, __redirect_strncmp)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strncmp);
+# endif
+#endif
--
2.33.0
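
The strncmp.c wrapper in the patch above dispatches through the GNU IFUNC
mechanism that libc_ifunc_redirected wraps: a resolver runs once at relocation
time and returns the implementation that every later call will use. A minimal,
self-contained sketch of that mechanism follows; the names my_strncmp,
my_strncmp_resolver and have_lsx are hypothetical stand-ins, not glibc symbols.

#include <string.h>

typedef int strncmp_fn (const char *, const char *, size_t);

/* Hypothetical stand-ins for __strncmp_aligned / __strncmp_lsx.  */
static int my_strncmp_aligned (const char *a, const char *b, size_t n)
{ return strncmp (a, b, n); }
static int my_strncmp_lsx (const char *a, const char *b, size_t n)
{ return strncmp (a, b, n); }

/* Hypothetical stand-in for the SUPPORT_LSX hwcap check.  */
static int have_lsx (void) { return 0; }

/* The resolver runs once, during relocation; its return value is the
   address all subsequent calls to my_strncmp will jump to.  */
static strncmp_fn *
my_strncmp_resolver (void)
{
  return have_lsx () ? my_strncmp_lsx : my_strncmp_aligned;
}

int my_strncmp (const char *a, const char *b, size_t n)
  __attribute__ ((ifunc ("my_strncmp_resolver")));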


@ -0,0 +1,465 @@
From e494d32d3b76eee0d59cfab37789a356459b517a Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Thu, 24 Aug 2023 16:50:17 +0800
Subject: [PATCH 08/29] LoongArch: Add ifunc support for strnlen{aligned, lsx,
lasx}
Based on the glibc microbenchmark, the strnlen-aligned implementation could
reduce the runtime by more than 10%, the strnlen-lsx implementation by about
50%-78%, and the strnlen-lasx implementation by about 50%-88%.
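The aligned variant added below scans eight bytes per iteration and detects an
embedded NUL with the classic SWAR test that the assembly builds from the
0x0101010101010101 and 0x8080808080808080 constants; a minimal C sketch of just
that test, using a hypothetical helper name, looks like this:

#include <stdint.h>

/* Nonzero iff some byte of w is zero: (w - 0x01..01) borrows out of a byte
   only where that byte was 0x00 (or wrapped), and masking with ~w & 0x80..80
   keeps exactly the bytes that were zero.  */
static inline int
word_has_zero_byte (uint64_t w)
{
  const uint64_t ones  = 0x0101010101010101ULL;
  const uint64_t highs = 0x8080808080808080ULL;   /* ones << 7 */
  return ((w - ones) & ~w & highs) != 0;
}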
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
.../lp64/multiarch/ifunc-impl-list.c | 8 ++
.../loongarch/lp64/multiarch/ifunc-strnlen.h | 41 +++++++
.../lp64/multiarch/strnlen-aligned.S | 102 ++++++++++++++++++
.../loongarch/lp64/multiarch/strnlen-lasx.S | 100 +++++++++++++++++
.../loongarch/lp64/multiarch/strnlen-lsx.S | 89 +++++++++++++++
sysdeps/loongarch/lp64/multiarch/strnlen.c | 39 +++++++
7 files changed, 382 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index afa51041..c4dd3143 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -3,6 +3,9 @@ sysdep_routines += \
strlen-aligned \
strlen-lsx \
strlen-lasx \
+ strnlen-aligned \
+ strnlen-lsx \
+ strnlen-lasx \
strchr-aligned \
strchr-lsx \
strchr-lasx \
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 25eb96b0..7cec0b77 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -38,6 +38,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned)
)
+ IFUNC_IMPL (i, name, strnlen,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LASX, __strnlen_lasx)
+ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LSX, __strnlen_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_aligned)
+ )
+
IFUNC_IMPL (i, name, strchr,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx)
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h
new file mode 100644
index 00000000..5cf89810
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h
@@ -0,0 +1,41 @@
+/* Common definition for strnlen ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
new file mode 100644
index 00000000..b900430a
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
@@ -0,0 +1,102 @@
+/* Optimized strnlen implementation using basic Loongarch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STRNLEN __strnlen_aligned
+#else
+# define STRNLEN __strnlen
+#endif
+
+LEAF(STRNLEN, 6)
+ beqz a1, L(out)
+ lu12i.w a2, 0x01010
+ andi t1, a0, 0x7
+ move t4, a0
+
+ bstrins.d a0, zero, 2, 0
+ ori a2, a2, 0x101
+ li.w t0, -1
+ ld.d t2, a0, 0
+
+ slli.d t3, t1, 3
+ bstrins.d a2, a2, 63, 32
+ li.w t5, 8
+ slli.d a3, a2, 7
+
+ sub.w t1, t5, t1
+ sll.d t0, t0, t3
+ orn t2, t2, t0
+ sub.d t0, t2, a2
+
+
+ andn t3, a3, t2
+ and t0, t0, t3
+ bnez t0, L(count_pos)
+ sub.d t5, a1, t1
+
+ bgeu t1, a1, L(out)
+ addi.d a0, a0, 8
+L(loop):
+ ld.d t2, a0, 0
+ sub.d t0, t2, a2
+
+ andn t1, a3, t2
+ sltui t6, t5, 9
+ and t0, t0, t1
+ or t7, t0, t6
+
+ bnez t7, L(count_pos)
+ ld.d t2, a0, 8
+ addi.d a0, a0, 16
+ sub.d t0, t2, a2
+
+
+ andn t1, a3, t2
+ sltui t6, t5, 17
+ and t0, t0, t1
+ addi.d t5, t5, -16
+
+ or t7, t0, t6
+ beqz t7, L(loop)
+ addi.d a0, a0, -8
+L(count_pos):
+ ctz.d t1, t0
+
+ sub.d a0, a0, t4
+ srli.d t1, t1, 3
+ add.d a0, t1, a0
+ sltu t0, a0, a1
+
+ masknez t1, a1, t0
+ maskeqz a0, a0, t0
+ or a0, a0, t1
+ jr ra
+
+
+L(out):
+ move a0, a1
+ jr ra
+END(STRNLEN)
+
+weak_alias (STRNLEN, strnlen)
+libc_hidden_builtin_def (STRNLEN)
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
new file mode 100644
index 00000000..2c03d3d9
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
@@ -0,0 +1,100 @@
+/* Optimized strnlen implementation using loongarch LASX instructions
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define STRNLEN __strnlen_lasx
+
+LEAF(STRNLEN, 6)
+ beqz a1, L(ret0)
+ andi t1, a0, 0x3f
+ li.d t3, 65
+ sub.d a2, a0, t1
+
+ xvld xr0, a2, 0
+ xvld xr1, a2, 32
+ sub.d t1, t3, t1
+ move a3, a0
+
+ sltu t1, a1, t1
+ xvmsknz.b xr0, xr0
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr2, xr0, 4
+
+ xvpickve.w xr3, xr1, 4
+ vilvl.h vr0, vr2, vr0
+ vilvl.h vr1, vr3, vr1
+ vilvl.w vr0, vr1, vr0
+
+
+ movfr2gr.d t0, fa0
+ sra.d t0, t0, a0
+ orn t1, t1, t0
+ bnez t1, L(end)
+
+ add.d a4, a0, a1
+ move a0, a2
+ addi.d a4, a4, -1
+ bstrins.d a4, zero, 5, 0
+
+L(loop):
+ xvld xr0, a0, 64
+ xvld xr1, a0, 96
+ addi.d a0, a0, 64
+ beq a0, a4, L(out)
+
+ xvmin.bu xr2, xr0, xr1
+ xvsetanyeqz.b fcc0, xr2
+ bceqz fcc0, L(loop)
+L(out):
+ xvmsknz.b xr0, xr0
+
+
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr2, xr0, 4
+ xvpickve.w xr3, xr1, 4
+ vilvl.h vr0, vr2, vr0
+
+ vilvl.h vr1, vr3, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+L(end):
+ sub.d a0, a0, a3
+
+ cto.d t0, t0
+ add.d a0, a0, t0
+ sltu t1, a0, a1
+ masknez t0, a1, t1
+
+ maskeqz t1, a0, t1
+ or a0, t0, t1
+ jr ra
+L(ret0):
+ move a0, zero
+
+
+ jr ra
+END(STRNLEN)
+
+libc_hidden_def (STRNLEN)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
new file mode 100644
index 00000000..b769a895
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
@@ -0,0 +1,89 @@
+/* Optimized strnlen implementation using loongarch LSX instructions
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define STRNLEN __strnlen_lsx
+
+LEAF(STRNLEN, 6)
+ beqz a1, L(ret0)
+ andi t1, a0, 0x1f
+ li.d t3, 33
+ sub.d a2, a0, t1
+
+ vld vr0, a2, 0
+ vld vr1, a2, 16
+ sub.d t1, t3, t1
+ move a3, a0
+
+ sltu t1, a1, t1
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+
+ movfr2gr.s t0, fa0
+ sra.w t0, t0, a0
+ orn t1, t1, t0
+ bnez t1, L(end)
+
+
+ add.d a4, a0, a1
+ move a0, a2
+ addi.d a4, a4, -1
+ bstrins.d a4, zero, 4, 0
+
+L(loop):
+ vld vr0, a0, 32
+ vld vr1, a0, 48
+ addi.d a0, a0, 32
+ beq a0, a4, L(out)
+
+ vmin.bu vr2, vr0, vr1
+ vsetanyeqz.b fcc0, vr2
+ bceqz fcc0, L(loop)
+L(out):
+ vmsknz.b vr0, vr0
+
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+L(end):
+ sub.d a0, a0, a3
+
+
+ cto.w t0, t0
+ add.d a0, a0, t0
+ sltu t1, a0, a1
+ masknez t0, a1, t1
+
+ maskeqz t1, a0, t1
+ or a0, t0, t1
+ jr ra
+L(ret0):
+ move a0, zero
+
+ jr ra
+END(STRNLEN)
+
+libc_hidden_builtin_def (STRNLEN)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen.c b/sysdeps/loongarch/lp64/multiarch/strnlen.c
new file mode 100644
index 00000000..38b7a25a
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen.c
@@ -0,0 +1,39 @@
+/* Multiple versions of strnlen.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strnlen __redirect_strnlen
+# define __strnlen __redirect___strnlen
+# include <string.h>
+# undef __strnlen
+# undef strnlen
+
+# define SYMBOL_NAME strnlen
+# include "ifunc-strnlen.h"
+
+libc_ifunc_redirected (__redirect_strnlen, __strnlen, IFUNC_SELECTOR ());
+weak_alias (__strnlen, strnlen);
+# ifdef SHARED
+__hidden_ver1 (__strnlen, __GI___strnlen, __redirect___strnlen)
+ __attribute__((visibility ("hidden"))) __attribute_copy__ (strnlen);
+__hidden_ver1 (strnlen, __GI_strnlen, __redirect_strnlen)
+ __attribute__((weak, visibility ("hidden"))) __attribute_copy__ (strnlen);
+# endif
+#endif
--
2.33.0


@ -0,0 +1,670 @@
From d537d0ab45a55048c8da483e73be4448ddb45525 Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Wed, 13 Sep 2023 15:35:00 +0800
Subject: [PATCH 23/29] LoongArch: Add ifunc support for strrchr{aligned, lsx,
lasx}
According to the glibc strrchr microbenchmark results, this implementation
could reduce the runtime as follows:
Name             Percent of runtime reduced
strrchr-lasx     10%-50%
strrchr-lsx      0%-50%
strrchr-aligned  5%-50%
The generic strrchr is implemented as strlen + memrchr.  The lasx version is
compared against a generic strrchr built from strlen-lasx + memrchr-lasx, the
lsx version against one built from strlen-lsx + memrchr-lsx, and the aligned
version against one built from strlen-aligned + memrchr-generic.
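For reference, that generic baseline is simply the composition of the two
routines; a minimal sketch (memrchr is a GNU extension, hence _GNU_SOURCE):

#define _GNU_SOURCE          /* for memrchr */
#include <string.h>

static char *
generic_strrchr (const char *s, int c)
{
  /* Search the terminating NUL as well, so that looking for '\0'
     returns a pointer to the end of the string.  */
  return (char *) memrchr (s, c, strlen (s) + 1);
}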
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
.../lp64/multiarch/ifunc-impl-list.c | 8 +
.../loongarch/lp64/multiarch/ifunc-strrchr.h | 41 ++++
.../lp64/multiarch/strrchr-aligned.S | 170 +++++++++++++++++
.../loongarch/lp64/multiarch/strrchr-lasx.S | 176 ++++++++++++++++++
.../loongarch/lp64/multiarch/strrchr-lsx.S | 144 ++++++++++++++
sysdeps/loongarch/lp64/multiarch/strrchr.c | 36 ++++
7 files changed, 578 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 39550bea..fe863e1b 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -9,6 +9,9 @@ sysdep_routines += \
strchr-aligned \
strchr-lsx \
strchr-lasx \
+ strrchr-aligned \
+ strrchr-lsx \
+ strrchr-lasx \
strchrnul-aligned \
strchrnul-lsx \
strchrnul-lasx \
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 39a14f1d..529e2369 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -94,6 +94,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned)
)
+ IFUNC_IMPL (i, name, strrchr,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LASX, __strrchr_lasx)
+ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LSX, __strrchr_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_aligned)
+ )
+
IFUNC_IMPL (i, name, memcpy,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx)
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h
new file mode 100644
index 00000000..bbb34089
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h
@@ -0,0 +1,41 @@
+/* Common definition for strrchr ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
new file mode 100644
index 00000000..a73deb78
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
@@ -0,0 +1,170 @@
+/* Optimized strrchr implementation using basic LoongArch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STRRCHR __strrchr_aligned
+#else
+# define STRRCHR strrchr
+#endif
+
+LEAF(STRRCHR, 6)
+ slli.d t0, a0, 3
+ bstrins.d a0, zero, 2, 0
+ lu12i.w a2, 0x01010
+ ld.d t2, a0, 0
+
+ andi a1, a1, 0xff
+ ori a2, a2, 0x101
+ li.d t3, -1
+ bstrins.d a2, a2, 63, 32
+
+ sll.d t5, t3, t0
+ slli.d a3, a2, 7
+ orn t4, t2, t5
+ mul.d a1, a1, a2
+
+ sub.d t0, t4, a2
+ andn t1, a3, t4
+ and t1, t0, t1
+ beqz t1, L(find_tail)
+
+
+ ctz.d t0, t1
+ orn t0, zero, t0
+ xor t2, t4, a1
+ srl.d t0, t3, t0
+
+ orn t2, t2, t0
+ orn t2, t2, t5
+ revb.d t2, t2
+ sub.d t1, t2, a2
+
+ andn t0, a3, t2
+ and t1, t0, t1
+ ctz.d t0, t1
+ srli.d t0, t0, 3
+
+ addi.d a0, a0, 7
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+
+
+L(find_tail):
+ addi.d a4, a0, 8
+ addi.d a0, a0, 8
+L(loop_ascii):
+ ld.d t2, a0, 0
+ sub.d t1, t2, a2
+
+ and t0, t1, a3
+ bnez t0, L(more_check)
+ ld.d t2, a0, 8
+ sub.d t1, t2, a2
+
+ and t0, t1, a3
+ addi.d a0, a0, 16
+ beqz t0, L(loop_ascii)
+ addi.d a0, a0, -8
+
+L(more_check):
+ andn t0, a3, t2
+ and t1, t1, t0
+ bnez t1, L(tail)
+ addi.d a0, a0, 8
+
+
+L(loop_nonascii):
+ ld.d t2, a0, 0
+ sub.d t1, t2, a2
+ andn t0, a3, t2
+ and t1, t0, t1
+
+ bnez t1, L(tail)
+ ld.d t2, a0, 8
+ addi.d a0, a0, 16
+ sub.d t1, t2, a2
+
+ andn t0, a3, t2
+ and t1, t0, t1
+ beqz t1, L(loop_nonascii)
+ addi.d a0, a0, -8
+
+L(tail):
+ ctz.d t0, t1
+ orn t0, zero, t0
+ xor t2, t2, a1
+ srl.d t0, t3, t0
+
+
+ orn t2, t2, t0
+ revb.d t2, t2
+ sub.d t1, t2, a2
+ andn t0, a3, t2
+
+ and t1, t0, t1
+ bnez t1, L(count_pos)
+L(find_loop):
+ beq a0, a4, L(find_end)
+ ld.d t2, a0, -8
+
+ addi.d a0, a0, -8
+ xor t2, t2, a1
+ sub.d t1, t2, a2
+ andn t0, a3, t2
+
+ and t1, t0, t1
+ beqz t1, L(find_loop)
+ revb.d t2, t2
+ sub.d t1, t2, a2
+
+
+ andn t0, a3, t2
+ and t1, t0, t1
+L(count_pos):
+ ctz.d t0, t1
+ addi.d a0, a0, 7
+
+ srli.d t0, t0, 3
+ sub.d a0, a0, t0
+ jr ra
+ nop
+
+L(find_end):
+ xor t2, t4, a1
+ orn t2, t2, t5
+ revb.d t2, t2
+ sub.d t1, t2, a2
+
+
+ andn t0, a3, t2
+ and t1, t0, t1
+ ctz.d t0, t1
+ srli.d t0, t0, 3
+
+ addi.d a0, a4, -1
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+END(STRRCHR)
+
+libc_hidden_builtin_def(STRRCHR)
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S
new file mode 100644
index 00000000..5a6e2297
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S
@@ -0,0 +1,176 @@
+/* Optimized strrchr implementation using LoongArch LASX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+#define STRRCHR __strrchr_lasx
+
+LEAF(STRRCHR, 6)
+ move a2, a0
+ bstrins.d a0, zero, 5, 0
+ xvld xr0, a0, 0
+ xvld xr1, a0, 32
+
+ li.d t2, -1
+ xvreplgr2vr.b xr4, a1
+ xvmsknz.b xr2, xr0
+ xvmsknz.b xr3, xr1
+
+ xvpickve.w xr5, xr2, 4
+ xvpickve.w xr6, xr3, 4
+ vilvl.h vr2, vr5, vr2
+ vilvl.h vr3, vr6, vr3
+
+ vilvl.w vr2, vr3, vr2
+ movfr2gr.d t0, fa2
+ sra.d t0, t0, a2
+ beq t0, t2, L(find_tail)
+
+
+ xvseq.b xr2, xr0, xr4
+ xvseq.b xr3, xr1, xr4
+ xvmsknz.b xr2, xr2
+ xvmsknz.b xr3, xr3
+
+ xvpickve.w xr4, xr2, 4
+ xvpickve.w xr5, xr3, 4
+ vilvl.h vr2, vr4, vr2
+ vilvl.h vr3, vr5, vr3
+
+ vilvl.w vr1, vr3, vr2
+ slli.d t3, t2, 1
+ movfr2gr.d t1, fa1
+ cto.d t0, t0
+
+ srl.d t1, t1, a2
+ sll.d t3, t3, t0
+ addi.d a0, a2, 63
+ andn t1, t1, t3
+
+
+ clz.d t0, t1
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+
+ .align 5
+L(find_tail):
+ addi.d a3, a0, 64
+L(loop):
+ xvld xr2, a0, 64
+ xvld xr3, a0, 96
+ addi.d a0, a0, 64
+
+ xvmin.bu xr5, xr2, xr3
+ xvsetanyeqz.b fcc0, xr5
+ bceqz fcc0, L(loop)
+ xvmsknz.b xr5, xr2
+
+
+ xvmsknz.b xr6, xr3
+ xvpickve.w xr7, xr5, 4
+ xvpickve.w xr8, xr6, 4
+ vilvl.h vr5, vr7, vr5
+
+ vilvl.h vr6, vr8, vr6
+ xvseq.b xr2, xr2, xr4
+ xvseq.b xr3, xr3, xr4
+ xvmsknz.b xr2, xr2
+
+ xvmsknz.b xr3, xr3
+ xvpickve.w xr7, xr2, 4
+ xvpickve.w xr8, xr3, 4
+ vilvl.h vr2, vr7, vr2
+
+ vilvl.h vr3, vr8, vr3
+ vilvl.w vr5, vr6, vr5
+ vilvl.w vr2, vr3, vr2
+ movfr2gr.d t0, fa5
+
+
+ movfr2gr.d t1, fa2
+ slli.d t3, t2, 1
+ cto.d t0, t0
+ sll.d t3, t3, t0
+
+ andn t1, t1, t3
+ beqz t1, L(find_loop)
+ clz.d t0, t1
+ addi.d a0, a0, 63
+
+ sub.d a0, a0, t0
+ jr ra
+L(find_loop):
+ beq a0, a3, L(find_end)
+ xvld xr2, a0, -64
+
+ xvld xr3, a0, -32
+ addi.d a0, a0, -64
+ xvseq.b xr2, xr2, xr4
+ xvseq.b xr3, xr3, xr4
+
+
+ xvmax.bu xr5, xr2, xr3
+ xvseteqz.v fcc0, xr5
+ bcnez fcc0, L(find_loop)
+ xvmsknz.b xr0, xr2
+
+ xvmsknz.b xr1, xr3
+ xvpickve.w xr2, xr0, 4
+ xvpickve.w xr3, xr1, 4
+ vilvl.h vr0, vr2, vr0
+
+ vilvl.h vr1, vr3, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+ addi.d a0, a0, 63
+
+ clz.d t0, t0
+ sub.d a0, a0, t0
+ jr ra
+ nop
+
+
+L(find_end):
+ xvseq.b xr2, xr0, xr4
+ xvseq.b xr3, xr1, xr4
+ xvmsknz.b xr2, xr2
+ xvmsknz.b xr3, xr3
+
+ xvpickve.w xr4, xr2, 4
+ xvpickve.w xr5, xr3, 4
+ vilvl.h vr2, vr4, vr2
+ vilvl.h vr3, vr5, vr3
+
+ vilvl.w vr1, vr3, vr2
+ movfr2gr.d t1, fa1
+ addi.d a0, a2, 63
+ srl.d t1, t1, a2
+
+ clz.d t0, t1
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+END(STRRCHR)
+
+libc_hidden_builtin_def(STRRCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S
new file mode 100644
index 00000000..8f2fd22e
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S
@@ -0,0 +1,144 @@
+/* Optimized strrchr implementation using LoongArch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+#define STRRCHR __strrchr_lsx
+
+LEAF(STRRCHR, 6)
+ move a2, a0
+ bstrins.d a0, zero, 4, 0
+ vld vr0, a0, 0
+ vld vr1, a0, 16
+
+ li.d t2, -1
+ vreplgr2vr.b vr4, a1
+ vmsknz.b vr2, vr0
+ vmsknz.b vr3, vr1
+
+ vilvl.h vr2, vr3, vr2
+ movfr2gr.s t0, fa2
+ sra.w t0, t0, a2
+ beq t0, t2, L(find_tail)
+
+ vseq.b vr2, vr0, vr4
+ vseq.b vr3, vr1, vr4
+ vmsknz.b vr2, vr2
+ vmsknz.b vr3, vr3
+
+
+ vilvl.h vr1, vr3, vr2
+ slli.d t3, t2, 1
+ movfr2gr.s t1, fa1
+ cto.w t0, t0
+
+ srl.w t1, t1, a2
+ sll.d t3, t3, t0
+ addi.d a0, a2, 31
+ andn t1, t1, t3
+
+ clz.w t0, t1
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+
+ .align 5
+L(find_tail):
+ addi.d a3, a0, 32
+L(loop):
+ vld vr2, a0, 32
+ vld vr3, a0, 48
+ addi.d a0, a0, 32
+
+ vmin.bu vr5, vr2, vr3
+ vsetanyeqz.b fcc0, vr5
+ bceqz fcc0, L(loop)
+ vmsknz.b vr5, vr2
+
+ vmsknz.b vr6, vr3
+ vilvl.h vr5, vr6, vr5
+ vseq.b vr2, vr2, vr4
+ vseq.b vr3, vr3, vr4
+
+ vmsknz.b vr2, vr2
+ vmsknz.b vr3, vr3
+ vilvl.h vr2, vr3, vr2
+ movfr2gr.s t0, fa5
+
+
+ movfr2gr.s t1, fa2
+ slli.d t3, t2, 1
+ cto.w t0, t0
+ sll.d t3, t3, t0
+
+ andn t1, t1, t3
+ beqz t1, L(find_loop)
+ clz.w t0, t1
+ addi.d a0, a0, 31
+
+ sub.d a0, a0, t0
+ jr ra
+L(find_loop):
+ beq a0, a3, L(find_end)
+ vld vr2, a0, -32
+
+ vld vr3, a0, -16
+ addi.d a0, a0, -32
+ vseq.b vr2, vr2, vr4
+ vseq.b vr3, vr3, vr4
+
+
+ vmax.bu vr5, vr2, vr3
+ vseteqz.v fcc0, vr5
+ bcnez fcc0, L(find_loop)
+ vmsknz.b vr0, vr2
+
+ vmsknz.b vr1, vr3
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+ addi.d a0, a0, 31
+
+ clz.w t0, t0
+ sub.d a0, a0, t0
+ jr ra
+ nop
+
+L(find_end):
+ vseq.b vr2, vr0, vr4
+ vseq.b vr3, vr1, vr4
+ vmsknz.b vr2, vr2
+ vmsknz.b vr3, vr3
+
+
+ vilvl.h vr1, vr3, vr2
+ movfr2gr.s t1, fa1
+ addi.d a0, a2, 31
+ srl.w t1, t1, a2
+
+ clz.w t0, t1
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+END(STRRCHR)
+
+libc_hidden_builtin_def(STRRCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr.c b/sysdeps/loongarch/lp64/multiarch/strrchr.c
new file mode 100644
index 00000000..d9c9f660
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr.c
@@ -0,0 +1,36 @@
+/* Multiple versions of strrchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strrchr __redirect_strrchr
+# include <string.h>
+# undef strrchr
+
+# define SYMBOL_NAME strrchr
+# include "ifunc-strrchr.h"
+
+libc_ifunc_redirected (__redirect_strrchr, strrchr, IFUNC_SELECTOR ());
+weak_alias (strrchr, rindex)
+# ifdef SHARED
+__hidden_ver1 (strrchr, __GI_strrchr, __redirect_strrchr)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strrchr);
+# endif
+
+#endif
--
2.33.0


@ -0,0 +1,626 @@
From b5979df8ad07823c79a934c1fa0a91ec0abffb61 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Fri, 8 Sep 2023 14:10:55 +0800
Subject: [PATCH 20/29] LoongArch: Add lasx/lsx support for
_dl_runtime_profile.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/bits/link.h | 24 ++-
sysdeps/loongarch/bits/link_lavcurrent.h | 25 +++
sysdeps/loongarch/dl-audit-check.h | 23 +++
sysdeps/loongarch/dl-link.sym | 8 +-
sysdeps/loongarch/dl-machine.h | 11 +-
sysdeps/loongarch/dl-trampoline.S | 177 +----------------
sysdeps/loongarch/dl-trampoline.h | 242 +++++++++++++++++++++++
7 files changed, 331 insertions(+), 179 deletions(-)
create mode 100644 sysdeps/loongarch/bits/link_lavcurrent.h
create mode 100644 sysdeps/loongarch/dl-audit-check.h
diff --git a/sysdeps/loongarch/bits/link.h b/sysdeps/loongarch/bits/link.h
index 7fa61312..00f6f25f 100644
--- a/sysdeps/loongarch/bits/link.h
+++ b/sysdeps/loongarch/bits/link.h
@@ -20,10 +20,26 @@
#error "Never include <bits/link.h> directly; use <link.h> instead."
#endif
+#ifndef __loongarch_soft_float
+typedef float La_loongarch_vr
+ __attribute__ ((__vector_size__ (16), __aligned__ (16)));
+typedef float La_loongarch_xr
+ __attribute__ ((__vector_size__ (32), __aligned__ (16)));
+
+typedef union
+{
+ double fpreg[4];
+ La_loongarch_vr vr[2];
+ La_loongarch_xr xr[1];
+} La_loongarch_vector __attribute__ ((__aligned__ (16)));
+#endif
+
typedef struct La_loongarch_regs
{
unsigned long int lr_reg[8]; /* a0 - a7 */
- double lr_fpreg[8]; /* fa0 - fa7 */
+#ifndef __loongarch_soft_float
+ La_loongarch_vector lr_vec[8]; /* fa0 - fa7 or vr0 - vr7 or xr0 - xr7*/
+#endif
unsigned long int lr_ra;
unsigned long int lr_sp;
} La_loongarch_regs;
@@ -33,8 +49,10 @@ typedef struct La_loongarch_retval
{
unsigned long int lrv_a0;
unsigned long int lrv_a1;
- double lrv_fa0;
- double lrv_fa1;
+#ifndef __loongarch_soft_float
+ La_loongarch_vector lrv_vec0;
+ La_loongarch_vector lrv_vec1;
+#endif
} La_loongarch_retval;
__BEGIN_DECLS
diff --git a/sysdeps/loongarch/bits/link_lavcurrent.h b/sysdeps/loongarch/bits/link_lavcurrent.h
new file mode 100644
index 00000000..15f1eb84
--- /dev/null
+++ b/sysdeps/loongarch/bits/link_lavcurrent.h
@@ -0,0 +1,25 @@
+/* Data structure for communication from the run-time dynamic linker for
+ loaded ELF shared objects. LAV_CURRENT definition.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _LINK_H
+# error "Never include <bits/link_lavcurrent.h> directly; use <link.h> instead."
+#endif
+
+/* Version numbers for la_version handshake interface. */
+#define LAV_CURRENT 3
diff --git a/sysdeps/loongarch/dl-audit-check.h b/sysdeps/loongarch/dl-audit-check.h
new file mode 100644
index 00000000..a139c939
--- /dev/null
+++ b/sysdeps/loongarch/dl-audit-check.h
@@ -0,0 +1,23 @@
+/* rtld-audit version check. LoongArch version.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+static inline bool
+_dl_audit_check_version (unsigned int lav)
+{
+ return lav == LAV_CURRENT;
+}
diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
index 868ab7c6..b534968e 100644
--- a/sysdeps/loongarch/dl-link.sym
+++ b/sysdeps/loongarch/dl-link.sym
@@ -6,9 +6,13 @@ DL_SIZEOF_RG sizeof(struct La_loongarch_regs)
DL_SIZEOF_RV sizeof(struct La_loongarch_retval)
DL_OFFSET_RG_A0 offsetof(struct La_loongarch_regs, lr_reg)
-DL_OFFSET_RG_FA0 offsetof(struct La_loongarch_regs, lr_fpreg)
+#ifndef __loongarch_soft_float
+DL_OFFSET_RG_VEC0 offsetof(struct La_loongarch_regs, lr_vec)
+#endif
DL_OFFSET_RG_RA offsetof(struct La_loongarch_regs, lr_ra)
DL_OFFSET_RG_SP offsetof(struct La_loongarch_regs, lr_sp)
DL_OFFSET_RV_A0 offsetof(struct La_loongarch_retval, lrv_a0)
-DL_OFFSET_RV_FA0 offsetof(struct La_loongarch_retval, lrv_a1)
+#ifndef __loongarch_soft_float
+DL_OFFSET_RV_VEC0 offsetof(struct La_loongarch_retval, lrv_vec0)
+#endif
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index 066bb233..8a2db9de 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -273,6 +273,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
#if !defined __loongarch_soft_float
extern void _dl_runtime_resolve_lasx (void) attribute_hidden;
extern void _dl_runtime_resolve_lsx (void) attribute_hidden;
+ extern void _dl_runtime_profile_lasx (void) attribute_hidden;
+ extern void _dl_runtime_profile_lsx (void) attribute_hidden;
#endif
extern void _dl_runtime_resolve (void) attribute_hidden;
extern void _dl_runtime_profile (void) attribute_hidden;
@@ -287,7 +289,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
end in this function. */
if (profile != 0)
{
- gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile;
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx;
+ else if (SUPPORT_LSX)
+ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx;
+ else
+#endif
+ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile;
if (GLRO(dl_profile) != NULL
&& _dl_name_match_p (GLRO(dl_profile), l))
diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S
index 8fd91469..bb449ecf 100644
--- a/sysdeps/loongarch/dl-trampoline.S
+++ b/sysdeps/loongarch/dl-trampoline.S
@@ -22,190 +22,21 @@
#if !defined __loongarch_soft_float
#define USE_LASX
#define _dl_runtime_resolve _dl_runtime_resolve_lasx
+#define _dl_runtime_profile _dl_runtime_profile_lasx
#include "dl-trampoline.h"
#undef FRAME_SIZE
#undef USE_LASX
#undef _dl_runtime_resolve
+#undef _dl_runtime_profile
#define USE_LSX
#define _dl_runtime_resolve _dl_runtime_resolve_lsx
+#define _dl_runtime_profile _dl_runtime_profile_lsx
#include "dl-trampoline.h"
#undef FRAME_SIZE
#undef USE_LSX
#undef _dl_runtime_resolve
+#undef _dl_runtime_profile
#endif
#include "dl-trampoline.h"
-
-#include "dl-link.h"
-
-ENTRY (_dl_runtime_profile)
- /* LoongArch we get called with:
- t0 linkr_map pointer
- t1 the scaled offset stored in t0, which can be used
- to calculate the offset of the current symbol in .rela.plt
- t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function
- t3 dl resolver entry point, no use in this function
-
- Stack frame layout:
- [sp, #96] La_loongarch_regs
- [sp, #48] La_loongarch_retval
- [sp, #40] frame size return from pltenter
- [sp, #32] dl_profile_call saved a1
- [sp, #24] dl_profile_call saved a0
- [sp, #16] T1
- [sp, #0] ra, fp <- fp
- */
-
-# define OFFSET_T1 16
-# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8
-# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16
-# define OFFSET_RV OFFSET_FS + 8
-# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV
-
-# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK))
-
- /* Save arguments to stack. */
- ADDI sp, sp, -SF_SIZE
- REG_S ra, sp, 0
- REG_S fp, sp, 8
-
- or fp, sp, zero
-
- REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
- REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
- REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
- REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
- REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
- REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
- REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
- REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
-
-#ifndef __loongarch_soft_float
- FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
- FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
- FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
- FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
- FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
- FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
- FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
- FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
-#endif
-
- /* Update .got.plt and obtain runtime address of callee. */
- SLLI a1, t1, 1
- or a0, t0, zero
- ADD a1, a1, t1
- or a2, ra, zero /* return addr */
- ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */
- ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */
-
- REG_S a0, fp, OFFSET_SAVED_CALL_A0
- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
-
- la t2, _dl_profile_fixup
- jirl ra, t2, 0
-
- REG_L t3, fp, OFFSET_FS
- bge t3, zero, 1f
-
- /* Save the return. */
- or t4, v0, zero
-
- /* Restore arguments from stack. */
- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
-
-#ifndef __loongarch_soft_float
- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
-#endif
-
- REG_L ra, fp, 0
- REG_L fp, fp, SZREG
-
- ADDI sp, sp, SF_SIZE
- jirl zero, t4, 0
-
-1:
- /* The new frame size is in t3. */
- SUB sp, fp, t3
- BSTRINS sp, zero, 3, 0
-
- REG_S a0, fp, OFFSET_T1
-
- or a0, sp, zero
- ADDI a1, fp, SF_SIZE
- or a2, t3, zero
- la t5, memcpy
- jirl ra, t5, 0
-
- REG_L t6, fp, OFFSET_T1
-
- /* Call the function. */
- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
-
-#ifndef __loongarch_soft_float
- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
-#endif
- jirl ra, t6, 0
-
- REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0
- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG
-
-#ifndef __loongarch_soft_float
- FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0
- FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 + SZFREG
-#endif
-
- /* Setup call to pltexit. */
- REG_L a0, fp, OFFSET_SAVED_CALL_A0
- REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
- ADDI a2, fp, OFFSET_RG
- ADDI a3, fp, OFFSET_RV
- la t7, _dl_audit_pltexit
- jirl ra, t7, 0
-
- REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0
- REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG
-
-#ifndef __loongarch_soft_float
- FREG_L fa0, fp, OFFSET_RV + DL_OFFSET_RV_FA0
- FREG_L fa1, fp, OFFSET_RV + DL_OFFSET_RV_FA0 + SZFREG
-#endif
-
- /* RA from within La_loongarch_reg. */
- REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA
- or sp, fp, zero
- ADDI sp, sp, SF_SIZE
- REG_S fp, fp, SZREG
-
- jirl zero, ra, 0
-
-END (_dl_runtime_profile)
diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h
index 99fcacab..e298439d 100644
--- a/sysdeps/loongarch/dl-trampoline.h
+++ b/sysdeps/loongarch/dl-trampoline.h
@@ -125,3 +125,245 @@ ENTRY (_dl_runtime_resolve)
/* Invoke the callee. */
jirl zero, t1, 0
END (_dl_runtime_resolve)
+
+#include "dl-link.h"
+
+ENTRY (_dl_runtime_profile)
+ /* LoongArch we get called with:
+ t0 linkr_map pointer
+ t1 the scaled offset stored in t0, which can be used
+ to calculate the offset of the current symbol in .rela.plt
+ t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function
+ t3 dl resolver entry point, no use in this function
+
+ Stack frame layout:
+ [sp, #208] La_loongarch_regs
+ [sp, #128] La_loongarch_retval // align: 16
+ [sp, #112] frame size return from pltenter
+ [sp, #80 ] dl_profile_call saved vec1
+ [sp, #48 ] dl_profile_call saved vec0 // align: 16
+ [sp, #32 ] dl_profile_call saved a1
+ [sp, #24 ] dl_profile_call saved a0
+ [sp, #16 ] T1
+ [sp, #0 ] ra, fp <- fp
+ */
+
+# define OFFSET_T1 16
+# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8
+# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 + 8 + 64
+# define OFFSET_RV OFFSET_FS + 8 + 8
+# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV
+
+# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK))
+
+ /* Save arguments to stack. */
+ ADDI sp, sp, -SF_SIZE
+ REG_S ra, sp, 0
+ REG_S fp, sp, 8
+
+ or fp, sp, zero
+
+ REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
+ REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
+ REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
+ REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
+ REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
+ REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
+ REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
+ REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
+
+#ifdef USE_LASX
+ xvst xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
+ xvst xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
+ xvst xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
+ xvst xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
+ xvst xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
+ xvst xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
+ xvst xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
+ xvst xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
+#elif defined USE_LSX
+ vst vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
+ vst vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
+ vst vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
+ vst vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
+ vst vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
+ vst vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
+ vst vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
+ vst vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
+ FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
+ FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
+ FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
+ FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
+ FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
+ FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
+ FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
+#endif
+
+ /* Update .got.plt and obtain runtime address of callee. */
+ SLLI a1, t1, 1
+ or a0, t0, zero
+ ADD a1, a1, t1
+ or a2, ra, zero /* return addr */
+ ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */
+ ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */
+
+ REG_S a0, fp, OFFSET_SAVED_CALL_A0
+ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
+
+ la t2, _dl_profile_fixup
+ jirl ra, t2, 0
+
+ REG_L t3, fp, OFFSET_FS
+ bge t3, zero, 1f
+
+ /* Save the return. */
+ or t4, v0, zero
+
+ /* Restore arguments from stack. */
+ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
+ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
+ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
+ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
+ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
+ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
+ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
+ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
+
+#ifdef USE_LASX
+ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
+ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
+ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
+ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
+ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
+ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
+ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
+ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
+#elif defined USE_LSX
+ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
+ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
+ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
+ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
+ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
+ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
+ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
+ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
+ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
+ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
+ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
+ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
+ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
+ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
+ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
+#endif
+
+ REG_L ra, fp, 0
+ REG_L fp, fp, SZREG
+
+ ADDI sp, sp, SF_SIZE
+ jirl zero, t4, 0
+
+1:
+ /* The new frame size is in t3. */
+ SUB sp, fp, t3
+ BSTRINS sp, zero, 3, 0
+
+ REG_S a0, fp, OFFSET_T1
+
+ or a0, sp, zero
+ ADDI a1, fp, SF_SIZE
+ or a2, t3, zero
+ la t5, memcpy
+ jirl ra, t5, 0
+
+ REG_L t6, fp, OFFSET_T1
+
+ /* Call the function. */
+ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
+ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
+ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
+ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
+ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
+ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
+ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
+ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
+
+#ifdef USE_LASX
+ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
+ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
+ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
+ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
+ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
+ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
+ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
+ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
+#elif defined USE_LSX
+ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
+ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
+ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
+ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
+ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
+ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
+ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
+ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
+ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
+ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
+ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
+ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
+ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
+ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
+ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
+#endif
+
+ jirl ra, t6, 0
+
+ REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0
+ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG
+
+#ifdef USE_LASX
+ xvst xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ xvst xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG
+#elif defined USE_LSX
+ vst vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ vst vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG
+#endif
+
+ /* Setup call to pltexit. */
+ REG_L a0, fp, OFFSET_SAVED_CALL_A0
+ REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
+ ADDI a2, fp, OFFSET_RG
+ ADDI a3, fp, OFFSET_RV
+ la t7, _dl_audit_pltexit
+ jirl ra, t7, 0
+
+ REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0
+ REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG
+
+#ifdef USE_LASX
+ xvld xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ xvld xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG
+#elif defined USE_LSX
+ vld vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ vld vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_L fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ FREG_L fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG
+#endif
+
+ /* RA from within La_loongarch_reg. */
+ REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA
+ or sp, fp, zero
+ ADDI sp, sp, SF_SIZE
+ REG_S fp, fp, SZREG
+
+ jirl zero, ra, 0
+
+END (_dl_runtime_profile)
--
2.33.0

View file

@ -0,0 +1,102 @@
From 7353f21f6ed1754b67e455e2b80123787efa9e91 Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Tue, 8 Aug 2023 14:15:43 +0800
Subject: [PATCH 02/29] LoongArch: Add minimum binutils required version
So that LoongArch glibc can use LASX/LSX vector instructions, raise the
required minimum binutils version to 2.41, which supports these
instructions. HAVE_LOONGARCH_VEC_ASM is removed accordingly.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
config.h.in | 5 -----
sysdeps/loongarch/configure | 5 ++---
sysdeps/loongarch/configure.ac | 4 ++--
sysdeps/loongarch/dl-machine.h | 4 ++--
sysdeps/loongarch/dl-trampoline.S | 2 +-
5 files changed, 7 insertions(+), 13 deletions(-)
diff --git a/config.h.in b/config.h.in
index 0dedc124..44a34072 100644
--- a/config.h.in
+++ b/config.h.in
@@ -141,11 +141,6 @@
/* LOONGARCH floating-point ABI for ld.so. */
#undef LOONGARCH_ABI_FRLEN
-/* Assembler support LoongArch LASX/LSX vector instructions.
- This macro becomes obsolete when glibc increased the minimum
- required version of GNU 'binutils' to 2.41 or later. */
-#define HAVE_LOONGARCH_VEC_ASM 0
-
/* Linux specific: minimum supported kernel version. */
#undef __LINUX_KERNEL_VERSION
diff --git a/sysdeps/loongarch/configure b/sysdeps/loongarch/configure
index 5843c7cf..395ddc92 100644
--- a/sysdeps/loongarch/configure
+++ b/sysdeps/loongarch/configure
@@ -128,8 +128,7 @@ rm -f conftest*
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_loongarch_vec_asm" >&5
printf "%s\n" "$libc_cv_loongarch_vec_asm" >&6; }
-if test $libc_cv_loongarch_vec_asm = yes; then
- printf "%s\n" "#define HAVE_LOONGARCH_VEC_ASM 1" >>confdefs.h
-
+if test $libc_cv_loongarch_vec_asm = no; then
+ as_fn_error $? "binutils version is too old, use 2.41 or newer version" "$LINENO" 5
fi
diff --git a/sysdeps/loongarch/configure.ac b/sysdeps/loongarch/configure.ac
index ba89d834..989287c6 100644
--- a/sysdeps/loongarch/configure.ac
+++ b/sysdeps/loongarch/configure.ac
@@ -74,6 +74,6 @@ else
libc_cv_loongarch_vec_asm=no
fi
rm -f conftest*])
-if test $libc_cv_loongarch_vec_asm = yes; then
- AC_DEFINE(HAVE_LOONGARCH_VEC_ASM)
+if test $libc_cv_loongarch_vec_asm = no; then
+ AC_MSG_ERROR([binutils version is too old, use 2.41 or newer version])
fi
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index 51ce9af8..066bb233 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -270,7 +270,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* If using PLTs, fill in the first two entries of .got.plt. */
if (l->l_info[DT_JMPREL])
{
-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float
+#if !defined __loongarch_soft_float
extern void _dl_runtime_resolve_lasx (void) attribute_hidden;
extern void _dl_runtime_resolve_lsx (void) attribute_hidden;
#endif
@@ -300,7 +300,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* This function will get called to fix up the GOT entry
indicated by the offset on the stack, and then jump to
the resolved address. */
-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float
+#if !defined __loongarch_soft_float
if (SUPPORT_LASX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
else if (SUPPORT_LSX)
diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S
index f6ba5e44..8fd91469 100644
--- a/sysdeps/loongarch/dl-trampoline.S
+++ b/sysdeps/loongarch/dl-trampoline.S
@@ -19,7 +19,7 @@
#include <sysdep.h>
#include <sys/asm.h>
-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float
+#if !defined __loongarch_soft_float
#define USE_LASX
#define _dl_runtime_resolve _dl_runtime_resolve_lasx
#include "dl-trampoline.h"
--
2.33.0
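
As a rough illustration of the kind of probe behind libc_cv_loongarch_vec_asm
(a sketch under assumptions; the exact test program configure compiles may
differ), a translation unit like the one below assembles only when the
assembler accepts LSX mnemonics, i.e. with binutils >= 2.41:

/* probe-lsx-asm.c -- hypothetical stand-alone probe; build with
   "gcc -c probe-lsx-asm.c" on a LoongArch toolchain.  Assembly succeeds
   only if binutils understands LSX mnemonics (>= 2.41); the code is
   never meant to run.  */
void
probe_lsx_assembler (void)
{
  /* vld loads a 128-bit LSX register; only the mnemonic matters here.  */
  asm volatile ("vld $vr0, $sp, 0");
}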

View file

@ -0,0 +1,277 @@
From e5ccd79e81de7ad5821fde83875973e878d85d4b Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Mon, 28 Aug 2023 10:08:40 +0800
Subject: [PATCH 19/29] LoongArch: Change loongarch to LoongArch in comments
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/memmove-aligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/memmove-lasx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/memmove-lsx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strchr-aligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strchr-lasx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strchr-lsx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strlen-aligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strlen-lasx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strlen-lsx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S | 2 +-
24 files changed, 24 insertions(+), 24 deletions(-)
diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S
index 299dd49c..7eb34395 100644
--- a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S
@@ -1,4 +1,4 @@
-/* Optimized memcpy_aligned implementation using basic Loongarch instructions.
+/* Optimized memcpy_aligned implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S
index 4aae5bf8..ae148df5 100644
--- a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S
+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S
@@ -1,4 +1,4 @@
-/* Optimized memcpy implementation using Loongarch LASX instructions.
+/* Optimized memcpy implementation using LoongArch LASX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S
index 6ebbe7a2..feb2bb0e 100644
--- a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S
@@ -1,4 +1,4 @@
-/* Optimized memcpy implementation using Loongarch LSX instructions.
+/* Optimized memcpy implementation using LoongArch LSX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S
index 8e60a22d..31019b13 100644
--- a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S
@@ -1,4 +1,4 @@
-/* Optimized unaligned memcpy implementation using basic Loongarch instructions.
+/* Optimized unaligned memcpy implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S
index 5354f383..a02114c0 100644
--- a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S
@@ -1,4 +1,4 @@
-/* Optimized memmove_aligned implementation using basic Loongarch instructions.
+/* Optimized memmove_aligned implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S
index ff68e7a2..95d8ee7b 100644
--- a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S
+++ b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S
@@ -1,4 +1,4 @@
-/* Optimized memmove implementation using Loongarch LASX instructions.
+/* Optimized memmove implementation using LoongArch LASX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
index 9e1502a7..8a936770 100644
--- a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
@@ -1,4 +1,4 @@
-/* Optimized memmove implementation using Loongarch LSX instructions.
+/* Optimized memmove implementation using LoongArch LSX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S
index 90a64b6b..3284ce25 100644
--- a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S
@@ -1,4 +1,4 @@
-/* Optimized memmove_unaligned implementation using basic Loongarch instructions.
+/* Optimized memmove_unaligned implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
index 5fb01806..62020054 100644
--- a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
@@ -1,4 +1,4 @@
-/* Optimized strchr implementation using basic Loongarch instructions.
+/* Optimized strchr implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
index 254402da..4d3cc588 100644
--- a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
@@ -1,4 +1,4 @@
-/* Optimized strchr implementation using loongarch LASX SIMD instructions.
+/* Optimized strchr implementation using LoongArch LASX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
index dae98b0a..8b78c35c 100644
--- a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
@@ -1,4 +1,4 @@
-/* Optimized strlen implementation using loongarch LSX SIMD instructions.
+/* Optimized strlen implementation using LoongArch LSX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
index 1c01a023..20856a06 100644
--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
@@ -1,4 +1,4 @@
-/* Optimized strchrnul implementation using basic Loongarch instructions.
+/* Optimized strchrnul implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
index d45495e4..4753d4ce 100644
--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
@@ -1,4 +1,4 @@
-/* Optimized strchrnul implementation using loongarch LASX SIMD instructions.
+/* Optimized strchrnul implementation using LoongArch LASX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
index 07d793ae..671e740c 100644
--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
@@ -1,4 +1,4 @@
-/* Optimized strchrnul implementation using loongarch LSX SIMD instructions.
+/* Optimized strchrnul implementation using LoongArch LSX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
index f5f4f336..ba1f9667 100644
--- a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
@@ -1,4 +1,4 @@
-/* Optimized strcmp implementation using basic Loongarch instructions.
+/* Optimized strcmp implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
index 2e177a38..091c8c9e 100644
--- a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
@@ -1,4 +1,4 @@
-/* Optimized strcmp implementation using Loongarch LSX instructions.
+/* Optimized strcmp implementation using LoongArch LSX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
index e9e1d2fc..ed0548e4 100644
--- a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
@@ -1,4 +1,4 @@
-/* Optimized strlen implementation using basic Loongarch instructions.
+/* Optimized strlen implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
index 258c47ce..91342f34 100644
--- a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
@@ -1,4 +1,4 @@
-/* Optimized strlen implementation using loongarch LASX SIMD instructions.
+/* Optimized strlen implementation using LoongArch LASX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
index b194355e..b09c12e0 100644
--- a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
@@ -1,4 +1,4 @@
-/* Optimized strlen implementation using Loongarch LSX SIMD instructions.
+/* Optimized strlen implementation using LoongArch LSX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
index e2687fa7..f63de872 100644
--- a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
@@ -1,4 +1,4 @@
-/* Optimized strncmp implementation using basic Loongarch instructions.
+/* Optimized strncmp implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
index 0b4eee2a..83cb801d 100644
--- a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
@@ -1,4 +1,4 @@
-/* Optimized strncmp implementation using Loongarch LSX instructions.
+/* Optimized strncmp implementation using LoongArch LSX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
index b900430a..a8296a1b 100644
--- a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
@@ -1,4 +1,4 @@
-/* Optimized strnlen implementation using basic Loongarch instructions.
+/* Optimized strnlen implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
index 2c03d3d9..aa6c812d 100644
--- a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
@@ -1,4 +1,4 @@
-/* Optimized strnlen implementation using loongarch LASX instructions
+/* Optimized strnlen implementation using LoongArch LASX instructions
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
index b769a895..d0febe3e 100644
--- a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
@@ -1,4 +1,4 @@
-/* Optimized strnlen implementation using loongarch LSX instructions
+/* Optimized strnlen implementation using LoongArch LSX instructions
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
--
2.33.0

View file

@ -0,0 +1,67 @@
From fb72c81f9894b23797f6e2e066532c0963f5155f Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Wed, 13 Sep 2023 15:35:01 +0800
Subject: [PATCH 24/29] LoongArch: Change to put magic number to .rodata
section
Move the magic number into the .rodata section in memmove-lsx, and use
pcalau12i and %pc_lo12 with vld to load the data.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
.../loongarch/lp64/multiarch/memmove-lsx.S | 20 +++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
index 8a936770..5eb819ef 100644
--- a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
@@ -209,13 +209,10 @@ L(al_less_16):
nop
-L(magic_num):
- .dword 0x0706050403020100
- .dword 0x0f0e0d0c0b0a0908
L(unaligned):
- pcaddi t2, -4
+ pcalau12i t2, %pc_hi20(L(INDEX))
bstrins.d a1, zero, 3, 0
- vld vr8, t2, 0
+ vld vr8, t2, %pc_lo12(L(INDEX))
vld vr0, a1, 0
vld vr1, a1, 16
@@ -413,13 +410,10 @@ L(back_al_less_16):
vst vr1, a0, 0
jr ra
-L(magic_num_2):
- .dword 0x0706050403020100
- .dword 0x0f0e0d0c0b0a0908
L(back_unaligned):
- pcaddi t2, -4
+ pcalau12i t2, %pc_hi20(L(INDEX))
bstrins.d a4, zero, 3, 0
- vld vr8, t2, 0
+ vld vr8, t2, %pc_lo12(L(INDEX))
vld vr0, a4, 0
vld vr1, a4, -16
@@ -529,6 +523,12 @@ L(back_un_less_16):
jr ra
END(MEMMOVE_NAME)
+ .section .rodata.cst16,"M",@progbits,16
+ .align 4
+L(INDEX):
+ .dword 0x0706050403020100
+ .dword 0x0f0e0d0c0b0a0908
+
libc_hidden_builtin_def (MEMCPY_NAME)
libc_hidden_builtin_def (MEMMOVE_NAME)
#endif
--
2.33.0

View file

@ -0,0 +1,44 @@
From 7f703cf758c4f185dd62f2a4f463002bb514af16 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sun, 27 Aug 2023 00:36:51 +0800
Subject: [PATCH 13/29] LoongArch: Micro-optimize LD_PCREL
We are requiring Binutils >= 2.41, so explicit relocation syntax is
always supported by the assembler. Use it to save one instruction.
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/unix/sysv/linux/loongarch/pointer_guard.h | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h b/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h
index b25e353b..d6c78687 100644
--- a/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h
+++ b/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h
@@ -19,17 +19,15 @@
#ifndef POINTER_GUARD_H
#define POINTER_GUARD_H
-/* Load a got-relative EXPR into G, using T.
- Note G and T are register names. */
+/* Load a got-relative EXPR into register G. */
#define LD_GLOBAL(G, EXPR) \
la.global G, EXPR; \
REG_L G, G, 0;
-/* Load a pc-relative EXPR into G, using T.
- Note G and T are register names. */
+/* Load a pc-relative EXPR into register G. */
#define LD_PCREL(G, EXPR) \
- la.pcrel G, EXPR; \
- REG_L G, G, 0;
+ pcalau12i G, %pc_hi20(EXPR); \
+ REG_L G, G, %pc_lo12(EXPR);
#if (IS_IN (rtld) \
|| (!defined SHARED && (IS_IN (libc) \
--
2.33.0

View file

@ -0,0 +1,65 @@
From 8dcd8c837df2e3cf81675522487697522f1542f8 Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Tue, 8 Aug 2023 14:15:42 +0800
Subject: [PATCH 01/29] LoongArch: Redefine macro LEAF/ENTRY.
The following usages of the LEAF/ENTRY macros are all supported:
1. LEAF(fcn) -- the alignment of fcn is .align 3 (the default value)
2. LEAF(fcn, 6) -- the alignment of fcn is .align 6
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/sys/asm.h | 36 ++++++++++++++++++++++++++----------
1 file changed, 26 insertions(+), 10 deletions(-)
diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
index d1a279b8..c5eb8afa 100644
--- a/sysdeps/loongarch/sys/asm.h
+++ b/sysdeps/loongarch/sys/asm.h
@@ -39,16 +39,32 @@
#define FREG_L fld.d
#define FREG_S fst.d
-/* Declare leaf routine. */
-#define LEAF(symbol) \
- .text; \
- .globl symbol; \
- .align 3; \
- cfi_startproc; \
- .type symbol, @function; \
- symbol:
-
-#define ENTRY(symbol) LEAF (symbol)
+/* Declare leaf routine.
+ The usage of macro LEAF/ENTRY is as follows:
+ 1. LEAF(fcn) -- the align value of fcn is .align 3 (default value)
+ 2. LEAF(fcn, 6) -- the align value of fcn is .align 6
+*/
+#define LEAF_IMPL(symbol, aln, ...) \
+ .text; \
+ .globl symbol; \
+ .align aln; \
+ .type symbol, @function; \
+symbol: \
+ cfi_startproc;
+
+
+#define LEAF(...) LEAF_IMPL(__VA_ARGS__, 3)
+#define ENTRY(...) LEAF(__VA_ARGS__)
+
+#define LEAF_NO_ALIGN(symbol) \
+ .text; \
+ .globl symbol; \
+ .type symbol, @function; \
+symbol: \
+ cfi_startproc;
+
+#define ENTRY_NO_ALIGN(symbol) LEAF_NO_ALIGN(symbol)
+
/* Mark end of function. */
#undef END
--
2.33.0

View file

@ -0,0 +1,56 @@
From f8d66a269cb6f1a7087afadf3375bdf0553abf53 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sun, 27 Aug 2023 00:36:50 +0800
Subject: [PATCH 12/29] LoongArch: Remove support code for old linker in
start.S
We are requiring Binutils >= 2.41, so la.pcrel always works here.
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/start.S | 19 +++----------------
1 file changed, 3 insertions(+), 16 deletions(-)
diff --git a/sysdeps/loongarch/start.S b/sysdeps/loongarch/start.S
index e9d82033..bf6bfc9e 100644
--- a/sysdeps/loongarch/start.S
+++ b/sysdeps/loongarch/start.S
@@ -60,20 +60,7 @@ ENTRY (ENTRY_POINT)
cfi_undefined (1)
or a5, a0, zero /* rtld_fini */
-#if ENABLE_STATIC_PIE
-/* For static PIE, the GOT cannot be used in _start because the GOT entries are
- offsets instead of real addresses before __libc_start_main.
- __libc_start_main and/or main may be not local, so we rely on the linker to
- produce PLT entries for them. GNU ld >= 2.40 supports this. */
-# define LA la.pcrel
-#else
-/* Old GNU ld (< 2.40) cannot handle PC relative address against a non-local
- function correctly. We deem these old linkers failing to support static PIE
- and load the addresses from GOT. */
-# define LA la.got
-#endif
-
- LA a0, t0, main
+ la.pcrel a0, t0, main
REG_L a1, sp, 0
ADDI a2, sp, SZREG
@@ -84,9 +71,9 @@ ENTRY (ENTRY_POINT)
move a4, zero /* used to be fini */
or a6, sp, zero /* stack_end */
- LA ra, t0, __libc_start_main
+ la.pcrel ra, t0, __libc_start_main
jirl ra, ra, 0
- LA ra, t0, abort
+ la.pcrel ra, t0, abort
jirl ra, ra, 0
END (ENTRY_POINT)
--
2.33.0

View file

@ -0,0 +1,28 @@
From b4b4bb7c9220a0bbdf5aec0ac8c1de1d22329280 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Thu, 14 Sep 2023 19:48:24 +0800
Subject: [PATCH 21/29] LoongArch: Replace deprecated $v0 with $a0 to eliminate
'as' Warnings.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/dl-machine.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index 8a2db9de..57913cef 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -90,7 +90,7 @@ static inline ElfW (Addr) elf_machine_dynamic (void)
or $a0, $sp, $zero \n\
bl _dl_start \n\
# Stash user entry point in s0. \n\
- or $s0, $v0, $zero \n\
+ or $s0, $a0, $zero \n\
# Load the original argument count. \n\
ld.d $a1, $sp, 0 \n\
# Call _dl_init (struct link_map *main_map, int argc, \
--
2.33.0

View file

@ -0,0 +1,81 @@
From 458ab6d5f39cca1cabd83abd2022f67491f6f5ed Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Fri, 20 Oct 2023 09:20:02 +0800
Subject: [PATCH 27/29] LoongArch: Unify Register Names.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/__longjmp.S | 20 ++++++++++----------
sysdeps/loongarch/setjmp.S | 18 +++++++++---------
2 files changed, 19 insertions(+), 19 deletions(-)
diff --git a/sysdeps/loongarch/__longjmp.S b/sysdeps/loongarch/__longjmp.S
index cbde1946..e87ce311 100644
--- a/sysdeps/loongarch/__longjmp.S
+++ b/sysdeps/loongarch/__longjmp.S
@@ -43,18 +43,18 @@ ENTRY (__longjmp)
REG_L s8, a0, 12*SZREG
#ifndef __loongarch_soft_float
- FREG_L $f24, a0, 13*SZREG + 0*SZFREG
- FREG_L $f25, a0, 13*SZREG + 1*SZFREG
- FREG_L $f26, a0, 13*SZREG + 2*SZFREG
- FREG_L $f27, a0, 13*SZREG + 3*SZFREG
- FREG_L $f28, a0, 13*SZREG + 4*SZFREG
- FREG_L $f29, a0, 13*SZREG + 5*SZFREG
- FREG_L $f30, a0, 13*SZREG + 6*SZFREG
- FREG_L $f31, a0, 13*SZREG + 7*SZFREG
+ FREG_L fs0, a0, 13*SZREG + 0*SZFREG
+ FREG_L fs1, a0, 13*SZREG + 1*SZFREG
+ FREG_L fs2, a0, 13*SZREG + 2*SZFREG
+ FREG_L fs3, a0, 13*SZREG + 3*SZFREG
+ FREG_L fs4, a0, 13*SZREG + 4*SZFREG
+ FREG_L fs5, a0, 13*SZREG + 5*SZFREG
+ FREG_L fs6, a0, 13*SZREG + 6*SZFREG
+ FREG_L fs7, a0, 13*SZREG + 7*SZFREG
#endif
- sltui a0,a1,1
+ sltui a0, a1, 1
ADD a0, a0, a1 # a0 = (a1 == 0) ? 1 : a1
- jirl zero,ra,0
+ jirl zero, ra, 0
END (__longjmp)
diff --git a/sysdeps/loongarch/setjmp.S b/sysdeps/loongarch/setjmp.S
index 6c7065cd..b6e4f727 100644
--- a/sysdeps/loongarch/setjmp.S
+++ b/sysdeps/loongarch/setjmp.S
@@ -52,19 +52,19 @@ ENTRY (__sigsetjmp)
REG_S s8, a0, 12*SZREG
#ifndef __loongarch_soft_float
- FREG_S $f24, a0, 13*SZREG + 0*SZFREG
- FREG_S $f25, a0, 13*SZREG + 1*SZFREG
- FREG_S $f26, a0, 13*SZREG + 2*SZFREG
- FREG_S $f27, a0, 13*SZREG + 3*SZFREG
- FREG_S $f28, a0, 13*SZREG + 4*SZFREG
- FREG_S $f29, a0, 13*SZREG + 5*SZFREG
- FREG_S $f30, a0, 13*SZREG + 6*SZFREG
- FREG_S $f31, a0, 13*SZREG + 7*SZFREG
+ FREG_S fs0, a0, 13*SZREG + 0*SZFREG
+ FREG_S fs1, a0, 13*SZREG + 1*SZFREG
+ FREG_S fs2, a0, 13*SZREG + 2*SZFREG
+ FREG_S fs3, a0, 13*SZREG + 3*SZFREG
+ FREG_S fs4, a0, 13*SZREG + 4*SZFREG
+ FREG_S fs5, a0, 13*SZREG + 5*SZFREG
+ FREG_S fs6, a0, 13*SZREG + 6*SZFREG
+ FREG_S fs7, a0, 13*SZREG + 7*SZFREG
#endif
#if !IS_IN (libc) && IS_IN(rtld)
li.w v0, 0
- jirl zero,ra,0
+ jirl zero, ra, 0
#else
b __sigjmp_save
#endif
--
2.33.0

View file

@ -0,0 +1,24 @@
From 4828d1aa0028e819a5fb336d962e8f7cbfedf8b4 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Mon, 23 Oct 2023 15:53:38 +0800
Subject: [PATCH 28/29] LoongArch: Update hwcap.h to sync with LoongArch
kernel.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h
index 5104b69c..7acec23d 100644
--- a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h
+++ b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h
@@ -35,3 +35,4 @@
#define HWCAP_LOONGARCH_LBT_X86 (1 << 10)
#define HWCAP_LOONGARCH_LBT_ARM (1 << 11)
#define HWCAP_LOONGARCH_LBT_MIPS (1 << 12)
+#define HWCAP_LOONGARCH_PTW (1 << 13)
--
2.33.0
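
For context, the HWCAP_LOONGARCH_* bits defined in this header can be queried
from user code through getauxval. The following is a minimal sketch, assuming
<sys/auxv.h> exposes the LoongArch bits from <bits/hwcap.h> as it does on
other architectures:

#include <stdio.h>
#include <sys/auxv.h>   /* getauxval, AT_HWCAP and the HWCAP_LOONGARCH_* bits.  */

int
main (void)
{
  unsigned long hwcap = getauxval (AT_HWCAP);

  /* Report a few of the capability bits from bits/hwcap.h.  */
  printf ("LSX:  %s\n", (hwcap & HWCAP_LOONGARCH_LSX) ? "yes" : "no");
  printf ("LASX: %s\n", (hwcap & HWCAP_LOONGARCH_LASX) ? "yes" : "no");
  printf ("PTW:  %s\n", (hwcap & HWCAP_LOONGARCH_PTW) ? "yes" : "no");
  return 0;
}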

View file

@ -0,0 +1,30 @@
From 4938840b15ff9734fdcc63cc0744ce3f3bbb0b16 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Mon, 14 Aug 2023 15:34:08 +0800
Subject: [PATCH 05/29] LoongArch: elf: Add new LoongArch reloc types 109 into
elf.h
This reloc type is generated by GNU assembler >= 2.41 for relaxation
support.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
elf/elf.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/elf/elf.h b/elf/elf.h
index d623bdeb..9c51073f 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -4213,6 +4213,7 @@ enum
#define R_LARCH_SUB6 106
#define R_LARCH_ADD_ULEB128 107
#define R_LARCH_SUB_ULEB128 108
+#define R_LARCH_64_PCREL 109
/* ARC specific declarations. */
--
2.33.0

View file

@ -0,0 +1,528 @@
From 43abd8772a143cd96688c081500397dd712e631b Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Tue, 8 Aug 2023 14:15:44 +0800
Subject: [PATCH 03/29] Loongarch: Add ifunc support and add different versions
of strlen
strlen-lasx is implemented with LASX SIMD instructions (256-bit)
strlen-lsx is implemented with LSX SIMD instructions (128-bit)
strlen-aligned is implemented with LA basic instructions and never uses unaligned memory access
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 7 ++
.../lp64/multiarch/ifunc-impl-list.c | 41 +++++++
.../loongarch/lp64/multiarch/ifunc-strlen.h | 40 +++++++
.../loongarch/lp64/multiarch/strlen-aligned.S | 100 ++++++++++++++++++
.../loongarch/lp64/multiarch/strlen-lasx.S | 63 +++++++++++
sysdeps/loongarch/lp64/multiarch/strlen-lsx.S | 71 +++++++++++++
sysdeps/loongarch/lp64/multiarch/strlen.c | 37 +++++++
sysdeps/loongarch/sys/regdef.h | 57 ++++++++++
.../unix/sysv/linux/loongarch/cpu-features.h | 2 +
9 files changed, 418 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/Makefile
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
new file mode 100644
index 00000000..76c506c9
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -0,0 +1,7 @@
+ifeq ($(subdir),string)
+sysdep_routines += \
+ strlen-aligned \
+ strlen-lsx \
+ strlen-lasx \
+# sysdep_routines
+endif
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
new file mode 100644
index 00000000..1a2a576f
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -0,0 +1,41 @@
+/* Enumerate available IFUNC implementations of a function LoongArch64 version.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <assert.h>
+#include <string.h>
+#include <wchar.h>
+#include <ldsodefs.h>
+#include <ifunc-impl-list.h>
+#include <stdio.h>
+
+size_t
+__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ size_t max)
+{
+
+ size_t i = max;
+
+ IFUNC_IMPL (i, name, strlen,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LASX, __strlen_lasx)
+ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LSX, __strlen_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned)
+ )
+ return i;
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h
new file mode 100644
index 00000000..6258bb76
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h
@@ -0,0 +1,40 @@
+/* Common definition for strlen ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
new file mode 100644
index 00000000..e9e1d2fc
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
@@ -0,0 +1,100 @@
+/* Optimized strlen implementation using basic Loongarch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STRLEN __strlen_aligned
+#else
+# define STRLEN strlen
+#endif
+
+LEAF(STRLEN, 6)
+ move a1, a0
+ bstrins.d a0, zero, 2, 0
+ lu12i.w a2, 0x01010
+ li.w t0, -1
+
+ ld.d t2, a0, 0
+ andi t1, a1, 0x7
+ ori a2, a2, 0x101
+ slli.d t1, t1, 3
+
+ bstrins.d a2, a2, 63, 32
+ sll.d t1, t0, t1
+ slli.d t3, a2, 7
+ nor a3, zero, t3
+
+ orn t2, t2, t1
+ sub.d t0, t2, a2
+ nor t1, t2, a3
+ and t0, t0, t1
+
+
+ bnez t0, L(count_pos)
+ addi.d a0, a0, 8
+L(loop_16_7bit):
+ ld.d t2, a0, 0
+ sub.d t1, t2, a2
+
+ and t0, t1, t3
+ bnez t0, L(more_check)
+ ld.d t2, a0, 8
+ sub.d t1, t2, a2
+
+ and t0, t1, t3
+ addi.d a0, a0, 16
+ beqz t0, L(loop_16_7bit)
+ addi.d a0, a0, -8
+
+L(more_check):
+ nor t0, t2, a3
+ and t0, t1, t0
+ bnez t0, L(count_pos)
+ addi.d a0, a0, 8
+
+
+L(loop_16_8bit):
+ ld.d t2, a0, 0
+ sub.d t1, t2, a2
+ nor t0, t2, a3
+ and t0, t0, t1
+
+ bnez t0, L(count_pos)
+ ld.d t2, a0, 8
+ addi.d a0, a0, 16
+ sub.d t1, t2, a2
+
+ nor t0, t2, a3
+ and t0, t0, t1
+ beqz t0, L(loop_16_8bit)
+ addi.d a0, a0, -8
+
+L(count_pos):
+ ctz.d t1, t0
+ sub.d a0, a0, a1
+ srli.d t1, t1, 3
+ add.d a0, a0, t1
+
+ jr ra
+END(STRLEN)
+
+libc_hidden_builtin_def (STRLEN)
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
new file mode 100644
index 00000000..258c47ce
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
@@ -0,0 +1,63 @@
+/* Optimized strlen implementation using loongarch LASX SIMD instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define STRLEN __strlen_lasx
+
+LEAF(STRLEN, 6)
+ move a1, a0
+ bstrins.d a0, zero, 4, 0
+ li.d t1, -1
+ xvld xr0, a0, 0
+
+ xvmsknz.b xr0, xr0
+ xvpickve.w xr1, xr0, 4
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0 # sign extend
+
+ sra.w t0, t0, a1
+ beq t0, t1, L(loop)
+ cto.w a0, t0
+ jr ra
+
+L(loop):
+ xvld xr0, a0, 32
+ addi.d a0, a0, 32
+ xvsetanyeqz.b fcc0, xr0
+ bceqz fcc0, L(loop)
+
+
+ xvmsknz.b xr0, xr0
+ sub.d a0, a0, a1
+ xvpickve.w xr1, xr0, 4
+ vilvl.h vr0, vr1, vr0
+
+ movfr2gr.s t0, fa0
+ cto.w t0, t0
+ add.d a0, a0, t0
+ jr ra
+END(STRLEN)
+
+libc_hidden_builtin_def (STRLEN)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
new file mode 100644
index 00000000..b194355e
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
@@ -0,0 +1,71 @@
+/* Optimized strlen implementation using Loongarch LSX SIMD instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define STRLEN __strlen_lsx
+
+LEAF(STRLEN, 6)
+ move a1, a0
+ bstrins.d a0, zero, 4, 0
+ vld vr0, a0, 0
+ vld vr1, a0, 16
+
+ li.d t1, -1
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+
+ movfr2gr.s t0, fa0
+ sra.w t0, t0, a1
+ beq t0, t1, L(loop)
+ cto.w a0, t0
+
+ jr ra
+ nop
+ nop
+ nop
+
+
+L(loop):
+ vld vr0, a0, 32
+ vld vr1, a0, 48
+ addi.d a0, a0, 32
+ vmin.bu vr2, vr0, vr1
+
+ vsetanyeqz.b fcc0, vr2
+ bceqz fcc0, L(loop)
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+
+ vilvl.h vr0, vr1, vr0
+ sub.d a0, a0, a1
+ movfr2gr.s t0, fa0
+ cto.w t0, t0
+
+ add.d a0, a0, t0
+ jr ra
+END(STRLEN)
+
+libc_hidden_builtin_def (STRLEN)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen.c b/sysdeps/loongarch/lp64/multiarch/strlen.c
new file mode 100644
index 00000000..381c2daa
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strlen.c
@@ -0,0 +1,37 @@
+/* Multiple versions of strlen.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+
+#if IS_IN (libc)
+# define strlen __redirect_strlen
+# include <string.h>
+# undef strlen
+
+# define SYMBOL_NAME strlen
+# include "ifunc-strlen.h"
+
+libc_ifunc_redirected (__redirect_strlen, strlen, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strlen, __GI_strlen, __redirect_strlen)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strlen);
+# endif
+
+#endif
diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
index 5100f36d..524d2e32 100644
--- a/sysdeps/loongarch/sys/regdef.h
+++ b/sysdeps/loongarch/sys/regdef.h
@@ -89,6 +89,14 @@
#define fs5 $f29
#define fs6 $f30
#define fs7 $f31
+#define fcc0 $fcc0
+#define fcc1 $fcc1
+#define fcc2 $fcc2
+#define fcc3 $fcc3
+#define fcc4 $fcc4
+#define fcc5 $fcc5
+#define fcc6 $fcc6
+#define fcc7 $fcc7
#define vr0 $vr0
#define vr1 $vr1
@@ -98,6 +106,30 @@
#define vr5 $vr5
#define vr6 $vr6
#define vr7 $vr7
+#define vr8 $vr8
+#define vr9 $vr9
+#define vr10 $vr10
+#define vr11 $vr11
+#define vr12 $vr12
+#define vr13 $vr13
+#define vr14 $vr14
+#define vr15 $vr15
+#define vr16 $vr16
+#define vr17 $vr17
+#define vr18 $vr18
+#define vr19 $vr19
+#define vr20 $vr20
+#define vr21 $vr21
+#define vr22 $vr22
+#define vr23 $vr23
+#define vr24 $vr24
+#define vr25 $vr25
+#define vr26 $vr26
+#define vr27 $vr27
+#define vr28 $vr28
+#define vr29 $vr29
+#define vr30 $vr30
+#define vr31 $vr31
#define xr0 $xr0
#define xr1 $xr1
@@ -107,5 +139,30 @@
#define xr5 $xr5
#define xr6 $xr6
#define xr7 $xr7
+#define xr7 $xr7
+#define xr8 $xr8
+#define xr9 $xr9
+#define xr10 $xr10
+#define xr11 $xr11
+#define xr12 $xr12
+#define xr13 $xr13
+#define xr14 $xr14
+#define xr15 $xr15
+#define xr16 $xr16
+#define xr17 $xr17
+#define xr18 $xr18
+#define xr19 $xr19
+#define xr20 $xr20
+#define xr21 $xr21
+#define xr22 $xr22
+#define xr23 $xr23
+#define xr24 $xr24
+#define xr25 $xr25
+#define xr26 $xr26
+#define xr27 $xr27
+#define xr28 $xr28
+#define xr29 $xr29
+#define xr30 $xr30
+#define xr31 $xr31
#endif /* _SYS_REGDEF_H */
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
index e371e13b..d1a280a5 100644
--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
@@ -25,5 +25,7 @@
#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
+#define INIT_ARCH()
+
#endif /* _CPU_FEATURES_LOONGARCH64_H */
--
2.33.0
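
To make the ifunc mechanism used by this patch more concrete, here is a
minimal out-of-tree sketch. The my_strlen_* names are hypothetical, and the
resolver uses getauxval for simplicity; glibc's own selector goes through
ifunc-init.h and GLRO (dl_hwcap) instead:

#include <stddef.h>
#include <sys/auxv.h>   /* getauxval, AT_HWCAP, HWCAP_LOONGARCH_*.  */

/* Hypothetical per-variant implementations (e.g. written in assembly).  */
extern size_t my_strlen_aligned (const char *);
extern size_t my_strlen_lsx (const char *);
extern size_t my_strlen_lasx (const char *);

typedef size_t (*strlen_fn) (const char *);

/* The resolver runs once when the IRELATIVE relocation is processed and
   picks the best variant, mirroring the lasx > lsx > aligned order of
   ifunc-strlen.h.  */
static strlen_fn
resolve_my_strlen (void)
{
  unsigned long hwcap = getauxval (AT_HWCAP);

  if (hwcap & HWCAP_LOONGARCH_LASX)
    return my_strlen_lasx;
  if (hwcap & HWCAP_LOONGARCH_LSX)
    return my_strlen_lsx;
  return my_strlen_aligned;
}

/* GCC's ifunc attribute binds my_strlen to whatever the resolver returns.  */
size_t my_strlen (const char *s)
  __attribute__ ((ifunc ("resolve_my_strlen")));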

File diff suppressed because it is too large

View file

@ -0,0 +1,706 @@
From aca7d7f0dde5f56344e8e58e5f6648c96bb1f1cc Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Tue, 15 Aug 2023 09:08:11 +0800
Subject: [PATCH 06/29] Loongarch: Add ifunc support for strchr{aligned, lsx,
lasx} and strchrnul{aligned, lsx, lasx}
These implementations improve the runtime of the glibc strchr{nul}
microbenchmarks as follows:
strchr-lasx reduces the runtime by about 50%-83%
strchr-lsx reduces the runtime by about 30%-67%
strchr-aligned reduces the runtime by about 10%-20%
strchrnul-lasx reduces the runtime by about 50%-83%
strchrnul-lsx reduces the runtime by about 36%-65%
strchrnul-aligned reduces the runtime by about 6%-10%
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 6 ++
.../lp64/multiarch/ifunc-impl-list.c | 16 +++
.../loongarch/lp64/multiarch/ifunc-strchr.h | 41 ++++++++
.../lp64/multiarch/ifunc-strchrnul.h | 41 ++++++++
.../loongarch/lp64/multiarch/strchr-aligned.S | 99 +++++++++++++++++++
.../loongarch/lp64/multiarch/strchr-lasx.S | 91 +++++++++++++++++
sysdeps/loongarch/lp64/multiarch/strchr-lsx.S | 73 ++++++++++++++
sysdeps/loongarch/lp64/multiarch/strchr.c | 36 +++++++
.../lp64/multiarch/strchrnul-aligned.S | 95 ++++++++++++++++++
.../loongarch/lp64/multiarch/strchrnul-lasx.S | 22 +++++
.../loongarch/lp64/multiarch/strchrnul-lsx.S | 22 +++++
sysdeps/loongarch/lp64/multiarch/strchrnul.c | 39 ++++++++
12 files changed, 581 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr.c
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 76c506c9..110a8c5c 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -3,5 +3,11 @@ sysdep_routines += \
strlen-aligned \
strlen-lsx \
strlen-lasx \
+ strchr-aligned \
+ strchr-lsx \
+ strchr-lasx \
+ strchrnul-aligned \
+ strchrnul-lsx \
+ strchrnul-lasx \
# sysdep_routines
endif
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 1a2a576f..c7164b45 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -37,5 +37,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#endif
IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned)
)
+
+ IFUNC_IMPL (i, name, strchr,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx)
+ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LSX, __strchr_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_aligned)
+ )
+
+ IFUNC_IMPL (i, name, strchrnul,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LASX, __strchrnul_lasx)
+ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LSX, __strchrnul_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned)
+ )
return i;
}
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h
new file mode 100644
index 00000000..4494db79
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h
@@ -0,0 +1,41 @@
+/* Common definition for strchr ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h
new file mode 100644
index 00000000..8a925120
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h
@@ -0,0 +1,41 @@
+/* Common definition for strchrnul ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
new file mode 100644
index 00000000..5fb01806
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
@@ -0,0 +1,99 @@
+/* Optimized strchr implementation using basic Loongarch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STRCHR_NAME __strchr_aligned
+#else
+# define STRCHR_NAME strchr
+#endif
+
+LEAF(STRCHR_NAME, 6)
+ slli.d t1, a0, 3
+ bstrins.d a0, zero, 2, 0
+ lu12i.w a2, 0x01010
+ ld.d t2, a0, 0
+
+ ori a2, a2, 0x101
+ andi a1, a1, 0xff
+ bstrins.d a2, a2, 63, 32
+ li.w t0, -1
+
+ mul.d a1, a1, a2
+ sll.d t0, t0, t1
+ slli.d a3, a2, 7
+ orn t2, t2, t0
+
+ sll.d t3, a1, t1
+ xor t4, t2, t3
+ sub.d a4, t2, a2
+ sub.d a5, t4, a2
+
+
+ andn a4, a4, t2
+ andn a5, a5, t4
+ or t0, a4, a5
+ and t0, t0, a3
+
+ bnez t0, L(end)
+ addi.d a0, a0, 8
+L(loop):
+ ld.d t4, a0, 0
+ xor t2, t4, a1
+
+ sub.d a4, t4, a2
+ sub.d a5, t2, a2
+ andn a4, a4, t4
+ andn a5, a5, t2
+
+ or t0, a4, a5
+ and t0, t0, a3
+ bnez t0, L(end)
+ ld.d t4, a0, 8
+
+
+ addi.d a0, a0, 16
+ xor t2, t4, a1
+ sub.d a4, t4, a2
+ sub.d a5, t2, a2
+
+ andn a4, a4, t4
+ andn a5, a5, t2
+ or t0, a4, a5
+ and t0, t0, a3
+
+ beqz t0, L(loop)
+ addi.d a0, a0, -8
+L(end):
+ and t0, a5, a3
+ and t1, a4, a3
+
+ ctz.d t0, t0
+ ctz.d t1, t1
+ srli.w t2, t0, 3
+ sltu t3, t1, t0
+
+
+ add.d a0, a0, t2
+ masknez a0, a0, t3
+ jr ra
+END(STRCHR_NAME)
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
new file mode 100644
index 00000000..254402da
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
@@ -0,0 +1,91 @@
+/* Optimized strchr implementation using loongarch LASX SIMD instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+#ifndef AS_STRCHRNUL
+# define STRCHR __strchr_lasx
+#endif
+
+LEAF(STRCHR, 6)
+ andi t1, a0, 0x1f
+ bstrins.d a0, zero, 4, 0
+ xvld xr0, a0, 0
+ li.d t2, -1
+
+ xvreplgr2vr.b xr1, a1
+ sll.d t1, t2, t1
+ xvxor.v xr2, xr0, xr1
+ xvmin.bu xr0, xr0, xr2
+
+ xvmsknz.b xr0, xr0
+ xvpickve.w xr3, xr0, 4
+ vilvl.h vr0, vr3, vr0
+ movfr2gr.s t0, fa0
+
+ orn t0, t0, t1
+ bne t0, t2, L(end)
+ addi.d a0, a0, 32
+ nop
+
+
+L(loop):
+ xvld xr0, a0, 0
+ xvxor.v xr2, xr0, xr1
+ xvmin.bu xr0, xr0, xr2
+ xvsetanyeqz.b fcc0, xr0
+
+ bcnez fcc0, L(loop_end)
+ xvld xr0, a0, 32
+ addi.d a0, a0, 64
+ xvxor.v xr2, xr0, xr1
+
+ xvmin.bu xr0, xr0, xr2
+ xvsetanyeqz.b fcc0, xr0
+ bceqz fcc0, L(loop)
+ addi.d a0, a0, -32
+
+L(loop_end):
+ xvmsknz.b xr0, xr0
+ xvpickve.w xr1, xr0, 4
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+
+
+L(end):
+ cto.w t0, t0
+ add.d a0, a0, t0
+#ifndef AS_STRCHRNUL
+ vreplgr2vr.b vr0, t0
+ xvpermi.q xr3, xr2, 1
+
+ vshuf.b vr0, vr3, vr2, vr0
+ vpickve2gr.bu t0, vr0, 0
+ masknez a0, a0, t0
+#endif
+ jr ra
+
+END(STRCHR)
+
+libc_hidden_builtin_def(STRCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
new file mode 100644
index 00000000..dae98b0a
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
@@ -0,0 +1,73 @@
+/* Optimized strchr implementation using loongarch LSX SIMD instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+#ifndef AS_STRCHRNUL
+# define STRCHR __strchr_lsx
+#endif
+
+LEAF(STRCHR, 6)
+ andi t1, a0, 0xf
+ bstrins.d a0, zero, 3, 0
+ vld vr0, a0, 0
+ li.d t2, -1
+
+ vreplgr2vr.b vr1, a1
+ sll.d t3, t2, t1
+ vxor.v vr2, vr0, vr1
+ vmin.bu vr0, vr0, vr2
+
+ vmsknz.b vr0, vr0
+ movfr2gr.s t0, fa0
+ ext.w.h t0, t0
+ orn t0, t0, t3
+
+ beq t0, t2, L(loop)
+L(found):
+ cto.w t0, t0
+ add.d a0, a0, t0
+#ifndef AS_STRCHRNUL
+ vreplve.b vr2, vr2, t0
+ vpickve2gr.bu t1, vr2, 0
+ masknez a0, a0, t1
+#endif
+ jr ra
+
+
+L(loop):
+ vld vr0, a0, 16
+ addi.d a0, a0, 16
+ vxor.v vr2, vr0, vr1
+ vmin.bu vr0, vr0, vr2
+
+ vsetanyeqz.b fcc0, vr0
+ bceqz fcc0, L(loop)
+ vmsknz.b vr0, vr0
+ movfr2gr.s t0, fa0
+
+ b L(found)
+END(STRCHR)
+
+libc_hidden_builtin_def (STRCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr.c b/sysdeps/loongarch/lp64/multiarch/strchr.c
new file mode 100644
index 00000000..404e97bd
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strchr.c
@@ -0,0 +1,36 @@
+/* Multiple versions of strchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strchr __redirect_strchr
+# include <string.h>
+# undef strchr
+
+# define SYMBOL_NAME strchr
+# include "ifunc-strchr.h"
+
+libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ());
+weak_alias(strchr, index)
+# ifdef SHARED
+__hidden_ver1 (strchr, __GI_strchr, __redirect_strchr)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strchr);
+# endif
+
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
new file mode 100644
index 00000000..1c01a023
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
@@ -0,0 +1,95 @@
+/* Optimized strchrnul implementation using basic Loongarch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STRCHRNUL_NAME __strchrnul_aligned
+#else
+# define STRCHRNUL_NAME __strchrnul
+#endif
+
+LEAF(STRCHRNUL_NAME, 6)
+ slli.d t1, a0, 3
+ bstrins.d a0, zero, 2, 0
+ lu12i.w a2, 0x01010
+ ld.d t2, a0, 0
+
+ ori a2, a2, 0x101
+ andi a1, a1, 0xff
+ bstrins.d a2, a2, 63, 32
+ li.w t0, -1
+
+ mul.d a1, a1, a2
+ sll.d t0, t0, t1
+ slli.d a3, a2, 7
+ orn t2, t2, t0
+
+ sll.d t3, a1, t1
+ xor t4, t2, t3
+ sub.d a4, t2, a2
+ sub.d a5, t4, a2
+
+
+ andn a4, a4, t2
+ andn a5, a5, t4
+ or t0, a4, a5
+ and t0, t0, a3
+
+ bnez t0, L(end)
+ addi.d a0, a0, 8
+L(loop):
+ ld.d t4, a0, 0
+ xor t2, t4, a1
+
+ sub.d a4, t4, a2
+ sub.d a5, t2, a2
+ andn a4, a4, t4
+ andn a5, a5, t2
+
+ or t0, a4, a5
+ and t0, t0, a3
+ bnez t0, L(end)
+ ld.d t4, a0, 8
+
+
+ addi.d a0, a0, 16
+ xor t2, t4, a1
+ sub.d a4, t4, a2
+ sub.d a5, t2, a2
+
+ andn a4, a4, t4
+ andn a5, a5, t2
+ or t0, a4, a5
+ and t0, t0, a3
+
+ beqz t0, L(loop)
+ addi.d a0, a0, -8
+L(end):
+ ctz.d t0, t0
+ srli.w t0, t0, 3
+
+
+ add.d a0, a0, t0
+ jr ra
+END(STRCHRNUL_NAME)
+
+libc_hidden_builtin_def (STRCHRNUL_NAME)
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
new file mode 100644
index 00000000..d45495e4
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
@@ -0,0 +1,22 @@
+/* Optimized strchrnul implementation using loongarch LASX SIMD instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define STRCHR __strchrnul_lasx
+#define AS_STRCHRNUL
+#include "strchr-lasx.S"
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
new file mode 100644
index 00000000..07d793ae
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
@@ -0,0 +1,22 @@
+/* Optimized strchrnul implementation using loongarch LSX SIMD instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define STRCHR __strchrnul_lsx
+#define AS_STRCHRNUL
+#include "strchr-lsx.S"
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul.c b/sysdeps/loongarch/lp64/multiarch/strchrnul.c
new file mode 100644
index 00000000..f3b8296e
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul.c
@@ -0,0 +1,39 @@
+/* Multiple versions of strchrnul.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+
+#if IS_IN (libc)
+# define strchrnul __redirect_strchrnul
+# define __strchrnul __redirect___strchrnul
+# include <string.h>
+# undef __strchrnul
+# undef strchrnul
+
+# define SYMBOL_NAME strchrnul
+# include "ifunc-strchrnul.h"
+
+libc_ifunc_redirected (__redirect_strchrnul, __strchrnul,
+ IFUNC_SELECTOR ());
+weak_alias (__strchrnul, strchrnul)
+# ifdef SHARED
+__hidden_ver1 (__strchrnul, __GI___strchrnul, __redirect_strchrnul)
+ __attribute__((visibility ("hidden"))) __attribute_copy__ (strchrnul);
+# endif
+#endif
--
2.33.0
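
The files above plug strchr and strchrnul into glibc's per-function IFUNC selectors (LASX, then LSX, then the aligned fallback). As a minimal standalone sketch of the same load-time selection, assuming a GCC/binutils toolchain with ifunc support and a Linux kernel exposing the LoongArch HWCAP bits, the pattern looks roughly like this; every name below is illustrative and not part of the patch:

/* Minimal sketch: pick a strchr implementation once, at relocation time,
   the way the IFUNC_SELECTOR above does.  Illustrative names only.  */
#include <string.h>
#include <sys/auxv.h>

#ifndef HWCAP_LOONGARCH_LSX
# define HWCAP_LOONGARCH_LSX (1 << 4)  /* assumed kernel uapi value */
#endif

typedef char *(*strchr_fn) (const char *, int);

static char *
my_strchr_aligned (const char *s, int c)
{
  return strchr (s, c);                /* stand-in for an "aligned" variant */
}

static char *
my_strchr_lsx (const char *s, int c)
{
  return strchr (s, c);                /* stand-in for an LSX/LASX variant */
}

/* The resolver runs during relocation and returns the implementation
   the ifunc symbol should resolve to.  */
static strchr_fn
my_strchr_resolver (void)
{
  return (getauxval (AT_HWCAP) & HWCAP_LOONGARCH_LSX)
         ? my_strchr_lsx : my_strchr_aligned;
}

char *my_strchr (const char *s, int c)
     __attribute__ ((ifunc ("my_strchr_resolver")));

Callers simply call my_strchr (); the dynamic linker runs the resolver once and binds the symbol to whichever variant it returned.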


@ -0,0 +1,478 @@
From c0f3b0a8c71c26d5351e8ddabe3e8a323803e683 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Thu, 21 Sep 2023 09:10:11 +0800
Subject: [PATCH 26/29] Revert "LoongArch: Add glibc.cpu.hwcap support."
This reverts commit a53451559dc9cce765ea5bcbb92c4007e058e92b.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/Makefile | 4 -
sysdeps/loongarch/Versions | 5 --
sysdeps/loongarch/cpu-tunables.c | 89 -------------------
sysdeps/loongarch/dl-get-cpu-features.c | 25 ------
sysdeps/loongarch/dl-machine.h | 27 +-----
sysdeps/loongarch/dl-tunables.list | 25 ------
.../unix/sysv/linux/loongarch/cpu-features.c | 29 ------
.../unix/sysv/linux/loongarch/cpu-features.h | 18 +---
.../unix/sysv/linux/loongarch/dl-procinfo.c | 60 -------------
sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c | 21 -----
.../unix/sysv/linux/loongarch/libc-start.c | 34 -------
11 files changed, 8 insertions(+), 329 deletions(-)
delete mode 100644 sysdeps/loongarch/Versions
delete mode 100644 sysdeps/loongarch/cpu-tunables.c
delete mode 100644 sysdeps/loongarch/dl-get-cpu-features.c
delete mode 100644 sysdeps/loongarch/dl-tunables.list
delete mode 100644 sysdeps/unix/sysv/linux/loongarch/cpu-features.c
delete mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
delete mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
delete mode 100644 sysdeps/unix/sysv/linux/loongarch/libc-start.c
diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
index 30a1f4a8..43d2f583 100644
--- a/sysdeps/loongarch/Makefile
+++ b/sysdeps/loongarch/Makefile
@@ -6,10 +6,6 @@ ifeq ($(subdir),elf)
gen-as-const-headers += dl-link.sym
endif
-ifeq ($(subdir),elf)
- sysdep-dl-routines += dl-get-cpu-features
-endif
-
# LoongArch's assembler also needs to know about PIC as it changes the
# definition of some assembler macros.
ASFLAGS-.os += $(pic-ccflag)
diff --git a/sysdeps/loongarch/Versions b/sysdeps/loongarch/Versions
deleted file mode 100644
index 33ae2cc0..00000000
--- a/sysdeps/loongarch/Versions
+++ /dev/null
@@ -1,5 +0,0 @@
-ld {
- GLIBC_PRIVATE {
- _dl_larch_get_cpu_features;
- }
-}
diff --git a/sysdeps/loongarch/cpu-tunables.c b/sysdeps/loongarch/cpu-tunables.c
deleted file mode 100644
index 8e9fab93..00000000
--- a/sysdeps/loongarch/cpu-tunables.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/* LoongArch CPU feature tuning.
- This file is part of the GNU C Library.
- Copyright (C) 2023 Free Software Foundation, Inc.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-# include <stdbool.h>
-# include <stdint.h>
-# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. */
-# include <elf/dl-tunables.h>
-# include <string.h>
-# include <cpu-features.h>
-# include <ldsodefs.h>
-# include <sys/auxv.h>
-
-# define HWCAP_LOONGARCH_IFUNC \
- (HWCAP_LOONGARCH_UAL | HWCAP_LOONGARCH_LSX | HWCAP_LOONGARCH_LASX)
-
-# define CHECK_GLIBC_IFUNC_CPU_OFF(f, name, len) \
- _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \
- if (!memcmp (f, #name, len) && \
- (GLRO (dl_hwcap) & HWCAP_LOONGARCH_##name)) \
- { \
- hwcap |= (HWCAP_LOONGARCH_##name | (~HWCAP_LOONGARCH_IFUNC)); \
- break; \
- } \
-
-attribute_hidden
-void
-TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
-{
- const char *p = valp->strval;
- size_t len;
- unsigned long hwcap = 0;
- const char *c;
-
- do {
- for (c = p; *c != ','; c++)
- if (*c == '\0')
- break;
-
- len = c - p;
-
- switch(len)
- {
- default:
- _dl_fatal_printf (
- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
- );
- break;
- case 3:
- {
- CHECK_GLIBC_IFUNC_CPU_OFF (p, LSX, 3);
- CHECK_GLIBC_IFUNC_CPU_OFF (p, UAL, 3);
- _dl_fatal_printf (
- "Some features are invalid or not supported on this machine!!\n"
- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
- );
- }
- break;
- case 4:
- {
- CHECK_GLIBC_IFUNC_CPU_OFF (p, LASX, 4);
- _dl_fatal_printf (
- "Some features are invalid or not supported on this machine!!\n"
- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
- );
- }
- break;
- }
-
- p += len + 1;
- }
- while (*c != '\0');
-
- GLRO (dl_larch_cpu_features).hwcap &= hwcap;
-}
diff --git a/sysdeps/loongarch/dl-get-cpu-features.c b/sysdeps/loongarch/dl-get-cpu-features.c
deleted file mode 100644
index 7cd9bc15..00000000
--- a/sysdeps/loongarch/dl-get-cpu-features.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/* Define _dl_larch_get_cpu_features.
- Copyright (C) 2023 Free Software Foundation, Inc.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-
-#include <ldsodefs.h>
-
-const struct cpu_features *
-_dl_larch_get_cpu_features (void)
-{
- return &GLRO(dl_larch_cpu_features);
-}
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index b395a928..57913cef 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -29,8 +29,6 @@
#include <dl-static-tls.h>
#include <dl-machine-rel.h>
-#include <cpu-features.c>
-
#ifndef _RTLD_PROLOGUE
# define _RTLD_PROLOGUE(entry) \
".globl\t" __STRING (entry) "\n\t" \
@@ -55,23 +53,6 @@
#define ELF_MACHINE_NO_REL 1
#define ELF_MACHINE_NO_RELA 0
-#define DL_PLATFORM_INIT dl_platform_init ()
-
-static inline void __attribute__ ((unused))
-dl_platform_init (void)
-{
- if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
- /* Avoid an empty string which would disturb us. */
- GLRO(dl_platform) = NULL;
-
-#ifdef SHARED
- /* init_cpu_features has been called early from __libc_start_main in
- static executable. */
- init_cpu_features (&GLRO(dl_larch_cpu_features));
-#endif
-}
-
-
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int
elf_machine_matches_host (const ElfW (Ehdr) *ehdr)
@@ -309,9 +290,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
if (profile != 0)
{
#if !defined __loongarch_soft_float
- if (RTLD_SUPPORT_LASX)
+ if (SUPPORT_LASX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx;
- else if (RTLD_SUPPORT_LSX)
+ else if (SUPPORT_LSX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx;
else
#endif
@@ -329,9 +310,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
indicated by the offset on the stack, and then jump to
the resolved address. */
#if !defined __loongarch_soft_float
- if (RTLD_SUPPORT_LASX)
+ if (SUPPORT_LASX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
- else if (RTLD_SUPPORT_LSX)
+ else if (SUPPORT_LSX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lsx;
else
#endif
diff --git a/sysdeps/loongarch/dl-tunables.list b/sysdeps/loongarch/dl-tunables.list
deleted file mode 100644
index 66b34275..00000000
--- a/sysdeps/loongarch/dl-tunables.list
+++ /dev/null
@@ -1,25 +0,0 @@
-# LoongArch specific tunables.
-# Copyright (C) 2023 Free Software Foundation, Inc.
-# This file is part of the GNU C Library.
-
-# The GNU C Library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-
-# The GNU C Library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-
-# You should have received a copy of the GNU Lesser General Public
-# License along with the GNU C Library; if not, see
-# <http://www.gnu.org/licenses/>.
-
-glibc {
- cpu {
- hwcaps {
- type: STRING
- }
- }
-}
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
deleted file mode 100644
index 1290c4ce..00000000
--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Initialize CPU feature data. LoongArch64 version.
- This file is part of the GNU C Library.
- Copyright (C) 2023 Free Software Foundation, Inc.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <cpu-features.h>
-#include <elf/dl-hwcaps.h>
-#include <elf/dl-tunables.h>
-extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) attribute_hidden;
-
-static inline void
-init_cpu_features (struct cpu_features *cpu_features)
-{
- GLRO (dl_larch_cpu_features).hwcap = GLRO (dl_hwcap);
- TUNABLE_GET (glibc, cpu, hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
-}
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
index 450963ce..d1a280a5 100644
--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
@@ -19,23 +19,13 @@
#ifndef _CPU_FEATURES_LOONGARCH64_H
#define _CPU_FEATURES_LOONGARCH64_H
-#include <stdint.h>
#include <sys/auxv.h>
-struct cpu_features
- {
- uint64_t hwcap;
- };
+#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL)
+#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
+#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
-/* Get a pointer to the CPU features structure. */
-extern const struct cpu_features *_dl_larch_get_cpu_features (void)
- __attribute__ ((pure));
-
-#define SUPPORT_UAL (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_UAL)
-#define SUPPORT_LSX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LSX)
-#define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LASX)
-#define RTLD_SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
-#define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
#define INIT_ARCH()
#endif /* _CPU_FEATURES_LOONGARCH64_H */
+
diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
deleted file mode 100644
index 6217fda9..00000000
--- a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/* Data for LoongArch64 version of processor capability information.
- Linux version.
- Copyright (C) 2023 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-/* If anything should be added here check whether the size of each string
- is still ok with the given array size.
-
- All the #ifdefs in the definitions are quite irritating but
- necessary if we want to avoid duplicating the information. There
- are three different modes:
-
- - PROCINFO_DECL is defined. This means we are only interested in
- declarations.
-
- - PROCINFO_DECL is not defined:
-
- + if SHARED is defined the file is included in an array
- initializer. The .element = { ... } syntax is needed.
-
- + if SHARED is not defined a normal array initialization is
- needed.
- */
-
-#ifndef PROCINFO_CLASS
-# define PROCINFO_CLASS
-#endif
-
-#if !IS_IN (ldconfig)
-# if !defined PROCINFO_DECL && defined SHARED
- ._dl_larch_cpu_features
-# else
-PROCINFO_CLASS struct cpu_features _dl_larch_cpu_features
-# endif
-# ifndef PROCINFO_DECL
-= { }
-# endif
-# if !defined SHARED || defined PROCINFO_DECL
-;
-# else
-,
-# endif
-#endif
-
-#undef PROCINFO_DECL
-#undef PROCINFO_CLASS
diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
deleted file mode 100644
index 455fd71a..00000000
--- a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Operating system support for run-time dynamic linker. LoongArch version.
- Copyright (C) 2023 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <config.h>
-#include <sysdeps/loongarch/cpu-tunables.c>
-#include <sysdeps/unix/sysv/linux/dl-sysdep.c>
diff --git a/sysdeps/unix/sysv/linux/loongarch/libc-start.c b/sysdeps/unix/sysv/linux/loongarch/libc-start.c
deleted file mode 100644
index f1346ece..00000000
--- a/sysdeps/unix/sysv/linux/loongarch/libc-start.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* Override csu/libc-start.c on LoongArch64.
- Copyright (C) 2023 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef SHARED
-
-/* Mark symbols hidden in static PIE for early self relocation to work. */
-# if BUILD_PIE_DEFAULT
-# pragma GCC visibility push(hidden)
-# endif
-
-# include <ldsodefs.h>
-# include <cpu-features.c>
-
-extern struct cpu_features _dl_larch_cpu_features;
-
-# define ARCH_INIT_CPU_FEATURES() init_cpu_features (&_dl_larch_cpu_features)
-
-#endif
-#include <csu/libc-start.c>
--
2.33.0


@ -0,0 +1,39 @@
From fc60db3cf29ba157d09ba4f4b92e3ab382b0339d Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Wed, 9 Aug 2023 19:12:54 +0800
Subject: [PATCH 04/29] elf: Add new LoongArch reloc types (101 to 108) into
elf.h
These reloc types are generated by GNU assembler >= 2.41 for relaxation
support.
Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=57a930e3
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
elf/elf.h | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/elf/elf.h b/elf/elf.h
index 89fc8021..d623bdeb 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -4205,6 +4205,14 @@ enum
#define R_LARCH_TLS_GD_HI20 98
#define R_LARCH_32_PCREL 99
#define R_LARCH_RELAX 100
+#define R_LARCH_DELETE 101
+#define R_LARCH_ALIGN 102
+#define R_LARCH_PCREL20_S2 103
+#define R_LARCH_CFA 104
+#define R_LARCH_ADD6 105
+#define R_LARCH_SUB6 106
+#define R_LARCH_ADD_ULEB128 107
+#define R_LARCH_SUB_ULEB128 108
/* ARC specific declarations. */
--
2.33.0
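
Since the eight new values are plain relocation type codes, a small hedged sketch (not part of the patch) shows how a diagnostic tool might name them when walking a .rela section; the numbers simply mirror the #defines above:

/* Sketch: map the relocation types added above (101-108) to names.
   ELF64_R_TYPE is the standard accessor from <elf.h>.  */
#include <elf.h>
#include <stdio.h>

static const char *
r_larch_relax_name (unsigned long r_type)
{
  switch (r_type)
    {
    case 101: return "R_LARCH_DELETE";
    case 102: return "R_LARCH_ALIGN";
    case 103: return "R_LARCH_PCREL20_S2";
    case 104: return "R_LARCH_CFA";
    case 105: return "R_LARCH_ADD6";
    case 106: return "R_LARCH_SUB6";
    case 107: return "R_LARCH_ADD_ULEB128";
    case 108: return "R_LARCH_SUB_ULEB128";
    default:  return "<other>";
    }
}

int
main (void)
{
  /* Symbol index 7, type 102, packed the way Elf64_Rela.r_info is.  */
  Elf64_Xword r_info = ((Elf64_Xword) 7 << 32) | 102;
  printf ("%s\n", r_larch_relax_name (ELF64_R_TYPE (r_info)));
  return 0;
}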


@ -1,4 +1,4 @@
-%define anolis_release 1
+%define anolis_release 2
%bcond_without testsuite
%bcond_without benchtests
@ -103,6 +103,37 @@ Patch0187: 0087-CVE-2023-6246.patch
Patch0188: 0088-CVE-2023-6779.patch
Patch0189: 0089-CVE-2023-6780.patch
# Part 3000 ~ 4999
Patch3000: LoongArch-Redefine-macro-LEAF-ENTRY.patch
Patch3001: LoongArch-Add-minuimum-binutils-required-version.patch
Patch3002: Loongarch-Add-ifunc-support-and-add-different-versio.patch
Patch3003: elf-Add-new-LoongArch-reloc-types-101-to-108-into-el.patch
Patch3004: LoongArch-elf-Add-new-LoongArch-reloc-types-109-into.patch
Patch3005: Loongarch-Add-ifunc-support-for-strchr-aligned-lsx-l.patch
Patch3006: Loongarch-Add-ifunc-support-for-memcpy-aligned-unali.patch
Patch3007: LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch
Patch3008: LoongArch-Add-ifunc-support-for-strcmp-aligned-lsx.patch
Patch3009: LoongArch-Add-ifunc-support-for-strncmp-aligned-lsx.patch
Patch3010: LoongArch-Remove-support-code-for-old-linker-in-star.patch
Patch3011: LoongArch-Micro-optimize-LD_PCREL.patch
Patch3012: LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch
Patch3013: LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch
Patch3014: LoongArch-Add-ifunc-support-for-memrchr-lsx-lasx.patch
Patch3015: LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch
Patch3016: LoongArch-Add-ifunc-support-for-memcmp-aligned-lsx-l.patch
Patch3017: LoongArch-Change-loongarch-to-LoongArch-in-comments.patch
Patch3018: LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch
Patch3019: LoongArch-Replace-deprecated-v0-with-a0-to-eliminate.patch
Patch3020: LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch
Patch3021: LoongArch-Add-ifunc-support-for-strrchr-aligned-lsx-.patch
Patch3022: LoongArch-Change-to-put-magic-number-to-.rodata-sect.patch
Patch3023: LoongArch-Add-glibc.cpu.hwcap-support.patch
Patch3024: Revert-LoongArch-Add-glibc.cpu.hwcap-support.patch
Patch3025: LoongArch-Unify-Register-Names.patch
Patch3026: LoongArch-Update-hwcap.h-to-sync-with-LoongArch-kern.patch
Patch3027: linux-Sync-Linux-6.6-elf.h.patch
Patch3028: Decrease-value-of-arch_minimum_kernel-with-LoongArch.patch
BuildRequires: audit-libs-devel >= 1.1.3 libcap-devel systemtap-sdt-devel
BuildRequires: procps-ng util-linux gawk sed >= 3.95 gettext
BuildRequires: python3 python3-devel
@ -1055,6 +1086,10 @@ update_gconv_modules_cache ()
%{_libdir}/libpthread_nonshared.a
%changelog
* Sat Mar 16 2024 Peng Fan <fanpeng@loongson.cn> - 2.38-2
- LoongArch: sync patches from glibc upstream
- Reduced kernel version requirements
* Tue Mar 05 2024 mgb01105731 <mgb01105731@alibaba-inc.com> - 2.38-1
- update to 2.38


@ -0,0 +1,48 @@
From 6b3d687470b8f91bc6eb87e924fe97d4592b3aa5 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Tue, 31 Oct 2023 13:32:38 -0300
Subject: [PATCH 29/29] linux: Sync Linux 6.6 elf.h
It adds NT_X86_SHSTK (2fab02b25ae7cf5), NT_RISCV_CSR/NT_RISCV_VECTOR
(9300f00439743c4), and NT_LOONGARCH_HW_BREAK/NT_LOONGARCH_HW_WATCH
(1a69f7a161a78ae).
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
elf/elf.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/elf/elf.h b/elf/elf.h
index 9c51073f..51633079 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -794,6 +794,7 @@ typedef struct
#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */
#define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */
#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
+#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */
#define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */
#define NT_S390_TIMER 0x301 /* s390 timer register */
#define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */
@@ -832,6 +833,8 @@ typedef struct
#define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers. */
#define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode. */
#define NT_MIPS_MSA 0x802 /* MIPS SIMD registers. */
+#define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */
+#define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */
#define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers. */
#define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and
status registers. */
@@ -841,6 +844,8 @@ typedef struct
SIMD Extension registers. */
#define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary
Translation registers. */
+#define NT_LOONGARCH_HW_BREAK 0xa05 /* LoongArch hardware breakpoint registers */
+#define NT_LOONGARCH_HW_WATCH 0xa06 /* LoongArch hardware watchpoint registers */
/* Legal values for the note segment descriptor types for object files. */
--
2.33.0
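
As a rough usage sketch, and only an assumption based on the kernel commits referenced above (LoongArch, Linux 6.6 or later), the two new LoongArch note types also act as regset selectors for PTRACE_GETREGSET, so a debugger-style tool could reach them like this while treating the regset layout as opaque bytes:

/* Sketch: fetch the hardware-breakpoint regset advertised by
   NT_LOONGARCH_HW_BREAK.  The buffer size and the attach/detach flow
   are assumptions; the regset layout itself is left as raw bytes.  */
#include <elf.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>

#ifndef NT_LOONGARCH_HW_BREAK
# define NT_LOONGARCH_HW_BREAK 0xa05   /* value from the hunk above */
#endif

int
main (int argc, char **argv)
{
  if (argc != 2)
    {
      fprintf (stderr, "usage: %s <pid>\n", argv[0]);
      return 1;
    }
  pid_t pid = (pid_t) atoi (argv[1]);

  if (ptrace (PTRACE_ATTACH, pid, NULL, NULL) != 0)
    {
      perror ("PTRACE_ATTACH");
      return 1;
    }
  waitpid (pid, NULL, 0);              /* wait for the attach stop */

  unsigned char buf[512];
  struct iovec iov = { .iov_base = buf, .iov_len = sizeof buf };
  if (ptrace (PTRACE_GETREGSET, pid,
              (void *) (uintptr_t) NT_LOONGARCH_HW_BREAK, &iov) == 0)
    printf ("NT_LOONGARCH_HW_BREAK regset: %zu bytes\n", iov.iov_len);
  else
    perror ("PTRACE_GETREGSET");       /* EINVAL on older kernels */

  ptrace (PTRACE_DETACH, pid, NULL, NULL);
  return 0;
}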