diff --git a/0090-CVE-2024-2961.patch b/0090-CVE-2024-2961.patch deleted file mode 100644 index aa33e4c..0000000 --- a/0090-CVE-2024-2961.patch +++ /dev/null @@ -1,213 +0,0 @@ -From f9dc609e06b1136bb0408be9605ce7973a767ada Mon Sep 17 00:00:00 2001 -From: Charles Fol -Date: Thu, 28 Mar 2024 12:25:38 -0300 -Subject: [PATCH] iconv: ISO-2022-CN-EXT: fix out-of-bound writes when writing - escape sequence (CVE-2024-2961) - -ISO-2022-CN-EXT uses escape sequences to indicate character set changes -(as specified by RFC 1922). While the SOdesignation has the expected -bounds checks, neither SS2designation nor SS3designation have its; -allowing a write overflow of 1, 2, or 3 bytes with fixed values: -'$+I', '$+J', '$+K', '$+L', '$+M', or '$*H'. - -Checked on aarch64-linux-gnu. - -Co-authored-by: Adhemerval Zanella -Reviewed-by: Carlos O'Donell -Tested-by: Carlos O'Donell ---- - iconvdata/Makefile | 5 +- - iconvdata/iso-2022-cn-ext.c | 12 +++ - iconvdata/tst-iconv-iso-2022-cn-ext.c | 128 ++++++++++++++++++++++++++ - 3 files changed, 144 insertions(+), 1 deletion(-) - create mode 100644 iconvdata/tst-iconv-iso-2022-cn-ext.c - -diff --git a/iconvdata/Makefile b/iconvdata/Makefile -index ea019ce5c0..7196a8744b 100644 ---- a/iconvdata/Makefile -+++ b/iconvdata/Makefile -@@ -75,7 +75,8 @@ ifeq (yes,$(build-shared)) - tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ - tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ - bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \ -- bug-iconv13 bug-iconv14 bug-iconv15 -+ bug-iconv13 bug-iconv14 bug-iconv15 \ -+ tst-iconv-iso-2022-cn-ext - ifeq ($(have-thread-library),yes) - tests += bug-iconv3 - endif -@@ -330,6 +331,8 @@ $(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \ - $(addprefix $(objpfx),$(modules.so)) - $(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \ - $(addprefix $(objpfx),$(modules.so)) -+$(objpfx)tst-iconv-iso-2022-cn-ext.out: $(addprefix $(objpfx), $(gconv-modules)) \ -+ $(addprefix $(objpfx),$(modules.so)) - - $(objpfx)iconv-test.out: run-iconv-test.sh \ - $(addprefix $(objpfx), $(gconv-modules)) \ -diff --git a/iconvdata/iso-2022-cn-ext.c b/iconvdata/iso-2022-cn-ext.c -index b34c8a36f4..cce29b1969 100644 ---- a/iconvdata/iso-2022-cn-ext.c -+++ b/iconvdata/iso-2022-cn-ext.c -@@ -574,6 +574,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized"); - { \ - const char *escseq; \ - \ -+ if (outptr + 4 > outend) \ -+ { \ -+ result = __GCONV_FULL_OUTPUT; \ -+ break; \ -+ } \ -+ \ - assert (used == CNS11643_2_set); /* XXX */ \ - escseq = "*H"; \ - *outptr++ = ESC; \ -@@ -587,6 +593,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized"); - { \ - const char *escseq; \ - \ -+ if (outptr + 4 > outend) \ -+ { \ -+ result = __GCONV_FULL_OUTPUT; \ -+ break; \ -+ } \ -+ \ - assert ((used >> 5) >= 3 && (used >> 5) <= 7); \ - escseq = "+I+J+K+L+M" + ((used >> 5) - 3) * 2; \ - *outptr++ = ESC; \ -diff --git a/iconvdata/tst-iconv-iso-2022-cn-ext.c b/iconvdata/tst-iconv-iso-2022-cn-ext.c -new file mode 100644 -index 0000000000..96a8765fd5 ---- /dev/null -+++ b/iconvdata/tst-iconv-iso-2022-cn-ext.c -@@ -0,0 +1,128 @@ -+/* Verify ISO-2022-CN-EXT does not write out of the bounds. -+ Copyright (C) 2024 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. 
-+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+/* The test sets up a two memory page buffer with the second page marked -+ PROT_NONE to trigger a fault if the conversion writes beyond the exact -+ expected amount. Then we carry out various conversions and precisely -+ place the start of the output buffer in order to trigger a SIGSEGV if the -+ process writes anywhere between 1 and page sized bytes more (only one -+ PROT_NONE page is setup as a canary) than expected. These tests exercise -+ all three of the cases in ISO-2022-CN-EXT where the converter must switch -+ character sets and may run out of buffer space while doing the -+ operation. */ -+ -+static int -+do_test (void) -+{ -+ iconv_t cd = iconv_open ("ISO-2022-CN-EXT", "UTF-8"); -+ TEST_VERIFY_EXIT (cd != (iconv_t) -1); -+ -+ char *ntf; -+ size_t ntfsize; -+ char *outbufbase; -+ { -+ int pgz = getpagesize (); -+ TEST_VERIFY_EXIT (pgz > 0); -+ ntfsize = 2 * pgz; -+ -+ ntf = xmmap (NULL, ntfsize, PROT_READ | PROT_WRITE, MAP_PRIVATE -+ | MAP_ANONYMOUS, -1); -+ xmprotect (ntf + pgz, pgz, PROT_NONE); -+ -+ outbufbase = ntf + pgz; -+ } -+ -+ /* Check if SOdesignation escape sequence does not trigger an OOB write. */ -+ { -+ char inbuf[] = "\xe4\xba\xa4\xe6\x8d\xa2"; -+ -+ for (int i = 0; i < 9; i++) -+ { -+ char *inp = inbuf; -+ size_t inleft = sizeof (inbuf) - 1; -+ -+ char *outp = outbufbase - i; -+ size_t outleft = i; -+ -+ TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft) -+ == (size_t) -1); -+ TEST_COMPARE (errno, E2BIG); -+ -+ TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0); -+ } -+ } -+ -+ /* Same as before for SS2designation. */ -+ { -+ char inbuf[] = "㴽 \xe3\xb4\xbd"; -+ -+ for (int i = 0; i < 14; i++) -+ { -+ char *inp = inbuf; -+ size_t inleft = sizeof (inbuf) - 1; -+ -+ char *outp = outbufbase - i; -+ size_t outleft = i; -+ -+ TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft) -+ == (size_t) -1); -+ TEST_COMPARE (errno, E2BIG); -+ -+ TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0); -+ } -+ } -+ -+ /* Same as before for SS3designation. 
*/ -+ { -+ char inbuf[] = "劄 \xe5\x8a\x84"; -+ -+ for (int i = 0; i < 14; i++) -+ { -+ char *inp = inbuf; -+ size_t inleft = sizeof (inbuf) - 1; -+ -+ char *outp = outbufbase - i; -+ size_t outleft = i; -+ -+ TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft) -+ == (size_t) -1); -+ TEST_COMPARE (errno, E2BIG); -+ -+ TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0); -+ } -+ } -+ -+ TEST_VERIFY_EXIT (iconv_close (cd) != -1); -+ -+ xmunmap (ntf, ntfsize); -+ -+ return 0; -+} -+ -+#include --- -2.39.3 diff --git a/Decrease-value-of-arch_minimum_kernel-with-LoongArch.patch b/Decrease-value-of-arch_minimum_kernel-with-LoongArch.patch deleted file mode 100644 index 0c43817..0000000 --- a/Decrease-value-of-arch_minimum_kernel-with-LoongArch.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 2c8dfc45a8009e5110a9d2148b62d802e989fde7 Mon Sep 17 00:00:00 2001 -From: ticat_fp -Date: Thu, 29 Feb 2024 15:58:31 +0800 -Subject: [PATCH] Decrease value of arch_minimum_kernel with LoongArch - -Signed-off-by: ticat_fp ---- - sysdeps/unix/sysv/linux/loongarch/configure | 2 +- - sysdeps/unix/sysv/linux/loongarch/configure.ac | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/sysdeps/unix/sysv/linux/loongarch/configure b/sysdeps/unix/sysv/linux/loongarch/configure -index 0d1159e9..851b2285 100644 ---- a/sysdeps/unix/sysv/linux/loongarch/configure -+++ b/sysdeps/unix/sysv/linux/loongarch/configure -@@ -1,7 +1,7 @@ - # This file is generated from configure.ac by Autoconf. DO NOT EDIT! - # Local configure fragment for sysdeps/unix/sysv/linux/loongarch. - --arch_minimum_kernel=5.19.0 -+arch_minimum_kernel=4.19.0 - - libc_cv_loongarch_int_abi=no - -diff --git a/sysdeps/unix/sysv/linux/loongarch/configure.ac b/sysdeps/unix/sysv/linux/loongarch/configure.ac -index 04e9150a..00815c2f 100644 ---- a/sysdeps/unix/sysv/linux/loongarch/configure.ac -+++ b/sysdeps/unix/sysv/linux/loongarch/configure.ac -@@ -2,7 +2,7 @@ sinclude(./aclocal.m4)dnl Autoconf lossage - GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. - # Local configure fragment for sysdeps/unix/sysv/linux/loongarch. - --arch_minimum_kernel=5.19.0 -+arch_minimum_kernel=4.19.0 - - libc_cv_loongarch_int_abi=no - AC_EGREP_CPP(4 8 8, [__SIZEOF_INT__ __SIZEOF_LONG__ __SIZEOF_POINTER__ --- -2.33.0 - diff --git a/LoongArch-Add-glibc.cpu.hwcap-support.patch b/LoongArch-Add-glibc.cpu.hwcap-support.patch deleted file mode 100644 index 3629d05..0000000 --- a/LoongArch-Add-glibc.cpu.hwcap-support.patch +++ /dev/null @@ -1,499 +0,0 @@ -From 8923e4e9c79e672fd6b3b89aba598a60d5c01211 Mon Sep 17 00:00:00 2001 -From: caiyinyu -Date: Fri, 15 Sep 2023 17:35:19 +0800 -Subject: [PATCH 25/29] LoongArch: Add glibc.cpu.hwcap support. - -Key Points: -1. On lasx & lsx platforms, We must use _dl_runtime_{profile, resolve}_{lsx, lasx} - to save vector registers. -2. Via "tunables", users can choose str/mem_{lasx,lsx,unaligned} functions with - `export GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,...`. - Note: glibc.cpu.hwcaps doesn't affect _dl_runtime_{profile, resolve}_{lsx, lasx} - selection. - -Usage Notes: -1. Only valid inputs: LASX, LSX, UAL. Case-sensitive, comma-separated, no spaces. -2. Example: `export GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,UAL` turns on LASX & UAL. - Unmentioned features turn off. With default ifunc: lasx > lsx > unaligned > - aligned > generic, effect is: lasx > unaligned > aligned > generic; lsx off. -3. Incorrect GLIBC_TUNABLES settings will show error messages. 
- For example: On lsx platforms, you cannot enable lasx features. If you do - that, you will get error messages. -4. Valid input examples: - - GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX: lasx > aligned > generic. - - GLIBC_TUNABLES=glibc.cpu.hwcaps=LSX,UAL: lsx > unaligned > aligned > generic. - - GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,UAL,LASX,UAL,LSX,LASX,UAL: Repetitions - allowed but not recommended. Results in: lasx > lsx > unaligned > aligned > - generic. - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/Makefile | 4 + - sysdeps/loongarch/Versions | 5 ++ - sysdeps/loongarch/cpu-tunables.c | 89 +++++++++++++++++++ - sysdeps/loongarch/dl-get-cpu-features.c | 25 ++++++ - sysdeps/loongarch/dl-machine.h | 27 +++++- - sysdeps/loongarch/dl-tunables.list | 25 ++++++ - .../unix/sysv/linux/loongarch/cpu-features.c | 29 ++++++ - .../unix/sysv/linux/loongarch/cpu-features.h | 18 +++- - .../unix/sysv/linux/loongarch/dl-procinfo.c | 60 +++++++++++++ - sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c | 21 +++++ - .../unix/sysv/linux/loongarch/libc-start.c | 34 +++++++ - 11 files changed, 329 insertions(+), 8 deletions(-) - create mode 100644 sysdeps/loongarch/Versions - create mode 100644 sysdeps/loongarch/cpu-tunables.c - create mode 100644 sysdeps/loongarch/dl-get-cpu-features.c - create mode 100644 sysdeps/loongarch/dl-tunables.list - create mode 100644 sysdeps/unix/sysv/linux/loongarch/cpu-features.c - create mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c - create mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c - create mode 100644 sysdeps/unix/sysv/linux/loongarch/libc-start.c - -diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile -index 43d2f583..30a1f4a8 100644 ---- a/sysdeps/loongarch/Makefile -+++ b/sysdeps/loongarch/Makefile -@@ -6,6 +6,10 @@ ifeq ($(subdir),elf) - gen-as-const-headers += dl-link.sym - endif - -+ifeq ($(subdir),elf) -+ sysdep-dl-routines += dl-get-cpu-features -+endif -+ - # LoongArch's assembler also needs to know about PIC as it changes the - # definition of some assembler macros. - ASFLAGS-.os += $(pic-ccflag) -diff --git a/sysdeps/loongarch/Versions b/sysdeps/loongarch/Versions -new file mode 100644 -index 00000000..33ae2cc0 ---- /dev/null -+++ b/sysdeps/loongarch/Versions -@@ -0,0 +1,5 @@ -+ld { -+ GLIBC_PRIVATE { -+ _dl_larch_get_cpu_features; -+ } -+} -diff --git a/sysdeps/loongarch/cpu-tunables.c b/sysdeps/loongarch/cpu-tunables.c -new file mode 100644 -index 00000000..8e9fab93 ---- /dev/null -+++ b/sysdeps/loongarch/cpu-tunables.c -@@ -0,0 +1,89 @@ -+/* LoongArch CPU feature tuning. -+ This file is part of the GNU C Library. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+# include -+# include -+# include /* Get STDOUT_FILENO for _dl_printf. 
*/ -+# include -+# include -+# include -+# include -+# include -+ -+# define HWCAP_LOONGARCH_IFUNC \ -+ (HWCAP_LOONGARCH_UAL | HWCAP_LOONGARCH_LSX | HWCAP_LOONGARCH_LASX) -+ -+# define CHECK_GLIBC_IFUNC_CPU_OFF(f, name, len) \ -+ _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \ -+ if (!memcmp (f, #name, len) && \ -+ (GLRO (dl_hwcap) & HWCAP_LOONGARCH_##name)) \ -+ { \ -+ hwcap |= (HWCAP_LOONGARCH_##name | (~HWCAP_LOONGARCH_IFUNC)); \ -+ break; \ -+ } \ -+ -+attribute_hidden -+void -+TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) -+{ -+ const char *p = valp->strval; -+ size_t len; -+ unsigned long hwcap = 0; -+ const char *c; -+ -+ do { -+ for (c = p; *c != ','; c++) -+ if (*c == '\0') -+ break; -+ -+ len = c - p; -+ -+ switch(len) -+ { -+ default: -+ _dl_fatal_printf ( -+ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n" -+ ); -+ break; -+ case 3: -+ { -+ CHECK_GLIBC_IFUNC_CPU_OFF (p, LSX, 3); -+ CHECK_GLIBC_IFUNC_CPU_OFF (p, UAL, 3); -+ _dl_fatal_printf ( -+ "Some features are invalid or not supported on this machine!!\n" -+ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n" -+ ); -+ } -+ break; -+ case 4: -+ { -+ CHECK_GLIBC_IFUNC_CPU_OFF (p, LASX, 4); -+ _dl_fatal_printf ( -+ "Some features are invalid or not supported on this machine!!\n" -+ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n" -+ ); -+ } -+ break; -+ } -+ -+ p += len + 1; -+ } -+ while (*c != '\0'); -+ -+ GLRO (dl_larch_cpu_features).hwcap &= hwcap; -+} -diff --git a/sysdeps/loongarch/dl-get-cpu-features.c b/sysdeps/loongarch/dl-get-cpu-features.c -new file mode 100644 -index 00000000..7cd9bc15 ---- /dev/null -+++ b/sysdeps/loongarch/dl-get-cpu-features.c -@@ -0,0 +1,25 @@ -+/* Define _dl_larch_get_cpu_features. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+ -+#include -+ -+const struct cpu_features * -+_dl_larch_get_cpu_features (void) -+{ -+ return &GLRO(dl_larch_cpu_features); -+} -diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h -index 57913cef..b395a928 100644 ---- a/sysdeps/loongarch/dl-machine.h -+++ b/sysdeps/loongarch/dl-machine.h -@@ -29,6 +29,8 @@ - #include - #include - -+#include -+ - #ifndef _RTLD_PROLOGUE - # define _RTLD_PROLOGUE(entry) \ - ".globl\t" __STRING (entry) "\n\t" \ -@@ -53,6 +55,23 @@ - #define ELF_MACHINE_NO_REL 1 - #define ELF_MACHINE_NO_RELA 0 - -+#define DL_PLATFORM_INIT dl_platform_init () -+ -+static inline void __attribute__ ((unused)) -+dl_platform_init (void) -+{ -+ if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') -+ /* Avoid an empty string which would disturb us. */ -+ GLRO(dl_platform) = NULL; -+ -+#ifdef SHARED -+ /* init_cpu_features has been called early from __libc_start_main in -+ static executable. 
*/ -+ init_cpu_features (&GLRO(dl_larch_cpu_features)); -+#endif -+} -+ -+ - /* Return nonzero iff ELF header is compatible with the running host. */ - static inline int - elf_machine_matches_host (const ElfW (Ehdr) *ehdr) -@@ -290,9 +309,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], - if (profile != 0) - { - #if !defined __loongarch_soft_float -- if (SUPPORT_LASX) -+ if (RTLD_SUPPORT_LASX) - gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx; -- else if (SUPPORT_LSX) -+ else if (RTLD_SUPPORT_LSX) - gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx; - else - #endif -@@ -310,9 +329,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], - indicated by the offset on the stack, and then jump to - the resolved address. */ - #if !defined __loongarch_soft_float -- if (SUPPORT_LASX) -+ if (RTLD_SUPPORT_LASX) - gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx; -- else if (SUPPORT_LSX) -+ else if (RTLD_SUPPORT_LSX) - gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lsx; - else - #endif -diff --git a/sysdeps/loongarch/dl-tunables.list b/sysdeps/loongarch/dl-tunables.list -new file mode 100644 -index 00000000..66b34275 ---- /dev/null -+++ b/sysdeps/loongarch/dl-tunables.list -@@ -0,0 +1,25 @@ -+# LoongArch specific tunables. -+# Copyright (C) 2023 Free Software Foundation, Inc. -+# This file is part of the GNU C Library. -+ -+# The GNU C Library is free software; you can redistribute it and/or -+# modify it under the terms of the GNU Lesser General Public -+# License as published by the Free Software Foundation; either -+# version 2.1 of the License, or (at your option) any later version. -+ -+# The GNU C Library is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# Lesser General Public License for more details. -+ -+# You should have received a copy of the GNU Lesser General Public -+# License along with the GNU C Library; if not, see -+# . -+ -+glibc { -+ cpu { -+ hwcaps { -+ type: STRING -+ } -+ } -+} -diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c -new file mode 100644 -index 00000000..1290c4ce ---- /dev/null -+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c -@@ -0,0 +1,29 @@ -+/* Initialize CPU feature data. LoongArch64 version. -+ This file is part of the GNU C Library. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . 
*/ -+ -+#include -+#include -+#include -+extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) attribute_hidden; -+ -+static inline void -+init_cpu_features (struct cpu_features *cpu_features) -+{ -+ GLRO (dl_larch_cpu_features).hwcap = GLRO (dl_hwcap); -+ TUNABLE_GET (glibc, cpu, hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps)); -+} -diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h -index d1a280a5..450963ce 100644 ---- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h -+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h -@@ -19,13 +19,23 @@ - #ifndef _CPU_FEATURES_LOONGARCH64_H - #define _CPU_FEATURES_LOONGARCH64_H - -+#include - #include - --#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL) --#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX) --#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX) -+struct cpu_features -+ { -+ uint64_t hwcap; -+ }; - -+/* Get a pointer to the CPU features structure. */ -+extern const struct cpu_features *_dl_larch_get_cpu_features (void) -+ __attribute__ ((pure)); -+ -+#define SUPPORT_UAL (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_UAL) -+#define SUPPORT_LSX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LSX) -+#define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LASX) -+#define RTLD_SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX) -+#define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX) - #define INIT_ARCH() - - #endif /* _CPU_FEATURES_LOONGARCH64_H */ -- -diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c -new file mode 100644 -index 00000000..6217fda9 ---- /dev/null -+++ b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c -@@ -0,0 +1,60 @@ -+/* Data for LoongArch64 version of processor capability information. -+ Linux version. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+/* If anything should be added here check whether the size of each string -+ is still ok with the given array size. -+ -+ All the #ifdefs in the definitions are quite irritating but -+ necessary if we want to avoid duplicating the information. There -+ are three different modes: -+ -+ - PROCINFO_DECL is defined. This means we are only interested in -+ declarations. -+ -+ - PROCINFO_DECL is not defined: -+ -+ + if SHARED is defined the file is included in an array -+ initializer. The .element = { ... } syntax is needed. -+ -+ + if SHARED is not defined a normal array initialization is -+ needed. 
-+ */ -+ -+#ifndef PROCINFO_CLASS -+# define PROCINFO_CLASS -+#endif -+ -+#if !IS_IN (ldconfig) -+# if !defined PROCINFO_DECL && defined SHARED -+ ._dl_larch_cpu_features -+# else -+PROCINFO_CLASS struct cpu_features _dl_larch_cpu_features -+# endif -+# ifndef PROCINFO_DECL -+= { } -+# endif -+# if !defined SHARED || defined PROCINFO_DECL -+; -+# else -+, -+# endif -+#endif -+ -+#undef PROCINFO_DECL -+#undef PROCINFO_CLASS -diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c -new file mode 100644 -index 00000000..455fd71a ---- /dev/null -+++ b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c -@@ -0,0 +1,21 @@ -+/* Operating system support for run-time dynamic linker. LoongArch version. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+#include -+#include -+#include -diff --git a/sysdeps/unix/sysv/linux/loongarch/libc-start.c b/sysdeps/unix/sysv/linux/loongarch/libc-start.c -new file mode 100644 -index 00000000..f1346ece ---- /dev/null -+++ b/sysdeps/unix/sysv/linux/loongarch/libc-start.c -@@ -0,0 +1,34 @@ -+/* Override csu/libc-start.c on LoongArch64. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+#ifndef SHARED -+ -+/* Mark symbols hidden in static PIE for early self relocation to work. 
*/ -+# if BUILD_PIE_DEFAULT -+# pragma GCC visibility push(hidden) -+# endif -+ -+# include -+# include -+ -+extern struct cpu_features _dl_larch_cpu_features; -+ -+# define ARCH_INIT_CPU_FEATURES() init_cpu_features (&_dl_larch_cpu_features) -+ -+#endif -+#include --- -2.33.0 - diff --git a/LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch b/LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch deleted file mode 100644 index ad7e613..0000000 --- a/LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch +++ /dev/null @@ -1,485 +0,0 @@ -From 3ee56bbc56faa7b85a6513340db4a4fdd6ce709d Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Mon, 28 Aug 2023 10:08:36 +0800 -Subject: [PATCH 15/29] LoongArch: Add ifunc support for memchr{aligned, lsx, - lasx} - -According to glibc memchr microbenchmark, this implementation could reduce -the runtime as following: - -Name Percent of runtime reduced -memchr-lasx 37%-83% -memchr-lsx 30%-66% -memchr-aligned 0%-15% - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/lp64/multiarch/Makefile | 3 + - .../lp64/multiarch/ifunc-impl-list.c | 7 ++ - .../loongarch/lp64/multiarch/ifunc-memchr.h | 40 ++++++ - .../loongarch/lp64/multiarch/memchr-aligned.S | 95 ++++++++++++++ - .../loongarch/lp64/multiarch/memchr-lasx.S | 117 ++++++++++++++++++ - sysdeps/loongarch/lp64/multiarch/memchr-lsx.S | 102 +++++++++++++++ - sysdeps/loongarch/lp64/multiarch/memchr.c | 37 ++++++ - 7 files changed, 401 insertions(+) - create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h - create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-aligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lasx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lsx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr.c - -diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile -index 64416b02..2f4802cf 100644 ---- a/sysdeps/loongarch/lp64/multiarch/Makefile -+++ b/sysdeps/loongarch/lp64/multiarch/Makefile -@@ -24,5 +24,8 @@ sysdep_routines += \ - rawmemchr-aligned \ - rawmemchr-lsx \ - rawmemchr-lasx \ -+ memchr-aligned \ -+ memchr-lsx \ -+ memchr-lasx \ - # sysdep_routines - endif -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -index 3db9af14..a567b9cf 100644 ---- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -@@ -102,5 +102,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned) - ) - -+ IFUNC_IMPL (i, name, memchr, -+#if !defined __loongarch_soft_float -+ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LASX, __memchr_lasx) -+ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LSX, __memchr_lsx) -+#endif -+ IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned) -+ ) - return i; - } -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h -new file mode 100644 -index 00000000..9060ccd5 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h -@@ -0,0 +1,40 @@ -+/* Common definition for memchr ifunc selections. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. 
-+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+#include -+#include -+ -+#if !defined __loongarch_soft_float -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; -+#endif -+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; -+ -+static inline void * -+IFUNC_SELECTOR (void) -+{ -+#if !defined __loongarch_soft_float -+ if (SUPPORT_LASX) -+ return OPTIMIZE (lasx); -+ else if (SUPPORT_LSX) -+ return OPTIMIZE (lsx); -+ else -+#endif -+ return OPTIMIZE (aligned); -+} -diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S -new file mode 100644 -index 00000000..81d0d004 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S -@@ -0,0 +1,95 @@ -+/* Optimized memchr implementation using basic LoongArch instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+# define MEMCHR_NAME __memchr_aligned -+#else -+# define MEMCHR_NAME memchr -+#endif -+ -+LEAF(MEMCHR_NAME, 6) -+ beqz a2, L(out) -+ andi t1, a0, 0x7 -+ add.d a5, a0, a2 -+ bstrins.d a0, zero, 2, 0 -+ -+ ld.d t0, a0, 0 -+ bstrins.d a1, a1, 15, 8 -+ lu12i.w a3, 0x01010 -+ slli.d t2, t1, 03 -+ -+ bstrins.d a1, a1, 31, 16 -+ ori a3, a3, 0x101 -+ li.d t7, -1 -+ li.d t8, 8 -+ -+ bstrins.d a1, a1, 63, 32 -+ bstrins.d a3, a3, 63, 32 -+ sll.d t2, t7, t2 -+ xor t0, t0, a1 -+ -+ -+ addi.d a6, a5, -1 -+ slli.d a4, a3, 7 -+ sub.d t1, t8, t1 -+ orn t0, t0, t2 -+ -+ sub.d t2, t0, a3 -+ andn t3, a4, t0 -+ bstrins.d a6, zero, 2, 0 -+ and t0, t2, t3 -+ -+ bgeu t1, a2, L(end) -+L(loop): -+ bnez t0, L(found) -+ ld.d t1, a0, 8 -+ xor t0, t1, a1 -+ -+ addi.d a0, a0, 8 -+ sub.d t2, t0, a3 -+ andn t3, a4, t0 -+ and t0, t2, t3 -+ -+ -+ bne a0, a6, L(loop) -+L(end): -+ sub.d t1, a5, a6 -+ ctz.d t0, t0 -+ srli.d t0, t0, 3 -+ -+ sltu t1, t0, t1 -+ add.d a0, a0, t0 -+ maskeqz a0, a0, t1 -+ jr ra -+ -+L(found): -+ ctz.d t0, t0 -+ srli.d t0, t0, 3 -+ add.d a0, a0, t0 -+ jr ra -+ -+L(out): -+ move a0, zero -+ jr ra -+END(MEMCHR_NAME) -+ -+libc_hidden_builtin_def (MEMCHR_NAME) -diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S -new file mode 100644 -index 00000000..a26cdf48 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S -@@ -0,0 +1,117 @@ -+/* Optimized memchr implementation using LoongArch LASX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# define MEMCHR __memchr_lasx -+ -+LEAF(MEMCHR, 6) -+ beqz a2, L(ret0) -+ add.d a3, a0, a2 -+ andi t0, a0, 0x3f -+ bstrins.d a0, zero, 5, 0 -+ -+ xvld xr0, a0, 0 -+ xvld xr1, a0, 32 -+ li.d t1, -1 -+ li.d t2, 64 -+ -+ xvreplgr2vr.b xr2, a1 -+ sll.d t3, t1, t0 -+ sub.d t2, t2, t0 -+ xvseq.b xr0, xr0, xr2 -+ -+ xvseq.b xr1, xr1, xr2 -+ xvmsknz.b xr0, xr0 -+ xvmsknz.b xr1, xr1 -+ xvpickve.w xr3, xr0, 4 -+ -+ -+ xvpickve.w xr4, xr1, 4 -+ vilvl.h vr0, vr3, vr0 -+ vilvl.h vr1, vr4, vr1 -+ vilvl.w vr0, vr1, vr0 -+ -+ movfr2gr.d t0, fa0 -+ and t0, t0, t3 -+ bgeu t2, a2, L(end) -+ bnez t0, L(found) -+ -+ addi.d a4, a3, -1 -+ bstrins.d a4, zero, 5, 0 -+L(loop): -+ xvld xr0, a0, 64 -+ xvld xr1, a0, 96 -+ -+ addi.d a0, a0, 64 -+ xvseq.b xr0, xr0, xr2 -+ xvseq.b xr1, xr1, xr2 -+ beq a0, a4, L(out) -+ -+ -+ xvmax.bu xr3, xr0, xr1 -+ xvseteqz.v fcc0, xr3 -+ bcnez fcc0, L(loop) -+ xvmsknz.b xr0, xr0 -+ -+ xvmsknz.b xr1, xr1 -+ xvpickve.w xr3, xr0, 4 -+ xvpickve.w xr4, xr1, 4 -+ vilvl.h vr0, vr3, vr0 -+ -+ vilvl.h vr1, vr4, vr1 -+ vilvl.w vr0, vr1, vr0 -+ movfr2gr.d t0, fa0 -+L(found): -+ ctz.d t1, t0 -+ -+ add.d a0, a0, t1 -+ jr ra -+L(ret0): -+ move a0, zero -+ jr ra -+ -+ -+L(out): -+ xvmsknz.b xr0, xr0 -+ xvmsknz.b xr1, xr1 -+ xvpickve.w xr3, xr0, 4 -+ xvpickve.w xr4, xr1, 4 -+ -+ vilvl.h vr0, vr3, vr0 -+ vilvl.h vr1, vr4, vr1 -+ vilvl.w vr0, vr1, vr0 -+ movfr2gr.d t0, fa0 -+ -+L(end): -+ sub.d t2, zero, a3 -+ srl.d t1, t1, t2 -+ and t0, t0, t1 -+ ctz.d t1, t0 -+ -+ add.d a0, a0, t1 -+ maskeqz a0, a0, t0 -+ jr ra -+END(MEMCHR) -+ -+libc_hidden_builtin_def (MEMCHR) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S -new file mode 100644 -index 00000000..a73ecd25 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S -@@ -0,0 +1,102 @@ -+/* Optimized memchr implementation using LoongArch LSX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# define MEMCHR __memchr_lsx -+ -+LEAF(MEMCHR, 6) -+ beqz a2, L(ret0) -+ add.d a3, a0, a2 -+ andi t0, a0, 0x1f -+ bstrins.d a0, zero, 4, 0 -+ -+ vld vr0, a0, 0 -+ vld vr1, a0, 16 -+ li.d t1, -1 -+ li.d t2, 32 -+ -+ vreplgr2vr.b vr2, a1 -+ sll.d t3, t1, t0 -+ sub.d t2, t2, t0 -+ vseq.b vr0, vr0, vr2 -+ -+ vseq.b vr1, vr1, vr2 -+ vmsknz.b vr0, vr0 -+ vmsknz.b vr1, vr1 -+ vilvl.h vr0, vr1, vr0 -+ -+ -+ movfr2gr.s t0, fa0 -+ and t0, t0, t3 -+ bgeu t2, a2, L(end) -+ bnez t0, L(found) -+ -+ addi.d a4, a3, -1 -+ bstrins.d a4, zero, 4, 0 -+L(loop): -+ vld vr0, a0, 32 -+ vld vr1, a0, 48 -+ -+ addi.d a0, a0, 32 -+ vseq.b vr0, vr0, vr2 -+ vseq.b vr1, vr1, vr2 -+ beq a0, a4, L(out) -+ -+ vmax.bu vr3, vr0, vr1 -+ vseteqz.v fcc0, vr3 -+ bcnez fcc0, L(loop) -+ vmsknz.b vr0, vr0 -+ -+ -+ vmsknz.b vr1, vr1 -+ vilvl.h vr0, vr1, vr0 -+ movfr2gr.s t0, fa0 -+L(found): -+ ctz.w t0, t0 -+ -+ add.d a0, a0, t0 -+ jr ra -+L(ret0): -+ move a0, zero -+ jr ra -+ -+L(out): -+ vmsknz.b vr0, vr0 -+ vmsknz.b vr1, vr1 -+ vilvl.h vr0, vr1, vr0 -+ movfr2gr.s t0, fa0 -+ -+L(end): -+ sub.d t2, zero, a3 -+ srl.w t1, t1, t2 -+ and t0, t0, t1 -+ ctz.w t1, t0 -+ -+ -+ add.d a0, a0, t1 -+ maskeqz a0, a0, t0 -+ jr ra -+END(MEMCHR) -+ -+libc_hidden_builtin_def (MEMCHR) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/memchr.c b/sysdeps/loongarch/lp64/multiarch/memchr.c -new file mode 100644 -index 00000000..059479c0 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memchr.c -@@ -0,0 +1,37 @@ -+/* Multiple versions of memchr. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+/* Define multiple versions only for the definition in libc. 
*/ -+#if IS_IN (libc) -+# define memchr __redirect_memchr -+# include -+# undef memchr -+ -+# define SYMBOL_NAME memchr -+# include "ifunc-memchr.h" -+ -+libc_ifunc_redirected (__redirect_memchr, memchr, -+ IFUNC_SELECTOR ()); -+ -+# ifdef SHARED -+__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr) -+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr); -+# endif -+ -+#endif --- -2.33.0 - diff --git a/LoongArch-Add-ifunc-support-for-memcmp-aligned-lsx-l.patch b/LoongArch-Add-ifunc-support-for-memcmp-aligned-lsx-l.patch deleted file mode 100644 index 72c26d0..0000000 --- a/LoongArch-Add-ifunc-support-for-memcmp-aligned-lsx-l.patch +++ /dev/null @@ -1,946 +0,0 @@ -From 60f4bbd1eec528ba8df044ae6b3091f6337a7fcc Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Mon, 28 Aug 2023 10:08:39 +0800 -Subject: [PATCH 18/29] LoongArch: Add ifunc support for memcmp{aligned, lsx, - lasx} - -According to glibc memcmp microbenchmark test results(Add generic -memcmp), this implementation have performance improvement -except the length is less than 3, details as below: - -Name Percent of time reduced -memcmp-lasx 16%-74% -memcmp-lsx 20%-50% -memcmp-aligned 5%-20% - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/lp64/multiarch/Makefile | 3 + - .../lp64/multiarch/ifunc-impl-list.c | 7 + - .../loongarch/lp64/multiarch/ifunc-memcmp.h | 40 +++ - .../loongarch/lp64/multiarch/memcmp-aligned.S | 292 ++++++++++++++++++ - .../loongarch/lp64/multiarch/memcmp-lasx.S | 207 +++++++++++++ - sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S | 269 ++++++++++++++++ - sysdeps/loongarch/lp64/multiarch/memcmp.c | 43 +++ - 7 files changed, 861 insertions(+) - create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h - create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp.c - -diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile -index 216886c5..360a6718 100644 ---- a/sysdeps/loongarch/lp64/multiarch/Makefile -+++ b/sysdeps/loongarch/lp64/multiarch/Makefile -@@ -34,5 +34,8 @@ sysdep_routines += \ - memset-unaligned \ - memset-lsx \ - memset-lasx \ -+ memcmp-aligned \ -+ memcmp-lsx \ -+ memcmp-lasx \ - # sysdep_routines - endif -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -index 37f60dde..e397d58c 100644 ---- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -@@ -127,5 +127,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - IFUNC_IMPL_ADD (array, i, memset, 1, __memset_aligned) - ) - -+ IFUNC_IMPL (i, name, memcmp, -+#if !defined __loongarch_soft_float -+ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LASX, __memcmp_lasx) -+ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LSX, __memcmp_lsx) -+#endif -+ IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_aligned) -+ ) - return i; - } -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h -new file mode 100644 -index 00000000..04adc2e5 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h -@@ -0,0 +1,40 @@ -+/* Common definition for memcmp ifunc selections. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. 
-+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+#include -+#include -+ -+#if !defined __loongarch_soft_float -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; -+#endif -+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; -+ -+static inline void * -+IFUNC_SELECTOR (void) -+{ -+#if !defined __loongarch_soft_float -+ if (SUPPORT_LASX) -+ return OPTIMIZE (lasx); -+ else if (SUPPORT_LSX) -+ return OPTIMIZE (lsx); -+ else -+#endif -+ return OPTIMIZE (aligned); -+} -diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S -new file mode 100644 -index 00000000..14a7caa9 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S -@@ -0,0 +1,292 @@ -+/* Optimized memcmp implementation using basic LoongArch instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+# define MEMCMP_NAME __memcmp_aligned -+#else -+# define MEMCMP_NAME memcmp -+#endif -+ -+LEAF(MEMCMP_NAME, 6) -+ beqz a2, L(ret) -+ andi a4, a1, 0x7 -+ andi a3, a0, 0x7 -+ sltu a5, a4, a3 -+ -+ xor t0, a0, a1 -+ li.w t8, 8 -+ maskeqz t0, t0, a5 -+ li.w t7, -1 -+ -+ xor a0, a0, t0 -+ xor a1, a1, t0 -+ andi a3, a0, 0x7 -+ andi a4, a1, 0x7 -+ -+ xor a0, a0, a3 -+ xor a1, a1, a4 -+ ld.d t2, a0, 0 -+ ld.d t1, a1, 0 -+ -+ slli.d t3, a3, 3 -+ slli.d t4, a4, 3 -+ sub.d a6, t3, t4 -+ srl.d t1, t1, t4 -+ -+ srl.d t0, t2, t3 -+ srl.d t5, t7, t4 -+ sub.d t6, t0, t1 -+ and t6, t6, t5 -+ -+ sub.d t5, t8, a4 -+ bnez t6, L(first_out) -+ bgeu t5, a2, L(ret) -+ sub.d a2, a2, t5 -+ -+ bnez a6, L(unaligned) -+ blt a2, t8, L(al_less_8bytes) -+ andi t1, a2, 31 -+ beq t1, a2, L(al_less_32bytes) -+ -+ sub.d t2, a2, t1 -+ add.d a4, a0, t2 -+ move a2, t1 -+ -+L(al_loop): -+ ld.d t0, a0, 8 -+ -+ ld.d t1, a1, 8 -+ ld.d t2, a0, 16 -+ ld.d t3, a1, 16 -+ ld.d t4, a0, 24 -+ -+ ld.d t5, a1, 24 -+ ld.d t6, a0, 32 -+ ld.d t7, a1, 32 -+ addi.d a0, a0, 32 -+ -+ addi.d a1, a1, 32 -+ bne t0, t1, L(out1) -+ bne t2, t3, L(out2) -+ bne t4, t5, L(out3) -+ -+ bne t6, t7, L(out4) -+ bne a0, a4, L(al_loop) -+ -+L(al_less_32bytes): -+ srai.d a4, a2, 4 -+ beqz a4, L(al_less_16bytes) -+ -+ ld.d t0, a0, 8 -+ ld.d t1, a1, 8 -+ ld.d t2, a0, 16 -+ ld.d t3, a1, 16 -+ -+ addi.d a0, a0, 16 -+ addi.d a1, a1, 16 -+ addi.d a2, a2, -16 -+ bne t0, t1, L(out1) -+ -+ bne t2, t3, L(out2) -+ -+L(al_less_16bytes): -+ srai.d a4, a2, 3 -+ beqz a4, L(al_less_8bytes) -+ ld.d t0, a0, 8 -+ -+ ld.d t1, a1, 8 -+ addi.d a0, a0, 8 -+ addi.d a1, a1, 8 -+ addi.d a2, a2, -8 -+ -+ bne t0, t1, L(out1) -+ -+L(al_less_8bytes): -+ beqz a2, L(ret) -+ ld.d t0, a0, 8 -+ ld.d t1, a1, 8 -+ -+ li.d t7, -1 -+ slli.d t2, a2, 3 -+ sll.d t2, t7, t2 -+ sub.d t3, t0, t1 -+ -+ andn t6, t3, t2 -+ bnez t6, L(count_diff) -+ -+L(ret): -+ move a0, zero -+ jr ra -+ -+L(out4): -+ move t0, t6 -+ move t1, t7 -+ sub.d t6, t6, t7 -+ b L(count_diff) -+ -+L(out3): -+ move t0, t4 -+ move t1, t5 -+ sub.d t6, t4, t5 -+ b L(count_diff) -+ -+L(out2): -+ move t0, t2 -+ move t1, t3 -+L(out1): -+ sub.d t6, t0, t1 -+ b L(count_diff) -+ -+L(first_out): -+ slli.d t4, a2, 3 -+ slt t3, a2, t5 -+ sll.d t4, t7, t4 -+ maskeqz t4, t4, t3 -+ -+ andn t6, t6, t4 -+ -+L(count_diff): -+ ctz.d t2, t6 -+ bstrins.d t2, zero, 2, 0 -+ srl.d t0, t0, t2 -+ -+ srl.d t1, t1, t2 -+ andi t0, t0, 0xff -+ andi t1, t1, 0xff -+ sub.d t2, t0, t1 -+ -+ sub.d t3, t1, t0 -+ masknez t2, t2, a5 -+ maskeqz t3, t3, a5 -+ or a0, t2, t3 -+ -+ jr ra -+ -+L(unaligned): -+ sub.d a7, zero, a6 -+ srl.d t0, t2, a6 -+ blt a2, t8, L(un_less_8bytes) -+ -+ andi t1, a2, 31 -+ beq t1, a2, L(un_less_32bytes) -+ sub.d t2, a2, t1 -+ add.d a4, a0, t2 -+ -+ move a2, t1 -+ -+L(un_loop): -+ ld.d t2, a0, 8 -+ ld.d t1, a1, 8 -+ ld.d t4, a0, 16 -+ -+ ld.d t3, a1, 16 -+ ld.d t6, a0, 24 -+ ld.d t5, a1, 24 -+ ld.d t8, a0, 32 -+ -+ ld.d t7, a1, 32 -+ addi.d a0, a0, 32 -+ addi.d a1, a1, 32 -+ sll.d a3, t2, a7 -+ -+ or t0, a3, t0 -+ bne t0, t1, L(out1) -+ srl.d t0, t2, a6 -+ sll.d a3, t4, a7 -+ -+ or t2, a3, t0 -+ bne t2, t3, L(out2) -+ srl.d t0, t4, a6 -+ sll.d a3, t6, a7 -+ -+ or t4, a3, t0 -+ bne t4, t5, L(out3) -+ srl.d t0, t6, a6 -+ sll.d a3, t8, a7 -+ -+ or t6, t0, a3 -+ bne t6, t7, L(out4) -+ srl.d t0, t8, a6 -+ bne a0, a4, L(un_loop) -+ -+L(un_less_32bytes): -+ srai.d a4, a2, 4 -+ beqz a4, L(un_less_16bytes) -+ ld.d t2, a0, 8 -+ ld.d t1, a1, 8 -+ -+ ld.d t4, a0, 16 -+ ld.d t3, a1, 16 -+ addi.d a0, 
a0, 16 -+ addi.d a1, a1, 16 -+ -+ addi.d a2, a2, -16 -+ sll.d a3, t2, a7 -+ or t0, a3, t0 -+ bne t0, t1, L(out1) -+ -+ srl.d t0, t2, a6 -+ sll.d a3, t4, a7 -+ or t2, a3, t0 -+ bne t2, t3, L(out2) -+ -+ srl.d t0, t4, a6 -+ -+L(un_less_16bytes): -+ srai.d a4, a2, 3 -+ beqz a4, L(un_less_8bytes) -+ ld.d t2, a0, 8 -+ -+ ld.d t1, a1, 8 -+ addi.d a0, a0, 8 -+ addi.d a1, a1, 8 -+ addi.d a2, a2, -8 -+ -+ sll.d a3, t2, a7 -+ or t0, a3, t0 -+ bne t0, t1, L(out1) -+ srl.d t0, t2, a6 -+ -+L(un_less_8bytes): -+ beqz a2, L(ret) -+ andi a7, a7, 63 -+ slli.d a4, a2, 3 -+ bgeu a7, a4, L(last_cmp) -+ -+ ld.d t2, a0, 8 -+ sll.d a3, t2, a7 -+ or t0, a3, t0 -+ -+L(last_cmp): -+ ld.d t1, a1, 8 -+ -+ li.d t7, -1 -+ sll.d t2, t7, a4 -+ sub.d t3, t0, t1 -+ andn t6, t3, t2 -+ -+ bnez t6, L(count_diff) -+ move a0, zero -+ jr ra -+END(MEMCMP_NAME) -+ -+libc_hidden_builtin_def (MEMCMP_NAME) -diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S -new file mode 100644 -index 00000000..3151a179 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S -@@ -0,0 +1,207 @@ -+/* Optimized memcmp implementation using LoongArch LASX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# define MEMCMP __memcmp_lasx -+ -+LEAF(MEMCMP, 6) -+ li.d t2, 32 -+ add.d a3, a0, a2 -+ add.d a4, a1, a2 -+ bgeu t2, a2, L(less32) -+ -+ li.d t1, 160 -+ bgeu a2, t1, L(make_aligned) -+L(loop32): -+ xvld xr0, a0, 0 -+ xvld xr1, a1, 0 -+ -+ addi.d a0, a0, 32 -+ addi.d a1, a1, 32 -+ addi.d a2, a2, -32 -+ xvseq.b xr2, xr0, xr1 -+ -+ xvsetanyeqz.b fcc0, xr2 -+ bcnez fcc0, L(end) -+L(last_bytes): -+ bltu t2, a2, L(loop32) -+ xvld xr0, a3, -32 -+ -+ -+ xvld xr1, a4, -32 -+ xvseq.b xr2, xr0, xr1 -+L(end): -+ xvmsknz.b xr2, xr2 -+ xvpermi.q xr4, xr0, 1 -+ -+ xvpickve.w xr3, xr2, 4 -+ xvpermi.q xr5, xr1, 1 -+ vilvl.h vr2, vr3, vr2 -+ movfr2gr.s t0, fa2 -+ -+ cto.w t0, t0 -+ vreplgr2vr.b vr2, t0 -+ vshuf.b vr0, vr4, vr0, vr2 -+ vshuf.b vr1, vr5, vr1, vr2 -+ -+ vpickve2gr.bu t0, vr0, 0 -+ vpickve2gr.bu t1, vr1, 0 -+ sub.d a0, t0, t1 -+ jr ra -+ -+ -+L(less32): -+ srli.d t0, a2, 4 -+ beqz t0, L(less16) -+ vld vr0, a0, 0 -+ vld vr1, a1, 0 -+ -+ vld vr2, a3, -16 -+ vld vr3, a4, -16 -+L(short_ret): -+ vseq.b vr4, vr0, vr1 -+ vseq.b vr5, vr2, vr3 -+ -+ vmsknz.b vr4, vr4 -+ vmsknz.b vr5, vr5 -+ vilvl.h vr4, vr5, vr4 -+ movfr2gr.s t0, fa4 -+ -+ cto.w t0, t0 -+ vreplgr2vr.b vr4, t0 -+ vshuf.b vr0, vr2, vr0, vr4 -+ vshuf.b vr1, vr3, vr1, vr4 -+ -+ -+ vpickve2gr.bu t0, vr0, 0 -+ vpickve2gr.bu t1, vr1, 0 -+ sub.d a0, t0, t1 -+ jr ra -+ -+L(less16): -+ srli.d t0, a2, 3 -+ beqz t0, L(less8) -+ vldrepl.d vr0, a0, 0 -+ vldrepl.d vr1, a1, 0 -+ -+ vldrepl.d vr2, a3, -8 -+ vldrepl.d vr3, a4, -8 -+ b L(short_ret) -+ nop -+ -+L(less8): -+ srli.d t0, a2, 2 -+ beqz t0, L(less4) -+ vldrepl.w vr0, a0, 0 -+ vldrepl.w vr1, a1, 0 -+ -+ -+ vldrepl.w vr2, a3, -4 -+ vldrepl.w vr3, a4, -4 -+ b L(short_ret) -+ nop -+ -+L(less4): -+ srli.d t0, a2, 1 -+ beqz t0, L(less2) -+ vldrepl.h vr0, a0, 0 -+ vldrepl.h vr1, a1, 0 -+ -+ vldrepl.h vr2, a3, -2 -+ vldrepl.h vr3, a4, -2 -+ b L(short_ret) -+ nop -+ -+L(less2): -+ beqz a2, L(ret0) -+ ld.bu t0, a0, 0 -+ ld.bu t1, a1, 0 -+ sub.d a0, t0, t1 -+ -+ jr ra -+L(ret0): -+ move a0, zero -+ jr ra -+ -+L(make_aligned): -+ xvld xr0, a0, 0 -+ -+ xvld xr1, a1, 0 -+ xvseq.b xr2, xr0, xr1 -+ xvsetanyeqz.b fcc0, xr2 -+ bcnez fcc0, L(end) -+ -+ andi t0, a0, 0x1f -+ sub.d t0, t2, t0 -+ sub.d t1, a2, t0 -+ add.d a0, a0, t0 -+ -+ add.d a1, a1, t0 -+ andi a2, t1, 0x3f -+ sub.d t0, t1, a2 -+ add.d a5, a0, t0 -+ -+ -+L(loop_align): -+ xvld xr0, a0, 0 -+ xvld xr1, a1, 0 -+ xvld xr2, a0, 32 -+ xvld xr3, a1, 32 -+ -+ xvseq.b xr0, xr0, xr1 -+ xvseq.b xr1, xr2, xr3 -+ xvmin.bu xr2, xr1, xr0 -+ xvsetanyeqz.b fcc0, xr2 -+ -+ bcnez fcc0, L(pair_end) -+ addi.d a0, a0, 64 -+ addi.d a1, a1, 64 -+ bne a0, a5, L(loop_align) -+ -+ bnez a2, L(last_bytes) -+ move a0, zero -+ jr ra -+ nop -+ -+ -+L(pair_end): -+ xvmsknz.b xr0, xr0 -+ xvmsknz.b xr1, xr1 -+ xvpickve.w xr2, xr0, 4 -+ xvpickve.w xr3, xr1, 4 -+ -+ vilvl.h vr0, vr2, vr0 -+ vilvl.h vr1, vr3, vr1 -+ vilvl.w vr0, vr1, vr0 -+ movfr2gr.d t0, fa0 -+ -+ cto.d t0, t0 -+ ldx.bu t1, a0, t0 -+ ldx.bu t2, a1, t0 -+ sub.d a0, t1, t2 -+ -+ jr ra -+END(MEMCMP) -+ -+libc_hidden_builtin_def (MEMCMP) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S -new file mode 100644 -index 00000000..38a50a4c ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S -@@ -0,0 +1,269 @@ -+/* Optimized memcmp implementation using LoongArch LSX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. 
-+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+#define MEMCMP __memcmp_lsx -+ -+LEAF(MEMCMP, 6) -+ beqz a2, L(out) -+ pcalau12i t0, %pc_hi20(L(INDEX)) -+ andi a3, a0, 0xf -+ vld vr5, t0, %pc_lo12(L(INDEX)) -+ -+ andi a4, a1, 0xf -+ bne a3, a4, L(unaligned) -+ bstrins.d a0, zero, 3, 0 -+ xor a1, a1, a4 -+ -+ vld vr0, a0, 0 -+ vld vr1, a1, 0 -+ li.d t0, 16 -+ vreplgr2vr.b vr3, a3 -+ -+ sub.d t1, t0, a3 -+ vadd.b vr3, vr3, vr5 -+ vshuf.b vr0, vr3, vr0, vr3 -+ vshuf.b vr1, vr3, vr1, vr3 -+ -+ -+ vseq.b vr4, vr0, vr1 -+ bgeu t1, a2, L(al_end) -+ vsetanyeqz.b fcc0, vr4 -+ bcnez fcc0, L(al_found) -+ -+ sub.d t1, a2, t1 -+ andi a2, t1, 31 -+ beq a2, t1, L(al_less_32bytes) -+ sub.d t2, t1, a2 -+ -+ add.d a4, a0, t2 -+L(al_loop): -+ vld vr0, a0, 16 -+ vld vr1, a1, 16 -+ vld vr2, a0, 32 -+ -+ vld vr3, a1, 32 -+ addi.d a0, a0, 32 -+ addi.d a1, a1, 32 -+ vseq.b vr4, vr0, vr1 -+ -+ -+ vseq.b vr6, vr2, vr3 -+ vand.v vr6, vr4, vr6 -+ vsetanyeqz.b fcc0, vr6 -+ bcnez fcc0, L(al_pair_end) -+ -+ bne a0, a4, L(al_loop) -+L(al_less_32bytes): -+ bgeu t0, a2, L(al_less_16bytes) -+ vld vr0, a0, 16 -+ vld vr1, a1, 16 -+ -+ vld vr2, a0, 32 -+ vld vr3, a1, 32 -+ addi.d a2, a2, -16 -+ vreplgr2vr.b vr6, a2 -+ -+ vslt.b vr5, vr5, vr6 -+ vseq.b vr4, vr0, vr1 -+ vseq.b vr6, vr2, vr3 -+ vorn.v vr6, vr6, vr5 -+ -+ -+L(al_pair_end): -+ vsetanyeqz.b fcc0, vr4 -+ bcnez fcc0, L(al_found) -+ vnori.b vr4, vr6, 0 -+ vfrstpi.b vr4, vr4, 0 -+ -+ vshuf.b vr0, vr2, vr2, vr4 -+ vshuf.b vr1, vr3, vr3, vr4 -+ vpickve2gr.bu t0, vr0, 0 -+ vpickve2gr.bu t1, vr1, 0 -+ -+ sub.d a0, t0, t1 -+ jr ra -+ nop -+ nop -+ -+L(al_less_16bytes): -+ beqz a2, L(out) -+ vld vr0, a0, 16 -+ vld vr1, a1, 16 -+ vseq.b vr4, vr0, vr1 -+ -+ -+L(al_end): -+ vreplgr2vr.b vr6, a2 -+ vslt.b vr5, vr5, vr6 -+ vorn.v vr4, vr4, vr5 -+ nop -+ -+L(al_found): -+ vnori.b vr4, vr4, 0 -+ vfrstpi.b vr4, vr4, 0 -+ vshuf.b vr0, vr0, vr0, vr4 -+ vshuf.b vr1, vr1, vr1, vr4 -+ -+ vpickve2gr.bu t0, vr0, 0 -+ vpickve2gr.bu t1, vr1, 0 -+ sub.d a0, t0, t1 -+ jr ra -+ -+L(out): -+ move a0, zero -+ jr ra -+ nop -+ nop -+ -+ -+L(unaligned): -+ xor t2, a0, a1 -+ sltu a5, a3, a4 -+ masknez t2, t2, a5 -+ xor a0, a0, t2 -+ -+ xor a1, a1, t2 -+ andi a3, a0, 0xf -+ andi a4, a1, 0xf -+ bstrins.d a0, zero, 3, 0 -+ -+ xor a1, a1, a4 -+ vld vr4, a0, 0 -+ vld vr1, a1, 0 -+ li.d t0, 16 -+ -+ vreplgr2vr.b vr2, a4 -+ sub.d a6, a4, a3 -+ sub.d t1, t0, a4 -+ sub.d t2, t0, a6 -+ -+ -+ vadd.b vr2, vr2, vr5 -+ vreplgr2vr.b vr6, t2 -+ vadd.b vr6, vr6, vr5 -+ vshuf.b vr0, vr4, vr4, vr6 -+ -+ vshuf.b vr1, vr2, vr1, vr2 -+ vshuf.b vr0, vr2, vr0, vr2 -+ vseq.b vr7, vr0, vr1 -+ bgeu t1, a2, L(un_end) -+ -+ vsetanyeqz.b fcc0, vr7 -+ bcnez fcc0, L(un_found) -+ sub.d a2, a2, t1 -+ andi t1, a2, 31 -+ -+ beq a2, t1, L(un_less_32bytes) -+ sub.d t2, a2, t1 -+ move a2, t1 -+ add.d 
a4, a1, t2 -+ -+ -+L(un_loop): -+ vld vr2, a0, 16 -+ vld vr1, a1, 16 -+ vld vr3, a1, 32 -+ addi.d a1, a1, 32 -+ -+ addi.d a0, a0, 32 -+ vshuf.b vr0, vr2, vr4, vr6 -+ vld vr4, a0, 0 -+ vseq.b vr7, vr0, vr1 -+ -+ vshuf.b vr2, vr4, vr2, vr6 -+ vseq.b vr8, vr2, vr3 -+ vand.v vr8, vr7, vr8 -+ vsetanyeqz.b fcc0, vr8 -+ -+ bcnez fcc0, L(un_pair_end) -+ bne a1, a4, L(un_loop) -+ -+L(un_less_32bytes): -+ bltu a2, t0, L(un_less_16bytes) -+ vld vr2, a0, 16 -+ vld vr1, a1, 16 -+ addi.d a0, a0, 16 -+ -+ addi.d a1, a1, 16 -+ addi.d a2, a2, -16 -+ vshuf.b vr0, vr2, vr4, vr6 -+ vor.v vr4, vr2, vr2 -+ -+ vseq.b vr7, vr0, vr1 -+ vsetanyeqz.b fcc0, vr7 -+ bcnez fcc0, L(un_found) -+L(un_less_16bytes): -+ beqz a2, L(out) -+ vld vr1, a1, 16 -+ bgeu a6, a2, 1f -+ -+ vld vr2, a0, 16 -+1: -+ vshuf.b vr0, vr2, vr4, vr6 -+ vseq.b vr7, vr0, vr1 -+L(un_end): -+ vreplgr2vr.b vr3, a2 -+ -+ -+ vslt.b vr3, vr5, vr3 -+ vorn.v vr7, vr7, vr3 -+ -+L(un_found): -+ vnori.b vr7, vr7, 0 -+ vfrstpi.b vr7, vr7, 0 -+ -+ vshuf.b vr0, vr0, vr0, vr7 -+ vshuf.b vr1, vr1, vr1, vr7 -+L(calc_result): -+ vpickve2gr.bu t0, vr0, 0 -+ vpickve2gr.bu t1, vr1, 0 -+ -+ sub.d t2, t0, t1 -+ sub.d t3, t1, t0 -+ masknez t0, t3, a5 -+ maskeqz t1, t2, a5 -+ -+ or a0, t0, t1 -+ jr ra -+L(un_pair_end): -+ vsetanyeqz.b fcc0, vr7 -+ bcnez fcc0, L(un_found) -+ -+ -+ vnori.b vr7, vr8, 0 -+ vfrstpi.b vr7, vr7, 0 -+ vshuf.b vr0, vr2, vr2, vr7 -+ vshuf.b vr1, vr3, vr3, vr7 -+ -+ b L(calc_result) -+END(MEMCMP) -+ -+ .section .rodata.cst16,"M",@progbits,16 -+ .align 4 -+L(INDEX): -+ .dword 0x0706050403020100 -+ .dword 0x0f0e0d0c0b0a0908 -+ -+libc_hidden_builtin_def (MEMCMP) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp.c b/sysdeps/loongarch/lp64/multiarch/memcmp.c -new file mode 100644 -index 00000000..32eccac2 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memcmp.c -@@ -0,0 +1,43 @@ -+/* Multiple versions of memcmp. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+/* Define multiple versions only for the definition in libc. 
*/ -+#if IS_IN (libc) -+# define memcmp __redirect_memcmp -+# include -+# undef memcmp -+ -+# define SYMBOL_NAME memcmp -+# include "ifunc-memcmp.h" -+ -+libc_ifunc_redirected (__redirect_memcmp, memcmp, -+ IFUNC_SELECTOR ()); -+# undef bcmp -+weak_alias (memcmp, bcmp) -+ -+# undef __memcmpeq -+strong_alias (memcmp, __memcmpeq) -+libc_hidden_def (__memcmpeq) -+ -+# ifdef SHARED -+__hidden_ver1 (memcmp, __GI_memcmp, __redirect_memcmp) -+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memcmp); -+# endif -+ -+#endif --- -2.33.0 - diff --git a/LoongArch-Add-ifunc-support-for-memrchr-lsx-lasx.patch b/LoongArch-Add-ifunc-support-for-memrchr-lsx-lasx.patch deleted file mode 100644 index 26a0f40..0000000 --- a/LoongArch-Add-ifunc-support-for-memrchr-lsx-lasx.patch +++ /dev/null @@ -1,417 +0,0 @@ -From c4c272fb8067364530a2a78df92c37403acc963f Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Mon, 28 Aug 2023 10:08:37 +0800 -Subject: [PATCH 16/29] LoongArch: Add ifunc support for memrchr{lsx, lasx} - -According to glibc memrchr microbenchmark, this implementation could reduce -the runtime as following: - -Name Percent of rutime reduced -memrchr-lasx 20%-83% -memrchr-lsx 20%-64% - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/lp64/multiarch/Makefile | 3 + - .../lp64/multiarch/ifunc-impl-list.c | 8 ++ - .../loongarch/lp64/multiarch/ifunc-memrchr.h | 40 ++++++ - .../lp64/multiarch/memrchr-generic.c | 23 ++++ - .../loongarch/lp64/multiarch/memrchr-lasx.S | 123 ++++++++++++++++++ - .../loongarch/lp64/multiarch/memrchr-lsx.S | 105 +++++++++++++++ - sysdeps/loongarch/lp64/multiarch/memrchr.c | 33 +++++ - 7 files changed, 335 insertions(+) - create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h - create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-generic.c - create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr.c - -diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile -index 2f4802cf..7b87bc90 100644 ---- a/sysdeps/loongarch/lp64/multiarch/Makefile -+++ b/sysdeps/loongarch/lp64/multiarch/Makefile -@@ -27,5 +27,8 @@ sysdep_routines += \ - memchr-aligned \ - memchr-lsx \ - memchr-lasx \ -+ memrchr-generic \ -+ memrchr-lsx \ -+ memrchr-lasx \ - # sysdep_routines - endif -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -index a567b9cf..8bd5489e 100644 ---- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -@@ -109,5 +109,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - #endif - IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned) - ) -+ -+ IFUNC_IMPL (i, name, memrchr, -+#if !defined __loongarch_soft_float -+ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LASX, __memrchr_lasx) -+ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LSX, __memrchr_lsx) -+#endif -+ IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic) -+ ) - return i; - } -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h -new file mode 100644 -index 00000000..8215f9ad ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h -@@ -0,0 +1,40 @@ -+/* Common definition for memrchr implementation. -+ All versions must be listed in ifunc-impl-list.c. 
-+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+#include -+#include -+ -+#if !defined __loongarch_soft_float -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; -+#endif -+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; -+ -+static inline void * -+IFUNC_SELECTOR (void) -+{ -+#if !defined __loongarch_soft_float -+ if (SUPPORT_LASX) -+ return OPTIMIZE (lasx); -+ else if (SUPPORT_LSX) -+ return OPTIMIZE (lsx); -+ else -+#endif -+ return OPTIMIZE (generic); -+} -diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c b/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c -new file mode 100644 -index 00000000..ced61ebc ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c -@@ -0,0 +1,23 @@ -+/* Generic implementation of memrchr. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#if IS_IN (libc) -+# define MEMRCHR __memrchr_generic -+#endif -+ -+#include -diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S -new file mode 100644 -index 00000000..5f3e0d06 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S -@@ -0,0 +1,123 @@ -+/* Optimized memrchr implementation using LoongArch LASX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+#ifndef MEMRCHR -+# define MEMRCHR __memrchr_lasx -+#endif -+ -+LEAF(MEMRCHR, 6) -+ beqz a2, L(ret0) -+ addi.d a2, a2, -1 -+ add.d a3, a0, a2 -+ andi t1, a3, 0x3f -+ -+ bstrins.d a3, zero, 5, 0 -+ addi.d t1, t1, 1 -+ xvld xr0, a3, 0 -+ xvld xr1, a3, 32 -+ -+ sub.d t2, zero, t1 -+ li.d t3, -1 -+ xvreplgr2vr.b xr2, a1 -+ andi t4, a0, 0x3f -+ -+ srl.d t2, t3, t2 -+ xvseq.b xr0, xr0, xr2 -+ xvseq.b xr1, xr1, xr2 -+ xvmsknz.b xr0, xr0 -+ -+ -+ xvmsknz.b xr1, xr1 -+ xvpickve.w xr3, xr0, 4 -+ xvpickve.w xr4, xr1, 4 -+ vilvl.h vr0, vr3, vr0 -+ -+ vilvl.h vr1, vr4, vr1 -+ vilvl.w vr0, vr1, vr0 -+ movfr2gr.d t0, fa0 -+ and t0, t0, t2 -+ -+ bltu a2, t1, L(end) -+ bnez t0, L(found) -+ bstrins.d a0, zero, 5, 0 -+L(loop): -+ xvld xr0, a3, -64 -+ -+ xvld xr1, a3, -32 -+ addi.d a3, a3, -64 -+ xvseq.b xr0, xr0, xr2 -+ xvseq.b xr1, xr1, xr2 -+ -+ -+ beq a0, a3, L(out) -+ xvmax.bu xr3, xr0, xr1 -+ xvseteqz.v fcc0, xr3 -+ bcnez fcc0, L(loop) -+ -+ xvmsknz.b xr0, xr0 -+ xvmsknz.b xr1, xr1 -+ xvpickve.w xr3, xr0, 4 -+ xvpickve.w xr4, xr1, 4 -+ -+ vilvl.h vr0, vr3, vr0 -+ vilvl.h vr1, vr4, vr1 -+ vilvl.w vr0, vr1, vr0 -+ movfr2gr.d t0, fa0 -+ -+L(found): -+ addi.d a0, a3, 63 -+ clz.d t1, t0 -+ sub.d a0, a0, t1 -+ jr ra -+ -+ -+L(out): -+ xvmsknz.b xr0, xr0 -+ xvmsknz.b xr1, xr1 -+ xvpickve.w xr3, xr0, 4 -+ xvpickve.w xr4, xr1, 4 -+ -+ vilvl.h vr0, vr3, vr0 -+ vilvl.h vr1, vr4, vr1 -+ vilvl.w vr0, vr1, vr0 -+ movfr2gr.d t0, fa0 -+ -+L(end): -+ sll.d t2, t3, t4 -+ and t0, t0, t2 -+ addi.d a0, a3, 63 -+ clz.d t1, t0 -+ -+ sub.d a0, a0, t1 -+ maskeqz a0, a0, t0 -+ jr ra -+L(ret0): -+ move a0, zero -+ -+ -+ jr ra -+END(MEMRCHR) -+ -+libc_hidden_builtin_def (MEMRCHR) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S -new file mode 100644 -index 00000000..39a7c8b0 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S -@@ -0,0 +1,105 @@ -+/* Optimized memrchr implementation using LoongArch LSX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# define MEMRCHR __memrchr_lsx -+ -+LEAF(MEMRCHR, 6) -+ beqz a2, L(ret0) -+ addi.d a2, a2, -1 -+ add.d a3, a0, a2 -+ andi t1, a3, 0x1f -+ -+ bstrins.d a3, zero, 4, 0 -+ addi.d t1, t1, 1 -+ vld vr0, a3, 0 -+ vld vr1, a3, 16 -+ -+ sub.d t2, zero, t1 -+ li.d t3, -1 -+ vreplgr2vr.b vr2, a1 -+ andi t4, a0, 0x1f -+ -+ srl.d t2, t3, t2 -+ vseq.b vr0, vr0, vr2 -+ vseq.b vr1, vr1, vr2 -+ vmsknz.b vr0, vr0 -+ -+ -+ vmsknz.b vr1, vr1 -+ vilvl.h vr0, vr1, vr0 -+ movfr2gr.s t0, fa0 -+ and t0, t0, t2 -+ -+ bltu a2, t1, L(end) -+ bnez t0, L(found) -+ bstrins.d a0, zero, 4, 0 -+L(loop): -+ vld vr0, a3, -32 -+ -+ vld vr1, a3, -16 -+ addi.d a3, a3, -32 -+ vseq.b vr0, vr0, vr2 -+ vseq.b vr1, vr1, vr2 -+ -+ beq a0, a3, L(out) -+ vmax.bu vr3, vr0, vr1 -+ vseteqz.v fcc0, vr3 -+ bcnez fcc0, L(loop) -+ -+ -+ vmsknz.b vr0, vr0 -+ vmsknz.b vr1, vr1 -+ vilvl.h vr0, vr1, vr0 -+ movfr2gr.s t0, fa0 -+ -+L(found): -+ addi.d a0, a3, 31 -+ clz.w t1, t0 -+ sub.d a0, a0, t1 -+ jr ra -+ -+L(out): -+ vmsknz.b vr0, vr0 -+ vmsknz.b vr1, vr1 -+ vilvl.h vr0, vr1, vr0 -+ movfr2gr.s t0, fa0 -+ -+L(end): -+ sll.d t2, t3, t4 -+ and t0, t0, t2 -+ addi.d a0, a3, 31 -+ clz.w t1, t0 -+ -+ -+ sub.d a0, a0, t1 -+ maskeqz a0, a0, t0 -+ jr ra -+L(ret0): -+ move a0, zero -+ -+ jr ra -+END(MEMRCHR) -+ -+libc_hidden_builtin_def (MEMRCHR) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr.c b/sysdeps/loongarch/lp64/multiarch/memrchr.c -new file mode 100644 -index 00000000..8baba9ab ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memrchr.c -@@ -0,0 +1,33 @@ -+/* Multiple versions of memrchr. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+/* Define multiple versions only for the definition in libc. 
*/ -+#if IS_IN (libc) -+# define memrchr __redirect_memrchr -+# include -+# undef memrchr -+ -+# define SYMBOL_NAME memrchr -+# include "ifunc-memrchr.h" -+ -+libc_ifunc_redirected (__redirect_memrchr, __memrchr, IFUNC_SELECTOR ()); -+libc_hidden_def (__memrchr) -+weak_alias (__memrchr, memrchr) -+ -+#endif --- -2.33.0 - diff --git a/LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch b/LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch deleted file mode 100644 index 2e18ba2..0000000 --- a/LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch +++ /dev/null @@ -1,784 +0,0 @@ -From 14032f7bbe18443af8492f5d0365f72b76701673 Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Mon, 28 Aug 2023 10:08:38 +0800 -Subject: [PATCH 17/29] LoongArch: Add ifunc support for memset{aligned, - unaligned, lsx, lasx} - -According to glibc memset microbenchmark test results, for LSX and LASX -versions, A few cases with length less than 8 experience performace -degradation, overall, the LASX version could reduce the runtime about -15% - 75%, LSX version could reduce the runtime about 15%-50%. - -The unaligned version uses unaligned memmory access to set data which -length is less than 64 and make address aligned with 8. For this part, -the performace is better than aligned version. Comparing with the generic -version, the performance is close when the length is larger than 128. When -the length is 8-128, the unaligned version could reduce the runtime about -30%-70%, the aligned version could reduce the runtime about 20%-50%. - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/lp64/multiarch/Makefile | 4 + - .../lp64/multiarch/dl-symbol-redir-ifunc.h | 24 +++ - .../lp64/multiarch/ifunc-impl-list.c | 10 + - .../loongarch/lp64/multiarch/memset-aligned.S | 174 ++++++++++++++++++ - .../loongarch/lp64/multiarch/memset-lasx.S | 142 ++++++++++++++ - sysdeps/loongarch/lp64/multiarch/memset-lsx.S | 135 ++++++++++++++ - .../lp64/multiarch/memset-unaligned.S | 162 ++++++++++++++++ - sysdeps/loongarch/lp64/multiarch/memset.c | 37 ++++ - 8 files changed, 688 insertions(+) - create mode 100644 sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h - create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-aligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lasx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lsx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-unaligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memset.c - -diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile -index 7b87bc90..216886c5 100644 ---- a/sysdeps/loongarch/lp64/multiarch/Makefile -+++ b/sysdeps/loongarch/lp64/multiarch/Makefile -@@ -30,5 +30,9 @@ sysdep_routines += \ - memrchr-generic \ - memrchr-lsx \ - memrchr-lasx \ -+ memset-aligned \ -+ memset-unaligned \ -+ memset-lsx \ -+ memset-lasx \ - # sysdep_routines - endif -diff --git a/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h -new file mode 100644 -index 00000000..e2723873 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h -@@ -0,0 +1,24 @@ -+/* Symbol rediretion for loader/static initialization code. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. 
-+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+#ifndef _DL_IFUNC_GENERIC_H -+#define _DL_IFUNC_GENERIC_H -+ -+asm ("memset = __memset_aligned"); -+ -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -index 8bd5489e..37f60dde 100644 ---- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -@@ -117,5 +117,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - #endif - IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic) - ) -+ -+ IFUNC_IMPL (i, name, memset, -+#if !defined __loongarch_soft_float -+ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LASX, __memset_lasx) -+ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LSX, __memset_lsx) -+#endif -+ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_UAL, __memset_unaligned) -+ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_aligned) -+ ) -+ - return i; - } -diff --git a/sysdeps/loongarch/lp64/multiarch/memset-aligned.S b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S -new file mode 100644 -index 00000000..1fce95b7 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S -@@ -0,0 +1,174 @@ -+/* Optimized memset aligned implementation using basic LoongArch instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+# define MEMSET_NAME __memset_aligned -+#else -+# define MEMSET_NAME memset -+#endif -+ -+LEAF(MEMSET_NAME, 6) -+ move t0, a0 -+ andi a3, a0, 0x7 -+ li.w t6, 16 -+ beqz a3, L(align) -+ bltu a2, t6, L(short_data) -+ -+L(make_align): -+ li.w t8, 8 -+ sub.d t2, t8, a3 -+ pcaddi t1, 11 -+ slli.d t3, t2, 2 -+ sub.d t1, t1, t3 -+ jr t1 -+ -+L(al7): -+ st.b a1, t0, 6 -+L(al6): -+ st.b a1, t0, 5 -+L(al5): -+ st.b a1, t0, 4 -+L(al4): -+ st.b a1, t0, 3 -+L(al3): -+ st.b a1, t0, 2 -+L(al2): -+ st.b a1, t0, 1 -+L(al1): -+ st.b a1, t0, 0 -+L(al0): -+ add.d t0, t0, t2 -+ sub.d a2, a2, t2 -+ -+L(align): -+ bstrins.d a1, a1, 15, 8 -+ bstrins.d a1, a1, 31, 16 -+ bstrins.d a1, a1, 63, 32 -+ bltu a2, t6, L(less_16bytes) -+ -+ andi a4, a2, 0x3f -+ beq a4, a2, L(less_64bytes) -+ -+ sub.d t1, a2, a4 -+ move a2, a4 -+ add.d a5, t0, t1 -+ -+L(loop_64bytes): -+ addi.d t0, t0, 64 -+ st.d a1, t0, -64 -+ st.d a1, t0, -56 -+ st.d a1, t0, -48 -+ st.d a1, t0, -40 -+ -+ st.d a1, t0, -32 -+ st.d a1, t0, -24 -+ st.d a1, t0, -16 -+ st.d a1, t0, -8 -+ bne t0, a5, L(loop_64bytes) -+ -+L(less_64bytes): -+ srai.d a4, a2, 5 -+ beqz a4, L(less_32bytes) -+ addi.d a2, a2, -32 -+ st.d a1, t0, 0 -+ -+ st.d a1, t0, 8 -+ st.d a1, t0, 16 -+ st.d a1, t0, 24 -+ addi.d t0, t0, 32 -+ -+L(less_32bytes): -+ bltu a2, t6, L(less_16bytes) -+ addi.d a2, a2, -16 -+ st.d a1, t0, 0 -+ st.d a1, t0, 8 -+ addi.d t0, t0, 16 -+ -+L(less_16bytes): -+ srai.d a4, a2, 3 -+ beqz a4, L(less_8bytes) -+ addi.d a2, a2, -8 -+ st.d a1, t0, 0 -+ addi.d t0, t0, 8 -+ -+L(less_8bytes): -+ beqz a2, L(less_1byte) -+ srai.d a4, a2, 2 -+ beqz a4, L(less_4bytes) -+ addi.d a2, a2, -4 -+ st.w a1, t0, 0 -+ addi.d t0, t0, 4 -+ -+L(less_4bytes): -+ srai.d a3, a2, 1 -+ beqz a3, L(less_2bytes) -+ addi.d a2, a2, -2 -+ st.h a1, t0, 0 -+ addi.d t0, t0, 2 -+ -+L(less_2bytes): -+ beqz a2, L(less_1byte) -+ st.b a1, t0, 0 -+L(less_1byte): -+ jr ra -+ -+L(short_data): -+ pcaddi t1, 19 -+ slli.d t3, a2, 2 -+ sub.d t1, t1, t3 -+ jr t1 -+L(short_15): -+ st.b a1, a0, 14 -+L(short_14): -+ st.b a1, a0, 13 -+L(short_13): -+ st.b a1, a0, 12 -+L(short_12): -+ st.b a1, a0, 11 -+L(short_11): -+ st.b a1, a0, 10 -+L(short_10): -+ st.b a1, a0, 9 -+L(short_9): -+ st.b a1, a0, 8 -+L(short_8): -+ st.b a1, a0, 7 -+L(short_7): -+ st.b a1, a0, 6 -+L(short_6): -+ st.b a1, a0, 5 -+L(short_5): -+ st.b a1, a0, 4 -+L(short_4): -+ st.b a1, a0, 3 -+L(short_3): -+ st.b a1, a0, 2 -+L(short_2): -+ st.b a1, a0, 1 -+L(short_1): -+ st.b a1, a0, 0 -+L(short_0): -+ jr ra -+END(MEMSET_NAME) -+ -+libc_hidden_builtin_def (MEMSET_NAME) -diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lasx.S b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S -new file mode 100644 -index 00000000..041abbac ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S -@@ -0,0 +1,142 @@ -+/* Optimized memset implementation using LoongArch LASX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. 
-+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# define MEMSET __memset_lasx -+ -+LEAF(MEMSET, 6) -+ li.d t1, 32 -+ move a3, a0 -+ xvreplgr2vr.b xr0, a1 -+ add.d a4, a0, a2 -+ -+ bgeu t1, a2, L(less_32bytes) -+ li.d t3, 128 -+ li.d t2, 64 -+ blt t3, a2, L(long_bytes) -+ -+L(less_128bytes): -+ bgeu t2, a2, L(less_64bytes) -+ xvst xr0, a3, 0 -+ xvst xr0, a3, 32 -+ xvst xr0, a4, -32 -+ -+ xvst xr0, a4, -64 -+ jr ra -+L(less_64bytes): -+ xvst xr0, a3, 0 -+ xvst xr0, a4, -32 -+ -+ -+ jr ra -+L(less_32bytes): -+ srli.d t0, a2, 4 -+ beqz t0, L(less_16bytes) -+ vst vr0, a3, 0 -+ -+ vst vr0, a4, -16 -+ jr ra -+L(less_16bytes): -+ srli.d t0, a2, 3 -+ beqz t0, L(less_8bytes) -+ -+ vstelm.d vr0, a3, 0, 0 -+ vstelm.d vr0, a4, -8, 0 -+ jr ra -+L(less_8bytes): -+ srli.d t0, a2, 2 -+ -+ beqz t0, L(less_4bytes) -+ vstelm.w vr0, a3, 0, 0 -+ vstelm.w vr0, a4, -4, 0 -+ jr ra -+ -+ -+L(less_4bytes): -+ srli.d t0, a2, 1 -+ beqz t0, L(less_2bytes) -+ vstelm.h vr0, a3, 0, 0 -+ vstelm.h vr0, a4, -2, 0 -+ -+ jr ra -+L(less_2bytes): -+ beqz a2, L(less_1bytes) -+ st.b a1, a3, 0 -+L(less_1bytes): -+ jr ra -+ -+L(long_bytes): -+ xvst xr0, a3, 0 -+ bstrins.d a3, zero, 4, 0 -+ addi.d a3, a3, 32 -+ sub.d a2, a4, a3 -+ -+ andi t0, a2, 0xff -+ beq t0, a2, L(long_end) -+ move a2, t0 -+ sub.d t0, a4, t0 -+ -+ -+L(loop_256): -+ xvst xr0, a3, 0 -+ xvst xr0, a3, 32 -+ xvst xr0, a3, 64 -+ xvst xr0, a3, 96 -+ -+ xvst xr0, a3, 128 -+ xvst xr0, a3, 160 -+ xvst xr0, a3, 192 -+ xvst xr0, a3, 224 -+ -+ addi.d a3, a3, 256 -+ bne a3, t0, L(loop_256) -+L(long_end): -+ bltu a2, t3, L(end_less_128) -+ addi.d a2, a2, -128 -+ -+ xvst xr0, a3, 0 -+ xvst xr0, a3, 32 -+ xvst xr0, a3, 64 -+ xvst xr0, a3, 96 -+ -+ -+ addi.d a3, a3, 128 -+L(end_less_128): -+ bltu a2, t2, L(end_less_64) -+ addi.d a2, a2, -64 -+ xvst xr0, a3, 0 -+ -+ xvst xr0, a3, 32 -+ addi.d a3, a3, 64 -+L(end_less_64): -+ bltu a2, t1, L(end_less_32) -+ xvst xr0, a3, 0 -+ -+L(end_less_32): -+ xvst xr0, a4, -32 -+ jr ra -+END(MEMSET) -+ -+libc_hidden_builtin_def (MEMSET) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lsx.S b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S -new file mode 100644 -index 00000000..3d3982aa ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S -@@ -0,0 +1,135 @@ -+/* Optimized memset implementation using LoongArch LSX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# define MEMSET __memset_lsx -+ -+LEAF(MEMSET, 6) -+ li.d t1, 16 -+ move a3, a0 -+ vreplgr2vr.b vr0, a1 -+ add.d a4, a0, a2 -+ -+ bgeu t1, a2, L(less_16bytes) -+ li.d t3, 64 -+ li.d t2, 32 -+ bgeu a2, t3, L(long_bytes) -+ -+L(less_64bytes): -+ bgeu t2, a2, L(less_32bytes) -+ vst vr0, a3, 0 -+ vst vr0, a3, 16 -+ vst vr0, a4, -32 -+ -+ vst vr0, a4, -16 -+ jr ra -+L(less_32bytes): -+ vst vr0, a3, 0 -+ vst vr0, a4, -16 -+ -+ -+ jr ra -+L(less_16bytes): -+ srli.d t0, a2, 3 -+ beqz t0, L(less_8bytes) -+ vstelm.d vr0, a3, 0, 0 -+ -+ vstelm.d vr0, a4, -8, 0 -+ jr ra -+L(less_8bytes): -+ srli.d t0, a2, 2 -+ beqz t0, L(less_4bytes) -+ -+ vstelm.w vr0, a3, 0, 0 -+ vstelm.w vr0, a4, -4, 0 -+ jr ra -+L(less_4bytes): -+ srli.d t0, a2, 1 -+ -+ beqz t0, L(less_2bytes) -+ vstelm.h vr0, a3, 0, 0 -+ vstelm.h vr0, a4, -2, 0 -+ jr ra -+ -+ -+L(less_2bytes): -+ beqz a2, L(less_1bytes) -+ vstelm.b vr0, a3, 0, 0 -+L(less_1bytes): -+ jr ra -+L(long_bytes): -+ vst vr0, a3, 0 -+ -+ bstrins.d a3, zero, 3, 0 -+ addi.d a3, a3, 16 -+ sub.d a2, a4, a3 -+ andi t0, a2, 0x7f -+ -+ beq t0, a2, L(long_end) -+ move a2, t0 -+ sub.d t0, a4, t0 -+ -+L(loop_128): -+ vst vr0, a3, 0 -+ -+ vst vr0, a3, 16 -+ vst vr0, a3, 32 -+ vst vr0, a3, 48 -+ vst vr0, a3, 64 -+ -+ -+ vst vr0, a3, 80 -+ vst vr0, a3, 96 -+ vst vr0, a3, 112 -+ addi.d a3, a3, 128 -+ -+ bne a3, t0, L(loop_128) -+L(long_end): -+ bltu a2, t3, L(end_less_64) -+ addi.d a2, a2, -64 -+ vst vr0, a3, 0 -+ -+ vst vr0, a3, 16 -+ vst vr0, a3, 32 -+ vst vr0, a3, 48 -+ addi.d a3, a3, 64 -+ -+L(end_less_64): -+ bltu a2, t2, L(end_less_32) -+ addi.d a2, a2, -32 -+ vst vr0, a3, 0 -+ vst vr0, a3, 16 -+ -+ addi.d a3, a3, 32 -+L(end_less_32): -+ bltu a2, t1, L(end_less_16) -+ vst vr0, a3, 0 -+ -+L(end_less_16): -+ vst vr0, a4, -16 -+ jr ra -+END(MEMSET) -+ -+libc_hidden_builtin_def (MEMSET) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S -new file mode 100644 -index 00000000..f7d32039 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S -@@ -0,0 +1,162 @@ -+/* Optimized memset unaligned implementation using basic LoongArch instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+ -+# define MEMSET_NAME __memset_unaligned -+ -+#define ST_128(n) \ -+ st.d a1, a0, n; \ -+ st.d a1, a0, n+8 ; \ -+ st.d a1, a0, n+16 ; \ -+ st.d a1, a0, n+24 ; \ -+ st.d a1, a0, n+32 ; \ -+ st.d a1, a0, n+40 ; \ -+ st.d a1, a0, n+48 ; \ -+ st.d a1, a0, n+56 ; \ -+ st.d a1, a0, n+64 ; \ -+ st.d a1, a0, n+72 ; \ -+ st.d a1, a0, n+80 ; \ -+ st.d a1, a0, n+88 ; \ -+ st.d a1, a0, n+96 ; \ -+ st.d a1, a0, n+104; \ -+ st.d a1, a0, n+112; \ -+ st.d a1, a0, n+120; -+ -+LEAF(MEMSET_NAME, 6) -+ bstrins.d a1, a1, 15, 8 -+ add.d t7, a0, a2 -+ bstrins.d a1, a1, 31, 16 -+ move t0, a0 -+ -+ bstrins.d a1, a1, 63, 32 -+ srai.d t8, a2, 4 -+ beqz t8, L(less_16bytes) -+ srai.d t8, a2, 6 -+ -+ bnez t8, L(more_64bytes) -+ srai.d t8, a2, 5 -+ beqz t8, L(less_32bytes) -+ -+ st.d a1, a0, 0 -+ st.d a1, a0, 8 -+ st.d a1, a0, 16 -+ st.d a1, a0, 24 -+ -+ st.d a1, t7, -32 -+ st.d a1, t7, -24 -+ st.d a1, t7, -16 -+ st.d a1, t7, -8 -+ -+ jr ra -+ -+L(less_32bytes): -+ st.d a1, a0, 0 -+ st.d a1, a0, 8 -+ st.d a1, t7, -16 -+ st.d a1, t7, -8 -+ -+ jr ra -+ -+L(less_16bytes): -+ srai.d t8, a2, 3 -+ beqz t8, L(less_8bytes) -+ st.d a1, a0, 0 -+ st.d a1, t7, -8 -+ -+ jr ra -+ -+L(less_8bytes): -+ srai.d t8, a2, 2 -+ beqz t8, L(less_4bytes) -+ st.w a1, a0, 0 -+ st.w a1, t7, -4 -+ -+ jr ra -+ -+L(less_4bytes): -+ srai.d t8, a2, 1 -+ beqz t8, L(less_2bytes) -+ st.h a1, a0, 0 -+ st.h a1, t7, -2 -+ -+ jr ra -+ -+L(less_2bytes): -+ beqz a2, L(less_1bytes) -+ st.b a1, a0, 0 -+ -+ jr ra -+ -+L(less_1bytes): -+ jr ra -+ -+L(more_64bytes): -+ srli.d a0, a0, 3 -+ slli.d a0, a0, 3 -+ addi.d a0, a0, 0x8 -+ st.d a1, t0, 0 -+ -+ sub.d t2, t0, a0 -+ add.d a2, t2, a2 -+ addi.d a2, a2, -0x80 -+ blt a2, zero, L(end_unalign_proc) -+ -+L(loop_less): -+ ST_128(0) -+ addi.d a0, a0, 0x80 -+ addi.d a2, a2, -0x80 -+ bge a2, zero, L(loop_less) -+ -+L(end_unalign_proc): -+ addi.d a2, a2, 0x80 -+ pcaddi t1, 20 -+ andi t5, a2, 0x78 -+ srli.d t5, t5, 1 -+ -+ sub.d t1, t1, t5 -+ jr t1 -+ -+ st.d a1, a0, 112 -+ st.d a1, a0, 104 -+ st.d a1, a0, 96 -+ st.d a1, a0, 88 -+ st.d a1, a0, 80 -+ st.d a1, a0, 72 -+ st.d a1, a0, 64 -+ st.d a1, a0, 56 -+ st.d a1, a0, 48 -+ st.d a1, a0, 40 -+ st.d a1, a0, 32 -+ st.d a1, a0, 24 -+ st.d a1, a0, 16 -+ st.d a1, a0, 8 -+ st.d a1, a0, 0 -+ st.d a1, t7, -8 -+ -+ move a0, t0 -+ jr ra -+END(MEMSET_NAME) -+ -+libc_hidden_builtin_def (MEMSET_NAME) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/memset.c b/sysdeps/loongarch/lp64/multiarch/memset.c -new file mode 100644 -index 00000000..3ff60d8a ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memset.c -@@ -0,0 +1,37 @@ -+/* Multiple versions of memset. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . 
*/ -+ -+/* Define multiple versions only for the definition in libc. */ -+#if IS_IN (libc) -+# define memset __redirect_memset -+# include -+# undef memset -+ -+# define SYMBOL_NAME memset -+# include "ifunc-lasx.h" -+ -+libc_ifunc_redirected (__redirect_memset, memset, -+ IFUNC_SELECTOR ()); -+ -+# ifdef SHARED -+__hidden_ver1 (memset, __GI_memset, __redirect_memset) -+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memset); -+# endif -+ -+#endif --- -2.33.0 - diff --git a/LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch b/LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch deleted file mode 100644 index 1ac8637..0000000 --- a/LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch +++ /dev/null @@ -1,448 +0,0 @@ -From b412bcb2cf4914a664bcd24924d670a2e37394b3 Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Mon, 28 Aug 2023 10:08:35 +0800 -Subject: [PATCH 14/29] LoongArch: Add ifunc support for rawmemchr{aligned, - lsx, lasx} - -According to glibc rawmemchr microbenchmark, A few cases tested with -char '\0' experience performance degradation due to the lasx and lsx -versions don't handle the '\0' separately. Overall, rawmemchr-lasx -implementation could reduce the runtime about 40%-80%, rawmemchr-lsx -implementation could reduce the runtime about 40%-66%, rawmemchr-aligned -implementation could reduce the runtime about 20%-40%. - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/lp64/multiarch/Makefile | 3 + - .../lp64/multiarch/ifunc-impl-list.c | 8 ++ - .../lp64/multiarch/ifunc-rawmemchr.h | 40 ++++++ - .../lp64/multiarch/rawmemchr-aligned.S | 124 ++++++++++++++++++ - .../loongarch/lp64/multiarch/rawmemchr-lasx.S | 82 ++++++++++++ - .../loongarch/lp64/multiarch/rawmemchr-lsx.S | 71 ++++++++++ - sysdeps/loongarch/lp64/multiarch/rawmemchr.c | 37 ++++++ - 7 files changed, 365 insertions(+) - create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h - create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr.c - -diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile -index 5d7ae7ae..64416b02 100644 ---- a/sysdeps/loongarch/lp64/multiarch/Makefile -+++ b/sysdeps/loongarch/lp64/multiarch/Makefile -@@ -21,5 +21,8 @@ sysdep_routines += \ - memmove-unaligned \ - memmove-lsx \ - memmove-lasx \ -+ rawmemchr-aligned \ -+ rawmemchr-lsx \ -+ rawmemchr-lasx \ - # sysdep_routines - endif -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -index c8ba87bd..3db9af14 100644 ---- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -@@ -94,5 +94,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_aligned) - ) - -+ IFUNC_IMPL (i, name, rawmemchr, -+#if !defined __loongarch_soft_float -+ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LASX, __rawmemchr_lasx) -+ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LSX, __rawmemchr_lsx) -+#endif -+ IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned) -+ ) -+ - return i; - } -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h -new file mode 100644 -index 00000000..a7bb4cf9 
---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h -@@ -0,0 +1,40 @@ -+/* Common definition for rawmemchr ifunc selections. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+#include -+#include -+ -+#if !defined __loongarch_soft_float -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; -+#endif -+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; -+ -+static inline void * -+IFUNC_SELECTOR (void) -+{ -+#if !defined __loongarch_soft_float -+ if (SUPPORT_LASX) -+ return OPTIMIZE (lasx); -+ else if (SUPPORT_LSX) -+ return OPTIMIZE (lsx); -+ else -+#endif -+ return OPTIMIZE (aligned); -+} -diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S -new file mode 100644 -index 00000000..9c7155ae ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S -@@ -0,0 +1,124 @@ -+/* Optimized rawmemchr implementation using basic LoongArch instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+# define RAWMEMCHR_NAME __rawmemchr_aligned -+#else -+# define RAWMEMCHR_NAME __rawmemchr -+#endif -+ -+LEAF(RAWMEMCHR_NAME, 6) -+ andi t1, a0, 0x7 -+ bstrins.d a0, zero, 2, 0 -+ lu12i.w a2, 0x01010 -+ bstrins.d a1, a1, 15, 8 -+ -+ ld.d t0, a0, 0 -+ slli.d t1, t1, 3 -+ ori a2, a2, 0x101 -+ bstrins.d a1, a1, 31, 16 -+ -+ li.w t8, -1 -+ bstrins.d a1, a1, 63, 32 -+ bstrins.d a2, a2, 63, 32 -+ sll.d t2, t8, t1 -+ -+ sll.d t3, a1, t1 -+ orn t0, t0, t2 -+ slli.d a3, a2, 7 -+ beqz a1, L(find_zero) -+ -+ xor t0, t0, t3 -+ sub.d t1, t0, a2 -+ andn t2, a3, t0 -+ and t3, t1, t2 -+ -+ bnez t3, L(count_pos) -+ addi.d a0, a0, 8 -+ -+L(loop): -+ ld.d t0, a0, 0 -+ xor t0, t0, a1 -+ -+ sub.d t1, t0, a2 -+ andn t2, a3, t0 -+ and t3, t1, t2 -+ bnez t3, L(count_pos) -+ -+ ld.d t0, a0, 8 -+ addi.d a0, a0, 16 -+ xor t0, t0, a1 -+ sub.d t1, t0, a2 -+ -+ andn t2, a3, t0 -+ and t3, t1, t2 -+ beqz t3, L(loop) -+ addi.d a0, a0, -8 -+L(count_pos): -+ ctz.d t0, t3 -+ srli.d t0, t0, 3 -+ add.d a0, a0, t0 -+ jr ra -+ -+L(loop_7bit): -+ ld.d t0, a0, 0 -+L(find_zero): -+ sub.d t1, t0, a2 -+ and t2, t1, a3 -+ bnez t2, L(more_check) -+ -+ ld.d t0, a0, 8 -+ addi.d a0, a0, 16 -+ sub.d t1, t0, a2 -+ and t2, t1, a3 -+ -+ beqz t2, L(loop_7bit) -+ addi.d a0, a0, -8 -+ -+L(more_check): -+ andn t2, a3, t0 -+ and t3, t1, t2 -+ bnez t3, L(count_pos) -+ addi.d a0, a0, 8 -+ -+L(loop_8bit): -+ ld.d t0, a0, 0 -+ -+ sub.d t1, t0, a2 -+ andn t2, a3, t0 -+ and t3, t1, t2 -+ bnez t3, L(count_pos) -+ -+ ld.d t0, a0, 8 -+ addi.d a0, a0, 16 -+ sub.d t1, t0, a2 -+ -+ andn t2, a3, t0 -+ and t3, t1, t2 -+ beqz t3, L(loop_8bit) -+ -+ addi.d a0, a0, -8 -+ b L(count_pos) -+ -+END(RAWMEMCHR_NAME) -+ -+libc_hidden_builtin_def (__rawmemchr) -diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S -new file mode 100644 -index 00000000..be2eb59d ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S -@@ -0,0 +1,82 @@ -+/* Optimized rawmemchr implementation using LoongArch LASX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# define RAWMEMCHR __rawmemchr_lasx -+ -+LEAF(RAWMEMCHR, 6) -+ move a2, a0 -+ bstrins.d a0, zero, 5, 0 -+ xvld xr0, a0, 0 -+ xvld xr1, a0, 32 -+ -+ xvreplgr2vr.b xr2, a1 -+ xvseq.b xr0, xr0, xr2 -+ xvseq.b xr1, xr1, xr2 -+ xvmsknz.b xr0, xr0 -+ -+ xvmsknz.b xr1, xr1 -+ xvpickve.w xr3, xr0, 4 -+ xvpickve.w xr4, xr1, 4 -+ vilvl.h vr0, vr3, vr0 -+ -+ vilvl.h vr1, vr4, vr1 -+ vilvl.w vr0, vr1, vr0 -+ movfr2gr.d t0, fa0 -+ sra.d t0, t0, a2 -+ -+ -+ beqz t0, L(loop) -+ ctz.d t0, t0 -+ add.d a0, a2, t0 -+ jr ra -+ -+L(loop): -+ xvld xr0, a0, 64 -+ xvld xr1, a0, 96 -+ addi.d a0, a0, 64 -+ xvseq.b xr0, xr0, xr2 -+ -+ xvseq.b xr1, xr1, xr2 -+ xvmax.bu xr3, xr0, xr1 -+ xvseteqz.v fcc0, xr3 -+ bcnez fcc0, L(loop) -+ -+ xvmsknz.b xr0, xr0 -+ xvmsknz.b xr1, xr1 -+ xvpickve.w xr3, xr0, 4 -+ xvpickve.w xr4, xr1, 4 -+ -+ -+ vilvl.h vr0, vr3, vr0 -+ vilvl.h vr1, vr4, vr1 -+ vilvl.w vr0, vr1, vr0 -+ movfr2gr.d t0, fa0 -+ -+ ctz.d t0, t0 -+ add.d a0, a0, t0 -+ jr ra -+END(RAWMEMCHR) -+ -+libc_hidden_builtin_def (RAWMEMCHR) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S -new file mode 100644 -index 00000000..2f6fe024 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S -@@ -0,0 +1,71 @@ -+/* Optimized rawmemchr implementation using LoongArch LSX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# define RAWMEMCHR __rawmemchr_lsx -+ -+LEAF(RAWMEMCHR, 6) -+ move a2, a0 -+ bstrins.d a0, zero, 4, 0 -+ vld vr0, a0, 0 -+ vld vr1, a0, 16 -+ -+ vreplgr2vr.b vr2, a1 -+ vseq.b vr0, vr0, vr2 -+ vseq.b vr1, vr1, vr2 -+ vmsknz.b vr0, vr0 -+ -+ vmsknz.b vr1, vr1 -+ vilvl.h vr0, vr1, vr0 -+ movfr2gr.s t0, fa0 -+ sra.w t0, t0, a2 -+ -+ beqz t0, L(loop) -+ ctz.w t0, t0 -+ add.d a0, a2, t0 -+ jr ra -+ -+ -+L(loop): -+ vld vr0, a0, 32 -+ vld vr1, a0, 48 -+ addi.d a0, a0, 32 -+ vseq.b vr0, vr0, vr2 -+ -+ vseq.b vr1, vr1, vr2 -+ vmax.bu vr3, vr0, vr1 -+ vseteqz.v fcc0, vr3 -+ bcnez fcc0, L(loop) -+ -+ vmsknz.b vr0, vr0 -+ vmsknz.b vr1, vr1 -+ vilvl.h vr0, vr1, vr0 -+ movfr2gr.s t0, fa0 -+ -+ ctz.w t0, t0 -+ add.d a0, a0, t0 -+ jr ra -+END(RAWMEMCHR) -+ -+libc_hidden_builtin_def (RAWMEMCHR) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr.c b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c -new file mode 100644 -index 00000000..89c7ffff ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c -@@ -0,0 +1,37 @@ -+/* Multiple versions of rawmemchr. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. 
-+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+#if IS_IN (libc) -+# define rawmemchr __redirect_rawmemchr -+# define __rawmemchr __redirect___rawmemchr -+# include -+# undef rawmemchr -+# undef __rawmemchr -+ -+# define SYMBOL_NAME rawmemchr -+# include "ifunc-rawmemchr.h" -+ -+libc_ifunc_redirected (__redirect_rawmemchr, __rawmemchr, -+ IFUNC_SELECTOR ()); -+weak_alias (__rawmemchr, rawmemchr) -+# ifdef SHARED -+__hidden_ver1 (__rawmemchr, __GI___rawmemchr, __redirect___rawmemchr) -+ __attribute__((visibility ("hidden"))); -+# endif -+#endif --- -2.33.0 - diff --git a/LoongArch-Add-ifunc-support-for-strcmp-aligned-lsx.patch b/LoongArch-Add-ifunc-support-for-strcmp-aligned-lsx.patch deleted file mode 100644 index d960bc6..0000000 --- a/LoongArch-Add-ifunc-support-for-strcmp-aligned-lsx.patch +++ /dev/null @@ -1,499 +0,0 @@ -From e258cfcf92f5e31e902fa045b41652f00fcf2521 Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Thu, 24 Aug 2023 16:50:18 +0800 -Subject: [PATCH 09/29] LoongArch: Add ifunc support for strcmp{aligned, lsx} - -Based on the glibc microbenchmark, strcmp-aligned implementation could -reduce the runtime 0%-10% for aligned comparison, 10%-20% for unaligned -comparison, strcmp-lsx implemenation could reduce the runtime 0%-50%. 
- -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/lp64/multiarch/Makefile | 2 + - .../lp64/multiarch/ifunc-impl-list.c | 7 + - .../loongarch/lp64/multiarch/ifunc-strcmp.h | 38 ++++ - .../loongarch/lp64/multiarch/strcmp-aligned.S | 179 ++++++++++++++++++ - sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S | 165 ++++++++++++++++ - sysdeps/loongarch/lp64/multiarch/strcmp.c | 35 ++++ - 6 files changed, 426 insertions(+) - create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h - create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp.c - -diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile -index c4dd3143..d5a500de 100644 ---- a/sysdeps/loongarch/lp64/multiarch/Makefile -+++ b/sysdeps/loongarch/lp64/multiarch/Makefile -@@ -12,6 +12,8 @@ sysdep_routines += \ - strchrnul-aligned \ - strchrnul-lsx \ - strchrnul-lasx \ -+ strcmp-aligned \ -+ strcmp-lsx \ - memcpy-aligned \ - memcpy-unaligned \ - memmove-unaligned \ -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -index 7cec0b77..9183b7da 100644 ---- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -@@ -62,6 +62,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned) - ) - -+ IFUNC_IMPL (i, name, strcmp, -+#if !defined __loongarch_soft_float -+ IFUNC_IMPL_ADD (array, i, strcmp, SUPPORT_LSX, __strcmp_lsx) -+#endif -+ IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_aligned) -+ ) -+ - IFUNC_IMPL (i, name, memcpy, - #if !defined __loongarch_soft_float - IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h -new file mode 100644 -index 00000000..ca26352b ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h -@@ -0,0 +1,38 @@ -+/* Common definition for strcmp ifunc selection. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . 
*/ -+ -+#include -+#include -+ -+#if !defined __loongarch_soft_float -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; -+#endif -+ -+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; -+ -+static inline void * -+IFUNC_SELECTOR (void) -+{ -+#if !defined __loongarch_soft_float -+ if (SUPPORT_LSX) -+ return OPTIMIZE (lsx); -+ else -+#endif -+ return OPTIMIZE (aligned); -+} -diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S -new file mode 100644 -index 00000000..f5f4f336 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S -@@ -0,0 +1,179 @@ -+/* Optimized strcmp implementation using basic Loongarch instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+# define STRCMP_NAME __strcmp_aligned -+#else -+# define STRCMP_NAME strcmp -+#endif -+ -+LEAF(STRCMP_NAME, 6) -+ lu12i.w a4, 0x01010 -+ andi a2, a0, 0x7 -+ ori a4, a4, 0x101 -+ andi a3, a1, 0x7 -+ -+ bstrins.d a4, a4, 63, 32 -+ li.d t7, -1 -+ li.d t8, 8 -+ slli.d a5, a4, 7 -+ -+ bne a2, a3, L(unaligned) -+ bstrins.d a0, zero, 2, 0 -+ bstrins.d a1, zero, 2, 0 -+ ld.d t0, a0, 0 -+ -+ ld.d t1, a1, 0 -+ slli.d t3, a2, 3 -+ sll.d t2, t7, t3 -+ orn t0, t0, t2 -+ -+ -+ orn t1, t1, t2 -+ sub.d t2, t0, a4 -+ andn t3, a5, t0 -+ and t2, t2, t3 -+ -+ bne t0, t1, L(al_end) -+L(al_loop): -+ bnez t2, L(ret0) -+ ldx.d t0, a0, t8 -+ ldx.d t1, a1, t8 -+ -+ addi.d t8, t8, 8 -+ sub.d t2, t0, a4 -+ andn t3, a5, t0 -+ and t2, t2, t3 -+ -+ beq t0, t1, L(al_loop) -+L(al_end): -+ xor t3, t0, t1 -+ or t2, t2, t3 -+ ctz.d t3, t2 -+ -+ -+ bstrins.d t3, zero, 2, 0 -+ srl.d t0, t0, t3 -+ srl.d t1, t1, t3 -+ andi t0, t0, 0xff -+ -+ andi t1, t1, 0xff -+ sub.d a0, t0, t1 -+ jr ra -+ nop -+ -+L(ret0): -+ move a0, zero -+ jr ra -+ nop -+ nop -+ -+L(unaligned): -+ slt a6, a3, a2 -+ xor t0, a0, a1 -+ maskeqz t0, t0, a6 -+ xor a0, a0, t0 -+ -+ -+ xor a1, a1, t0 -+ andi a2, a0, 0x7 -+ andi a3, a1, 0x7 -+ bstrins.d a0, zero, 2, 0 -+ -+ bstrins.d a1, zero, 2, 0 -+ ld.d t4, a0, 0 -+ ld.d t1, a1, 0 -+ slli.d a2, a2, 3 -+ -+ slli.d a3, a3, 3 -+ srl.d t0, t4, a2 -+ srl.d t1, t1, a3 -+ srl.d t5, t7, a3 -+ -+ orn t0, t0, t5 -+ orn t1, t1, t5 -+ bne t0, t1, L(not_equal) -+ sll.d t5, t7, a2 -+ -+ -+ sub.d a3, a2, a3 -+ orn t4, t4, t5 -+ sub.d a2, zero, a3 -+ sub.d t2, t4, a4 -+ -+ andn t3, a5, t4 -+ and t2, t2, t3 -+ bnez t2, L(find_zero) -+L(un_loop): -+ srl.d t5, t4, a3 -+ -+ ldx.d t4, a0, t8 -+ ldx.d t1, a1, t8 -+ addi.d t8, t8, 8 -+ sll.d t0, t4, a2 -+ -+ or t0, t0, t5 -+ bne t0, t1, L(not_equal) -+ sub.d t2, t4, a4 -+ andn t3, a5, t4 -+ -+ -+ and t2, t2, t3 -+ beqz t2, L(un_loop) -+L(find_zero): -+ sub.d t2, t0, a4 -+ andn t3, a5, t0 -+ -+ and t2, t2, t3 -+ bnez t2, L(ret0) -+ ldx.d t1, a1, t8 -+ srl.d t0, 
t4, a3 -+ -+L(not_equal): -+ sub.d t2, t0, a4 -+ andn t3, a5, t0 -+ and t2, t2, t3 -+ xor t3, t0, t1 -+ -+ or t2, t2, t3 -+L(un_end): -+ ctz.d t3, t2 -+ bstrins.d t3, zero, 2, 0 -+ srl.d t0, t0, t3 -+ -+ -+ srl.d t1, t1, t3 -+ andi t0, t0, 0xff -+ andi t1, t1, 0xff -+ sub.d t2, t0, t1 -+ -+ -+ sub.d t3, t1, t0 -+ masknez t0, t2, a6 -+ maskeqz t1, t3, a6 -+ or a0, t0, t1 -+ -+ jr ra -+END(STRCMP_NAME) -+ -+libc_hidden_builtin_def (STRCMP_NAME) -diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S -new file mode 100644 -index 00000000..2e177a38 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S -@@ -0,0 +1,165 @@ -+/* Optimized strcmp implementation using Loongarch LSX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# define STRCMP __strcmp_lsx -+ -+LEAF(STRCMP, 6) -+ pcalau12i t0, %pc_hi20(L(INDEX)) -+ andi a2, a0, 0xf -+ vld vr2, t0, %pc_lo12(L(INDEX)) -+ andi a3, a1, 0xf -+ -+ bne a2, a3, L(unaligned) -+ bstrins.d a0, zero, 3, 0 -+ bstrins.d a1, zero, 3, 0 -+ vld vr0, a0, 0 -+ -+ vld vr1, a1, 0 -+ vreplgr2vr.b vr3, a2 -+ vslt.b vr2, vr2, vr3 -+ vseq.b vr3, vr0, vr1 -+ -+ vmin.bu vr3, vr0, vr3 -+ vor.v vr3, vr3, vr2 -+ vsetanyeqz.b fcc0, vr3 -+ bcnez fcc0, L(al_out) -+ -+ -+L(al_loop): -+ vld vr0, a0, 16 -+ vld vr1, a1, 16 -+ addi.d a0, a0, 16 -+ addi.d a1, a1, 16 -+ -+ vseq.b vr3, vr0, vr1 -+ vmin.bu vr3, vr0, vr3 -+ vsetanyeqz.b fcc0, vr3 -+ bceqz fcc0, L(al_loop) -+ -+L(al_out): -+ vseqi.b vr3, vr3, 0 -+ vfrstpi.b vr3, vr3, 0 -+ vshuf.b vr0, vr0, vr0, vr3 -+ vshuf.b vr1, vr1, vr1, vr3 -+ -+ vpickve2gr.bu t0, vr0, 0 -+ vpickve2gr.bu t1, vr1, 0 -+ sub.d a0, t0, t1 -+ jr ra -+ -+ -+L(unaligned): -+ slt a4, a3, a2 -+ xor t0, a0, a1 -+ maskeqz t0, t0, a4 -+ xor a0, a0, t0 -+ -+ xor a1, a1, t0 -+ andi a2, a0, 0xf -+ andi a3, a1, 0xf -+ bstrins.d a0, zero, 3, 0 -+ -+ bstrins.d a1, zero, 3, 0 -+ vld vr3, a0, 0 -+ vld vr1, a1, 0 -+ vreplgr2vr.b vr4, a2 -+ -+ vreplgr2vr.b vr5, a3 -+ vslt.b vr7, vr2, vr5 -+ vsub.b vr5, vr5, vr4 -+ vaddi.bu vr6, vr2, 16 -+ -+ -+ vsub.b vr6, vr6, vr5 -+ vshuf.b vr0, vr3, vr3, vr6 -+ vor.v vr0, vr0, vr7 -+ vor.v vr1, vr1, vr7 -+ -+ vseq.b vr5, vr0, vr1 -+ vsetanyeqz.b fcc0, vr5 -+ bcnez fcc0, L(not_equal) -+ vslt.b vr4, vr2, vr4 -+ -+ vor.v vr0, vr3, vr4 -+ vsetanyeqz.b fcc0, vr0 -+ bcnez fcc0, L(find_zero) -+ nop -+ -+L(un_loop): -+ vld vr3, a0, 16 -+ vld vr1, a1, 16 -+ addi.d a0, a0, 16 -+ addi.d a1, a1, 16 -+ -+ -+ vshuf.b vr0, vr3, vr0, vr6 -+ vseq.b vr5, vr0, vr1 -+ vsetanyeqz.b fcc0, vr5 -+ bcnez fcc0, L(not_equal) -+ -+ vsetanyeqz.b fcc0, vr3 -+ vor.v vr0, vr3, vr3 -+ bceqz fcc0, L(un_loop) -+L(find_zero): -+ vmin.bu vr5, vr1, vr5 -+ -+ vsetanyeqz.b fcc0, vr5 -+ bcnez fcc0, L(ret0) 
-+ vld vr1, a1, 16 -+ vshuf.b vr0, vr3, vr3, vr6 -+ -+ vseq.b vr5, vr0, vr1 -+L(not_equal): -+ vmin.bu vr5, vr0, vr5 -+L(un_end): -+ vseqi.b vr5, vr5, 0 -+ vfrstpi.b vr5, vr5, 0 -+ -+ -+ vshuf.b vr0, vr0, vr0, vr5 -+ vshuf.b vr1, vr1, vr1, vr5 -+ vpickve2gr.bu t0, vr0, 0 -+ vpickve2gr.bu t1, vr1, 0 -+ -+ sub.d t3, t0, t1 -+ sub.d t4, t1, t0 -+ masknez t0, t3, a4 -+ maskeqz t1, t4, a4 -+ -+ or a0, t0, t1 -+ jr ra -+L(ret0): -+ move a0, zero -+ jr ra -+END(STRCMP) -+ -+ .section .rodata.cst16,"M",@progbits,16 -+ .align 4 -+L(INDEX): -+ .dword 0x0706050403020100 -+ .dword 0x0f0e0d0c0b0a0908 -+ -+libc_hidden_builtin_def (STRCMP) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp.c b/sysdeps/loongarch/lp64/multiarch/strcmp.c -new file mode 100644 -index 00000000..6f249c0b ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strcmp.c -@@ -0,0 +1,35 @@ -+/* Multiple versions of strcmp. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+/* Define multiple versions only for the definition in libc. */ -+#if IS_IN (libc) -+# define strcmp __redirect_strcmp -+# include -+# undef strcmp -+ -+# define SYMBOL_NAME strcmp -+# include "ifunc-strcmp.h" -+ -+libc_ifunc_redirected (__redirect_strcmp, strcmp, IFUNC_SELECTOR ()); -+ -+# ifdef SHARED -+__hidden_ver1 (strcmp, __GI_strcmp, __redirect_strcmp) -+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strcmp); -+# endif -+#endif --- -2.33.0 - diff --git a/LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch b/LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch deleted file mode 100644 index 2e98b05..0000000 --- a/LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch +++ /dev/null @@ -1,1099 +0,0 @@ -From 351086591d938aaf884d475261ae96ec5da00384 Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Wed, 13 Sep 2023 15:34:59 +0800 -Subject: [PATCH 22/29] LoongArch: Add ifunc support for strcpy, - stpcpy{aligned, unaligned, lsx, lasx} - -According to glibc strcpy and stpcpy microbenchmark test results(changed -to use generic_strcpy and generic_stpcpy instead of strlen + memcpy), -comparing with the generic version, this implementation could reduce the -runtime as following: - -Name Percent of rutime reduced -strcpy-aligned 8%-45% -strcpy-unaligned 8%-48%, comparing with the aligned version, unaligned - version takes less instructions to copy the tail of data - which length is less than 8. 
it also has better performance - in case src and dest cannot be both aligned with 8bytes -strcpy-lsx 20%-80% -strcpy-lasx 15%-86% -stpcpy-aligned 6%-43% -stpcpy-unaligned 8%-48% -stpcpy-lsx 10%-80% -stpcpy-lasx 10%-87% - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/lp64/multiarch/Makefile | 8 + - .../lp64/multiarch/ifunc-impl-list.c | 18 ++ - .../loongarch/lp64/multiarch/stpcpy-aligned.S | 27 +++ - .../loongarch/lp64/multiarch/stpcpy-lasx.S | 22 ++ - sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S | 22 ++ - .../lp64/multiarch/stpcpy-unaligned.S | 22 ++ - sysdeps/loongarch/lp64/multiarch/stpcpy.c | 42 ++++ - .../loongarch/lp64/multiarch/strcpy-aligned.S | 202 ++++++++++++++++ - .../loongarch/lp64/multiarch/strcpy-lasx.S | 215 ++++++++++++++++++ - sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S | 212 +++++++++++++++++ - .../lp64/multiarch/strcpy-unaligned.S | 138 +++++++++++ - sysdeps/loongarch/lp64/multiarch/strcpy.c | 35 +++ - 12 files changed, 963 insertions(+) - create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy.c - create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy.c - -diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile -index 360a6718..39550bea 100644 ---- a/sysdeps/loongarch/lp64/multiarch/Makefile -+++ b/sysdeps/loongarch/lp64/multiarch/Makefile -@@ -16,6 +16,14 @@ sysdep_routines += \ - strcmp-lsx \ - strncmp-aligned \ - strncmp-lsx \ -+ strcpy-aligned \ -+ strcpy-unaligned \ -+ strcpy-lsx \ -+ strcpy-lasx \ -+ stpcpy-aligned \ -+ stpcpy-unaligned \ -+ stpcpy-lsx \ -+ stpcpy-lasx \ - memcpy-aligned \ - memcpy-unaligned \ - memmove-unaligned \ -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -index e397d58c..39a14f1d 100644 ---- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -@@ -76,6 +76,24 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_aligned) - ) - -+ IFUNC_IMPL (i, name, strcpy, -+#if !defined __loongarch_soft_float -+ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LASX, __strcpy_lasx) -+ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LSX, __strcpy_lsx) -+#endif -+ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_UAL, __strcpy_unaligned) -+ IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_aligned) -+ ) -+ -+ IFUNC_IMPL (i, name, stpcpy, -+#if !defined __loongarch_soft_float -+ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LASX, __stpcpy_lasx) -+ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LSX, __stpcpy_lsx) -+#endif -+ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_UAL, __stpcpy_unaligned) -+ IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned) -+ ) -+ - IFUNC_IMPL (i, name, memcpy, - #if !defined __loongarch_soft_float - IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) -diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S 
b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S -new file mode 100644 -index 00000000..1f763db6 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S -@@ -0,0 +1,27 @@ -+/* stpcpy-aligned implementation is in strcpy-aligned.S. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#if IS_IN (libc) -+# define STPCPY __stpcpy_aligned -+#else -+# define STPCPY __stpcpy -+#endif -+ -+#define USE_AS_STPCPY -+#define STRCPY STPCPY -+#include "strcpy-aligned.S" -diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S -new file mode 100644 -index 00000000..13d6c953 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S -@@ -0,0 +1,22 @@ -+/* stpcpy-lasx implementation is in strcpy-lasx.S. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#define STPCPY __stpcpy_lasx -+#define USE_AS_STPCPY -+#define STRCPY STPCPY -+#include "strcpy-lasx.S" -diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S -new file mode 100644 -index 00000000..e0f17ab5 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S -@@ -0,0 +1,22 @@ -+/* stpcpy-lsx implementation is in strcpy-lsx.S. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#define STPCPY __stpcpy_lsx -+#define USE_AS_STPCPY -+#define STRCPY STPCPY -+#include "strcpy-lsx.S" -diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S -new file mode 100644 -index 00000000..cc2f9712 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S -@@ -0,0 +1,22 @@ -+/* stpcpy-unaligned implementation is in strcpy-unaligned.S. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#define STPCPY __stpcpy_unaligned -+#define USE_AS_STPCPY -+#define STRCPY STPCPY -+#include "strcpy-unaligned.S" -diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy.c b/sysdeps/loongarch/lp64/multiarch/stpcpy.c -new file mode 100644 -index 00000000..d4860d7a ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/stpcpy.c -@@ -0,0 +1,42 @@ -+/* Multiple versions of stpcpy. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2017-2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+/* Define multiple versions only for the definition in libc. */ -+#if IS_IN (libc) -+# define stpcpy __redirect_stpcpy -+# define __stpcpy __redirect___stpcpy -+# define NO_MEMPCPY_STPCPY_REDIRECT -+# define __NO_STRING_INLINES -+# include -+# undef stpcpy -+# undef __stpcpy -+ -+# define SYMBOL_NAME stpcpy -+# include "ifunc-lasx.h" -+ -+libc_ifunc_redirected (__redirect_stpcpy, __stpcpy, IFUNC_SELECTOR ()); -+ -+weak_alias (__stpcpy, stpcpy) -+# ifdef SHARED -+__hidden_ver1 (__stpcpy, __GI___stpcpy, __redirect___stpcpy) -+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy); -+__hidden_ver1 (stpcpy, __GI_stpcpy, __redirect_stpcpy) -+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy); -+# endif -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S -new file mode 100644 -index 00000000..4ed539fd ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S -@@ -0,0 +1,202 @@ -+/* Optimized strcpy stpcpy aligned implementation using basic LoongArch -+ instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. 
-+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+# ifndef STRCPY -+# define STRCPY __strcpy_aligned -+# endif -+#else -+# ifndef STRCPY -+# define STRCPY strcpy -+# endif -+#endif -+ -+LEAF(STRCPY, 6) -+ andi a3, a0, 0x7 -+ move a2, a0 -+ beqz a3, L(dest_align) -+ sub.d a5, a1, a3 -+ addi.d a5, a5, 8 -+ -+L(make_dest_align): -+ ld.b t0, a1, 0 -+ addi.d a1, a1, 1 -+ st.b t0, a2, 0 -+ addi.d a2, a2, 1 -+ beqz t0, L(al_out) -+ -+ bne a1, a5, L(make_dest_align) -+ -+L(dest_align): -+ andi a4, a1, 7 -+ bstrins.d a1, zero, 2, 0 -+ -+ lu12i.w t5, 0x1010 -+ ld.d t0, a1, 0 -+ ori t5, t5, 0x101 -+ bstrins.d t5, t5, 63, 32 -+ -+ slli.d t6, t5, 0x7 -+ bnez a4, L(unalign) -+ sub.d t1, t0, t5 -+ andn t2, t6, t0 -+ -+ and t3, t1, t2 -+ bnez t3, L(al_end) -+ -+L(al_loop): -+ st.d t0, a2, 0 -+ ld.d t0, a1, 8 -+ -+ addi.d a1, a1, 8 -+ addi.d a2, a2, 8 -+ sub.d t1, t0, t5 -+ andn t2, t6, t0 -+ -+ and t3, t1, t2 -+ beqz t3, L(al_loop) -+ -+L(al_end): -+ ctz.d t1, t3 -+ srli.d t1, t1, 3 -+ addi.d t1, t1, 1 -+ -+ andi a3, t1, 8 -+ andi a4, t1, 4 -+ andi a5, t1, 2 -+ andi a6, t1, 1 -+ -+L(al_end_8): -+ beqz a3, L(al_end_4) -+ st.d t0, a2, 0 -+#ifdef USE_AS_STPCPY -+ addi.d a0, a2, 7 -+#endif -+ jr ra -+L(al_end_4): -+ beqz a4, L(al_end_2) -+ st.w t0, a2, 0 -+ addi.d a2, a2, 4 -+ srli.d t0, t0, 32 -+L(al_end_2): -+ beqz a5, L(al_end_1) -+ st.h t0, a2, 0 -+ addi.d a2, a2, 2 -+ srli.d t0, t0, 16 -+L(al_end_1): -+ beqz a6, L(al_out) -+ st.b t0, a2, 0 -+ addi.d a2, a2, 1 -+L(al_out): -+#ifdef USE_AS_STPCPY -+ addi.d a0, a2, -1 -+#endif -+ jr ra -+ -+ .align 4 -+L(unalign): -+ slli.d a5, a4, 3 -+ li.d t1, -1 -+ sub.d a6, zero, a5 -+ -+ srl.d a7, t0, a5 -+ sll.d t7, t1, a6 -+ -+ or t0, a7, t7 -+ sub.d t1, t0, t5 -+ andn t2, t6, t0 -+ and t3, t1, t2 -+ -+ bnez t3, L(un_end) -+ -+ ld.d t4, a1, 8 -+ -+ sub.d t1, t4, t5 -+ andn t2, t6, t4 -+ sll.d t0, t4, a6 -+ and t3, t1, t2 -+ -+ or t0, t0, a7 -+ bnez t3, L(un_end_with_remaining) -+ -+L(un_loop): -+ srl.d a7, t4, a5 -+ -+ ld.d t4, a1, 16 -+ addi.d a1, a1, 8 -+ -+ st.d t0, a2, 0 -+ addi.d a2, a2, 8 -+ -+ sub.d t1, t4, t5 -+ andn t2, t6, t4 -+ sll.d t0, t4, a6 -+ and t3, t1, t2 -+ -+ or t0, t0, a7 -+ beqz t3, L(un_loop) -+ -+L(un_end_with_remaining): -+ ctz.d t1, t3 -+ srli.d t1, t1, 3 -+ addi.d t1, t1, 1 -+ sub.d t1, t1, a4 -+ -+ blt t1, zero, L(un_end_less_8) -+ st.d t0, a2, 0 -+ addi.d a2, a2, 8 -+ beqz t1, L(un_out) -+ srl.d t0, t4, a5 -+ b L(un_end_less_8) -+ -+L(un_end): -+ ctz.d t1, t3 -+ srli.d t1, t1, 3 -+ addi.d t1, t1, 1 -+ -+L(un_end_less_8): -+ andi a4, t1, 4 -+ andi a5, t1, 2 -+ andi a6, t1, 1 -+L(un_end_4): -+ beqz a4, L(un_end_2) -+ st.w t0, a2, 0 -+ addi.d a2, a2, 4 -+ srli.d t0, t0, 32 -+L(un_end_2): -+ beqz a5, L(un_end_1) -+ st.h t0, a2, 0 -+ addi.d a2, a2, 2 -+ srli.d t0, t0, 16 -+L(un_end_1): -+ beqz a6, L(un_out) -+ st.b t0, a2, 0 -+ addi.d a2, a2, 
1 -+L(un_out): -+#ifdef USE_AS_STPCPY -+ addi.d a0, a2, -1 -+#endif -+ jr ra -+END(STRCPY) -+ -+libc_hidden_builtin_def (STRCPY) -diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S -new file mode 100644 -index 00000000..c2825612 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S -@@ -0,0 +1,215 @@ -+/* Optimized strcpy stpcpy implementation using LoongArch LASX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# ifndef STRCPY -+# define STRCPY __strcpy_lasx -+# endif -+ -+# ifdef USE_AS_STPCPY -+# define dstend a0 -+# else -+# define dstend a4 -+# endif -+ -+LEAF(STRCPY, 6) -+ ori t8, zero, 0xfe0 -+ andi t0, a1, 0xfff -+ li.d t7, -1 -+ move a2, a0 -+ -+ bltu t8, t0, L(page_cross_start) -+L(start_entry): -+ xvld xr0, a1, 0 -+ li.d t0, 32 -+ andi t1, a2, 0x1f -+ -+ xvsetanyeqz.b fcc0, xr0 -+ sub.d t0, t0, t1 -+ bcnez fcc0, L(end) -+ add.d a1, a1, t0 -+ -+ xvst xr0, a2, 0 -+ andi a3, a1, 0x1f -+ add.d a2, a2, t0 -+ bnez a3, L(unaligned) -+ -+ -+ xvld xr0, a1, 0 -+ xvsetanyeqz.b fcc0, xr0 -+ bcnez fcc0, L(al_end) -+L(al_loop): -+ xvst xr0, a2, 0 -+ -+ xvld xr0, a1, 32 -+ addi.d a2, a2, 32 -+ addi.d a1, a1, 32 -+ xvsetanyeqz.b fcc0, xr0 -+ -+ bceqz fcc0, L(al_loop) -+L(al_end): -+ xvmsknz.b xr0, xr0 -+ xvpickve.w xr1, xr0, 4 -+ vilvl.h vr0, vr1, vr0 -+ -+ movfr2gr.s t0, fa0 -+ cto.w t0, t0 -+ add.d a1, a1, t0 -+ xvld xr0, a1, -31 -+ -+ -+ add.d dstend, a2, t0 -+ xvst xr0, dstend, -31 -+ jr ra -+ nop -+ -+L(page_cross_start): -+ move a4, a1 -+ bstrins.d a4, zero, 4, 0 -+ xvld xr0, a4, 0 -+ xvmsknz.b xr0, xr0 -+ -+ xvpickve.w xr1, xr0, 4 -+ vilvl.h vr0, vr1, vr0 -+ movfr2gr.s t0, fa0 -+ sra.w t0, t0, a1 -+ -+ beq t0, t7, L(start_entry) -+ b L(tail) -+L(unaligned): -+ andi t0, a1, 0xfff -+ bltu t8, t0, L(un_page_cross) -+ -+ -+L(un_start_entry): -+ xvld xr0, a1, 0 -+ xvsetanyeqz.b fcc0, xr0 -+ bcnez fcc0, L(un_end) -+ addi.d a1, a1, 32 -+ -+L(un_loop): -+ xvst xr0, a2, 0 -+ andi t0, a1, 0xfff -+ addi.d a2, a2, 32 -+ bltu t8, t0, L(page_cross_loop) -+ -+L(un_loop_entry): -+ xvld xr0, a1, 0 -+ addi.d a1, a1, 32 -+ xvsetanyeqz.b fcc0, xr0 -+ bceqz fcc0, L(un_loop) -+ -+ addi.d a1, a1, -32 -+L(un_end): -+ xvmsknz.b xr0, xr0 -+ xvpickve.w xr1, xr0, 4 -+ vilvl.h vr0, vr1, vr0 -+ -+ -+ movfr2gr.s t0, fa0 -+L(un_tail): -+ cto.w t0, t0 -+ add.d a1, a1, t0 -+ xvld xr0, a1, -31 -+ -+ add.d dstend, a2, t0 -+ xvst xr0, dstend, -31 -+ jr ra -+L(un_page_cross): -+ sub.d a4, a1, a3 -+ -+ xvld xr0, a4, 0 -+ xvmsknz.b xr0, xr0 -+ xvpickve.w xr1, xr0, 4 -+ vilvl.h vr0, vr1, vr0 -+ -+ movfr2gr.s t0, fa0 -+ sra.w t0, t0, a1 -+ beq t0, t7, L(un_start_entry) -+ b L(un_tail) -+ -+ -+L(page_cross_loop): -+ sub.d a4, a1, a3 -+ xvld xr0, a4, 0 -+ 
xvmsknz.b xr0, xr0 -+ xvpickve.w xr1, xr0, 4 -+ -+ vilvl.h vr0, vr1, vr0 -+ movfr2gr.s t0, fa0 -+ sra.w t0, t0, a1 -+ beq t0, t7, L(un_loop_entry) -+ -+ b L(un_tail) -+L(end): -+ xvmsknz.b xr0, xr0 -+ xvpickve.w xr1, xr0, 4 -+ vilvl.h vr0, vr1, vr0 -+ -+ movfr2gr.s t0, fa0 -+L(tail): -+ cto.w t0, t0 -+ add.d dstend, a2, t0 -+ add.d a5, a1, t0 -+ -+L(less_32): -+ srli.d t1, t0, 4 -+ beqz t1, L(less_16) -+ vld vr0, a1, 0 -+ vld vr1, a5, -15 -+ -+ vst vr0, a2, 0 -+ vst vr1, dstend, -15 -+ jr ra -+L(less_16): -+ srli.d t1, t0, 3 -+ -+ beqz t1, L(less_8) -+ ld.d t2, a1, 0 -+ ld.d t3, a5, -7 -+ st.d t2, a2, 0 -+ -+ st.d t3, dstend, -7 -+ jr ra -+L(less_8): -+ li.d t1, 3 -+ bltu t0, t1, L(less_3) -+ -+ ld.w t2, a1, 0 -+ ld.w t3, a5, -3 -+ st.w t2, a2, 0 -+ st.w t3, dstend, -3 -+ -+ jr ra -+L(less_3): -+ beqz t0, L(zero_byte) -+ ld.h t2, a1, 0 -+ -+ st.h t2, a2, 0 -+L(zero_byte): -+ st.b zero, dstend, 0 -+ jr ra -+END(STRCPY) -+ -+libc_hidden_builtin_def (STRCPY) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S -new file mode 100644 -index 00000000..fc2498f7 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S -@@ -0,0 +1,212 @@ -+/* Optimized strcpy stpcpy implementation using LoongArch LSX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# ifndef STRCPY -+# define STRCPY __strcpy_lsx -+# endif -+ -+LEAF(STRCPY, 6) -+ pcalau12i t0, %pc_hi20(L(INDEX)) -+ andi a4, a1, 0xf -+ vld vr1, t0, %pc_lo12(L(INDEX)) -+ move a2, a0 -+ -+ beqz a4, L(load_start) -+ xor t0, a1, a4 -+ vld vr0, t0, 0 -+ vreplgr2vr.b vr2, a4 -+ -+ vadd.b vr2, vr2, vr1 -+ vshuf.b vr0, vr2, vr0, vr2 -+ vsetanyeqz.b fcc0, vr0 -+ bcnez fcc0, L(end) -+ -+L(load_start): -+ vld vr0, a1, 0 -+ li.d t1, 16 -+ andi a3, a2, 0xf -+ vsetanyeqz.b fcc0, vr0 -+ -+ -+ sub.d t0, t1, a3 -+ bcnez fcc0, L(end) -+ add.d a1, a1, t0 -+ vst vr0, a2, 0 -+ -+ andi a3, a1, 0xf -+ add.d a2, a2, t0 -+ bnez a3, L(unaligned) -+ vld vr0, a1, 0 -+ -+ vsetanyeqz.b fcc0, vr0 -+ bcnez fcc0, L(al_end) -+L(al_loop): -+ vst vr0, a2, 0 -+ vld vr0, a1, 16 -+ -+ addi.d a2, a2, 16 -+ addi.d a1, a1, 16 -+ vsetanyeqz.b fcc0, vr0 -+ bceqz fcc0, L(al_loop) -+ -+ -+L(al_end): -+ vmsknz.b vr1, vr0 -+ movfr2gr.s t0, fa1 -+ cto.w t0, t0 -+ add.d a1, a1, t0 -+ -+ vld vr0, a1, -15 -+# ifdef USE_AS_STPCPY -+ add.d a0, a2, t0 -+ vst vr0, a0, -15 -+# else -+ add.d a2, a2, t0 -+ vst vr0, a2, -15 -+# endif -+ jr ra -+ -+L(end): -+ vmsknz.b vr1, vr0 -+ movfr2gr.s t0, fa1 -+ cto.w t0, t0 -+ addi.d t0, t0, 1 -+ -+L(end_16): -+ andi t1, t0, 16 -+ beqz t1, L(end_8) -+ vst vr0, a2, 0 -+# ifdef USE_AS_STPCPY -+ addi.d a0, a2, 15 -+# endif -+ jr ra -+ -+L(end_8): -+ andi t2, t0, 8 -+ andi t3, t0, 4 -+ andi t4, t0, 2 -+ andi t5, t0, 1 -+ -+ beqz t2, L(end_4) -+ vstelm.d vr0, a2, 0, 0 -+ addi.d a2, a2, 8 -+ vbsrl.v vr0, vr0, 8 -+ -+L(end_4): -+ beqz t3, L(end_2) -+ vstelm.w vr0, a2, 0, 0 -+ addi.d a2, a2, 4 -+ vbsrl.v vr0, vr0, 4 -+ -+L(end_2): -+ beqz t4, L(end_1) -+ vstelm.h vr0, a2, 0, 0 -+ addi.d a2, a2, 2 -+ vbsrl.v vr0, vr0, 2 -+ -+ -+L(end_1): -+ beqz t5, L(out) -+ vstelm.b vr0, a2, 0, 0 -+ addi.d a2, a2, 1 -+L(out): -+# ifdef USE_AS_STPCPY -+ addi.d a0, a2, -1 -+# endif -+ jr ra -+ -+ .align 4 -+L(unaligned): -+ bstrins.d a1, zero, 3, 0 -+ vld vr2, a1, 0 -+ vreplgr2vr.b vr3, a3 -+ vslt.b vr4, vr1, vr3 -+ -+ vor.v vr0, vr2, vr4 -+ vsetanyeqz.b fcc0, vr0 -+ bcnez fcc0, L(un_first_end) -+ vld vr0, a1, 16 -+ -+ vadd.b vr3, vr3, vr1 -+ vshuf.b vr4, vr0, vr2, vr3 -+ vsetanyeqz.b fcc0, vr0 -+ bcnez fcc0, L(un_end) -+ -+ -+ vor.v vr2, vr0, vr0 -+ addi.d a1, a1, 16 -+L(un_loop): -+ vld vr0, a1, 16 -+ vst vr4, a2, 0 -+ -+ addi.d a2, a2, 16 -+ vshuf.b vr4, vr0, vr2, vr3 -+ vsetanyeqz.b fcc0, vr0 -+ bcnez fcc0, L(un_end) -+ -+ vld vr2, a1, 32 -+ vst vr4, a2, 0 -+ addi.d a1, a1, 32 -+ addi.d a2, a2, 16 -+ -+ vshuf.b vr4, vr2, vr0, vr3 -+ vsetanyeqz.b fcc0, vr2 -+ bceqz fcc0, L(un_loop) -+ vor.v vr0, vr2, vr2 -+ -+ -+ addi.d a1, a1, -16 -+L(un_end): -+ vsetanyeqz.b fcc0, vr4 -+ bcnez fcc0, 1f -+ vst vr4, a2, 0 -+ -+1: -+ vmsknz.b vr1, vr0 -+ movfr2gr.s t0, fa1 -+ cto.w t0, t0 -+ add.d a1, a1, t0 -+ -+ vld vr0, a1, 1 -+ add.d a2, a2, t0 -+ sub.d a2, a2, a3 -+ vst vr0, a2, 1 -+# ifdef USE_AS_STPCPY -+ addi.d a0, a2, 16 -+# endif -+ jr ra -+L(un_first_end): -+ addi.d a2, a2, -16 -+ addi.d a1, a1, -16 -+ b 1b -+END(STRCPY) -+ -+ .section .rodata.cst16,"M",@progbits,16 -+ .align 4 -+L(INDEX): -+ .dword 0x0706050403020100 -+ .dword 0x0f0e0d0c0b0a0908 -+ -+libc_hidden_builtin_def (STRCPY) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S -new file mode 100644 -index 00000000..9e31883b ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S -@@ 
-0,0 +1,138 @@ -+/* Optimized strcpy unaligned implementation using basic LoongArch -+ instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+ -+# ifndef STRCPY -+# define STRCPY __strcpy_unaligned -+# endif -+ -+# ifdef USE_AS_STPCPY -+# define dstend a0 -+# else -+# define dstend a4 -+# endif -+ -+LEAF(STRCPY, 6) -+ lu12i.w t5, 0x01010 -+ li.w t0, 0xff8 -+ ori t5, t5, 0x101 -+ andi t1, a1, 0xfff -+ -+ bstrins.d t5, t5, 63, 32 -+ move a2, a0 -+ slli.d t6, t5, 7 -+ bltu t0, t1, L(page_cross) -+ -+L(start_entry): -+ ld.d t0, a1, 0 -+ li.d t3, 8 -+ andi a3, a1, 0x7 -+ sub.d t1, t0, t5 -+ -+ andn t2, t6, t0 -+ sub.d t3, t3, a3 -+ and t1, t1, t2 -+ bnez t1, L(end) -+ -+ -+ add.d a1, a1, t3 -+ st.d t0, a2, 0 -+ add.d a2, a2, t3 -+ ld.d t0, a1, 0 -+ -+ sub.d t1, t0, t5 -+ andn t2, t6, t0 -+ and t1, t1, t2 -+ bnez t1, L(long_end) -+ -+L(loop): -+ st.d t0, a2, 0 -+ ld.d t0, a1, 8 -+ addi.d a2, a2, 8 -+ addi.d a1, a1, 8 -+ -+ sub.d t1, t0, t5 -+ andn t2, t6, t0 -+ and t1, t1, t2 -+ beqz t1, L(loop) -+ -+ -+L(long_end): -+ ctz.d t1, t1 -+ srli.d t1, t1, 3 -+ add.d a1, a1, t1 -+ ld.d t0, a1, -7 -+ -+ add.d dstend, a2, t1 -+ st.d t0, dstend, -7 -+ jr ra -+ nop -+ -+L(end): -+ ctz.d t1, t1 -+ srli.d t1, t1, 3 -+ add.d a3, a1, t1 -+ add.d dstend, a2, t1 -+ -+L(less_8): -+ li.d t0, 3 -+ bltu t1, t0, L(less_3) -+ ld.w t1, a1, 0 -+ ld.w t2, a3, -3 -+ -+ -+ st.w t1, a2, 0 -+ st.w t2, dstend, -3 -+ jr ra -+L(less_3): -+ beqz t1, L(zero_bytes) -+ -+ ld.h t1, a1, 0 -+ st.h t1, a2, 0 -+L(zero_bytes): -+ st.b zero, dstend, 0 -+ jr ra -+ -+L(page_cross): -+ move a4, a1 -+ bstrins.d a4, zero, 2, 0 -+ ld.d t0, a4, 0 -+ li.d t3, -1 -+ -+ slli.d t4, a1, 3 -+ srl.d t3, t3, t4 -+ srl.d t0, t0, t4 -+ orn t0, t0, t3 -+ -+ -+ sub.d t1, t0, t5 -+ andn t2, t6, t0 -+ and t1, t1, t2 -+ beqz t1, L(start_entry) -+ -+ b L(end) -+END(STRCPY) -+ -+libc_hidden_builtin_def (STRCPY) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy.c b/sysdeps/loongarch/lp64/multiarch/strcpy.c -new file mode 100644 -index 00000000..46afd068 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strcpy.c -@@ -0,0 +1,35 @@ -+/* Multiple versions of strcpy. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+/* Define multiple versions only for the definition in libc. */ -+#if IS_IN (libc) -+# define strcpy __redirect_strcpy -+# include -+# undef strcpy -+ -+# define SYMBOL_NAME strcpy -+# include "ifunc-lasx.h" -+ -+libc_ifunc_redirected (__redirect_strcpy, strcpy, IFUNC_SELECTOR ()); -+ -+# ifdef SHARED -+__hidden_ver1 (strcpy, __GI_strcpy, __redirect_strcpy) -+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strcpy); -+# endif -+#endif --- -2.33.0 - diff --git a/LoongArch-Add-ifunc-support-for-strncmp-aligned-lsx.patch b/LoongArch-Add-ifunc-support-for-strncmp-aligned-lsx.patch deleted file mode 100644 index 0297fc3..0000000 --- a/LoongArch-Add-ifunc-support-for-strncmp-aligned-lsx.patch +++ /dev/null @@ -1,583 +0,0 @@ -From 6f03da2d7ef218c0f78375cf706dada59c3fee63 Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Thu, 24 Aug 2023 16:50:19 +0800 -Subject: [PATCH 10/29] LoongArch: Add ifunc support for strncmp{aligned, lsx} - -Based on the glibc microbenchmark, only a few short inputs with this -strncmp-aligned and strncmp-lsx implementation experience performance -degradation, overall, strncmp-aligned could reduce the runtime 0%-10% -for aligned comparision, 10%-25% for unaligend comparision, strncmp-lsx -could reduce the runtime about 0%-60%. - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/lp64/multiarch/Makefile | 2 + - .../lp64/multiarch/ifunc-impl-list.c | 7 + - .../loongarch/lp64/multiarch/ifunc-strncmp.h | 38 +++ - .../lp64/multiarch/strncmp-aligned.S | 218 ++++++++++++++++++ - .../loongarch/lp64/multiarch/strncmp-lsx.S | 208 +++++++++++++++++ - sysdeps/loongarch/lp64/multiarch/strncmp.c | 35 +++ - 6 files changed, 508 insertions(+) - create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h - create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp.c - -diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile -index d5a500de..5d7ae7ae 100644 ---- a/sysdeps/loongarch/lp64/multiarch/Makefile -+++ b/sysdeps/loongarch/lp64/multiarch/Makefile -@@ -14,6 +14,8 @@ sysdep_routines += \ - strchrnul-lasx \ - strcmp-aligned \ - strcmp-lsx \ -+ strncmp-aligned \ -+ strncmp-lsx \ - memcpy-aligned \ - memcpy-unaligned \ - memmove-unaligned \ -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -index 9183b7da..c8ba87bd 100644 ---- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -@@ -69,6 +69,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_aligned) - ) - -+ IFUNC_IMPL (i, name, strncmp, -+#if !defined __loongarch_soft_float -+ IFUNC_IMPL_ADD (array, i, strncmp, SUPPORT_LSX, __strncmp_lsx) -+#endif -+ IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_aligned) -+ ) -+ - IFUNC_IMPL (i, name, memcpy, - #if !defined __loongarch_soft_float - IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h -new file mode 100644 -index 00000000..1a7dc36b ---- /dev/null 
-+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h -@@ -0,0 +1,38 @@ -+/* Common definition for strncmp ifunc selection. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+#include -+#include -+ -+#if !defined __loongarch_soft_float -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; -+#endif -+ -+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; -+ -+static inline void * -+IFUNC_SELECTOR (void) -+{ -+#if !defined __loongarch_soft_float -+ if (SUPPORT_LSX) -+ return OPTIMIZE (lsx); -+ else -+#endif -+ return OPTIMIZE (aligned); -+} -diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S -new file mode 100644 -index 00000000..e2687fa7 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S -@@ -0,0 +1,218 @@ -+/* Optimized strncmp implementation using basic Loongarch instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+# define STRNCMP __strncmp_aligned -+#else -+# define STRNCMP strncmp -+#endif -+ -+LEAF(STRNCMP, 6) -+ beqz a2, L(ret0) -+ lu12i.w a5, 0x01010 -+ andi a3, a0, 0x7 -+ ori a5, a5, 0x101 -+ -+ andi a4, a1, 0x7 -+ bstrins.d a5, a5, 63, 32 -+ li.d t7, -1 -+ li.d t8, 8 -+ -+ addi.d a2, a2, -1 -+ slli.d a6, a5, 7 -+ bne a3, a4, L(unaligned) -+ bstrins.d a0, zero, 2, 0 -+ -+ bstrins.d a1, zero, 2, 0 -+ ld.d t0, a0, 0 -+ ld.d t1, a1, 0 -+ slli.d t2, a3, 3 -+ -+ -+ sub.d t5, t8, a3 -+ srl.d t3, t7, t2 -+ srl.d t0, t0, t2 -+ srl.d t1, t1, t2 -+ -+ orn t0, t0, t3 -+ orn t1, t1, t3 -+ sub.d t2, t0, a5 -+ andn t3, a6, t0 -+ -+ and t2, t2, t3 -+ bne t0, t1, L(al_end) -+ sltu t4, a2, t5 -+ sub.d a2, a2, t5 -+ -+L(al_loop): -+ or t4, t2, t4 -+ bnez t4, L(ret0) -+ ldx.d t0, a0, t8 -+ ldx.d t1, a1, t8 -+ -+ -+ addi.d t8, t8, 8 -+ sltui t4, a2, 8 -+ addi.d a2, a2, -8 -+ sub.d t2, t0, a5 -+ -+ andn t3, a6, t0 -+ and t2, t2, t3 -+ beq t0, t1, L(al_loop) -+ addi.d a2, a2, 8 -+ -+L(al_end): -+ xor t3, t0, t1 -+ or t2, t2, t3 -+ ctz.d t2, t2 -+ srli.d t4, t2, 3 -+ -+ bstrins.d t2, zero, 2, 0 -+ srl.d t0, t0, t2 -+ srl.d t1, t1, t2 -+ andi t0, t0, 0xff -+ -+ -+ andi t1, t1, 0xff -+ sltu t2, a2, t4 -+ sub.d a0, t0, t1 -+ masknez a0, a0, t2 -+ -+ jr ra -+L(ret0): -+ move a0, zero -+ jr ra -+ nop -+ -+L(unaligned): -+ slt a7, a4, a3 -+ xor t0, a0, a1 -+ maskeqz t0, t0, a7 -+ xor a0, a0, t0 -+ -+ xor a1, a1, t0 -+ andi a3, a0, 0x7 -+ andi a4, a1, 0x7 -+ bstrins.d a0, zero, 2, 0 -+ -+ -+ bstrins.d a1, zero, 2, 0 -+ ld.d t4, a0, 0 -+ ld.d t1, a1, 0 -+ slli.d t2, a3, 3 -+ -+ slli.d t3, a4, 3 -+ srl.d t5, t7, t3 -+ srl.d t0, t4, t2 -+ srl.d t1, t1, t3 -+ -+ orn t0, t0, t5 -+ orn t1, t1, t5 -+ bne t0, t1, L(not_equal) -+ sub.d t6, t8, a4 -+ -+ sub.d a4, t2, t3 -+ sll.d t2, t7, t2 -+ sub.d t5, t8, a3 -+ orn t4, t4, t2 -+ -+ -+ sub.d t2, t4, a5 -+ andn t3, a6, t4 -+ sltu t7, a2, t5 -+ and t2, t2, t3 -+ -+ sub.d a3, zero, a4 -+ or t2, t2, t7 -+ bnez t2, L(un_end) -+ sub.d t7, t5, t6 -+ -+ sub.d a2, a2, t5 -+ sub.d t6, t8, t7 -+L(un_loop): -+ srl.d t5, t4, a4 -+ ldx.d t4, a0, t8 -+ -+ ldx.d t1, a1, t8 -+ addi.d t8, t8, 8 -+ sll.d t0, t4, a3 -+ or t0, t0, t5 -+ -+ -+ bne t0, t1, L(loop_not_equal) -+ sub.d t2, t4, a5 -+ andn t3, a6, t4 -+ sltui t5, a2, 8 -+ -+ and t2, t2, t3 -+ addi.d a2, a2, -8 -+ or t3, t2, t5 -+ beqz t3, L(un_loop) -+ -+ addi.d a2, a2, 8 -+L(un_end): -+ sub.d t2, t0, a5 -+ andn t3, a6, t0 -+ sltu t5, a2, t6 -+ -+ and t2, t2, t3 -+ or t2, t2, t5 -+ bnez t2, L(ret0) -+ ldx.d t1, a1, t8 -+ -+ -+ srl.d t0, t4, a4 -+ sub.d a2, a2, t6 -+L(not_equal): -+ sub.d t2, t0, a5 -+ andn t3, a6, t0 -+ -+ xor t4, t0, t1 -+ and t2, t2, t3 -+ or t2, t2, t4 -+ ctz.d t2, t2 -+ -+ bstrins.d t2, zero, 2, 0 -+ srli.d t4, t2, 3 -+ srl.d t0, t0, t2 -+ srl.d t1, t1, t2 -+ -+ andi t0, t0, 0xff -+ andi t1, t1, 0xff -+ sub.d t2, t0, t1 -+ sub.d t3, t1, t0 -+ -+ -+ masknez t0, t2, a7 -+ maskeqz t1, t3, a7 -+ sltu t2, a2, t4 -+ or a0, t0, t1 -+ -+ masknez a0, a0, t2 -+ jr ra -+L(loop_not_equal): -+ add.d a2, a2, t7 -+ b L(not_equal) -+END(STRNCMP) -+ -+libc_hidden_builtin_def (STRNCMP) -diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S -new file mode 100644 -index 00000000..0b4eee2a ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S -@@ -0,0 +1,208 @@ -+/* Optimized strncmp implementation using Loongarch LSX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. 
-+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# define STRNCMP __strncmp_lsx -+ -+LEAF(STRNCMP, 6) -+ beqz a2, L(ret0) -+ pcalau12i t0, %pc_hi20(L(INDEX)) -+ andi a3, a0, 0xf -+ vld vr2, t0, %pc_lo12(L(INDEX)) -+ -+ andi a4, a1, 0xf -+ li.d t2, 16 -+ bne a3, a4, L(unaligned) -+ xor t0, a0, a3 -+ -+ xor t1, a1, a4 -+ vld vr0, t0, 0 -+ vld vr1, t1, 0 -+ vreplgr2vr.b vr3, a3 -+ -+ -+ sub.d t2, t2, a3 -+ vadd.b vr3, vr3, vr2 -+ vshuf.b vr0, vr3, vr0, vr3 -+ vshuf.b vr1, vr3, vr1, vr3 -+ -+ vseq.b vr3, vr0, vr1 -+ vmin.bu vr3, vr0, vr3 -+ bgeu t2, a2, L(al_early_end) -+ vsetanyeqz.b fcc0, vr3 -+ -+ bcnez fcc0, L(al_end) -+ add.d a3, a0, a2 -+ addi.d a4, a3, -1 -+ bstrins.d a4, zero, 3, 0 -+ -+ sub.d a2, a3, a4 -+L(al_loop): -+ vld vr0, t0, 16 -+ vld vr1, t1, 16 -+ addi.d t0, t0, 16 -+ -+ -+ addi.d t1, t1, 16 -+ vseq.b vr3, vr0, vr1 -+ vmin.bu vr3, vr0, vr3 -+ beq t0, a4, L(al_early_end) -+ -+ vsetanyeqz.b fcc0, vr3 -+ bceqz fcc0, L(al_loop) -+L(al_end): -+ vseqi.b vr3, vr3, 0 -+ vfrstpi.b vr3, vr3, 0 -+ -+ vshuf.b vr0, vr0, vr0, vr3 -+ vshuf.b vr1, vr1, vr1, vr3 -+ vpickve2gr.bu t0, vr0, 0 -+ vpickve2gr.bu t1, vr1, 0 -+ -+ sub.d a0, t0, t1 -+ jr ra -+L(al_early_end): -+ vreplgr2vr.b vr4, a2 -+ vslt.b vr4, vr2, vr4 -+ -+ -+ vorn.v vr3, vr3, vr4 -+ b L(al_end) -+L(unaligned): -+ slt a5, a3, a4 -+ xor t0, a0, a1 -+ -+ maskeqz t0, t0, a5 -+ xor a0, a0, t0 -+ xor a1, a1, t0 -+ andi a3, a0, 0xf -+ -+ andi a4, a1, 0xf -+ xor t0, a0, a3 -+ xor t1, a1, a4 -+ vld vr0, t0, 0 -+ -+ vld vr3, t1, 0 -+ sub.d t2, t2, a3 -+ vreplgr2vr.b vr4, a3 -+ vreplgr2vr.b vr5, a4 -+ -+ -+ vaddi.bu vr6, vr2, 16 -+ vsub.b vr7, vr4, vr5 -+ vsub.b vr6, vr6, vr7 -+ vadd.b vr4, vr2, vr4 -+ -+ vshuf.b vr1, vr3, vr3, vr6 -+ vshuf.b vr0, vr7, vr0, vr4 -+ vshuf.b vr1, vr7, vr1, vr4 -+ vseq.b vr4, vr0, vr1 -+ -+ vmin.bu vr4, vr0, vr4 -+ bgeu t2, a2, L(un_early_end) -+ vsetanyeqz.b fcc0, vr4 -+ bcnez fcc0, L(un_end) -+ -+ add.d a6, a0, a2 -+ vslt.b vr5, vr2, vr5 -+ addi.d a7, a6, -1 -+ vor.v vr3, vr3, vr5 -+ -+ -+ bstrins.d a7, zero, 3, 0 -+ sub.d a2, a6, a7 -+L(un_loop): -+ vld vr0, t0, 16 -+ addi.d t0, t0, 16 -+ -+ vsetanyeqz.b fcc0, vr3 -+ bcnez fcc0, L(has_zero) -+ beq t0, a7, L(end_with_len) -+ vor.v vr1, vr3, vr3 -+ -+ vld vr3, t1, 16 -+ addi.d t1, t1, 16 -+ vshuf.b vr1, vr3, vr1, vr6 -+ vseq.b vr4, vr0, vr1 -+ -+ vmin.bu vr4, vr0, vr4 -+ vsetanyeqz.b fcc0, vr4 -+ bceqz fcc0, L(un_loop) -+L(un_end): -+ vseqi.b vr4, vr4, 0 -+ -+ -+ vfrstpi.b vr4, vr4, 0 -+ vshuf.b vr0, vr0, vr0, vr4 -+ vshuf.b vr1, vr1, vr1, vr4 -+ vpickve2gr.bu t0, vr0, 0 -+ -+ vpickve2gr.bu t1, vr1, 0 -+ sub.d t2, t0, t1 -+ sub.d t3, t1, t0 -+ masknez t0, t2, a5 -+ -+ maskeqz t1, t3, a5 -+ or a0, t0, t1 -+ jr ra -+L(has_zero): -+ vshuf.b vr1, vr3, vr3, vr6 -+ -+ vseq.b vr4, vr0, vr1 -+ vmin.bu vr4, vr0, vr4 -+ bne 
t0, a7, L(un_end) -+L(un_early_end): -+ vreplgr2vr.b vr5, a2 -+ -+ vslt.b vr5, vr2, vr5 -+ vorn.v vr4, vr4, vr5 -+ b L(un_end) -+L(end_with_len): -+ sub.d a6, a3, a4 -+ -+ bgeu a6, a2, 1f -+ vld vr4, t1, 16 -+1: -+ vshuf.b vr1, vr4, vr3, vr6 -+ vseq.b vr4, vr0, vr1 -+ -+ vmin.bu vr4, vr0, vr4 -+ vreplgr2vr.b vr5, a2 -+ vslt.b vr5, vr2, vr5 -+ vorn.v vr4, vr4, vr5 -+ -+ b L(un_end) -+L(ret0): -+ move a0, zero -+ jr ra -+END(STRNCMP) -+ -+ .section .rodata.cst16,"M",@progbits,16 -+ .align 4 -+L(INDEX): -+ .dword 0x0706050403020100 -+ .dword 0x0f0e0d0c0b0a0908 -+ -+libc_hidden_builtin_def (STRNCMP) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp.c b/sysdeps/loongarch/lp64/multiarch/strncmp.c -new file mode 100644 -index 00000000..af6d0bc4 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strncmp.c -@@ -0,0 +1,35 @@ -+/* Multiple versions of strncmp. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+/* Define multiple versions only for the definition in libc. */ -+#if IS_IN (libc) -+# define strncmp __redirect_strncmp -+# include -+# undef strncmp -+ -+# define SYMBOL_NAME strncmp -+# include "ifunc-strncmp.h" -+ -+libc_ifunc_redirected (__redirect_strncmp, strncmp, IFUNC_SELECTOR ()); -+ -+# ifdef SHARED -+__hidden_ver1 (strncmp, __GI_strncmp, __redirect_strncmp) -+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strncmp); -+# endif -+#endif --- -2.33.0 - diff --git a/LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch b/LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch deleted file mode 100644 index e6b4d91..0000000 --- a/LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch +++ /dev/null @@ -1,465 +0,0 @@ -From e494d32d3b76eee0d59cfab37789a356459b517a Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Thu, 24 Aug 2023 16:50:17 +0800 -Subject: [PATCH 08/29] LoongArch: Add ifunc support for strnlen{aligned, lsx, - lasx} - -Based on the glibc microbenchmark, strnlen-aligned implementation could -reduce the runtime more than 10%, strnlen-lsx implementation could reduce -the runtime about 50%-78%, strnlen-lasx implementation could reduce the -runtime about 50%-88%. 
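
A minimal C sketch (not taken from these patches) of the word-at-a-time NUL test that the *-aligned variants above rely on; it is visible in the removed assembly as the 0x0101010101010101 constant and its left-shift by 7 (0x8080808080808080). The helper names below are hypothetical, the target is assumed little-endian (as LoongArch is), and the real code derives the byte index with ctz.d rather than a byte loop:

#include <stddef.h>
#include <stdint.h>

/* Nonzero iff the 64-bit word contains at least one zero byte:
   (w - 0x01..01) & ~w & 0x80..80 sets the high bit of each byte that
   was zero -- the same test the assembly performs with sub.d, andn
   and and.  */
static inline uint64_t
has_zero_byte (uint64_t w)
{
  const uint64_t ones  = 0x0101010101010101ULL;   /* the 0x0101... constant */
  const uint64_t highs = ones << 7;               /* 0x8080808080808080 */
  return (w - ones) & ~w & highs;
}

/* Illustrative bounded scan over already-aligned 8-byte words, in the
   spirit of __strnlen_aligned: return the offset of the first NUL, or
   nwords * 8 if none is found.  Assumes little-endian byte order, so
   the lowest-addressed byte is the least significant one.  */
static size_t
scan_words_for_nul (const uint64_t *words, size_t nwords)
{
  for (size_t i = 0; i < nwords; i++)
    if (has_zero_byte (words[i]))
      {
        uint64_t w = words[i];
        size_t j = 0;
        while (((w >> (8 * j)) & 0xff) != 0)
          j++;                        /* the asm uses ctz.d on the mask */
        return i * 8 + j;
      }
  return nwords * 8;
}

The unaligned head handling in the removed files follows the same idea: the first word is loaded from an aligned-down address and the bytes that precede the string start are forced nonzero (roughly what the sll.d/srl.d and orn sequences around the loops do), so they cannot be mistaken for the terminating NUL.
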
- -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/lp64/multiarch/Makefile | 3 + - .../lp64/multiarch/ifunc-impl-list.c | 8 ++ - .../loongarch/lp64/multiarch/ifunc-strnlen.h | 41 +++++++ - .../lp64/multiarch/strnlen-aligned.S | 102 ++++++++++++++++++ - .../loongarch/lp64/multiarch/strnlen-lasx.S | 100 +++++++++++++++++ - .../loongarch/lp64/multiarch/strnlen-lsx.S | 89 +++++++++++++++ - sysdeps/loongarch/lp64/multiarch/strnlen.c | 39 +++++++ - 7 files changed, 382 insertions(+) - create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h - create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen.c - -diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile -index afa51041..c4dd3143 100644 ---- a/sysdeps/loongarch/lp64/multiarch/Makefile -+++ b/sysdeps/loongarch/lp64/multiarch/Makefile -@@ -3,6 +3,9 @@ sysdep_routines += \ - strlen-aligned \ - strlen-lsx \ - strlen-lasx \ -+ strnlen-aligned \ -+ strnlen-lsx \ -+ strnlen-lasx \ - strchr-aligned \ - strchr-lsx \ - strchr-lasx \ -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -index 25eb96b0..7cec0b77 100644 ---- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -@@ -38,6 +38,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned) - ) - -+ IFUNC_IMPL (i, name, strnlen, -+#if !defined __loongarch_soft_float -+ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LASX, __strnlen_lasx) -+ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LSX, __strnlen_lsx) -+#endif -+ IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_aligned) -+ ) -+ - IFUNC_IMPL (i, name, strchr, - #if !defined __loongarch_soft_float - IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx) -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h -new file mode 100644 -index 00000000..5cf89810 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h -@@ -0,0 +1,41 @@ -+/* Common definition for strnlen ifunc selections. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . 
*/ -+ -+#include -+#include -+ -+#if !defined __loongarch_soft_float -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; -+#endif -+ -+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; -+ -+static inline void * -+IFUNC_SELECTOR (void) -+{ -+#if !defined __loongarch_soft_float -+ if (SUPPORT_LASX) -+ return OPTIMIZE (lasx); -+ else if (SUPPORT_LSX) -+ return OPTIMIZE (lsx); -+ else -+#endif -+ return OPTIMIZE (aligned); -+} -diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S -new file mode 100644 -index 00000000..b900430a ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S -@@ -0,0 +1,102 @@ -+/* Optimized strnlen implementation using basic Loongarch instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+# define STRNLEN __strnlen_aligned -+#else -+# define STRNLEN __strnlen -+#endif -+ -+LEAF(STRNLEN, 6) -+ beqz a1, L(out) -+ lu12i.w a2, 0x01010 -+ andi t1, a0, 0x7 -+ move t4, a0 -+ -+ bstrins.d a0, zero, 2, 0 -+ ori a2, a2, 0x101 -+ li.w t0, -1 -+ ld.d t2, a0, 0 -+ -+ slli.d t3, t1, 3 -+ bstrins.d a2, a2, 63, 32 -+ li.w t5, 8 -+ slli.d a3, a2, 7 -+ -+ sub.w t1, t5, t1 -+ sll.d t0, t0, t3 -+ orn t2, t2, t0 -+ sub.d t0, t2, a2 -+ -+ -+ andn t3, a3, t2 -+ and t0, t0, t3 -+ bnez t0, L(count_pos) -+ sub.d t5, a1, t1 -+ -+ bgeu t1, a1, L(out) -+ addi.d a0, a0, 8 -+L(loop): -+ ld.d t2, a0, 0 -+ sub.d t0, t2, a2 -+ -+ andn t1, a3, t2 -+ sltui t6, t5, 9 -+ and t0, t0, t1 -+ or t7, t0, t6 -+ -+ bnez t7, L(count_pos) -+ ld.d t2, a0, 8 -+ addi.d a0, a0, 16 -+ sub.d t0, t2, a2 -+ -+ -+ andn t1, a3, t2 -+ sltui t6, t5, 17 -+ and t0, t0, t1 -+ addi.d t5, t5, -16 -+ -+ or t7, t0, t6 -+ beqz t7, L(loop) -+ addi.d a0, a0, -8 -+L(count_pos): -+ ctz.d t1, t0 -+ -+ sub.d a0, a0, t4 -+ srli.d t1, t1, 3 -+ add.d a0, t1, a0 -+ sltu t0, a0, a1 -+ -+ masknez t1, a1, t0 -+ maskeqz a0, a0, t0 -+ or a0, a0, t1 -+ jr ra -+ -+ -+L(out): -+ move a0, a1 -+ jr ra -+END(STRNLEN) -+ -+weak_alias (STRNLEN, strnlen) -+libc_hidden_builtin_def (STRNLEN) -diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S -new file mode 100644 -index 00000000..2c03d3d9 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S -@@ -0,0 +1,100 @@ -+/* Optimized strnlen implementation using loongarch LASX instructions -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. 
-+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# define STRNLEN __strnlen_lasx -+ -+LEAF(STRNLEN, 6) -+ beqz a1, L(ret0) -+ andi t1, a0, 0x3f -+ li.d t3, 65 -+ sub.d a2, a0, t1 -+ -+ xvld xr0, a2, 0 -+ xvld xr1, a2, 32 -+ sub.d t1, t3, t1 -+ move a3, a0 -+ -+ sltu t1, a1, t1 -+ xvmsknz.b xr0, xr0 -+ xvmsknz.b xr1, xr1 -+ xvpickve.w xr2, xr0, 4 -+ -+ xvpickve.w xr3, xr1, 4 -+ vilvl.h vr0, vr2, vr0 -+ vilvl.h vr1, vr3, vr1 -+ vilvl.w vr0, vr1, vr0 -+ -+ -+ movfr2gr.d t0, fa0 -+ sra.d t0, t0, a0 -+ orn t1, t1, t0 -+ bnez t1, L(end) -+ -+ add.d a4, a0, a1 -+ move a0, a2 -+ addi.d a4, a4, -1 -+ bstrins.d a4, zero, 5, 0 -+ -+L(loop): -+ xvld xr0, a0, 64 -+ xvld xr1, a0, 96 -+ addi.d a0, a0, 64 -+ beq a0, a4, L(out) -+ -+ xvmin.bu xr2, xr0, xr1 -+ xvsetanyeqz.b fcc0, xr2 -+ bceqz fcc0, L(loop) -+L(out): -+ xvmsknz.b xr0, xr0 -+ -+ -+ xvmsknz.b xr1, xr1 -+ xvpickve.w xr2, xr0, 4 -+ xvpickve.w xr3, xr1, 4 -+ vilvl.h vr0, vr2, vr0 -+ -+ vilvl.h vr1, vr3, vr1 -+ vilvl.w vr0, vr1, vr0 -+ movfr2gr.d t0, fa0 -+L(end): -+ sub.d a0, a0, a3 -+ -+ cto.d t0, t0 -+ add.d a0, a0, t0 -+ sltu t1, a0, a1 -+ masknez t0, a1, t1 -+ -+ maskeqz t1, a0, t1 -+ or a0, t0, t1 -+ jr ra -+L(ret0): -+ move a0, zero -+ -+ -+ jr ra -+END(STRNLEN) -+ -+libc_hidden_def (STRNLEN) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S -new file mode 100644 -index 00000000..b769a895 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S -@@ -0,0 +1,89 @@ -+/* Optimized strnlen implementation using loongarch LSX instructions -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# define STRNLEN __strnlen_lsx -+ -+LEAF(STRNLEN, 6) -+ beqz a1, L(ret0) -+ andi t1, a0, 0x1f -+ li.d t3, 33 -+ sub.d a2, a0, t1 -+ -+ vld vr0, a2, 0 -+ vld vr1, a2, 16 -+ sub.d t1, t3, t1 -+ move a3, a0 -+ -+ sltu t1, a1, t1 -+ vmsknz.b vr0, vr0 -+ vmsknz.b vr1, vr1 -+ vilvl.h vr0, vr1, vr0 -+ -+ movfr2gr.s t0, fa0 -+ sra.w t0, t0, a0 -+ orn t1, t1, t0 -+ bnez t1, L(end) -+ -+ -+ add.d a4, a0, a1 -+ move a0, a2 -+ addi.d a4, a4, -1 -+ bstrins.d a4, zero, 4, 0 -+ -+L(loop): -+ vld vr0, a0, 32 -+ vld vr1, a0, 48 -+ addi.d a0, a0, 32 -+ beq a0, a4, L(out) -+ -+ vmin.bu vr2, vr0, vr1 -+ vsetanyeqz.b fcc0, vr2 -+ bceqz fcc0, L(loop) -+L(out): -+ vmsknz.b vr0, vr0 -+ -+ vmsknz.b vr1, vr1 -+ vilvl.h vr0, vr1, vr0 -+ movfr2gr.s t0, fa0 -+L(end): -+ sub.d a0, a0, a3 -+ -+ -+ cto.w t0, t0 -+ add.d a0, a0, t0 -+ sltu t1, a0, a1 -+ masknez t0, a1, t1 -+ -+ maskeqz t1, a0, t1 -+ or a0, t0, t1 -+ jr ra -+L(ret0): -+ move a0, zero -+ -+ jr ra -+END(STRNLEN) -+ -+libc_hidden_builtin_def (STRNLEN) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen.c b/sysdeps/loongarch/lp64/multiarch/strnlen.c -new file mode 100644 -index 00000000..38b7a25a ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strnlen.c -@@ -0,0 +1,39 @@ -+/* Multiple versions of strnlen. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+/* Define multiple versions only for the definition in libc. 
*/ -+#if IS_IN (libc) -+# define strnlen __redirect_strnlen -+# define __strnlen __redirect___strnlen -+# include -+# undef __strnlen -+# undef strnlen -+ -+# define SYMBOL_NAME strnlen -+# include "ifunc-strnlen.h" -+ -+libc_ifunc_redirected (__redirect_strnlen, __strnlen, IFUNC_SELECTOR ()); -+weak_alias (__strnlen, strnlen); -+# ifdef SHARED -+__hidden_ver1 (__strnlen, __GI___strnlen, __redirect___strnlen) -+ __attribute__((visibility ("hidden"))) __attribute_copy__ (strnlen); -+__hidden_ver1 (strnlen, __GI_strnlen, __redirect_strnlen) -+ __attribute__((weak, visibility ("hidden"))) __attribute_copy__ (strnlen); -+# endif -+#endif --- -2.33.0 - diff --git a/LoongArch-Add-ifunc-support-for-strrchr-aligned-lsx-.patch b/LoongArch-Add-ifunc-support-for-strrchr-aligned-lsx-.patch deleted file mode 100644 index 63301f8..0000000 --- a/LoongArch-Add-ifunc-support-for-strrchr-aligned-lsx-.patch +++ /dev/null @@ -1,670 +0,0 @@ -From d537d0ab45a55048c8da483e73be4448ddb45525 Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Wed, 13 Sep 2023 15:35:00 +0800 -Subject: [PATCH 23/29] LoongArch: Add ifunc support for strrchr{aligned, lsx, - lasx} - -According to glibc strrchr microbenchmark test results, this implementation -could reduce the runtime time as following: - -Name Percent of rutime reduced -strrchr-lasx 10%-50% -strrchr-lsx 0%-50% -strrchr-aligned 5%-50% - -Generic strrchr is implemented by function strlen + memrchr, the lasx version -will compare with generic strrchr implemented by strlen-lasx + memrchr-lasx, -the lsx version will compare with generic strrchr implemented by strlen-lsx + -memrchr-lsx, the aligned version will compare with generic strrchr implemented -by strlen-aligned + memrchr-generic. - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/lp64/multiarch/Makefile | 3 + - .../lp64/multiarch/ifunc-impl-list.c | 8 + - .../loongarch/lp64/multiarch/ifunc-strrchr.h | 41 ++++ - .../lp64/multiarch/strrchr-aligned.S | 170 +++++++++++++++++ - .../loongarch/lp64/multiarch/strrchr-lasx.S | 176 ++++++++++++++++++ - .../loongarch/lp64/multiarch/strrchr-lsx.S | 144 ++++++++++++++ - sysdeps/loongarch/lp64/multiarch/strrchr.c | 36 ++++ - 7 files changed, 578 insertions(+) - create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h - create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr.c - -diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile -index 39550bea..fe863e1b 100644 ---- a/sysdeps/loongarch/lp64/multiarch/Makefile -+++ b/sysdeps/loongarch/lp64/multiarch/Makefile -@@ -9,6 +9,9 @@ sysdep_routines += \ - strchr-aligned \ - strchr-lsx \ - strchr-lasx \ -+ strrchr-aligned \ -+ strrchr-lsx \ -+ strrchr-lasx \ - strchrnul-aligned \ - strchrnul-lsx \ - strchrnul-lasx \ -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -index 39a14f1d..529e2369 100644 ---- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -@@ -94,6 +94,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned) - ) - -+ IFUNC_IMPL (i, name, strrchr, -+#if !defined __loongarch_soft_float -+ IFUNC_IMPL_ADD (array, i, strrchr, 
SUPPORT_LASX, __strrchr_lasx) -+ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LSX, __strrchr_lsx) -+#endif -+ IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_aligned) -+ ) -+ - IFUNC_IMPL (i, name, memcpy, - #if !defined __loongarch_soft_float - IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h -new file mode 100644 -index 00000000..bbb34089 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h -@@ -0,0 +1,41 @@ -+/* Common definition for strrchr ifunc selections. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+#include -+#include -+ -+#if !defined __loongarch_soft_float -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; -+#endif -+ -+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; -+ -+static inline void * -+IFUNC_SELECTOR (void) -+{ -+#if !defined __loongarch_soft_float -+ if (SUPPORT_LASX) -+ return OPTIMIZE (lasx); -+ else if (SUPPORT_LSX) -+ return OPTIMIZE (lsx); -+ else -+#endif -+ return OPTIMIZE (aligned); -+} -diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S -new file mode 100644 -index 00000000..a73deb78 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S -@@ -0,0 +1,170 @@ -+/* Optimized strrchr implementation using basic LoongArch instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+# define STRRCHR __strrchr_aligned -+#else -+# define STRRCHR strrchr -+#endif -+ -+LEAF(STRRCHR, 6) -+ slli.d t0, a0, 3 -+ bstrins.d a0, zero, 2, 0 -+ lu12i.w a2, 0x01010 -+ ld.d t2, a0, 0 -+ -+ andi a1, a1, 0xff -+ ori a2, a2, 0x101 -+ li.d t3, -1 -+ bstrins.d a2, a2, 63, 32 -+ -+ sll.d t5, t3, t0 -+ slli.d a3, a2, 7 -+ orn t4, t2, t5 -+ mul.d a1, a1, a2 -+ -+ sub.d t0, t4, a2 -+ andn t1, a3, t4 -+ and t1, t0, t1 -+ beqz t1, L(find_tail) -+ -+ -+ ctz.d t0, t1 -+ orn t0, zero, t0 -+ xor t2, t4, a1 -+ srl.d t0, t3, t0 -+ -+ orn t2, t2, t0 -+ orn t2, t2, t5 -+ revb.d t2, t2 -+ sub.d t1, t2, a2 -+ -+ andn t0, a3, t2 -+ and t1, t0, t1 -+ ctz.d t0, t1 -+ srli.d t0, t0, 3 -+ -+ addi.d a0, a0, 7 -+ sub.d a0, a0, t0 -+ maskeqz a0, a0, t1 -+ jr ra -+ -+ -+L(find_tail): -+ addi.d a4, a0, 8 -+ addi.d a0, a0, 8 -+L(loop_ascii): -+ ld.d t2, a0, 0 -+ sub.d t1, t2, a2 -+ -+ and t0, t1, a3 -+ bnez t0, L(more_check) -+ ld.d t2, a0, 8 -+ sub.d t1, t2, a2 -+ -+ and t0, t1, a3 -+ addi.d a0, a0, 16 -+ beqz t0, L(loop_ascii) -+ addi.d a0, a0, -8 -+ -+L(more_check): -+ andn t0, a3, t2 -+ and t1, t1, t0 -+ bnez t1, L(tail) -+ addi.d a0, a0, 8 -+ -+ -+L(loop_nonascii): -+ ld.d t2, a0, 0 -+ sub.d t1, t2, a2 -+ andn t0, a3, t2 -+ and t1, t0, t1 -+ -+ bnez t1, L(tail) -+ ld.d t2, a0, 8 -+ addi.d a0, a0, 16 -+ sub.d t1, t2, a2 -+ -+ andn t0, a3, t2 -+ and t1, t0, t1 -+ beqz t1, L(loop_nonascii) -+ addi.d a0, a0, -8 -+ -+L(tail): -+ ctz.d t0, t1 -+ orn t0, zero, t0 -+ xor t2, t2, a1 -+ srl.d t0, t3, t0 -+ -+ -+ orn t2, t2, t0 -+ revb.d t2, t2 -+ sub.d t1, t2, a2 -+ andn t0, a3, t2 -+ -+ and t1, t0, t1 -+ bnez t1, L(count_pos) -+L(find_loop): -+ beq a0, a4, L(find_end) -+ ld.d t2, a0, -8 -+ -+ addi.d a0, a0, -8 -+ xor t2, t2, a1 -+ sub.d t1, t2, a2 -+ andn t0, a3, t2 -+ -+ and t1, t0, t1 -+ beqz t1, L(find_loop) -+ revb.d t2, t2 -+ sub.d t1, t2, a2 -+ -+ -+ andn t0, a3, t2 -+ and t1, t0, t1 -+L(count_pos): -+ ctz.d t0, t1 -+ addi.d a0, a0, 7 -+ -+ srli.d t0, t0, 3 -+ sub.d a0, a0, t0 -+ jr ra -+ nop -+ -+L(find_end): -+ xor t2, t4, a1 -+ orn t2, t2, t5 -+ revb.d t2, t2 -+ sub.d t1, t2, a2 -+ -+ -+ andn t0, a3, t2 -+ and t1, t0, t1 -+ ctz.d t0, t1 -+ srli.d t0, t0, 3 -+ -+ addi.d a0, a4, -1 -+ sub.d a0, a0, t0 -+ maskeqz a0, a0, t1 -+ jr ra -+END(STRRCHR) -+ -+libc_hidden_builtin_def(STRRCHR) -diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S -new file mode 100644 -index 00000000..5a6e2297 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S -@@ -0,0 +1,176 @@ -+/* Optimized strrchr implementation using LoongArch LASX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+#define STRRCHR __strrchr_lasx -+ -+LEAF(STRRCHR, 6) -+ move a2, a0 -+ bstrins.d a0, zero, 5, 0 -+ xvld xr0, a0, 0 -+ xvld xr1, a0, 32 -+ -+ li.d t2, -1 -+ xvreplgr2vr.b xr4, a1 -+ xvmsknz.b xr2, xr0 -+ xvmsknz.b xr3, xr1 -+ -+ xvpickve.w xr5, xr2, 4 -+ xvpickve.w xr6, xr3, 4 -+ vilvl.h vr2, vr5, vr2 -+ vilvl.h vr3, vr6, vr3 -+ -+ vilvl.w vr2, vr3, vr2 -+ movfr2gr.d t0, fa2 -+ sra.d t0, t0, a2 -+ beq t0, t2, L(find_tail) -+ -+ -+ xvseq.b xr2, xr0, xr4 -+ xvseq.b xr3, xr1, xr4 -+ xvmsknz.b xr2, xr2 -+ xvmsknz.b xr3, xr3 -+ -+ xvpickve.w xr4, xr2, 4 -+ xvpickve.w xr5, xr3, 4 -+ vilvl.h vr2, vr4, vr2 -+ vilvl.h vr3, vr5, vr3 -+ -+ vilvl.w vr1, vr3, vr2 -+ slli.d t3, t2, 1 -+ movfr2gr.d t1, fa1 -+ cto.d t0, t0 -+ -+ srl.d t1, t1, a2 -+ sll.d t3, t3, t0 -+ addi.d a0, a2, 63 -+ andn t1, t1, t3 -+ -+ -+ clz.d t0, t1 -+ sub.d a0, a0, t0 -+ maskeqz a0, a0, t1 -+ jr ra -+ -+ .align 5 -+L(find_tail): -+ addi.d a3, a0, 64 -+L(loop): -+ xvld xr2, a0, 64 -+ xvld xr3, a0, 96 -+ addi.d a0, a0, 64 -+ -+ xvmin.bu xr5, xr2, xr3 -+ xvsetanyeqz.b fcc0, xr5 -+ bceqz fcc0, L(loop) -+ xvmsknz.b xr5, xr2 -+ -+ -+ xvmsknz.b xr6, xr3 -+ xvpickve.w xr7, xr5, 4 -+ xvpickve.w xr8, xr6, 4 -+ vilvl.h vr5, vr7, vr5 -+ -+ vilvl.h vr6, vr8, vr6 -+ xvseq.b xr2, xr2, xr4 -+ xvseq.b xr3, xr3, xr4 -+ xvmsknz.b xr2, xr2 -+ -+ xvmsknz.b xr3, xr3 -+ xvpickve.w xr7, xr2, 4 -+ xvpickve.w xr8, xr3, 4 -+ vilvl.h vr2, vr7, vr2 -+ -+ vilvl.h vr3, vr8, vr3 -+ vilvl.w vr5, vr6, vr5 -+ vilvl.w vr2, vr3, vr2 -+ movfr2gr.d t0, fa5 -+ -+ -+ movfr2gr.d t1, fa2 -+ slli.d t3, t2, 1 -+ cto.d t0, t0 -+ sll.d t3, t3, t0 -+ -+ andn t1, t1, t3 -+ beqz t1, L(find_loop) -+ clz.d t0, t1 -+ addi.d a0, a0, 63 -+ -+ sub.d a0, a0, t0 -+ jr ra -+L(find_loop): -+ beq a0, a3, L(find_end) -+ xvld xr2, a0, -64 -+ -+ xvld xr3, a0, -32 -+ addi.d a0, a0, -64 -+ xvseq.b xr2, xr2, xr4 -+ xvseq.b xr3, xr3, xr4 -+ -+ -+ xvmax.bu xr5, xr2, xr3 -+ xvseteqz.v fcc0, xr5 -+ bcnez fcc0, L(find_loop) -+ xvmsknz.b xr0, xr2 -+ -+ xvmsknz.b xr1, xr3 -+ xvpickve.w xr2, xr0, 4 -+ xvpickve.w xr3, xr1, 4 -+ vilvl.h vr0, vr2, vr0 -+ -+ vilvl.h vr1, vr3, vr1 -+ vilvl.w vr0, vr1, vr0 -+ movfr2gr.d t0, fa0 -+ addi.d a0, a0, 63 -+ -+ clz.d t0, t0 -+ sub.d a0, a0, t0 -+ jr ra -+ nop -+ -+ -+L(find_end): -+ xvseq.b xr2, xr0, xr4 -+ xvseq.b xr3, xr1, xr4 -+ xvmsknz.b xr2, xr2 -+ xvmsknz.b xr3, xr3 -+ -+ xvpickve.w xr4, xr2, 4 -+ xvpickve.w xr5, xr3, 4 -+ vilvl.h vr2, vr4, vr2 -+ vilvl.h vr3, vr5, vr3 -+ -+ vilvl.w vr1, vr3, vr2 -+ movfr2gr.d t1, fa1 -+ addi.d a0, a2, 63 -+ srl.d t1, t1, a2 -+ -+ clz.d t0, t1 -+ sub.d a0, a0, t0 -+ maskeqz a0, a0, t1 -+ jr ra -+END(STRRCHR) -+ -+libc_hidden_builtin_def(STRRCHR) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S -new file mode 100644 -index 00000000..8f2fd22e ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S -@@ -0,0 +1,144 @@ -+/* Optimized strrchr implementation using LoongArch LSX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. 
-+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+#define STRRCHR __strrchr_lsx -+ -+LEAF(STRRCHR, 6) -+ move a2, a0 -+ bstrins.d a0, zero, 4, 0 -+ vld vr0, a0, 0 -+ vld vr1, a0, 16 -+ -+ li.d t2, -1 -+ vreplgr2vr.b vr4, a1 -+ vmsknz.b vr2, vr0 -+ vmsknz.b vr3, vr1 -+ -+ vilvl.h vr2, vr3, vr2 -+ movfr2gr.s t0, fa2 -+ sra.w t0, t0, a2 -+ beq t0, t2, L(find_tail) -+ -+ vseq.b vr2, vr0, vr4 -+ vseq.b vr3, vr1, vr4 -+ vmsknz.b vr2, vr2 -+ vmsknz.b vr3, vr3 -+ -+ -+ vilvl.h vr1, vr3, vr2 -+ slli.d t3, t2, 1 -+ movfr2gr.s t1, fa1 -+ cto.w t0, t0 -+ -+ srl.w t1, t1, a2 -+ sll.d t3, t3, t0 -+ addi.d a0, a2, 31 -+ andn t1, t1, t3 -+ -+ clz.w t0, t1 -+ sub.d a0, a0, t0 -+ maskeqz a0, a0, t1 -+ jr ra -+ -+ .align 5 -+L(find_tail): -+ addi.d a3, a0, 32 -+L(loop): -+ vld vr2, a0, 32 -+ vld vr3, a0, 48 -+ addi.d a0, a0, 32 -+ -+ vmin.bu vr5, vr2, vr3 -+ vsetanyeqz.b fcc0, vr5 -+ bceqz fcc0, L(loop) -+ vmsknz.b vr5, vr2 -+ -+ vmsknz.b vr6, vr3 -+ vilvl.h vr5, vr6, vr5 -+ vseq.b vr2, vr2, vr4 -+ vseq.b vr3, vr3, vr4 -+ -+ vmsknz.b vr2, vr2 -+ vmsknz.b vr3, vr3 -+ vilvl.h vr2, vr3, vr2 -+ movfr2gr.s t0, fa5 -+ -+ -+ movfr2gr.s t1, fa2 -+ slli.d t3, t2, 1 -+ cto.w t0, t0 -+ sll.d t3, t3, t0 -+ -+ andn t1, t1, t3 -+ beqz t1, L(find_loop) -+ clz.w t0, t1 -+ addi.d a0, a0, 31 -+ -+ sub.d a0, a0, t0 -+ jr ra -+L(find_loop): -+ beq a0, a3, L(find_end) -+ vld vr2, a0, -32 -+ -+ vld vr3, a0, -16 -+ addi.d a0, a0, -32 -+ vseq.b vr2, vr2, vr4 -+ vseq.b vr3, vr3, vr4 -+ -+ -+ vmax.bu vr5, vr2, vr3 -+ vseteqz.v fcc0, vr5 -+ bcnez fcc0, L(find_loop) -+ vmsknz.b vr0, vr2 -+ -+ vmsknz.b vr1, vr3 -+ vilvl.h vr0, vr1, vr0 -+ movfr2gr.s t0, fa0 -+ addi.d a0, a0, 31 -+ -+ clz.w t0, t0 -+ sub.d a0, a0, t0 -+ jr ra -+ nop -+ -+L(find_end): -+ vseq.b vr2, vr0, vr4 -+ vseq.b vr3, vr1, vr4 -+ vmsknz.b vr2, vr2 -+ vmsknz.b vr3, vr3 -+ -+ -+ vilvl.h vr1, vr3, vr2 -+ movfr2gr.s t1, fa1 -+ addi.d a0, a2, 31 -+ srl.w t1, t1, a2 -+ -+ clz.w t0, t1 -+ sub.d a0, a0, t0 -+ maskeqz a0, a0, t1 -+ jr ra -+END(STRRCHR) -+ -+libc_hidden_builtin_def(STRRCHR) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr.c b/sysdeps/loongarch/lp64/multiarch/strrchr.c -new file mode 100644 -index 00000000..d9c9f660 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strrchr.c -@@ -0,0 +1,36 @@ -+/* Multiple versions of strrchr. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. 
-+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+/* Define multiple versions only for the definition in libc. */ -+#if IS_IN (libc) -+# define strrchr __redirect_strrchr -+# include -+# undef strrchr -+ -+# define SYMBOL_NAME strrchr -+# include "ifunc-strrchr.h" -+ -+libc_ifunc_redirected (__redirect_strrchr, strrchr, IFUNC_SELECTOR ()); -+weak_alias (strrchr, rindex) -+# ifdef SHARED -+__hidden_ver1 (strrchr, __GI_strrchr, __redirect_strrchr) -+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strrchr); -+# endif -+ -+#endif --- -2.33.0 - diff --git a/LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch b/LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch deleted file mode 100644 index 35acd20..0000000 --- a/LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch +++ /dev/null @@ -1,626 +0,0 @@ -From b5979df8ad07823c79a934c1fa0a91ec0abffb61 Mon Sep 17 00:00:00 2001 -From: caiyinyu -Date: Fri, 8 Sep 2023 14:10:55 +0800 -Subject: [PATCH 20/29] LoongArch: Add lasx/lsx support for - _dl_runtime_profile. - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/bits/link.h | 24 ++- - sysdeps/loongarch/bits/link_lavcurrent.h | 25 +++ - sysdeps/loongarch/dl-audit-check.h | 23 +++ - sysdeps/loongarch/dl-link.sym | 8 +- - sysdeps/loongarch/dl-machine.h | 11 +- - sysdeps/loongarch/dl-trampoline.S | 177 +---------------- - sysdeps/loongarch/dl-trampoline.h | 242 +++++++++++++++++++++++ - 7 files changed, 331 insertions(+), 179 deletions(-) - create mode 100644 sysdeps/loongarch/bits/link_lavcurrent.h - create mode 100644 sysdeps/loongarch/dl-audit-check.h - -diff --git a/sysdeps/loongarch/bits/link.h b/sysdeps/loongarch/bits/link.h -index 7fa61312..00f6f25f 100644 ---- a/sysdeps/loongarch/bits/link.h -+++ b/sysdeps/loongarch/bits/link.h -@@ -20,10 +20,26 @@ - #error "Never include directly; use instead." - #endif - -+#ifndef __loongarch_soft_float -+typedef float La_loongarch_vr -+ __attribute__ ((__vector_size__ (16), __aligned__ (16))); -+typedef float La_loongarch_xr -+ __attribute__ ((__vector_size__ (32), __aligned__ (16))); -+ -+typedef union -+{ -+ double fpreg[4]; -+ La_loongarch_vr vr[2]; -+ La_loongarch_xr xr[1]; -+} La_loongarch_vector __attribute__ ((__aligned__ (16))); -+#endif -+ - typedef struct La_loongarch_regs - { - unsigned long int lr_reg[8]; /* a0 - a7 */ -- double lr_fpreg[8]; /* fa0 - fa7 */ -+#ifndef __loongarch_soft_float -+ La_loongarch_vector lr_vec[8]; /* fa0 - fa7 or vr0 - vr7 or xr0 - xr7*/ -+#endif - unsigned long int lr_ra; - unsigned long int lr_sp; - } La_loongarch_regs; -@@ -33,8 +49,10 @@ typedef struct La_loongarch_retval - { - unsigned long int lrv_a0; - unsigned long int lrv_a1; -- double lrv_fa0; -- double lrv_fa1; -+#ifndef __loongarch_soft_float -+ La_loongarch_vector lrv_vec0; -+ La_loongarch_vector lrv_vec1; -+#endif - } La_loongarch_retval; - - __BEGIN_DECLS -diff --git a/sysdeps/loongarch/bits/link_lavcurrent.h b/sysdeps/loongarch/bits/link_lavcurrent.h -new file mode 100644 -index 00000000..15f1eb84 ---- /dev/null -+++ b/sysdeps/loongarch/bits/link_lavcurrent.h -@@ -0,0 +1,25 @@ -+/* Data structure for communication from the run-time dynamic linker for -+ loaded ELF shared objects. LAV_CURRENT definition. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. 
-+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+#ifndef _LINK_H -+# error "Never include directly; use instead." -+#endif -+ -+/* Version numbers for la_version handshake interface. */ -+#define LAV_CURRENT 3 -diff --git a/sysdeps/loongarch/dl-audit-check.h b/sysdeps/loongarch/dl-audit-check.h -new file mode 100644 -index 00000000..a139c939 ---- /dev/null -+++ b/sysdeps/loongarch/dl-audit-check.h -@@ -0,0 +1,23 @@ -+/* rtld-audit version check. LoongArch version. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . 
*/ -+ -+static inline bool -+_dl_audit_check_version (unsigned int lav) -+{ -+ return lav == LAV_CURRENT; -+} -diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym -index 868ab7c6..b534968e 100644 ---- a/sysdeps/loongarch/dl-link.sym -+++ b/sysdeps/loongarch/dl-link.sym -@@ -6,9 +6,13 @@ DL_SIZEOF_RG sizeof(struct La_loongarch_regs) - DL_SIZEOF_RV sizeof(struct La_loongarch_retval) - - DL_OFFSET_RG_A0 offsetof(struct La_loongarch_regs, lr_reg) --DL_OFFSET_RG_FA0 offsetof(struct La_loongarch_regs, lr_fpreg) -+#ifndef __loongarch_soft_float -+DL_OFFSET_RG_VEC0 offsetof(struct La_loongarch_regs, lr_vec) -+#endif - DL_OFFSET_RG_RA offsetof(struct La_loongarch_regs, lr_ra) - DL_OFFSET_RG_SP offsetof(struct La_loongarch_regs, lr_sp) - - DL_OFFSET_RV_A0 offsetof(struct La_loongarch_retval, lrv_a0) --DL_OFFSET_RV_FA0 offsetof(struct La_loongarch_retval, lrv_a1) -+#ifndef __loongarch_soft_float -+DL_OFFSET_RV_VEC0 offsetof(struct La_loongarch_retval, lrv_vec0) -+#endif -diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h -index 066bb233..8a2db9de 100644 ---- a/sysdeps/loongarch/dl-machine.h -+++ b/sysdeps/loongarch/dl-machine.h -@@ -273,6 +273,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], - #if !defined __loongarch_soft_float - extern void _dl_runtime_resolve_lasx (void) attribute_hidden; - extern void _dl_runtime_resolve_lsx (void) attribute_hidden; -+ extern void _dl_runtime_profile_lasx (void) attribute_hidden; -+ extern void _dl_runtime_profile_lsx (void) attribute_hidden; - #endif - extern void _dl_runtime_resolve (void) attribute_hidden; - extern void _dl_runtime_profile (void) attribute_hidden; -@@ -287,7 +289,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], - end in this function. 
*/ - if (profile != 0) - { -- gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile; -+#if !defined __loongarch_soft_float -+ if (SUPPORT_LASX) -+ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx; -+ else if (SUPPORT_LSX) -+ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx; -+ else -+#endif -+ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile; - - if (GLRO(dl_profile) != NULL - && _dl_name_match_p (GLRO(dl_profile), l)) -diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S -index 8fd91469..bb449ecf 100644 ---- a/sysdeps/loongarch/dl-trampoline.S -+++ b/sysdeps/loongarch/dl-trampoline.S -@@ -22,190 +22,21 @@ - #if !defined __loongarch_soft_float - #define USE_LASX - #define _dl_runtime_resolve _dl_runtime_resolve_lasx -+#define _dl_runtime_profile _dl_runtime_profile_lasx - #include "dl-trampoline.h" - #undef FRAME_SIZE - #undef USE_LASX - #undef _dl_runtime_resolve -+#undef _dl_runtime_profile - - #define USE_LSX - #define _dl_runtime_resolve _dl_runtime_resolve_lsx -+#define _dl_runtime_profile _dl_runtime_profile_lsx - #include "dl-trampoline.h" - #undef FRAME_SIZE - #undef USE_LSX - #undef _dl_runtime_resolve -+#undef _dl_runtime_profile - #endif - - #include "dl-trampoline.h" -- --#include "dl-link.h" -- --ENTRY (_dl_runtime_profile) -- /* LoongArch we get called with: -- t0 linkr_map pointer -- t1 the scaled offset stored in t0, which can be used -- to calculate the offset of the current symbol in .rela.plt -- t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function -- t3 dl resolver entry point, no use in this function -- -- Stack frame layout: -- [sp, #96] La_loongarch_regs -- [sp, #48] La_loongarch_retval -- [sp, #40] frame size return from pltenter -- [sp, #32] dl_profile_call saved a1 -- [sp, #24] dl_profile_call saved a0 -- [sp, #16] T1 -- [sp, #0] ra, fp <- fp -- */ -- --# define OFFSET_T1 16 --# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 --# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 --# define OFFSET_RV OFFSET_FS + 8 --# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV -- --# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) -- -- /* Save arguments to stack. */ -- ADDI sp, sp, -SF_SIZE -- REG_S ra, sp, 0 -- REG_S fp, sp, 8 -- -- or fp, sp, zero -- -- REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG -- REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG -- REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG -- REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG -- REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG -- REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG -- REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG -- REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG -- --#ifndef __loongarch_soft_float -- FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG -- FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG -- FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG -- FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG -- FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG -- FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG -- FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG -- FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG --#endif -- -- /* Update .got.plt and obtain runtime address of callee. 
*/ -- SLLI a1, t1, 1 -- or a0, t0, zero -- ADD a1, a1, t1 -- or a2, ra, zero /* return addr */ -- ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ -- ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ -- -- REG_S a0, fp, OFFSET_SAVED_CALL_A0 -- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG -- -- la t2, _dl_profile_fixup -- jirl ra, t2, 0 -- -- REG_L t3, fp, OFFSET_FS -- bge t3, zero, 1f -- -- /* Save the return. */ -- or t4, v0, zero -- -- /* Restore arguments from stack. */ -- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG -- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG -- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG -- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG -- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG -- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG -- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG -- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG -- --#ifndef __loongarch_soft_float -- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG -- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG -- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG -- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG -- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG -- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG -- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG -- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG --#endif -- -- REG_L ra, fp, 0 -- REG_L fp, fp, SZREG -- -- ADDI sp, sp, SF_SIZE -- jirl zero, t4, 0 -- --1: -- /* The new frame size is in t3. */ -- SUB sp, fp, t3 -- BSTRINS sp, zero, 3, 0 -- -- REG_S a0, fp, OFFSET_T1 -- -- or a0, sp, zero -- ADDI a1, fp, SF_SIZE -- or a2, t3, zero -- la t5, memcpy -- jirl ra, t5, 0 -- -- REG_L t6, fp, OFFSET_T1 -- -- /* Call the function. */ -- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG -- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG -- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG -- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG -- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG -- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG -- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG -- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG -- --#ifndef __loongarch_soft_float -- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG -- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG -- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG -- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG -- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG -- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG -- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG -- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG --#endif -- jirl ra, t6, 0 -- -- REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 -- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG -- --#ifndef __loongarch_soft_float -- FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 -- FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 + SZFREG --#endif -- -- /* Setup call to pltexit. 
*/ -- REG_L a0, fp, OFFSET_SAVED_CALL_A0 -- REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG -- ADDI a2, fp, OFFSET_RG -- ADDI a3, fp, OFFSET_RV -- la t7, _dl_audit_pltexit -- jirl ra, t7, 0 -- -- REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 -- REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG -- --#ifndef __loongarch_soft_float -- FREG_L fa0, fp, OFFSET_RV + DL_OFFSET_RV_FA0 -- FREG_L fa1, fp, OFFSET_RV + DL_OFFSET_RV_FA0 + SZFREG --#endif -- -- /* RA from within La_loongarch_reg. */ -- REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA -- or sp, fp, zero -- ADDI sp, sp, SF_SIZE -- REG_S fp, fp, SZREG -- -- jirl zero, ra, 0 -- --END (_dl_runtime_profile) -diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h -index 99fcacab..e298439d 100644 ---- a/sysdeps/loongarch/dl-trampoline.h -+++ b/sysdeps/loongarch/dl-trampoline.h -@@ -125,3 +125,245 @@ ENTRY (_dl_runtime_resolve) - /* Invoke the callee. */ - jirl zero, t1, 0 - END (_dl_runtime_resolve) -+ -+#include "dl-link.h" -+ -+ENTRY (_dl_runtime_profile) -+ /* LoongArch we get called with: -+ t0 linkr_map pointer -+ t1 the scaled offset stored in t0, which can be used -+ to calculate the offset of the current symbol in .rela.plt -+ t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function -+ t3 dl resolver entry point, no use in this function -+ -+ Stack frame layout: -+ [sp, #208] La_loongarch_regs -+ [sp, #128] La_loongarch_retval // align: 16 -+ [sp, #112] frame size return from pltenter -+ [sp, #80 ] dl_profile_call saved vec1 -+ [sp, #48 ] dl_profile_call saved vec0 // align: 16 -+ [sp, #32 ] dl_profile_call saved a1 -+ [sp, #24 ] dl_profile_call saved a0 -+ [sp, #16 ] T1 -+ [sp, #0 ] ra, fp <- fp -+ */ -+ -+# define OFFSET_T1 16 -+# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 -+# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 + 8 + 64 -+# define OFFSET_RV OFFSET_FS + 8 + 8 -+# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV -+ -+# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) -+ -+ /* Save arguments to stack. 
*/ -+ ADDI sp, sp, -SF_SIZE -+ REG_S ra, sp, 0 -+ REG_S fp, sp, 8 -+ -+ or fp, sp, zero -+ -+ REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG -+ REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG -+ REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG -+ REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG -+ REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG -+ REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG -+ REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG -+ REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG -+ -+#ifdef USE_LASX -+ xvst xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG -+ xvst xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG -+ xvst xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG -+ xvst xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG -+ xvst xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG -+ xvst xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG -+ xvst xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG -+ xvst xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG -+#elif defined USE_LSX -+ vst vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG -+ vst vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG -+ vst vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG -+ vst vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG -+ vst vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG -+ vst vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG -+ vst vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG -+ vst vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG -+#elif !defined __loongarch_soft_float -+ FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG -+ FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG -+ FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG -+ FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG -+ FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG -+ FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG -+ FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG -+ FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG -+#endif -+ -+ /* Update .got.plt and obtain runtime address of callee. */ -+ SLLI a1, t1, 1 -+ or a0, t0, zero -+ ADD a1, a1, t1 -+ or a2, ra, zero /* return addr */ -+ ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ -+ ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ -+ -+ REG_S a0, fp, OFFSET_SAVED_CALL_A0 -+ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG -+ -+ la t2, _dl_profile_fixup -+ jirl ra, t2, 0 -+ -+ REG_L t3, fp, OFFSET_FS -+ bge t3, zero, 1f -+ -+ /* Save the return. */ -+ or t4, v0, zero -+ -+ /* Restore arguments from stack. 
*/ -+ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG -+ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG -+ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG -+ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG -+ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG -+ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG -+ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG -+ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG -+ -+#ifdef USE_LASX -+ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG -+ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG -+ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG -+ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG -+ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG -+ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG -+ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG -+ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG -+#elif defined USE_LSX -+ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG -+ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG -+ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG -+ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG -+ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG -+ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG -+ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG -+ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG -+#elif !defined __loongarch_soft_float -+ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG -+ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG -+ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG -+ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG -+ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG -+ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG -+ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG -+ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG -+#endif -+ -+ REG_L ra, fp, 0 -+ REG_L fp, fp, SZREG -+ -+ ADDI sp, sp, SF_SIZE -+ jirl zero, t4, 0 -+ -+1: -+ /* The new frame size is in t3. */ -+ SUB sp, fp, t3 -+ BSTRINS sp, zero, 3, 0 -+ -+ REG_S a0, fp, OFFSET_T1 -+ -+ or a0, sp, zero -+ ADDI a1, fp, SF_SIZE -+ or a2, t3, zero -+ la t5, memcpy -+ jirl ra, t5, 0 -+ -+ REG_L t6, fp, OFFSET_T1 -+ -+ /* Call the function. 
*/ -+ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG -+ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG -+ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG -+ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG -+ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG -+ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG -+ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG -+ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG -+ -+#ifdef USE_LASX -+ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG -+ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG -+ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG -+ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG -+ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG -+ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG -+ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG -+ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG -+#elif defined USE_LSX -+ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG -+ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG -+ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG -+ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG -+ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG -+ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG -+ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG -+ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG -+#elif !defined __loongarch_soft_float -+ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG -+ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG -+ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG -+ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG -+ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG -+ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG -+ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG -+ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG -+#endif -+ -+ jirl ra, t6, 0 -+ -+ REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 -+ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG -+ -+#ifdef USE_LASX -+ xvst xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 -+ xvst xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG -+#elif defined USE_LSX -+ vst vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 -+ vst vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG -+#elif !defined __loongarch_soft_float -+ FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 -+ FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG -+#endif -+ -+ /* Setup call to pltexit. */ -+ REG_L a0, fp, OFFSET_SAVED_CALL_A0 -+ REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG -+ ADDI a2, fp, OFFSET_RG -+ ADDI a3, fp, OFFSET_RV -+ la t7, _dl_audit_pltexit -+ jirl ra, t7, 0 -+ -+ REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 -+ REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG -+ -+#ifdef USE_LASX -+ xvld xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 -+ xvld xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG -+#elif defined USE_LSX -+ vld vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 -+ vld vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG -+#elif !defined __loongarch_soft_float -+ FREG_L fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 -+ FREG_L fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG -+#endif -+ -+ /* RA from within La_loongarch_reg. 
*/ -+ REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA -+ or sp, fp, zero -+ ADDI sp, sp, SF_SIZE -+ REG_S fp, fp, SZREG -+ -+ jirl zero, ra, 0 -+ -+END (_dl_runtime_profile) --- -2.33.0 - diff --git a/LoongArch-Add-minuimum-binutils-required-version.patch b/LoongArch-Add-minuimum-binutils-required-version.patch deleted file mode 100644 index 9bdcf22..0000000 --- a/LoongArch-Add-minuimum-binutils-required-version.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 7353f21f6ed1754b67e455e2b80123787efa9e91 Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Tue, 8 Aug 2023 14:15:43 +0800 -Subject: [PATCH 02/29] LoongArch: Add minuimum binutils required version - -LoongArch glibc can add some LASX/LSX vector instructions codes, -change the required minimum binutils version to 2.41 which could -support vector instructions. HAVE_LOONGARCH_VEC_ASM is removed -accordingly. - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - config.h.in | 5 ----- - sysdeps/loongarch/configure | 5 ++--- - sysdeps/loongarch/configure.ac | 4 ++-- - sysdeps/loongarch/dl-machine.h | 4 ++-- - sysdeps/loongarch/dl-trampoline.S | 2 +- - 5 files changed, 7 insertions(+), 13 deletions(-) - -diff --git a/config.h.in b/config.h.in -index 0dedc124..44a34072 100644 ---- a/config.h.in -+++ b/config.h.in -@@ -141,11 +141,6 @@ - /* LOONGARCH floating-point ABI for ld.so. */ - #undef LOONGARCH_ABI_FRLEN - --/* Assembler support LoongArch LASX/LSX vector instructions. -- This macro becomes obsolete when glibc increased the minimum -- required version of GNU 'binutils' to 2.41 or later. */ --#define HAVE_LOONGARCH_VEC_ASM 0 -- - /* Linux specific: minimum supported kernel version. */ - #undef __LINUX_KERNEL_VERSION - -diff --git a/sysdeps/loongarch/configure b/sysdeps/loongarch/configure -index 5843c7cf..395ddc92 100644 ---- a/sysdeps/loongarch/configure -+++ b/sysdeps/loongarch/configure -@@ -128,8 +128,7 @@ rm -f conftest* - fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_loongarch_vec_asm" >&5 - printf "%s\n" "$libc_cv_loongarch_vec_asm" >&6; } --if test $libc_cv_loongarch_vec_asm = yes; then -- printf "%s\n" "#define HAVE_LOONGARCH_VEC_ASM 1" >>confdefs.h -- -+if test $libc_cv_loongarch_vec_asm = no; then -+ as_fn_error $? "binutils version is too old, use 2.41 or newer version" "$LINENO" 5 - fi - -diff --git a/sysdeps/loongarch/configure.ac b/sysdeps/loongarch/configure.ac -index ba89d834..989287c6 100644 ---- a/sysdeps/loongarch/configure.ac -+++ b/sysdeps/loongarch/configure.ac -@@ -74,6 +74,6 @@ else - libc_cv_loongarch_vec_asm=no - fi - rm -f conftest*]) --if test $libc_cv_loongarch_vec_asm = yes; then -- AC_DEFINE(HAVE_LOONGARCH_VEC_ASM) -+if test $libc_cv_loongarch_vec_asm = no; then -+ AC_MSG_ERROR([binutils version is too old, use 2.41 or newer version]) - fi -diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h -index 51ce9af8..066bb233 100644 ---- a/sysdeps/loongarch/dl-machine.h -+++ b/sysdeps/loongarch/dl-machine.h -@@ -270,7 +270,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], - /* If using PLTs, fill in the first two entries of .got.plt. 
*/ - if (l->l_info[DT_JMPREL]) - { --#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float -+#if !defined __loongarch_soft_float - extern void _dl_runtime_resolve_lasx (void) attribute_hidden; - extern void _dl_runtime_resolve_lsx (void) attribute_hidden; - #endif -@@ -300,7 +300,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], - /* This function will get called to fix up the GOT entry - indicated by the offset on the stack, and then jump to - the resolved address. */ --#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float -+#if !defined __loongarch_soft_float - if (SUPPORT_LASX) - gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx; - else if (SUPPORT_LSX) -diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S -index f6ba5e44..8fd91469 100644 ---- a/sysdeps/loongarch/dl-trampoline.S -+++ b/sysdeps/loongarch/dl-trampoline.S -@@ -19,7 +19,7 @@ - #include - #include - --#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float -+#if !defined __loongarch_soft_float - #define USE_LASX - #define _dl_runtime_resolve _dl_runtime_resolve_lasx - #include "dl-trampoline.h" --- -2.33.0 - diff --git a/LoongArch-Change-loongarch-to-LoongArch-in-comments.patch b/LoongArch-Change-loongarch-to-LoongArch-in-comments.patch deleted file mode 100644 index 1f7bff6..0000000 --- a/LoongArch-Change-loongarch-to-LoongArch-in-comments.patch +++ /dev/null @@ -1,277 +0,0 @@ -From e5ccd79e81de7ad5821fde83875973e878d85d4b Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Mon, 28 Aug 2023 10:08:40 +0800 -Subject: [PATCH 19/29] LoongArch: Change loongarch to LoongArch in comments - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S | 2 +- - sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S | 2 +- - sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S | 2 +- - sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S | 2 +- - sysdeps/loongarch/lp64/multiarch/memmove-aligned.S | 2 +- - sysdeps/loongarch/lp64/multiarch/memmove-lasx.S | 2 +- - sysdeps/loongarch/lp64/multiarch/memmove-lsx.S | 2 +- - sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S | 2 +- - sysdeps/loongarch/lp64/multiarch/strchr-aligned.S | 2 +- - sysdeps/loongarch/lp64/multiarch/strchr-lasx.S | 2 +- - sysdeps/loongarch/lp64/multiarch/strchr-lsx.S | 2 +- - sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S | 2 +- - sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S | 2 +- - sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S | 2 +- - sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S | 2 +- - sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S | 2 +- - sysdeps/loongarch/lp64/multiarch/strlen-aligned.S | 2 +- - sysdeps/loongarch/lp64/multiarch/strlen-lasx.S | 2 +- - sysdeps/loongarch/lp64/multiarch/strlen-lsx.S | 2 +- - sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S | 2 +- - sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S | 2 +- - sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S | 2 +- - sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S | 2 +- - sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S | 2 +- - 24 files changed, 24 insertions(+), 24 deletions(-) - -diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S -index 299dd49c..7eb34395 100644 ---- a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S -+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S -@@ -1,4 +1,4 @@ --/* Optimized memcpy_aligned implementation using basic Loongarch instructions. 
-+/* Optimized memcpy_aligned implementation using basic LoongArch instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S -index 4aae5bf8..ae148df5 100644 ---- a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S -+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S -@@ -1,4 +1,4 @@ --/* Optimized memcpy implementation using Loongarch LASX instructions. -+/* Optimized memcpy implementation using LoongArch LASX instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S -index 6ebbe7a2..feb2bb0e 100644 ---- a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S -+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S -@@ -1,4 +1,4 @@ --/* Optimized memcpy implementation using Loongarch LSX instructions. -+/* Optimized memcpy implementation using LoongArch LSX instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S -index 8e60a22d..31019b13 100644 ---- a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S -+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S -@@ -1,4 +1,4 @@ --/* Optimized unaligned memcpy implementation using basic Loongarch instructions. -+/* Optimized unaligned memcpy implementation using basic LoongArch instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S -index 5354f383..a02114c0 100644 ---- a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S -+++ b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S -@@ -1,4 +1,4 @@ --/* Optimized memmove_aligned implementation using basic Loongarch instructions. -+/* Optimized memmove_aligned implementation using basic LoongArch instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S -index ff68e7a2..95d8ee7b 100644 ---- a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S -+++ b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S -@@ -1,4 +1,4 @@ --/* Optimized memmove implementation using Loongarch LASX instructions. -+/* Optimized memmove implementation using LoongArch LASX instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S -index 9e1502a7..8a936770 100644 ---- a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S -+++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S -@@ -1,4 +1,4 @@ --/* Optimized memmove implementation using Loongarch LSX instructions. -+/* Optimized memmove implementation using LoongArch LSX instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. 
-diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S -index 90a64b6b..3284ce25 100644 ---- a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S -+++ b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S -@@ -1,4 +1,4 @@ --/* Optimized memmove_unaligned implementation using basic Loongarch instructions. -+/* Optimized memmove_unaligned implementation using basic LoongArch instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S -index 5fb01806..62020054 100644 ---- a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S -+++ b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S -@@ -1,4 +1,4 @@ --/* Optimized strchr implementation using basic Loongarch instructions. -+/* Optimized strchr implementation using basic LoongArch instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S -index 254402da..4d3cc588 100644 ---- a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S -+++ b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S -@@ -1,4 +1,4 @@ --/* Optimized strchr implementation using loongarch LASX SIMD instructions. -+/* Optimized strchr implementation using LoongArch LASX instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S -index dae98b0a..8b78c35c 100644 ---- a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S -+++ b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S -@@ -1,4 +1,4 @@ --/* Optimized strlen implementation using loongarch LSX SIMD instructions. -+/* Optimized strlen implementation using LoongArch LSX instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S -index 1c01a023..20856a06 100644 ---- a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S -+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S -@@ -1,4 +1,4 @@ --/* Optimized strchrnul implementation using basic Loongarch instructions. -+/* Optimized strchrnul implementation using basic LoongArch instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S -index d45495e4..4753d4ce 100644 ---- a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S -+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S -@@ -1,4 +1,4 @@ --/* Optimized strchrnul implementation using loongarch LASX SIMD instructions. -+/* Optimized strchrnul implementation using LoongArch LASX instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S -index 07d793ae..671e740c 100644 ---- a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S -+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S -@@ -1,4 +1,4 @@ --/* Optimized strchrnul implementation using loongarch LSX SIMD instructions. 
-+/* Optimized strchrnul implementation using LoongArch LSX instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S -index f5f4f336..ba1f9667 100644 ---- a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S -+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S -@@ -1,4 +1,4 @@ --/* Optimized strcmp implementation using basic Loongarch instructions. -+/* Optimized strcmp implementation using basic LoongArch instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S -index 2e177a38..091c8c9e 100644 ---- a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S -+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S -@@ -1,4 +1,4 @@ --/* Optimized strcmp implementation using Loongarch LSX instructions. -+/* Optimized strcmp implementation using LoongArch LSX instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S -index e9e1d2fc..ed0548e4 100644 ---- a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S -+++ b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S -@@ -1,4 +1,4 @@ --/* Optimized strlen implementation using basic Loongarch instructions. -+/* Optimized strlen implementation using basic LoongArch instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S -index 258c47ce..91342f34 100644 ---- a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S -+++ b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S -@@ -1,4 +1,4 @@ --/* Optimized strlen implementation using loongarch LASX SIMD instructions. -+/* Optimized strlen implementation using LoongArch LASX instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S -index b194355e..b09c12e0 100644 ---- a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S -+++ b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S -@@ -1,4 +1,4 @@ --/* Optimized strlen implementation using Loongarch LSX SIMD instructions. -+/* Optimized strlen implementation using LoongArch LSX instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S -index e2687fa7..f63de872 100644 ---- a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S -+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S -@@ -1,4 +1,4 @@ --/* Optimized strncmp implementation using basic Loongarch instructions. -+/* Optimized strncmp implementation using basic LoongArch instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. 
-diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S -index 0b4eee2a..83cb801d 100644 ---- a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S -+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S -@@ -1,4 +1,4 @@ --/* Optimized strncmp implementation using Loongarch LSX instructions. -+/* Optimized strncmp implementation using LoongArch LSX instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S -index b900430a..a8296a1b 100644 ---- a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S -+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S -@@ -1,4 +1,4 @@ --/* Optimized strnlen implementation using basic Loongarch instructions. -+/* Optimized strnlen implementation using basic LoongArch instructions. - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S -index 2c03d3d9..aa6c812d 100644 ---- a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S -+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S -@@ -1,4 +1,4 @@ --/* Optimized strnlen implementation using loongarch LASX instructions -+/* Optimized strnlen implementation using LoongArch LASX instructions - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. -diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S -index b769a895..d0febe3e 100644 ---- a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S -+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S -@@ -1,4 +1,4 @@ --/* Optimized strnlen implementation using loongarch LSX instructions -+/* Optimized strnlen implementation using LoongArch LSX instructions - Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. --- -2.33.0 - diff --git a/LoongArch-Change-to-put-magic-number-to-.rodata-sect.patch b/LoongArch-Change-to-put-magic-number-to-.rodata-sect.patch deleted file mode 100644 index 896eb1f..0000000 --- a/LoongArch-Change-to-put-magic-number-to-.rodata-sect.patch +++ /dev/null @@ -1,67 +0,0 @@ -From fb72c81f9894b23797f6e2e066532c0963f5155f Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Wed, 13 Sep 2023 15:35:01 +0800 -Subject: [PATCH 24/29] LoongArch: Change to put magic number to .rodata - section - -Change to put magic number to .rodata section in memmove-lsx, and use -pcalau12i and %pc_lo12 with vld to get the data. 
- -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - .../loongarch/lp64/multiarch/memmove-lsx.S | 20 +++++++++---------- - 1 file changed, 10 insertions(+), 10 deletions(-) - -diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S -index 8a936770..5eb819ef 100644 ---- a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S -+++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S -@@ -209,13 +209,10 @@ L(al_less_16): - nop - - --L(magic_num): -- .dword 0x0706050403020100 -- .dword 0x0f0e0d0c0b0a0908 - L(unaligned): -- pcaddi t2, -4 -+ pcalau12i t2, %pc_hi20(L(INDEX)) - bstrins.d a1, zero, 3, 0 -- vld vr8, t2, 0 -+ vld vr8, t2, %pc_lo12(L(INDEX)) - vld vr0, a1, 0 - - vld vr1, a1, 16 -@@ -413,13 +410,10 @@ L(back_al_less_16): - vst vr1, a0, 0 - jr ra - --L(magic_num_2): -- .dword 0x0706050403020100 -- .dword 0x0f0e0d0c0b0a0908 - L(back_unaligned): -- pcaddi t2, -4 -+ pcalau12i t2, %pc_hi20(L(INDEX)) - bstrins.d a4, zero, 3, 0 -- vld vr8, t2, 0 -+ vld vr8, t2, %pc_lo12(L(INDEX)) - vld vr0, a4, 0 - - vld vr1, a4, -16 -@@ -529,6 +523,12 @@ L(back_un_less_16): - jr ra - END(MEMMOVE_NAME) - -+ .section .rodata.cst16,"M",@progbits,16 -+ .align 4 -+L(INDEX): -+ .dword 0x0706050403020100 -+ .dword 0x0f0e0d0c0b0a0908 -+ - libc_hidden_builtin_def (MEMCPY_NAME) - libc_hidden_builtin_def (MEMMOVE_NAME) - #endif --- -2.33.0 - diff --git a/LoongArch-Micro-optimize-LD_PCREL.patch b/LoongArch-Micro-optimize-LD_PCREL.patch deleted file mode 100644 index 0362e34..0000000 --- a/LoongArch-Micro-optimize-LD_PCREL.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 7f703cf758c4f185dd62f2a4f463002bb514af16 Mon Sep 17 00:00:00 2001 -From: Xi Ruoyao -Date: Sun, 27 Aug 2023 00:36:51 +0800 -Subject: [PATCH 13/29] LoongArch: Micro-optimize LD_PCREL - -We are requiring Binutils >= 2.41, so explicit relocation syntax is -always supported by the assembler. Use it to reduce one instruction. - -Signed-off-by: Xi Ruoyao -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/unix/sysv/linux/loongarch/pointer_guard.h | 10 ++++------ - 1 file changed, 4 insertions(+), 6 deletions(-) - -diff --git a/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h b/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h -index b25e353b..d6c78687 100644 ---- a/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h -+++ b/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h -@@ -19,17 +19,15 @@ - #ifndef POINTER_GUARD_H - #define POINTER_GUARD_H - --/* Load a got-relative EXPR into G, using T. -- Note G and T are register names. */ -+/* Load a got-relative EXPR into register G. */ - #define LD_GLOBAL(G, EXPR) \ - la.global G, EXPR; \ - REG_L G, G, 0; - --/* Load a pc-relative EXPR into G, using T. -- Note G and T are register names. */ -+/* Load a pc-relative EXPR into register G. */ - #define LD_PCREL(G, EXPR) \ -- la.pcrel G, EXPR; \ -- REG_L G, G, 0; -+ pcalau12i G, %pc_hi20(EXPR); \ -+ REG_L G, G, %pc_lo12(EXPR); - - #if (IS_IN (rtld) \ - || (!defined SHARED && (IS_IN (libc) \ --- -2.33.0 - diff --git a/LoongArch-Redefine-macro-LEAF-ENTRY.patch b/LoongArch-Redefine-macro-LEAF-ENTRY.patch deleted file mode 100644 index 414ba83..0000000 --- a/LoongArch-Redefine-macro-LEAF-ENTRY.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 8dcd8c837df2e3cf81675522487697522f1542f8 Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Tue, 8 Aug 2023 14:15:42 +0800 -Subject: [PATCH 01/29] LoongArch: Redefine macro LEAF/ENTRY. - -The following usage of macro LEAF/ENTRY are all feasible: -1. 
LEAF(fcn) -- the align value of fcn is .align 3(default value) -2. LEAF(fcn, 6) -- the align value of fcn is .align 6 - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/sys/asm.h | 36 ++++++++++++++++++++++++++---------- - 1 file changed, 26 insertions(+), 10 deletions(-) - -diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h -index d1a279b8..c5eb8afa 100644 ---- a/sysdeps/loongarch/sys/asm.h -+++ b/sysdeps/loongarch/sys/asm.h -@@ -39,16 +39,32 @@ - #define FREG_L fld.d - #define FREG_S fst.d - --/* Declare leaf routine. */ --#define LEAF(symbol) \ -- .text; \ -- .globl symbol; \ -- .align 3; \ -- cfi_startproc; \ -- .type symbol, @function; \ -- symbol: -- --#define ENTRY(symbol) LEAF (symbol) -+/* Declare leaf routine. -+ The usage of macro LEAF/ENTRY is as follows: -+ 1. LEAF(fcn) -- the align value of fcn is .align 3 (default value) -+ 2. LEAF(fcn, 6) -- the align value of fcn is .align 6 -+*/ -+#define LEAF_IMPL(symbol, aln, ...) \ -+ .text; \ -+ .globl symbol; \ -+ .align aln; \ -+ .type symbol, @function; \ -+symbol: \ -+ cfi_startproc; -+ -+ -+#define LEAF(...) LEAF_IMPL(__VA_ARGS__, 3) -+#define ENTRY(...) LEAF(__VA_ARGS__) -+ -+#define LEAF_NO_ALIGN(symbol) \ -+ .text; \ -+ .globl symbol; \ -+ .type symbol, @function; \ -+symbol: \ -+ cfi_startproc; -+ -+#define ENTRY_NO_ALIGN(symbol) LEAF_NO_ALIGN(symbol) -+ - - /* Mark end of function. */ - #undef END --- -2.33.0 - diff --git a/LoongArch-Remove-support-code-for-old-linker-in-star.patch b/LoongArch-Remove-support-code-for-old-linker-in-star.patch deleted file mode 100644 index 3d688da..0000000 --- a/LoongArch-Remove-support-code-for-old-linker-in-star.patch +++ /dev/null @@ -1,56 +0,0 @@ -From f8d66a269cb6f1a7087afadf3375bdf0553abf53 Mon Sep 17 00:00:00 2001 -From: Xi Ruoyao -Date: Sun, 27 Aug 2023 00:36:50 +0800 -Subject: [PATCH 12/29] LoongArch: Remove support code for old linker in - start.S - -We are requiring Binutils >= 2.41, so la.pcrel always works here. - -Signed-off-by: Xi Ruoyao -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/start.S | 19 +++---------------- - 1 file changed, 3 insertions(+), 16 deletions(-) - -diff --git a/sysdeps/loongarch/start.S b/sysdeps/loongarch/start.S -index e9d82033..bf6bfc9e 100644 ---- a/sysdeps/loongarch/start.S -+++ b/sysdeps/loongarch/start.S -@@ -60,20 +60,7 @@ ENTRY (ENTRY_POINT) - cfi_undefined (1) - or a5, a0, zero /* rtld_fini */ - --#if ENABLE_STATIC_PIE --/* For static PIE, the GOT cannot be used in _start because the GOT entries are -- offsets instead of real addresses before __libc_start_main. -- __libc_start_main and/or main may be not local, so we rely on the linker to -- produce PLT entries for them. GNU ld >= 2.40 supports this. */ --# define LA la.pcrel --#else --/* Old GNU ld (< 2.40) cannot handle PC relative address against a non-local -- function correctly. We deem these old linkers failing to support static PIE -- and load the addresses from GOT. 
*/ --# define LA la.got --#endif -- -- LA a0, t0, main -+ la.pcrel a0, t0, main - REG_L a1, sp, 0 - ADDI a2, sp, SZREG - -@@ -84,9 +71,9 @@ ENTRY (ENTRY_POINT) - move a4, zero /* used to be fini */ - or a6, sp, zero /* stack_end */ - -- LA ra, t0, __libc_start_main -+ la.pcrel ra, t0, __libc_start_main - jirl ra, ra, 0 - -- LA ra, t0, abort -+ la.pcrel ra, t0, abort - jirl ra, ra, 0 - END (ENTRY_POINT) --- -2.33.0 - diff --git a/LoongArch-Replace-deprecated-v0-with-a0-to-eliminate.patch b/LoongArch-Replace-deprecated-v0-with-a0-to-eliminate.patch deleted file mode 100644 index 82ae1be..0000000 --- a/LoongArch-Replace-deprecated-v0-with-a0-to-eliminate.patch +++ /dev/null @@ -1,28 +0,0 @@ -From b4b4bb7c9220a0bbdf5aec0ac8c1de1d22329280 Mon Sep 17 00:00:00 2001 -From: caiyinyu -Date: Thu, 14 Sep 2023 19:48:24 +0800 -Subject: [PATCH 21/29] LoongArch: Replace deprecated $v0 with $a0 to eliminate - 'as' Warnings. - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/dl-machine.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h -index 8a2db9de..57913cef 100644 ---- a/sysdeps/loongarch/dl-machine.h -+++ b/sysdeps/loongarch/dl-machine.h -@@ -90,7 +90,7 @@ static inline ElfW (Addr) elf_machine_dynamic (void) - or $a0, $sp, $zero \n\ - bl _dl_start \n\ - # Stash user entry point in s0. \n\ -- or $s0, $v0, $zero \n\ -+ or $s0, $a0, $zero \n\ - # Load the original argument count. \n\ - ld.d $a1, $sp, 0 \n\ - # Call _dl_init (struct link_map *main_map, int argc, \ --- -2.33.0 - diff --git a/LoongArch-Unify-Register-Names.patch b/LoongArch-Unify-Register-Names.patch deleted file mode 100644 index 9473b78..0000000 --- a/LoongArch-Unify-Register-Names.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 458ab6d5f39cca1cabd83abd2022f67491f6f5ed Mon Sep 17 00:00:00 2001 -From: caiyinyu -Date: Fri, 20 Oct 2023 09:20:02 +0800 -Subject: [PATCH 27/29] LoongArch: Unify Register Names. - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/__longjmp.S | 20 ++++++++++---------- - sysdeps/loongarch/setjmp.S | 18 +++++++++--------- - 2 files changed, 19 insertions(+), 19 deletions(-) - -diff --git a/sysdeps/loongarch/__longjmp.S b/sysdeps/loongarch/__longjmp.S -index cbde1946..e87ce311 100644 ---- a/sysdeps/loongarch/__longjmp.S -+++ b/sysdeps/loongarch/__longjmp.S -@@ -43,18 +43,18 @@ ENTRY (__longjmp) - REG_L s8, a0, 12*SZREG - - #ifndef __loongarch_soft_float -- FREG_L $f24, a0, 13*SZREG + 0*SZFREG -- FREG_L $f25, a0, 13*SZREG + 1*SZFREG -- FREG_L $f26, a0, 13*SZREG + 2*SZFREG -- FREG_L $f27, a0, 13*SZREG + 3*SZFREG -- FREG_L $f28, a0, 13*SZREG + 4*SZFREG -- FREG_L $f29, a0, 13*SZREG + 5*SZFREG -- FREG_L $f30, a0, 13*SZREG + 6*SZFREG -- FREG_L $f31, a0, 13*SZREG + 7*SZFREG -+ FREG_L fs0, a0, 13*SZREG + 0*SZFREG -+ FREG_L fs1, a0, 13*SZREG + 1*SZFREG -+ FREG_L fs2, a0, 13*SZREG + 2*SZFREG -+ FREG_L fs3, a0, 13*SZREG + 3*SZFREG -+ FREG_L fs4, a0, 13*SZREG + 4*SZFREG -+ FREG_L fs5, a0, 13*SZREG + 5*SZFREG -+ FREG_L fs6, a0, 13*SZREG + 6*SZFREG -+ FREG_L fs7, a0, 13*SZREG + 7*SZFREG - #endif - -- sltui a0,a1,1 -+ sltui a0, a1, 1 - ADD a0, a0, a1 # a0 = (a1 == 0) ? 
1 : a1 -- jirl zero,ra,0 -+ jirl zero, ra, 0 - - END (__longjmp) -diff --git a/sysdeps/loongarch/setjmp.S b/sysdeps/loongarch/setjmp.S -index 6c7065cd..b6e4f727 100644 ---- a/sysdeps/loongarch/setjmp.S -+++ b/sysdeps/loongarch/setjmp.S -@@ -52,19 +52,19 @@ ENTRY (__sigsetjmp) - REG_S s8, a0, 12*SZREG - - #ifndef __loongarch_soft_float -- FREG_S $f24, a0, 13*SZREG + 0*SZFREG -- FREG_S $f25, a0, 13*SZREG + 1*SZFREG -- FREG_S $f26, a0, 13*SZREG + 2*SZFREG -- FREG_S $f27, a0, 13*SZREG + 3*SZFREG -- FREG_S $f28, a0, 13*SZREG + 4*SZFREG -- FREG_S $f29, a0, 13*SZREG + 5*SZFREG -- FREG_S $f30, a0, 13*SZREG + 6*SZFREG -- FREG_S $f31, a0, 13*SZREG + 7*SZFREG -+ FREG_S fs0, a0, 13*SZREG + 0*SZFREG -+ FREG_S fs1, a0, 13*SZREG + 1*SZFREG -+ FREG_S fs2, a0, 13*SZREG + 2*SZFREG -+ FREG_S fs3, a0, 13*SZREG + 3*SZFREG -+ FREG_S fs4, a0, 13*SZREG + 4*SZFREG -+ FREG_S fs5, a0, 13*SZREG + 5*SZFREG -+ FREG_S fs6, a0, 13*SZREG + 6*SZFREG -+ FREG_S fs7, a0, 13*SZREG + 7*SZFREG - #endif - - #if !IS_IN (libc) && IS_IN(rtld) - li.w v0, 0 -- jirl zero,ra,0 -+ jirl zero, ra, 0 - #else - b __sigjmp_save - #endif --- -2.33.0 - diff --git a/LoongArch-Update-hwcap.h-to-sync-with-LoongArch-kern.patch b/LoongArch-Update-hwcap.h-to-sync-with-LoongArch-kern.patch deleted file mode 100644 index 27ee625..0000000 --- a/LoongArch-Update-hwcap.h-to-sync-with-LoongArch-kern.patch +++ /dev/null @@ -1,24 +0,0 @@ -From 4828d1aa0028e819a5fb336d962e8f7cbfedf8b4 Mon Sep 17 00:00:00 2001 -From: caiyinyu -Date: Mon, 23 Oct 2023 15:53:38 +0800 -Subject: [PATCH 28/29] LoongArch: Update hwcap.h to sync with LoongArch - kernel. - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h -index 5104b69c..7acec23d 100644 ---- a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h -+++ b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h -@@ -35,3 +35,4 @@ - #define HWCAP_LOONGARCH_LBT_X86 (1 << 10) - #define HWCAP_LOONGARCH_LBT_ARM (1 << 11) - #define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) -+#define HWCAP_LOONGARCH_PTW (1 << 13) --- -2.33.0 - diff --git a/LoongArch-elf-Add-new-LoongArch-reloc-types-109-into.patch b/LoongArch-elf-Add-new-LoongArch-reloc-types-109-into.patch deleted file mode 100644 index b9b6f0d..0000000 --- a/LoongArch-elf-Add-new-LoongArch-reloc-types-109-into.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 4938840b15ff9734fdcc63cc0744ce3f3bbb0b16 Mon Sep 17 00:00:00 2001 -From: caiyinyu -Date: Mon, 14 Aug 2023 15:34:08 +0800 -Subject: [PATCH 05/29] LoongArch: elf: Add new LoongArch reloc types 109 into - elf.h - -These reloc types are generated by GNU assembler >= 2.41 for relaxation -support. - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - elf/elf.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/elf/elf.h b/elf/elf.h -index d623bdeb..9c51073f 100644 ---- a/elf/elf.h -+++ b/elf/elf.h -@@ -4213,6 +4213,7 @@ enum - #define R_LARCH_SUB6 106 - #define R_LARCH_ADD_ULEB128 107 - #define R_LARCH_SUB_ULEB128 108 -+#define R_LARCH_64_PCREL 109 - - /* ARC specific declarations. 
*/ - --- -2.33.0 - diff --git a/Loongarch-Add-ifunc-support-and-add-different-versio.patch b/Loongarch-Add-ifunc-support-and-add-different-versio.patch deleted file mode 100644 index aae8ddc..0000000 --- a/Loongarch-Add-ifunc-support-and-add-different-versio.patch +++ /dev/null @@ -1,528 +0,0 @@ -From 43abd8772a143cd96688c081500397dd712e631b Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Tue, 8 Aug 2023 14:15:44 +0800 -Subject: [PATCH 03/29] Loongarch: Add ifunc support and add different versions - of strlen - -strlen-lasx is implemeted by LASX simd instructions(256bit) -strlen-lsx is implemeted by LSX simd instructions(128bit) -strlen-align is implemented by LA basic instructions and never use unaligned memory acess - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/lp64/multiarch/Makefile | 7 ++ - .../lp64/multiarch/ifunc-impl-list.c | 41 +++++++ - .../loongarch/lp64/multiarch/ifunc-strlen.h | 40 +++++++ - .../loongarch/lp64/multiarch/strlen-aligned.S | 100 ++++++++++++++++++ - .../loongarch/lp64/multiarch/strlen-lasx.S | 63 +++++++++++ - sysdeps/loongarch/lp64/multiarch/strlen-lsx.S | 71 +++++++++++++ - sysdeps/loongarch/lp64/multiarch/strlen.c | 37 +++++++ - sysdeps/loongarch/sys/regdef.h | 57 ++++++++++ - .../unix/sysv/linux/loongarch/cpu-features.h | 2 + - 9 files changed, 418 insertions(+) - create mode 100644 sysdeps/loongarch/lp64/multiarch/Makefile - create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c - create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h - create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-aligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-lasx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-lsx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen.c - -diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile -new file mode 100644 -index 00000000..76c506c9 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/Makefile -@@ -0,0 +1,7 @@ -+ifeq ($(subdir),string) -+sysdep_routines += \ -+ strlen-aligned \ -+ strlen-lsx \ -+ strlen-lasx \ -+# sysdep_routines -+endif -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -new file mode 100644 -index 00000000..1a2a576f ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -@@ -0,0 +1,41 @@ -+/* Enumerate available IFUNC implementations of a function LoongArch64 version. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . 
*/ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+size_t -+__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, -+ size_t max) -+{ -+ -+ size_t i = max; -+ -+ IFUNC_IMPL (i, name, strlen, -+#if !defined __loongarch_soft_float -+ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LASX, __strlen_lasx) -+ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LSX, __strlen_lsx) -+#endif -+ IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned) -+ ) -+ return i; -+} -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h -new file mode 100644 -index 00000000..6258bb76 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h -@@ -0,0 +1,40 @@ -+/* Common definition for strlen ifunc selections. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+#include -+#include -+ -+#if !defined __loongarch_soft_float -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; -+#endif -+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; -+ -+static inline void * -+IFUNC_SELECTOR (void) -+{ -+#if !defined __loongarch_soft_float -+ if (SUPPORT_LASX) -+ return OPTIMIZE (lasx); -+ else if (SUPPORT_LSX) -+ return OPTIMIZE (lsx); -+ else -+#endif -+ return OPTIMIZE (aligned); -+} -diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S -new file mode 100644 -index 00000000..e9e1d2fc ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S -@@ -0,0 +1,100 @@ -+/* Optimized strlen implementation using basic Loongarch instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+# define STRLEN __strlen_aligned -+#else -+# define STRLEN strlen -+#endif -+ -+LEAF(STRLEN, 6) -+ move a1, a0 -+ bstrins.d a0, zero, 2, 0 -+ lu12i.w a2, 0x01010 -+ li.w t0, -1 -+ -+ ld.d t2, a0, 0 -+ andi t1, a1, 0x7 -+ ori a2, a2, 0x101 -+ slli.d t1, t1, 3 -+ -+ bstrins.d a2, a2, 63, 32 -+ sll.d t1, t0, t1 -+ slli.d t3, a2, 7 -+ nor a3, zero, t3 -+ -+ orn t2, t2, t1 -+ sub.d t0, t2, a2 -+ nor t1, t2, a3 -+ and t0, t0, t1 -+ -+ -+ bnez t0, L(count_pos) -+ addi.d a0, a0, 8 -+L(loop_16_7bit): -+ ld.d t2, a0, 0 -+ sub.d t1, t2, a2 -+ -+ and t0, t1, t3 -+ bnez t0, L(more_check) -+ ld.d t2, a0, 8 -+ sub.d t1, t2, a2 -+ -+ and t0, t1, t3 -+ addi.d a0, a0, 16 -+ beqz t0, L(loop_16_7bit) -+ addi.d a0, a0, -8 -+ -+L(more_check): -+ nor t0, t2, a3 -+ and t0, t1, t0 -+ bnez t0, L(count_pos) -+ addi.d a0, a0, 8 -+ -+ -+L(loop_16_8bit): -+ ld.d t2, a0, 0 -+ sub.d t1, t2, a2 -+ nor t0, t2, a3 -+ and t0, t0, t1 -+ -+ bnez t0, L(count_pos) -+ ld.d t2, a0, 8 -+ addi.d a0, a0, 16 -+ sub.d t1, t2, a2 -+ -+ nor t0, t2, a3 -+ and t0, t0, t1 -+ beqz t0, L(loop_16_8bit) -+ addi.d a0, a0, -8 -+ -+L(count_pos): -+ ctz.d t1, t0 -+ sub.d a0, a0, a1 -+ srli.d t1, t1, 3 -+ add.d a0, a0, t1 -+ -+ jr ra -+END(STRLEN) -+ -+libc_hidden_builtin_def (STRLEN) -diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S -new file mode 100644 -index 00000000..258c47ce ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S -@@ -0,0 +1,63 @@ -+/* Optimized strlen implementation using loongarch LASX SIMD instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# define STRLEN __strlen_lasx -+ -+LEAF(STRLEN, 6) -+ move a1, a0 -+ bstrins.d a0, zero, 4, 0 -+ li.d t1, -1 -+ xvld xr0, a0, 0 -+ -+ xvmsknz.b xr0, xr0 -+ xvpickve.w xr1, xr0, 4 -+ vilvl.h vr0, vr1, vr0 -+ movfr2gr.s t0, fa0 # sign extend -+ -+ sra.w t0, t0, a1 -+ beq t0, t1, L(loop) -+ cto.w a0, t0 -+ jr ra -+ -+L(loop): -+ xvld xr0, a0, 32 -+ addi.d a0, a0, 32 -+ xvsetanyeqz.b fcc0, xr0 -+ bceqz fcc0, L(loop) -+ -+ -+ xvmsknz.b xr0, xr0 -+ sub.d a0, a0, a1 -+ xvpickve.w xr1, xr0, 4 -+ vilvl.h vr0, vr1, vr0 -+ -+ movfr2gr.s t0, fa0 -+ cto.w t0, t0 -+ add.d a0, a0, t0 -+ jr ra -+END(STRLEN) -+ -+libc_hidden_builtin_def (STRLEN) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S -new file mode 100644 -index 00000000..b194355e ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S -@@ -0,0 +1,71 @@ -+/* Optimized strlen implementation using Loongarch LSX SIMD instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. 
-+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# define STRLEN __strlen_lsx -+ -+LEAF(STRLEN, 6) -+ move a1, a0 -+ bstrins.d a0, zero, 4, 0 -+ vld vr0, a0, 0 -+ vld vr1, a0, 16 -+ -+ li.d t1, -1 -+ vmsknz.b vr0, vr0 -+ vmsknz.b vr1, vr1 -+ vilvl.h vr0, vr1, vr0 -+ -+ movfr2gr.s t0, fa0 -+ sra.w t0, t0, a1 -+ beq t0, t1, L(loop) -+ cto.w a0, t0 -+ -+ jr ra -+ nop -+ nop -+ nop -+ -+ -+L(loop): -+ vld vr0, a0, 32 -+ vld vr1, a0, 48 -+ addi.d a0, a0, 32 -+ vmin.bu vr2, vr0, vr1 -+ -+ vsetanyeqz.b fcc0, vr2 -+ bceqz fcc0, L(loop) -+ vmsknz.b vr0, vr0 -+ vmsknz.b vr1, vr1 -+ -+ vilvl.h vr0, vr1, vr0 -+ sub.d a0, a0, a1 -+ movfr2gr.s t0, fa0 -+ cto.w t0, t0 -+ -+ add.d a0, a0, t0 -+ jr ra -+END(STRLEN) -+ -+libc_hidden_builtin_def (STRLEN) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/strlen.c b/sysdeps/loongarch/lp64/multiarch/strlen.c -new file mode 100644 -index 00000000..381c2daa ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strlen.c -@@ -0,0 +1,37 @@ -+/* Multiple versions of strlen. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+/* Define multiple versions only for the definition in libc. 
*/ -+ -+#if IS_IN (libc) -+# define strlen __redirect_strlen -+# include -+# undef strlen -+ -+# define SYMBOL_NAME strlen -+# include "ifunc-strlen.h" -+ -+libc_ifunc_redirected (__redirect_strlen, strlen, IFUNC_SELECTOR ()); -+ -+# ifdef SHARED -+__hidden_ver1 (strlen, __GI_strlen, __redirect_strlen) -+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strlen); -+# endif -+ -+#endif -diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h -index 5100f36d..524d2e32 100644 ---- a/sysdeps/loongarch/sys/regdef.h -+++ b/sysdeps/loongarch/sys/regdef.h -@@ -89,6 +89,14 @@ - #define fs5 $f29 - #define fs6 $f30 - #define fs7 $f31 -+#define fcc0 $fcc0 -+#define fcc1 $fcc1 -+#define fcc2 $fcc2 -+#define fcc3 $fcc3 -+#define fcc4 $fcc4 -+#define fcc5 $fcc5 -+#define fcc6 $fcc6 -+#define fcc7 $fcc7 - - #define vr0 $vr0 - #define vr1 $vr1 -@@ -98,6 +106,30 @@ - #define vr5 $vr5 - #define vr6 $vr6 - #define vr7 $vr7 -+#define vr8 $vr8 -+#define vr9 $vr9 -+#define vr10 $vr10 -+#define vr11 $vr11 -+#define vr12 $vr12 -+#define vr13 $vr13 -+#define vr14 $vr14 -+#define vr15 $vr15 -+#define vr16 $vr16 -+#define vr17 $vr17 -+#define vr18 $vr18 -+#define vr19 $vr19 -+#define vr20 $vr20 -+#define vr21 $vr21 -+#define vr22 $vr22 -+#define vr23 $vr23 -+#define vr24 $vr24 -+#define vr25 $vr25 -+#define vr26 $vr26 -+#define vr27 $vr27 -+#define vr28 $vr28 -+#define vr29 $vr29 -+#define vr30 $vr30 -+#define vr31 $vr31 - - #define xr0 $xr0 - #define xr1 $xr1 -@@ -107,5 +139,30 @@ - #define xr5 $xr5 - #define xr6 $xr6 - #define xr7 $xr7 -+#define xr7 $xr7 -+#define xr8 $xr8 -+#define xr9 $xr9 -+#define xr10 $xr10 -+#define xr11 $xr11 -+#define xr12 $xr12 -+#define xr13 $xr13 -+#define xr14 $xr14 -+#define xr15 $xr15 -+#define xr16 $xr16 -+#define xr17 $xr17 -+#define xr18 $xr18 -+#define xr19 $xr19 -+#define xr20 $xr20 -+#define xr21 $xr21 -+#define xr22 $xr22 -+#define xr23 $xr23 -+#define xr24 $xr24 -+#define xr25 $xr25 -+#define xr26 $xr26 -+#define xr27 $xr27 -+#define xr28 $xr28 -+#define xr29 $xr29 -+#define xr30 $xr30 -+#define xr31 $xr31 - - #endif /* _SYS_REGDEF_H */ -diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h -index e371e13b..d1a280a5 100644 ---- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h -+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h -@@ -25,5 +25,7 @@ - #define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX) - #define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX) - -+#define INIT_ARCH() -+ - #endif /* _CPU_FEATURES_LOONGARCH64_H */ - --- -2.33.0 - diff --git a/Loongarch-Add-ifunc-support-for-memcpy-aligned-unali.patch b/Loongarch-Add-ifunc-support-for-memcpy-aligned-unali.patch deleted file mode 100644 index 2bbf367..0000000 --- a/Loongarch-Add-ifunc-support-for-memcpy-aligned-unali.patch +++ /dev/null @@ -1,2570 +0,0 @@ -From 9c522272146423c1ef9fb9e071737a8ad26e844e Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Tue, 15 Aug 2023 09:11:53 +0800 -Subject: [PATCH 07/29] Loongarch: Add ifunc support for memcpy{aligned, - unaligned, lsx, lasx} and memmove{aligned, unaligned, lsx, lasx} - -These implementations improve the time to copy data in the glibc -microbenchmark as below: -memcpy-lasx reduces the runtime about 8%-76% -memcpy-lsx reduces the runtime about 8%-72% -memcpy-unaligned reduces the runtime of unaligned data copying up to 40% -memcpy-aligned reduece the runtime of unaligned data copying up to 25% -memmove-lasx reduces the runtime about 20%-73% -memmove-lsx reduces 
the runtime about 50% -memmove-unaligned reduces the runtime of unaligned data moving up to 40% -memmove-aligned reduces the runtime of unaligned data moving up to 25% - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/lp64/multiarch/Makefile | 5 + - .../lp64/multiarch/ifunc-impl-list.c | 19 + - sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h | 45 + - .../loongarch/lp64/multiarch/memcpy-aligned.S | 783 ++++++++++++++++++ - .../loongarch/lp64/multiarch/memcpy-lasx.S | 20 + - sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S | 20 + - .../lp64/multiarch/memcpy-unaligned.S | 247 ++++++ - sysdeps/loongarch/lp64/multiarch/memcpy.c | 37 + - .../lp64/multiarch/memmove-aligned.S | 20 + - .../loongarch/lp64/multiarch/memmove-lasx.S | 287 +++++++ - .../loongarch/lp64/multiarch/memmove-lsx.S | 534 ++++++++++++ - .../lp64/multiarch/memmove-unaligned.S | 380 +++++++++ - sysdeps/loongarch/lp64/multiarch/memmove.c | 38 + - 13 files changed, 2435 insertions(+) - create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h - create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy.c - create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-aligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-lasx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-lsx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove.c - -diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile -index 110a8c5c..afa51041 100644 ---- a/sysdeps/loongarch/lp64/multiarch/Makefile -+++ b/sysdeps/loongarch/lp64/multiarch/Makefile -@@ -9,5 +9,10 @@ sysdep_routines += \ - strchrnul-aligned \ - strchrnul-lsx \ - strchrnul-lasx \ -+ memcpy-aligned \ -+ memcpy-unaligned \ -+ memmove-unaligned \ -+ memmove-lsx \ -+ memmove-lasx \ - # sysdep_routines - endif -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -index c7164b45..25eb96b0 100644 ---- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -@@ -53,5 +53,24 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - #endif - IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned) - ) -+ -+ IFUNC_IMPL (i, name, memcpy, -+#if !defined __loongarch_soft_float -+ IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) -+ IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LSX, __memcpy_lsx) -+#endif -+ IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_UAL, __memcpy_unaligned) -+ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_aligned) -+ ) -+ -+ IFUNC_IMPL (i, name, memmove, -+#if !defined __loongarch_soft_float -+ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_LASX, __memmove_lasx) -+ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_LSX, __memmove_lsx) -+#endif -+ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_UAL, __memmove_unaligned) -+ IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_aligned) -+ ) -+ - return i; - } -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h b/sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h -new file mode 100644 -index 00000000..3be67da6 ---- /dev/null -+++ 
b/sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h -@@ -0,0 +1,45 @@ -+/* Common definition for ifunc selection implementation. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+#include -+#include -+ -+#if !defined __loongarch_soft_float -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; -+#endif -+ -+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; -+extern __typeof (REDIRECT_NAME) OPTIMIZE (unaligned) attribute_hidden; -+ -+static inline void * -+IFUNC_SELECTOR (void) -+{ -+#if !defined __loongarch_soft_float -+ if (SUPPORT_LASX) -+ return OPTIMIZE (lasx); -+ else if (SUPPORT_LSX) -+ return OPTIMIZE (lsx); -+ else -+#endif -+ if (SUPPORT_UAL) -+ return OPTIMIZE (unaligned); -+ else -+ return OPTIMIZE (aligned); -+} -diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S -new file mode 100644 -index 00000000..299dd49c ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S -@@ -0,0 +1,783 @@ -+/* Optimized memcpy_aligned implementation using basic Loongarch instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+# define MEMCPY_NAME __memcpy_aligned -+# define MEMMOVE_NAME __memmove_aligned -+#else -+# define MEMCPY_NAME memcpy -+# define MEMMOVE_NAME memmove -+#endif -+ -+#define LD_64(reg, n) \ -+ ld.d t0, reg, n; \ -+ ld.d t1, reg, n + 8; \ -+ ld.d t2, reg, n + 16; \ -+ ld.d t3, reg, n + 24; \ -+ ld.d t4, reg, n + 32; \ -+ ld.d t5, reg, n + 40; \ -+ ld.d t6, reg, n + 48; \ -+ ld.d t7, reg, n + 56; -+ -+#define ST_64(reg, n) \ -+ st.d t0, reg, n; \ -+ st.d t1, reg, n + 8; \ -+ st.d t2, reg, n + 16; \ -+ st.d t3, reg, n + 24; \ -+ st.d t4, reg, n + 32; \ -+ st.d t5, reg, n + 40; \ -+ st.d t6, reg, n + 48; \ -+ st.d t7, reg, n + 56; -+ -+LEAF(MEMMOVE_NAME, 6) -+ sub.d t0, a0, a1 -+ bltu t0, a2, L(copy_back) -+END(MEMMOVE_NAME) -+ -+LEAF_NO_ALIGN(MEMCPY_NAME) -+ srai.d a3, a2, 4 -+ beqz a3, L(short_data) -+ -+ move a4, a0 -+ andi a5, a0, 0x7 -+ andi a6, a1, 0x7 -+ li.d t8, 8 -+ beqz a5, L(check_align) -+ -+ sub.d t2, t8, a5 -+ sub.d a2, a2, t2 -+ pcaddi t1, 20 -+ slli.d t3, t2, 3 -+ -+ add.d a1, a1, t2 -+ sub.d t1, t1, t3 -+ add.d a4, a4, t2 -+ jr t1 -+ -+L(al7): -+ ld.b t0, a1, -7 -+ st.b t0, a4, -7 -+L(al6): -+ ld.b t0, a1, -6 -+ st.b t0, a4, -6 -+L(al5): -+ ld.b t0, a1, -5 -+ st.b t0, a4, -5 -+L(al4): -+ ld.b t0, a1, -4 -+ st.b t0, a4, -4 -+L(al3): -+ ld.b t0, a1, -3 -+ st.b t0, a4, -3 -+L(al2): -+ ld.b t0, a1, -2 -+ st.b t0, a4, -2 -+L(al1): -+ ld.b t0, a1, -1 -+ st.b t0, a4, -1 -+ -+L(check_align): -+ bne a5, a6, L(unalign) -+ srai.d a3, a2, 4 -+ beqz a3, L(al_less_16bytes) -+ andi a3, a2, 0x3f -+ -+ beq a3, a2, L(al_less_64bytes) -+ sub.d t0, a2, a3 -+ move a2, a3 -+ add.d a5, a1, t0 -+ -+L(loop_64bytes): -+ LD_64(a1, 0) -+ addi.d a1, a1, 64 -+ ST_64(a4, 0) -+ -+ addi.d a4, a4, 64 -+ bne a1, a5, L(loop_64bytes) -+ -+L(al_less_64bytes): -+ srai.d a3, a2, 5 -+ beqz a3, L(al_less_32bytes) -+ -+ ld.d t0, a1, 0 -+ ld.d t1, a1, 8 -+ ld.d t2, a1, 16 -+ ld.d t3, a1, 24 -+ -+ addi.d a1, a1, 32 -+ addi.d a2, a2, -32 -+ -+ st.d t0, a4, 0 -+ st.d t1, a4, 8 -+ st.d t2, a4, 16 -+ st.d t3, a4, 24 -+ -+ addi.d a4, a4, 32 -+ -+L(al_less_32bytes): -+ srai.d a3, a2, 4 -+ beqz a3, L(al_less_16bytes) -+ -+ ld.d t0, a1, 0 -+ ld.d t1, a1, 8 -+ addi.d a1, a1, 16 -+ addi.d a2, a2, -16 -+ -+ st.d t0, a4, 0 -+ st.d t1, a4, 8 -+ addi.d a4, a4, 16 -+ -+L(al_less_16bytes): -+ srai.d a3, a2, 3 -+ beqz a3, L(al_less_8bytes) -+ -+ ld.d t0, a1, 0 -+ addi.d a1, a1, 8 -+ addi.d a2, a2, -8 -+ st.d t0, a4, 0 -+ addi.d a4, a4, 8 -+ -+L(al_less_8bytes): -+ srai.d a3, a2, 2 -+ beqz a3, L(al_less_4bytes) -+ -+ ld.w t0, a1, 0 -+ addi.d a1, a1, 4 -+ addi.d a2, a2, -4 -+ st.w t0, a4, 0 -+ addi.d a4, a4, 4 -+ -+L(al_less_4bytes): -+ srai.d a3, a2, 1 -+ beqz a3, L(al_less_2bytes) -+ -+ ld.h t0, a1, 0 -+ addi.d a1, a1, 2 -+ addi.d a2, a2, -2 -+ st.h t0, a4, 0 -+ addi.d a4, a4, 2 -+ -+L(al_less_2bytes): -+ beqz a2, L(al_less_1byte) -+ -+ ld.b t0, a1, 0 -+ st.b t0, a4, 0 -+ -+L(al_less_1byte): -+ jr ra -+ -+L(unalign): -+ andi a5, a1, 0x7 -+ bstrins.d a1, zero, 2, 0 -+ sub.d t8, t8, a5 -+ slli.d a5, a5, 3 -+ -+ ld.d t0, a1, 0 -+ addi.d a1, a1, 8 -+ slli.d a6, t8, 3 -+ srl.d a7, t0, a5 -+ -+ srai.d a3, a2, 4 -+ beqz a3, L(un_less_16bytes) -+ andi a3, a2, 0x3f -+ beq a3, a2, L(un_less_64bytes) -+ -+ sub.d t0, a2, a3 -+ move a2, a3 -+ add.d a3, a1, t0 -+ -+L(un_long_bytes): -+ ld.d t0, a1, 0 -+ ld.d t1, a1, 8 -+ ld.d t2, a1, 16 -+ ld.d t3, a1, 24 -+ -+ srl.d t4, t0, a5 -+ sll.d t0, t0, a6 -+ srl.d t5, t1, a5 -+ sll.d t1, t1, a6 -+ -+ srl.d t6, t2, a5 -+ sll.d t2, t2, a6 -+ srl.d t7, 
t3, a5 -+ sll.d t3, t3, a6 -+ -+ or t0, a7, t0 -+ or t1, t4, t1 -+ or t2, t5, t2 -+ or t3, t6, t3 -+ -+ ld.d t4, a1, 32 -+ ld.d t5, a1, 40 -+ ld.d t6, a1, 48 -+ ld.d a7, a1, 56 -+ -+ st.d t0, a4, 0 -+ st.d t1, a4, 8 -+ st.d t2, a4, 16 -+ st.d t3, a4, 24 -+ -+ addi.d a1, a1, 64 -+ -+ srl.d t0, t4, a5 -+ sll.d t4, t4, a6 -+ srl.d t1, t5, a5 -+ sll.d t5, t5, a6 -+ -+ srl.d t2, t6, a5 -+ sll.d t6, t6, a6 -+ sll.d t3, a7, a6 -+ srl.d a7, a7, a5 -+ -+ or t4, t7, t4 -+ or t5, t0, t5 -+ or t6, t1, t6 -+ or t3, t2, t3 -+ -+ st.d t4, a4, 32 -+ st.d t5, a4, 40 -+ st.d t6, a4, 48 -+ st.d t3, a4, 56 -+ -+ addi.d a4, a4, 64 -+ bne a3, a1, L(un_long_bytes) -+ -+L(un_less_64bytes): -+ srai.d a3, a2, 5 -+ beqz a3, L(un_less_32bytes) -+ -+ ld.d t0, a1, 0 -+ ld.d t1, a1, 8 -+ ld.d t2, a1, 16 -+ ld.d t3, a1, 24 -+ -+ addi.d a1, a1, 32 -+ addi.d a2, a2, -32 -+ -+ srl.d t4, t0, a5 -+ sll.d t0, t0, a6 -+ srl.d t5, t1, a5 -+ sll.d t1, t1, a6 -+ -+ srl.d t6, t2, a5 -+ sll.d t2, t2, a6 -+ or t0, a7, t0 -+ srl.d a7, t3, a5 -+ sll.d t3, t3, a6 -+ -+ or t1, t4, t1 -+ or t2, t5, t2 -+ or t3, t6, t3 -+ -+ st.d t0, a4, 0 -+ st.d t1, a4, 8 -+ st.d t2, a4, 16 -+ st.d t3, a4, 24 -+ -+ addi.d a4, a4, 32 -+ -+L(un_less_32bytes): -+ srai.d a3, a2, 4 -+ beqz a3, L(un_less_16bytes) -+ -+ ld.d t0, a1, 0 -+ ld.d t1, a1, 8 -+ addi.d a1, a1, 16 -+ addi.d a2, a2, -16 -+ -+ srl.d t2, t0, a5 -+ sll.d t3, t0, a6 -+ sll.d t4, t1, a6 -+ or t3, a7, t3 -+ or t4, t2, t4 -+ -+ srl.d a7, t1, a5 -+ st.d t3, a4, 0 -+ st.d t4, a4, 8 -+ addi.d a4, a4, 16 -+ -+L(un_less_16bytes): -+ srai.d a3, a2, 3 -+ beqz a3, L(un_less_8bytes) -+ -+ ld.d t0, a1, 0 -+ addi.d a1, a1, 8 -+ addi.d a2, a2, -8 -+ sll.d t1, t0, a6 -+ -+ or t2, a7, t1 -+ srl.d a7, t0, a5 -+ st.d t2, a4, 0 -+ addi.d a4, a4, 8 -+ -+L(un_less_8bytes): -+ beqz a2, L(un_less_1byte) -+ bge t8, a2, 1f -+ -+ ld.d t0, a1, 0 -+ sll.d t0, t0, a6 -+ or a7, a7, t0 -+ -+1: -+ srai.d a3, a2, 2 -+ beqz a3, L(un_less_4bytes) -+ -+ addi.d a2, a2, -4 -+ st.w a7, a4, 0 -+ addi.d a4, a4, 4 -+ srai.d a7, a7, 32 -+ -+L(un_less_4bytes): -+ srai.d a3, a2, 1 -+ beqz a3, L(un_less_2bytes) -+ -+ addi.d a2, a2, -2 -+ st.h a7, a4, 0 -+ addi.d a4, a4, 2 -+ srai.d a7, a7, 16 -+ -+L(un_less_2bytes): -+ beqz a2, L(un_less_1byte) -+ st.b a7, a4, 0 -+ -+L(un_less_1byte): -+ jr ra -+ -+L(short_data): -+ pcaddi t1, 36 -+ slli.d t2, a2, 3 -+ add.d a4, a0, a2 -+ sub.d t1, t1, t2 -+ add.d a1, a1, a2 -+ jr t1 -+ -+L(short_15_bytes): -+ ld.b t0, a1, -15 -+ st.b t0, a4, -15 -+L(short_14_bytes): -+ ld.b t0, a1, -14 -+ st.b t0, a4, -14 -+L(short_13_bytes): -+ ld.b t0, a1, -13 -+ st.b t0, a4, -13 -+L(short_12_bytes): -+ ld.b t0, a1, -12 -+ st.b t0, a4, -12 -+L(short_11_bytes): -+ ld.b t0, a1, -11 -+ st.b t0, a4, -11 -+L(short_10_bytes): -+ ld.b t0, a1, -10 -+ st.b t0, a4, -10 -+L(short_9_bytes): -+ ld.b t0, a1, -9 -+ st.b t0, a4, -9 -+L(short_8_bytes): -+ ld.b t0, a1, -8 -+ st.b t0, a4, -8 -+L(short_7_bytes): -+ ld.b t0, a1, -7 -+ st.b t0, a4, -7 -+L(short_6_bytes): -+ ld.b t0, a1, -6 -+ st.b t0, a4, -6 -+L(short_5_bytes): -+ ld.b t0, a1, -5 -+ st.b t0, a4, -5 -+L(short_4_bytes): -+ ld.b t0, a1, -4 -+ st.b t0, a4, -4 -+L(short_3_bytes): -+ ld.b t0, a1, -3 -+ st.b t0, a4, -3 -+L(short_2_bytes): -+ ld.b t0, a1, -2 -+ st.b t0, a4, -2 -+L(short_1_bytes): -+ ld.b t0, a1, -1 -+ st.b t0, a4, -1 -+ jr ra -+ -+L(copy_back): -+ srai.d a3, a2, 4 -+ beqz a3, L(back_short_data) -+ -+ add.d a4, a0, a2 -+ add.d a1, a1, a2 -+ -+ andi a5, a4, 0x7 -+ andi a6, a1, 0x7 -+ beqz a5, L(back_check_align) -+ -+ sub.d a2, a2, a5 -+ sub.d a1, a1, a5 -+ sub.d 
a4, a4, a5 -+ -+ pcaddi t1, 18 -+ slli.d t3, a5, 3 -+ sub.d t1, t1, t3 -+ jr t1 -+ -+ ld.b t0, a1, 6 -+ st.b t0, a4, 6 -+ ld.b t0, a1, 5 -+ st.b t0, a4, 5 -+ ld.b t0, a1, 4 -+ st.b t0, a4, 4 -+ ld.b t0, a1, 3 -+ st.b t0, a4, 3 -+ ld.b t0, a1, 2 -+ st.b t0, a4, 2 -+ ld.b t0, a1, 1 -+ st.b t0, a4, 1 -+ ld.b t0, a1, 0 -+ st.b t0, a4, 0 -+ -+L(back_check_align): -+ bne a5, a6, L(back_unalign) -+ -+ srai.d a3, a2, 4 -+ beqz a3, L(back_less_16bytes) -+ -+ andi a3, a2, 0x3f -+ beq a3, a2, L(back_less_64bytes) -+ -+ sub.d t0, a2, a3 -+ move a2, a3 -+ sub.d a5, a1, t0 -+ -+L(back_loop_64bytes): -+ LD_64(a1, -64) -+ addi.d a1, a1, -64 -+ ST_64(a4, -64) -+ -+ addi.d a4, a4, -64 -+ bne a1, a5, L(back_loop_64bytes) -+ -+L(back_less_64bytes): -+ srai.d a3, a2, 5 -+ beqz a3, L(back_less_32bytes) -+ -+ ld.d t0, a1, -32 -+ ld.d t1, a1, -24 -+ ld.d t2, a1, -16 -+ ld.d t3, a1, -8 -+ -+ addi.d a1, a1, -32 -+ addi.d a2, a2, -32 -+ -+ st.d t0, a4, -32 -+ st.d t1, a4, -24 -+ st.d t2, a4, -16 -+ st.d t3, a4, -8 -+ -+ addi.d a4, a4, -32 -+ -+L(back_less_32bytes): -+ srai.d a3, a2, 4 -+ beqz a3, L(back_less_16bytes) -+ -+ ld.d t0, a1, -16 -+ ld.d t1, a1, -8 -+ -+ addi.d a2, a2, -16 -+ addi.d a1, a1, -16 -+ -+ st.d t0, a4, -16 -+ st.d t1, a4, -8 -+ addi.d a4, a4, -16 -+ -+L(back_less_16bytes): -+ srai.d a3, a2, 3 -+ beqz a3, L(back_less_8bytes) -+ -+ ld.d t0, a1, -8 -+ addi.d a2, a2, -8 -+ addi.d a1, a1, -8 -+ -+ st.d t0, a4, -8 -+ addi.d a4, a4, -8 -+ -+L(back_less_8bytes): -+ srai.d a3, a2, 2 -+ beqz a3, L(back_less_4bytes) -+ -+ ld.w t0, a1, -4 -+ addi.d a2, a2, -4 -+ addi.d a1, a1, -4 -+ -+ st.w t0, a4, -4 -+ addi.d a4, a4, -4 -+ -+L(back_less_4bytes): -+ srai.d a3, a2, 1 -+ beqz a3, L(back_less_2bytes) -+ -+ ld.h t0, a1, -2 -+ addi.d a2, a2, -2 -+ addi.d a1, a1, -2 -+ -+ st.h t0, a4, -2 -+ addi.d a4, a4, -2 -+ -+L(back_less_2bytes): -+ beqz a2, L(back_less_1byte) -+ -+ ld.b t0, a1, -1 -+ st.b t0, a4, -1 -+ -+L(back_less_1byte): -+ jr ra -+ -+L(back_unalign): -+ andi t8, a1, 0x7 -+ bstrins.d a1, zero, 2, 0 -+ -+ sub.d a6, zero, t8 -+ -+ ld.d t0, a1, 0 -+ slli.d a6, a6, 3 -+ slli.d a5, t8, 3 -+ sll.d a7, t0, a6 -+ -+ srai.d a3, a2, 4 -+ beqz a3, L(back_un_less_16bytes) -+ -+ andi a3, a2, 0x3f -+ beq a3, a2, L(back_un_less_64bytes) -+ -+ sub.d t0, a2, a3 -+ move a2, a3 -+ sub.d a3, a1, t0 -+ -+L(back_un_long_bytes): -+ ld.d t0, a1, -8 -+ ld.d t1, a1, -16 -+ ld.d t2, a1, -24 -+ ld.d t3, a1, -32 -+ -+ sll.d t4, t0, a6 -+ srl.d t0, t0, a5 -+ -+ sll.d t5, t1, a6 -+ srl.d t1, t1, a5 -+ -+ sll.d t6, t2, a6 -+ srl.d t2, t2, a5 -+ -+ sll.d t7, t3, a6 -+ srl.d t3, t3, a5 -+ -+ or t0, t0, a7 -+ or t1, t1, t4 -+ or t2, t2, t5 -+ or t3, t3, t6 -+ -+ ld.d t4, a1, -40 -+ ld.d t5, a1, -48 -+ ld.d t6, a1, -56 -+ ld.d a7, a1, -64 -+ st.d t0, a4, -8 -+ st.d t1, a4, -16 -+ st.d t2, a4, -24 -+ st.d t3, a4, -32 -+ -+ addi.d a1, a1, -64 -+ -+ sll.d t0, t4, a6 -+ srl.d t4, t4, a5 -+ -+ sll.d t1, t5, a6 -+ srl.d t5, t5, a5 -+ -+ sll.d t2, t6, a6 -+ srl.d t6, t6, a5 -+ -+ srl.d t3, a7, a5 -+ sll.d a7, a7, a6 -+ -+ or t4, t7, t4 -+ or t5, t0, t5 -+ or t6, t1, t6 -+ or t3, t2, t3 -+ -+ st.d t4, a4, -40 -+ st.d t5, a4, -48 -+ st.d t6, a4, -56 -+ st.d t3, a4, -64 -+ -+ addi.d a4, a4, -64 -+ bne a3, a1, L(back_un_long_bytes) -+ -+L(back_un_less_64bytes): -+ srai.d a3, a2, 5 -+ beqz a3, L(back_un_less_32bytes) -+ -+ ld.d t0, a1, -8 -+ ld.d t1, a1, -16 -+ ld.d t2, a1, -24 -+ ld.d t3, a1, -32 -+ -+ addi.d a1, a1, -32 -+ addi.d a2, a2, -32 -+ -+ sll.d t4, t0, a6 -+ srl.d t0, t0, a5 -+ -+ sll.d t5, t1, a6 -+ srl.d t1, t1, a5 -+ -+ sll.d t6, t2, a6 
-+ srl.d t2, t2, a5 -+ -+ or t0, a7, t0 -+ -+ sll.d a7, t3, a6 -+ srl.d t3, t3, a5 -+ -+ or t1, t4, t1 -+ or t2, t5, t2 -+ or t3, t6, t3 -+ -+ st.d t0, a4, -8 -+ st.d t1, a4, -16 -+ st.d t2, a4, -24 -+ st.d t3, a4, -32 -+ -+ addi.d a4, a4, -32 -+ -+L(back_un_less_32bytes): -+ srai.d a3, a2, 4 -+ beqz a3, L(back_un_less_16bytes) -+ -+ ld.d t0, a1, -8 -+ ld.d t1, a1, -16 -+ -+ addi.d a1, a1, -16 -+ addi.d a2, a2, -16 -+ -+ sll.d t2, t0, a6 -+ srl.d t3, t0, a5 -+ -+ srl.d t4, t1, a5 -+ or t3, a7, t3 -+ or t4, t2, t4 -+ sll.d a7, t1, a6 -+ -+ st.d t3, a4, -8 -+ st.d t4, a4, -16 -+ -+ addi.d a4, a4, -16 -+ -+L(back_un_less_16bytes): -+ srai.d a3, a2, 3 -+ beqz a3, L(back_un_less_8bytes) -+ -+ ld.d t0, a1, -8 -+ -+ addi.d a1, a1, -8 -+ addi.d a2, a2, -8 -+ -+ srl.d t1, t0, a5 -+ or t2, a7, t1 -+ sll.d a7, t0, a6 -+ -+ st.d t2, a4, -8 -+ addi.d a4, a4, -8 -+ -+L(back_un_less_8bytes): -+ beqz a2, L(back_end) -+ bge t8, a2, 1f -+ -+ ld.d t0, a1, -8 -+ srl.d t0, t0, a5 -+ or a7, a7, t0 -+ -+1: -+ srai.d a3, a2, 2 -+ beqz a3, L(back_un_less_4bytes) -+ -+ srai.d t0, a7, 32 -+ addi.d a2, a2, -4 -+ st.w t0, a4, -4 -+ addi.d a4, a4, -4 -+ slli.d a7, a7, 32 -+ -+L(back_un_less_4bytes): -+ srai.d a3, a2, 1 -+ beqz a3, L(back_un_less_2bytes) -+ srai.d t0, a7, 48 -+ addi.d a2, a2, -2 -+ st.h t0, a4, -2 -+ addi.d a4, a4, -2 -+ slli.d a7, a7, 16 -+L(back_un_less_2bytes): -+ beqz a2, L(back_un_less_1byte) -+ srai.d t0, a7, 56 -+ st.b t0, a4, -1 -+L(back_un_less_1byte): -+ jr ra -+ -+L(back_short_data): -+ pcaddi t1, 34 -+ slli.d t2, a2, 3 -+ sub.d t1, t1, t2 -+ jr t1 -+ -+ ld.b t0, a1, 14 -+ st.b t0, a0, 14 -+ ld.b t0, a1, 13 -+ st.b t0, a0, 13 -+ ld.b t0, a1, 12 -+ st.b t0, a0, 12 -+ ld.b t0, a1, 11 -+ st.b t0, a0, 11 -+ ld.b t0, a1, 10 -+ st.b t0, a0, 10 -+ ld.b t0, a1, 9 -+ st.b t0, a0, 9 -+ ld.b t0, a1, 8 -+ st.b t0, a0, 8 -+ ld.b t0, a1, 7 -+ st.b t0, a0, 7 -+ ld.b t0, a1, 6 -+ st.b t0, a0, 6 -+ ld.b t0, a1, 5 -+ st.b t0, a0, 5 -+ ld.b t0, a1, 4 -+ st.b t0, a0, 4 -+ ld.b t0, a1, 3 -+ st.b t0, a0, 3 -+ ld.b t0, a1, 2 -+ st.b t0, a0, 2 -+ ld.b t0, a1, 1 -+ st.b t0, a0, 1 -+ ld.b t0, a1, 0 -+ st.b t0, a0, 0 -+L(back_end): -+ jr ra -+ -+END(MEMCPY_NAME) -+ -+libc_hidden_builtin_def (MEMMOVE_NAME) -+libc_hidden_builtin_def (MEMCPY_NAME) -diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S -new file mode 100644 -index 00000000..4aae5bf8 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S -@@ -0,0 +1,20 @@ -+/* Optimized memcpy implementation using Loongarch LASX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+/* memcpy is part of memmove.S */ -diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S -new file mode 100644 -index 00000000..6ebbe7a2 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S -@@ -0,0 +1,20 @@ -+/* Optimized memcpy implementation using Loongarch LSX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+/* memcpy is part of memmove.S */ -diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S -new file mode 100644 -index 00000000..8e60a22d ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S -@@ -0,0 +1,247 @@ -+/* Optimized unaligned memcpy implementation using basic Loongarch instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+ -+# define MEMCPY_NAME __memcpy_unaligned -+ -+# define LD_64(reg, n) \ -+ ld.d t0, reg, n; \ -+ ld.d t1, reg, n + 8; \ -+ ld.d t2, reg, n + 16; \ -+ ld.d t3, reg, n + 24; \ -+ ld.d t4, reg, n + 32; \ -+ ld.d t5, reg, n + 40; \ -+ ld.d t6, reg, n + 48; \ -+ ld.d t7, reg, n + 56; -+ -+# define ST_64(reg, n) \ -+ st.d t0, reg, n; \ -+ st.d t1, reg, n + 8; \ -+ st.d t2, reg, n + 16; \ -+ st.d t3, reg, n + 24; \ -+ st.d t4, reg, n + 32; \ -+ st.d t5, reg, n + 40; \ -+ st.d t6, reg, n + 48; \ -+ st.d t7, reg, n + 56; -+ -+LEAF(MEMCPY_NAME, 3) -+ add.d a4, a1, a2 -+ add.d a3, a0, a2 -+ li.w a6, 16 -+ bge a6, a2, L(less_16bytes) -+ -+ li.w a6, 128 -+ blt a6, a2, L(long_bytes) -+ li.w a6, 64 -+ blt a6, a2, L(more_64bytes) -+ -+ li.w a6, 32 -+ blt a6, a2, L(more_32bytes) -+ -+ ld.d t0, a1, 0 -+ ld.d t1, a1, 8 -+ ld.d t2, a4, -16 -+ ld.d t3, a4, -8 -+ -+ st.d t0, a0, 0 -+ st.d t1, a0, 8 -+ st.d t2, a3, -16 -+ st.d t3, a3, -8 -+ jr ra -+ -+L(more_64bytes): -+ srli.d t8, a0, 3 -+ slli.d t8, t8, 3 -+ addi.d t8, t8, 0x8 -+ sub.d a7, a0, t8 -+ -+ ld.d t0, a1, 0 -+ sub.d a1, a1, a7 -+ st.d t0, a0, 0 -+ add.d a7, a7, a2 -+ addi.d a7, a7, -0x20 -+ -+L(loop_32): -+ ld.d t0, a1, 0 -+ ld.d t1, a1, 8 -+ ld.d t2, a1, 16 -+ ld.d t3, a1, 24 -+ -+ st.d t0, t8, 0 -+ st.d t1, t8, 8 -+ st.d t2, t8, 16 -+ st.d t3, t8, 24 -+ -+ addi.d t8, t8, 0x20 -+ addi.d a1, a1, 0x20 -+ addi.d a7, a7, -0x20 -+ blt zero, a7, L(loop_32) -+ -+ ld.d t4, a4, -32 -+ ld.d t5, a4, -24 -+ ld.d t6, a4, -16 -+ ld.d t7, a4, -8 -+ -+ st.d t4, a3, -32 -+ st.d t5, a3, -24 -+ st.d t6, a3, -16 -+ st.d t7, a3, -8 -+ -+ jr ra -+ -+L(more_32bytes): -+ ld.d t0, a1, 0 -+ ld.d t1, a1, 8 -+ ld.d t2, a1, 16 -+ ld.d t3, a1, 24 -+ -+ ld.d t4, a4, -32 -+ ld.d t5, a4, -24 -+ ld.d t6, a4, -16 -+ ld.d t7, a4, -8 -+ -+ st.d t0, a0, 0 -+ st.d t1, a0, 8 -+ st.d t2, a0, 16 -+ st.d t3, a0, 24 -+ -+ st.d t4, a3, -32 -+ st.d t5, a3, -24 -+ st.d t6, a3, -16 -+ st.d t7, a3, -8 -+ -+ jr ra -+ -+L(less_16bytes): -+ srai.d a6, a2, 3 -+ beqz a6, L(less_8bytes) -+ -+ ld.d t0, a1, 0 -+ ld.d t1, a4, -8 -+ st.d t0, a0, 0 -+ st.d t1, a3, -8 -+ -+ jr ra -+ -+L(less_8bytes): -+ srai.d a6, a2, 2 -+ beqz a6, L(less_4bytes) -+ -+ ld.w t0, a1, 0 -+ ld.w t1, a4, -4 -+ st.w t0, a0, 0 -+ st.w t1, a3, -4 -+ -+ jr ra -+ -+L(less_4bytes): -+ srai.d a6, a2, 1 -+ beqz a6, L(less_2bytes) -+ -+ ld.h t0, a1, 0 -+ ld.h t1, a4, -2 -+ st.h t0, a0, 0 -+ st.h t1, a3, -2 -+ -+ jr ra -+ -+L(less_2bytes): -+ beqz a2, L(less_1bytes) -+ -+ ld.b t0, a1, 0 -+ st.b t0, a0, 0 -+ jr ra -+ -+L(less_1bytes): -+ jr ra -+ -+L(long_bytes): -+ srli.d t8, a0, 3 -+ slli.d t8, t8, 3 -+ beq a0, t8, L(start) -+ ld.d t0, a1, 0 -+ -+ addi.d t8, t8, 0x8 -+ st.d t0, a0, 0 -+ sub.d a7, a0, t8 -+ sub.d a1, a1, a7 -+ -+L(start): -+ addi.d a5, a3, -0x80 -+ blt a5, t8, L(align_end_proc) -+ -+L(loop_128): -+ LD_64(a1, 0) -+ ST_64(t8, 0) -+ LD_64(a1, 64) -+ addi.d a1, a1, 0x80 -+ ST_64(t8, 64) -+ addi.d t8, t8, 0x80 -+ bge a5, t8, L(loop_128) -+ -+L(align_end_proc): -+ sub.d a2, a3, t8 -+ pcaddi t1, 34 -+ andi t2, a2, 0x78 -+ sub.d t1, t1, t2 -+ jr t1 -+ -+ ld.d t0, a1, 112 -+ st.d t0, t8, 112 -+ ld.d t0, a1, 104 -+ st.d t0, t8, 104 -+ ld.d t0, a1, 96 -+ st.d t0, t8, 96 -+ ld.d t0, a1, 88 -+ st.d t0, t8, 88 -+ ld.d t0, a1, 80 -+ st.d t0, t8, 80 -+ ld.d t0, a1, 72 -+ st.d t0, t8, 72 -+ ld.d t0, a1, 64 -+ st.d t0, t8, 64 -+ ld.d t0, a1, 56 -+ st.d t0, t8, 56 -+ ld.d t0, a1, 48 -+ st.d t0, t8, 48 -+ ld.d t0, a1, 40 -+ st.d t0, t8, 40 -+ ld.d t0, a1, 32 -+ st.d t0, t8, 32 -+ 
ld.d t0, a1, 24 -+ st.d t0, t8, 24 -+ ld.d t0, a1, 16 -+ st.d t0, t8, 16 -+ ld.d t0, a1, 8 -+ st.d t0, t8, 8 -+ ld.d t0, a1, 0 -+ st.d t0, t8, 0 -+ ld.d t0, a4, -8 -+ st.d t0, a3, -8 -+ -+ jr ra -+END(MEMCPY_NAME) -+ -+libc_hidden_builtin_def (MEMCPY_NAME) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy.c b/sysdeps/loongarch/lp64/multiarch/memcpy.c -new file mode 100644 -index 00000000..93b238ce ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memcpy.c -@@ -0,0 +1,37 @@ -+/* Multiple versions of memcpy. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+/* Define multiple versions only for the definition in libc. */ -+#if IS_IN (libc) -+# define memcpy __redirect_memcpy -+# include -+# undef memcpy -+ -+# define SYMBOL_NAME memcpy -+# include "ifunc-lasx.h" -+ -+libc_ifunc_redirected (__redirect_memcpy, memcpy, -+ IFUNC_SELECTOR ()); -+ -+# ifdef SHARED -+__hidden_ver1 (memcpy, __GI_memcpy, __redirect_memcpy) -+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memcmp); -+# endif -+ -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S -new file mode 100644 -index 00000000..5354f383 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S -@@ -0,0 +1,20 @@ -+/* Optimized memmove_aligned implementation using basic Loongarch instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+/* memmove_aligned is part of memcpy_aligned, see memcpy-aligned.S. */ -diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S -new file mode 100644 -index 00000000..ff68e7a2 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S -@@ -0,0 +1,287 @@ -+/* Optimized memmove implementation using Loongarch LASX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. 
-+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+#ifndef MEMCPY_NAME -+# define MEMCPY_NAME __memcpy_lasx -+#endif -+ -+#ifndef MEMMOVE_NAME -+# define MEMMOVE_NAME __memmove_lasx -+#endif -+ -+LEAF(MEMCPY_NAME, 6) -+ li.d t0, 32 -+ add.d a3, a0, a2 -+ add.d a4, a1, a2 -+ bgeu t0, a2, L(less_32bytes) -+ -+ li.d t1, 64 -+ bltu t1, a2, L(copy_long) -+ xvld xr0, a1, 0 -+ xvld xr1, a4, -32 -+ -+ xvst xr0, a0, 0 -+ xvst xr1, a3, -32 -+ jr ra -+L(less_32bytes): -+ srli.d t0, a2, 4 -+ -+ beqz t0, L(less_16bytes) -+ vld vr0, a1, 0 -+ vld vr1, a4, -16 -+ vst vr0, a0, 0 -+ -+ -+ vst vr1, a3, -16 -+ jr ra -+L(less_16bytes): -+ srli.d t0, a2, 3 -+ beqz t0, L(less_8bytes) -+ -+ ld.d t0, a1, 0 -+ ld.d t1, a4, -8 -+ st.d t0, a0, 0 -+ st.d t1, a3, -8 -+ -+ jr ra -+L(less_8bytes): -+ srli.d t0, a2, 2 -+ beqz t0, L(less_4bytes) -+ ld.w t0, a1, 0 -+ -+ ld.w t1, a4, -4 -+ st.w t0, a0, 0 -+ st.w t1, a3, -4 -+ jr ra -+ -+ -+L(less_4bytes): -+ srli.d t0, a2, 1 -+ beqz t0, L(less_2bytes) -+ ld.h t0, a1, 0 -+ ld.h t1, a4, -2 -+ -+ st.h t0, a0, 0 -+ st.h t1, a3, -2 -+ jr ra -+L(less_2bytes): -+ beqz a2, L(less_1bytes) -+ -+ ld.b t0, a1, 0 -+ st.b t0, a0, 0 -+L(less_1bytes): -+ jr ra -+END(MEMCPY_NAME) -+ -+LEAF(MEMMOVE_NAME, 6) -+ -+ li.d t0, 32 -+ add.d a3, a0, a2 -+ add.d a4, a1, a2 -+ bgeu t0, a2, L(less_32bytes) -+ -+ li.d t1, 64 -+ bltu t1, a2, L(move_long) -+ xvld xr0, a1, 0 -+ xvld xr1, a4, -32 -+ -+ xvst xr0, a0, 0 -+ xvst xr1, a3, -32 -+ jr ra -+L(move_long): -+ sub.d t2, a0, a1 -+ -+ bltu t2, a2, L(copy_back) -+L(copy_long): -+ andi t2, a0, 0x1f -+ addi.d a2, a2, -1 -+ sub.d t2, t0, t2 -+ -+ -+ xvld xr8, a1, 0 -+ xvld xr9, a4, -32 -+ sub.d t3, a2, t2 -+ add.d a5, a0, t2 -+ -+ andi a2, t3, 0xff -+ add.d a1, a1, t2 -+ beq a2, t3, L(lt256) -+ sub.d a6, a4, a2 -+ -+ addi.d a6, a6, -1 -+L(loop_256): -+ xvld xr0, a1, 0 -+ xvld xr1, a1, 32 -+ xvld xr2, a1, 64 -+ -+ xvld xr3, a1, 96 -+ xvld xr4, a1, 128 -+ xvld xr5, a1, 160 -+ xvld xr6, a1, 192 -+ -+ -+ xvld xr7, a1, 224 -+ addi.d a1, a1, 256 -+ xvst xr0, a5, 0 -+ xvst xr1, a5, 32 -+ -+ xvst xr2, a5, 64 -+ xvst xr3, a5, 96 -+ xvst xr4, a5, 128 -+ xvst xr5, a5, 160 -+ -+ xvst xr6, a5, 192 -+ xvst xr7, a5, 224 -+ addi.d a5, a5, 256 -+ bne a1, a6, L(loop_256) -+ -+L(lt256): -+ srli.d t2, a2, 7 -+ beqz t2, L(lt128) -+ xvld xr0, a1, 0 -+ xvld xr1, a1, 32 -+ -+ -+ xvld xr2, a1, 64 -+ xvld xr3, a1, 96 -+ addi.d a1, a1, 128 -+ addi.d a2, a2, -128 -+ -+ xvst xr0, a5, 0 -+ xvst xr1, a5, 32 -+ xvst xr2, a5, 64 -+ xvst xr3, a5, 96 -+ -+ addi.d a5, a5, 128 -+L(lt128): -+ bltu a2, t1, L(lt64) -+ xvld xr0, a1, 0 -+ xvld xr1, a1, 32 -+ -+ addi.d a1, a1, 64 -+ addi.d a2, a2, -64 -+ xvst xr0, a5, 0 -+ xvst xr1, a5, 32 -+ -+ -+ addi.d a5, a5, 64 -+L(lt64): -+ bltu a2, t0, L(lt32) -+ xvld xr0, a1, 0 -+ xvst xr0, a5, 0 -+ -+L(lt32): -+ xvst xr8, a0, 0 -+ xvst xr9, a3, -32 -+ jr ra -+ nop 
-+ -+L(copy_back): -+ addi.d a3, a3, -1 -+ addi.d a2, a2, -2 -+ andi t2, a3, 0x1f -+ xvld xr8, a1, 0 -+ -+ xvld xr9, a4, -32 -+ sub.d t3, a2, t2 -+ sub.d a5, a3, t2 -+ sub.d a4, a4, t2 -+ -+ -+ andi a2, t3, 0xff -+ beq a2, t3, L(back_lt256) -+ add.d a6, a1, a2 -+ addi.d a6, a6, 2 -+ -+L(back_loop_256): -+ xvld xr0, a4, -33 -+ xvld xr1, a4, -65 -+ xvld xr2, a4, -97 -+ xvld xr3, a4, -129 -+ -+ xvld xr4, a4, -161 -+ xvld xr5, a4, -193 -+ xvld xr6, a4, -225 -+ xvld xr7, a4, -257 -+ -+ addi.d a4, a4, -256 -+ xvst xr0, a5, -32 -+ xvst xr1, a5, -64 -+ xvst xr2, a5, -96 -+ -+ -+ xvst xr3, a5, -128 -+ xvst xr4, a5, -160 -+ xvst xr5, a5, -192 -+ xvst xr6, a5, -224 -+ -+ xvst xr7, a5, -256 -+ addi.d a5, a5, -256 -+ bne a4, a6, L(back_loop_256) -+L(back_lt256): -+ srli.d t2, a2, 7 -+ -+ beqz t2, L(back_lt128) -+ xvld xr0, a4, -33 -+ xvld xr1, a4, -65 -+ xvld xr2, a4, -97 -+ -+ xvld xr3, a4, -129 -+ addi.d a2, a2, -128 -+ addi.d a4, a4, -128 -+ xvst xr0, a5, -32 -+ -+ -+ xvst xr1, a5, -64 -+ xvst xr2, a5, -96 -+ xvst xr3, a5, -128 -+ addi.d a5, a5, -128 -+ -+L(back_lt128): -+ blt a2, t1, L(back_lt64) -+ xvld xr0, a4, -33 -+ xvld xr1, a4, -65 -+ addi.d a2, a2, -64 -+ -+ addi.d a4, a4, -64 -+ xvst xr0, a5, -32 -+ xvst xr1, a5, -64 -+ addi.d a5, a5, -64 -+ -+L(back_lt64): -+ bltu a2, t0, L(back_lt32) -+ xvld xr0, a4, -33 -+ xvst xr0, a5, -32 -+L(back_lt32): -+ xvst xr8, a0, 0 -+ -+ -+ xvst xr9, a3, -31 -+ jr ra -+END(MEMMOVE_NAME) -+ -+libc_hidden_builtin_def (MEMCPY_NAME) -+libc_hidden_builtin_def (MEMMOVE_NAME) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S -new file mode 100644 -index 00000000..9e1502a7 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S -@@ -0,0 +1,534 @@ -+/* Optimized memmove implementation using Loongarch LSX instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+# define MEMCPY_NAME __memcpy_lsx -+# define MEMMOVE_NAME __memmove_lsx -+ -+LEAF(MEMCPY_NAME, 6) -+ li.d t6, 16 -+ add.d a3, a0, a2 -+ add.d a4, a1, a2 -+ bgeu t6, a2, L(less_16bytes) -+ -+ li.d t8, 64 -+ li.d t7, 32 -+ bltu t8, a2, L(copy_long) -+ bltu t7, a2, L(more_32bytes) -+ -+ vld vr0, a1, 0 -+ vld vr1, a4, -16 -+ vst vr0, a0, 0 -+ vst vr1, a3, -16 -+ -+ jr ra -+L(more_32bytes): -+ vld vr0, a1, 0 -+ vld vr1, a1, 16 -+ vld vr2, a4, -32 -+ -+ -+ vld vr3, a4, -16 -+ vst vr0, a0, 0 -+ vst vr1, a0, 16 -+ vst vr2, a3, -32 -+ -+ vst vr3, a3, -16 -+ jr ra -+L(less_16bytes): -+ srli.d t0, a2, 3 -+ beqz t0, L(less_8bytes) -+ -+ vldrepl.d vr0, a1, 0 -+ vldrepl.d vr1, a4, -8 -+ vstelm.d vr0, a0, 0, 0 -+ vstelm.d vr1, a3, -8, 0 -+ -+ jr ra -+L(less_8bytes): -+ srli.d t0, a2, 2 -+ beqz t0, L(less_4bytes) -+ vldrepl.w vr0, a1, 0 -+ -+ -+ vldrepl.w vr1, a4, -4 -+ vstelm.w vr0, a0, 0, 0 -+ vstelm.w vr1, a3, -4, 0 -+ jr ra -+ -+L(less_4bytes): -+ srli.d t0, a2, 1 -+ beqz t0, L(less_2bytes) -+ vldrepl.h vr0, a1, 0 -+ vldrepl.h vr1, a4, -2 -+ -+ vstelm.h vr0, a0, 0, 0 -+ vstelm.h vr1, a3, -2, 0 -+ jr ra -+L(less_2bytes): -+ beqz a2, L(less_1bytes) -+ -+ ld.b t0, a1, 0 -+ st.b t0, a0, 0 -+L(less_1bytes): -+ jr ra -+ nop -+END(MEMCPY_NAME) -+ -+LEAF(MEMMOVE_NAME, 6) -+ li.d t6, 16 -+ add.d a3, a0, a2 -+ add.d a4, a1, a2 -+ bgeu t6, a2, L(less_16bytes) -+ -+ li.d t8, 64 -+ li.d t7, 32 -+ bltu t8, a2, L(move_long) -+ bltu t7, a2, L(more_32bytes) -+ -+ vld vr0, a1, 0 -+ vld vr1, a4, -16 -+ vst vr0, a0, 0 -+ vst vr1, a3, -16 -+ -+ jr ra -+ nop -+L(move_long): -+ sub.d t0, a0, a1 -+ bltu t0, a2, L(copy_back) -+ -+ -+L(copy_long): -+ vld vr2, a1, 0 -+ andi t0, a0, 0xf -+ sub.d t0, t6, t0 -+ add.d a1, a1, t0 -+ -+ sub.d a2, a2, t0 -+ andi t1, a1, 0xf -+ bnez t1, L(unaligned) -+ vld vr0, a1, 0 -+ -+ addi.d a2, a2, -16 -+ vst vr2, a0, 0 -+ andi t2, a2, 0x7f -+ add.d a5, a0, t0 -+ -+ beq a2, t2, L(al_less_128) -+ sub.d t3, a2, t2 -+ move a2, t2 -+ add.d a6, a1, t3 -+ -+ -+L(al_loop): -+ vld vr1, a1, 16 -+ vld vr2, a1, 32 -+ vld vr3, a1, 48 -+ vld vr4, a1, 64 -+ -+ vld vr5, a1, 80 -+ vld vr6, a1, 96 -+ vld vr7, a1, 112 -+ vst vr0, a5, 0 -+ -+ vld vr0, a1, 128 -+ addi.d a1, a1, 128 -+ vst vr1, a5, 16 -+ vst vr2, a5, 32 -+ -+ vst vr3, a5, 48 -+ vst vr4, a5, 64 -+ vst vr5, a5, 80 -+ vst vr6, a5, 96 -+ -+ -+ vst vr7, a5, 112 -+ addi.d a5, a5, 128 -+ bne a1, a6, L(al_loop) -+L(al_less_128): -+ blt a2, t8, L(al_less_64) -+ -+ vld vr1, a1, 16 -+ vld vr2, a1, 32 -+ vld vr3, a1, 48 -+ addi.d a2, a2, -64 -+ -+ vst vr0, a5, 0 -+ vld vr0, a1, 64 -+ addi.d a1, a1, 64 -+ vst vr1, a5, 16 -+ -+ vst vr2, a5, 32 -+ vst vr3, a5, 48 -+ addi.d a5, a5, 64 -+L(al_less_64): -+ blt a2, t7, L(al_less_32) -+ -+ -+ vld vr1, a1, 16 -+ addi.d a2, a2, -32 -+ vst vr0, a5, 0 -+ vld vr0, a1, 32 -+ -+ addi.d a1, a1, 32 -+ vst vr1, a5, 16 -+ addi.d a5, a5, 32 -+L(al_less_32): -+ blt a2, t6, L(al_less_16) -+ -+ vst vr0, a5, 0 -+ vld vr0, a1, 16 -+ addi.d a5, a5, 16 -+L(al_less_16): -+ vld vr1, a4, -16 -+ -+ vst vr0, a5, 0 -+ vst vr1, a3, -16 -+ jr ra -+ nop -+ -+ -+L(magic_num): -+ .dword 0x0706050403020100 -+ .dword 0x0f0e0d0c0b0a0908 -+L(unaligned): -+ pcaddi t2, -4 -+ bstrins.d a1, zero, 3, 0 -+ vld vr8, t2, 0 -+ vld vr0, a1, 0 -+ -+ vld vr1, a1, 16 -+ addi.d a2, a2, -16 -+ vst vr2, a0, 0 -+ add.d a5, a0, t0 -+ -+ vreplgr2vr.b vr9, t1 -+ andi t2, a2, 0x7f -+ vadd.b vr9, vr9, vr8 -+ addi.d a1, a1, 32 -+ -+ -+ beq t2, a2, L(un_less_128) -+ sub.d t3, 
a2, t2 -+ move a2, t2 -+ add.d a6, a1, t3 -+ -+L(un_loop): -+ vld vr2, a1, 0 -+ vld vr3, a1, 16 -+ vld vr4, a1, 32 -+ vld vr5, a1, 48 -+ -+ vld vr6, a1, 64 -+ vld vr7, a1, 80 -+ vshuf.b vr8, vr1, vr0, vr9 -+ vld vr0, a1, 96 -+ -+ vst vr8, a5, 0 -+ vshuf.b vr8, vr2, vr1, vr9 -+ vld vr1, a1, 112 -+ vst vr8, a5, 16 -+ -+ -+ addi.d a1, a1, 128 -+ vshuf.b vr2, vr3, vr2, vr9 -+ vshuf.b vr3, vr4, vr3, vr9 -+ vst vr2, a5, 32 -+ -+ vshuf.b vr4, vr5, vr4, vr9 -+ vst vr3, a5, 48 -+ vshuf.b vr5, vr6, vr5, vr9 -+ vst vr4, a5, 64 -+ -+ vshuf.b vr6, vr7, vr6, vr9 -+ vst vr5, a5, 80 -+ vshuf.b vr7, vr0, vr7, vr9 -+ vst vr6, a5, 96 -+ -+ vst vr7, a5, 112 -+ addi.d a5, a5, 128 -+ bne a1, a6, L(un_loop) -+L(un_less_128): -+ blt a2, t8, L(un_less_64) -+ -+ -+ vld vr2, a1, 0 -+ vld vr3, a1, 16 -+ vshuf.b vr4, vr1, vr0, vr9 -+ vld vr0, a1, 32 -+ -+ vst vr4, a5, 0 -+ addi.d a2, a2, -64 -+ vshuf.b vr4, vr2, vr1, vr9 -+ vld vr1, a1, 48 -+ -+ addi.d a1, a1, 64 -+ vst vr4, a5, 16 -+ vshuf.b vr2, vr3, vr2, vr9 -+ vshuf.b vr3, vr0, vr3, vr9 -+ -+ vst vr2, a5, 32 -+ vst vr3, a5, 48 -+ addi.d a5, a5, 64 -+L(un_less_64): -+ blt a2, t7, L(un_less_32) -+ -+ -+ vshuf.b vr3, vr1, vr0, vr9 -+ vld vr0, a1, 0 -+ vst vr3, a5, 0 -+ addi.d a2, a2, -32 -+ -+ vshuf.b vr3, vr0, vr1, vr9 -+ vld vr1, a1, 16 -+ addi.d a1, a1, 32 -+ vst vr3, a5, 16 -+ -+ addi.d a5, a5, 32 -+L(un_less_32): -+ blt a2, t6, L(un_less_16) -+ vshuf.b vr2, vr1, vr0, vr9 -+ vor.v vr0, vr1, vr1 -+ -+ vld vr1, a1, 0 -+ vst vr2, a5, 0 -+ addi.d a5, a5, 16 -+L(un_less_16): -+ vld vr2, a4, -16 -+ -+ -+ vshuf.b vr0, vr1, vr0, vr9 -+ vst vr0, a5, 0 -+ vst vr2, a3, -16 -+ jr ra -+ -+L(copy_back): -+ addi.d t0, a3, -1 -+ vld vr2, a4, -16 -+ andi t0, t0, 0xf -+ addi.d t0, t0, 1 -+ -+ sub.d a4, a4, t0 -+ sub.d a2, a2, t0 -+ andi t1, a4, 0xf -+ bnez t1, L(back_unaligned) -+ -+ vld vr0, a4, -16 -+ addi.d a2, a2, -16 -+ vst vr2, a3, -16 -+ andi t2, a2, 0x7f -+ -+ -+ sub.d a3, a3, t0 -+ beq t2, a2, L(back_al_less_128) -+ sub.d t3, a2, t2 -+ move a2, t2 -+ -+ sub.d a6, a4, t3 -+L(back_al_loop): -+ vld vr1, a4, -32 -+ vld vr2, a4, -48 -+ vld vr3, a4, -64 -+ -+ vld vr4, a4, -80 -+ vld vr5, a4, -96 -+ vld vr6, a4, -112 -+ vld vr7, a4, -128 -+ -+ vst vr0, a3, -16 -+ vld vr0, a4, -144 -+ addi.d a4, a4, -128 -+ vst vr1, a3, -32 -+ -+ -+ vst vr2, a3, -48 -+ vst vr3, a3, -64 -+ vst vr4, a3, -80 -+ vst vr5, a3, -96 -+ -+ vst vr6, a3, -112 -+ vst vr7, a3, -128 -+ addi.d a3, a3, -128 -+ bne a4, a6, L(back_al_loop) -+ -+L(back_al_less_128): -+ blt a2, t8, L(back_al_less_64) -+ vld vr1, a4, -32 -+ vld vr2, a4, -48 -+ vld vr3, a4, -64 -+ -+ addi.d a2, a2, -64 -+ vst vr0, a3, -16 -+ vld vr0, a4, -80 -+ addi.d a4, a4, -64 -+ -+ -+ vst vr1, a3, -32 -+ vst vr2, a3, -48 -+ vst vr3, a3, -64 -+ addi.d a3, a3, -64 -+ -+L(back_al_less_64): -+ blt a2, t7, L(back_al_less_32) -+ vld vr1, a4, -32 -+ addi.d a2, a2, -32 -+ vst vr0, a3, -16 -+ -+ vld vr0, a4, -48 -+ vst vr1, a3, -32 -+ addi.d a3, a3, -32 -+ addi.d a4, a4, -32 -+ -+L(back_al_less_32): -+ blt a2, t6, L(back_al_less_16) -+ vst vr0, a3, -16 -+ vld vr0, a4, -32 -+ addi.d a3, a3, -16 -+ -+ -+L(back_al_less_16): -+ vld vr1, a1, 0 -+ vst vr0, a3, -16 -+ vst vr1, a0, 0 -+ jr ra -+ -+L(magic_num_2): -+ .dword 0x0706050403020100 -+ .dword 0x0f0e0d0c0b0a0908 -+L(back_unaligned): -+ pcaddi t2, -4 -+ bstrins.d a4, zero, 3, 0 -+ vld vr8, t2, 0 -+ vld vr0, a4, 0 -+ -+ vld vr1, a4, -16 -+ addi.d a2, a2, -16 -+ vst vr2, a3, -16 -+ sub.d a3, a3, t0 -+ -+ -+ vreplgr2vr.b vr9, t1 -+ andi t2, a2, 0x7f -+ vadd.b vr9, vr9, vr8 -+ addi.d a4, a4, -16 -+ -+ beq t2, 
a2, L(back_un_less_128) -+ sub.d t3, a2, t2 -+ move a2, t2 -+ sub.d a6, a4, t3 -+ -+L(back_un_loop): -+ vld vr2, a4, -16 -+ vld vr3, a4, -32 -+ vld vr4, a4, -48 -+ -+ vld vr5, a4, -64 -+ vld vr6, a4, -80 -+ vld vr7, a4, -96 -+ vshuf.b vr8, vr0, vr1, vr9 -+ -+ -+ vld vr0, a4, -112 -+ vst vr8, a3, -16 -+ vshuf.b vr8, vr1, vr2, vr9 -+ vld vr1, a4, -128 -+ -+ vst vr8, a3, -32 -+ addi.d a4, a4, -128 -+ vshuf.b vr2, vr2, vr3, vr9 -+ vshuf.b vr3, vr3, vr4, vr9 -+ -+ vst vr2, a3, -48 -+ vshuf.b vr4, vr4, vr5, vr9 -+ vst vr3, a3, -64 -+ vshuf.b vr5, vr5, vr6, vr9 -+ -+ vst vr4, a3, -80 -+ vshuf.b vr6, vr6, vr7, vr9 -+ vst vr5, a3, -96 -+ vshuf.b vr7, vr7, vr0, vr9 -+ -+ -+ vst vr6, a3, -112 -+ vst vr7, a3, -128 -+ addi.d a3, a3, -128 -+ bne a4, a6, L(back_un_loop) -+ -+L(back_un_less_128): -+ blt a2, t8, L(back_un_less_64) -+ vld vr2, a4, -16 -+ vld vr3, a4, -32 -+ vshuf.b vr4, vr0, vr1, vr9 -+ -+ vld vr0, a4, -48 -+ vst vr4, a3, -16 -+ addi.d a2, a2, -64 -+ vshuf.b vr4, vr1, vr2, vr9 -+ -+ vld vr1, a4, -64 -+ addi.d a4, a4, -64 -+ vst vr4, a3, -32 -+ vshuf.b vr2, vr2, vr3, vr9 -+ -+ -+ vshuf.b vr3, vr3, vr0, vr9 -+ vst vr2, a3, -48 -+ vst vr3, a3, -64 -+ addi.d a3, a3, -64 -+ -+L(back_un_less_64): -+ blt a2, t7, L(back_un_less_32) -+ vshuf.b vr3, vr0, vr1, vr9 -+ vld vr0, a4, -16 -+ vst vr3, a3, -16 -+ -+ addi.d a2, a2, -32 -+ vshuf.b vr3, vr1, vr0, vr9 -+ vld vr1, a4, -32 -+ addi.d a4, a4, -32 -+ -+ vst vr3, a3, -32 -+ addi.d a3, a3, -32 -+L(back_un_less_32): -+ blt a2, t6, L(back_un_less_16) -+ vshuf.b vr2, vr0, vr1, vr9 -+ -+ -+ vor.v vr0, vr1, vr1 -+ vld vr1, a4, -16 -+ vst vr2, a3, -16 -+ addi.d a3, a3, -16 -+ -+L(back_un_less_16): -+ vld vr2, a1, 0 -+ vshuf.b vr0, vr0, vr1, vr9 -+ vst vr0, a3, -16 -+ vst vr2, a0, 0 -+ -+ jr ra -+END(MEMMOVE_NAME) -+ -+libc_hidden_builtin_def (MEMCPY_NAME) -+libc_hidden_builtin_def (MEMMOVE_NAME) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S -new file mode 100644 -index 00000000..90a64b6b ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S -@@ -0,0 +1,380 @@ -+/* Optimized memmove_unaligned implementation using basic Loongarch instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+ -+# define MEMMOVE_NAME __memmove_unaligned -+ -+# define LD_64(reg, n) \ -+ ld.d t0, reg, n; \ -+ ld.d t1, reg, n + 8; \ -+ ld.d t2, reg, n + 16; \ -+ ld.d t3, reg, n + 24; \ -+ ld.d t4, reg, n + 32; \ -+ ld.d t5, reg, n + 40; \ -+ ld.d t6, reg, n + 48; \ -+ ld.d t7, reg, n + 56; -+ -+# define ST_64(reg, n) \ -+ st.d t0, reg, n; \ -+ st.d t1, reg, n + 8; \ -+ st.d t2, reg, n + 16; \ -+ st.d t3, reg, n + 24; \ -+ st.d t4, reg, n + 32; \ -+ st.d t5, reg, n + 40; \ -+ st.d t6, reg, n + 48; \ -+ st.d t7, reg, n + 56; -+ -+LEAF(MEMMOVE_NAME, 3) -+ add.d a4, a1, a2 -+ add.d a3, a0, a2 -+ beq a1, a0, L(less_1bytes) -+ move t8, a0 -+ -+ srai.d a6, a2, 4 -+ beqz a6, L(less_16bytes) -+ srai.d a6, a2, 6 -+ bnez a6, L(more_64bytes) -+ srai.d a6, a2, 5 -+ beqz a6, L(less_32bytes) -+ -+ ld.d t0, a1, 0 -+ ld.d t1, a1, 8 -+ ld.d t2, a1, 16 -+ ld.d t3, a1, 24 -+ -+ ld.d t4, a4, -32 -+ ld.d t5, a4, -24 -+ ld.d t6, a4, -16 -+ ld.d t7, a4, -8 -+ -+ st.d t0, a0, 0 -+ st.d t1, a0, 8 -+ st.d t2, a0, 16 -+ st.d t3, a0, 24 -+ -+ st.d t4, a3, -32 -+ st.d t5, a3, -24 -+ st.d t6, a3, -16 -+ st.d t7, a3, -8 -+ -+ jr ra -+ -+L(less_32bytes): -+ ld.d t0, a1, 0 -+ ld.d t1, a1, 8 -+ ld.d t2, a4, -16 -+ ld.d t3, a4, -8 -+ -+ st.d t0, a0, 0 -+ st.d t1, a0, 8 -+ st.d t2, a3, -16 -+ st.d t3, a3, -8 -+ -+ jr ra -+ -+L(less_16bytes): -+ srai.d a6, a2, 3 -+ beqz a6, L(less_8bytes) -+ -+ ld.d t0, a1, 0 -+ ld.d t1, a4, -8 -+ st.d t0, a0, 0 -+ st.d t1, a3, -8 -+ -+ jr ra -+ -+L(less_8bytes): -+ srai.d a6, a2, 2 -+ beqz a6, L(less_4bytes) -+ -+ ld.w t0, a1, 0 -+ ld.w t1, a4, -4 -+ st.w t0, a0, 0 -+ st.w t1, a3, -4 -+ -+ jr ra -+ -+L(less_4bytes): -+ srai.d a6, a2, 1 -+ beqz a6, L(less_2bytes) -+ -+ ld.h t0, a1, 0 -+ ld.h t1, a4, -2 -+ st.h t0, a0, 0 -+ st.h t1, a3, -2 -+ -+ jr ra -+ -+L(less_2bytes): -+ beqz a2, L(less_1bytes) -+ -+ ld.b t0, a1, 0 -+ st.b t0, a0, 0 -+ -+ jr ra -+ -+L(less_1bytes): -+ jr ra -+ -+L(more_64bytes): -+ sub.d a7, a0, a1 -+ bltu a7, a2, L(copy_backward) -+ -+L(copy_forward): -+ srli.d a0, a0, 3 -+ slli.d a0, a0, 3 -+ beq a0, t8, L(all_align) -+ addi.d a0, a0, 0x8 -+ sub.d a7, t8, a0 -+ sub.d a1, a1, a7 -+ add.d a2, a7, a2 -+ -+L(start_unalign_proc): -+ pcaddi t1, 18 -+ slli.d a6, a7, 3 -+ add.d t1, t1, a6 -+ jr t1 -+ -+ ld.b t0, a1, -7 -+ st.b t0, a0, -7 -+ ld.b t0, a1, -6 -+ st.b t0, a0, -6 -+ ld.b t0, a1, -5 -+ st.b t0, a0, -5 -+ ld.b t0, a1, -4 -+ st.b t0, a0, -4 -+ ld.b t0, a1, -3 -+ st.b t0, a0, -3 -+ ld.b t0, a1, -2 -+ st.b t0, a0, -2 -+ ld.b t0, a1, -1 -+ st.b t0, a0, -1 -+L(start_over): -+ -+ addi.d a2, a2, -0x80 -+ blt a2, zero, L(end_unalign_proc) -+ -+L(loop_less): -+ LD_64(a1, 0) -+ ST_64(a0, 0) -+ LD_64(a1, 64) -+ ST_64(a0, 64) -+ -+ addi.d a0, a0, 0x80 -+ addi.d a1, a1, 0x80 -+ addi.d a2, a2, -0x80 -+ bge a2, zero, L(loop_less) -+ -+L(end_unalign_proc): -+ addi.d a2, a2, 0x80 -+ -+ pcaddi t1, 36 -+ andi t2, a2, 0x78 -+ add.d a1, a1, t2 -+ add.d a0, a0, t2 -+ sub.d t1, t1, t2 -+ jr t1 -+ -+ ld.d t0, a1, -120 -+ st.d t0, a0, -120 -+ ld.d t0, a1, -112 -+ st.d t0, a0, -112 -+ ld.d t0, a1, -104 -+ st.d t0, a0, -104 -+ ld.d t0, a1, -96 -+ st.d t0, a0, -96 -+ ld.d t0, a1, -88 -+ st.d t0, a0, -88 -+ ld.d t0, a1, -80 -+ st.d t0, a0, -80 -+ ld.d t0, a1, -72 -+ st.d t0, a0, -72 -+ ld.d t0, a1, -64 -+ st.d t0, a0, -64 -+ ld.d t0, a1, -56 -+ st.d t0, a0, -56 -+ ld.d t0, a1, -48 -+ st.d t0, a0, -48 -+ ld.d t0, a1, -40 -+ st.d t0, a0, -40 -+ ld.d t0, a1, -32 -+ st.d t0, a0, -32 -+ ld.d t0, a1, -24 -+ st.d t0, a0, -24 -+ ld.d t0, a1, -16 -+ 
st.d t0, a0, -16 -+ ld.d t0, a1, -8 -+ st.d t0, a0, -8 -+ -+ andi a2, a2, 0x7 -+ pcaddi t1, 18 -+ slli.d a2, a2, 3 -+ sub.d t1, t1, a2 -+ jr t1 -+ -+ ld.b t0, a4, -7 -+ st.b t0, a3, -7 -+ ld.b t0, a4, -6 -+ st.b t0, a3, -6 -+ ld.b t0, a4, -5 -+ st.b t0, a3, -5 -+ ld.b t0, a4, -4 -+ st.b t0, a3, -4 -+ ld.b t0, a4, -3 -+ st.b t0, a3, -3 -+ ld.b t0, a4, -2 -+ st.b t0, a3, -2 -+ ld.b t0, a4, -1 -+ st.b t0, a3, -1 -+L(end): -+ move a0, t8 -+ jr ra -+ -+L(all_align): -+ addi.d a1, a1, 0x8 -+ addi.d a0, a0, 0x8 -+ ld.d t0, a1, -8 -+ st.d t0, a0, -8 -+ addi.d a2, a2, -8 -+ b L(start_over) -+ -+L(all_align_back): -+ addi.d a4, a4, -0x8 -+ addi.d a3, a3, -0x8 -+ ld.d t0, a4, 0 -+ st.d t0, a3, 0 -+ addi.d a2, a2, -8 -+ b L(start_over_back) -+ -+L(copy_backward): -+ move a5, a3 -+ srli.d a3, a3, 3 -+ slli.d a3, a3, 3 -+ beq a3, a5, L(all_align_back) -+ sub.d a7, a3, a5 -+ add.d a4, a4, a7 -+ add.d a2, a7, a2 -+ -+ pcaddi t1, 18 -+ slli.d a6, a7, 3 -+ add.d t1, t1, a6 -+ jr t1 -+ -+ ld.b t0, a4, 6 -+ st.b t0, a3, 6 -+ ld.b t0, a4, 5 -+ st.b t0, a3, 5 -+ ld.b t0, a4, 4 -+ st.b t0, a3, 4 -+ ld.b t0, a4, 3 -+ st.b t0, a3, 3 -+ ld.b t0, a4, 2 -+ st.b t0, a3, 2 -+ ld.b t0, a4, 1 -+ st.b t0, a3, 1 -+ ld.b t0, a4, 0 -+ st.b t0, a3, 0 -+L(start_over_back): -+ addi.d a2, a2, -0x80 -+ blt a2, zero, L(end_unalign_proc_back) -+ -+L(loop_less_back): -+ LD_64(a4, -64) -+ ST_64(a3, -64) -+ LD_64(a4, -128) -+ ST_64(a3, -128) -+ -+ addi.d a4, a4, -0x80 -+ addi.d a3, a3, -0x80 -+ addi.d a2, a2, -0x80 -+ bge a2, zero, L(loop_less_back) -+ -+L(end_unalign_proc_back): -+ addi.d a2, a2, 0x80 -+ -+ pcaddi t1, 36 -+ andi t2, a2, 0x78 -+ sub.d a4, a4, t2 -+ sub.d a3, a3, t2 -+ sub.d t1, t1, t2 -+ jr t1 -+ -+ ld.d t0, a4, 112 -+ st.d t0, a3, 112 -+ ld.d t0, a4, 104 -+ st.d t0, a3, 104 -+ ld.d t0, a4, 96 -+ st.d t0, a3, 96 -+ ld.d t0, a4, 88 -+ st.d t0, a3, 88 -+ ld.d t0, a4, 80 -+ st.d t0, a3, 80 -+ ld.d t0, a4, 72 -+ st.d t0, a3, 72 -+ ld.d t0, a4, 64 -+ st.d t0, a3, 64 -+ ld.d t0, a4, 56 -+ st.d t0, a3, 56 -+ ld.d t0, a4, 48 -+ st.d t0, a3, 48 -+ ld.d t0, a4, 40 -+ st.d t0, a3, 40 -+ ld.d t0, a4, 32 -+ st.d t0, a3, 32 -+ ld.d t0, a4, 24 -+ st.d t0, a3, 24 -+ ld.d t0, a4, 16 -+ st.d t0, a3, 16 -+ ld.d t0, a4, 8 -+ st.d t0, a3, 8 -+ ld.d t0, a4, 0 -+ st.d t0, a3, 0 -+ -+ andi a2, a2, 0x7 -+ pcaddi t1, 18 -+ slli.d a2, a2, 3 -+ sub.d t1, t1, a2 -+ jr t1 -+ -+ ld.b t0, a1, 6 -+ st.b t0, a0, 6 -+ ld.b t0, a1, 5 -+ st.b t0, a0, 5 -+ ld.b t0, a1, 4 -+ st.b t0, a0, 4 -+ ld.b t0, a1, 3 -+ st.b t0, a0, 3 -+ ld.b t0, a1, 2 -+ st.b t0, a0, 2 -+ ld.b t0, a1, 1 -+ st.b t0, a0, 1 -+ ld.b t0, a1, 0 -+ st.b t0, a0, 0 -+ -+ move a0, t8 -+ jr ra -+END(MEMMOVE_NAME) -+ -+libc_hidden_builtin_def (MEMMOVE_NAME) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/memmove.c b/sysdeps/loongarch/lp64/multiarch/memmove.c -new file mode 100644 -index 00000000..7e3ca4c4 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/memmove.c -@@ -0,0 +1,38 @@ -+/* Multiple versions of memmove. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. 
-+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+/* Define multiple versions only for the definition in libc. */ -+#if IS_IN (libc) -+# define memmove __redirect_memmove -+# include -+# undef memmove -+ -+# define SYMBOL_NAME memmove -+# include "ifunc-lasx.h" -+ -+libc_ifunc_redirected (__redirect_memmove, __libc_memmove, -+ IFUNC_SELECTOR ()); -+strong_alias (__libc_memmove, memmove); -+ -+# ifdef SHARED -+__hidden_ver1 (__libc_memmove, __GI_memmove, __redirect_memmove) -+ __attribute__ ((visibility ("hidden"))); -+# endif -+ -+#endif --- -2.33.0 - diff --git a/Loongarch-Add-ifunc-support-for-strchr-aligned-lsx-l.patch b/Loongarch-Add-ifunc-support-for-strchr-aligned-lsx-l.patch deleted file mode 100644 index 03e1299..0000000 --- a/Loongarch-Add-ifunc-support-for-strchr-aligned-lsx-l.patch +++ /dev/null @@ -1,706 +0,0 @@ -From aca7d7f0dde5f56344e8e58e5f6648c96bb1f1cc Mon Sep 17 00:00:00 2001 -From: dengjianbo -Date: Tue, 15 Aug 2023 09:08:11 +0800 -Subject: [PATCH 06/29] Loongarch: Add ifunc support for strchr{aligned, lsx, - lasx} and strchrnul{aligned, lsx, lasx} - -These implementations improve the time to run strchr{nul} -microbenchmark in glibc as below: -strchr-lasx reduces the runtime about 50%-83% -strchr-lsx reduces the runtime about 30%-67% -strchr-aligned reduces the runtime about 10%-20% -strchrnul-lasx reduces the runtime about 50%-83% -strchrnul-lsx reduces the runtime about 36%-65% -strchrnul-aligned reduces the runtime about 6%-10% - -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/lp64/multiarch/Makefile | 6 ++ - .../lp64/multiarch/ifunc-impl-list.c | 16 +++ - .../loongarch/lp64/multiarch/ifunc-strchr.h | 41 ++++++++ - .../lp64/multiarch/ifunc-strchrnul.h | 41 ++++++++ - .../loongarch/lp64/multiarch/strchr-aligned.S | 99 +++++++++++++++++++ - .../loongarch/lp64/multiarch/strchr-lasx.S | 91 +++++++++++++++++ - sysdeps/loongarch/lp64/multiarch/strchr-lsx.S | 73 ++++++++++++++ - sysdeps/loongarch/lp64/multiarch/strchr.c | 36 +++++++ - .../lp64/multiarch/strchrnul-aligned.S | 95 ++++++++++++++++++ - .../loongarch/lp64/multiarch/strchrnul-lasx.S | 22 +++++ - .../loongarch/lp64/multiarch/strchrnul-lsx.S | 22 +++++ - sysdeps/loongarch/lp64/multiarch/strchrnul.c | 39 ++++++++ - 12 files changed, 581 insertions(+) - create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h - create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h - create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-aligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-lasx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-lsx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr.c - create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S - create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul.c - -diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile -index 76c506c9..110a8c5c 100644 ---- a/sysdeps/loongarch/lp64/multiarch/Makefile -+++ 
b/sysdeps/loongarch/lp64/multiarch/Makefile -@@ -3,5 +3,11 @@ sysdep_routines += \ - strlen-aligned \ - strlen-lsx \ - strlen-lasx \ -+ strchr-aligned \ -+ strchr-lsx \ -+ strchr-lasx \ -+ strchrnul-aligned \ -+ strchrnul-lsx \ -+ strchrnul-lasx \ - # sysdep_routines - endif -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -index 1a2a576f..c7164b45 100644 ---- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c -@@ -37,5 +37,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - #endif - IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned) - ) -+ -+ IFUNC_IMPL (i, name, strchr, -+#if !defined __loongarch_soft_float -+ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx) -+ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LSX, __strchr_lsx) -+#endif -+ IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_aligned) -+ ) -+ -+ IFUNC_IMPL (i, name, strchrnul, -+#if !defined __loongarch_soft_float -+ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LASX, __strchrnul_lasx) -+ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LSX, __strchrnul_lsx) -+#endif -+ IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned) -+ ) - return i; - } -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h -new file mode 100644 -index 00000000..4494db79 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h -@@ -0,0 +1,41 @@ -+/* Common definition for strchr ifunc selections. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+#include -+#include -+ -+#if !defined __loongarch_soft_float -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; -+#endif -+ -+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; -+ -+static inline void * -+IFUNC_SELECTOR (void) -+{ -+#if !defined __loongarch_soft_float -+ if (SUPPORT_LASX) -+ return OPTIMIZE (lasx); -+ else if (SUPPORT_LSX) -+ return OPTIMIZE (lsx); -+ else -+#endif -+ return OPTIMIZE (aligned); -+} -diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h -new file mode 100644 -index 00000000..8a925120 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h -@@ -0,0 +1,41 @@ -+/* Common definition for strchrnul ifunc selections. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. 
-+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+#include -+#include -+ -+#if !defined __loongarch_soft_float -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; -+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; -+#endif -+ -+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; -+ -+static inline void * -+IFUNC_SELECTOR (void) -+{ -+#if !defined __loongarch_soft_float -+ if (SUPPORT_LASX) -+ return OPTIMIZE (lasx); -+ else if (SUPPORT_LSX) -+ return OPTIMIZE (lsx); -+ else -+#endif -+ return OPTIMIZE (aligned); -+} -diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S -new file mode 100644 -index 00000000..5fb01806 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S -@@ -0,0 +1,99 @@ -+/* Optimized strchr implementation using basic Loongarch instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+# define STRCHR_NAME __strchr_aligned -+#else -+# define STRCHR_NAME strchr -+#endif -+ -+LEAF(STRCHR_NAME, 6) -+ slli.d t1, a0, 3 -+ bstrins.d a0, zero, 2, 0 -+ lu12i.w a2, 0x01010 -+ ld.d t2, a0, 0 -+ -+ ori a2, a2, 0x101 -+ andi a1, a1, 0xff -+ bstrins.d a2, a2, 63, 32 -+ li.w t0, -1 -+ -+ mul.d a1, a1, a2 -+ sll.d t0, t0, t1 -+ slli.d a3, a2, 7 -+ orn t2, t2, t0 -+ -+ sll.d t3, a1, t1 -+ xor t4, t2, t3 -+ sub.d a4, t2, a2 -+ sub.d a5, t4, a2 -+ -+ -+ andn a4, a4, t2 -+ andn a5, a5, t4 -+ or t0, a4, a5 -+ and t0, t0, a3 -+ -+ bnez t0, L(end) -+ addi.d a0, a0, 8 -+L(loop): -+ ld.d t4, a0, 0 -+ xor t2, t4, a1 -+ -+ sub.d a4, t4, a2 -+ sub.d a5, t2, a2 -+ andn a4, a4, t4 -+ andn a5, a5, t2 -+ -+ or t0, a4, a5 -+ and t0, t0, a3 -+ bnez t0, L(end) -+ ld.d t4, a0, 8 -+ -+ -+ addi.d a0, a0, 16 -+ xor t2, t4, a1 -+ sub.d a4, t4, a2 -+ sub.d a5, t2, a2 -+ -+ andn a4, a4, t4 -+ andn a5, a5, t2 -+ or t0, a4, a5 -+ and t0, t0, a3 -+ -+ beqz t0, L(loop) -+ addi.d a0, a0, -8 -+L(end): -+ and t0, a5, a3 -+ and t1, a4, a3 -+ -+ ctz.d t0, t0 -+ ctz.d t1, t1 -+ srli.w t2, t0, 3 -+ sltu t3, t1, t0 -+ -+ -+ add.d a0, a0, t2 -+ masknez a0, a0, t3 -+ jr ra -+END(STRCHR_NAME) -diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S -new file mode 100644 -index 00000000..254402da ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S -@@ -0,0 +1,91 @@ -+/* Optimized strchr implementation using loongarch LASX SIMD instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+#ifndef AS_STRCHRNUL -+# define STRCHR __strchr_lasx -+#endif -+ -+LEAF(STRCHR, 6) -+ andi t1, a0, 0x1f -+ bstrins.d a0, zero, 4, 0 -+ xvld xr0, a0, 0 -+ li.d t2, -1 -+ -+ xvreplgr2vr.b xr1, a1 -+ sll.d t1, t2, t1 -+ xvxor.v xr2, xr0, xr1 -+ xvmin.bu xr0, xr0, xr2 -+ -+ xvmsknz.b xr0, xr0 -+ xvpickve.w xr3, xr0, 4 -+ vilvl.h vr0, vr3, vr0 -+ movfr2gr.s t0, fa0 -+ -+ orn t0, t0, t1 -+ bne t0, t2, L(end) -+ addi.d a0, a0, 32 -+ nop -+ -+ -+L(loop): -+ xvld xr0, a0, 0 -+ xvxor.v xr2, xr0, xr1 -+ xvmin.bu xr0, xr0, xr2 -+ xvsetanyeqz.b fcc0, xr0 -+ -+ bcnez fcc0, L(loop_end) -+ xvld xr0, a0, 32 -+ addi.d a0, a0, 64 -+ xvxor.v xr2, xr0, xr1 -+ -+ xvmin.bu xr0, xr0, xr2 -+ xvsetanyeqz.b fcc0, xr0 -+ bceqz fcc0, L(loop) -+ addi.d a0, a0, -32 -+ -+L(loop_end): -+ xvmsknz.b xr0, xr0 -+ xvpickve.w xr1, xr0, 4 -+ vilvl.h vr0, vr1, vr0 -+ movfr2gr.s t0, fa0 -+ -+ -+L(end): -+ cto.w t0, t0 -+ add.d a0, a0, t0 -+#ifndef AS_STRCHRNUL -+ vreplgr2vr.b vr0, t0 -+ xvpermi.q xr3, xr2, 1 -+ -+ vshuf.b vr0, vr3, vr2, vr0 -+ vpickve2gr.bu t0, vr0, 0 -+ masknez a0, a0, t0 -+#endif -+ jr ra -+ -+END(STRCHR) -+ -+libc_hidden_builtin_def(STRCHR) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S -new file mode 100644 -index 00000000..dae98b0a ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S -@@ -0,0 +1,73 @@ -+/* Optimized strlen implementation using loongarch LSX SIMD instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) && !defined __loongarch_soft_float -+ -+#ifndef AS_STRCHRNUL -+# define STRCHR __strchr_lsx -+#endif -+ -+LEAF(STRCHR, 6) -+ andi t1, a0, 0xf -+ bstrins.d a0, zero, 3, 0 -+ vld vr0, a0, 0 -+ li.d t2, -1 -+ -+ vreplgr2vr.b vr1, a1 -+ sll.d t3, t2, t1 -+ vxor.v vr2, vr0, vr1 -+ vmin.bu vr0, vr0, vr2 -+ -+ vmsknz.b vr0, vr0 -+ movfr2gr.s t0, fa0 -+ ext.w.h t0, t0 -+ orn t0, t0, t3 -+ -+ beq t0, t2, L(loop) -+L(found): -+ cto.w t0, t0 -+ add.d a0, a0, t0 -+#ifndef AS_STRCHRNUL -+ vreplve.b vr2, vr2, t0 -+ vpickve2gr.bu t1, vr2, 0 -+ masknez a0, a0, t1 -+#endif -+ jr ra -+ -+ -+L(loop): -+ vld vr0, a0, 16 -+ addi.d a0, a0, 16 -+ vxor.v vr2, vr0, vr1 -+ vmin.bu vr0, vr0, vr2 -+ -+ vsetanyeqz.b fcc0, vr0 -+ bceqz fcc0, L(loop) -+ vmsknz.b vr0, vr0 -+ movfr2gr.s t0, fa0 -+ -+ b L(found) -+END(STRCHR) -+ -+libc_hidden_builtin_def (STRCHR) -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/strchr.c b/sysdeps/loongarch/lp64/multiarch/strchr.c -new file mode 100644 -index 00000000..404e97bd ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strchr.c -@@ -0,0 +1,36 @@ -+/* Multiple versions of strchr. -+ All versions must be listed in ifunc-impl-list.c. 
-+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+/* Define multiple versions only for the definition in libc. */ -+#if IS_IN (libc) -+# define strchr __redirect_strchr -+# include -+# undef strchr -+ -+# define SYMBOL_NAME strchr -+# include "ifunc-strchr.h" -+ -+libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ()); -+weak_alias(strchr, index) -+# ifdef SHARED -+__hidden_ver1 (strchr, __GI_strchr, __redirect_strchr) -+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strchr); -+# endif -+ -+#endif -diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S -new file mode 100644 -index 00000000..1c01a023 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S -@@ -0,0 +1,95 @@ -+/* Optimized strchrnul implementation using basic Loongarch instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#include -+#include -+#include -+ -+#if IS_IN (libc) -+# define STRCHRNUL_NAME __strchrnul_aligned -+#else -+# define STRCHRNUL_NAME __strchrnul -+#endif -+ -+LEAF(STRCHRNUL_NAME, 6) -+ slli.d t1, a0, 3 -+ bstrins.d a0, zero, 2, 0 -+ lu12i.w a2, 0x01010 -+ ld.d t2, a0, 0 -+ -+ ori a2, a2, 0x101 -+ andi a1, a1, 0xff -+ bstrins.d a2, a2, 63, 32 -+ li.w t0, -1 -+ -+ mul.d a1, a1, a2 -+ sll.d t0, t0, t1 -+ slli.d a3, a2, 7 -+ orn t2, t2, t0 -+ -+ sll.d t3, a1, t1 -+ xor t4, t2, t3 -+ sub.d a4, t2, a2 -+ sub.d a5, t4, a2 -+ -+ -+ andn a4, a4, t2 -+ andn a5, a5, t4 -+ or t0, a4, a5 -+ and t0, t0, a3 -+ -+ bnez t0, L(end) -+ addi.d a0, a0, 8 -+L(loop): -+ ld.d t4, a0, 0 -+ xor t2, t4, a1 -+ -+ sub.d a4, t4, a2 -+ sub.d a5, t2, a2 -+ andn a4, a4, t4 -+ andn a5, a5, t2 -+ -+ or t0, a4, a5 -+ and t0, t0, a3 -+ bnez t0, L(end) -+ ld.d t4, a0, 8 -+ -+ -+ addi.d a0, a0, 16 -+ xor t2, t4, a1 -+ sub.d a4, t4, a2 -+ sub.d a5, t2, a2 -+ -+ andn a4, a4, t4 -+ andn a5, a5, t2 -+ or t0, a4, a5 -+ and t0, t0, a3 -+ -+ beqz t0, L(loop) -+ addi.d a0, a0, -8 -+L(end): -+ ctz.d t0, t0 -+ srli.w t0, t0, 3 -+ -+ -+ add.d a0, a0, t0 -+ jr ra -+END(STRCHRNUL_NAME) -+ -+libc_hidden_builtin_def (STRCHRNUL_NAME) -diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S -new file mode 100644 -index 00000000..d45495e4 ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S -@@ -0,0 +1,22 @@ -+/* Optimized strchrnul implementation using loongarch LASX SIMD instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#define STRCHR __strchrnul_lasx -+#define AS_STRCHRNUL -+#include "strchr-lasx.S" -diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S -new file mode 100644 -index 00000000..07d793ae ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S -@@ -0,0 +1,22 @@ -+/* Optimized strchrnul implementation using loongarch LSX SIMD instructions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#define STRCHR __strchrnul_lsx -+#define AS_STRCHRNUL -+#include "strchr-lsx.S" -diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul.c b/sysdeps/loongarch/lp64/multiarch/strchrnul.c -new file mode 100644 -index 00000000..f3b8296e ---- /dev/null -+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul.c -@@ -0,0 +1,39 @@ -+/* Multiple versions of strchrnul. -+ All versions must be listed in ifunc-impl-list.c. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ . */ -+ -+/* Define multiple versions only for the definition in libc. */ -+ -+#if IS_IN (libc) -+# define strchrnul __redirect_strchrnul -+# define __strchrnul __redirect___strchrnul -+# include -+# undef __strchrnul -+# undef strchrnul -+ -+# define SYMBOL_NAME strchrnul -+# include "ifunc-strchrnul.h" -+ -+libc_ifunc_redirected (__redirect_strchrnul, __strchrnul, -+ IFUNC_SELECTOR ()); -+weak_alias (__strchrnul, strchrnul) -+# ifdef SHARED -+__hidden_ver1 (__strchrnul, __GI___strchrnul, __redirect_strchrnul) -+ __attribute__((visibility ("hidden"))) __attribute_copy__ (strchrnul); -+# endif -+#endif --- -2.33.0 - diff --git a/Revert-LoongArch-Add-glibc.cpu.hwcap-support.patch b/Revert-LoongArch-Add-glibc.cpu.hwcap-support.patch deleted file mode 100644 index 6180f26..0000000 --- a/Revert-LoongArch-Add-glibc.cpu.hwcap-support.patch +++ /dev/null @@ -1,478 +0,0 @@ -From c0f3b0a8c71c26d5351e8ddabe3e8a323803e683 Mon Sep 17 00:00:00 2001 -From: caiyinyu -Date: Thu, 21 Sep 2023 09:10:11 +0800 -Subject: [PATCH 26/29] Revert "LoongArch: Add glibc.cpu.hwcap support." - -This reverts commit a53451559dc9cce765ea5bcbb92c4007e058e92b. 
- -Signed-off-by: Peng Fan -Signed-off-by: ticat_fp ---- - sysdeps/loongarch/Makefile | 4 - - sysdeps/loongarch/Versions | 5 -- - sysdeps/loongarch/cpu-tunables.c | 89 ------------------- - sysdeps/loongarch/dl-get-cpu-features.c | 25 ------ - sysdeps/loongarch/dl-machine.h | 27 +----- - sysdeps/loongarch/dl-tunables.list | 25 ------ - .../unix/sysv/linux/loongarch/cpu-features.c | 29 ------ - .../unix/sysv/linux/loongarch/cpu-features.h | 18 +--- - .../unix/sysv/linux/loongarch/dl-procinfo.c | 60 ------------- - sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c | 21 ----- - .../unix/sysv/linux/loongarch/libc-start.c | 34 ------- - 11 files changed, 8 insertions(+), 329 deletions(-) - delete mode 100644 sysdeps/loongarch/Versions - delete mode 100644 sysdeps/loongarch/cpu-tunables.c - delete mode 100644 sysdeps/loongarch/dl-get-cpu-features.c - delete mode 100644 sysdeps/loongarch/dl-tunables.list - delete mode 100644 sysdeps/unix/sysv/linux/loongarch/cpu-features.c - delete mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c - delete mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c - delete mode 100644 sysdeps/unix/sysv/linux/loongarch/libc-start.c - -diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile -index 30a1f4a8..43d2f583 100644 ---- a/sysdeps/loongarch/Makefile -+++ b/sysdeps/loongarch/Makefile -@@ -6,10 +6,6 @@ ifeq ($(subdir),elf) - gen-as-const-headers += dl-link.sym - endif - --ifeq ($(subdir),elf) -- sysdep-dl-routines += dl-get-cpu-features --endif -- - # LoongArch's assembler also needs to know about PIC as it changes the - # definition of some assembler macros. - ASFLAGS-.os += $(pic-ccflag) -diff --git a/sysdeps/loongarch/Versions b/sysdeps/loongarch/Versions -deleted file mode 100644 -index 33ae2cc0..00000000 ---- a/sysdeps/loongarch/Versions -+++ /dev/null -@@ -1,5 +0,0 @@ --ld { -- GLIBC_PRIVATE { -- _dl_larch_get_cpu_features; -- } --} -diff --git a/sysdeps/loongarch/cpu-tunables.c b/sysdeps/loongarch/cpu-tunables.c -deleted file mode 100644 -index 8e9fab93..00000000 ---- a/sysdeps/loongarch/cpu-tunables.c -+++ /dev/null -@@ -1,89 +0,0 @@ --/* LoongArch CPU feature tuning. -- This file is part of the GNU C Library. -- Copyright (C) 2023 Free Software Foundation, Inc. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, see -- . */ -- --# include --# include --# include /* Get STDOUT_FILENO for _dl_printf. 
*/
--# include
--# include
--# include
--# include
--# include
--
--# define HWCAP_LOONGARCH_IFUNC \
-- (HWCAP_LOONGARCH_UAL | HWCAP_LOONGARCH_LSX | HWCAP_LOONGARCH_LASX)
--
--# define CHECK_GLIBC_IFUNC_CPU_OFF(f, name, len) \
-- _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \
-- if (!memcmp (f, #name, len) && \
-- (GLRO (dl_hwcap) & HWCAP_LOONGARCH_##name)) \
-- { \
-- hwcap |= (HWCAP_LOONGARCH_##name | (~HWCAP_LOONGARCH_IFUNC)); \
-- break; \
-- } \
--
--attribute_hidden
--void
--TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
--{
-- const char *p = valp->strval;
-- size_t len;
-- unsigned long hwcap = 0;
-- const char *c;
--
-- do {
-- for (c = p; *c != ','; c++)
-- if (*c == '\0')
-- break;
--
-- len = c - p;
--
-- switch(len)
-- {
-- default:
-- _dl_fatal_printf (
-- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
-- );
-- break;
-- case 3:
-- {
-- CHECK_GLIBC_IFUNC_CPU_OFF (p, LSX, 3);
-- CHECK_GLIBC_IFUNC_CPU_OFF (p, UAL, 3);
-- _dl_fatal_printf (
-- "Some features are invalid or not supported on this machine!!\n"
-- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
-- );
-- }
-- break;
-- case 4:
-- {
-- CHECK_GLIBC_IFUNC_CPU_OFF (p, LASX, 4);
-- _dl_fatal_printf (
-- "Some features are invalid or not supported on this machine!!\n"
-- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
-- );
-- }
-- break;
-- }
--
-- p += len + 1;
-- }
-- while (*c != '\0');
--
-- GLRO (dl_larch_cpu_features).hwcap &= hwcap;
--}
-diff --git a/sysdeps/loongarch/dl-get-cpu-features.c b/sysdeps/loongarch/dl-get-cpu-features.c
-deleted file mode 100644
-index 7cd9bc15..00000000
---- a/sysdeps/loongarch/dl-get-cpu-features.c
-+++ /dev/null
-@@ -1,25 +0,0 @@
--/* Define _dl_larch_get_cpu_features.
-- Copyright (C) 2023 Free Software Foundation, Inc.
--
-- The GNU C Library is free software; you can redistribute it and/or
-- modify it under the terms of the GNU Lesser General Public
-- License as published by the Free Software Foundation; either
-- version 2.1 of the License, or (at your option) any later version.
--
-- The GNU C Library is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-- Lesser General Public License for more details.
--
-- You should have received a copy of the GNU Lesser General Public
-- License along with the GNU C Library; if not, see
-- . */
--
--
--#include
--
--const struct cpu_features *
--_dl_larch_get_cpu_features (void)
--{
-- return &GLRO(dl_larch_cpu_features);
--}
-diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
-index b395a928..57913cef 100644
---- a/sysdeps/loongarch/dl-machine.h
-+++ b/sysdeps/loongarch/dl-machine.h
-@@ -29,8 +29,6 @@
- #include
- #include
-
--#include
--
- #ifndef _RTLD_PROLOGUE
- # define _RTLD_PROLOGUE(entry) \
- ".globl\t" __STRING (entry) "\n\t" \
-@@ -55,23 +53,6 @@
- #define ELF_MACHINE_NO_REL 1
- #define ELF_MACHINE_NO_RELA 0
-
--#define DL_PLATFORM_INIT dl_platform_init ()
--
--static inline void __attribute__ ((unused))
--dl_platform_init (void)
--{
-- if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
-- /* Avoid an empty string which would disturb us. */
-- GLRO(dl_platform) = NULL;
--
--#ifdef SHARED
-- /* init_cpu_features has been called early from __libc_start_main in
-- static executable. */
-- init_cpu_features (&GLRO(dl_larch_cpu_features));
--#endif
--}
--
--
- /* Return nonzero iff ELF header is compatible with the running host. */
- static inline int
- elf_machine_matches_host (const ElfW (Ehdr) *ehdr)
-@@ -309,9 +290,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
- if (profile != 0)
- {
- #if !defined __loongarch_soft_float
-- if (RTLD_SUPPORT_LASX)
-+ if (SUPPORT_LASX)
- gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx;
-- else if (RTLD_SUPPORT_LSX)
-+ else if (SUPPORT_LSX)
- gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx;
- else
- #endif
-@@ -329,9 +310,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
- indicated by the offset on the stack, and then jump to
- the resolved address. */
- #if !defined __loongarch_soft_float
-- if (RTLD_SUPPORT_LASX)
-+ if (SUPPORT_LASX)
- gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
-- else if (RTLD_SUPPORT_LSX)
-+ else if (SUPPORT_LSX)
- gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lsx;
- else
- #endif
-diff --git a/sysdeps/loongarch/dl-tunables.list b/sysdeps/loongarch/dl-tunables.list
-deleted file mode 100644
-index 66b34275..00000000
---- a/sysdeps/loongarch/dl-tunables.list
-+++ /dev/null
-@@ -1,25 +0,0 @@
--# LoongArch specific tunables.
--# Copyright (C) 2023 Free Software Foundation, Inc.
--# This file is part of the GNU C Library.
--
--# The GNU C Library is free software; you can redistribute it and/or
--# modify it under the terms of the GNU Lesser General Public
--# License as published by the Free Software Foundation; either
--# version 2.1 of the License, or (at your option) any later version.
--
--# The GNU C Library is distributed in the hope that it will be useful,
--# but WITHOUT ANY WARRANTY; without even the implied warranty of
--# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
--# Lesser General Public License for more details.
--
--# You should have received a copy of the GNU Lesser General Public
--# License along with the GNU C Library; if not, see
--# .
--
--glibc {
-- cpu {
-- hwcaps {
-- type: STRING
-- }
-- }
--}
-diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
-deleted file mode 100644
-index 1290c4ce..00000000
---- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
-+++ /dev/null
-@@ -1,29 +0,0 @@
--/* Initialize CPU feature data. LoongArch64 version.
-- This file is part of the GNU C Library.
-- Copyright (C) 2023 Free Software Foundation, Inc.
--
-- The GNU C Library is free software; you can redistribute it and/or
-- modify it under the terms of the GNU Lesser General Public
-- License as published by the Free Software Foundation; either
-- version 2.1 of the License, or (at your option) any later version.
--
-- The GNU C Library is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-- Lesser General Public License for more details.
--
-- You should have received a copy of the GNU Lesser General Public
-- License along with the GNU C Library; if not, see
-- . */
--
--#include
--#include
--#include
--extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) attribute_hidden;
--
--static inline void
--init_cpu_features (struct cpu_features *cpu_features)
--{
-- GLRO (dl_larch_cpu_features).hwcap = GLRO (dl_hwcap);
-- TUNABLE_GET (glibc, cpu, hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
--}
-diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
-index 450963ce..d1a280a5 100644
---- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
-+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
-@@ -19,23 +19,13 @@
- #ifndef _CPU_FEATURES_LOONGARCH64_H
- #define _CPU_FEATURES_LOONGARCH64_H
-
--#include
- #include
-
--struct cpu_features
-- {
-- uint64_t hwcap;
-- };
-+#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL)
-+#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
-+#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
-
--/* Get a pointer to the CPU features structure. */
--extern const struct cpu_features *_dl_larch_get_cpu_features (void)
-- __attribute__ ((pure));
--
--#define SUPPORT_UAL (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_UAL)
--#define SUPPORT_LSX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LSX)
--#define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LASX)
--#define RTLD_SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
--#define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
- #define INIT_ARCH()
-
- #endif /* _CPU_FEATURES_LOONGARCH64_H */
-+
-diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
-deleted file mode 100644
-index 6217fda9..00000000
---- a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
-+++ /dev/null
-@@ -1,60 +0,0 @@
--/* Data for LoongArch64 version of processor capability information.
-- Linux version.
-- Copyright (C) 2023 Free Software Foundation, Inc.
-- This file is part of the GNU C Library.
--
-- The GNU C Library is free software; you can redistribute it and/or
-- modify it under the terms of the GNU Lesser General Public
-- License as published by the Free Software Foundation; either
-- version 2.1 of the License, or (at your option) any later version.
--
-- The GNU C Library is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-- Lesser General Public License for more details.
--
-- You should have received a copy of the GNU Lesser General Public
-- License along with the GNU C Library; if not, see
-- . */
--
--/* If anything should be added here check whether the size of each string
-- is still ok with the given array size.
--
-- All the #ifdefs in the definitions are quite irritating but
-- necessary if we want to avoid duplicating the information. There
-- are three different modes:
--
-- - PROCINFO_DECL is defined. This means we are only interested in
-- declarations.
--
-- - PROCINFO_DECL is not defined:
--
-- + if SHARED is defined the file is included in an array
-- initializer. The .element = { ... } syntax is needed.
--
-- + if SHARED is not defined a normal array initialization is
-- needed.
-- */
--
--#ifndef PROCINFO_CLASS
--# define PROCINFO_CLASS
--#endif
--
--#if !IS_IN (ldconfig)
--# if !defined PROCINFO_DECL && defined SHARED
-- ._dl_larch_cpu_features
--# else
--PROCINFO_CLASS struct cpu_features _dl_larch_cpu_features
--# endif
--# ifndef PROCINFO_DECL
--= { }
--# endif
--# if !defined SHARED || defined PROCINFO_DECL
--;
--# else
--,
--# endif
--#endif
--
--#undef PROCINFO_DECL
--#undef PROCINFO_CLASS
-diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
-deleted file mode 100644
-index 455fd71a..00000000
---- a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
-+++ /dev/null
-@@ -1,21 +0,0 @@
--/* Operating system support for run-time dynamic linker. LoongArch version.
-- Copyright (C) 2023 Free Software Foundation, Inc.
-- This file is part of the GNU C Library.
--
-- The GNU C Library is free software; you can redistribute it and/or
-- modify it under the terms of the GNU Lesser General Public
-- License as published by the Free Software Foundation; either
-- version 2.1 of the License, or (at your option) any later version.
--
-- The GNU C Library is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-- Lesser General Public License for more details.
--
-- You should have received a copy of the GNU Lesser General Public
-- License along with the GNU C Library; if not, see
-- . */
--
--#include
--#include
--#include
-diff --git a/sysdeps/unix/sysv/linux/loongarch/libc-start.c b/sysdeps/unix/sysv/linux/loongarch/libc-start.c
-deleted file mode 100644
-index f1346ece..00000000
---- a/sysdeps/unix/sysv/linux/loongarch/libc-start.c
-+++ /dev/null
-@@ -1,34 +0,0 @@
--/* Override csu/libc-start.c on LoongArch64.
-- Copyright (C) 2023 Free Software Foundation, Inc.
-- This file is part of the GNU C Library.
--
-- The GNU C Library is free software; you can redistribute it and/or
-- modify it under the terms of the GNU Lesser General Public
-- License as published by the Free Software Foundation; either
-- version 2.1 of the License, or (at your option) any later version.
--
-- The GNU C Library is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-- Lesser General Public License for more details.
--
-- You should have received a copy of the GNU Lesser General Public
-- License along with the GNU C Library; if not, see
-- . */
--
--#ifndef SHARED
--
--/* Mark symbols hidden in static PIE for early self relocation to work. */
--# if BUILD_PIE_DEFAULT
--# pragma GCC visibility push(hidden)
--# endif
--
--# include
--# include
--
--extern struct cpu_features _dl_larch_cpu_features;
--
--# define ARCH_INIT_CPU_FEATURES() init_cpu_features (&_dl_larch_cpu_features)
--
--#endif
--#include
---
-2.33.0
-
diff --git a/elf-Add-new-LoongArch-reloc-types-101-to-108-into-el.patch b/elf-Add-new-LoongArch-reloc-types-101-to-108-into-el.patch
deleted file mode 100644
index b3d7545..0000000
--- a/elf-Add-new-LoongArch-reloc-types-101-to-108-into-el.patch
+++ /dev/null
-@@ -1,39 +0,0 @@
-From fc60db3cf29ba157d09ba4f4b92e3ab382b0339d Mon Sep 17 00:00:00 2001
-From: Xi Ruoyao
-Date: Wed, 9 Aug 2023 19:12:54 +0800
-Subject: [PATCH 04/29] elf: Add new LoongArch reloc types (101 to 108) into
- elf.h
-
-These reloc types are generated by GNU assembler >= 2.41 for relaxation
-support.
-
-Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=57a930e3
-Signed-off-by: Xi Ruoyao
-Signed-off-by: Peng Fan
-Signed-off-by: ticat_fp
---
- elf/elf.h | 8 ++++++++
- 1 file changed, 8 insertions(+)
-
-diff --git a/elf/elf.h b/elf/elf.h
-index 89fc8021..d623bdeb 100644
---- a/elf/elf.h
-+++ b/elf/elf.h
-@@ -4205,6 +4205,14 @@ enum
- #define R_LARCH_TLS_GD_HI20 98
- #define R_LARCH_32_PCREL 99
- #define R_LARCH_RELAX 100
-+#define R_LARCH_DELETE 101
-+#define R_LARCH_ALIGN 102
-+#define R_LARCH_PCREL20_S2 103
-+#define R_LARCH_CFA 104
-+#define R_LARCH_ADD6 105
-+#define R_LARCH_SUB6 106
-+#define R_LARCH_ADD_ULEB128 107
-+#define R_LARCH_SUB_ULEB128 108
-
- /* ARC specific declarations. */
-
---
-2.33.0
-
diff --git a/glibc.spec b/glibc.spec
index 2e5adb0..3f5ae47 100644
--- a/glibc.spec
+++ b/glibc.spec
-@@ -1,4 +1,4 @@
-%define anolis_release 3
+%define anolis_release 1
 %bcond_without testsuite
 %bcond_without benchtests
-@@ -102,38 +102,6 @@ Patch0186: 1086-CVE-2023-4911.patch
 Patch0187: 0087-CVE-2023-6246.patch
 Patch0188: 0088-CVE-2023-6779.patch
 Patch0189: 0089-CVE-2023-6780.patch
-Patch0190: 0090-CVE-2024-2961.patch
-
-# Part 3000 ~ 4999
-Patch3000: LoongArch-Redefine-macro-LEAF-ENTRY.patch
-Patch3001: LoongArch-Add-minuimum-binutils-required-version.patch
-Patch3002: Loongarch-Add-ifunc-support-and-add-different-versio.patch
-Patch3003: elf-Add-new-LoongArch-reloc-types-101-to-108-into-el.patch
-Patch3004: LoongArch-elf-Add-new-LoongArch-reloc-types-109-into.patch
-Patch3005: Loongarch-Add-ifunc-support-for-strchr-aligned-lsx-l.patch
-Patch3006: Loongarch-Add-ifunc-support-for-memcpy-aligned-unali.patch
-Patch3007: LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch
-Patch3008: LoongArch-Add-ifunc-support-for-strcmp-aligned-lsx.patch
-Patch3009: LoongArch-Add-ifunc-support-for-strncmp-aligned-lsx.patch
-Patch3010: LoongArch-Remove-support-code-for-old-linker-in-star.patch
-Patch3011: LoongArch-Micro-optimize-LD_PCREL.patch
-Patch3012: LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch
-Patch3013: LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch
-Patch3014: LoongArch-Add-ifunc-support-for-memrchr-lsx-lasx.patch
-Patch3015: LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch
-Patch3016: LoongArch-Add-ifunc-support-for-memcmp-aligned-lsx-l.patch
-Patch3017: LoongArch-Change-loongarch-to-LoongArch-in-comments.patch
-Patch3018: LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch
-Patch3019: LoongArch-Replace-deprecated-v0-with-a0-to-eliminate.patch
-Patch3020: LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch
-Patch3021: LoongArch-Add-ifunc-support-for-strrchr-aligned-lsx-.patch
-Patch3022: LoongArch-Change-to-put-magic-number-to-.rodata-sect.patch
-Patch3023: LoongArch-Add-glibc.cpu.hwcap-support.patch
-Patch3024: Revert-LoongArch-Add-glibc.cpu.hwcap-support.patch
-Patch3025: LoongArch-Unify-Register-Names.patch
-Patch3026: LoongArch-Update-hwcap.h-to-sync-with-LoongArch-kern.patch
-Patch3027: linux-Sync-Linux-6.6-elf.h.patch
-Patch3028: Decrease-value-of-arch_minimum_kernel-with-LoongArch.patch
 BuildRequires: audit-libs-devel >= 1.1.3 libcap-devel systemtap-sdt-devel
 BuildRequires: procps-ng util-linux gawk sed >= 3.95 gettext
-@@ -1087,13 +1055,6 @@ update_gconv_modules_cache ()
 %{_libdir}/libpthread_nonshared.a
 %changelog
-* Thu May 09 2024 Zhenyu Wang - 2.38-3
-- fix CVE-2024-2961
-
-* Sat Mar 16 2024 Peng Fan - 2.38-2
-- LoongArch: sync patch from glibc upstream
-- Reduced kernel version requirements
-
 * Tue Mar 05 2024 mgb01105731 - 2.38-1
 - update to 2.38
diff --git a/linux-Sync-Linux-6.6-elf.h.patch b/linux-Sync-Linux-6.6-elf.h.patch
deleted file mode 100644
index ac3c74a..0000000
--- a/linux-Sync-Linux-6.6-elf.h.patch
+++ /dev/null
-@@ -1,48 +0,0 @@
-From 6b3d687470b8f91bc6eb87e924fe97d4592b3aa5 Mon Sep 17 00:00:00 2001
-From: Adhemerval Zanella
-Date: Tue, 31 Oct 2023 13:32:38 -0300
-Subject: [PATCH 29/29] linux: Sync Linux 6.6 elf.h
-
-It adds NT_X86_SHSTK (2fab02b25ae7cf5), NT_RISCV_CSR/NT_RISCV_VECTOR
-(9300f00439743c4), and NT_LOONGARCH_HW_BREAK/NT_LOONGARCH_HW_WATCH
-(1a69f7a161a78ae).
-
-Signed-off-by: Peng Fan
-Signed-off-by: ticat_fp
---
- elf/elf.h | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/elf/elf.h b/elf/elf.h
-index 9c51073f..51633079 100644
---- a/elf/elf.h
-+++ b/elf/elf.h
-@@ -794,6 +794,7 @@ typedef struct
- #define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */
- #define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */
- #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
-+#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */
- #define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */
- #define NT_S390_TIMER 0x301 /* s390 timer register */
- #define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */
-@@ -832,6 +833,8 @@ typedef struct
- #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers. */
- #define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode. */
- #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers. */
-+#define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */
-+#define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */
- #define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers. */
- #define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and
- status registers. */
-@@ -841,6 +844,8 @@ typedef struct
- SIMD Extension registers. */
- #define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary
- Translation registers. */
-+#define NT_LOONGARCH_HW_BREAK 0xa05 /* LoongArch hardware breakpoint registers */
-+#define NT_LOONGARCH_HW_WATCH 0xa06 /* LoongArch hardware watchpoint registers */
-
- /* Legal values for the note segment descriptor types for object files. */
-
---
-2.33.0
-