Compare commits

...

No commits in common. "glibc-2.28-251.0.1.an8.2" and "a23.1" have entirely different histories.

1060 changed files with 15194 additions and 268687 deletions

@@ -1,36 +1,40 @@
commit bd77dd7e73e3530203be1c52c8a29d08270cb25d
Author: Florian Weimer <fweimer@redhat.com>
Date: Wed Sep 13 14:10:56 2023 +0200
From 4ea972b7edd7e36610e8cde18bf7a8149d7bac4f Mon Sep 17 00:00:00 2001
From: Florian Weimer <fweimer@redhat.com>
Date: Wed, 13 Sep 2023 14:10:56 +0200
Subject: [PATCH] CVE-2023-4527: Stack read overflow with large TCP responses
in no-aaaa mode
Without passing alt_dns_packet_buffer, __res_context_search can only
store 2048 bytes (what fits into dns_packet_buffer). However,
the function returns the total packet size, and the subsequent
DNS parsing code in _nss_dns_gethostbyname4_r reads beyond the end
of the stack-allocated buffer.
Fixes commit f282cdbe7f436c75864e5640a4 ("resolv: Implement no-aaaa
stub resolver option") and bug 30842.
Conflicts:
resolv/nss_dns/dns-host.c
(missing dns_packet_buffer cleanup downstream)
(cherry picked from commit bd77dd7e73e3530203be1c52c8a29d08270cb25d)
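In outline, the bug looks like this (a minimal editorial sketch with a hypothetical get_response helper, not the actual glibc code):

#include <stdio.h>

/* Stand-in for __res_context_search: it can fill at most `size' bytes,
   but it returns the TOTAL response size, which may be much larger.  */
static int
get_response (unsigned char *buf, int size)
{
  (void) buf;
  (void) size;
  return 65000;   /* pretend a large TCP response arrived */
}

int
main (void)
{
  unsigned char buf[2048];   /* plays the role of dns_packet_buffer */
  int n = get_response (buf, sizeof buf);
  /* Buggy pattern: treating `n' as the number of valid bytes in `buf'
     lets the parser read far past the 2048-byte stack buffer.  The fix
     passes alt_dns_packet_buffer so oversized responses are stored in
     full before parsing.  */
  printf ("parser would read %d bytes from a %zu-byte buffer\n",
          n, sizeof buf);
  return 0;
}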
---
resolv/Makefile | 2 +
resolv/nss_dns/dns-host.c | 2 +-
resolv/tst-resolv-noaaaa-vc.c | 129 ++++++++++++++++++++++++++++++++++
4 files changed, 139 insertions(+), 1 deletion(-)
create mode 100644 resolv/tst-resolv-noaaaa-vc.c
diff --git a/resolv/Makefile b/resolv/Makefile
index ab8ad49b5318ad41..4f4eaf060443c128 100644
index f8a92c6cff..28cedf49ee 100644
--- a/resolv/Makefile
+++ b/resolv/Makefile
@@ -58,6 +58,7 @@ tests += \
tst-resolv-edns \
@@ -101,6 +101,7 @@ tests += \
tst-resolv-invalid-cname \
tst-resolv-network \
tst-resolv-noaaaa \
+ tst-resolv-noaaaa-vc \
tst-resolv-nondecimal \
tst-resolv-res_init-multi \
tst-resolv-search \
@@ -202,6 +203,7 @@ $(objpfx)tst-resolv-res_init-multi: $(objpfx)libresolv.so \
$(objpfx)tst-resolv-res_init-thread: $(libdl) $(objpfx)libresolv.so \
@@ -291,6 +292,7 @@ $(objpfx)tst-resolv-res_init-thread: $(objpfx)libresolv.so \
$(objpfx)tst-resolv-invalid-cname: $(objpfx)libresolv.so \
$(shared-thread-library)
$(objpfx)tst-resolv-noaaaa: $(objpfx)libresolv.so $(shared-thread-library)
+$(objpfx)tst-resolv-noaaaa-vc: $(objpfx)libresolv.so $(shared-thread-library)
@@ -38,21 +42,21 @@ index ab8ad49b5318ad41..4f4eaf060443c128 100644
$(objpfx)tst-resolv-qtypes: $(objpfx)libresolv.so $(shared-thread-library)
$(objpfx)tst-resolv-rotate: $(objpfx)libresolv.so $(shared-thread-library)
diff --git a/resolv/nss_dns/dns-host.c b/resolv/nss_dns/dns-host.c
index ff0a0b6f7f1f4703..f678c7d7caa3a026 100644
index 9fa81f23c8..227734da5c 100644
--- a/resolv/nss_dns/dns-host.c
+++ b/resolv/nss_dns/dns-host.c
@@ -392,7 +392,7 @@ _nss_dns_gethostbyname4_r (const char *name, struct gaih_addrtuple **pat,
else
@@ -427,7 +427,7 @@ _nss_dns_gethostbyname4_r (const char *name, struct gaih_addrtuple **pat,
{
n = __res_context_search (ctx, name, C_IN, T_A,
- host_buffer.buf->buf, 2048, NULL,
+ host_buffer.buf->buf, 2048, &host_buffer.ptr,
NULL, NULL, NULL, NULL);
dns_packet_buffer, sizeof (dns_packet_buffer),
- NULL, NULL, NULL, NULL, NULL);
+ &alt_dns_packet_buffer, NULL, NULL, NULL, NULL);
if (n >= 0)
status = gaih_getanswer_noaaaa (host_buffer.buf, n,
status = gaih_getanswer_noaaaa (alt_dns_packet_buffer, n,
&abuf, pat, errnop, herrnop, ttlp);
diff --git a/resolv/tst-resolv-noaaaa-vc.c b/resolv/tst-resolv-noaaaa-vc.c
new file mode 100644
index 0000000000000000..9f5aebd99f2d74a2
index 0000000000..9f5aebd99f
--- /dev/null
+++ b/resolv/tst-resolv-noaaaa-vc.c
@@ -0,0 +1,129 @@
@@ -185,3 +189,6 @@ index 0000000000000000..9f5aebd99f2d74a2
+}
+
+#include <support/test-driver.c>
--
2.39.3

@@ -1,4 +1,8 @@
Avoid UAF in getcanonname (CVE-2023-4806)
From a9728f798ec7f05454c95637ee6581afaa9b487d Mon Sep 17 00:00:00 2001
From: Siddhesh Poyarekar <siddhesh@sourceware.org>
Date: Fri, 15 Sep 2023 13:51:12 -0400
Subject: [PATCH] getaddrinfo: Fix use after free in getcanonname
(CVE-2023-4806)
When an NSS plugin only implements the _gethostbyname2_r and
_getcanonname_r callbacks, getaddrinfo could use memory that was freed
@@ -17,60 +21,39 @@ reference in res->at->name. This then gets dereferenced in the
getcanonname_r plugin call, resulting in the use after free.
Fix this by copying h_name over and freeing it at the end. This
resolves BZ #30843, which is assigned CVE-2023-4806. This is a minimal
RHEL-8-specific fix. Test case differences from upstream:
resolves BZ #30843, which is assigned CVE-2023-4806.
- The test module needs to explicitly link against libnss_files on
RHEL-8; upstream libnss_files is built into libc.so.
- Test module code was adapted to not use the upstream NSS module
convenience macros.
This change is adapted from the following commit from upstream:
commit 973fe93a5675c42798b2161c6f29c01b0e243994
Author: Siddhesh Poyarekar <siddhesh@sourceware.org>
Date: Fri Sep 15 13:51:12 2023 -0400
getaddrinfo: Fix use after free in getcanonname (CVE-2023-4806)
When an NSS plugin only implements the _gethostbyname2_r and
_getcanonname_r callbacks, getaddrinfo could use memory that was freed
during tmpbuf resizing, through h_name in a previous query response.
The backing store for res->at->name when doing a query with
gethostbyname3_r or gethostbyname2_r is tmpbuf, which is reallocated in
gethosts during the query. For AF_INET6 lookup with AI_ALL |
AI_V4MAPPED, gethosts gets called twice, once for a v6 lookup and second
for a v4 lookup. In this case, if the first call reallocates tmpbuf
enough number of times, resulting in a malloc, th->h_name (that
res->at->name refers to) ends up on a heap allocated storage in tmpbuf.
Now if the second call to gethosts also causes the plugin callback to
return NSS_STATUS_TRYAGAIN, tmpbuf will get freed, resulting in a UAF
reference in res->at->name. This then gets dereferenced in the
getcanonname_r plugin call, resulting in the use after free.
Fix this by copying h_name over and freeing it at the end. This
resolves BZ #30843, which is assigned CVE-2023-4806.
Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
(cherry picked from commit 973fe93a5675c42798b2161c6f29c01b0e243994)
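The lifetime problem in miniature (an editorial sketch with hypothetical variables, not the glibc code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main (void)
{
  /* tmpbuf plays the role of gethosts's scratch buffer; `name' plays
     res->at->name, which aliases storage inside tmpbuf.  */
  char *tmpbuf = strdup ("test.example.com");
  const char *name = tmpbuf;

  /* The fix: duplicate the string while its backing store is live.  */
  char *h_name = strdup (name);

  free (tmpbuf);   /* the second gethosts call may realloc/free tmpbuf */

  /* The buggy code dereferenced `name' here (use-after-free); the
     fixed code uses the stable copy and frees it at the end.  */
  puts (h_name);
  free (h_name);
  return 0;
}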
---
nss/Makefile | 15 ++++-
nss/nss_test_gai_hv2_canonname.c | 56 +++++++++++++++++
nss/tst-nss-gai-hv2-canonname.c | 63 +++++++++++++++++++
nss/tst-nss-gai-hv2-canonname.h | 1 +
.../postclean.req | 0
.../tst-nss-gai-hv2-canonname.script | 2 +
sysdeps/posix/getaddrinfo.c | 25 +++++---
7 files changed, 152 insertions(+), 10 deletions(-)
create mode 100644 nss/nss_test_gai_hv2_canonname.c
create mode 100644 nss/tst-nss-gai-hv2-canonname.c
create mode 100644 nss/tst-nss-gai-hv2-canonname.h
create mode 100644 nss/tst-nss-gai-hv2-canonname.root/postclean.req
create mode 100644 nss/tst-nss-gai-hv2-canonname.root/tst-nss-gai-hv2-canonname.script
diff --git a/nss/Makefile b/nss/Makefile
index cfb255c6e7a3a4de..5829a2539306ddb5 100644
index a978e3927a..f0af87e6f1 100644
--- a/nss/Makefile
+++ b/nss/Makefile
@@ -66,7 +66,8 @@ xtests = bug-erange
tests-container = \
tst-nss-db-endpwent \
tst-nss-db-endgrent \
- tst-nss-gai-actions
+ tst-nss-gai-actions \
+ tst-nss-gai-hv2-canonname
@@ -81,6 +81,7 @@ tests-container := \
tst-nss-test3 \
tst-reload1 \
tst-reload2 \
+ tst-nss-gai-hv2-canonname \
# tests-container
# Tests which need libdl
ifeq (yes,$(build-shared))
@@ -132,7 +133,8 @@ routines += $(libnss_files-routines)
static-only-routines += $(libnss_files-routines)
@@ -144,7 +145,8 @@ libnss_compat-inhibit-o = $(filter-out .os,$(object-suffixes))
ifeq ($(build-static-nss),yes)
tests-static += tst-nss-static
endif
-extra-test-objs += nss_test1.os nss_test2.os nss_test_errno.os
@@ -79,7 +62,7 @@ index cfb255c6e7a3a4de..5829a2539306ddb5 100644
include ../Rules
@@ -169,12 +171,17 @@ rtld-tests-LDFLAGS += -Wl,--dynamic-list=nss_test.ver
@@ -179,12 +181,16 @@ rtld-tests-LDFLAGS += -Wl,--dynamic-list=nss_test.ver
libof-nss_test1 = extramodules
libof-nss_test2 = extramodules
libof-nss_test_errno = extramodules
@@ -91,34 +74,38 @@ index cfb255c6e7a3a4de..5829a2539306ddb5 100644
$(objpfx)/libnss_test_errno.so: $(objpfx)nss_test_errno.os $(link-libc-deps)
$(build-module)
+$(objpfx)/libnss_test_gai_hv2_canonname.so: \
+ $(objpfx)nss_test_gai_hv2_canonname.os $(link-libc-deps) \
+ $(objpfx)/libnss_files.so
+ $(objpfx)nss_test_gai_hv2_canonname.os $(link-libc-deps)
+ $(build-module)
$(objpfx)nss_test2.os : nss_test1.c
ifdef libnss_test1.so-version
$(objpfx)/libnss_test1.so$(libnss_test1.so-version): $(objpfx)/libnss_test1.so
@@ -187,10 +194,14 @@ endif
# Use the nss_files suffix for these objects as well.
$(objpfx)/libnss_test1.so$(libnss_files.so-version): $(objpfx)/libnss_test1.so
@@ -194,10 +200,14 @@ $(objpfx)/libnss_test2.so$(libnss_files.so-version): $(objpfx)/libnss_test2.so
$(objpfx)/libnss_test_errno.so$(libnss_files.so-version): \
$(objpfx)/libnss_test_errno.so
$(make-link)
+$(objpfx)/libnss_test_gai_hv2_canonname.so$(libnss_files.so-version): \
+ $(objpfx)/libnss_test_gai_hv2_canonname.so
+ $(make-link)
$(patsubst %,$(objpfx)%.out,$(tests)) : \
$(objpfx)/libnss_test1.so$(libnss_test1.so-version) \
$(objpfx)/libnss_test2.so$(libnss_test2.so-version) \
$(patsubst %,$(objpfx)%.out,$(tests) $(tests-container)) : \
$(objpfx)/libnss_test1.so$(libnss_files.so-version) \
$(objpfx)/libnss_test2.so$(libnss_files.so-version) \
- $(objpfx)/libnss_test_errno.so$(libnss_files.so-version)
+ $(objpfx)/libnss_test_errno.so$(libnss_files.so-version) \
+ $(objpfx)/libnss_test_gai_hv2_canonname.so$(libnss_files.so-version)
ifeq (yes,$(have-thread-library))
$(objpfx)tst-cancel-getpwuid_r: $(shared-thread-library)
@@ -214,3 +224,4 @@ LDFLAGS-tst-nss-test3 = -Wl,--disable-new-dtags
LDFLAGS-tst-nss-test4 = -Wl,--disable-new-dtags
LDFLAGS-tst-nss-test5 = -Wl,--disable-new-dtags
LDFLAGS-tst-nss-test_errno = -Wl,--disable-new-dtags
+LDFLAGS-tst-nss-test_gai_hv2_canonname = -Wl,--disable-new-dtags
diff --git a/nss/nss_test_gai_hv2_canonname.c b/nss/nss_test_gai_hv2_canonname.c
new file mode 100644
index 0000000000000000..4195d7d24fdd5f6d
index 0000000000..4439c83c9f
--- /dev/null
+++ b/nss/nss_test_gai_hv2_canonname.c
@@ -0,0 +1,64 @@
@@ -0,0 +1,56 @@
+/* NSS service provider that only provides gethostbyname2_r.
+ Copyright The GNU Toolchain Authors.
+ This file is part of the GNU C Library.
@@ -137,7 +124,6 @@ index 0000000000000000..4195d7d24fdd5f6d
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <netdb.h>
+#include <nss.h>
+#include <stdlib.h>
+#include <string.h>
@@ -145,20 +131,13 @@ index 0000000000000000..4195d7d24fdd5f6d
+
+/* Catch misnamed and functions. */
+#pragma GCC diagnostic error "-Wmissing-prototypes"
+NSS_DECLARE_MODULE_FUNCTIONS (test_gai_hv2_canonname)
+
+extern enum nss_status _nss_files_gethostbyname2_r (const char *, int,
+ struct hostent *, char *,
+ size_t, int *, int *);
+
+enum nss_status
+_nss_test_gai_hv2_canonname_gethostbyname2_r (const char *, int, struct hostent
+ *, char *, size_t, int *, int *);
+
+enum nss_status
+_nss_test_gai_hv2_canonname_getcanonname_r (const char *, char *, size_t, char
+ **, int *, int *);
+
+enum nss_status
+_nss_test_gai_hv2_canonname_gethostbyname2_r (const char *name, int af,
+ struct hostent *result,
+ char *buffer, size_t buflen,
@@ -185,7 +164,7 @@ index 0000000000000000..4195d7d24fdd5f6d
+}
diff --git a/nss/tst-nss-gai-hv2-canonname.c b/nss/tst-nss-gai-hv2-canonname.c
new file mode 100644
index 0000000000000000..d5f10c07d6a90773
index 0000000000..d5f10c07d6
--- /dev/null
+++ b/nss/tst-nss-gai-hv2-canonname.c
@@ -0,0 +1,63 @@
@@ -254,61 +233,86 @@ index 0000000000000000..d5f10c07d6a90773
+#include <support/test-driver.c>
diff --git a/nss/tst-nss-gai-hv2-canonname.h b/nss/tst-nss-gai-hv2-canonname.h
new file mode 100644
index 0000000000000000..14f2a9cb0867dff9
index 0000000000..14f2a9cb08
--- /dev/null
+++ b/nss/tst-nss-gai-hv2-canonname.h
@@ -0,0 +1 @@
+#define QUERYNAME "test.example.com"
diff --git a/nss/tst-nss-gai-hv2-canonname.root/postclean.req b/nss/tst-nss-gai-hv2-canonname.root/postclean.req
new file mode 100644
index 0000000000000000..e69de29bb2d1d643
index 0000000000..e69de29bb2
diff --git a/nss/tst-nss-gai-hv2-canonname.root/tst-nss-gai-hv2-canonname.script b/nss/tst-nss-gai-hv2-canonname.root/tst-nss-gai-hv2-canonname.script
new file mode 100644
index 0000000000000000..31848b4a28524af6
index 0000000000..31848b4a28
--- /dev/null
+++ b/nss/tst-nss-gai-hv2-canonname.root/tst-nss-gai-hv2-canonname.script
@@ -0,0 +1,2 @@
+cp $B/nss/libnss_test_gai_hv2_canonname.so $L/libnss_test_gai_hv2_canonname.so.2
+su
diff --git a/sysdeps/posix/getaddrinfo.c b/sysdeps/posix/getaddrinfo.c
index 4fa963644af8b7d5..46046504a6858f2e 100644
index 5cda9bb072..7a43a3bf4c 100644
--- a/sysdeps/posix/getaddrinfo.c
+++ b/sysdeps/posix/getaddrinfo.c
@@ -233,7 +233,6 @@ convert_hostent_to_gaih_addrtuple (const struct addrinfo *req,
@@ -120,6 +120,7 @@ struct gaih_result
{
struct gaih_addrtuple *at;
char *canon;
+ char *h_name;
bool free_at;
bool got_ipv6;
};
@@ -165,6 +166,7 @@ gaih_result_reset (struct gaih_result *res)
if (res->free_at)
free (res->at);
free (res->canon);
+ free (res->h_name);
memset (res, 0, sizeof (*res));
}
@@ -203,9 +205,8 @@ gaih_inet_serv (const char *servicename, const struct gaih_typeproto *tp,
return 0;
}
-/* Convert struct hostent to a list of struct gaih_addrtuple objects. h_name
- is not copied, and the struct hostent object must not be deallocated
- prematurely. The new addresses are appended to the tuple array in RES. */
+/* Convert struct hostent to a list of struct gaih_addrtuple objects. The new
+ addresses are appended to the tuple array in RES. */
static bool
convert_hostent_to_gaih_addrtuple (const struct addrinfo *req, int family,
struct hostent *h, struct gaih_result *res)
@@ -238,6 +239,15 @@ convert_hostent_to_gaih_addrtuple (const struct addrinfo *req, int family,
res->at = array;
res->free_at = true;
+ /* Duplicate h_name because it may get reclaimed when the underlying storage
+ is freed. */
+ if (res->h_name == NULL)
+ {
+ res->h_name = __strdup (h->h_name);
+ if (res->h_name == NULL)
+ return false;
+ }
+
/* Update the next pointers on reallocation. */
for (size_t i = 0; i < old; i++)
array[i].next = array + i + 1;
@@ -262,7 +272,6 @@ convert_hostent_to_gaih_addrtuple (const struct addrinfo *req, int family,
}
array[i].next = array + i + 1;
}
- array[0].name = h->h_name;
array[count - 1].next = NULL;
*result = array;
@@ -287,6 +286,18 @@ convert_hostent_to_gaih_addrtuple (const struct addrinfo *req,
} \
*pat = addrmem; \
\
+ /* Store h_name so that it survives accidental deallocation when \
+ gethosts is called again and tmpbuf gets reallocated. */ \
+ if (h_name == NULL && th.h_name != NULL) \
+ { \
+ h_name = __strdup (th.h_name); \
+ if (h_name == NULL) \
+ { \
+ __resolv_context_put (res_ctx); \
+ result = -EAI_SYSTEM; \
+ goto free_and_return; \
+ } \
+ } \
if (localcanon != NULL && canon == NULL) \
{ \
canonbuf = __strdup (localcanon); \
@@ -323,15 +334,15 @@ typedef enum nss_status (*nss_getcanonname_r)
return true;
@@ -324,15 +333,15 @@ gethosts (nss_gethostbyname3_r fct, int family, const char *name,
memory allocation failure. The returned string is allocated on the
heap; the caller has to free it. */
static char *
-getcanonname (service_user *nip, struct gaih_addrtuple *at, const char *name)
+getcanonname (service_user *nip, const char *hname, const char *name)
-getcanonname (nss_action_list nip, struct gaih_addrtuple *at, const char *name)
+getcanonname (nss_action_list nip, const char *hname, const char *name)
{
nss_getcanonname_r cfct = __nss_lookup_function (nip, "getcanonname_r");
nss_getcanonname_r *cfct = __nss_lookup_function (nip, "getcanonname_r");
char *s = (char *) name;
if (cfct != NULL)
{
@ -320,28 +324,15 @@ index 4fa963644af8b7d5..46046504a6858f2e 100644
/* If the canonical name cannot be determined, use the passed
string. */
s = (char *) name;
@@ -349,6 +360,7 @@ gaih_inet (const char *name, const struct gaih_service *service,
struct gaih_addrtuple *at = NULL;
bool got_ipv6 = false;
const char *canon = NULL;
+ char *h_name = NULL;
const char *orig_name = name;
/* Reserve stack memory for the scratch buffer in the getaddrinfo
@@ -919,7 +931,7 @@ gaih_inet (const char *name, const struct gaih_service *service,
if ((req->ai_flags & AI_CANONNAME) != 0
&& canon == NULL)
{
- canonbuf = getcanonname (nip, at, name);
+ canonbuf = getcanonname (nip, h_name, name);
if (canonbuf == NULL)
{
__resolv_context_enable_inet6
@@ -1169,6 +1181,7 @@ gaih_inet (const char *name, const struct gaih_service *service,
free ((char *) name);
free (addrmem);
free (canonbuf);
+ free (h_name);
return result;
}
@@ -771,7 +780,7 @@ get_nss_addresses (const char *name, const struct addrinfo *req,
if ((req->ai_flags & AI_CANONNAME) != 0
&& res->canon == NULL)
{
- char *canonbuf = getcanonname (nip, res->at, name);
+ char *canonbuf = getcanonname (nip, res->h_name, name);
if (canonbuf == NULL)
{
__resolv_context_put (res_ctx);
--
2.39.3

0085-CVE-2023-5156.patch (new file)

@@ -0,0 +1,98 @@
From 856bac55f98dc840e7c27cfa82262b933385de90 Mon Sep 17 00:00:00 2001
From: Romain Geissler <romain.geissler@amadeus.com>
Date: Mon, 25 Sep 2023 01:21:51 +0100
Subject: [PATCH] Fix leak in getaddrinfo introduced by the fix for
CVE-2023-4806 [BZ #30843]
This patch fixes a very recently added leak in getaddrinfo.
This was assigned CVE-2023-5156.
Resolves: BZ #30884
Related: BZ #30842
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
(cherry picked from commit ec6b95c3303c700eb89eebeda2d7264cc184a796)
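The regression test relies on glibc's mtrace facility. A minimal standalone version of that pattern (hypothetical log path; on glibc 2.34 and later the program must also be started with LD_PRELOAD=libc_malloc_debug.so for the hooks to take effect):

#include <mcheck.h>
#include <stdlib.h>

int
main (void)
{
  /* MALLOC_TRACE must name a writable log file before mtrace() runs. */
  setenv ("MALLOC_TRACE", "/tmp/gai.mtrace", 1);
  mtrace ();                     /* start logging allocations */
  void *leak = malloc (64);      /* deliberately never freed */
  (void) leak;
  muntrace ();                   /* stop logging */
  return 0;
}

The log is then post-processed with the mtrace perl script, e.g. `mtrace ./a.out /tmp/gai.mtrace', which is what the mtrace-tst-nss-gai-hv2-canonname.out rule in the Makefile hunk below automates.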
---
nss/Makefile | 20 ++++++++++++++++++++
nss/tst-nss-gai-hv2-canonname.c | 3 +++
sysdeps/posix/getaddrinfo.c | 4 +---
3 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/nss/Makefile b/nss/Makefile
index f0af87e6f1..7a52c68791 100644
--- a/nss/Makefile
+++ b/nss/Makefile
@@ -148,6 +148,15 @@ endif
extra-test-objs += nss_test1.os nss_test2.os nss_test_errno.os \
nss_test_gai_hv2_canonname.os
+ifeq ($(run-built-tests),yes)
+ifneq (no,$(PERL))
+tests-special += $(objpfx)mtrace-tst-nss-gai-hv2-canonname.out
+endif
+endif
+
+generated += mtrace-tst-nss-gai-hv2-canonname.out \
+ tst-nss-gai-hv2-canonname.mtrace
+
include ../Rules
ifeq (yes,$(have-selinux))
@@ -216,6 +225,17 @@ endif
$(objpfx)tst-nss-files-alias-leak.out: $(objpfx)/libnss_files.so
$(objpfx)tst-nss-files-alias-truncated.out: $(objpfx)/libnss_files.so
+tst-nss-gai-hv2-canonname-ENV = \
+ MALLOC_TRACE=$(objpfx)tst-nss-gai-hv2-canonname.mtrace \
+ LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so
+$(objpfx)mtrace-tst-nss-gai-hv2-canonname.out: \
+ $(objpfx)tst-nss-gai-hv2-canonname.out
+ { test -r $(objpfx)tst-nss-gai-hv2-canonname.mtrace \
+ || ( echo "tst-nss-gai-hv2-canonname.mtrace does not exist"; exit 77; ) \
+ && $(common-objpfx)malloc/mtrace \
+ $(objpfx)tst-nss-gai-hv2-canonname.mtrace; } > $@; \
+ $(evaluate-test)
+
# Disable DT_RUNPATH on NSS tests so that the glibc internal NSS
# functions can load testing NSS modules via DT_RPATH.
LDFLAGS-tst-nss-test1 = -Wl,--disable-new-dtags
diff --git a/nss/tst-nss-gai-hv2-canonname.c b/nss/tst-nss-gai-hv2-canonname.c
index d5f10c07d6..7db53cf09d 100644
--- a/nss/tst-nss-gai-hv2-canonname.c
+++ b/nss/tst-nss-gai-hv2-canonname.c
@@ -21,6 +21,7 @@
#include <netdb.h>
#include <stdlib.h>
#include <string.h>
+#include <mcheck.h>
#include <support/check.h>
#include <support/xstdio.h>
#include "nss/tst-nss-gai-hv2-canonname.h"
@@ -41,6 +42,8 @@ static void do_prepare (int a, char **av)
static int
do_test (void)
{
+ mtrace ();
+
__nss_configure_lookup ("hosts", "test_gai_hv2_canonname");
struct addrinfo hints = {};
diff --git a/sysdeps/posix/getaddrinfo.c b/sysdeps/posix/getaddrinfo.c
index 7a43a3bf4c..f975dcd2bc 100644
--- a/sysdeps/posix/getaddrinfo.c
+++ b/sysdeps/posix/getaddrinfo.c
@@ -1196,9 +1196,7 @@ free_and_return:
if (malloc_name)
free ((char *) name);
free (addrmem);
- if (res.free_at)
- free (res.at);
- free (res.canon);
+ gaih_result_reset (&res);
return result;
}
--
2.39.3

0087-CVE-2023-6246.patch (new file)

@@ -0,0 +1,181 @@
From d1a83b6767f68b3cb5b4b4ea2617254acd040c82 Mon Sep 17 00:00:00 2001
From: Arjun Shankar <arjun@redhat.com>
Date: Mon, 15 Jan 2024 17:44:43 +0100
Subject: [PATCH] syslog: Fix heap buffer overflow in __vsyslog_internal
(CVE-2023-6246)
__vsyslog_internal did not handle a case where printing a SYSLOG_HEADER
containing a long program name failed to update the required buffer
size, leading to the allocation and overflow of a too-small buffer on
the heap. This commit fixes that. It also adds a new regression test
that uses glibc.malloc.check.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
Tested-by: Carlos O'Donell <carlos@redhat.com>
(cherry picked from commit 6bd0e4efcc78f3c0115e5ea9739a1642807450da)
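For reference, the C99 snprintf contract the fix depends on: the return value is the length the complete string would have required, not the number of bytes actually written, so l >= sizeof bufs signals truncation and yields the needed heap size. A small self-contained illustration:

#include <stdio.h>

int
main (void)
{
  char bufs[16];
  int l = snprintf (bufs, sizeof bufs, "%s: ", "averylongprogname");
  if (l >= 0 && (size_t) l >= sizeof bufs)
    /* Truncated: a heap buffer of l + 1 bytes would hold it all.  */
    printf ("need %d bytes, have %zu\n", l + 1, sizeof bufs);
  return 0;
}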
---
misc/Makefile | 8 ++-
misc/syslog.c | 50 +++++++++++++------
misc/tst-syslog-long-progname.c | 39 +++++++++++++++
.../postclean.req | 0
4 files changed, 82 insertions(+), 15 deletions(-)
create mode 100644 misc/tst-syslog-long-progname.c
create mode 100644 misc/tst-syslog-long-progname.root/postclean.req
diff --git a/misc/Makefile b/misc/Makefile
index ba8232a0e9..66e9ded8f9 100644
--- a/misc/Makefile
+++ b/misc/Makefile
@@ -115,7 +115,10 @@ tests-special += $(objpfx)tst-error1-mem.out \
$(objpfx)tst-allocate_once-mem.out
endif
-tests-container := tst-syslog
+tests-container := \
+ tst-syslog \
+ tst-syslog-long-progname \
+ # tests-container
CFLAGS-select.c += -fexceptions -fasynchronous-unwind-tables
CFLAGS-tsearch.c += $(uses-callbacks)
@@ -175,6 +178,9 @@ $(objpfx)tst-allocate_once-mem.out: $(objpfx)tst-allocate_once.out
$(common-objpfx)malloc/mtrace $(objpfx)tst-allocate_once.mtrace > $@; \
$(evaluate-test)
+tst-syslog-long-progname-ENV = GLIBC_TUNABLES=glibc.malloc.check=3 \
+ LD_PRELOAD=libc_malloc_debug.so.0
+
$(objpfx)tst-select: $(librt)
$(objpfx)tst-select-time64: $(librt)
$(objpfx)tst-pselect: $(librt)
diff --git a/misc/syslog.c b/misc/syslog.c
index f67d4b58a4..fe1daf988b 100644
--- a/misc/syslog.c
+++ b/misc/syslog.c
@@ -122,8 +122,9 @@ __vsyslog_internal (int pri, const char *fmt, va_list ap,
{
/* Try to use a static buffer as an optimization. */
char bufs[1024];
- char *buf = NULL;
- size_t bufsize = 0;
+ char *buf = bufs;
+ size_t bufsize;
+
int msgoff;
int saved_errno = errno;
@@ -175,29 +176,50 @@ __vsyslog_internal (int pri, const char *fmt, va_list ap,
#define SYSLOG_HEADER_WITHOUT_TS(__pri, __msgoff) \
"<%d>: %n", __pri, __msgoff
- int l;
+ int l, vl;
if (has_ts)
l = __snprintf (bufs, sizeof bufs,
SYSLOG_HEADER (pri, timestamp, &msgoff, pid));
else
l = __snprintf (bufs, sizeof bufs,
SYSLOG_HEADER_WITHOUT_TS (pri, &msgoff));
+
+ char *pos;
+ size_t len;
+
if (0 <= l && l < sizeof bufs)
{
- va_list apc;
- va_copy (apc, ap);
+ /* At this point, there is still a chance that we can print the
+ remaining part of the log into bufs and use that. */
+ pos = bufs + l;
+ len = sizeof (bufs) - l;
+ }
+ else
+ {
+ buf = NULL;
+ /* We already know that bufs is too small to use for this log message.
+ The next vsnprintf into bufs is used only to calculate the total
+ required buffer length. We will discard bufs contents and allocate
+ an appropriately sized buffer later instead. */
+ pos = bufs;
+ len = sizeof (bufs);
+ }
- /* Restore errno for %m format. */
- __set_errno (saved_errno);
+ {
+ va_list apc;
+ va_copy (apc, ap);
- int vl = __vsnprintf_internal (bufs + l, sizeof bufs - l, fmt, apc,
- mode_flags);
- if (0 <= vl && vl < sizeof bufs - l)
- buf = bufs;
- bufsize = l + vl;
+ /* Restore errno for %m format. */
+ __set_errno (saved_errno);
- va_end (apc);
- }
+ vl = __vsnprintf_internal (pos, len, fmt, apc, mode_flags);
+
+ if (!(0 <= vl && vl < len))
+ buf = NULL;
+
+ bufsize = l + vl;
+ va_end (apc);
+ }
if (buf == NULL)
{
diff --git a/misc/tst-syslog-long-progname.c b/misc/tst-syslog-long-progname.c
new file mode 100644
index 0000000000..88f37a8a00
--- /dev/null
+++ b/misc/tst-syslog-long-progname.c
@@ -0,0 +1,39 @@
+/* Test heap buffer overflow in syslog with long __progname (CVE-2023-6246)
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <syslog.h>
+#include <string.h>
+
+extern char * __progname;
+
+static int
+do_test (void)
+{
+ char long_progname[2048];
+
+ memset (long_progname, 'X', sizeof (long_progname) - 1);
+ long_progname[sizeof (long_progname) - 1] = '\0';
+
+ __progname = long_progname;
+
+ syslog (LOG_INFO, "Hello, World!");
+
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/misc/tst-syslog-long-progname.root/postclean.req b/misc/tst-syslog-long-progname.root/postclean.req
new file mode 100644
index 0000000000..e69de29bb2
--
2.39.3

0088-CVE-2023-6779.patch (new file)

@@ -0,0 +1,106 @@
From 2bc9d7c002bdac38b5c2a3f11b78e309d7765b83 Mon Sep 17 00:00:00 2001
From: Arjun Shankar <arjun@redhat.com>
Date: Mon, 15 Jan 2024 17:44:44 +0100
Subject: [PATCH] syslog: Fix heap buffer overflow in __vsyslog_internal
(CVE-2023-6779)
__vsyslog_internal used the return value of snprintf/vsnprintf to
calculate buffer sizes for memory allocation. If these functions (for
any reason) failed and returned -1, the resulting buffer would be too
small to hold output. This commit fixes that.
All snprintf/vsnprintf calls are checked for negative return values and
the function silently returns upon encountering them.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
(cherry picked from commit 7e5a0c286da33159d47d0122007aac016f3e02cd)
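A compact sketch of the hardened pattern (hypothetical helper; the real change is in the __vsyslog_internal hunks below):

#include <stdarg.h>
#include <stdio.h>

/* Compute the buffer size a format would need; negative printf-family
   returns (e.g. encoding errors) must not leak into the arithmetic.  */
static int
needed_size (const char *fmt, ...)
{
  va_list ap;
  va_start (ap, fmt);
  int l = vsnprintf (NULL, 0, fmt, ap);
  va_end (ap);
  return l < 0 ? -1 : l + 1;   /* -1 tells the caller to bail out */
}

int
main (void)
{
  int n = needed_size ("<%d>: %s", 13, "message");
  if (n < 0)
    return 1;                  /* silently give up, as the fix does */
  printf ("%d bytes needed\n", n);
  return 0;
}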
---
misc/syslog.c | 39 ++++++++++++++++++++++++++++-----------
1 file changed, 28 insertions(+), 11 deletions(-)
diff --git a/misc/syslog.c b/misc/syslog.c
index fe1daf988b..3108ae9134 100644
--- a/misc/syslog.c
+++ b/misc/syslog.c
@@ -183,11 +183,13 @@ __vsyslog_internal (int pri, const char *fmt, va_list ap,
else
l = __snprintf (bufs, sizeof bufs,
SYSLOG_HEADER_WITHOUT_TS (pri, &msgoff));
+ if (l < 0)
+ goto out;
char *pos;
size_t len;
- if (0 <= l && l < sizeof bufs)
+ if (l < sizeof bufs)
{
/* At this point, there is still a chance that we can print the
remaining part of the log into bufs and use that. */
@@ -213,12 +215,15 @@ __vsyslog_internal (int pri, const char *fmt, va_list ap,
__set_errno (saved_errno);
vl = __vsnprintf_internal (pos, len, fmt, apc, mode_flags);
+ va_end (apc);
+
+ if (vl < 0)
+ goto out;
- if (!(0 <= vl && vl < len))
+ if (vl >= len)
buf = NULL;
bufsize = l + vl;
- va_end (apc);
}
if (buf == NULL)
@@ -229,25 +234,37 @@ __vsyslog_internal (int pri, const char *fmt, va_list ap,
/* Tell the cancellation handler to free this buffer. */
clarg.buf = buf;
+ int cl;
if (has_ts)
- __snprintf (buf, l + 1,
- SYSLOG_HEADER (pri, timestamp, &msgoff, pid));
+ cl = __snprintf (buf, l + 1,
+ SYSLOG_HEADER (pri, timestamp, &msgoff, pid));
else
- __snprintf (buf, l + 1,
- SYSLOG_HEADER_WITHOUT_TS (pri, &msgoff));
+ cl = __snprintf (buf, l + 1,
+ SYSLOG_HEADER_WITHOUT_TS (pri, &msgoff));
+ if (cl != l)
+ goto out;
va_list apc;
va_copy (apc, ap);
- __vsnprintf_internal (buf + l, bufsize - l + 1, fmt, apc,
- mode_flags);
+ cl = __vsnprintf_internal (buf + l, bufsize - l + 1, fmt, apc,
+ mode_flags);
va_end (apc);
+
+ if (cl != vl)
+ goto out;
}
else
{
+ int bl;
/* Nothing much to do but emit an error message. */
- bufsize = __snprintf (bufs, sizeof bufs,
- "out of memory[%d]", __getpid ());
+ bl = __snprintf (bufs, sizeof bufs,
+ "out of memory[%d]", __getpid ());
+ if (bl < 0 || bl >= sizeof bufs)
+ goto out;
+
+ bufsize = bl;
buf = bufs;
+ msgoff = 0;
}
}
--
2.39.3

0089-CVE-2023-6780.patch (new file)

@@ -0,0 +1,41 @@
From b9b7d6a27aa0632f334352fa400771115b3c69b7 Mon Sep 17 00:00:00 2001
From: Arjun Shankar <arjun@redhat.com>
Date: Mon, 15 Jan 2024 17:44:45 +0100
Subject: [PATCH] syslog: Fix integer overflow in __vsyslog_internal
(CVE-2023-6780)
__vsyslog_internal calculated a buffer size by adding two integers, but
did not first check if the addition would overflow. This commit fixes
that.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
Tested-by: Carlos O'Donell <carlos@redhat.com>
(cherry picked from commit ddf542da94caf97ff43cc2875c88749880b7259b)
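The shape of the added check, as a self-contained illustration: reject l + vl before the signed addition can overflow.

#include <limits.h>
#include <stdio.h>

int
main (void)
{
  int l = 10;              /* header length */
  int vl = INT_MAX - 5;    /* pathological body length */
  /* Signed overflow is undefined behaviour, so test against INT_MAX
     first instead of computing l + vl and inspecting the result.  */
  if (vl < 0 || vl >= INT_MAX - l)
    puts ("rejected: l + vl would overflow");
  else
    printf ("bufsize = %d\n", l + vl);
  return 0;
}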
---
misc/syslog.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/misc/syslog.c b/misc/syslog.c
index 3108ae9134..9336036666 100644
--- a/misc/syslog.c
+++ b/misc/syslog.c
@@ -41,6 +41,7 @@ static char sccsid[] = "@(#)syslog.c 8.4 (Berkeley) 3/18/94";
#include <sys/uio.h>
#include <sys/un.h>
#include <syslog.h>
+#include <limits.h>
static int LogType = SOCK_DGRAM; /* type of socket connection */
static int LogFile = -1; /* fd for log */
@@ -217,7 +218,7 @@ __vsyslog_internal (int pri, const char *fmt, va_list ap,
vl = __vsnprintf_internal (pos, len, fmt, apc, mode_flags);
va_end (apc);
- if (vl < 0)
+ if (vl < 0 || vl >= INT_MAX - l)
goto out;
if (vl >= len)
--
2.39.3

@@ -1,34 +1,42 @@
Author: Charles Fol <folcharles@gmail.com>
Date: Thu Mar 28 12:25:38 2024 -0300
From f9dc609e06b1136bb0408be9605ce7973a767ada Mon Sep 17 00:00:00 2001
From: Charles Fol <folcharles@gmail.com>
Date: Thu, 28 Mar 2024 12:25:38 -0300
Subject: [PATCH] iconv: ISO-2022-CN-EXT: fix out-of-bound writes when writing
escape sequence (CVE-2024-2961)
ISO-2022-CN-EXT uses escape sequences to indicate character set changes
(as specified by RFC 1922). While the SOdesignation has the expected
bounds checks, neither SS2designation nor SS3designation have them,
allowing a write overflow of 1, 2, or 3 bytes with fixed values:
'$+I', '$+J', '$+K', '$+L', '$+M', or '$*H'.
Checked on aarch64-linux-gnu.
Co-authored-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
Tested-by: Carlos O'Donell <carlos@redhat.com>
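The added guard, in simplified form (an editorial sketch with a hypothetical signature; the real change is the macro hunks below):

/* Emit a 4-byte designation sequence, e.g. ESC '$' '*' 'H' or
   ESC '$' '+' 'I'..'M', only when the output buffer has room for it.  */
static int
emit_designation (unsigned char **outptr, unsigned char *outend,
                  char mid, char set)
{
  if (*outptr + 4 > outend)
    return -1;              /* maps to __GCONV_FULL_OUTPUT upstream */
  (*outptr)[0] = 0x1b;      /* ESC */
  (*outptr)[1] = '$';
  (*outptr)[2] = mid;       /* '*' selects SS2, '+' selects SS3 */
  (*outptr)[3] = set;
  *outptr += 4;
  return 0;
}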
---
iconvdata/Makefile | 5 +-
iconvdata/iso-2022-cn-ext.c | 12 +++
iconvdata/tst-iconv-iso-2022-cn-ext.c | 128 ++++++++++++++++++++++++++
3 files changed, 144 insertions(+), 1 deletion(-)
create mode 100644 iconvdata/tst-iconv-iso-2022-cn-ext.c
diff --git a/iconvdata/Makefile b/iconvdata/Makefile
index 646e2ccd11478646..c959758a90ed954f 100644
index ea019ce5c0..7196a8744b 100644
--- a/iconvdata/Makefile
+++ b/iconvdata/Makefile
@@ -75,7 +75,7 @@ ifeq (yes,$(build-shared))
@@ -75,7 +75,8 @@ ifeq (yes,$(build-shared))
tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
bug-iconv10 bug-iconv11 bug-iconv12 bug-iconv13 bug-iconv14 \
- bug-iconv15
+ bug-iconv15 tst-iconv-iso-2022-cn-ext
bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \
- bug-iconv13 bug-iconv14 bug-iconv15
+ bug-iconv13 bug-iconv14 bug-iconv15 \
+ tst-iconv-iso-2022-cn-ext
ifeq ($(have-thread-library),yes)
tests += bug-iconv3
endif
@@ -325,6 +325,8 @@ $(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \
@@ -330,6 +331,8 @@ $(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \
$(addprefix $(objpfx),$(modules.so))
$(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \
$(addprefix $(objpfx),$(modules.so))
@@ -38,10 +46,10 @@ index 646e2ccd11478646..c959758a90ed954f 100644
$(objpfx)iconv-test.out: run-iconv-test.sh \
$(addprefix $(objpfx), $(gconv-modules)) \
diff --git a/iconvdata/iso-2022-cn-ext.c b/iconvdata/iso-2022-cn-ext.c
index c21a7187b4d7808e..bd9493c12d95070b 100644
index b34c8a36f4..cce29b1969 100644
--- a/iconvdata/iso-2022-cn-ext.c
+++ b/iconvdata/iso-2022-cn-ext.c
@@ -575,6 +575,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
@@ -574,6 +574,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
{ \
const char *escseq; \
\
@@ -54,7 +62,7 @@ index c21a7187b4d7808e..bd9493c12d95070b 100644
assert (used == CNS11643_2_set); /* XXX */ \
escseq = "*H"; \
*outptr++ = ESC; \
@@ -588,6 +594,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
@@ -587,6 +593,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
{ \
const char *escseq; \
\
@@ -69,7 +77,7 @@ index c21a7187b4d7808e..bd9493c12d95070b 100644
*outptr++ = ESC; \
diff --git a/iconvdata/tst-iconv-iso-2022-cn-ext.c b/iconvdata/tst-iconv-iso-2022-cn-ext.c
new file mode 100644
index 0000000000000000..96a8765fd5369681
index 0000000000..96a8765fd5
--- /dev/null
+++ b/iconvdata/tst-iconv-iso-2022-cn-ext.c
@@ -0,0 +1,128 @@
@@ -155,7 +163,7 @@ index 0000000000000000..96a8765fd5369681
+
+ /* Same as before for SS2designation. */
+ {
+ char inbuf[] = " \xe3\xb4\xbd";
+ char inbuf[] = "ã´½ \xe3\xb4\xbd";
+
+ for (int i = 0; i < 14; i++)
+ {
@@ -175,7 +183,7 @@ index 0000000000000000..96a8765fd5369681
+
+ /* Same as before for SS3designation. */
+ {
+ char inbuf[] = " \xe5\x8a\x84";
+ char inbuf[] = "劄 \xe5\x8a\x84";
+
+ for (int i = 0; i < 14; i++)
+ {
@@ -201,3 +209,5 @@ index 0000000000000000..96a8765fd5369681
+}
+
+#include <support/test-driver.c>
--
2.39.3

@@ -1,38 +1,18 @@
This patch was developed under embargo and cannot reference an upstream
commit. To find the associated commit please review the upstream git
log for CVE-2023-4911 to identify the relevant commits.
From 27e06a423cf06845a0515ab767a109b31b34724a Mon Sep 17 00:00:00 2001
From: Chunmei Xu <xuchunmei@linux.alibaba.com>
Date: Tue, 5 Mar 2024 14:12:15 +0800
Subject: [PATCH 1/1] fix CVE-2023-4911
Author: Siddhesh Poyarekar <siddhesh@sourceware.org>
Date: Tue Sep 19 18:39:32 2023 -0400
tunables: Terminate if end of input is reached (CVE-2023-4911)
The string parsing routine may end up writing beyond bounds of tunestr
if the input tunable string is malformed, of the form name=name=val.
This gets processed twice, first as name=name=val and next as name=val,
resulting in tunestr being name=name=val:name=val, thus overflowing
tunestr.
Terminate the parsing loop at the first instance itself so that tunestr
does not overflow.
This also fixes up tst-env-setuid-tunables to actually handle failures
correct and add new tests to validate the fix for this CVE.
Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
Conflicts:
NEWS
(Dropped)
elf/tst-env-setuid-tunables.c
(Trivial conflict at HAVE_TUNABLES)
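A toy model of the fixed loop (much simplified and hypothetical; it only mimics the control flow): stop at the first terminating NUL instead of re-queuing the tail, so a malformed name=name=val entry is consumed exactly once and tunestr can only shrink.

#include <stdio.h>
#include <string.h>

int
main (void)
{
  const char *p = "glibc.malloc.check=glibc.malloc.check=2";
  while (1)
    {
      size_t len = strcspn (p, "=:");
      printf ("token: %.*s\n", (int) len, p);
      if (p[len] == '\0')
        break;               /* end of input: terminate, never rescan */
      p += len + 1;
    }
  return 0;
}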
---
elf/dl-tunables.c | 16 ++++++++-------
elf/tst-env-setuid-tunables.c | 37 +++++++++++++++++++++++++++--------
2 files changed, 38 insertions(+), 15 deletions(-)
diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c
index 3c84809d44381241..2c878e08ea197b29 100644
index 62b7332d..0edfade8 100644
--- a/elf/dl-tunables.c
+++ b/elf/dl-tunables.c
@@ -193,11 +193,7 @@ parse_tunables (char *tunestr, char *valstring)
@@ -180,11 +180,7 @@ parse_tunables (char *tunestr, char *valstring)
/* If we reach the end of the string before getting a valid name-value
pair, bail out. */
if (p[len] == '\0')
@ -45,7 +25,7 @@ index 3c84809d44381241..2c878e08ea197b29 100644
/* We did not find a valid name-value pair before encountering the
colon. */
@@ -257,9 +253,16 @@ parse_tunables (char *tunestr, char *valstring)
@@ -244,9 +240,15 @@ parse_tunables (char *tunestr, char *valstring)
}
}
@ -54,7 +34,6 @@ index 3c84809d44381241..2c878e08ea197b29 100644
+ /* We reached the end while processing the tunable string. */
+ if (p[len] == '\0')
+ break;
+
+ p += len + 1;
}
+
@ -62,13 +41,13 @@ index 3c84809d44381241..2c878e08ea197b29 100644
+ if (__libc_enable_secure)
+ tunestr[off] = '\0';
}
#endif
/* Enable the glibc.malloc.check tunable in SETUID/SETGID programs only when
diff --git a/elf/tst-env-setuid-tunables.c b/elf/tst-env-setuid-tunables.c
index 0b9b075c40598c6f..8b0861c4ad853040 100644
index 7dfb0e07..2364d162 100644
--- a/elf/tst-env-setuid-tunables.c
+++ b/elf/tst-env-setuid-tunables.c
@@ -52,6 +52,8 @@ const char *teststrings[] =
@@ -50,6 +50,8 @@ const char *teststrings[] =
"glibc.malloc.perturb=0x800:not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096",
"glibc.not_valid.check=2:glibc.malloc.mmap_threshold=4096",
"not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096",
@@ -77,7 +56,7 @@ index 0b9b075c40598c6f..8b0861c4ad853040 100644
"glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096:glibc.malloc.check=2",
"glibc.malloc.check=4:glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096",
":glibc.malloc.garbage=2:glibc.malloc.check=1",
@@ -70,6 +72,8 @@ const char *resultstrings[] =
@@ -68,6 +70,8 @@ const char *resultstrings[] =
"glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096",
"glibc.malloc.mmap_threshold=4096",
"glibc.malloc.mmap_threshold=4096",
@@ -86,10 +65,10 @@ index 0b9b075c40598c6f..8b0861c4ad853040 100644
"",
"",
"",
@@ -84,11 +88,18 @@ test_child (int off)
@@ -81,11 +85,17 @@ test_child (int off)
{
const char *val = getenv ("GLIBC_TUNABLES");
#if HAVE_TUNABLES
+ printf (" [%d] GLIBC_TUNABLES is %s\n", off, val);
+ fflush (stdout);
if (val != NULL && strcmp (val, resultstrings[off]) == 0)
@ -98,27 +77,26 @@ index 0b9b075c40598c6f..8b0861c4ad853040 100644
if (val != NULL)
- printf ("[%d] Unexpected GLIBC_TUNABLES VALUE %s\n", off, val);
+ printf (" [%d] Unexpected GLIBC_TUNABLES VALUE %s, expected %s\n",
+ off, val, resultstrings[off]);
+ off, val, resultstrings[off]);
+ else
+ printf (" [%d] GLIBC_TUNABLES environment variable absent\n", off);
+
+ fflush (stdout);
return 1;
#else
@@ -117,21 +128,26 @@ do_test (int argc, char **argv)
}
@@ -106,31 +116,42 @@ do_test (int argc, char **argv)
if (ret != 0)
exit (1);
- exit (EXIT_SUCCESS);
+ /* Special return code to make sure that the child executed all the way
+ through. */
+ through. */
+ exit (42);
}
else
{
- int ret = 0;
-
/* Spawn tests. */
for (int i = 0; i < array_length (teststrings); i++)
{
@@ -130,28 +108,32 @@ index 0b9b075c40598c6f..8b0861c4ad853040 100644
+ fflush (stdout);
if (setenv ("GLIBC_TUNABLES", teststrings[i], 1) != 0)
- exit (1);
+ {
-
+ {
+ printf (" [%d] Failed to set GLIBC_TUNABLES: %m", i);
+ support_record_failure ();
+ continue;
+ }
+ continue;
+ }
+
int status = support_capture_subprogram_self_sgid (buf);
@@ -139,9 +155,14 @@ do_test (int argc, char **argv)
/* Bail out early if unsupported. */
if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
return EXIT_UNSUPPORTED;
- ret |= status;
+ if (WEXITSTATUS (status) != 42)
+ {
+ printf (" [%d] child failed with status %d\n", i,
+ WEXITSTATUS (status));
+ support_record_failure ();
+ }
+ {
+ printf (" [%d] child failed with status %d\n", i,
+ WEXITSTATUS (status));
+ support_record_failure ();
+ }
}
- return ret;
+ return 0;
}
}
--
2.41.0

(File diff suppressed because it is too large.)

@@ -0,0 +1,40 @@
From 2c8dfc45a8009e5110a9d2148b62d802e989fde7 Mon Sep 17 00:00:00 2001
From: ticat_fp <fanpeng@loongson.cn>
Date: Thu, 29 Feb 2024 15:58:31 +0800
Subject: [PATCH] Decrease value of arch_minimum_kernel with LoongArch
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/unix/sysv/linux/loongarch/configure | 2 +-
sysdeps/unix/sysv/linux/loongarch/configure.ac | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/sysdeps/unix/sysv/linux/loongarch/configure b/sysdeps/unix/sysv/linux/loongarch/configure
index 0d1159e9..851b2285 100644
--- a/sysdeps/unix/sysv/linux/loongarch/configure
+++ b/sysdeps/unix/sysv/linux/loongarch/configure
@@ -1,7 +1,7 @@
# This file is generated from configure.ac by Autoconf. DO NOT EDIT!
# Local configure fragment for sysdeps/unix/sysv/linux/loongarch.
-arch_minimum_kernel=5.19.0
+arch_minimum_kernel=4.19.0
libc_cv_loongarch_int_abi=no
diff --git a/sysdeps/unix/sysv/linux/loongarch/configure.ac b/sysdeps/unix/sysv/linux/loongarch/configure.ac
index 04e9150a..00815c2f 100644
--- a/sysdeps/unix/sysv/linux/loongarch/configure.ac
+++ b/sysdeps/unix/sysv/linux/loongarch/configure.ac
@@ -2,7 +2,7 @@ sinclude(./aclocal.m4)dnl Autoconf lossage
GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
# Local configure fragment for sysdeps/unix/sysv/linux/loongarch.
-arch_minimum_kernel=5.19.0
+arch_minimum_kernel=4.19.0
libc_cv_loongarch_int_abi=no
AC_EGREP_CPP(4 8 8, [__SIZEOF_INT__ __SIZEOF_LONG__ __SIZEOF_POINTER__
--
2.33.0

@ -1,34 +0,0 @@
From c5de7c407853b807e8d0c764e6325bb1311f39cd Mon Sep 17 00:00:00 2001
From: Xing Li <lixing@loongson.cn>
Date: Tue, 4 Jul 2023 15:10:03 +0800
Subject: [PATCH 2/2] Fix tst-cancel21.c to suit kernel struct sigcontext
change. * nptl/tst-cancel21.c
---
nptl/tst-cancel21.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/nptl/tst-cancel21.c b/nptl/tst-cancel21.c
index b10fdbc1..a3653f21 100644
--- a/nptl/tst-cancel21.c
+++ b/nptl/tst-cancel21.c
@@ -217,14 +217,14 @@ static int
do_test (void)
{
stack_t ss;
- ss.ss_sp = malloc (2 * SIGSTKSZ);
+ ss.ss_sp = malloc (4 * SIGSTKSZ);
if (ss.ss_sp == NULL)
{
puts ("failed to allocate alternate stack");
return 1;
}
ss.ss_flags = 0;
- ss.ss_size = 2 * SIGSTKSZ;
+ ss.ss_size = 4 * SIGSTKSZ;
if (sigaltstack (&ss, NULL) < 0)
{
printf ("sigaltstack failed %m\n");
--
2.27.0

@@ -0,0 +1,499 @@
From 8923e4e9c79e672fd6b3b89aba598a60d5c01211 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Fri, 15 Sep 2023 17:35:19 +0800
Subject: [PATCH 25/29] LoongArch: Add glibc.cpu.hwcap support.
Key Points:
1. On lasx & lsx platforms, We must use _dl_runtime_{profile, resolve}_{lsx, lasx}
to save vector registers.
2. Via "tunables", users can choose str/mem_{lasx,lsx,unaligned} functions with
`export GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,...`.
Note: glibc.cpu.hwcaps doesn't affect _dl_runtime_{profile, resolve}_{lsx, lasx}
selection.
Usage Notes:
1. Only valid inputs: LASX, LSX, UAL. Case-sensitive, comma-separated, no spaces.
2. Example: `export GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,UAL` turns on LASX & UAL.
Unmentioned features turn off. With default ifunc: lasx > lsx > unaligned >
aligned > generic, effect is: lasx > unaligned > aligned > generic; lsx off.
3. Incorrect GLIBC_TUNABLES settings will show error messages.
For example: On lsx platforms, you cannot enable lasx features. If you do
that, you will get error messages.
4. Valid input examples:
- GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX: lasx > aligned > generic.
- GLIBC_TUNABLES=glibc.cpu.hwcaps=LSX,UAL: lsx > unaligned > aligned > generic.
- GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,UAL,LASX,UAL,LSX,LASX,UAL: Repetitions
allowed but not recommended. Results in: lasx > lsx > unaligned > aligned >
generic.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/Makefile | 4 +
sysdeps/loongarch/Versions | 5 ++
sysdeps/loongarch/cpu-tunables.c | 89 +++++++++++++++++++
sysdeps/loongarch/dl-get-cpu-features.c | 25 ++++++
sysdeps/loongarch/dl-machine.h | 27 +++++-
sysdeps/loongarch/dl-tunables.list | 25 ++++++
.../unix/sysv/linux/loongarch/cpu-features.c | 29 ++++++
.../unix/sysv/linux/loongarch/cpu-features.h | 18 +++-
.../unix/sysv/linux/loongarch/dl-procinfo.c | 60 +++++++++++++
sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c | 21 +++++
.../unix/sysv/linux/loongarch/libc-start.c | 34 +++++++
11 files changed, 329 insertions(+), 8 deletions(-)
create mode 100644 sysdeps/loongarch/Versions
create mode 100644 sysdeps/loongarch/cpu-tunables.c
create mode 100644 sysdeps/loongarch/dl-get-cpu-features.c
create mode 100644 sysdeps/loongarch/dl-tunables.list
create mode 100644 sysdeps/unix/sysv/linux/loongarch/cpu-features.c
create mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
create mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
create mode 100644 sysdeps/unix/sysv/linux/loongarch/libc-start.c
diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
index 43d2f583..30a1f4a8 100644
--- a/sysdeps/loongarch/Makefile
+++ b/sysdeps/loongarch/Makefile
@@ -6,6 +6,10 @@ ifeq ($(subdir),elf)
gen-as-const-headers += dl-link.sym
endif
+ifeq ($(subdir),elf)
+ sysdep-dl-routines += dl-get-cpu-features
+endif
+
# LoongArch's assembler also needs to know about PIC as it changes the
# definition of some assembler macros.
ASFLAGS-.os += $(pic-ccflag)
diff --git a/sysdeps/loongarch/Versions b/sysdeps/loongarch/Versions
new file mode 100644
index 00000000..33ae2cc0
--- /dev/null
+++ b/sysdeps/loongarch/Versions
@@ -0,0 +1,5 @@
+ld {
+ GLIBC_PRIVATE {
+ _dl_larch_get_cpu_features;
+ }
+}
diff --git a/sysdeps/loongarch/cpu-tunables.c b/sysdeps/loongarch/cpu-tunables.c
new file mode 100644
index 00000000..8e9fab93
--- /dev/null
+++ b/sysdeps/loongarch/cpu-tunables.c
@@ -0,0 +1,89 @@
+/* LoongArch CPU feature tuning.
+ This file is part of the GNU C Library.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+# include <stdbool.h>
+# include <stdint.h>
+# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. */
+# include <elf/dl-tunables.h>
+# include <string.h>
+# include <cpu-features.h>
+# include <ldsodefs.h>
+# include <sys/auxv.h>
+
+# define HWCAP_LOONGARCH_IFUNC \
+ (HWCAP_LOONGARCH_UAL | HWCAP_LOONGARCH_LSX | HWCAP_LOONGARCH_LASX)
+
+# define CHECK_GLIBC_IFUNC_CPU_OFF(f, name, len) \
+ _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \
+ if (!memcmp (f, #name, len) && \
+ (GLRO (dl_hwcap) & HWCAP_LOONGARCH_##name)) \
+ { \
+ hwcap |= (HWCAP_LOONGARCH_##name | (~HWCAP_LOONGARCH_IFUNC)); \
+ break; \
+ } \
+
+attribute_hidden
+void
+TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
+{
+ const char *p = valp->strval;
+ size_t len;
+ unsigned long hwcap = 0;
+ const char *c;
+
+ do {
+ for (c = p; *c != ','; c++)
+ if (*c == '\0')
+ break;
+
+ len = c - p;
+
+ switch(len)
+ {
+ default:
+ _dl_fatal_printf (
+ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
+ );
+ break;
+ case 3:
+ {
+ CHECK_GLIBC_IFUNC_CPU_OFF (p, LSX, 3);
+ CHECK_GLIBC_IFUNC_CPU_OFF (p, UAL, 3);
+ _dl_fatal_printf (
+ "Some features are invalid or not supported on this machine!!\n"
+ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
+ );
+ }
+ break;
+ case 4:
+ {
+ CHECK_GLIBC_IFUNC_CPU_OFF (p, LASX, 4);
+ _dl_fatal_printf (
+ "Some features are invalid or not supported on this machine!!\n"
+ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
+ );
+ }
+ break;
+ }
+
+ p += len + 1;
+ }
+ while (*c != '\0');
+
+ GLRO (dl_larch_cpu_features).hwcap &= hwcap;
+}
diff --git a/sysdeps/loongarch/dl-get-cpu-features.c b/sysdeps/loongarch/dl-get-cpu-features.c
new file mode 100644
index 00000000..7cd9bc15
--- /dev/null
+++ b/sysdeps/loongarch/dl-get-cpu-features.c
@@ -0,0 +1,25 @@
+/* Define _dl_larch_get_cpu_features.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#include <ldsodefs.h>
+
+const struct cpu_features *
+_dl_larch_get_cpu_features (void)
+{
+ return &GLRO(dl_larch_cpu_features);
+}
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index 57913cef..b395a928 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -29,6 +29,8 @@
#include <dl-static-tls.h>
#include <dl-machine-rel.h>
+#include <cpu-features.c>
+
#ifndef _RTLD_PROLOGUE
# define _RTLD_PROLOGUE(entry) \
".globl\t" __STRING (entry) "\n\t" \
@@ -53,6 +55,23 @@
#define ELF_MACHINE_NO_REL 1
#define ELF_MACHINE_NO_RELA 0
+#define DL_PLATFORM_INIT dl_platform_init ()
+
+static inline void __attribute__ ((unused))
+dl_platform_init (void)
+{
+ if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
+ /* Avoid an empty string which would disturb us. */
+ GLRO(dl_platform) = NULL;
+
+#ifdef SHARED
+ /* init_cpu_features has been called early from __libc_start_main in
+ static executable. */
+ init_cpu_features (&GLRO(dl_larch_cpu_features));
+#endif
+}
+
+
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int
elf_machine_matches_host (const ElfW (Ehdr) *ehdr)
@@ -290,9 +309,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
if (profile != 0)
{
#if !defined __loongarch_soft_float
- if (SUPPORT_LASX)
+ if (RTLD_SUPPORT_LASX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx;
- else if (SUPPORT_LSX)
+ else if (RTLD_SUPPORT_LSX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx;
else
#endif
@@ -310,9 +329,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
indicated by the offset on the stack, and then jump to
the resolved address. */
#if !defined __loongarch_soft_float
- if (SUPPORT_LASX)
+ if (RTLD_SUPPORT_LASX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
- else if (SUPPORT_LSX)
+ else if (RTLD_SUPPORT_LSX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lsx;
else
#endif
diff --git a/sysdeps/loongarch/dl-tunables.list b/sysdeps/loongarch/dl-tunables.list
new file mode 100644
index 00000000..66b34275
--- /dev/null
+++ b/sysdeps/loongarch/dl-tunables.list
@@ -0,0 +1,25 @@
+# LoongArch specific tunables.
+# Copyright (C) 2023 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+glibc {
+ cpu {
+ hwcaps {
+ type: STRING
+ }
+ }
+}
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
new file mode 100644
index 00000000..1290c4ce
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
@@ -0,0 +1,29 @@
+/* Initialize CPU feature data. LoongArch64 version.
+ This file is part of the GNU C Library.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <cpu-features.h>
+#include <elf/dl-hwcaps.h>
+#include <elf/dl-tunables.h>
+extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) attribute_hidden;
+
+static inline void
+init_cpu_features (struct cpu_features *cpu_features)
+{
+ GLRO (dl_larch_cpu_features).hwcap = GLRO (dl_hwcap);
+ TUNABLE_GET (glibc, cpu, hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
+}
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
index d1a280a5..450963ce 100644
--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
@@ -19,13 +19,23 @@
#ifndef _CPU_FEATURES_LOONGARCH64_H
#define _CPU_FEATURES_LOONGARCH64_H
+#include <stdint.h>
#include <sys/auxv.h>
-#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL)
-#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
-#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
+struct cpu_features
+ {
+ uint64_t hwcap;
+ };
+/* Get a pointer to the CPU features structure. */
+extern const struct cpu_features *_dl_larch_get_cpu_features (void)
+ __attribute__ ((pure));
+
+#define SUPPORT_UAL (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_UAL)
+#define SUPPORT_LSX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LSX)
+#define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LASX)
+#define RTLD_SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
+#define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
#define INIT_ARCH()
#endif /* _CPU_FEATURES_LOONGARCH64_H */
-
diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
new file mode 100644
index 00000000..6217fda9
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
@@ -0,0 +1,60 @@
+/* Data for LoongArch64 version of processor capability information.
+ Linux version.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* If anything should be added here check whether the size of each string
+ is still ok with the given array size.
+
+ All the #ifdefs in the definitions are quite irritating but
+ necessary if we want to avoid duplicating the information. There
+ are three different modes:
+
+ - PROCINFO_DECL is defined. This means we are only interested in
+ declarations.
+
+ - PROCINFO_DECL is not defined:
+
+ + if SHARED is defined the file is included in an array
+ initializer. The .element = { ... } syntax is needed.
+
+ + if SHARED is not defined a normal array initialization is
+ needed.
+ */
+
+#ifndef PROCINFO_CLASS
+# define PROCINFO_CLASS
+#endif
+
+#if !IS_IN (ldconfig)
+# if !defined PROCINFO_DECL && defined SHARED
+ ._dl_larch_cpu_features
+# else
+PROCINFO_CLASS struct cpu_features _dl_larch_cpu_features
+# endif
+# ifndef PROCINFO_DECL
+= { }
+# endif
+# if !defined SHARED || defined PROCINFO_DECL
+;
+# else
+,
+# endif
+#endif
+
+#undef PROCINFO_DECL
+#undef PROCINFO_CLASS
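
The three-way #if dance above is the standard dl-procinfo include pattern; it is easier to read expanded. A sketch of what the fragment becomes in each mode (derived from the conditionals above; PROCINFO_CLASS is typically empty or `extern` at the include site):

/* 1. PROCINFO_DECL defined -- a declaration only:  */
extern struct cpu_features _dl_larch_cpu_features;

/* 2. PROCINFO_DECL undefined, SHARED defined -- a designated-initializer
   member emitted inside the _rtld_global_ro initializer (the struct that
   GLRO expands into):
       ._dl_larch_cpu_features = { },  */

/* 3. PROCINFO_DECL undefined, SHARED undefined -- an ordinary definition:
       struct cpu_features _dl_larch_cpu_features = { };  */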
diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
new file mode 100644
index 00000000..455fd71a
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
@@ -0,0 +1,21 @@
+/* Operating system support for run-time dynamic linker. LoongArch version.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include <sysdeps/loongarch/cpu-tunables.c>
+#include <sysdeps/unix/sysv/linux/dl-sysdep.c>
diff --git a/sysdeps/unix/sysv/linux/loongarch/libc-start.c b/sysdeps/unix/sysv/linux/loongarch/libc-start.c
new file mode 100644
index 00000000..f1346ece
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/libc-start.c
@@ -0,0 +1,34 @@
+/* Override csu/libc-start.c on LoongArch64.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef SHARED
+
+/* Mark symbols hidden in static PIE for early self relocation to work. */
+# if BUILD_PIE_DEFAULT
+# pragma GCC visibility push(hidden)
+# endif
+
+# include <ldsodefs.h>
+# include <cpu-features.c>
+
+extern struct cpu_features _dl_larch_cpu_features;
+
+# define ARCH_INIT_CPU_FEATURES() init_cpu_features (&_dl_larch_cpu_features)
+
+#endif
+#include <csu/libc-start.c>
--
2.33.0


@ -0,0 +1,485 @@
From 3ee56bbc56faa7b85a6513340db4a4fdd6ce709d Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Mon, 28 Aug 2023 10:08:36 +0800
Subject: [PATCH 15/29] LoongArch: Add ifunc support for memchr{aligned, lsx,
lasx}
According to glibc memchr microbenchmark, this implementation could reduce
the runtime as follows:
Name Percent of runtime reduced
memchr-lasx 37%-83%
memchr-lsx 30%-66%
memchr-aligned 0%-15%
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
.../lp64/multiarch/ifunc-impl-list.c | 7 ++
.../loongarch/lp64/multiarch/ifunc-memchr.h | 40 ++++++
.../loongarch/lp64/multiarch/memchr-aligned.S | 95 ++++++++++++++
.../loongarch/lp64/multiarch/memchr-lasx.S | 117 ++++++++++++++++++
sysdeps/loongarch/lp64/multiarch/memchr-lsx.S | 102 +++++++++++++++
sysdeps/loongarch/lp64/multiarch/memchr.c | 37 ++++++
7 files changed, 401 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 64416b02..2f4802cf 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -24,5 +24,8 @@ sysdep_routines += \
rawmemchr-aligned \
rawmemchr-lsx \
rawmemchr-lasx \
+ memchr-aligned \
+ memchr-lsx \
+ memchr-lasx \
# sysdep_routines
endif
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 3db9af14..a567b9cf 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -102,5 +102,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned)
)
+ IFUNC_IMPL (i, name, memchr,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LASX, __memchr_lasx)
+ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LSX, __memchr_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned)
+ )
return i;
}
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h
new file mode 100644
index 00000000..9060ccd5
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h
@@ -0,0 +1,40 @@
+/* Common definition for memchr ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
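
Outside glibc's internal libc_ifunc_redirected machinery, the same selection is usually written with GCC's ifunc attribute. A self-contained sketch of the mechanism (function names are hypothetical, the hwcap fallback value is an assumption, and real resolvers must not depend on symbols that themselves need ifunc resolution):

#include <stddef.h>
#include <string.h>
#include <sys/auxv.h>

#ifndef HWCAP_LOONGARCH_LSX
# define HWCAP_LOONGARCH_LSX (1 << 4)  /* assumption; see <asm/hwcap.h> */
#endif

/* Stand-ins for the real aligned/LSX implementations.  */
static void *
my_memchr_aligned (const void *s, int c, size_t n)
{
  return memchr (s, c, n);
}

static void *
my_memchr_lsx (const void *s, int c, size_t n)
{
  return memchr (s, c, n);
}

/* The resolver runs once at load time and returns the chosen function.  */
static void *(*resolve_my_memchr (void)) (const void *, int, size_t)
{
  if (getauxval (AT_HWCAP) & HWCAP_LOONGARCH_LSX)
    return my_memchr_lsx;
  return my_memchr_aligned;
}

void *my_memchr (const void *s, int c, size_t n)
  __attribute__ ((ifunc ("resolve_my_memchr")));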
diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S
new file mode 100644
index 00000000..81d0d004
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S
@@ -0,0 +1,95 @@
+/* Optimized memchr implementation using basic LoongArch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define MEMCHR_NAME __memchr_aligned
+#else
+# define MEMCHR_NAME memchr
+#endif
+
+LEAF(MEMCHR_NAME, 6)
+ beqz a2, L(out)
+ andi t1, a0, 0x7
+ add.d a5, a0, a2
+ bstrins.d a0, zero, 2, 0
+
+ ld.d t0, a0, 0
+ bstrins.d a1, a1, 15, 8
+ lu12i.w a3, 0x01010
+ slli.d t2, t1, 03
+
+ bstrins.d a1, a1, 31, 16
+ ori a3, a3, 0x101
+ li.d t7, -1
+ li.d t8, 8
+
+ bstrins.d a1, a1, 63, 32
+ bstrins.d a3, a3, 63, 32
+ sll.d t2, t7, t2
+ xor t0, t0, a1
+
+
+ addi.d a6, a5, -1
+ slli.d a4, a3, 7
+ sub.d t1, t8, t1
+ orn t0, t0, t2
+
+ sub.d t2, t0, a3
+ andn t3, a4, t0
+ bstrins.d a6, zero, 2, 0
+ and t0, t2, t3
+
+ bgeu t1, a2, L(end)
+L(loop):
+ bnez t0, L(found)
+ ld.d t1, a0, 8
+ xor t0, t1, a1
+
+ addi.d a0, a0, 8
+ sub.d t2, t0, a3
+ andn t3, a4, t0
+ and t0, t2, t3
+
+
+ bne a0, a6, L(loop)
+L(end):
+ sub.d t1, a5, a6
+ ctz.d t0, t0
+ srli.d t0, t0, 3
+
+ sltu t1, t0, t1
+ add.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+
+L(found):
+ ctz.d t0, t0
+ srli.d t0, t0, 3
+ add.d a0, a0, t0
+ jr ra
+
+L(out):
+ move a0, zero
+ jr ra
+END(MEMCHR_NAME)
+
+libc_hidden_builtin_def (MEMCHR_NAME)
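
The a3/a4 constants above (0x0101...01 and 0x8080...80) implement the classic SWAR byte-match test. The same computation in C, as a sketch:

#include <stdint.h>

/* Nonzero iff some byte of WORD equals C.  The lowest set 0x80 bit marks
   the first matching byte exactly (lanes above the first match may carry
   borrow artifacts, which memchr never inspects), so the match offset is
   __builtin_ctzll (m) / 8 -- the ctz.d/srli.d pair in L(found).  */
static inline uint64_t
match_mask (uint64_t word, unsigned char c)
{
  uint64_t ones  = 0x0101010101010101ULL;  /* the a3 constant */
  uint64_t highs = ones << 7;              /* the a4 constant */
  uint64_t x = word ^ (ones * c);          /* zero byte where WORD == C */
  return (x - ones) & ~x & highs;
}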
diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S
new file mode 100644
index 00000000..a26cdf48
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S
@@ -0,0 +1,117 @@
+/* Optimized memchr implementation using LoongArch LASX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define MEMCHR __memchr_lasx
+
+LEAF(MEMCHR, 6)
+ beqz a2, L(ret0)
+ add.d a3, a0, a2
+ andi t0, a0, 0x3f
+ bstrins.d a0, zero, 5, 0
+
+ xvld xr0, a0, 0
+ xvld xr1, a0, 32
+ li.d t1, -1
+ li.d t2, 64
+
+ xvreplgr2vr.b xr2, a1
+ sll.d t3, t1, t0
+ sub.d t2, t2, t0
+ xvseq.b xr0, xr0, xr2
+
+ xvseq.b xr1, xr1, xr2
+ xvmsknz.b xr0, xr0
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+
+
+ xvpickve.w xr4, xr1, 4
+ vilvl.h vr0, vr3, vr0
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+
+ movfr2gr.d t0, fa0
+ and t0, t0, t3
+ bgeu t2, a2, L(end)
+ bnez t0, L(found)
+
+ addi.d a4, a3, -1
+ bstrins.d a4, zero, 5, 0
+L(loop):
+ xvld xr0, a0, 64
+ xvld xr1, a0, 96
+
+ addi.d a0, a0, 64
+ xvseq.b xr0, xr0, xr2
+ xvseq.b xr1, xr1, xr2
+ beq a0, a4, L(out)
+
+
+ xvmax.bu xr3, xr0, xr1
+ xvseteqz.v fcc0, xr3
+ bcnez fcc0, L(loop)
+ xvmsknz.b xr0, xr0
+
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+ xvpickve.w xr4, xr1, 4
+ vilvl.h vr0, vr3, vr0
+
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+L(found):
+ ctz.d t1, t0
+
+ add.d a0, a0, t1
+ jr ra
+L(ret0):
+ move a0, zero
+ jr ra
+
+
+L(out):
+ xvmsknz.b xr0, xr0
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+ xvpickve.w xr4, xr1, 4
+
+ vilvl.h vr0, vr3, vr0
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+
+L(end):
+ sub.d t2, zero, a3
+ srl.d t1, t1, t2
+ and t0, t0, t1
+ ctz.d t1, t0
+
+ add.d a0, a0, t1
+ maskeqz a0, a0, t0
+ jr ra
+END(MEMCHR)
+
+libc_hidden_builtin_def (MEMCHR)
+#endif
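
The xvmsknz.b/xvpickve.w/vilvl/movfr2gr.d chain above condenses the two 32-byte compare results into one 64-bit general register, one bit per byte. Its scalar meaning, as a sketch:

#include <stdint.h>

/* Scalar model of the mask built from xr0/xr1: bit I is set when byte I
   of the 64-byte block compared equal to the target byte.  ctz of the
   mask then gives the first match; clz gives the last (which is what the
   memrchr variants later in this series rely on).  */
static inline uint64_t
msknz64 (const unsigned char eq[64])
{
  uint64_t m = 0;
  for (int i = 0; i < 64; i++)
    m |= (uint64_t) (eq[i] != 0) << i;
  return m;
}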
diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S
new file mode 100644
index 00000000..a73ecd25
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S
@@ -0,0 +1,102 @@
+/* Optimized memchr implementation using LoongArch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define MEMCHR __memchr_lsx
+
+LEAF(MEMCHR, 6)
+ beqz a2, L(ret0)
+ add.d a3, a0, a2
+ andi t0, a0, 0x1f
+ bstrins.d a0, zero, 4, 0
+
+ vld vr0, a0, 0
+ vld vr1, a0, 16
+ li.d t1, -1
+ li.d t2, 32
+
+ vreplgr2vr.b vr2, a1
+ sll.d t3, t1, t0
+ sub.d t2, t2, t0
+ vseq.b vr0, vr0, vr2
+
+ vseq.b vr1, vr1, vr2
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+
+
+ movfr2gr.s t0, fa0
+ and t0, t0, t3
+ bgeu t2, a2, L(end)
+ bnez t0, L(found)
+
+ addi.d a4, a3, -1
+ bstrins.d a4, zero, 4, 0
+L(loop):
+ vld vr0, a0, 32
+ vld vr1, a0, 48
+
+ addi.d a0, a0, 32
+ vseq.b vr0, vr0, vr2
+ vseq.b vr1, vr1, vr2
+ beq a0, a4, L(out)
+
+ vmax.bu vr3, vr0, vr1
+ vseteqz.v fcc0, vr3
+ bcnez fcc0, L(loop)
+ vmsknz.b vr0, vr0
+
+
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+L(found):
+ ctz.w t0, t0
+
+ add.d a0, a0, t0
+ jr ra
+L(ret0):
+ move a0, zero
+ jr ra
+
+L(out):
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+
+L(end):
+ sub.d t2, zero, a3
+ srl.w t1, t1, t2
+ and t0, t0, t1
+ ctz.w t1, t0
+
+
+ add.d a0, a0, t1
+ maskeqz a0, a0, t0
+ jr ra
+END(MEMCHR)
+
+libc_hidden_builtin_def (MEMCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memchr.c b/sysdeps/loongarch/lp64/multiarch/memchr.c
new file mode 100644
index 00000000..059479c0
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memchr.c
@@ -0,0 +1,37 @@
+/* Multiple versions of memchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define memchr __redirect_memchr
+# include <string.h>
+# undef memchr
+
+# define SYMBOL_NAME memchr
+# include "ifunc-memchr.h"
+
+libc_ifunc_redirected (__redirect_memchr, memchr,
+ IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
+# endif
+
+#endif
--
2.33.0


@ -0,0 +1,946 @@
From 60f4bbd1eec528ba8df044ae6b3091f6337a7fcc Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Mon, 28 Aug 2023 10:08:39 +0800
Subject: [PATCH 18/29] LoongArch: Add ifunc support for memcmp{aligned, lsx,
lasx}
According to glibc memcmp microbenchmark test results (Add generic
memcmp), this implementation shows a performance improvement
except when the length is less than 3, with details below:
Name Percent of time reduced
memcmp-lasx 16%-74%
memcmp-lsx 20%-50%
memcmp-aligned 5%-20%
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
.../lp64/multiarch/ifunc-impl-list.c | 7 +
.../loongarch/lp64/multiarch/ifunc-memcmp.h | 40 +++
.../loongarch/lp64/multiarch/memcmp-aligned.S | 292 ++++++++++++++++++
.../loongarch/lp64/multiarch/memcmp-lasx.S | 207 +++++++++++++
sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S | 269 ++++++++++++++++
sysdeps/loongarch/lp64/multiarch/memcmp.c | 43 +++
7 files changed, 861 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 216886c5..360a6718 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -34,5 +34,8 @@ sysdep_routines += \
memset-unaligned \
memset-lsx \
memset-lasx \
+ memcmp-aligned \
+ memcmp-lsx \
+ memcmp-lasx \
# sysdep_routines
endif
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 37f60dde..e397d58c 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -127,5 +127,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_aligned)
)
+ IFUNC_IMPL (i, name, memcmp,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LASX, __memcmp_lasx)
+ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LSX, __memcmp_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_aligned)
+ )
return i;
}
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h
new file mode 100644
index 00000000..04adc2e5
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h
@@ -0,0 +1,40 @@
+/* Common definition for memcmp ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S
new file mode 100644
index 00000000..14a7caa9
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S
@@ -0,0 +1,292 @@
+/* Optimized memcmp implementation using basic LoongArch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define MEMCMP_NAME __memcmp_aligned
+#else
+# define MEMCMP_NAME memcmp
+#endif
+
+LEAF(MEMCMP_NAME, 6)
+ beqz a2, L(ret)
+ andi a4, a1, 0x7
+ andi a3, a0, 0x7
+ sltu a5, a4, a3
+
+ xor t0, a0, a1
+ li.w t8, 8
+ maskeqz t0, t0, a5
+ li.w t7, -1
+
+ xor a0, a0, t0
+ xor a1, a1, t0
+ andi a3, a0, 0x7
+ andi a4, a1, 0x7
+
+ xor a0, a0, a3
+ xor a1, a1, a4
+ ld.d t2, a0, 0
+ ld.d t1, a1, 0
+
+ slli.d t3, a3, 3
+ slli.d t4, a4, 3
+ sub.d a6, t3, t4
+ srl.d t1, t1, t4
+
+ srl.d t0, t2, t3
+ srl.d t5, t7, t4
+ sub.d t6, t0, t1
+ and t6, t6, t5
+
+ sub.d t5, t8, a4
+ bnez t6, L(first_out)
+ bgeu t5, a2, L(ret)
+ sub.d a2, a2, t5
+
+ bnez a6, L(unaligned)
+ blt a2, t8, L(al_less_8bytes)
+ andi t1, a2, 31
+ beq t1, a2, L(al_less_32bytes)
+
+ sub.d t2, a2, t1
+ add.d a4, a0, t2
+ move a2, t1
+
+L(al_loop):
+ ld.d t0, a0, 8
+
+ ld.d t1, a1, 8
+ ld.d t2, a0, 16
+ ld.d t3, a1, 16
+ ld.d t4, a0, 24
+
+ ld.d t5, a1, 24
+ ld.d t6, a0, 32
+ ld.d t7, a1, 32
+ addi.d a0, a0, 32
+
+ addi.d a1, a1, 32
+ bne t0, t1, L(out1)
+ bne t2, t3, L(out2)
+ bne t4, t5, L(out3)
+
+ bne t6, t7, L(out4)
+ bne a0, a4, L(al_loop)
+
+L(al_less_32bytes):
+ srai.d a4, a2, 4
+ beqz a4, L(al_less_16bytes)
+
+ ld.d t0, a0, 8
+ ld.d t1, a1, 8
+ ld.d t2, a0, 16
+ ld.d t3, a1, 16
+
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+ addi.d a2, a2, -16
+ bne t0, t1, L(out1)
+
+ bne t2, t3, L(out2)
+
+L(al_less_16bytes):
+ srai.d a4, a2, 3
+ beqz a4, L(al_less_8bytes)
+ ld.d t0, a0, 8
+
+ ld.d t1, a1, 8
+ addi.d a0, a0, 8
+ addi.d a1, a1, 8
+ addi.d a2, a2, -8
+
+ bne t0, t1, L(out1)
+
+L(al_less_8bytes):
+ beqz a2, L(ret)
+ ld.d t0, a0, 8
+ ld.d t1, a1, 8
+
+ li.d t7, -1
+ slli.d t2, a2, 3
+ sll.d t2, t7, t2
+ sub.d t3, t0, t1
+
+ andn t6, t3, t2
+ bnez t6, L(count_diff)
+
+L(ret):
+ move a0, zero
+ jr ra
+
+L(out4):
+ move t0, t6
+ move t1, t7
+ sub.d t6, t6, t7
+ b L(count_diff)
+
+L(out3):
+ move t0, t4
+ move t1, t5
+ sub.d t6, t4, t5
+ b L(count_diff)
+
+L(out2):
+ move t0, t2
+ move t1, t3
+L(out1):
+ sub.d t6, t0, t1
+ b L(count_diff)
+
+L(first_out):
+ slli.d t4, a2, 3
+ slt t3, a2, t5
+ sll.d t4, t7, t4
+ maskeqz t4, t4, t3
+
+ andn t6, t6, t4
+
+L(count_diff):
+ ctz.d t2, t6
+ bstrins.d t2, zero, 2, 0
+ srl.d t0, t0, t2
+
+ srl.d t1, t1, t2
+ andi t0, t0, 0xff
+ andi t1, t1, 0xff
+ sub.d t2, t0, t1
+
+ sub.d t3, t1, t0
+ masknez t2, t2, a5
+ maskeqz t3, t3, a5
+ or a0, t2, t3
+
+ jr ra
+
+L(unaligned):
+ sub.d a7, zero, a6
+ srl.d t0, t2, a6
+ blt a2, t8, L(un_less_8bytes)
+
+ andi t1, a2, 31
+ beq t1, a2, L(un_less_32bytes)
+ sub.d t2, a2, t1
+ add.d a4, a0, t2
+
+ move a2, t1
+
+L(un_loop):
+ ld.d t2, a0, 8
+ ld.d t1, a1, 8
+ ld.d t4, a0, 16
+
+ ld.d t3, a1, 16
+ ld.d t6, a0, 24
+ ld.d t5, a1, 24
+ ld.d t8, a0, 32
+
+ ld.d t7, a1, 32
+ addi.d a0, a0, 32
+ addi.d a1, a1, 32
+ sll.d a3, t2, a7
+
+ or t0, a3, t0
+ bne t0, t1, L(out1)
+ srl.d t0, t2, a6
+ sll.d a3, t4, a7
+
+ or t2, a3, t0
+ bne t2, t3, L(out2)
+ srl.d t0, t4, a6
+ sll.d a3, t6, a7
+
+ or t4, a3, t0
+ bne t4, t5, L(out3)
+ srl.d t0, t6, a6
+ sll.d a3, t8, a7
+
+ or t6, t0, a3
+ bne t6, t7, L(out4)
+ srl.d t0, t8, a6
+ bne a0, a4, L(un_loop)
+
+L(un_less_32bytes):
+ srai.d a4, a2, 4
+ beqz a4, L(un_less_16bytes)
+ ld.d t2, a0, 8
+ ld.d t1, a1, 8
+
+ ld.d t4, a0, 16
+ ld.d t3, a1, 16
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+
+ addi.d a2, a2, -16
+ sll.d a3, t2, a7
+ or t0, a3, t0
+ bne t0, t1, L(out1)
+
+ srl.d t0, t2, a6
+ sll.d a3, t4, a7
+ or t2, a3, t0
+ bne t2, t3, L(out2)
+
+ srl.d t0, t4, a6
+
+L(un_less_16bytes):
+ srai.d a4, a2, 3
+ beqz a4, L(un_less_8bytes)
+ ld.d t2, a0, 8
+
+ ld.d t1, a1, 8
+ addi.d a0, a0, 8
+ addi.d a1, a1, 8
+ addi.d a2, a2, -8
+
+ sll.d a3, t2, a7
+ or t0, a3, t0
+ bne t0, t1, L(out1)
+ srl.d t0, t2, a6
+
+L(un_less_8bytes):
+ beqz a2, L(ret)
+ andi a7, a7, 63
+ slli.d a4, a2, 3
+ bgeu a7, a4, L(last_cmp)
+
+ ld.d t2, a0, 8
+ sll.d a3, t2, a7
+ or t0, a3, t0
+
+L(last_cmp):
+ ld.d t1, a1, 8
+
+ li.d t7, -1
+ sll.d t2, t7, a4
+ sub.d t3, t0, t1
+ andn t6, t3, t2
+
+ bnez t6, L(count_diff)
+ move a0, zero
+ jr ra
+END(MEMCMP_NAME)
+
+libc_hidden_builtin_def (MEMCMP_NAME)
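
L(count_diff) above finds the first differing byte of a 64-bit pair and turns it into the signed memcmp result; little-endian load order is what makes a trailing-zero count the right primitive. The same logic in C, as a sketch (`swapped` mirrors the a5 flag recording whether the operands were exchanged for alignment):

#include <stdint.h>

/* First-difference result for two little-endian 64-bit loads known to
   differ.  ctz of A ^ B and of A - B select the same lowest differing
   bit; rounding down to a multiple of 8 isolates the byte lane, as
   bstrins.d does above.  */
static inline int
count_diff (uint64_t a, uint64_t b, int swapped)
{
  unsigned int shift = __builtin_ctzll (a ^ b) & ~7u;
  int d = (int) ((a >> shift) & 0xff) - (int) ((b >> shift) & 0xff);
  return swapped ? -d : d;
}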
diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S
new file mode 100644
index 00000000..3151a179
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S
@@ -0,0 +1,207 @@
+/* Optimized memcmp implementation using LoongArch LASX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define MEMCMP __memcmp_lasx
+
+LEAF(MEMCMP, 6)
+ li.d t2, 32
+ add.d a3, a0, a2
+ add.d a4, a1, a2
+ bgeu t2, a2, L(less32)
+
+ li.d t1, 160
+ bgeu a2, t1, L(make_aligned)
+L(loop32):
+ xvld xr0, a0, 0
+ xvld xr1, a1, 0
+
+ addi.d a0, a0, 32
+ addi.d a1, a1, 32
+ addi.d a2, a2, -32
+ xvseq.b xr2, xr0, xr1
+
+ xvsetanyeqz.b fcc0, xr2
+ bcnez fcc0, L(end)
+L(last_bytes):
+ bltu t2, a2, L(loop32)
+ xvld xr0, a3, -32
+
+
+ xvld xr1, a4, -32
+ xvseq.b xr2, xr0, xr1
+L(end):
+ xvmsknz.b xr2, xr2
+ xvpermi.q xr4, xr0, 1
+
+ xvpickve.w xr3, xr2, 4
+ xvpermi.q xr5, xr1, 1
+ vilvl.h vr2, vr3, vr2
+ movfr2gr.s t0, fa2
+
+ cto.w t0, t0
+ vreplgr2vr.b vr2, t0
+ vshuf.b vr0, vr4, vr0, vr2
+ vshuf.b vr1, vr5, vr1, vr2
+
+ vpickve2gr.bu t0, vr0, 0
+ vpickve2gr.bu t1, vr1, 0
+ sub.d a0, t0, t1
+ jr ra
+
+
+L(less32):
+ srli.d t0, a2, 4
+ beqz t0, L(less16)
+ vld vr0, a0, 0
+ vld vr1, a1, 0
+
+ vld vr2, a3, -16
+ vld vr3, a4, -16
+L(short_ret):
+ vseq.b vr4, vr0, vr1
+ vseq.b vr5, vr2, vr3
+
+ vmsknz.b vr4, vr4
+ vmsknz.b vr5, vr5
+ vilvl.h vr4, vr5, vr4
+ movfr2gr.s t0, fa4
+
+ cto.w t0, t0
+ vreplgr2vr.b vr4, t0
+ vshuf.b vr0, vr2, vr0, vr4
+ vshuf.b vr1, vr3, vr1, vr4
+
+
+ vpickve2gr.bu t0, vr0, 0
+ vpickve2gr.bu t1, vr1, 0
+ sub.d a0, t0, t1
+ jr ra
+
+L(less16):
+ srli.d t0, a2, 3
+ beqz t0, L(less8)
+ vldrepl.d vr0, a0, 0
+ vldrepl.d vr1, a1, 0
+
+ vldrepl.d vr2, a3, -8
+ vldrepl.d vr3, a4, -8
+ b L(short_ret)
+ nop
+
+L(less8):
+ srli.d t0, a2, 2
+ beqz t0, L(less4)
+ vldrepl.w vr0, a0, 0
+ vldrepl.w vr1, a1, 0
+
+
+ vldrepl.w vr2, a3, -4
+ vldrepl.w vr3, a4, -4
+ b L(short_ret)
+ nop
+
+L(less4):
+ srli.d t0, a2, 1
+ beqz t0, L(less2)
+ vldrepl.h vr0, a0, 0
+ vldrepl.h vr1, a1, 0
+
+ vldrepl.h vr2, a3, -2
+ vldrepl.h vr3, a4, -2
+ b L(short_ret)
+ nop
+
+L(less2):
+ beqz a2, L(ret0)
+ ld.bu t0, a0, 0
+ ld.bu t1, a1, 0
+ sub.d a0, t0, t1
+
+ jr ra
+L(ret0):
+ move a0, zero
+ jr ra
+
+L(make_aligned):
+ xvld xr0, a0, 0
+
+ xvld xr1, a1, 0
+ xvseq.b xr2, xr0, xr1
+ xvsetanyeqz.b fcc0, xr2
+ bcnez fcc0, L(end)
+
+ andi t0, a0, 0x1f
+ sub.d t0, t2, t0
+ sub.d t1, a2, t0
+ add.d a0, a0, t0
+
+ add.d a1, a1, t0
+ andi a2, t1, 0x3f
+ sub.d t0, t1, a2
+ add.d a5, a0, t0
+
+
+L(loop_align):
+ xvld xr0, a0, 0
+ xvld xr1, a1, 0
+ xvld xr2, a0, 32
+ xvld xr3, a1, 32
+
+ xvseq.b xr0, xr0, xr1
+ xvseq.b xr1, xr2, xr3
+ xvmin.bu xr2, xr1, xr0
+ xvsetanyeqz.b fcc0, xr2
+
+ bcnez fcc0, L(pair_end)
+ addi.d a0, a0, 64
+ addi.d a1, a1, 64
+ bne a0, a5, L(loop_align)
+
+ bnez a2, L(last_bytes)
+ move a0, zero
+ jr ra
+ nop
+
+
+L(pair_end):
+ xvmsknz.b xr0, xr0
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr2, xr0, 4
+ xvpickve.w xr3, xr1, 4
+
+ vilvl.h vr0, vr2, vr0
+ vilvl.h vr1, vr3, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+
+ cto.d t0, t0
+ ldx.bu t1, a0, t0
+ ldx.bu t2, a1, t0
+ sub.d a0, t1, t2
+
+ jr ra
+END(MEMCMP)
+
+libc_hidden_builtin_def (MEMCMP)
+#endif
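
The cto.w/vshuf.b ending above avoids a memory round-trip: vseq.b leaves all-ones in equal lanes, the mask therefore has trailing ones up to the first mismatch, and a count-trailing-ones gives the index at which to extract one byte from each vector. In scalar form, as a sketch:

#include <stdint.h>

/* EQMASK has bit I set where the 32-byte vectors matched; assumes at
   least one mismatch, as the surrounding code guarantees.  cto (count
   trailing ones) is ctz of the complement.  */
static inline int
tail_result (uint32_t eqmask, const unsigned char *a, const unsigned char *b)
{
  unsigned int i = __builtin_ctz (~eqmask);
  return a[i] - b[i];
}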
diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S
new file mode 100644
index 00000000..38a50a4c
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S
@@ -0,0 +1,269 @@
+/* Optimized memcmp implementation using LoongArch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+#define MEMCMP __memcmp_lsx
+
+LEAF(MEMCMP, 6)
+ beqz a2, L(out)
+ pcalau12i t0, %pc_hi20(L(INDEX))
+ andi a3, a0, 0xf
+ vld vr5, t0, %pc_lo12(L(INDEX))
+
+ andi a4, a1, 0xf
+ bne a3, a4, L(unaligned)
+ bstrins.d a0, zero, 3, 0
+ xor a1, a1, a4
+
+ vld vr0, a0, 0
+ vld vr1, a1, 0
+ li.d t0, 16
+ vreplgr2vr.b vr3, a3
+
+ sub.d t1, t0, a3
+ vadd.b vr3, vr3, vr5
+ vshuf.b vr0, vr3, vr0, vr3
+ vshuf.b vr1, vr3, vr1, vr3
+
+
+ vseq.b vr4, vr0, vr1
+ bgeu t1, a2, L(al_end)
+ vsetanyeqz.b fcc0, vr4
+ bcnez fcc0, L(al_found)
+
+ sub.d t1, a2, t1
+ andi a2, t1, 31
+ beq a2, t1, L(al_less_32bytes)
+ sub.d t2, t1, a2
+
+ add.d a4, a0, t2
+L(al_loop):
+ vld vr0, a0, 16
+ vld vr1, a1, 16
+ vld vr2, a0, 32
+
+ vld vr3, a1, 32
+ addi.d a0, a0, 32
+ addi.d a1, a1, 32
+ vseq.b vr4, vr0, vr1
+
+
+ vseq.b vr6, vr2, vr3
+ vand.v vr6, vr4, vr6
+ vsetanyeqz.b fcc0, vr6
+ bcnez fcc0, L(al_pair_end)
+
+ bne a0, a4, L(al_loop)
+L(al_less_32bytes):
+ bgeu t0, a2, L(al_less_16bytes)
+ vld vr0, a0, 16
+ vld vr1, a1, 16
+
+ vld vr2, a0, 32
+ vld vr3, a1, 32
+ addi.d a2, a2, -16
+ vreplgr2vr.b vr6, a2
+
+ vslt.b vr5, vr5, vr6
+ vseq.b vr4, vr0, vr1
+ vseq.b vr6, vr2, vr3
+ vorn.v vr6, vr6, vr5
+
+
+L(al_pair_end):
+ vsetanyeqz.b fcc0, vr4
+ bcnez fcc0, L(al_found)
+ vnori.b vr4, vr6, 0
+ vfrstpi.b vr4, vr4, 0
+
+ vshuf.b vr0, vr2, vr2, vr4
+ vshuf.b vr1, vr3, vr3, vr4
+ vpickve2gr.bu t0, vr0, 0
+ vpickve2gr.bu t1, vr1, 0
+
+ sub.d a0, t0, t1
+ jr ra
+ nop
+ nop
+
+L(al_less_16bytes):
+ beqz a2, L(out)
+ vld vr0, a0, 16
+ vld vr1, a1, 16
+ vseq.b vr4, vr0, vr1
+
+
+L(al_end):
+ vreplgr2vr.b vr6, a2
+ vslt.b vr5, vr5, vr6
+ vorn.v vr4, vr4, vr5
+ nop
+
+L(al_found):
+ vnori.b vr4, vr4, 0
+ vfrstpi.b vr4, vr4, 0
+ vshuf.b vr0, vr0, vr0, vr4
+ vshuf.b vr1, vr1, vr1, vr4
+
+ vpickve2gr.bu t0, vr0, 0
+ vpickve2gr.bu t1, vr1, 0
+ sub.d a0, t0, t1
+ jr ra
+
+L(out):
+ move a0, zero
+ jr ra
+ nop
+ nop
+
+
+L(unaligned):
+ xor t2, a0, a1
+ sltu a5, a3, a4
+ masknez t2, t2, a5
+ xor a0, a0, t2
+
+ xor a1, a1, t2
+ andi a3, a0, 0xf
+ andi a4, a1, 0xf
+ bstrins.d a0, zero, 3, 0
+
+ xor a1, a1, a4
+ vld vr4, a0, 0
+ vld vr1, a1, 0
+ li.d t0, 16
+
+ vreplgr2vr.b vr2, a4
+ sub.d a6, a4, a3
+ sub.d t1, t0, a4
+ sub.d t2, t0, a6
+
+
+ vadd.b vr2, vr2, vr5
+ vreplgr2vr.b vr6, t2
+ vadd.b vr6, vr6, vr5
+ vshuf.b vr0, vr4, vr4, vr6
+
+ vshuf.b vr1, vr2, vr1, vr2
+ vshuf.b vr0, vr2, vr0, vr2
+ vseq.b vr7, vr0, vr1
+ bgeu t1, a2, L(un_end)
+
+ vsetanyeqz.b fcc0, vr7
+ bcnez fcc0, L(un_found)
+ sub.d a2, a2, t1
+ andi t1, a2, 31
+
+ beq a2, t1, L(un_less_32bytes)
+ sub.d t2, a2, t1
+ move a2, t1
+ add.d a4, a1, t2
+
+
+L(un_loop):
+ vld vr2, a0, 16
+ vld vr1, a1, 16
+ vld vr3, a1, 32
+ addi.d a1, a1, 32
+
+ addi.d a0, a0, 32
+ vshuf.b vr0, vr2, vr4, vr6
+ vld vr4, a0, 0
+ vseq.b vr7, vr0, vr1
+
+ vshuf.b vr2, vr4, vr2, vr6
+ vseq.b vr8, vr2, vr3
+ vand.v vr8, vr7, vr8
+ vsetanyeqz.b fcc0, vr8
+
+ bcnez fcc0, L(un_pair_end)
+ bne a1, a4, L(un_loop)
+
+L(un_less_32bytes):
+ bltu a2, t0, L(un_less_16bytes)
+ vld vr2, a0, 16
+ vld vr1, a1, 16
+ addi.d a0, a0, 16
+
+ addi.d a1, a1, 16
+ addi.d a2, a2, -16
+ vshuf.b vr0, vr2, vr4, vr6
+ vor.v vr4, vr2, vr2
+
+ vseq.b vr7, vr0, vr1
+ vsetanyeqz.b fcc0, vr7
+ bcnez fcc0, L(un_found)
+L(un_less_16bytes):
+ beqz a2, L(out)
+ vld vr1, a1, 16
+ bgeu a6, a2, 1f
+
+ vld vr2, a0, 16
+1:
+ vshuf.b vr0, vr2, vr4, vr6
+ vseq.b vr7, vr0, vr1
+L(un_end):
+ vreplgr2vr.b vr3, a2
+
+
+ vslt.b vr3, vr5, vr3
+ vorn.v vr7, vr7, vr3
+
+L(un_found):
+ vnori.b vr7, vr7, 0
+ vfrstpi.b vr7, vr7, 0
+
+ vshuf.b vr0, vr0, vr0, vr7
+ vshuf.b vr1, vr1, vr1, vr7
+L(calc_result):
+ vpickve2gr.bu t0, vr0, 0
+ vpickve2gr.bu t1, vr1, 0
+
+ sub.d t2, t0, t1
+ sub.d t3, t1, t0
+ masknez t0, t3, a5
+ maskeqz t1, t2, a5
+
+ or a0, t0, t1
+ jr ra
+L(un_pair_end):
+ vsetanyeqz.b fcc0, vr7
+ bcnez fcc0, L(un_found)
+
+
+ vnori.b vr7, vr8, 0
+ vfrstpi.b vr7, vr7, 0
+ vshuf.b vr0, vr2, vr2, vr7
+ vshuf.b vr1, vr3, vr3, vr7
+
+ b L(calc_result)
+END(MEMCMP)
+
+ .section .rodata.cst16,"M",@progbits,16
+ .align 4
+L(INDEX):
+ .dword 0x0706050403020100
+ .dword 0x0f0e0d0c0b0a0908
+
+libc_hidden_builtin_def (MEMCMP)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp.c b/sysdeps/loongarch/lp64/multiarch/memcmp.c
new file mode 100644
index 00000000..32eccac2
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memcmp.c
@@ -0,0 +1,43 @@
+/* Multiple versions of memcmp.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define memcmp __redirect_memcmp
+# include <string.h>
+# undef memcmp
+
+# define SYMBOL_NAME memcmp
+# include "ifunc-memcmp.h"
+
+libc_ifunc_redirected (__redirect_memcmp, memcmp,
+ IFUNC_SELECTOR ());
+# undef bcmp
+weak_alias (memcmp, bcmp)
+
+# undef __memcmpeq
+strong_alias (memcmp, __memcmpeq)
+libc_hidden_def (__memcmpeq)
+
+# ifdef SHARED
+__hidden_ver1 (memcmp, __GI_memcmp, __redirect_memcmp)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memcmp);
+# endif
+
+#endif
--
2.33.0


@ -0,0 +1,417 @@
From c4c272fb8067364530a2a78df92c37403acc963f Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Mon, 28 Aug 2023 10:08:37 +0800
Subject: [PATCH 16/29] LoongArch: Add ifunc support for memrchr{lsx, lasx}
According to glibc memrchr microbenchmark, this implementation could reduce
the runtime as follows:
Name Percent of runtime reduced
memrchr-lasx 20%-83%
memrchr-lsx 20%-64%
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
.../lp64/multiarch/ifunc-impl-list.c | 8 ++
.../loongarch/lp64/multiarch/ifunc-memrchr.h | 40 ++++++
.../lp64/multiarch/memrchr-generic.c | 23 ++++
.../loongarch/lp64/multiarch/memrchr-lasx.S | 123 ++++++++++++++++++
.../loongarch/lp64/multiarch/memrchr-lsx.S | 105 +++++++++++++++
sysdeps/loongarch/lp64/multiarch/memrchr.c | 33 +++++
7 files changed, 335 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-generic.c
create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 2f4802cf..7b87bc90 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -27,5 +27,8 @@ sysdep_routines += \
memchr-aligned \
memchr-lsx \
memchr-lasx \
+ memrchr-generic \
+ memrchr-lsx \
+ memrchr-lasx \
# sysdep_routines
endif
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index a567b9cf..8bd5489e 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -109,5 +109,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#endif
IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned)
)
+
+ IFUNC_IMPL (i, name, memrchr,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LASX, __memrchr_lasx)
+ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LSX, __memrchr_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic)
+ )
return i;
}
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h
new file mode 100644
index 00000000..8215f9ad
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h
@@ -0,0 +1,40 @@
+/* Common definition for memrchr implementation.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (generic);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c b/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c
new file mode 100644
index 00000000..ced61ebc
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c
@@ -0,0 +1,23 @@
+/* Generic implementation of memrchr.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define MEMRCHR __memrchr_generic
+#endif
+
+#include <string/memrchr.c>
diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S
new file mode 100644
index 00000000..5f3e0d06
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S
@@ -0,0 +1,123 @@
+/* Optimized memrchr implementation using LoongArch LASX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+#ifndef MEMRCHR
+# define MEMRCHR __memrchr_lasx
+#endif
+
+LEAF(MEMRCHR, 6)
+ beqz a2, L(ret0)
+ addi.d a2, a2, -1
+ add.d a3, a0, a2
+ andi t1, a3, 0x3f
+
+ bstrins.d a3, zero, 5, 0
+ addi.d t1, t1, 1
+ xvld xr0, a3, 0
+ xvld xr1, a3, 32
+
+ sub.d t2, zero, t1
+ li.d t3, -1
+ xvreplgr2vr.b xr2, a1
+ andi t4, a0, 0x3f
+
+ srl.d t2, t3, t2
+ xvseq.b xr0, xr0, xr2
+ xvseq.b xr1, xr1, xr2
+ xvmsknz.b xr0, xr0
+
+
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+ xvpickve.w xr4, xr1, 4
+ vilvl.h vr0, vr3, vr0
+
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+ and t0, t0, t2
+
+ bltu a2, t1, L(end)
+ bnez t0, L(found)
+ bstrins.d a0, zero, 5, 0
+L(loop):
+ xvld xr0, a3, -64
+
+ xvld xr1, a3, -32
+ addi.d a3, a3, -64
+ xvseq.b xr0, xr0, xr2
+ xvseq.b xr1, xr1, xr2
+
+
+ beq a0, a3, L(out)
+ xvmax.bu xr3, xr0, xr1
+ xvseteqz.v fcc0, xr3
+ bcnez fcc0, L(loop)
+
+ xvmsknz.b xr0, xr0
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+ xvpickve.w xr4, xr1, 4
+
+ vilvl.h vr0, vr3, vr0
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+
+L(found):
+ addi.d a0, a3, 63
+ clz.d t1, t0
+ sub.d a0, a0, t1
+ jr ra
+
+
+L(out):
+ xvmsknz.b xr0, xr0
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+ xvpickve.w xr4, xr1, 4
+
+ vilvl.h vr0, vr3, vr0
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+
+L(end):
+ sll.d t2, t3, t4
+ and t0, t0, t2
+ addi.d a0, a3, 63
+ clz.d t1, t0
+
+ sub.d a0, a0, t1
+ maskeqz a0, a0, t0
+ jr ra
+L(ret0):
+ move a0, zero
+
+
+ jr ra
+END(MEMRCHR)
+
+libc_hidden_builtin_def (MEMRCHR)
+#endif
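
Because memrchr scans backwards, the index math flips from ctz to clz: with the per-byte match mask in t0 for a 64-byte block, the last match sits at block + 63 - clz (mask), which is exactly the addi.d a0, a3, 63 / clz.d / sub.d sequence in L(found) above. As a sketch:

#include <stdint.h>

/* Last matching byte within a 64-byte block, given a nonzero per-byte
   match mask (bit I set when block[I] matched).  */
static inline const unsigned char *
last_match (const unsigned char *block, uint64_t mask)
{
  return block + 63 - __builtin_clzll (mask);
}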
diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S
new file mode 100644
index 00000000..39a7c8b0
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S
@@ -0,0 +1,105 @@
+/* Optimized memrchr implementation using LoongArch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define MEMRCHR __memrchr_lsx
+
+LEAF(MEMRCHR, 6)
+ beqz a2, L(ret0)
+ addi.d a2, a2, -1
+ add.d a3, a0, a2
+ andi t1, a3, 0x1f
+
+ bstrins.d a3, zero, 4, 0
+ addi.d t1, t1, 1
+ vld vr0, a3, 0
+ vld vr1, a3, 16
+
+ sub.d t2, zero, t1
+ li.d t3, -1
+ vreplgr2vr.b vr2, a1
+ andi t4, a0, 0x1f
+
+ srl.d t2, t3, t2
+ vseq.b vr0, vr0, vr2
+ vseq.b vr1, vr1, vr2
+ vmsknz.b vr0, vr0
+
+
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+ and t0, t0, t2
+
+ bltu a2, t1, L(end)
+ bnez t0, L(found)
+ bstrins.d a0, zero, 4, 0
+L(loop):
+ vld vr0, a3, -32
+
+ vld vr1, a3, -16
+ addi.d a3, a3, -32
+ vseq.b vr0, vr0, vr2
+ vseq.b vr1, vr1, vr2
+
+ beq a0, a3, L(out)
+ vmax.bu vr3, vr0, vr1
+ vseteqz.v fcc0, vr3
+ bcnez fcc0, L(loop)
+
+
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+
+L(found):
+ addi.d a0, a3, 31
+ clz.w t1, t0
+ sub.d a0, a0, t1
+ jr ra
+
+L(out):
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+
+L(end):
+ sll.d t2, t3, t4
+ and t0, t0, t2
+ addi.d a0, a3, 31
+ clz.w t1, t0
+
+
+ sub.d a0, a0, t1
+ maskeqz a0, a0, t0
+ jr ra
+L(ret0):
+ move a0, zero
+
+ jr ra
+END(MEMRCHR)
+
+libc_hidden_builtin_def (MEMRCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr.c b/sysdeps/loongarch/lp64/multiarch/memrchr.c
new file mode 100644
index 00000000..8baba9ab
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memrchr.c
@@ -0,0 +1,33 @@
+/* Multiple versions of memrchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define memrchr __redirect_memrchr
+# include <string.h>
+# undef memrchr
+
+# define SYMBOL_NAME memrchr
+# include "ifunc-memrchr.h"
+
+libc_ifunc_redirected (__redirect_memrchr, __memrchr, IFUNC_SELECTOR ());
+libc_hidden_def (__memrchr)
+weak_alias (__memrchr, memrchr)
+
+#endif
--
2.33.0


@ -0,0 +1,784 @@
From 14032f7bbe18443af8492f5d0365f72b76701673 Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Mon, 28 Aug 2023 10:08:38 +0800
Subject: [PATCH 17/29] LoongArch: Add ifunc support for memset{aligned,
unaligned, lsx, lasx}
According to glibc memset microbenchmark test results, for the LSX and LASX
versions, a few cases with length less than 8 experience performance
degradation; overall, the LASX version could reduce the runtime by about
15%-75% and the LSX version by about 15%-50%.
The unaligned version uses unaligned memory access to set data whose
length is less than 64 and to make the address 8-byte aligned. For this part,
the performance is better than the aligned version. Compared with the generic
version, the performance is close when the length is larger than 128. When
the length is 8-128, the unaligned version could reduce the runtime by about
30%-70%, and the aligned version by about 20%-50%.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 4 +
.../lp64/multiarch/dl-symbol-redir-ifunc.h | 24 +++
.../lp64/multiarch/ifunc-impl-list.c | 10 +
.../loongarch/lp64/multiarch/memset-aligned.S | 174 ++++++++++++++++++
.../loongarch/lp64/multiarch/memset-lasx.S | 142 ++++++++++++++
sysdeps/loongarch/lp64/multiarch/memset-lsx.S | 135 ++++++++++++++
.../lp64/multiarch/memset-unaligned.S | 162 ++++++++++++++++
sysdeps/loongarch/lp64/multiarch/memset.c | 37 ++++
8 files changed, 688 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-unaligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/memset.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 7b87bc90..216886c5 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -30,5 +30,9 @@ sysdep_routines += \
memrchr-generic \
memrchr-lsx \
memrchr-lasx \
+ memset-aligned \
+ memset-unaligned \
+ memset-lsx \
+ memset-lasx \
# sysdep_routines
endif
diff --git a/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h
new file mode 100644
index 00000000..e2723873
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h
@@ -0,0 +1,24 @@
+/* Symbol redirection for loader/static initialization code.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _DL_IFUNC_GENERIC_H
+#define _DL_IFUNC_GENERIC_H
+
+asm ("memset = __memset_aligned");
+
+#endif
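
The asm alias above exists because loader and static-initialization code can call memset before ifunc resolvers have run; binding those calls to the baseline implementation sidesteps the problem. The aliasing trick in isolation, as a sketch with hypothetical names:

/* Every reference to set_bytes assembled in this translation unit binds
   directly to set_bytes_baseline -- no PLT entry, no ifunc resolution.  */
asm ("set_bytes = set_bytes_baseline");

void
set_bytes_baseline (unsigned char *p, unsigned char c, unsigned long n)
{
  while (n--)
    *p++ = c;
}

/* Callers only ever see the plain declaration.  */
void set_bytes (unsigned char *p, unsigned char c, unsigned long n);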
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 8bd5489e..37f60dde 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -117,5 +117,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#endif
IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic)
)
+
+ IFUNC_IMPL (i, name, memset,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LASX, __memset_lasx)
+ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LSX, __memset_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_UAL, __memset_unaligned)
+ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_aligned)
+ )
+
return i;
}
diff --git a/sysdeps/loongarch/lp64/multiarch/memset-aligned.S b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S
new file mode 100644
index 00000000..1fce95b7
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S
@@ -0,0 +1,174 @@
+/* Optimized memset aligned implementation using basic LoongArch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define MEMSET_NAME __memset_aligned
+#else
+# define MEMSET_NAME memset
+#endif
+
+LEAF(MEMSET_NAME, 6)
+ move t0, a0
+ andi a3, a0, 0x7
+ li.w t6, 16
+ beqz a3, L(align)
+ bltu a2, t6, L(short_data)
+
+L(make_align):
+ li.w t8, 8
+ sub.d t2, t8, a3
+ pcaddi t1, 11
+ slli.d t3, t2, 2
+ sub.d t1, t1, t3
+ jr t1
+
+L(al7):
+ st.b a1, t0, 6
+L(al6):
+ st.b a1, t0, 5
+L(al5):
+ st.b a1, t0, 4
+L(al4):
+ st.b a1, t0, 3
+L(al3):
+ st.b a1, t0, 2
+L(al2):
+ st.b a1, t0, 1
+L(al1):
+ st.b a1, t0, 0
+L(al0):
+ add.d t0, t0, t2
+ sub.d a2, a2, t2
+
+L(align):
+ bstrins.d a1, a1, 15, 8
+ bstrins.d a1, a1, 31, 16
+ bstrins.d a1, a1, 63, 32
+ bltu a2, t6, L(less_16bytes)
+
+ andi a4, a2, 0x3f
+ beq a4, a2, L(less_64bytes)
+
+ sub.d t1, a2, a4
+ move a2, a4
+ add.d a5, t0, t1
+
+L(loop_64bytes):
+ addi.d t0, t0, 64
+ st.d a1, t0, -64
+ st.d a1, t0, -56
+ st.d a1, t0, -48
+ st.d a1, t0, -40
+
+ st.d a1, t0, -32
+ st.d a1, t0, -24
+ st.d a1, t0, -16
+ st.d a1, t0, -8
+ bne t0, a5, L(loop_64bytes)
+
+L(less_64bytes):
+ srai.d a4, a2, 5
+ beqz a4, L(less_32bytes)
+ addi.d a2, a2, -32
+ st.d a1, t0, 0
+
+ st.d a1, t0, 8
+ st.d a1, t0, 16
+ st.d a1, t0, 24
+ addi.d t0, t0, 32
+
+L(less_32bytes):
+ bltu a2, t6, L(less_16bytes)
+ addi.d a2, a2, -16
+ st.d a1, t0, 0
+ st.d a1, t0, 8
+ addi.d t0, t0, 16
+
+L(less_16bytes):
+ srai.d a4, a2, 3
+ beqz a4, L(less_8bytes)
+ addi.d a2, a2, -8
+ st.d a1, t0, 0
+ addi.d t0, t0, 8
+
+L(less_8bytes):
+ beqz a2, L(less_1byte)
+ srai.d a4, a2, 2
+ beqz a4, L(less_4bytes)
+ addi.d a2, a2, -4
+ st.w a1, t0, 0
+ addi.d t0, t0, 4
+
+L(less_4bytes):
+ srai.d a3, a2, 1
+ beqz a3, L(less_2bytes)
+ addi.d a2, a2, -2
+ st.h a1, t0, 0
+ addi.d t0, t0, 2
+
+L(less_2bytes):
+ beqz a2, L(less_1byte)
+ st.b a1, t0, 0
+L(less_1byte):
+ jr ra
+
+L(short_data):
+ pcaddi t1, 19
+ slli.d t3, a2, 2
+ sub.d t1, t1, t3
+ jr t1
+L(short_15):
+ st.b a1, a0, 14
+L(short_14):
+ st.b a1, a0, 13
+L(short_13):
+ st.b a1, a0, 12
+L(short_12):
+ st.b a1, a0, 11
+L(short_11):
+ st.b a1, a0, 10
+L(short_10):
+ st.b a1, a0, 9
+L(short_9):
+ st.b a1, a0, 8
+L(short_8):
+ st.b a1, a0, 7
+L(short_7):
+ st.b a1, a0, 6
+L(short_6):
+ st.b a1, a0, 5
+L(short_5):
+ st.b a1, a0, 4
+L(short_4):
+ st.b a1, a0, 3
+L(short_3):
+ st.b a1, a0, 2
+L(short_2):
+ st.b a1, a0, 1
+L(short_1):
+ st.b a1, a0, 0
+L(short_0):
+ jr ra
+END(MEMSET_NAME)
+
+libc_hidden_builtin_def (MEMSET_NAME)
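
L(short_data) above computes a jump target with pcaddi/jr and lands in a fall-through chain of byte stores, so a short fill takes one indirect branch and no loop. Its C analogue, as a sketch:

#include <stddef.h>

/* Branch-once byte fill for N < 16, mirroring the L(short_*) chain.  */
static void
memset_short (unsigned char *p, unsigned char c, size_t n)
{
  switch (n)
    {
    case 15: p[14] = c; /* fall through */
    case 14: p[13] = c; /* fall through */
    case 13: p[12] = c; /* fall through */
    case 12: p[11] = c; /* fall through */
    case 11: p[10] = c; /* fall through */
    case 10: p[9] = c;  /* fall through */
    case 9:  p[8] = c;  /* fall through */
    case 8:  p[7] = c;  /* fall through */
    case 7:  p[6] = c;  /* fall through */
    case 6:  p[5] = c;  /* fall through */
    case 5:  p[4] = c;  /* fall through */
    case 4:  p[3] = c;  /* fall through */
    case 3:  p[2] = c;  /* fall through */
    case 2:  p[1] = c;  /* fall through */
    case 1:  p[0] = c;  /* fall through */
    case 0:  break;
    }
}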
diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lasx.S b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S
new file mode 100644
index 00000000..041abbac
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S
@@ -0,0 +1,142 @@
+/* Optimized memset implementation using LoongArch LASX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define MEMSET __memset_lasx
+
+LEAF(MEMSET, 6)
+ li.d t1, 32
+ move a3, a0
+ xvreplgr2vr.b xr0, a1
+ add.d a4, a0, a2
+
+ bgeu t1, a2, L(less_32bytes)
+ li.d t3, 128
+ li.d t2, 64
+ blt t3, a2, L(long_bytes)
+
+L(less_128bytes):
+ bgeu t2, a2, L(less_64bytes)
+ xvst xr0, a3, 0
+ xvst xr0, a3, 32
+ xvst xr0, a4, -32
+
+ xvst xr0, a4, -64
+ jr ra
+L(less_64bytes):
+ xvst xr0, a3, 0
+ xvst xr0, a4, -32
+
+
+ jr ra
+L(less_32bytes):
+ srli.d t0, a2, 4
+ beqz t0, L(less_16bytes)
+ vst vr0, a3, 0
+
+ vst vr0, a4, -16
+ jr ra
+L(less_16bytes):
+ srli.d t0, a2, 3
+ beqz t0, L(less_8bytes)
+
+ vstelm.d vr0, a3, 0, 0
+ vstelm.d vr0, a4, -8, 0
+ jr ra
+L(less_8bytes):
+ srli.d t0, a2, 2
+
+ beqz t0, L(less_4bytes)
+ vstelm.w vr0, a3, 0, 0
+ vstelm.w vr0, a4, -4, 0
+ jr ra
+
+
+L(less_4bytes):
+ srli.d t0, a2, 1
+ beqz t0, L(less_2bytes)
+ vstelm.h vr0, a3, 0, 0
+ vstelm.h vr0, a4, -2, 0
+
+ jr ra
+L(less_2bytes):
+ beqz a2, L(less_1bytes)
+ st.b a1, a3, 0
+L(less_1bytes):
+ jr ra
+
+L(long_bytes):
+ xvst xr0, a3, 0
+ bstrins.d a3, zero, 4, 0
+ addi.d a3, a3, 32
+ sub.d a2, a4, a3
+
+ andi t0, a2, 0xff
+ beq t0, a2, L(long_end)
+ move a2, t0
+ sub.d t0, a4, t0
+
+
+L(loop_256):
+ xvst xr0, a3, 0
+ xvst xr0, a3, 32
+ xvst xr0, a3, 64
+ xvst xr0, a3, 96
+
+ xvst xr0, a3, 128
+ xvst xr0, a3, 160
+ xvst xr0, a3, 192
+ xvst xr0, a3, 224
+
+ addi.d a3, a3, 256
+ bne a3, t0, L(loop_256)
+L(long_end):
+ bltu a2, t3, L(end_less_128)
+ addi.d a2, a2, -128
+
+ xvst xr0, a3, 0
+ xvst xr0, a3, 32
+ xvst xr0, a3, 64
+ xvst xr0, a3, 96
+
+
+ addi.d a3, a3, 128
+L(end_less_128):
+ bltu a2, t2, L(end_less_64)
+ addi.d a2, a2, -64
+ xvst xr0, a3, 0
+
+ xvst xr0, a3, 32
+ addi.d a3, a3, 64
+L(end_less_64):
+ bltu a2, t1, L(end_less_32)
+ xvst xr0, a3, 0
+
+L(end_less_32):
+ xvst xr0, a4, -32
+ jr ra
+END(MEMSET)
+
+libc_hidden_builtin_def (MEMSET)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lsx.S b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S
new file mode 100644
index 00000000..3d3982aa
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S
@@ -0,0 +1,135 @@
+/* Optimized memset implementation using LoongArch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define MEMSET __memset_lsx
+
+LEAF(MEMSET, 6)
+ li.d t1, 16
+ move a3, a0
+ vreplgr2vr.b vr0, a1
+ add.d a4, a0, a2
+
+ bgeu t1, a2, L(less_16bytes)
+ li.d t3, 64
+ li.d t2, 32
+ bgeu a2, t3, L(long_bytes)
+
+L(less_64bytes):
+ bgeu t2, a2, L(less_32bytes)
+ vst vr0, a3, 0
+ vst vr0, a3, 16
+ vst vr0, a4, -32
+
+ vst vr0, a4, -16
+ jr ra
+L(less_32bytes):
+ vst vr0, a3, 0
+ vst vr0, a4, -16
+
+
+ jr ra
+L(less_16bytes):
+ srli.d t0, a2, 3
+ beqz t0, L(less_8bytes)
+ vstelm.d vr0, a3, 0, 0
+
+ vstelm.d vr0, a4, -8, 0
+ jr ra
+L(less_8bytes):
+ srli.d t0, a2, 2
+ beqz t0, L(less_4bytes)
+
+ vstelm.w vr0, a3, 0, 0
+ vstelm.w vr0, a4, -4, 0
+ jr ra
+L(less_4bytes):
+ srli.d t0, a2, 1
+
+ beqz t0, L(less_2bytes)
+ vstelm.h vr0, a3, 0, 0
+ vstelm.h vr0, a4, -2, 0
+ jr ra
+
+
+L(less_2bytes):
+ beqz a2, L(less_1bytes)
+ vstelm.b vr0, a3, 0, 0
+L(less_1bytes):
+ jr ra
+L(long_bytes):
+ vst vr0, a3, 0
+
+ bstrins.d a3, zero, 3, 0
+ addi.d a3, a3, 16
+ sub.d a2, a4, a3
+ andi t0, a2, 0x7f
+
+ beq t0, a2, L(long_end)
+ move a2, t0
+ sub.d t0, a4, t0
+
+L(loop_128):
+ vst vr0, a3, 0
+
+ vst vr0, a3, 16
+ vst vr0, a3, 32
+ vst vr0, a3, 48
+ vst vr0, a3, 64
+
+
+ vst vr0, a3, 80
+ vst vr0, a3, 96
+ vst vr0, a3, 112
+ addi.d a3, a3, 128
+
+ bne a3, t0, L(loop_128)
+L(long_end):
+ bltu a2, t3, L(end_less_64)
+ addi.d a2, a2, -64
+ vst vr0, a3, 0
+
+ vst vr0, a3, 16
+ vst vr0, a3, 32
+ vst vr0, a3, 48
+ addi.d a3, a3, 64
+
+L(end_less_64):
+ bltu a2, t2, L(end_less_32)
+ addi.d a2, a2, -32
+ vst vr0, a3, 0
+ vst vr0, a3, 16
+
+ addi.d a3, a3, 32
+L(end_less_32):
+ bltu a2, t1, L(end_less_16)
+ vst vr0, a3, 0
+
+L(end_less_16):
+ vst vr0, a4, -16
+ jr ra
+END(MEMSET)
+
+libc_hidden_builtin_def (MEMSET)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S
new file mode 100644
index 00000000..f7d32039
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S
@@ -0,0 +1,162 @@
+/* Optimized memset unaligned implementation using basic LoongArch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+
+# define MEMSET_NAME __memset_unaligned
+
+#define ST_128(n) \
+ st.d a1, a0, n; \
+ st.d a1, a0, n+8 ; \
+ st.d a1, a0, n+16 ; \
+ st.d a1, a0, n+24 ; \
+ st.d a1, a0, n+32 ; \
+ st.d a1, a0, n+40 ; \
+ st.d a1, a0, n+48 ; \
+ st.d a1, a0, n+56 ; \
+ st.d a1, a0, n+64 ; \
+ st.d a1, a0, n+72 ; \
+ st.d a1, a0, n+80 ; \
+ st.d a1, a0, n+88 ; \
+ st.d a1, a0, n+96 ; \
+ st.d a1, a0, n+104; \
+ st.d a1, a0, n+112; \
+ st.d a1, a0, n+120;
+
+LEAF(MEMSET_NAME, 6)
+ bstrins.d a1, a1, 15, 8
+ add.d t7, a0, a2
+ bstrins.d a1, a1, 31, 16
+ move t0, a0
+
+ bstrins.d a1, a1, 63, 32
+ srai.d t8, a2, 4
+ beqz t8, L(less_16bytes)
+ srai.d t8, a2, 6
+
+ bnez t8, L(more_64bytes)
+ srai.d t8, a2, 5
+ beqz t8, L(less_32bytes)
+
+ st.d a1, a0, 0
+ st.d a1, a0, 8
+ st.d a1, a0, 16
+ st.d a1, a0, 24
+
+ st.d a1, t7, -32
+ st.d a1, t7, -24
+ st.d a1, t7, -16
+ st.d a1, t7, -8
+
+ jr ra
+
+L(less_32bytes):
+ st.d a1, a0, 0
+ st.d a1, a0, 8
+ st.d a1, t7, -16
+ st.d a1, t7, -8
+
+ jr ra
+
+L(less_16bytes):
+ srai.d t8, a2, 3
+ beqz t8, L(less_8bytes)
+ st.d a1, a0, 0
+ st.d a1, t7, -8
+
+ jr ra
+
+L(less_8bytes):
+ srai.d t8, a2, 2
+ beqz t8, L(less_4bytes)
+ st.w a1, a0, 0
+ st.w a1, t7, -4
+
+ jr ra
+
+L(less_4bytes):
+ srai.d t8, a2, 1
+ beqz t8, L(less_2bytes)
+ st.h a1, a0, 0
+ st.h a1, t7, -2
+
+ jr ra
+
+L(less_2bytes):
+ beqz a2, L(less_1bytes)
+ st.b a1, a0, 0
+
+ jr ra
+
+L(less_1bytes):
+ jr ra
+
+L(more_64bytes):
+ srli.d a0, a0, 3
+ slli.d a0, a0, 3
+ addi.d a0, a0, 0x8
+ st.d a1, t0, 0
+
+ sub.d t2, t0, a0
+ add.d a2, t2, a2
+ addi.d a2, a2, -0x80
+ blt a2, zero, L(end_unalign_proc)
+
+L(loop_less):
+ ST_128(0)
+ addi.d a0, a0, 0x80
+ addi.d a2, a2, -0x80
+ bge a2, zero, L(loop_less)
+
+L(end_unalign_proc):
+ addi.d a2, a2, 0x80
+ pcaddi t1, 20
+ andi t5, a2, 0x78
+ srli.d t5, t5, 1
+
+ sub.d t1, t1, t5
+ jr t1
+
+ st.d a1, a0, 112
+ st.d a1, a0, 104
+ st.d a1, a0, 96
+ st.d a1, a0, 88
+ st.d a1, a0, 80
+ st.d a1, a0, 72
+ st.d a1, a0, 64
+ st.d a1, a0, 56
+ st.d a1, a0, 48
+ st.d a1, a0, 40
+ st.d a1, a0, 32
+ st.d a1, a0, 24
+ st.d a1, a0, 16
+ st.d a1, a0, 8
+ st.d a1, a0, 0
+ st.d a1, t7, -8
+
+ move a0, t0
+ jr ra
+END(MEMSET_NAME)
+
+libc_hidden_builtin_def (MEMSET_NAME)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memset.c b/sysdeps/loongarch/lp64/multiarch/memset.c
new file mode 100644
index 00000000..3ff60d8a
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memset.c
@@ -0,0 +1,37 @@
+/* Multiple versions of memset.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define memset __redirect_memset
+# include <string.h>
+# undef memset
+
+# define SYMBOL_NAME memset
+# include "ifunc-lasx.h"
+
+libc_ifunc_redirected (__redirect_memset, memset,
+ IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (memset, __GI_memset, __redirect_memset)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memset);
+# endif
+
+#endif
--
2.33.0
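
A note on the scalar strategy in __memset_aligned above: the four bstrins.d instructions splat the fill byte across a 64-bit register, after which the bulk of the buffer is written with st.d doubleword stores. A minimal C sketch of that splat-and-store idea (an illustration of the technique only, not the glibc code; the assembly additionally aligns the destination first and dispatches head/tail bytes through computed branch tables):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void *
memset_sketch (void *dst, int c, size_t n)
{
  /* Replicate the byte into all eight lanes, mirroring
     bstrins.d a1, a1, 15, 8 / 31, 16 / 63, 32.  */
  uint64_t v = (uint8_t) c;
  v |= v << 8;
  v |= v << 16;
  v |= v << 32;

  unsigned char *p = dst;
  for (; n >= 8; n -= 8, p += 8)
    memcpy (p, &v, 8);          /* corresponds to st.d */
  for (; n > 0; n--)
    *p++ = (unsigned char) c;   /* byte tail, cf. st.b */
  return dst;
}

The LSX/LASX variants apply the same idea with 16- and 32-byte vector stores (vreplgr2vr.b/xvreplgr2vr.b plus vst/xvst) instead of doublewords.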


@@ -0,0 +1,448 @@
From b412bcb2cf4914a664bcd24924d670a2e37394b3 Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Mon, 28 Aug 2023 10:08:35 +0800
Subject: [PATCH 14/29] LoongArch: Add ifunc support for rawmemchr{aligned,
lsx, lasx}
According to the glibc rawmemchr microbenchmark, a few cases tested with
char '\0' show a performance degradation, because the LASX and LSX
versions do not handle '\0' separately. Overall, the rawmemchr-lasx
implementation reduces the runtime by about 40%-80%, rawmemchr-lsx
by about 40%-66%, and rawmemchr-aligned by about 20%-40%.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
.../lp64/multiarch/ifunc-impl-list.c | 8 ++
.../lp64/multiarch/ifunc-rawmemchr.h | 40 ++++++
.../lp64/multiarch/rawmemchr-aligned.S | 124 ++++++++++++++++++
.../loongarch/lp64/multiarch/rawmemchr-lasx.S | 82 ++++++++++++
.../loongarch/lp64/multiarch/rawmemchr-lsx.S | 71 ++++++++++
sysdeps/loongarch/lp64/multiarch/rawmemchr.c | 37 ++++++
7 files changed, 365 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 5d7ae7ae..64416b02 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -21,5 +21,8 @@ sysdep_routines += \
memmove-unaligned \
memmove-lsx \
memmove-lasx \
+ rawmemchr-aligned \
+ rawmemchr-lsx \
+ rawmemchr-lasx \
# sysdep_routines
endif
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index c8ba87bd..3db9af14 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -94,5 +94,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_aligned)
)
+ IFUNC_IMPL (i, name, rawmemchr,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LASX, __rawmemchr_lasx)
+ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LSX, __rawmemchr_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned)
+ )
+
return i;
}
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h
new file mode 100644
index 00000000..a7bb4cf9
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h
@@ -0,0 +1,40 @@
+/* Common definition for rawmemchr ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S
new file mode 100644
index 00000000..9c7155ae
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S
@@ -0,0 +1,124 @@
+/* Optimized rawmemchr implementation using basic LoongArch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define RAWMEMCHR_NAME __rawmemchr_aligned
+#else
+# define RAWMEMCHR_NAME __rawmemchr
+#endif
+
+LEAF(RAWMEMCHR_NAME, 6)
+ andi t1, a0, 0x7
+ bstrins.d a0, zero, 2, 0
+ lu12i.w a2, 0x01010
+ bstrins.d a1, a1, 15, 8
+
+ ld.d t0, a0, 0
+ slli.d t1, t1, 3
+ ori a2, a2, 0x101
+ bstrins.d a1, a1, 31, 16
+
+ li.w t8, -1
+ bstrins.d a1, a1, 63, 32
+ bstrins.d a2, a2, 63, 32
+ sll.d t2, t8, t1
+
+ sll.d t3, a1, t1
+ orn t0, t0, t2
+ slli.d a3, a2, 7
+ beqz a1, L(find_zero)
+
+ xor t0, t0, t3
+ sub.d t1, t0, a2
+ andn t2, a3, t0
+ and t3, t1, t2
+
+ bnez t3, L(count_pos)
+ addi.d a0, a0, 8
+
+L(loop):
+ ld.d t0, a0, 0
+ xor t0, t0, a1
+
+ sub.d t1, t0, a2
+ andn t2, a3, t0
+ and t3, t1, t2
+ bnez t3, L(count_pos)
+
+ ld.d t0, a0, 8
+ addi.d a0, a0, 16
+ xor t0, t0, a1
+ sub.d t1, t0, a2
+
+ andn t2, a3, t0
+ and t3, t1, t2
+ beqz t3, L(loop)
+ addi.d a0, a0, -8
+L(count_pos):
+ ctz.d t0, t3
+ srli.d t0, t0, 3
+ add.d a0, a0, t0
+ jr ra
+
+L(loop_7bit):
+ ld.d t0, a0, 0
+L(find_zero):
+ sub.d t1, t0, a2
+ and t2, t1, a3
+ bnez t2, L(more_check)
+
+ ld.d t0, a0, 8
+ addi.d a0, a0, 16
+ sub.d t1, t0, a2
+ and t2, t1, a3
+
+ beqz t2, L(loop_7bit)
+ addi.d a0, a0, -8
+
+L(more_check):
+ andn t2, a3, t0
+ and t3, t1, t2
+ bnez t3, L(count_pos)
+ addi.d a0, a0, 8
+
+L(loop_8bit):
+ ld.d t0, a0, 0
+
+ sub.d t1, t0, a2
+ andn t2, a3, t0
+ and t3, t1, t2
+ bnez t3, L(count_pos)
+
+ ld.d t0, a0, 8
+ addi.d a0, a0, 16
+ sub.d t1, t0, a2
+
+ andn t2, a3, t0
+ and t3, t1, t2
+ beqz t3, L(loop_8bit)
+
+ addi.d a0, a0, -8
+ b L(count_pos)
+
+END(RAWMEMCHR_NAME)
+
+libc_hidden_builtin_def (__rawmemchr)
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S
new file mode 100644
index 00000000..be2eb59d
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S
@@ -0,0 +1,82 @@
+/* Optimized rawmemchr implementation using LoongArch LASX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+#include <sys/regdef.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define RAWMEMCHR __rawmemchr_lasx
+
+LEAF(RAWMEMCHR, 6)
+ move a2, a0
+ bstrins.d a0, zero, 5, 0
+ xvld xr0, a0, 0
+ xvld xr1, a0, 32
+
+ xvreplgr2vr.b xr2, a1
+ xvseq.b xr0, xr0, xr2
+ xvseq.b xr1, xr1, xr2
+ xvmsknz.b xr0, xr0
+
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+ xvpickve.w xr4, xr1, 4
+ vilvl.h vr0, vr3, vr0
+
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+ sra.d t0, t0, a2
+
+
+ beqz t0, L(loop)
+ ctz.d t0, t0
+ add.d a0, a2, t0
+ jr ra
+
+L(loop):
+ xvld xr0, a0, 64
+ xvld xr1, a0, 96
+ addi.d a0, a0, 64
+ xvseq.b xr0, xr0, xr2
+
+ xvseq.b xr1, xr1, xr2
+ xvmax.bu xr3, xr0, xr1
+ xvseteqz.v fcc0, xr3
+ bcnez fcc0, L(loop)
+
+ xvmsknz.b xr0, xr0
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+ xvpickve.w xr4, xr1, 4
+
+
+ vilvl.h vr0, vr3, vr0
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+
+ ctz.d t0, t0
+ add.d a0, a0, t0
+ jr ra
+END(RAWMEMCHR)
+
+libc_hidden_builtin_def (RAWMEMCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S
new file mode 100644
index 00000000..2f6fe024
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S
@@ -0,0 +1,71 @@
+/* Optimized rawmemchr implementation using LoongArch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define RAWMEMCHR __rawmemchr_lsx
+
+LEAF(RAWMEMCHR, 6)
+ move a2, a0
+ bstrins.d a0, zero, 4, 0
+ vld vr0, a0, 0
+ vld vr1, a0, 16
+
+ vreplgr2vr.b vr2, a1
+ vseq.b vr0, vr0, vr2
+ vseq.b vr1, vr1, vr2
+ vmsknz.b vr0, vr0
+
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+ sra.w t0, t0, a2
+
+ beqz t0, L(loop)
+ ctz.w t0, t0
+ add.d a0, a2, t0
+ jr ra
+
+
+L(loop):
+ vld vr0, a0, 32
+ vld vr1, a0, 48
+ addi.d a0, a0, 32
+ vseq.b vr0, vr0, vr2
+
+ vseq.b vr1, vr1, vr2
+ vmax.bu vr3, vr0, vr1
+ vseteqz.v fcc0, vr3
+ bcnez fcc0, L(loop)
+
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+
+ ctz.w t0, t0
+ add.d a0, a0, t0
+ jr ra
+END(RAWMEMCHR)
+
+libc_hidden_builtin_def (RAWMEMCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr.c b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c
new file mode 100644
index 00000000..89c7ffff
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c
@@ -0,0 +1,37 @@
+/* Multiple versions of rawmemchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define rawmemchr __redirect_rawmemchr
+# define __rawmemchr __redirect___rawmemchr
+# include <string.h>
+# undef rawmemchr
+# undef __rawmemchr
+
+# define SYMBOL_NAME rawmemchr
+# include "ifunc-rawmemchr.h"
+
+libc_ifunc_redirected (__redirect_rawmemchr, __rawmemchr,
+ IFUNC_SELECTOR ());
+weak_alias (__rawmemchr, rawmemchr)
+# ifdef SHARED
+__hidden_ver1 (__rawmemchr, __GI___rawmemchr, __redirect___rawmemchr)
+ __attribute__((visibility ("hidden")));
+# endif
+#endif
--
2.33.0
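
The rawmemchr-aligned loop above is the classic word-at-a-time byte scan: the sub.d/andn/and triple against the 0x0101...01 and 0x8080...80 constants flags any zero byte in a doubleword, and an initial xor against the splatted search byte reduces the general case to zero-byte detection. A hedged C sketch of just that detection step:

#include <stdint.h>

#define ONES  0x0101010101010101ULL
#define HIGHS 0x8080808080808080ULL

/* Nonzero iff some byte of w is 0x00: a byte can borrow in
   (w - ONES) while having its high bit clear in w only if it
   was zero.  This is the sub.d/andn/and sequence in the loop.  */
static inline uint64_t
has_zero_byte (uint64_t w)
{
  return (w - ONES) & ~w & HIGHS;
}

/* Searching for byte c reduces to the zero case via XOR, as the
   assembly does with "xor t0, t0, a1" on the splatted byte.  */
static inline uint64_t
has_byte (uint64_t w, uint8_t c)
{
  return has_zero_byte (w ^ (ONES * c));
}

A ctz on the resulting mask, shifted right by 3, then yields the byte index of the first match, which is what the L(count_pos) block computes.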


@@ -0,0 +1,499 @@
From e258cfcf92f5e31e902fa045b41652f00fcf2521 Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Thu, 24 Aug 2023 16:50:18 +0800
Subject: [PATCH 09/29] LoongArch: Add ifunc support for strcmp{aligned, lsx}
Based on the glibc microbenchmark, the strcmp-aligned implementation
reduces the runtime by 0%-10% for aligned comparisons and by 10%-20%
for unaligned comparisons; the strcmp-lsx implementation reduces the
runtime by 0%-50%.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 2 +
.../lp64/multiarch/ifunc-impl-list.c | 7 +
.../loongarch/lp64/multiarch/ifunc-strcmp.h | 38 ++++
.../loongarch/lp64/multiarch/strcmp-aligned.S | 179 ++++++++++++++++++
sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S | 165 ++++++++++++++++
sysdeps/loongarch/lp64/multiarch/strcmp.c | 35 ++++
6 files changed, 426 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index c4dd3143..d5a500de 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -12,6 +12,8 @@ sysdep_routines += \
strchrnul-aligned \
strchrnul-lsx \
strchrnul-lasx \
+ strcmp-aligned \
+ strcmp-lsx \
memcpy-aligned \
memcpy-unaligned \
memmove-unaligned \
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 7cec0b77..9183b7da 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -62,6 +62,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned)
)
+ IFUNC_IMPL (i, name, strcmp,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, strcmp, SUPPORT_LSX, __strcmp_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_aligned)
+ )
+
IFUNC_IMPL (i, name, memcpy,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx)
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h
new file mode 100644
index 00000000..ca26352b
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h
@@ -0,0 +1,38 @@
+/* Common definition for strcmp ifunc selection.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
new file mode 100644
index 00000000..f5f4f336
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
@@ -0,0 +1,179 @@
+/* Optimized strcmp implementation using basic Loongarch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STRCMP_NAME __strcmp_aligned
+#else
+# define STRCMP_NAME strcmp
+#endif
+
+LEAF(STRCMP_NAME, 6)
+ lu12i.w a4, 0x01010
+ andi a2, a0, 0x7
+ ori a4, a4, 0x101
+ andi a3, a1, 0x7
+
+ bstrins.d a4, a4, 63, 32
+ li.d t7, -1
+ li.d t8, 8
+ slli.d a5, a4, 7
+
+ bne a2, a3, L(unaligned)
+ bstrins.d a0, zero, 2, 0
+ bstrins.d a1, zero, 2, 0
+ ld.d t0, a0, 0
+
+ ld.d t1, a1, 0
+ slli.d t3, a2, 3
+ sll.d t2, t7, t3
+ orn t0, t0, t2
+
+
+ orn t1, t1, t2
+ sub.d t2, t0, a4
+ andn t3, a5, t0
+ and t2, t2, t3
+
+ bne t0, t1, L(al_end)
+L(al_loop):
+ bnez t2, L(ret0)
+ ldx.d t0, a0, t8
+ ldx.d t1, a1, t8
+
+ addi.d t8, t8, 8
+ sub.d t2, t0, a4
+ andn t3, a5, t0
+ and t2, t2, t3
+
+ beq t0, t1, L(al_loop)
+L(al_end):
+ xor t3, t0, t1
+ or t2, t2, t3
+ ctz.d t3, t2
+
+
+ bstrins.d t3, zero, 2, 0
+ srl.d t0, t0, t3
+ srl.d t1, t1, t3
+ andi t0, t0, 0xff
+
+ andi t1, t1, 0xff
+ sub.d a0, t0, t1
+ jr ra
+ nop
+
+L(ret0):
+ move a0, zero
+ jr ra
+ nop
+ nop
+
+L(unaligned):
+ slt a6, a3, a2
+ xor t0, a0, a1
+ maskeqz t0, t0, a6
+ xor a0, a0, t0
+
+
+ xor a1, a1, t0
+ andi a2, a0, 0x7
+ andi a3, a1, 0x7
+ bstrins.d a0, zero, 2, 0
+
+ bstrins.d a1, zero, 2, 0
+ ld.d t4, a0, 0
+ ld.d t1, a1, 0
+ slli.d a2, a2, 3
+
+ slli.d a3, a3, 3
+ srl.d t0, t4, a2
+ srl.d t1, t1, a3
+ srl.d t5, t7, a3
+
+ orn t0, t0, t5
+ orn t1, t1, t5
+ bne t0, t1, L(not_equal)
+ sll.d t5, t7, a2
+
+
+ sub.d a3, a2, a3
+ orn t4, t4, t5
+ sub.d a2, zero, a3
+ sub.d t2, t4, a4
+
+ andn t3, a5, t4
+ and t2, t2, t3
+ bnez t2, L(find_zero)
+L(un_loop):
+ srl.d t5, t4, a3
+
+ ldx.d t4, a0, t8
+ ldx.d t1, a1, t8
+ addi.d t8, t8, 8
+ sll.d t0, t4, a2
+
+ or t0, t0, t5
+ bne t0, t1, L(not_equal)
+ sub.d t2, t4, a4
+ andn t3, a5, t4
+
+
+ and t2, t2, t3
+ beqz t2, L(un_loop)
+L(find_zero):
+ sub.d t2, t0, a4
+ andn t3, a5, t0
+
+ and t2, t2, t3
+ bnez t2, L(ret0)
+ ldx.d t1, a1, t8
+ srl.d t0, t4, a3
+
+L(not_equal):
+ sub.d t2, t0, a4
+ andn t3, a5, t0
+ and t2, t2, t3
+ xor t3, t0, t1
+
+ or t2, t2, t3
+L(un_end):
+ ctz.d t3, t2
+ bstrins.d t3, zero, 2, 0
+ srl.d t0, t0, t3
+
+
+ srl.d t1, t1, t3
+ andi t0, t0, 0xff
+ andi t1, t1, 0xff
+ sub.d t2, t0, t1
+
+
+ sub.d t3, t1, t0
+ masknez t0, t2, a6
+ maskeqz t1, t3, a6
+ or a0, t0, t1
+
+ jr ra
+END(STRCMP_NAME)
+
+libc_hidden_builtin_def (STRCMP_NAME)
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
new file mode 100644
index 00000000..2e177a38
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
@@ -0,0 +1,165 @@
+/* Optimized strcmp implementation using Loongarch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define STRCMP __strcmp_lsx
+
+LEAF(STRCMP, 6)
+ pcalau12i t0, %pc_hi20(L(INDEX))
+ andi a2, a0, 0xf
+ vld vr2, t0, %pc_lo12(L(INDEX))
+ andi a3, a1, 0xf
+
+ bne a2, a3, L(unaligned)
+ bstrins.d a0, zero, 3, 0
+ bstrins.d a1, zero, 3, 0
+ vld vr0, a0, 0
+
+ vld vr1, a1, 0
+ vreplgr2vr.b vr3, a2
+ vslt.b vr2, vr2, vr3
+ vseq.b vr3, vr0, vr1
+
+ vmin.bu vr3, vr0, vr3
+ vor.v vr3, vr3, vr2
+ vsetanyeqz.b fcc0, vr3
+ bcnez fcc0, L(al_out)
+
+
+L(al_loop):
+ vld vr0, a0, 16
+ vld vr1, a1, 16
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+
+ vseq.b vr3, vr0, vr1
+ vmin.bu vr3, vr0, vr3
+ vsetanyeqz.b fcc0, vr3
+ bceqz fcc0, L(al_loop)
+
+L(al_out):
+ vseqi.b vr3, vr3, 0
+ vfrstpi.b vr3, vr3, 0
+ vshuf.b vr0, vr0, vr0, vr3
+ vshuf.b vr1, vr1, vr1, vr3
+
+ vpickve2gr.bu t0, vr0, 0
+ vpickve2gr.bu t1, vr1, 0
+ sub.d a0, t0, t1
+ jr ra
+
+
+L(unaligned):
+ slt a4, a3, a2
+ xor t0, a0, a1
+ maskeqz t0, t0, a4
+ xor a0, a0, t0
+
+ xor a1, a1, t0
+ andi a2, a0, 0xf
+ andi a3, a1, 0xf
+ bstrins.d a0, zero, 3, 0
+
+ bstrins.d a1, zero, 3, 0
+ vld vr3, a0, 0
+ vld vr1, a1, 0
+ vreplgr2vr.b vr4, a2
+
+ vreplgr2vr.b vr5, a3
+ vslt.b vr7, vr2, vr5
+ vsub.b vr5, vr5, vr4
+ vaddi.bu vr6, vr2, 16
+
+
+ vsub.b vr6, vr6, vr5
+ vshuf.b vr0, vr3, vr3, vr6
+ vor.v vr0, vr0, vr7
+ vor.v vr1, vr1, vr7
+
+ vseq.b vr5, vr0, vr1
+ vsetanyeqz.b fcc0, vr5
+ bcnez fcc0, L(not_equal)
+ vslt.b vr4, vr2, vr4
+
+ vor.v vr0, vr3, vr4
+ vsetanyeqz.b fcc0, vr0
+ bcnez fcc0, L(find_zero)
+ nop
+
+L(un_loop):
+ vld vr3, a0, 16
+ vld vr1, a1, 16
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+
+
+ vshuf.b vr0, vr3, vr0, vr6
+ vseq.b vr5, vr0, vr1
+ vsetanyeqz.b fcc0, vr5
+ bcnez fcc0, L(not_equal)
+
+ vsetanyeqz.b fcc0, vr3
+ vor.v vr0, vr3, vr3
+ bceqz fcc0, L(un_loop)
+L(find_zero):
+ vmin.bu vr5, vr1, vr5
+
+ vsetanyeqz.b fcc0, vr5
+ bcnez fcc0, L(ret0)
+ vld vr1, a1, 16
+ vshuf.b vr0, vr3, vr3, vr6
+
+ vseq.b vr5, vr0, vr1
+L(not_equal):
+ vmin.bu vr5, vr0, vr5
+L(un_end):
+ vseqi.b vr5, vr5, 0
+ vfrstpi.b vr5, vr5, 0
+
+
+ vshuf.b vr0, vr0, vr0, vr5
+ vshuf.b vr1, vr1, vr1, vr5
+ vpickve2gr.bu t0, vr0, 0
+ vpickve2gr.bu t1, vr1, 0
+
+ sub.d t3, t0, t1
+ sub.d t4, t1, t0
+ masknez t0, t3, a4
+ maskeqz t1, t4, a4
+
+ or a0, t0, t1
+ jr ra
+L(ret0):
+ move a0, zero
+ jr ra
+END(STRCMP)
+
+ .section .rodata.cst16,"M",@progbits,16
+ .align 4
+L(INDEX):
+ .dword 0x0706050403020100
+ .dword 0x0f0e0d0c0b0a0908
+
+libc_hidden_builtin_def (STRCMP)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp.c b/sysdeps/loongarch/lp64/multiarch/strcmp.c
new file mode 100644
index 00000000..6f249c0b
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strcmp.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strcmp __redirect_strcmp
+# include <string.h>
+# undef strcmp
+
+# define SYMBOL_NAME strcmp
+# include "ifunc-strcmp.h"
+
+libc_ifunc_redirected (__redirect_strcmp, strcmp, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strcmp, __GI_strcmp, __redirect_strcmp)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strcmp);
+# endif
+#endif
--
2.33.0
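
The strcmp.c wrapper above uses glibc's internal libc_ifunc_redirected plumbing; outside glibc, the same load-time dispatch can be written with the compiler's ifunc attribute. A rough, self-contained sketch (the stand-in implementations and the supports_lsx probe are hypothetical; glibc's real selector tests SUPPORT_LSX derived from hwcap):

#include <string.h>

/* Stand-ins for the two entry points added by this patch.  */
static int
strcmp_aligned_impl (const char *a, const char *b)
{
  return strcmp (a, b);
}

static int
strcmp_lsx_impl (const char *a, const char *b)
{
  return strcmp (a, b);
}

/* Hypothetical runtime probe for the LSX unit.  */
static int
supports_lsx (void)
{
  return 0;
}

/* Resolver: runs once at relocation time and picks the variant.  */
static int (*resolve_strcmp (void)) (const char *, const char *)
{
  return supports_lsx () ? strcmp_lsx_impl : strcmp_aligned_impl;
}

int my_strcmp (const char *, const char *)
  __attribute__ ((ifunc ("resolve_strcmp")));

After resolution, every call to my_strcmp goes straight to the selected implementation with no per-call branch, which is the point of the IFUNC_SELECTOR machinery in ifunc-strcmp.h.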

File diff suppressed because it is too large.


@@ -0,0 +1,583 @@
From 6f03da2d7ef218c0f78375cf706dada59c3fee63 Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Thu, 24 Aug 2023 16:50:19 +0800
Subject: [PATCH 10/29] LoongArch: Add ifunc support for strncmp{aligned, lsx}
Based on the glibc microbenchmark, only a few short inputs regress with
the strncmp-aligned and strncmp-lsx implementations. Overall,
strncmp-aligned reduces the runtime by 0%-10% for aligned comparisons
and by 10%-25% for unaligned comparisons, and strncmp-lsx reduces the
runtime by about 0%-60%.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 2 +
.../lp64/multiarch/ifunc-impl-list.c | 7 +
.../loongarch/lp64/multiarch/ifunc-strncmp.h | 38 +++
.../lp64/multiarch/strncmp-aligned.S | 218 ++++++++++++++++++
.../loongarch/lp64/multiarch/strncmp-lsx.S | 208 +++++++++++++++++
sysdeps/loongarch/lp64/multiarch/strncmp.c | 35 +++
6 files changed, 508 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index d5a500de..5d7ae7ae 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -14,6 +14,8 @@ sysdep_routines += \
strchrnul-lasx \
strcmp-aligned \
strcmp-lsx \
+ strncmp-aligned \
+ strncmp-lsx \
memcpy-aligned \
memcpy-unaligned \
memmove-unaligned \
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 9183b7da..c8ba87bd 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -69,6 +69,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_aligned)
)
+ IFUNC_IMPL (i, name, strncmp,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, strncmp, SUPPORT_LSX, __strncmp_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_aligned)
+ )
+
IFUNC_IMPL (i, name, memcpy,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx)
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h
new file mode 100644
index 00000000..1a7dc36b
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h
@@ -0,0 +1,38 @@
+/* Common definition for strncmp ifunc selection.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
new file mode 100644
index 00000000..e2687fa7
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
@@ -0,0 +1,218 @@
+/* Optimized strncmp implementation using basic Loongarch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STRNCMP __strncmp_aligned
+#else
+# define STRNCMP strncmp
+#endif
+
+LEAF(STRNCMP, 6)
+ beqz a2, L(ret0)
+ lu12i.w a5, 0x01010
+ andi a3, a0, 0x7
+ ori a5, a5, 0x101
+
+ andi a4, a1, 0x7
+ bstrins.d a5, a5, 63, 32
+ li.d t7, -1
+ li.d t8, 8
+
+ addi.d a2, a2, -1
+ slli.d a6, a5, 7
+ bne a3, a4, L(unaligned)
+ bstrins.d a0, zero, 2, 0
+
+ bstrins.d a1, zero, 2, 0
+ ld.d t0, a0, 0
+ ld.d t1, a1, 0
+ slli.d t2, a3, 3
+
+
+ sub.d t5, t8, a3
+ srl.d t3, t7, t2
+ srl.d t0, t0, t2
+ srl.d t1, t1, t2
+
+ orn t0, t0, t3
+ orn t1, t1, t3
+ sub.d t2, t0, a5
+ andn t3, a6, t0
+
+ and t2, t2, t3
+ bne t0, t1, L(al_end)
+ sltu t4, a2, t5
+ sub.d a2, a2, t5
+
+L(al_loop):
+ or t4, t2, t4
+ bnez t4, L(ret0)
+ ldx.d t0, a0, t8
+ ldx.d t1, a1, t8
+
+
+ addi.d t8, t8, 8
+ sltui t4, a2, 8
+ addi.d a2, a2, -8
+ sub.d t2, t0, a5
+
+ andn t3, a6, t0
+ and t2, t2, t3
+ beq t0, t1, L(al_loop)
+ addi.d a2, a2, 8
+
+L(al_end):
+ xor t3, t0, t1
+ or t2, t2, t3
+ ctz.d t2, t2
+ srli.d t4, t2, 3
+
+ bstrins.d t2, zero, 2, 0
+ srl.d t0, t0, t2
+ srl.d t1, t1, t2
+ andi t0, t0, 0xff
+
+
+ andi t1, t1, 0xff
+ sltu t2, a2, t4
+ sub.d a0, t0, t1
+ masknez a0, a0, t2
+
+ jr ra
+L(ret0):
+ move a0, zero
+ jr ra
+ nop
+
+L(unaligned):
+ slt a7, a4, a3
+ xor t0, a0, a1
+ maskeqz t0, t0, a7
+ xor a0, a0, t0
+
+ xor a1, a1, t0
+ andi a3, a0, 0x7
+ andi a4, a1, 0x7
+ bstrins.d a0, zero, 2, 0
+
+
+ bstrins.d a1, zero, 2, 0
+ ld.d t4, a0, 0
+ ld.d t1, a1, 0
+ slli.d t2, a3, 3
+
+ slli.d t3, a4, 3
+ srl.d t5, t7, t3
+ srl.d t0, t4, t2
+ srl.d t1, t1, t3
+
+ orn t0, t0, t5
+ orn t1, t1, t5
+ bne t0, t1, L(not_equal)
+ sub.d t6, t8, a4
+
+ sub.d a4, t2, t3
+ sll.d t2, t7, t2
+ sub.d t5, t8, a3
+ orn t4, t4, t2
+
+
+ sub.d t2, t4, a5
+ andn t3, a6, t4
+ sltu t7, a2, t5
+ and t2, t2, t3
+
+ sub.d a3, zero, a4
+ or t2, t2, t7
+ bnez t2, L(un_end)
+ sub.d t7, t5, t6
+
+ sub.d a2, a2, t5
+ sub.d t6, t8, t7
+L(un_loop):
+ srl.d t5, t4, a4
+ ldx.d t4, a0, t8
+
+ ldx.d t1, a1, t8
+ addi.d t8, t8, 8
+ sll.d t0, t4, a3
+ or t0, t0, t5
+
+
+ bne t0, t1, L(loop_not_equal)
+ sub.d t2, t4, a5
+ andn t3, a6, t4
+ sltui t5, a2, 8
+
+ and t2, t2, t3
+ addi.d a2, a2, -8
+ or t3, t2, t5
+ beqz t3, L(un_loop)
+
+ addi.d a2, a2, 8
+L(un_end):
+ sub.d t2, t0, a5
+ andn t3, a6, t0
+ sltu t5, a2, t6
+
+ and t2, t2, t3
+ or t2, t2, t5
+ bnez t2, L(ret0)
+ ldx.d t1, a1, t8
+
+
+ srl.d t0, t4, a4
+ sub.d a2, a2, t6
+L(not_equal):
+ sub.d t2, t0, a5
+ andn t3, a6, t0
+
+ xor t4, t0, t1
+ and t2, t2, t3
+ or t2, t2, t4
+ ctz.d t2, t2
+
+ bstrins.d t2, zero, 2, 0
+ srli.d t4, t2, 3
+ srl.d t0, t0, t2
+ srl.d t1, t1, t2
+
+ andi t0, t0, 0xff
+ andi t1, t1, 0xff
+ sub.d t2, t0, t1
+ sub.d t3, t1, t0
+
+
+ masknez t0, t2, a7
+ maskeqz t1, t3, a7
+ sltu t2, a2, t4
+ or a0, t0, t1
+
+ masknez a0, a0, t2
+ jr ra
+L(loop_not_equal):
+ add.d a2, a2, t7
+ b L(not_equal)
+END(STRNCMP)
+
+libc_hidden_builtin_def (STRNCMP)
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
new file mode 100644
index 00000000..0b4eee2a
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
@@ -0,0 +1,208 @@
+/* Optimized strncmp implementation using Loongarch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define STRNCMP __strncmp_lsx
+
+LEAF(STRNCMP, 6)
+ beqz a2, L(ret0)
+ pcalau12i t0, %pc_hi20(L(INDEX))
+ andi a3, a0, 0xf
+ vld vr2, t0, %pc_lo12(L(INDEX))
+
+ andi a4, a1, 0xf
+ li.d t2, 16
+ bne a3, a4, L(unaligned)
+ xor t0, a0, a3
+
+ xor t1, a1, a4
+ vld vr0, t0, 0
+ vld vr1, t1, 0
+ vreplgr2vr.b vr3, a3
+
+
+ sub.d t2, t2, a3
+ vadd.b vr3, vr3, vr2
+ vshuf.b vr0, vr3, vr0, vr3
+ vshuf.b vr1, vr3, vr1, vr3
+
+ vseq.b vr3, vr0, vr1
+ vmin.bu vr3, vr0, vr3
+ bgeu t2, a2, L(al_early_end)
+ vsetanyeqz.b fcc0, vr3
+
+ bcnez fcc0, L(al_end)
+ add.d a3, a0, a2
+ addi.d a4, a3, -1
+ bstrins.d a4, zero, 3, 0
+
+ sub.d a2, a3, a4
+L(al_loop):
+ vld vr0, t0, 16
+ vld vr1, t1, 16
+ addi.d t0, t0, 16
+
+
+ addi.d t1, t1, 16
+ vseq.b vr3, vr0, vr1
+ vmin.bu vr3, vr0, vr3
+ beq t0, a4, L(al_early_end)
+
+ vsetanyeqz.b fcc0, vr3
+ bceqz fcc0, L(al_loop)
+L(al_end):
+ vseqi.b vr3, vr3, 0
+ vfrstpi.b vr3, vr3, 0
+
+ vshuf.b vr0, vr0, vr0, vr3
+ vshuf.b vr1, vr1, vr1, vr3
+ vpickve2gr.bu t0, vr0, 0
+ vpickve2gr.bu t1, vr1, 0
+
+ sub.d a0, t0, t1
+ jr ra
+L(al_early_end):
+ vreplgr2vr.b vr4, a2
+ vslt.b vr4, vr2, vr4
+
+
+ vorn.v vr3, vr3, vr4
+ b L(al_end)
+L(unaligned):
+ slt a5, a3, a4
+ xor t0, a0, a1
+
+ maskeqz t0, t0, a5
+ xor a0, a0, t0
+ xor a1, a1, t0
+ andi a3, a0, 0xf
+
+ andi a4, a1, 0xf
+ xor t0, a0, a3
+ xor t1, a1, a4
+ vld vr0, t0, 0
+
+ vld vr3, t1, 0
+ sub.d t2, t2, a3
+ vreplgr2vr.b vr4, a3
+ vreplgr2vr.b vr5, a4
+
+
+ vaddi.bu vr6, vr2, 16
+ vsub.b vr7, vr4, vr5
+ vsub.b vr6, vr6, vr7
+ vadd.b vr4, vr2, vr4
+
+ vshuf.b vr1, vr3, vr3, vr6
+ vshuf.b vr0, vr7, vr0, vr4
+ vshuf.b vr1, vr7, vr1, vr4
+ vseq.b vr4, vr0, vr1
+
+ vmin.bu vr4, vr0, vr4
+ bgeu t2, a2, L(un_early_end)
+ vsetanyeqz.b fcc0, vr4
+ bcnez fcc0, L(un_end)
+
+ add.d a6, a0, a2
+ vslt.b vr5, vr2, vr5
+ addi.d a7, a6, -1
+ vor.v vr3, vr3, vr5
+
+
+ bstrins.d a7, zero, 3, 0
+ sub.d a2, a6, a7
+L(un_loop):
+ vld vr0, t0, 16
+ addi.d t0, t0, 16
+
+ vsetanyeqz.b fcc0, vr3
+ bcnez fcc0, L(has_zero)
+ beq t0, a7, L(end_with_len)
+ vor.v vr1, vr3, vr3
+
+ vld vr3, t1, 16
+ addi.d t1, t1, 16
+ vshuf.b vr1, vr3, vr1, vr6
+ vseq.b vr4, vr0, vr1
+
+ vmin.bu vr4, vr0, vr4
+ vsetanyeqz.b fcc0, vr4
+ bceqz fcc0, L(un_loop)
+L(un_end):
+ vseqi.b vr4, vr4, 0
+
+
+ vfrstpi.b vr4, vr4, 0
+ vshuf.b vr0, vr0, vr0, vr4
+ vshuf.b vr1, vr1, vr1, vr4
+ vpickve2gr.bu t0, vr0, 0
+
+ vpickve2gr.bu t1, vr1, 0
+ sub.d t2, t0, t1
+ sub.d t3, t1, t0
+ masknez t0, t2, a5
+
+ maskeqz t1, t3, a5
+ or a0, t0, t1
+ jr ra
+L(has_zero):
+ vshuf.b vr1, vr3, vr3, vr6
+
+ vseq.b vr4, vr0, vr1
+ vmin.bu vr4, vr0, vr4
+ bne t0, a7, L(un_end)
+L(un_early_end):
+ vreplgr2vr.b vr5, a2
+
+ vslt.b vr5, vr2, vr5
+ vorn.v vr4, vr4, vr5
+ b L(un_end)
+L(end_with_len):
+ sub.d a6, a3, a4
+
+ bgeu a6, a2, 1f
+ vld vr4, t1, 16
+1:
+ vshuf.b vr1, vr4, vr3, vr6
+ vseq.b vr4, vr0, vr1
+
+ vmin.bu vr4, vr0, vr4
+ vreplgr2vr.b vr5, a2
+ vslt.b vr5, vr2, vr5
+ vorn.v vr4, vr4, vr5
+
+ b L(un_end)
+L(ret0):
+ move a0, zero
+ jr ra
+END(STRNCMP)
+
+ .section .rodata.cst16,"M",@progbits,16
+ .align 4
+L(INDEX):
+ .dword 0x0706050403020100
+ .dword 0x0f0e0d0c0b0a0908
+
+libc_hidden_builtin_def (STRNCMP)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp.c b/sysdeps/loongarch/lp64/multiarch/strncmp.c
new file mode 100644
index 00000000..af6d0bc4
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strncmp.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strncmp __redirect_strncmp
+# include <string.h>
+# undef strncmp
+
+# define SYMBOL_NAME strncmp
+# include "ifunc-strncmp.h"
+
+libc_ifunc_redirected (__redirect_strncmp, strncmp, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strncmp, __GI_strncmp, __redirect_strncmp)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strncmp);
+# endif
+#endif
--
2.33.0
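
Since only a handful of short inputs regress, the key correctness constraint on the vector code above is that it match scalar strncmp semantics exactly: compare at most n bytes, stopping at the first differing byte or at a '\0' reached in both strings. A minimal reference one might diff the optimized routines against (an assumed test harness, not part of the patch):

#include <stddef.h>

static int
strncmp_ref (const char *a, const char *b, size_t n)
{
  for (; n > 0; n--, a++, b++)
    {
      unsigned char ca = (unsigned char) *a;
      unsigned char cb = (unsigned char) *b;
      if (ca != cb)
        return ca - cb;          /* first differing byte decides */
      if (ca == '\0')
        return 0;                /* common terminator: equal */
    }
  return 0;                      /* n bytes compared equal */
}

The L(al_early_end)/L(un_early_end) paths in strncmp-lsx.S handle exactly this n-byte cutoff, masking out vector lanes that lie beyond the limit so the contract above holds.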


@@ -0,0 +1,465 @@
From e494d32d3b76eee0d59cfab37789a356459b517a Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Thu, 24 Aug 2023 16:50:17 +0800
Subject: [PATCH 08/29] LoongArch: Add ifunc support for strnlen{aligned, lsx,
lasx}
Based on the glibc microbenchmark, the strnlen-aligned implementation
reduces the runtime by more than 10%, strnlen-lsx by about 50%-78%,
and strnlen-lasx by about 50%-88%.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
.../lp64/multiarch/ifunc-impl-list.c | 8 ++
.../loongarch/lp64/multiarch/ifunc-strnlen.h | 41 +++++++
.../lp64/multiarch/strnlen-aligned.S | 102 ++++++++++++++++++
.../loongarch/lp64/multiarch/strnlen-lasx.S | 100 +++++++++++++++++
.../loongarch/lp64/multiarch/strnlen-lsx.S | 89 +++++++++++++++
sysdeps/loongarch/lp64/multiarch/strnlen.c | 39 +++++++
7 files changed, 382 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index afa51041..c4dd3143 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -3,6 +3,9 @@ sysdep_routines += \
strlen-aligned \
strlen-lsx \
strlen-lasx \
+ strnlen-aligned \
+ strnlen-lsx \
+ strnlen-lasx \
strchr-aligned \
strchr-lsx \
strchr-lasx \
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 25eb96b0..7cec0b77 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -38,6 +38,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned)
)
+ IFUNC_IMPL (i, name, strnlen,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LASX, __strnlen_lasx)
+ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LSX, __strnlen_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_aligned)
+ )
+
IFUNC_IMPL (i, name, strchr,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx)
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h
new file mode 100644
index 00000000..5cf89810
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h
@@ -0,0 +1,41 @@
+/* Common definition for strnlen ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
new file mode 100644
index 00000000..b900430a
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
@@ -0,0 +1,102 @@
+/* Optimized strnlen implementation using basic Loongarch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STRNLEN __strnlen_aligned
+#else
+# define STRNLEN __strnlen
+#endif
+
+LEAF(STRNLEN, 6)
+ beqz a1, L(out)
+ lu12i.w a2, 0x01010
+ andi t1, a0, 0x7
+ move t4, a0
+
+ bstrins.d a0, zero, 2, 0
+ ori a2, a2, 0x101
+ li.w t0, -1
+ ld.d t2, a0, 0
+
+ slli.d t3, t1, 3
+ bstrins.d a2, a2, 63, 32
+ li.w t5, 8
+ slli.d a3, a2, 7
+
+ sub.w t1, t5, t1
+ sll.d t0, t0, t3
+ orn t2, t2, t0
+ sub.d t0, t2, a2
+
+
+ andn t3, a3, t2
+ and t0, t0, t3
+ bnez t0, L(count_pos)
+ sub.d t5, a1, t1
+
+ bgeu t1, a1, L(out)
+ addi.d a0, a0, 8
+L(loop):
+ ld.d t2, a0, 0
+ sub.d t0, t2, a2
+
+ andn t1, a3, t2
+ sltui t6, t5, 9
+ and t0, t0, t1
+ or t7, t0, t6
+
+ bnez t7, L(count_pos)
+ ld.d t2, a0, 8
+ addi.d a0, a0, 16
+ sub.d t0, t2, a2
+
+
+ andn t1, a3, t2
+ sltui t6, t5, 17
+ and t0, t0, t1
+ addi.d t5, t5, -16
+
+ or t7, t0, t6
+ beqz t7, L(loop)
+ addi.d a0, a0, -8
+L(count_pos):
+ ctz.d t1, t0
+
+ sub.d a0, a0, t4
+ srli.d t1, t1, 3
+ add.d a0, t1, a0
+ sltu t0, a0, a1
+
+ masknez t1, a1, t0
+ maskeqz a0, a0, t0
+ or a0, a0, t1
+ jr ra
+
+
+L(out):
+ move a0, a1
+ jr ra
+END(STRNLEN)
+
+weak_alias (STRNLEN, strnlen)
+libc_hidden_builtin_def (STRNLEN)
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
new file mode 100644
index 00000000..2c03d3d9
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
@@ -0,0 +1,100 @@
+/* Optimized strnlen implementation using loongarch LASX instructions
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define STRNLEN __strnlen_lasx
+
+LEAF(STRNLEN, 6)
+ beqz a1, L(ret0)
+ andi t1, a0, 0x3f
+ li.d t3, 65
+ sub.d a2, a0, t1
+
+ xvld xr0, a2, 0
+ xvld xr1, a2, 32
+ sub.d t1, t3, t1
+ move a3, a0
+
+ sltu t1, a1, t1
+ xvmsknz.b xr0, xr0
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr2, xr0, 4
+
+ xvpickve.w xr3, xr1, 4
+ vilvl.h vr0, vr2, vr0
+ vilvl.h vr1, vr3, vr1
+ vilvl.w vr0, vr1, vr0
+
+
+ movfr2gr.d t0, fa0
+ sra.d t0, t0, a0
+ orn t1, t1, t0
+ bnez t1, L(end)
+
+ add.d a4, a0, a1
+ move a0, a2
+ addi.d a4, a4, -1
+ bstrins.d a4, zero, 5, 0
+
+L(loop):
+ xvld xr0, a0, 64
+ xvld xr1, a0, 96
+ addi.d a0, a0, 64
+ beq a0, a4, L(out)
+
+ xvmin.bu xr2, xr0, xr1
+ xvsetanyeqz.b fcc0, xr2
+ bceqz fcc0, L(loop)
+L(out):
+ xvmsknz.b xr0, xr0
+
+
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr2, xr0, 4
+ xvpickve.w xr3, xr1, 4
+ vilvl.h vr0, vr2, vr0
+
+ vilvl.h vr1, vr3, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+L(end):
+ sub.d a0, a0, a3
+
+ cto.d t0, t0
+ add.d a0, a0, t0
+ sltu t1, a0, a1
+ masknez t0, a1, t1
+
+ maskeqz t1, a0, t1
+ or a0, t0, t1
+ jr ra
+L(ret0):
+ move a0, zero
+
+
+ jr ra
+END(STRNLEN)
+
+libc_hidden_def (STRNLEN)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
new file mode 100644
index 00000000..b769a895
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
@@ -0,0 +1,89 @@
+/* Optimized strnlen implementation using loongarch LSX instructions
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define STRNLEN __strnlen_lsx
+
+LEAF(STRNLEN, 6)
+ beqz a1, L(ret0)
+ andi t1, a0, 0x1f
+ li.d t3, 33
+ sub.d a2, a0, t1
+
+ vld vr0, a2, 0
+ vld vr1, a2, 16
+ sub.d t1, t3, t1
+ move a3, a0
+
+ sltu t1, a1, t1
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+
+ movfr2gr.s t0, fa0
+ sra.w t0, t0, a0
+ orn t1, t1, t0
+ bnez t1, L(end)
+
+
+ add.d a4, a0, a1
+ move a0, a2
+ addi.d a4, a4, -1
+ bstrins.d a4, zero, 4, 0
+
+L(loop):
+ vld vr0, a0, 32
+ vld vr1, a0, 48
+ addi.d a0, a0, 32
+ beq a0, a4, L(out)
+
+ vmin.bu vr2, vr0, vr1
+ vsetanyeqz.b fcc0, vr2
+ bceqz fcc0, L(loop)
+L(out):
+ vmsknz.b vr0, vr0
+
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+L(end):
+ sub.d a0, a0, a3
+
+
+ cto.w t0, t0
+ add.d a0, a0, t0
+ sltu t1, a0, a1
+ masknez t0, a1, t1
+
+ maskeqz t1, a0, t1
+ or a0, t0, t1
+ jr ra
+L(ret0):
+ move a0, zero
+
+ jr ra
+END(STRNLEN)
+
+libc_hidden_builtin_def (STRNLEN)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen.c b/sysdeps/loongarch/lp64/multiarch/strnlen.c
new file mode 100644
index 00000000..38b7a25a
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen.c
@@ -0,0 +1,39 @@
+/* Multiple versions of strnlen.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strnlen __redirect_strnlen
+# define __strnlen __redirect___strnlen
+# include <string.h>
+# undef __strnlen
+# undef strnlen
+
+# define SYMBOL_NAME strnlen
+# include "ifunc-strnlen.h"
+
+libc_ifunc_redirected (__redirect_strnlen, __strnlen, IFUNC_SELECTOR ());
+weak_alias (__strnlen, strnlen);
+# ifdef SHARED
+__hidden_ver1 (__strnlen, __GI___strnlen, __redirect___strnlen)
+ __attribute__((visibility ("hidden"))) __attribute_copy__ (strnlen);
+__hidden_ver1 (strnlen, __GI_strnlen, __redirect_strnlen)
+ __attribute__((weak, visibility ("hidden"))) __attribute_copy__ (strnlen);
+# endif
+#endif
--
2.33.0


@@ -0,0 +1,670 @@
From d537d0ab45a55048c8da483e73be4448ddb45525 Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Wed, 13 Sep 2023 15:35:00 +0800
Subject: [PATCH 23/29] LoongArch: Add ifunc support for strrchr{aligned, lsx,
 lasx}

According to the glibc strrchr microbenchmark results, this implementation
reduces runtime as follows:

Name              Percent of runtime reduced
strrchr-lasx      10%-50%
strrchr-lsx       0%-50%
strrchr-aligned   5%-50%

Generic strrchr is implemented as strlen + memrchr. The lasx version is
compared against a generic strrchr built from strlen-lasx + memrchr-lasx,
the lsx version against strlen-lsx + memrchr-lsx, and the aligned version
against strlen-aligned + memrchr-generic.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
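
For reference, the generic strrchr baseline named above is a two-call composition; a minimal sketch of its shape (the actual generic code lives in glibc's string/ directory, this only shows what is being benchmarked):

#include <string.h>

/* strlen finds the terminating NUL once; memrchr then scans backwards.
   Searching strlen (s) + 1 bytes also handles c == '\0'.  */
char *
generic_strrchr (const char *s, int c)
{
  return (char *) memrchr (s, c, strlen (s) + 1);
}
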
---
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
.../lp64/multiarch/ifunc-impl-list.c | 8 +
.../loongarch/lp64/multiarch/ifunc-strrchr.h | 41 ++++
.../lp64/multiarch/strrchr-aligned.S | 170 +++++++++++++++++
.../loongarch/lp64/multiarch/strrchr-lasx.S | 176 ++++++++++++++++++
.../loongarch/lp64/multiarch/strrchr-lsx.S | 144 ++++++++++++++
sysdeps/loongarch/lp64/multiarch/strrchr.c | 36 ++++
7 files changed, 578 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 39550bea..fe863e1b 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -9,6 +9,9 @@ sysdep_routines += \
strchr-aligned \
strchr-lsx \
strchr-lasx \
+ strrchr-aligned \
+ strrchr-lsx \
+ strrchr-lasx \
strchrnul-aligned \
strchrnul-lsx \
strchrnul-lasx \
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 39a14f1d..529e2369 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -94,6 +94,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned)
)
+ IFUNC_IMPL (i, name, strrchr,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LASX, __strrchr_lasx)
+ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LSX, __strrchr_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_aligned)
+ )
+
IFUNC_IMPL (i, name, memcpy,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx)
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h
new file mode 100644
index 00000000..bbb34089
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h
@@ -0,0 +1,41 @@
+/* Common definition for strrchr ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
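
Outside glibc, the same selection policy can be sketched with the GNU ifunc attribute and the Linux/LoongArch hwcap bits that SUPPORT_LASX/SUPPORT_LSX reduce to. This is a user-space sketch, not the glibc mechanism; the __strrchr_* declarations stand in for the variants added by this patch and would not link as-is:

#include <sys/auxv.h>   /* getauxval */
#include <asm/hwcap.h>  /* HWCAP_LOONGARCH_LSX, HWCAP_LOONGARCH_LASX */

extern char *__strrchr_lasx (const char *, int);
extern char *__strrchr_lsx (const char *, int);
extern char *__strrchr_aligned (const char *, int);

/* Resolver run once at load time, mirroring IFUNC_SELECTOR: prefer
   LASX, then LSX, then the plain aligned fallback.  */
static char *(*resolve_strrchr (void)) (const char *, int)
{
  unsigned long hwcap = getauxval (AT_HWCAP);
  if (hwcap & HWCAP_LOONGARCH_LASX)
    return __strrchr_lasx;
  if (hwcap & HWCAP_LOONGARCH_LSX)
    return __strrchr_lsx;
  return __strrchr_aligned;
}

char *my_strrchr (const char *, int)
  __attribute__ ((ifunc ("resolve_strrchr")));
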
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
new file mode 100644
index 00000000..a73deb78
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
@@ -0,0 +1,170 @@
+/* Optimized strrchr implementation using basic LoongArch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STRRCHR __strrchr_aligned
+#else
+# define STRRCHR strrchr
+#endif
+
+LEAF(STRRCHR, 6)
+ slli.d t0, a0, 3
+ bstrins.d a0, zero, 2, 0
+ lu12i.w a2, 0x01010
+ ld.d t2, a0, 0
+
+ andi a1, a1, 0xff
+ ori a2, a2, 0x101
+ li.d t3, -1
+ bstrins.d a2, a2, 63, 32
+
+ sll.d t5, t3, t0
+ slli.d a3, a2, 7
+ orn t4, t2, t5
+ mul.d a1, a1, a2
+
+ sub.d t0, t4, a2
+ andn t1, a3, t4
+ and t1, t0, t1
+ beqz t1, L(find_tail)
+
+
+ ctz.d t0, t1
+ orn t0, zero, t0
+ xor t2, t4, a1
+ srl.d t0, t3, t0
+
+ orn t2, t2, t0
+ orn t2, t2, t5
+ revb.d t2, t2
+ sub.d t1, t2, a2
+
+ andn t0, a3, t2
+ and t1, t0, t1
+ ctz.d t0, t1
+ srli.d t0, t0, 3
+
+ addi.d a0, a0, 7
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+
+
+L(find_tail):
+ addi.d a4, a0, 8
+ addi.d a0, a0, 8
+L(loop_ascii):
+ ld.d t2, a0, 0
+ sub.d t1, t2, a2
+
+ and t0, t1, a3
+ bnez t0, L(more_check)
+ ld.d t2, a0, 8
+ sub.d t1, t2, a2
+
+ and t0, t1, a3
+ addi.d a0, a0, 16
+ beqz t0, L(loop_ascii)
+ addi.d a0, a0, -8
+
+L(more_check):
+ andn t0, a3, t2
+ and t1, t1, t0
+ bnez t1, L(tail)
+ addi.d a0, a0, 8
+
+
+L(loop_nonascii):
+ ld.d t2, a0, 0
+ sub.d t1, t2, a2
+ andn t0, a3, t2
+ and t1, t0, t1
+
+ bnez t1, L(tail)
+ ld.d t2, a0, 8
+ addi.d a0, a0, 16
+ sub.d t1, t2, a2
+
+ andn t0, a3, t2
+ and t1, t0, t1
+ beqz t1, L(loop_nonascii)
+ addi.d a0, a0, -8
+
+L(tail):
+ ctz.d t0, t1
+ orn t0, zero, t0
+ xor t2, t2, a1
+ srl.d t0, t3, t0
+
+
+ orn t2, t2, t0
+ revb.d t2, t2
+ sub.d t1, t2, a2
+ andn t0, a3, t2
+
+ and t1, t0, t1
+ bnez t1, L(count_pos)
+L(find_loop):
+ beq a0, a4, L(find_end)
+ ld.d t2, a0, -8
+
+ addi.d a0, a0, -8
+ xor t2, t2, a1
+ sub.d t1, t2, a2
+ andn t0, a3, t2
+
+ and t1, t0, t1
+ beqz t1, L(find_loop)
+ revb.d t2, t2
+ sub.d t1, t2, a2
+
+
+ andn t0, a3, t2
+ and t1, t0, t1
+L(count_pos):
+ ctz.d t0, t1
+ addi.d a0, a0, 7
+
+ srli.d t0, t0, 3
+ sub.d a0, a0, t0
+ jr ra
+ nop
+
+L(find_end):
+ xor t2, t4, a1
+ orn t2, t2, t5
+ revb.d t2, t2
+ sub.d t1, t2, a2
+
+
+ andn t0, a3, t2
+ and t1, t0, t1
+ ctz.d t0, t1
+ srli.d t0, t0, 3
+
+ addi.d a0, a4, -1
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+END(STRRCHR)
+
+libc_hidden_builtin_def(STRRCHR)
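
The per-word search above relies on one trick worth spelling out: XOR with the repeated character turns matches into zero bytes, and because the zero-byte test is only exact at its lowest set bit, the word is byte-reversed (revb.d) so that count-trailing-zeros finds the last match rather than the first. A C sketch under those assumptions (little-endian, 64-bit; not glibc source):

#include <stdint.h>

/* Index (0-7) of the last byte of WORD equal to C, or -1 if none.  */
static inline int
last_match_in_word (uint64_t word, uint8_t c)
{
  const uint64_t ones  = 0x0101010101010101ULL;
  const uint64_t highs = 0x8080808080808080ULL;
  /* revb.d: reverse bytes first, so the first zero byte found below
     corresponds to the last matching byte of the original word.  */
  uint64_t x = __builtin_bswap64 (word ^ (ones * (uint64_t) c));
  uint64_t m = (x - ones) & ~x & highs;
  if (m == 0)
    return -1;
  return 7 - (__builtin_ctzll (m) >> 3);
}
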
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S
new file mode 100644
index 00000000..5a6e2297
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S
@@ -0,0 +1,176 @@
+/* Optimized strrchr implementation using LoongArch LASX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+#define STRRCHR __strrchr_lasx
+
+LEAF(STRRCHR, 6)
+ move a2, a0
+ bstrins.d a0, zero, 5, 0
+ xvld xr0, a0, 0
+ xvld xr1, a0, 32
+
+ li.d t2, -1
+ xvreplgr2vr.b xr4, a1
+ xvmsknz.b xr2, xr0
+ xvmsknz.b xr3, xr1
+
+ xvpickve.w xr5, xr2, 4
+ xvpickve.w xr6, xr3, 4
+ vilvl.h vr2, vr5, vr2
+ vilvl.h vr3, vr6, vr3
+
+ vilvl.w vr2, vr3, vr2
+ movfr2gr.d t0, fa2
+ sra.d t0, t0, a2
+ beq t0, t2, L(find_tail)
+
+
+ xvseq.b xr2, xr0, xr4
+ xvseq.b xr3, xr1, xr4
+ xvmsknz.b xr2, xr2
+ xvmsknz.b xr3, xr3
+
+ xvpickve.w xr4, xr2, 4
+ xvpickve.w xr5, xr3, 4
+ vilvl.h vr2, vr4, vr2
+ vilvl.h vr3, vr5, vr3
+
+ vilvl.w vr1, vr3, vr2
+ slli.d t3, t2, 1
+ movfr2gr.d t1, fa1
+ cto.d t0, t0
+
+ srl.d t1, t1, a2
+ sll.d t3, t3, t0
+ addi.d a0, a2, 63
+ andn t1, t1, t3
+
+
+ clz.d t0, t1
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+
+ .align 5
+L(find_tail):
+ addi.d a3, a0, 64
+L(loop):
+ xvld xr2, a0, 64
+ xvld xr3, a0, 96
+ addi.d a0, a0, 64
+
+ xvmin.bu xr5, xr2, xr3
+ xvsetanyeqz.b fcc0, xr5
+ bceqz fcc0, L(loop)
+ xvmsknz.b xr5, xr2
+
+
+ xvmsknz.b xr6, xr3
+ xvpickve.w xr7, xr5, 4
+ xvpickve.w xr8, xr6, 4
+ vilvl.h vr5, vr7, vr5
+
+ vilvl.h vr6, vr8, vr6
+ xvseq.b xr2, xr2, xr4
+ xvseq.b xr3, xr3, xr4
+ xvmsknz.b xr2, xr2
+
+ xvmsknz.b xr3, xr3
+ xvpickve.w xr7, xr2, 4
+ xvpickve.w xr8, xr3, 4
+ vilvl.h vr2, vr7, vr2
+
+ vilvl.h vr3, vr8, vr3
+ vilvl.w vr5, vr6, vr5
+ vilvl.w vr2, vr3, vr2
+ movfr2gr.d t0, fa5
+
+
+ movfr2gr.d t1, fa2
+ slli.d t3, t2, 1
+ cto.d t0, t0
+ sll.d t3, t3, t0
+
+ andn t1, t1, t3
+ beqz t1, L(find_loop)
+ clz.d t0, t1
+ addi.d a0, a0, 63
+
+ sub.d a0, a0, t0
+ jr ra
+L(find_loop):
+ beq a0, a3, L(find_end)
+ xvld xr2, a0, -64
+
+ xvld xr3, a0, -32
+ addi.d a0, a0, -64
+ xvseq.b xr2, xr2, xr4
+ xvseq.b xr3, xr3, xr4
+
+
+ xvmax.bu xr5, xr2, xr3
+ xvseteqz.v fcc0, xr5
+ bcnez fcc0, L(find_loop)
+ xvmsknz.b xr0, xr2
+
+ xvmsknz.b xr1, xr3
+ xvpickve.w xr2, xr0, 4
+ xvpickve.w xr3, xr1, 4
+ vilvl.h vr0, vr2, vr0
+
+ vilvl.h vr1, vr3, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+ addi.d a0, a0, 63
+
+ clz.d t0, t0
+ sub.d a0, a0, t0
+ jr ra
+ nop
+
+
+L(find_end):
+ xvseq.b xr2, xr0, xr4
+ xvseq.b xr3, xr1, xr4
+ xvmsknz.b xr2, xr2
+ xvmsknz.b xr3, xr3
+
+ xvpickve.w xr4, xr2, 4
+ xvpickve.w xr5, xr3, 4
+ vilvl.h vr2, vr4, vr2
+ vilvl.h vr3, vr5, vr3
+
+ vilvl.w vr1, vr3, vr2
+ movfr2gr.d t1, fa1
+ addi.d a0, a2, 63
+ srl.d t1, t1, a2
+
+ clz.d t0, t1
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+END(STRRCHR)
+
+libc_hidden_builtin_def(STRRCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S
new file mode 100644
index 00000000..8f2fd22e
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S
@@ -0,0 +1,144 @@
+/* Optimized strrchr implementation using LoongArch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+#define STRRCHR __strrchr_lsx
+
+LEAF(STRRCHR, 6)
+ move a2, a0
+ bstrins.d a0, zero, 4, 0
+ vld vr0, a0, 0
+ vld vr1, a0, 16
+
+ li.d t2, -1
+ vreplgr2vr.b vr4, a1
+ vmsknz.b vr2, vr0
+ vmsknz.b vr3, vr1
+
+ vilvl.h vr2, vr3, vr2
+ movfr2gr.s t0, fa2
+ sra.w t0, t0, a2
+ beq t0, t2, L(find_tail)
+
+ vseq.b vr2, vr0, vr4
+ vseq.b vr3, vr1, vr4
+ vmsknz.b vr2, vr2
+ vmsknz.b vr3, vr3
+
+
+ vilvl.h vr1, vr3, vr2
+ slli.d t3, t2, 1
+ movfr2gr.s t1, fa1
+ cto.w t0, t0
+
+ srl.w t1, t1, a2
+ sll.d t3, t3, t0
+ addi.d a0, a2, 31
+ andn t1, t1, t3
+
+ clz.w t0, t1
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+
+ .align 5
+L(find_tail):
+ addi.d a3, a0, 32
+L(loop):
+ vld vr2, a0, 32
+ vld vr3, a0, 48
+ addi.d a0, a0, 32
+
+ vmin.bu vr5, vr2, vr3
+ vsetanyeqz.b fcc0, vr5
+ bceqz fcc0, L(loop)
+ vmsknz.b vr5, vr2
+
+ vmsknz.b vr6, vr3
+ vilvl.h vr5, vr6, vr5
+ vseq.b vr2, vr2, vr4
+ vseq.b vr3, vr3, vr4
+
+ vmsknz.b vr2, vr2
+ vmsknz.b vr3, vr3
+ vilvl.h vr2, vr3, vr2
+ movfr2gr.s t0, fa5
+
+
+ movfr2gr.s t1, fa2
+ slli.d t3, t2, 1
+ cto.w t0, t0
+ sll.d t3, t3, t0
+
+ andn t1, t1, t3
+ beqz t1, L(find_loop)
+ clz.w t0, t1
+ addi.d a0, a0, 31
+
+ sub.d a0, a0, t0
+ jr ra
+L(find_loop):
+ beq a0, a3, L(find_end)
+ vld vr2, a0, -32
+
+ vld vr3, a0, -16
+ addi.d a0, a0, -32
+ vseq.b vr2, vr2, vr4
+ vseq.b vr3, vr3, vr4
+
+
+ vmax.bu vr5, vr2, vr3
+ vseteqz.v fcc0, vr5
+ bcnez fcc0, L(find_loop)
+ vmsknz.b vr0, vr2
+
+ vmsknz.b vr1, vr3
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+ addi.d a0, a0, 31
+
+ clz.w t0, t0
+ sub.d a0, a0, t0
+ jr ra
+ nop
+
+L(find_end):
+ vseq.b vr2, vr0, vr4
+ vseq.b vr3, vr1, vr4
+ vmsknz.b vr2, vr2
+ vmsknz.b vr3, vr3
+
+
+ vilvl.h vr1, vr3, vr2
+ movfr2gr.s t1, fa1
+ addi.d a0, a2, 31
+ srl.w t1, t1, a2
+
+ clz.w t0, t1
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+END(STRRCHR)
+
+libc_hidden_builtin_def(STRRCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr.c b/sysdeps/loongarch/lp64/multiarch/strrchr.c
new file mode 100644
index 00000000..d9c9f660
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr.c
@@ -0,0 +1,36 @@
+/* Multiple versions of strrchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strrchr __redirect_strrchr
+# include <string.h>
+# undef strrchr
+
+# define SYMBOL_NAME strrchr
+# include "ifunc-strrchr.h"
+
+libc_ifunc_redirected (__redirect_strrchr, strrchr, IFUNC_SELECTOR ());
+weak_alias (strrchr, rindex)
+# ifdef SHARED
+__hidden_ver1 (strrchr, __GI_strrchr, __redirect_strrchr)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strrchr);
+# endif
+
+#endif
--
2.33.0


@@ -0,0 +1,626 @@
From b5979df8ad07823c79a934c1fa0a91ec0abffb61 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Fri, 8 Sep 2023 14:10:55 +0800
Subject: [PATCH 20/29] LoongArch: Add lasx/lsx support for
_dl_runtime_profile.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/bits/link.h | 24 ++-
sysdeps/loongarch/bits/link_lavcurrent.h | 25 +++
sysdeps/loongarch/dl-audit-check.h | 23 +++
sysdeps/loongarch/dl-link.sym | 8 +-
sysdeps/loongarch/dl-machine.h | 11 +-
sysdeps/loongarch/dl-trampoline.S | 177 +----------------
sysdeps/loongarch/dl-trampoline.h | 242 +++++++++++++++++++++++
7 files changed, 331 insertions(+), 179 deletions(-)
create mode 100644 sysdeps/loongarch/bits/link_lavcurrent.h
create mode 100644 sysdeps/loongarch/dl-audit-check.h
diff --git a/sysdeps/loongarch/bits/link.h b/sysdeps/loongarch/bits/link.h
index 7fa61312..00f6f25f 100644
--- a/sysdeps/loongarch/bits/link.h
+++ b/sysdeps/loongarch/bits/link.h
@@ -20,10 +20,26 @@
#error "Never include <bits/link.h> directly; use <link.h> instead."
#endif
+#ifndef __loongarch_soft_float
+typedef float La_loongarch_vr
+ __attribute__ ((__vector_size__ (16), __aligned__ (16)));
+typedef float La_loongarch_xr
+ __attribute__ ((__vector_size__ (32), __aligned__ (16)));
+
+typedef union
+{
+ double fpreg[4];
+ La_loongarch_vr vr[2];
+ La_loongarch_xr xr[1];
+} La_loongarch_vector __attribute__ ((__aligned__ (16)));
+#endif
+
typedef struct La_loongarch_regs
{
unsigned long int lr_reg[8]; /* a0 - a7 */
- double lr_fpreg[8]; /* fa0 - fa7 */
+#ifndef __loongarch_soft_float
+ La_loongarch_vector lr_vec[8]; /* fa0 - fa7 or vr0 - vr7 or xr0 - xr7*/
+#endif
unsigned long int lr_ra;
unsigned long int lr_sp;
} La_loongarch_regs;
@@ -33,8 +49,10 @@ typedef struct La_loongarch_retval
{
unsigned long int lrv_a0;
unsigned long int lrv_a1;
- double lrv_fa0;
- double lrv_fa1;
+#ifndef __loongarch_soft_float
+ La_loongarch_vector lrv_vec0;
+ La_loongarch_vector lrv_vec1;
+#endif
} La_loongarch_retval;
__BEGIN_DECLS
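
The union above lets one save slot hold whichever form the register file actually carries: a double (fa0), an LSX vr register, or a LASX xr register, all overlaid at offset 0. A sketch of the layout it guarantees (assuming a LoongArch hard-float toolchain; the type comes from <link.h>, and the checks are illustrative, not part of the patch):

#include <link.h>

/* Each slot is as wide as one LASX register and aligned for vld/xvld.  */
_Static_assert (sizeof (La_loongarch_vector) == 32, "one xr register");
_Static_assert (_Alignof (La_loongarch_vector) == 16, "16-byte aligned");
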
diff --git a/sysdeps/loongarch/bits/link_lavcurrent.h b/sysdeps/loongarch/bits/link_lavcurrent.h
new file mode 100644
index 00000000..15f1eb84
--- /dev/null
+++ b/sysdeps/loongarch/bits/link_lavcurrent.h
@@ -0,0 +1,25 @@
+/* Data structure for communication from the run-time dynamic linker for
+ loaded ELF shared objects. LAV_CURRENT definition.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _LINK_H
+# error "Never include <bits/link_lavcurrent.h> directly; use <link.h> instead."
+#endif
+
+/* Version numbers for la_version handshake interface. */
+#define LAV_CURRENT 3
diff --git a/sysdeps/loongarch/dl-audit-check.h b/sysdeps/loongarch/dl-audit-check.h
new file mode 100644
index 00000000..a139c939
--- /dev/null
+++ b/sysdeps/loongarch/dl-audit-check.h
@@ -0,0 +1,23 @@
+/* rtld-audit version check. LoongArch version.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+static inline bool
+_dl_audit_check_version (unsigned int lav)
+{
+ return lav == LAV_CURRENT;
+}
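
This is the strict half of the rtld-audit handshake: the dynamic linker passes LAV_CURRENT (now 3, per link_lavcurrent.h above) to an audit module's la_version, and on LoongArch only an exact match keeps the module active, since the La_loongarch_regs layout changed. A minimal module sketch honoring that contract:

#include <link.h>

/* ld.so ignores the module if la_version returns 0.  */
unsigned int
la_version (unsigned int version)
{
  return version == LAV_CURRENT ? LAV_CURRENT : 0;
}

Built with gcc -shared -fPIC and loaded via LD_AUDIT, a module written against the old register layout is now rejected instead of silently reading the wrong save area.
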
diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
index 868ab7c6..b534968e 100644
--- a/sysdeps/loongarch/dl-link.sym
+++ b/sysdeps/loongarch/dl-link.sym
@@ -6,9 +6,13 @@ DL_SIZEOF_RG sizeof(struct La_loongarch_regs)
DL_SIZEOF_RV sizeof(struct La_loongarch_retval)
DL_OFFSET_RG_A0 offsetof(struct La_loongarch_regs, lr_reg)
-DL_OFFSET_RG_FA0 offsetof(struct La_loongarch_regs, lr_fpreg)
+#ifndef __loongarch_soft_float
+DL_OFFSET_RG_VEC0 offsetof(struct La_loongarch_regs, lr_vec)
+#endif
DL_OFFSET_RG_RA offsetof(struct La_loongarch_regs, lr_ra)
DL_OFFSET_RG_SP offsetof(struct La_loongarch_regs, lr_sp)
DL_OFFSET_RV_A0 offsetof(struct La_loongarch_retval, lrv_a0)
-DL_OFFSET_RV_FA0 offsetof(struct La_loongarch_retval, lrv_a1)
+#ifndef __loongarch_soft_float
+DL_OFFSET_RV_VEC0 offsetof(struct La_loongarch_retval, lrv_vec0)
+#endif
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index 066bb233..8a2db9de 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -273,6 +273,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
#if !defined __loongarch_soft_float
extern void _dl_runtime_resolve_lasx (void) attribute_hidden;
extern void _dl_runtime_resolve_lsx (void) attribute_hidden;
+ extern void _dl_runtime_profile_lasx (void) attribute_hidden;
+ extern void _dl_runtime_profile_lsx (void) attribute_hidden;
#endif
extern void _dl_runtime_resolve (void) attribute_hidden;
extern void _dl_runtime_profile (void) attribute_hidden;
@@ -287,7 +289,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
end in this function. */
if (profile != 0)
{
- gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile;
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx;
+ else if (SUPPORT_LSX)
+ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx;
+ else
+#endif
+ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile;
if (GLRO(dl_profile) != NULL
&& _dl_name_match_p (GLRO(dl_profile), l))
diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S
index 8fd91469..bb449ecf 100644
--- a/sysdeps/loongarch/dl-trampoline.S
+++ b/sysdeps/loongarch/dl-trampoline.S
@@ -22,190 +22,21 @@
#if !defined __loongarch_soft_float
#define USE_LASX
#define _dl_runtime_resolve _dl_runtime_resolve_lasx
+#define _dl_runtime_profile _dl_runtime_profile_lasx
#include "dl-trampoline.h"
#undef FRAME_SIZE
#undef USE_LASX
#undef _dl_runtime_resolve
+#undef _dl_runtime_profile
#define USE_LSX
#define _dl_runtime_resolve _dl_runtime_resolve_lsx
+#define _dl_runtime_profile _dl_runtime_profile_lsx
#include "dl-trampoline.h"
#undef FRAME_SIZE
#undef USE_LSX
#undef _dl_runtime_resolve
+#undef _dl_runtime_profile
#endif
#include "dl-trampoline.h"
-
-#include "dl-link.h"
-
-ENTRY (_dl_runtime_profile)
- /* LoongArch we get called with:
- t0 linkr_map pointer
- t1 the scaled offset stored in t0, which can be used
- to calculate the offset of the current symbol in .rela.plt
- t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function
- t3 dl resolver entry point, no use in this function
-
- Stack frame layout:
- [sp, #96] La_loongarch_regs
- [sp, #48] La_loongarch_retval
- [sp, #40] frame size return from pltenter
- [sp, #32] dl_profile_call saved a1
- [sp, #24] dl_profile_call saved a0
- [sp, #16] T1
- [sp, #0] ra, fp <- fp
- */
-
-# define OFFSET_T1 16
-# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8
-# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16
-# define OFFSET_RV OFFSET_FS + 8
-# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV
-
-# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK))
-
- /* Save arguments to stack. */
- ADDI sp, sp, -SF_SIZE
- REG_S ra, sp, 0
- REG_S fp, sp, 8
-
- or fp, sp, zero
-
- REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
- REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
- REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
- REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
- REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
- REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
- REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
- REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
-
-#ifndef __loongarch_soft_float
- FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
- FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
- FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
- FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
- FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
- FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
- FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
- FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
-#endif
-
- /* Update .got.plt and obtain runtime address of callee. */
- SLLI a1, t1, 1
- or a0, t0, zero
- ADD a1, a1, t1
- or a2, ra, zero /* return addr */
- ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */
- ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */
-
- REG_S a0, fp, OFFSET_SAVED_CALL_A0
- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
-
- la t2, _dl_profile_fixup
- jirl ra, t2, 0
-
- REG_L t3, fp, OFFSET_FS
- bge t3, zero, 1f
-
- /* Save the return. */
- or t4, v0, zero
-
- /* Restore arguments from stack. */
- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
-
-#ifndef __loongarch_soft_float
- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
-#endif
-
- REG_L ra, fp, 0
- REG_L fp, fp, SZREG
-
- ADDI sp, sp, SF_SIZE
- jirl zero, t4, 0
-
-1:
- /* The new frame size is in t3. */
- SUB sp, fp, t3
- BSTRINS sp, zero, 3, 0
-
- REG_S a0, fp, OFFSET_T1
-
- or a0, sp, zero
- ADDI a1, fp, SF_SIZE
- or a2, t3, zero
- la t5, memcpy
- jirl ra, t5, 0
-
- REG_L t6, fp, OFFSET_T1
-
- /* Call the function. */
- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
-
-#ifndef __loongarch_soft_float
- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
-#endif
- jirl ra, t6, 0
-
- REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0
- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG
-
-#ifndef __loongarch_soft_float
- FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0
- FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 + SZFREG
-#endif
-
- /* Setup call to pltexit. */
- REG_L a0, fp, OFFSET_SAVED_CALL_A0
- REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
- ADDI a2, fp, OFFSET_RG
- ADDI a3, fp, OFFSET_RV
- la t7, _dl_audit_pltexit
- jirl ra, t7, 0
-
- REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0
- REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG
-
-#ifndef __loongarch_soft_float
- FREG_L fa0, fp, OFFSET_RV + DL_OFFSET_RV_FA0
- FREG_L fa1, fp, OFFSET_RV + DL_OFFSET_RV_FA0 + SZFREG
-#endif
-
- /* RA from within La_loongarch_reg. */
- REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA
- or sp, fp, zero
- ADDI sp, sp, SF_SIZE
- REG_S fp, fp, SZREG
-
- jirl zero, ra, 0
-
-END (_dl_runtime_profile)
diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h
index 99fcacab..e298439d 100644
--- a/sysdeps/loongarch/dl-trampoline.h
+++ b/sysdeps/loongarch/dl-trampoline.h
@@ -125,3 +125,245 @@ ENTRY (_dl_runtime_resolve)
/* Invoke the callee. */
jirl zero, t1, 0
END (_dl_runtime_resolve)
+
+#include "dl-link.h"
+
+ENTRY (_dl_runtime_profile)
+ /* LoongArch we get called with:
+ t0 linkr_map pointer
+ t1 the scaled offset stored in t0, which can be used
+ to calculate the offset of the current symbol in .rela.plt
+ t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function
+ t3 dl resolver entry point, no use in this function
+
+ Stack frame layout:
+ [sp, #208] La_loongarch_regs
+ [sp, #128] La_loongarch_retval // align: 16
+ [sp, #112] frame size return from pltenter
+ [sp, #80 ] dl_profile_call saved vec1
+ [sp, #48 ] dl_profile_call saved vec0 // align: 16
+ [sp, #32 ] dl_profile_call saved a1
+ [sp, #24 ] dl_profile_call saved a0
+ [sp, #16 ] T1
+ [sp, #0 ] ra, fp <- fp
+ */
+
+# define OFFSET_T1 16
+# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8
+# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 + 8 + 64
+# define OFFSET_RV OFFSET_FS + 8 + 8
+# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV
+
+# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK))
+
+ /* Save arguments to stack. */
+ ADDI sp, sp, -SF_SIZE
+ REG_S ra, sp, 0
+ REG_S fp, sp, 8
+
+ or fp, sp, zero
+
+ REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
+ REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
+ REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
+ REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
+ REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
+ REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
+ REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
+ REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
+
+#ifdef USE_LASX
+ xvst xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
+ xvst xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
+ xvst xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
+ xvst xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
+ xvst xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
+ xvst xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
+ xvst xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
+ xvst xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
+#elif defined USE_LSX
+ vst vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
+ vst vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
+ vst vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
+ vst vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
+ vst vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
+ vst vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
+ vst vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
+ vst vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
+ FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
+ FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
+ FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
+ FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
+ FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
+ FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
+ FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
+#endif
+
+ /* Update .got.plt and obtain runtime address of callee. */
+ SLLI a1, t1, 1
+ or a0, t0, zero
+ ADD a1, a1, t1
+ or a2, ra, zero /* return addr */
+ ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */
+ ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */
+
+ REG_S a0, fp, OFFSET_SAVED_CALL_A0
+ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
+
+ la t2, _dl_profile_fixup
+ jirl ra, t2, 0
+
+ REG_L t3, fp, OFFSET_FS
+ bge t3, zero, 1f
+
+ /* Save the return. */
+ or t4, v0, zero
+
+ /* Restore arguments from stack. */
+ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
+ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
+ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
+ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
+ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
+ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
+ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
+ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
+
+#ifdef USE_LASX
+ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
+ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
+ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
+ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
+ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
+ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
+ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
+ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
+#elif defined USE_LSX
+ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
+ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
+ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
+ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
+ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
+ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
+ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
+ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
+ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
+ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
+ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
+ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
+ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
+ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
+ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
+#endif
+
+ REG_L ra, fp, 0
+ REG_L fp, fp, SZREG
+
+ ADDI sp, sp, SF_SIZE
+ jirl zero, t4, 0
+
+1:
+ /* The new frame size is in t3. */
+ SUB sp, fp, t3
+ BSTRINS sp, zero, 3, 0
+
+ REG_S a0, fp, OFFSET_T1
+
+ or a0, sp, zero
+ ADDI a1, fp, SF_SIZE
+ or a2, t3, zero
+ la t5, memcpy
+ jirl ra, t5, 0
+
+ REG_L t6, fp, OFFSET_T1
+
+ /* Call the function. */
+ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
+ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
+ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
+ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
+ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
+ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
+ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
+ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
+
+#ifdef USE_LASX
+ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
+ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
+ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
+ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
+ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
+ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
+ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
+ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
+#elif defined USE_LSX
+ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
+ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
+ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
+ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
+ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
+ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
+ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
+ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
+ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
+ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
+ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
+ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
+ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
+ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
+ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
+#endif
+
+ jirl ra, t6, 0
+
+ REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0
+ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG
+
+#ifdef USE_LASX
+ xvst xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ xvst xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG
+#elif defined USE_LSX
+ vst vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ vst vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG
+#endif
+
+ /* Setup call to pltexit. */
+ REG_L a0, fp, OFFSET_SAVED_CALL_A0
+ REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
+ ADDI a2, fp, OFFSET_RG
+ ADDI a3, fp, OFFSET_RV
+ la t7, _dl_audit_pltexit
+ jirl ra, t7, 0
+
+ REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0
+ REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG
+
+#ifdef USE_LASX
+ xvld xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ xvld xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG
+#elif defined USE_LSX
+ vld vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ vld vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_L fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
+ FREG_L fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG
+#endif
+
+ /* RA from within La_loongarch_reg. */
+ REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA
+ or sp, fp, zero
+ ADDI sp, sp, SF_SIZE
+ REG_S fp, fp, SZREG
+
+ jirl zero, ra, 0
+
+END (_dl_runtime_profile)
--
2.33.0
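
The frame-layout comment in the new _dl_runtime_profile can be checked against the offset macros. In the hard-float LASX case DL_SIZEOF_RV works out to 80 bytes (2 x 8 for lrv_a0/lrv_a1 plus 2 x 32 for the vector slots), which reproduces the [sp, #208] figure; a sketch of the arithmetic (the DL_SIZEOF_RV value is derived from La_loongarch_retval above, not quoted from the patch):

/* Offsets in bytes from fp, reproducing the macro arithmetic.  */
enum
{
  OFFSET_T1 = 16,
  OFFSET_SAVED_CALL_A0 = OFFSET_T1 + 8,            /* 24  */
  OFFSET_FS = OFFSET_SAVED_CALL_A0 + 16 + 8 + 64,  /* 112 */
  DL_SIZEOF_RV = 80,                               /* LASX retval, assumed */
  OFFSET_RV = OFFSET_FS + 8 + 8,                   /* 128 */
  OFFSET_RG = OFFSET_RV + DL_SIZEOF_RV,            /* 208 */
};
_Static_assert (OFFSET_RG == 208, "matches the [sp, #208] layout line");
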


@@ -0,0 +1,102 @@
From 7353f21f6ed1754b67e455e2b80123787efa9e91 Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Tue, 8 Aug 2023 14:15:43 +0800
Subject: [PATCH 02/29] LoongArch: Add minimum required binutils version

LoongArch glibc now includes LASX/LSX vector instruction code, so the
required minimum binutils version is raised to 2.41, which supports
these instructions. HAVE_LOONGARCH_VEC_ASM is removed accordingly.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
config.h.in | 5 -----
sysdeps/loongarch/configure | 5 ++---
sysdeps/loongarch/configure.ac | 4 ++--
sysdeps/loongarch/dl-machine.h | 4 ++--
sysdeps/loongarch/dl-trampoline.S | 2 +-
5 files changed, 7 insertions(+), 13 deletions(-)
diff --git a/config.h.in b/config.h.in
index 0dedc124..44a34072 100644
--- a/config.h.in
+++ b/config.h.in
@@ -141,11 +141,6 @@
/* LOONGARCH floating-point ABI for ld.so. */
#undef LOONGARCH_ABI_FRLEN
-/* Assembler support LoongArch LASX/LSX vector instructions.
- This macro becomes obsolete when glibc increased the minimum
- required version of GNU 'binutils' to 2.41 or later. */
-#define HAVE_LOONGARCH_VEC_ASM 0
-
/* Linux specific: minimum supported kernel version. */
#undef __LINUX_KERNEL_VERSION
diff --git a/sysdeps/loongarch/configure b/sysdeps/loongarch/configure
index 5843c7cf..395ddc92 100644
--- a/sysdeps/loongarch/configure
+++ b/sysdeps/loongarch/configure
@@ -128,8 +128,7 @@ rm -f conftest*
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_loongarch_vec_asm" >&5
printf "%s\n" "$libc_cv_loongarch_vec_asm" >&6; }
-if test $libc_cv_loongarch_vec_asm = yes; then
- printf "%s\n" "#define HAVE_LOONGARCH_VEC_ASM 1" >>confdefs.h
-
+if test $libc_cv_loongarch_vec_asm = no; then
+ as_fn_error $? "binutils version is too old, use 2.41 or newer version" "$LINENO" 5
fi
diff --git a/sysdeps/loongarch/configure.ac b/sysdeps/loongarch/configure.ac
index ba89d834..989287c6 100644
--- a/sysdeps/loongarch/configure.ac
+++ b/sysdeps/loongarch/configure.ac
@@ -74,6 +74,6 @@ else
libc_cv_loongarch_vec_asm=no
fi
rm -f conftest*])
-if test $libc_cv_loongarch_vec_asm = yes; then
- AC_DEFINE(HAVE_LOONGARCH_VEC_ASM)
+if test $libc_cv_loongarch_vec_asm = no; then
+ AC_MSG_ERROR([binutils version is too old, use 2.41 or newer version])
fi
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index 51ce9af8..066bb233 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -270,7 +270,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* If using PLTs, fill in the first two entries of .got.plt. */
if (l->l_info[DT_JMPREL])
{
-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float
+#if !defined __loongarch_soft_float
extern void _dl_runtime_resolve_lasx (void) attribute_hidden;
extern void _dl_runtime_resolve_lsx (void) attribute_hidden;
#endif
@@ -300,7 +300,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* This function will get called to fix up the GOT entry
indicated by the offset on the stack, and then jump to
the resolved address. */
-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float
+#if !defined __loongarch_soft_float
if (SUPPORT_LASX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
else if (SUPPORT_LSX)
diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S
index f6ba5e44..8fd91469 100644
--- a/sysdeps/loongarch/dl-trampoline.S
+++ b/sysdeps/loongarch/dl-trampoline.S
@@ -19,7 +19,7 @@
#include <sysdep.h>
#include <sys/asm.h>
-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float
+#if !defined __loongarch_soft_float
#define USE_LASX
#define _dl_runtime_resolve _dl_runtime_resolve_lasx
#include "dl-trampoline.h"
--
2.33.0


@@ -0,0 +1,277 @@
From e5ccd79e81de7ad5821fde83875973e878d85d4b Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Mon, 28 Aug 2023 10:08:40 +0800
Subject: [PATCH 19/29] LoongArch: Change loongarch to LoongArch in comments
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/memmove-aligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/memmove-lasx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/memmove-lsx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strchr-aligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strchr-lasx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strchr-lsx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strlen-aligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strlen-lasx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strlen-lsx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S | 2 +-
sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S | 2 +-
24 files changed, 24 insertions(+), 24 deletions(-)
diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S
index 299dd49c..7eb34395 100644
--- a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S
@@ -1,4 +1,4 @@
-/* Optimized memcpy_aligned implementation using basic Loongarch instructions.
+/* Optimized memcpy_aligned implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S
index 4aae5bf8..ae148df5 100644
--- a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S
+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S
@@ -1,4 +1,4 @@
-/* Optimized memcpy implementation using Loongarch LASX instructions.
+/* Optimized memcpy implementation using LoongArch LASX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S
index 6ebbe7a2..feb2bb0e 100644
--- a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S
@@ -1,4 +1,4 @@
-/* Optimized memcpy implementation using Loongarch LSX instructions.
+/* Optimized memcpy implementation using LoongArch LSX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S
index 8e60a22d..31019b13 100644
--- a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S
@@ -1,4 +1,4 @@
-/* Optimized unaligned memcpy implementation using basic Loongarch instructions.
+/* Optimized unaligned memcpy implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S
index 5354f383..a02114c0 100644
--- a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S
@@ -1,4 +1,4 @@
-/* Optimized memmove_aligned implementation using basic Loongarch instructions.
+/* Optimized memmove_aligned implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S
index ff68e7a2..95d8ee7b 100644
--- a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S
+++ b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S
@@ -1,4 +1,4 @@
-/* Optimized memmove implementation using Loongarch LASX instructions.
+/* Optimized memmove implementation using LoongArch LASX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
index 9e1502a7..8a936770 100644
--- a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
@@ -1,4 +1,4 @@
-/* Optimized memmove implementation using Loongarch LSX instructions.
+/* Optimized memmove implementation using LoongArch LSX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S
index 90a64b6b..3284ce25 100644
--- a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S
@@ -1,4 +1,4 @@
-/* Optimized memmove_unaligned implementation using basic Loongarch instructions.
+/* Optimized memmove_unaligned implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
index 5fb01806..62020054 100644
--- a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
@@ -1,4 +1,4 @@
-/* Optimized strchr implementation using basic Loongarch instructions.
+/* Optimized strchr implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
index 254402da..4d3cc588 100644
--- a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
@@ -1,4 +1,4 @@
-/* Optimized strchr implementation using loongarch LASX SIMD instructions.
+/* Optimized strchr implementation using LoongArch LASX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
index dae98b0a..8b78c35c 100644
--- a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
@@ -1,4 +1,4 @@
-/* Optimized strlen implementation using loongarch LSX SIMD instructions.
+/* Optimized strlen implementation using LoongArch LSX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
index 1c01a023..20856a06 100644
--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
@@ -1,4 +1,4 @@
-/* Optimized strchrnul implementation using basic Loongarch instructions.
+/* Optimized strchrnul implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
index d45495e4..4753d4ce 100644
--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
@@ -1,4 +1,4 @@
-/* Optimized strchrnul implementation using loongarch LASX SIMD instructions.
+/* Optimized strchrnul implementation using LoongArch LASX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
index 07d793ae..671e740c 100644
--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
@@ -1,4 +1,4 @@
-/* Optimized strchrnul implementation using loongarch LSX SIMD instructions.
+/* Optimized strchrnul implementation using LoongArch LSX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
index f5f4f336..ba1f9667 100644
--- a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S
@@ -1,4 +1,4 @@
-/* Optimized strcmp implementation using basic Loongarch instructions.
+/* Optimized strcmp implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
index 2e177a38..091c8c9e 100644
--- a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S
@@ -1,4 +1,4 @@
-/* Optimized strcmp implementation using Loongarch LSX instructions.
+/* Optimized strcmp implementation using LoongArch LSX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
index e9e1d2fc..ed0548e4 100644
--- a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
@@ -1,4 +1,4 @@
-/* Optimized strlen implementation using basic Loongarch instructions.
+/* Optimized strlen implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
index 258c47ce..91342f34 100644
--- a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
@@ -1,4 +1,4 @@
-/* Optimized strlen implementation using loongarch LASX SIMD instructions.
+/* Optimized strlen implementation using LoongArch LASX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
index b194355e..b09c12e0 100644
--- a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
@@ -1,4 +1,4 @@
-/* Optimized strlen implementation using Loongarch LSX SIMD instructions.
+/* Optimized strlen implementation using LoongArch LSX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
index e2687fa7..f63de872 100644
--- a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S
@@ -1,4 +1,4 @@
-/* Optimized strncmp implementation using basic Loongarch instructions.
+/* Optimized strncmp implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
index 0b4eee2a..83cb801d 100644
--- a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S
@@ -1,4 +1,4 @@
-/* Optimized strncmp implementation using Loongarch LSX instructions.
+/* Optimized strncmp implementation using LoongArch LSX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
index b900430a..a8296a1b 100644
--- a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
@@ -1,4 +1,4 @@
-/* Optimized strnlen implementation using basic Loongarch instructions.
+/* Optimized strnlen implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
index 2c03d3d9..aa6c812d 100644
--- a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
@@ -1,4 +1,4 @@
-/* Optimized strnlen implementation using loongarch LASX instructions
+/* Optimized strnlen implementation using LoongArch LASX instructions
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
index b769a895..d0febe3e 100644
--- a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
@@ -1,4 +1,4 @@
-/* Optimized strnlen implementation using loongarch LSX instructions
+/* Optimized strnlen implementation using LoongArch LSX instructions
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
--
2.33.0

@ -0,0 +1,67 @@
From fb72c81f9894b23797f6e2e066532c0963f5155f Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Wed, 13 Sep 2023 15:35:01 +0800
Subject: [PATCH 24/29] LoongArch: Change to put magic number to .rodata
section
Move the magic numbers into the .rodata section in memmove-lsx, and use
pcalau12i and %pc_lo12 with vld to load the data.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
.../loongarch/lp64/multiarch/memmove-lsx.S | 20 +++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
index 8a936770..5eb819ef 100644
--- a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
+++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S
@@ -209,13 +209,10 @@ L(al_less_16):
nop
-L(magic_num):
- .dword 0x0706050403020100
- .dword 0x0f0e0d0c0b0a0908
L(unaligned):
- pcaddi t2, -4
+ pcalau12i t2, %pc_hi20(L(INDEX))
bstrins.d a1, zero, 3, 0
- vld vr8, t2, 0
+ vld vr8, t2, %pc_lo12(L(INDEX))
vld vr0, a1, 0
vld vr1, a1, 16
@@ -413,13 +410,10 @@ L(back_al_less_16):
vst vr1, a0, 0
jr ra
-L(magic_num_2):
- .dword 0x0706050403020100
- .dword 0x0f0e0d0c0b0a0908
L(back_unaligned):
- pcaddi t2, -4
+ pcalau12i t2, %pc_hi20(L(INDEX))
bstrins.d a4, zero, 3, 0
- vld vr8, t2, 0
+ vld vr8, t2, %pc_lo12(L(INDEX))
vld vr0, a4, 0
vld vr1, a4, -16
@@ -529,6 +523,12 @@ L(back_un_less_16):
jr ra
END(MEMMOVE_NAME)
+ .section .rodata.cst16,"M",@progbits,16
+ .align 4
+L(INDEX):
+ .dword 0x0706050403020100
+ .dword 0x0f0e0d0c0b0a0908
+
libc_hidden_builtin_def (MEMCPY_NAME)
libc_hidden_builtin_def (MEMMOVE_NAME)
#endif
--
2.33.0
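For readers unfamiliar with the pcalau12i/%pc_lo12 pair used in the patch above, a rough C model of the address computation may help (a sketch only; the linker's rounding of the signed low part is glossed over). pcalau12i yields a 4 KiB-aligned base near PC, and the paired 12-bit immediate on vld supplies the remainder, so base plus lo12 is the address of L(INDEX):

    #include <stdint.h>

    /* Rough model: hi20 and lo12 stand for the values the linker fills
       into the %pc_hi20 and %pc_lo12 relocations.  */
    uint64_t
    pcrel_address (uint64_t pc, int64_t hi20, int64_t lo12)
    {
      uint64_t base = (pc + ((uint64_t) hi20 << 12)) & ~(uint64_t) 0xfff;
      return base + lo12;      /* lo12 is folded into the vld offset.  */
    }

The old pcaddi sequence only reaches PC +/- 2 MiB, which was fine while the index table sat inline in .text; once the data moves to .rodata the distance becomes link-time dependent, so the +/- 2 GiB reach of the pcalau12i pair is needed.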

@ -0,0 +1,44 @@
From 7f703cf758c4f185dd62f2a4f463002bb514af16 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sun, 27 Aug 2023 00:36:51 +0800
Subject: [PATCH 13/29] LoongArch: Micro-optimize LD_PCREL
We now require Binutils >= 2.41, so explicit relocation syntax is
always supported by the assembler. Use it to save one instruction:
la.pcrel expands to a pcalau12i/addi.d pair before the load, while
folding %pc_lo12 into the load's offset makes the addi.d unnecessary.
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/unix/sysv/linux/loongarch/pointer_guard.h | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h b/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h
index b25e353b..d6c78687 100644
--- a/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h
+++ b/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h
@@ -19,17 +19,15 @@
#ifndef POINTER_GUARD_H
#define POINTER_GUARD_H
-/* Load a got-relative EXPR into G, using T.
- Note G and T are register names. */
+/* Load a got-relative EXPR into register G. */
#define LD_GLOBAL(G, EXPR) \
la.global G, EXPR; \
REG_L G, G, 0;
-/* Load a pc-relative EXPR into G, using T.
- Note G and T are register names. */
+/* Load a pc-relative EXPR into register G. */
#define LD_PCREL(G, EXPR) \
- la.pcrel G, EXPR; \
- REG_L G, G, 0;
+ pcalau12i G, %pc_hi20(EXPR); \
+ REG_L G, G, %pc_lo12(EXPR);
#if (IS_IN (rtld) \
|| (!defined SHARED && (IS_IN (libc) \
--
2.33.0

@ -0,0 +1,65 @@
From 8dcd8c837df2e3cf81675522487697522f1542f8 Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Tue, 8 Aug 2023 14:15:42 +0800
Subject: [PATCH 01/29] LoongArch: Redefine macro LEAF/ENTRY.
The following usages of the LEAF/ENTRY macros are all feasible:
1. LEAF(fcn) -- the alignment of fcn is .align 3 (the default)
2. LEAF(fcn, 6) -- the alignment of fcn is .align 6
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/sys/asm.h | 36 ++++++++++++++++++++++++++----------
1 file changed, 26 insertions(+), 10 deletions(-)
diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
index d1a279b8..c5eb8afa 100644
--- a/sysdeps/loongarch/sys/asm.h
+++ b/sysdeps/loongarch/sys/asm.h
@@ -39,16 +39,32 @@
#define FREG_L fld.d
#define FREG_S fst.d
-/* Declare leaf routine. */
-#define LEAF(symbol) \
- .text; \
- .globl symbol; \
- .align 3; \
- cfi_startproc; \
- .type symbol, @function; \
- symbol:
-
-#define ENTRY(symbol) LEAF (symbol)
+/* Declare leaf routine.
+ The usage of macro LEAF/ENTRY is as follows:
+ 1. LEAF(fcn) -- the align value of fcn is .align 3 (default value)
+ 2. LEAF(fcn, 6) -- the align value of fcn is .align 6
+*/
+#define LEAF_IMPL(symbol, aln, ...) \
+ .text; \
+ .globl symbol; \
+ .align aln; \
+ .type symbol, @function; \
+symbol: \
+ cfi_startproc;
+
+
+#define LEAF(...) LEAF_IMPL(__VA_ARGS__, 3)
+#define ENTRY(...) LEAF(__VA_ARGS__)
+
+#define LEAF_NO_ALIGN(symbol) \
+ .text; \
+ .globl symbol; \
+ .type symbol, @function; \
+symbol: \
+ cfi_startproc;
+
+#define ENTRY_NO_ALIGN(symbol) LEAF_NO_ALIGN(symbol)
+
/* Mark end of function. */
#undef END
--
2.33.0
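The optional-alignment behaviour of LEAF above is the standard variadic-macro default-argument idiom: LEAF appends the default after the caller's arguments, and LEAF_IMPL binds only the first two, so a caller-supplied alignment shadows the trailing 3. A minimal standalone C sketch of the same idiom (DEMO/DEMO_IMPL are hypothetical names; like glibc, it relies on GNU C accepting an empty variadic argument list):

    #include <stdio.h>

    /* Extra arguments beyond (name, aln) fall into ... and are ignored.  */
    #define DEMO_IMPL(name, aln, ...) printf ("%s: .align %d\n", name, aln)
    #define DEMO(...) DEMO_IMPL (__VA_ARGS__, 3)

    int
    main (void)
    {
      DEMO ("fcn");     /* expands to DEMO_IMPL ("fcn", 3): the default  */
      DEMO ("fcn", 6);  /* expands to DEMO_IMPL ("fcn", 6, 3): 3 ignored  */
      return 0;
    }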

@ -0,0 +1,56 @@
From f8d66a269cb6f1a7087afadf3375bdf0553abf53 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sun, 27 Aug 2023 00:36:50 +0800
Subject: [PATCH 12/29] LoongArch: Remove support code for old linker in
start.S
We now require Binutils >= 2.41, so la.pcrel always works here.
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/start.S | 19 +++----------------
1 file changed, 3 insertions(+), 16 deletions(-)
diff --git a/sysdeps/loongarch/start.S b/sysdeps/loongarch/start.S
index e9d82033..bf6bfc9e 100644
--- a/sysdeps/loongarch/start.S
+++ b/sysdeps/loongarch/start.S
@@ -60,20 +60,7 @@ ENTRY (ENTRY_POINT)
cfi_undefined (1)
or a5, a0, zero /* rtld_fini */
-#if ENABLE_STATIC_PIE
-/* For static PIE, the GOT cannot be used in _start because the GOT entries are
- offsets instead of real addresses before __libc_start_main.
- __libc_start_main and/or main may be not local, so we rely on the linker to
- produce PLT entries for them. GNU ld >= 2.40 supports this. */
-# define LA la.pcrel
-#else
-/* Old GNU ld (< 2.40) cannot handle PC relative address against a non-local
- function correctly. We deem these old linkers failing to support static PIE
- and load the addresses from GOT. */
-# define LA la.got
-#endif
-
- LA a0, t0, main
+ la.pcrel a0, t0, main
REG_L a1, sp, 0
ADDI a2, sp, SZREG
@@ -84,9 +71,9 @@ ENTRY (ENTRY_POINT)
move a4, zero /* used to be fini */
or a6, sp, zero /* stack_end */
- LA ra, t0, __libc_start_main
+ la.pcrel ra, t0, __libc_start_main
jirl ra, ra, 0
- LA ra, t0, abort
+ la.pcrel ra, t0, abort
jirl ra, ra, 0
END (ENTRY_POINT)
--
2.33.0

@ -0,0 +1,28 @@
From b4b4bb7c9220a0bbdf5aec0ac8c1de1d22329280 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Thu, 14 Sep 2023 19:48:24 +0800
Subject: [PATCH 21/29] LoongArch: Replace deprecated $v0 with $a0 to eliminate
'as' warnings.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/dl-machine.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index 8a2db9de..57913cef 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -90,7 +90,7 @@ static inline ElfW (Addr) elf_machine_dynamic (void)
or $a0, $sp, $zero \n\
bl _dl_start \n\
# Stash user entry point in s0. \n\
- or $s0, $v0, $zero \n\
+ or $s0, $a0, $zero \n\
# Load the original argument count. \n\
ld.d $a1, $sp, 0 \n\
# Call _dl_init (struct link_map *main_map, int argc, \
--
2.33.0

@ -0,0 +1,81 @@
From 458ab6d5f39cca1cabd83abd2022f67491f6f5ed Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Fri, 20 Oct 2023 09:20:02 +0800
Subject: [PATCH 27/29] LoongArch: Unify Register Names.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/__longjmp.S | 20 ++++++++++----------
sysdeps/loongarch/setjmp.S | 18 +++++++++---------
2 files changed, 19 insertions(+), 19 deletions(-)
diff --git a/sysdeps/loongarch/__longjmp.S b/sysdeps/loongarch/__longjmp.S
index cbde1946..e87ce311 100644
--- a/sysdeps/loongarch/__longjmp.S
+++ b/sysdeps/loongarch/__longjmp.S
@@ -43,18 +43,18 @@ ENTRY (__longjmp)
REG_L s8, a0, 12*SZREG
#ifndef __loongarch_soft_float
- FREG_L $f24, a0, 13*SZREG + 0*SZFREG
- FREG_L $f25, a0, 13*SZREG + 1*SZFREG
- FREG_L $f26, a0, 13*SZREG + 2*SZFREG
- FREG_L $f27, a0, 13*SZREG + 3*SZFREG
- FREG_L $f28, a0, 13*SZREG + 4*SZFREG
- FREG_L $f29, a0, 13*SZREG + 5*SZFREG
- FREG_L $f30, a0, 13*SZREG + 6*SZFREG
- FREG_L $f31, a0, 13*SZREG + 7*SZFREG
+ FREG_L fs0, a0, 13*SZREG + 0*SZFREG
+ FREG_L fs1, a0, 13*SZREG + 1*SZFREG
+ FREG_L fs2, a0, 13*SZREG + 2*SZFREG
+ FREG_L fs3, a0, 13*SZREG + 3*SZFREG
+ FREG_L fs4, a0, 13*SZREG + 4*SZFREG
+ FREG_L fs5, a0, 13*SZREG + 5*SZFREG
+ FREG_L fs6, a0, 13*SZREG + 6*SZFREG
+ FREG_L fs7, a0, 13*SZREG + 7*SZFREG
#endif
- sltui a0,a1,1
+ sltui a0, a1, 1
ADD a0, a0, a1 # a0 = (a1 == 0) ? 1 : a1
- jirl zero,ra,0
+ jirl zero, ra, 0
END (__longjmp)
diff --git a/sysdeps/loongarch/setjmp.S b/sysdeps/loongarch/setjmp.S
index 6c7065cd..b6e4f727 100644
--- a/sysdeps/loongarch/setjmp.S
+++ b/sysdeps/loongarch/setjmp.S
@@ -52,19 +52,19 @@ ENTRY (__sigsetjmp)
REG_S s8, a0, 12*SZREG
#ifndef __loongarch_soft_float
- FREG_S $f24, a0, 13*SZREG + 0*SZFREG
- FREG_S $f25, a0, 13*SZREG + 1*SZFREG
- FREG_S $f26, a0, 13*SZREG + 2*SZFREG
- FREG_S $f27, a0, 13*SZREG + 3*SZFREG
- FREG_S $f28, a0, 13*SZREG + 4*SZFREG
- FREG_S $f29, a0, 13*SZREG + 5*SZFREG
- FREG_S $f30, a0, 13*SZREG + 6*SZFREG
- FREG_S $f31, a0, 13*SZREG + 7*SZFREG
+ FREG_S fs0, a0, 13*SZREG + 0*SZFREG
+ FREG_S fs1, a0, 13*SZREG + 1*SZFREG
+ FREG_S fs2, a0, 13*SZREG + 2*SZFREG
+ FREG_S fs3, a0, 13*SZREG + 3*SZFREG
+ FREG_S fs4, a0, 13*SZREG + 4*SZFREG
+ FREG_S fs5, a0, 13*SZREG + 5*SZFREG
+ FREG_S fs6, a0, 13*SZREG + 6*SZFREG
+ FREG_S fs7, a0, 13*SZREG + 7*SZFREG
#endif
#if !IS_IN (libc) && IS_IN(rtld)
li.w v0, 0
- jirl zero,ra,0
+ jirl zero, ra, 0
#else
b __sigjmp_save
#endif
--
2.33.0

@ -0,0 +1,24 @@
From 4828d1aa0028e819a5fb336d962e8f7cbfedf8b4 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Mon, 23 Oct 2023 15:53:38 +0800
Subject: [PATCH 28/29] LoongArch: Update hwcap.h to sync with LoongArch
kernel.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h
index 5104b69c..7acec23d 100644
--- a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h
+++ b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h
@@ -35,3 +35,4 @@
#define HWCAP_LOONGARCH_LBT_X86 (1 << 10)
#define HWCAP_LOONGARCH_LBT_ARM (1 << 11)
#define HWCAP_LOONGARCH_LBT_MIPS (1 << 12)
+#define HWCAP_LOONGARCH_PTW (1 << 13)
--
2.33.0
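A minimal user-space sketch of querying the new bit at run time (the fallback #define merely keeps the example compilable against headers that predate this patch):

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef HWCAP_LOONGARCH_PTW
    # define HWCAP_LOONGARCH_PTW (1 << 13)  /* value added by the patch above */
    #endif

    int
    main (void)
    {
      unsigned long hwcap = getauxval (AT_HWCAP);
      printf ("PTW: %s\n",
              (hwcap & HWCAP_LOONGARCH_PTW) ? "supported" : "not supported");
      return 0;
    }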

@ -0,0 +1,30 @@
From 4938840b15ff9734fdcc63cc0744ce3f3bbb0b16 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Mon, 14 Aug 2023 15:34:08 +0800
Subject: [PATCH 05/29] LoongArch: elf: Add new LoongArch reloc type 109 into
elf.h
This reloc type is generated by GNU assembler >= 2.41 for relaxation
support.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
elf/elf.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/elf/elf.h b/elf/elf.h
index d623bdeb..9c51073f 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -4213,6 +4213,7 @@ enum
#define R_LARCH_SUB6 106
#define R_LARCH_ADD_ULEB128 107
#define R_LARCH_SUB_ULEB128 108
+#define R_LARCH_64_PCREL 109
/* ARC specific declarations. */
--
2.33.0
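As a quick illustration of where the new constant shows up, a tool that walks a .rela section could classify entries as follows (a sketch; it assumes an <elf.h> that already defines R_LARCH_64_PCREL, i.e. one with this patch applied):

    #include <elf.h>
    #include <stdio.h>

    /* Report relocation entries using the new 64-bit PC-relative type.  */
    void
    report_pcrel64 (const Elf64_Rela *r)
    {
      if (ELF64_R_TYPE (r->r_info) == R_LARCH_64_PCREL)
        printf ("R_LARCH_64_PCREL at offset 0x%llx\n",
                (unsigned long long) r->r_offset);
    }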

@ -0,0 +1,528 @@
From 43abd8772a143cd96688c081500397dd712e631b Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Tue, 8 Aug 2023 14:15:44 +0800
Subject: [PATCH 03/29] LoongArch: Add ifunc support and add different versions
of strlen
strlen-lasx is implemented with LASX SIMD instructions (256-bit)
strlen-lsx is implemented with LSX SIMD instructions (128-bit)
strlen-aligned is implemented with basic LoongArch instructions and never uses unaligned memory access
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 7 ++
.../lp64/multiarch/ifunc-impl-list.c | 41 +++++++
.../loongarch/lp64/multiarch/ifunc-strlen.h | 40 +++++++
.../loongarch/lp64/multiarch/strlen-aligned.S | 100 ++++++++++++++++++
.../loongarch/lp64/multiarch/strlen-lasx.S | 63 +++++++++++
sysdeps/loongarch/lp64/multiarch/strlen-lsx.S | 71 +++++++++++++
sysdeps/loongarch/lp64/multiarch/strlen.c | 37 +++++++
sysdeps/loongarch/sys/regdef.h | 57 ++++++++++
.../unix/sysv/linux/loongarch/cpu-features.h | 2 +
9 files changed, 418 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/Makefile
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
new file mode 100644
index 00000000..76c506c9
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -0,0 +1,7 @@
+ifeq ($(subdir),string)
+sysdep_routines += \
+ strlen-aligned \
+ strlen-lsx \
+ strlen-lasx \
+# sysdep_routines
+endif
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
new file mode 100644
index 00000000..1a2a576f
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -0,0 +1,41 @@
+/* Enumerate available IFUNC implementations of a function LoongArch64 version.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <assert.h>
+#include <string.h>
+#include <wchar.h>
+#include <ldsodefs.h>
+#include <ifunc-impl-list.h>
+#include <stdio.h>
+
+size_t
+__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ size_t max)
+{
+
+ size_t i = max;
+
+ IFUNC_IMPL (i, name, strlen,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LASX, __strlen_lasx)
+ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LSX, __strlen_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned)
+ )
+ return i;
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h
new file mode 100644
index 00000000..6258bb76
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h
@@ -0,0 +1,40 @@
+/* Common definition for strlen ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
new file mode 100644
index 00000000..e9e1d2fc
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S
@@ -0,0 +1,100 @@
+/* Optimized strlen implementation using basic Loongarch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STRLEN __strlen_aligned
+#else
+# define STRLEN strlen
+#endif
+
+LEAF(STRLEN, 6)
+ move a1, a0
+ bstrins.d a0, zero, 2, 0
+ lu12i.w a2, 0x01010
+ li.w t0, -1
+
+ ld.d t2, a0, 0
+ andi t1, a1, 0x7
+ ori a2, a2, 0x101
+ slli.d t1, t1, 3
+
+ bstrins.d a2, a2, 63, 32
+ sll.d t1, t0, t1
+ slli.d t3, a2, 7
+ nor a3, zero, t3
+
+ orn t2, t2, t1
+ sub.d t0, t2, a2
+ nor t1, t2, a3
+ and t0, t0, t1
+
+
+ bnez t0, L(count_pos)
+ addi.d a0, a0, 8
+L(loop_16_7bit):
+ ld.d t2, a0, 0
+ sub.d t1, t2, a2
+
+ and t0, t1, t3
+ bnez t0, L(more_check)
+ ld.d t2, a0, 8
+ sub.d t1, t2, a2
+
+ and t0, t1, t3
+ addi.d a0, a0, 16
+ beqz t0, L(loop_16_7bit)
+ addi.d a0, a0, -8
+
+L(more_check):
+ nor t0, t2, a3
+ and t0, t1, t0
+ bnez t0, L(count_pos)
+ addi.d a0, a0, 8
+
+
+L(loop_16_8bit):
+ ld.d t2, a0, 0
+ sub.d t1, t2, a2
+ nor t0, t2, a3
+ and t0, t0, t1
+
+ bnez t0, L(count_pos)
+ ld.d t2, a0, 8
+ addi.d a0, a0, 16
+ sub.d t1, t2, a2
+
+ nor t0, t2, a3
+ and t0, t0, t1
+ beqz t0, L(loop_16_8bit)
+ addi.d a0, a0, -8
+
+L(count_pos):
+ ctz.d t1, t0
+ sub.d a0, a0, a1
+ srli.d t1, t1, 3
+ add.d a0, a0, t1
+
+ jr ra
+END(STRLEN)
+
+libc_hidden_builtin_def (STRLEN)
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
new file mode 100644
index 00000000..258c47ce
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S
@@ -0,0 +1,63 @@
+/* Optimized strlen implementation using loongarch LASX SIMD instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define STRLEN __strlen_lasx
+
+LEAF(STRLEN, 6)
+ move a1, a0
+ bstrins.d a0, zero, 4, 0
+ li.d t1, -1
+ xvld xr0, a0, 0
+
+ xvmsknz.b xr0, xr0
+ xvpickve.w xr1, xr0, 4
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0 # sign extend
+
+ sra.w t0, t0, a1
+ beq t0, t1, L(loop)
+ cto.w a0, t0
+ jr ra
+
+L(loop):
+ xvld xr0, a0, 32
+ addi.d a0, a0, 32
+ xvsetanyeqz.b fcc0, xr0
+ bceqz fcc0, L(loop)
+
+
+ xvmsknz.b xr0, xr0
+ sub.d a0, a0, a1
+ xvpickve.w xr1, xr0, 4
+ vilvl.h vr0, vr1, vr0
+
+ movfr2gr.s t0, fa0
+ cto.w t0, t0
+ add.d a0, a0, t0
+ jr ra
+END(STRLEN)
+
+libc_hidden_builtin_def (STRLEN)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
new file mode 100644
index 00000000..b194355e
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S
@@ -0,0 +1,71 @@
+/* Optimized strlen implementation using Loongarch LSX SIMD instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define STRLEN __strlen_lsx
+
+LEAF(STRLEN, 6)
+ move a1, a0
+ bstrins.d a0, zero, 4, 0
+ vld vr0, a0, 0
+ vld vr1, a0, 16
+
+ li.d t1, -1
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+
+ movfr2gr.s t0, fa0
+ sra.w t0, t0, a1
+ beq t0, t1, L(loop)
+ cto.w a0, t0
+
+ jr ra
+ nop
+ nop
+ nop
+
+
+L(loop):
+ vld vr0, a0, 32
+ vld vr1, a0, 48
+ addi.d a0, a0, 32
+ vmin.bu vr2, vr0, vr1
+
+ vsetanyeqz.b fcc0, vr2
+ bceqz fcc0, L(loop)
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+
+ vilvl.h vr0, vr1, vr0
+ sub.d a0, a0, a1
+ movfr2gr.s t0, fa0
+ cto.w t0, t0
+
+ add.d a0, a0, t0
+ jr ra
+END(STRLEN)
+
+libc_hidden_builtin_def (STRLEN)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen.c b/sysdeps/loongarch/lp64/multiarch/strlen.c
new file mode 100644
index 00000000..381c2daa
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strlen.c
@@ -0,0 +1,37 @@
+/* Multiple versions of strlen.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+
+#if IS_IN (libc)
+# define strlen __redirect_strlen
+# include <string.h>
+# undef strlen
+
+# define SYMBOL_NAME strlen
+# include "ifunc-strlen.h"
+
+libc_ifunc_redirected (__redirect_strlen, strlen, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strlen, __GI_strlen, __redirect_strlen)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strlen);
+# endif
+
+#endif
diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
index 5100f36d..524d2e32 100644
--- a/sysdeps/loongarch/sys/regdef.h
+++ b/sysdeps/loongarch/sys/regdef.h
@@ -89,6 +89,14 @@
#define fs5 $f29
#define fs6 $f30
#define fs7 $f31
+#define fcc0 $fcc0
+#define fcc1 $fcc1
+#define fcc2 $fcc2
+#define fcc3 $fcc3
+#define fcc4 $fcc4
+#define fcc5 $fcc5
+#define fcc6 $fcc6
+#define fcc7 $fcc7
#define vr0 $vr0
#define vr1 $vr1
@@ -98,6 +106,30 @@
#define vr5 $vr5
#define vr6 $vr6
#define vr7 $vr7
+#define vr8 $vr8
+#define vr9 $vr9
+#define vr10 $vr10
+#define vr11 $vr11
+#define vr12 $vr12
+#define vr13 $vr13
+#define vr14 $vr14
+#define vr15 $vr15
+#define vr16 $vr16
+#define vr17 $vr17
+#define vr18 $vr18
+#define vr19 $vr19
+#define vr20 $vr20
+#define vr21 $vr21
+#define vr22 $vr22
+#define vr23 $vr23
+#define vr24 $vr24
+#define vr25 $vr25
+#define vr26 $vr26
+#define vr27 $vr27
+#define vr28 $vr28
+#define vr29 $vr29
+#define vr30 $vr30
+#define vr31 $vr31
#define xr0 $xr0
#define xr1 $xr1
@@ -107,5 +139,30 @@
#define xr5 $xr5
#define xr6 $xr6
#define xr7 $xr7
+#define xr7 $xr7
+#define xr8 $xr8
+#define xr9 $xr9
+#define xr10 $xr10
+#define xr11 $xr11
+#define xr12 $xr12
+#define xr13 $xr13
+#define xr14 $xr14
+#define xr15 $xr15
+#define xr16 $xr16
+#define xr17 $xr17
+#define xr18 $xr18
+#define xr19 $xr19
+#define xr20 $xr20
+#define xr21 $xr21
+#define xr22 $xr22
+#define xr23 $xr23
+#define xr24 $xr24
+#define xr25 $xr25
+#define xr26 $xr26
+#define xr27 $xr27
+#define xr28 $xr28
+#define xr29 $xr29
+#define xr30 $xr30
+#define xr31 $xr31
#endif /* _SYS_REGDEF_H */
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
index e371e13b..d1a280a5 100644
--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
@@ -25,5 +25,7 @@
#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
+#define INIT_ARCH()
+
#endif /* _CPU_FEATURES_LOONGARCH64_H */
--
2.33.0
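The dispatch plumbing above (libc_ifunc_redirected plus IFUNC_SELECTOR) is glibc-internal, but the underlying mechanism is the compiler's ifunc attribute: a resolver runs once at load time and picks an implementation from the hardware capabilities. A standalone sketch of the same idea (my_strlen* are hypothetical functions, not the glibc symbols; real resolvers must be careful about what they call this early, which is why glibc routes the hwcap information through its own structures):

    #include <stddef.h>
    #include <sys/auxv.h>   /* pulls in HWCAP_LOONGARCH_* on LoongArch */

    size_t my_strlen_aligned (const char *s);
    size_t my_strlen_lsx (const char *s);
    size_t my_strlen_lasx (const char *s);

    /* Resolver: runs once, before my_strlen is first called.  */
    static void *
    resolve_my_strlen (void)
    {
      unsigned long hwcap = getauxval (AT_HWCAP);
      if (hwcap & HWCAP_LOONGARCH_LASX)
        return my_strlen_lasx;
      if (hwcap & HWCAP_LOONGARCH_LSX)
        return my_strlen_lsx;
      return my_strlen_aligned;
    }

    size_t my_strlen (const char *s)
      __attribute__ ((ifunc ("resolve_my_strlen")));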

File diff suppressed because it is too large.

@ -0,0 +1,706 @@
From aca7d7f0dde5f56344e8e58e5f6648c96bb1f1cc Mon Sep 17 00:00:00 2001
From: dengjianbo <dengjianbo@loongson.cn>
Date: Tue, 15 Aug 2023 09:08:11 +0800
Subject: [PATCH 06/29] LoongArch: Add ifunc support for strchr{aligned, lsx,
lasx} and strchrnul{aligned, lsx, lasx}
These implementations reduce the runtime of the glibc strchr{nul}
microbenchmarks as follows:
strchr-lasx reduces the runtime by about 50%-83%
strchr-lsx reduces the runtime by about 30%-67%
strchr-aligned reduces the runtime by about 10%-20%
strchrnul-lasx reduces the runtime by about 50%-83%
strchrnul-lsx reduces the runtime by about 36%-65%
strchrnul-aligned reduces the runtime by about 6%-10%
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/multiarch/Makefile | 6 ++
.../lp64/multiarch/ifunc-impl-list.c | 16 +++
.../loongarch/lp64/multiarch/ifunc-strchr.h | 41 ++++++++
.../lp64/multiarch/ifunc-strchrnul.h | 41 ++++++++
.../loongarch/lp64/multiarch/strchr-aligned.S | 99 +++++++++++++++++++
.../loongarch/lp64/multiarch/strchr-lasx.S | 91 +++++++++++++++++
sysdeps/loongarch/lp64/multiarch/strchr-lsx.S | 73 ++++++++++++++
sysdeps/loongarch/lp64/multiarch/strchr.c | 36 +++++++
.../lp64/multiarch/strchrnul-aligned.S | 95 ++++++++++++++++++
.../loongarch/lp64/multiarch/strchrnul-lasx.S | 22 +++++
.../loongarch/lp64/multiarch/strchrnul-lsx.S | 22 +++++
sysdeps/loongarch/lp64/multiarch/strchrnul.c | 39 ++++++++
12 files changed, 581 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr.c
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 76c506c9..110a8c5c 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -3,5 +3,11 @@ sysdep_routines += \
strlen-aligned \
strlen-lsx \
strlen-lasx \
+ strchr-aligned \
+ strchr-lsx \
+ strchr-lasx \
+ strchrnul-aligned \
+ strchrnul-lsx \
+ strchrnul-lasx \
# sysdep_routines
endif
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 1a2a576f..c7164b45 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -37,5 +37,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#endif
IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned)
)
+
+ IFUNC_IMPL (i, name, strchr,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx)
+ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LSX, __strchr_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_aligned)
+ )
+
+ IFUNC_IMPL (i, name, strchrnul,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LASX, __strchrnul_lasx)
+ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LSX, __strchrnul_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned)
+ )
return i;
}
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h
new file mode 100644
index 00000000..4494db79
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h
@@ -0,0 +1,41 @@
+/* Common definition for strchr ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h
new file mode 100644
index 00000000..8a925120
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h
@@ -0,0 +1,41 @@
+/* Common definition for strchrnul ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
new file mode 100644
index 00000000..5fb01806
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S
@@ -0,0 +1,99 @@
+/* Optimized strchr implementation using basic Loongarch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STRCHR_NAME __strchr_aligned
+#else
+# define STRCHR_NAME strchr
+#endif
+
+LEAF(STRCHR_NAME, 6)
+ slli.d t1, a0, 3
+ bstrins.d a0, zero, 2, 0
+ lu12i.w a2, 0x01010
+ ld.d t2, a0, 0
+
+ ori a2, a2, 0x101
+ andi a1, a1, 0xff
+ bstrins.d a2, a2, 63, 32
+ li.w t0, -1
+
+ mul.d a1, a1, a2
+ sll.d t0, t0, t1
+ slli.d a3, a2, 7
+ orn t2, t2, t0
+
+ sll.d t3, a1, t1
+ xor t4, t2, t3
+ sub.d a4, t2, a2
+ sub.d a5, t4, a2
+
+
+ andn a4, a4, t2
+ andn a5, a5, t4
+ or t0, a4, a5
+ and t0, t0, a3
+
+ bnez t0, L(end)
+ addi.d a0, a0, 8
+L(loop):
+ ld.d t4, a0, 0
+ xor t2, t4, a1
+
+ sub.d a4, t4, a2
+ sub.d a5, t2, a2
+ andn a4, a4, t4
+ andn a5, a5, t2
+
+ or t0, a4, a5
+ and t0, t0, a3
+ bnez t0, L(end)
+ ld.d t4, a0, 8
+
+
+ addi.d a0, a0, 16
+ xor t2, t4, a1
+ sub.d a4, t4, a2
+ sub.d a5, t2, a2
+
+ andn a4, a4, t4
+ andn a5, a5, t2
+ or t0, a4, a5
+ and t0, t0, a3
+
+ beqz t0, L(loop)
+ addi.d a0, a0, -8
+L(end):
+ and t0, a5, a3
+ and t1, a4, a3
+
+ ctz.d t0, t0
+ ctz.d t1, t1
+ srli.w t2, t0, 3
+ sltu t3, t1, t0
+
+
+ add.d a0, a0, t2
+ masknez a0, a0, t3
+ jr ra
+END(STRCHR_NAME)
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
new file mode 100644
index 00000000..254402da
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S
@@ -0,0 +1,91 @@
+/* Optimized strchr implementation using loongarch LASX SIMD instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+#ifndef AS_STRCHRNUL
+# define STRCHR __strchr_lasx
+#endif
+
+LEAF(STRCHR, 6)
+ andi t1, a0, 0x1f
+ bstrins.d a0, zero, 4, 0
+ xvld xr0, a0, 0
+ li.d t2, -1
+
+ xvreplgr2vr.b xr1, a1
+ sll.d t1, t2, t1
+ xvxor.v xr2, xr0, xr1
+ xvmin.bu xr0, xr0, xr2
+
+ xvmsknz.b xr0, xr0
+ xvpickve.w xr3, xr0, 4
+ vilvl.h vr0, vr3, vr0
+ movfr2gr.s t0, fa0
+
+ orn t0, t0, t1
+ bne t0, t2, L(end)
+ addi.d a0, a0, 32
+ nop
+
+
+L(loop):
+ xvld xr0, a0, 0
+ xvxor.v xr2, xr0, xr1
+ xvmin.bu xr0, xr0, xr2
+ xvsetanyeqz.b fcc0, xr0
+
+ bcnez fcc0, L(loop_end)
+ xvld xr0, a0, 32
+ addi.d a0, a0, 64
+ xvxor.v xr2, xr0, xr1
+
+ xvmin.bu xr0, xr0, xr2
+ xvsetanyeqz.b fcc0, xr0
+ bceqz fcc0, L(loop)
+ addi.d a0, a0, -32
+
+L(loop_end):
+ xvmsknz.b xr0, xr0
+ xvpickve.w xr1, xr0, 4
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+
+
+L(end):
+ cto.w t0, t0
+ add.d a0, a0, t0
+#ifndef AS_STRCHRNUL
+ vreplgr2vr.b vr0, t0
+ xvpermi.q xr3, xr2, 1
+
+ vshuf.b vr0, vr3, vr2, vr0
+ vpickve2gr.bu t0, vr0, 0
+ masknez a0, a0, t0
+#endif
+ jr ra
+
+END(STRCHR)
+
+libc_hidden_builtin_def(STRCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
new file mode 100644
index 00000000..dae98b0a
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S
@@ -0,0 +1,73 @@
+/* Optimized strlen implementation using loongarch LSX SIMD instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+#ifndef AS_STRCHRNUL
+# define STRCHR __strchr_lsx
+#endif
+
+LEAF(STRCHR, 6)
+ andi t1, a0, 0xf
+ bstrins.d a0, zero, 3, 0
+ vld vr0, a0, 0
+ li.d t2, -1
+
+ vreplgr2vr.b vr1, a1
+ sll.d t3, t2, t1
+ vxor.v vr2, vr0, vr1
+ vmin.bu vr0, vr0, vr2
+
+ vmsknz.b vr0, vr0
+ movfr2gr.s t0, fa0
+ ext.w.h t0, t0
+ orn t0, t0, t3
+
+ beq t0, t2, L(loop)
+L(found):
+ cto.w t0, t0
+ add.d a0, a0, t0
+#ifndef AS_STRCHRNUL
+ vreplve.b vr2, vr2, t0
+ vpickve2gr.bu t1, vr2, 0
+ masknez a0, a0, t1
+#endif
+ jr ra
+
+
+L(loop):
+ vld vr0, a0, 16
+ addi.d a0, a0, 16
+ vxor.v vr2, vr0, vr1
+ vmin.bu vr0, vr0, vr2
+
+ vsetanyeqz.b fcc0, vr0
+ bceqz fcc0, L(loop)
+ vmsknz.b vr0, vr0
+ movfr2gr.s t0, fa0
+
+ b L(found)
+END(STRCHR)
+
+libc_hidden_builtin_def (STRCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr.c b/sysdeps/loongarch/lp64/multiarch/strchr.c
new file mode 100644
index 00000000..404e97bd
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strchr.c
@@ -0,0 +1,36 @@
+/* Multiple versions of strchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strchr __redirect_strchr
+# include <string.h>
+# undef strchr
+
+# define SYMBOL_NAME strchr
+# include "ifunc-strchr.h"
+
+libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ());
+weak_alias(strchr, index)
+# ifdef SHARED
+__hidden_ver1 (strchr, __GI_strchr, __redirect_strchr)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strchr);
+# endif
+
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
new file mode 100644
index 00000000..1c01a023
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S
@@ -0,0 +1,95 @@
+/* Optimized strchrnul implementation using basic Loongarch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STRCHRNUL_NAME __strchrnul_aligned
+#else
+# define STRCHRNUL_NAME __strchrnul
+#endif
+
+LEAF(STRCHRNUL_NAME, 6)
+ slli.d t1, a0, 3
+ bstrins.d a0, zero, 2, 0
+ lu12i.w a2, 0x01010
+ ld.d t2, a0, 0
+
+ ori a2, a2, 0x101
+ andi a1, a1, 0xff
+ bstrins.d a2, a2, 63, 32
+ li.w t0, -1
+
+ mul.d a1, a1, a2
+ sll.d t0, t0, t1
+ slli.d a3, a2, 7
+ orn t2, t2, t0
+
+ sll.d t3, a1, t1
+ xor t4, t2, t3
+ sub.d a4, t2, a2
+ sub.d a5, t4, a2
+
+
+ andn a4, a4, t2
+ andn a5, a5, t4
+ or t0, a4, a5
+ and t0, t0, a3
+
+ bnez t0, L(end)
+ addi.d a0, a0, 8
+L(loop):
+ ld.d t4, a0, 0
+ xor t2, t4, a1
+
+ sub.d a4, t4, a2
+ sub.d a5, t2, a2
+ andn a4, a4, t4
+ andn a5, a5, t2
+
+ or t0, a4, a5
+ and t0, t0, a3
+ bnez t0, L(end)
+ ld.d t4, a0, 8
+
+
+ addi.d a0, a0, 16
+ xor t2, t4, a1
+ sub.d a4, t4, a2
+ sub.d a5, t2, a2
+
+ andn a4, a4, t4
+ andn a5, a5, t2
+ or t0, a4, a5
+ and t0, t0, a3
+
+ beqz t0, L(loop)
+ addi.d a0, a0, -8
+L(end):
+ ctz.d t0, t0
+ srli.w t0, t0, 3
+
+
+ add.d a0, a0, t0
+ jr ra
+END(STRCHRNUL_NAME)
+
+libc_hidden_builtin_def (STRCHRNUL_NAME)
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
new file mode 100644
index 00000000..d45495e4
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S
@@ -0,0 +1,22 @@
+/* Optimized strchrnul implementation using loongarch LASX SIMD instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define STRCHR __strchrnul_lasx
+#define AS_STRCHRNUL
+#include "strchr-lasx.S"
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
new file mode 100644
index 00000000..07d793ae
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S
@@ -0,0 +1,22 @@
+/* Optimized strchrnul implementation using loongarch LSX SIMD instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define STRCHR __strchrnul_lsx
+#define AS_STRCHRNUL
+#include "strchr-lsx.S"
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul.c b/sysdeps/loongarch/lp64/multiarch/strchrnul.c
new file mode 100644
index 00000000..f3b8296e
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul.c
@@ -0,0 +1,39 @@
+/* Multiple versions of strchrnul.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+
+#if IS_IN (libc)
+# define strchrnul __redirect_strchrnul
+# define __strchrnul __redirect___strchrnul
+# include <string.h>
+# undef __strchrnul
+# undef strchrnul
+
+# define SYMBOL_NAME strchrnul
+# include "ifunc-strchrnul.h"
+
+libc_ifunc_redirected (__redirect_strchrnul, __strchrnul,
+ IFUNC_SELECTOR ());
+weak_alias (__strchrnul, strchrnul)
+# ifdef SHARED
+__hidden_ver1 (__strchrnul, __GI___strchrnul, __redirect_strchrnul)
+ __attribute__((visibility ("hidden"))) __attribute_copy__ (strchrnul);
+# endif
+#endif
--
2.33.0
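The aligned variants above (__strchr_aligned and __strchrnul_aligned) are word-at-a-time scanners built on the classic zero-byte bit trick; the sub.d/andn/and sequences in the assembly compute exactly this. A C restatement of the trick (not the glibc code itself):

    #include <stdint.h>

    #define ONES  0x0101010101010101ULL
    #define HIGHS 0x8080808080808080ULL

    /* Nonzero iff v contains a zero byte.  The lowest set bit marks the
       first zero byte; higher bits may include false positives, which is
       fine when the result is scanned with a count-trailing-zeros.  */
    static inline uint64_t
    haszero (uint64_t v)
    {
      return (v - ONES) & ~v & HIGHS;
    }

    /* Same test for bytes equal to c: XOR turns matching bytes into
       zero bytes, then reuse haszero.  */
    static inline uint64_t
    haschar (uint64_t v, unsigned char c)
    {
      return haszero (v ^ (ONES * c));
    }

strchr then needs only the position of the lowest set bit in either mask; the ctz.d/sltu/masknez tail at L(end) above turns that into a byte index and returns NULL when the terminating NUL comes before the match.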

README.md (new file, 11 lines)
@ -0,0 +1,11 @@
Anolis OS
=======================================
# Repository notes
## Branches
> When doing development work, make sure to choose the branch that matches the current release.
* The aX branches are the main branches for each major release; e.g. the a8 branch tracks the latest version.
* The aX.Y branches are maintenance branches for minor releases; e.g. the a8.2 branch corresponds to release 8.2.
## Development workflow
1. First, fork the target branch into your own namespace.
2. Make your changes on your fork branch.
3. Submit a merge request to the corresponding repository, with the fork branch as the source branch.

@ -0,0 +1,478 @@
From c0f3b0a8c71c26d5351e8ddabe3e8a323803e683 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Thu, 21 Sep 2023 09:10:11 +0800
Subject: [PATCH 26/29] Revert "LoongArch: Add glibc.cpu.hwcap support."
This reverts commit a53451559dc9cce765ea5bcbb92c4007e058e92b.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/Makefile | 4 -
sysdeps/loongarch/Versions | 5 --
sysdeps/loongarch/cpu-tunables.c | 89 -------------------
sysdeps/loongarch/dl-get-cpu-features.c | 25 ------
sysdeps/loongarch/dl-machine.h | 27 +-----
sysdeps/loongarch/dl-tunables.list | 25 ------
.../unix/sysv/linux/loongarch/cpu-features.c | 29 ------
.../unix/sysv/linux/loongarch/cpu-features.h | 18 +---
.../unix/sysv/linux/loongarch/dl-procinfo.c | 60 -------------
sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c | 21 -----
.../unix/sysv/linux/loongarch/libc-start.c | 34 -------
11 files changed, 8 insertions(+), 329 deletions(-)
delete mode 100644 sysdeps/loongarch/Versions
delete mode 100644 sysdeps/loongarch/cpu-tunables.c
delete mode 100644 sysdeps/loongarch/dl-get-cpu-features.c
delete mode 100644 sysdeps/loongarch/dl-tunables.list
delete mode 100644 sysdeps/unix/sysv/linux/loongarch/cpu-features.c
delete mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
delete mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
delete mode 100644 sysdeps/unix/sysv/linux/loongarch/libc-start.c
diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
index 30a1f4a8..43d2f583 100644
--- a/sysdeps/loongarch/Makefile
+++ b/sysdeps/loongarch/Makefile
@@ -6,10 +6,6 @@ ifeq ($(subdir),elf)
gen-as-const-headers += dl-link.sym
endif
-ifeq ($(subdir),elf)
- sysdep-dl-routines += dl-get-cpu-features
-endif
-
# LoongArch's assembler also needs to know about PIC as it changes the
# definition of some assembler macros.
ASFLAGS-.os += $(pic-ccflag)
diff --git a/sysdeps/loongarch/Versions b/sysdeps/loongarch/Versions
deleted file mode 100644
index 33ae2cc0..00000000
--- a/sysdeps/loongarch/Versions
+++ /dev/null
@@ -1,5 +0,0 @@
-ld {
- GLIBC_PRIVATE {
- _dl_larch_get_cpu_features;
- }
-}
diff --git a/sysdeps/loongarch/cpu-tunables.c b/sysdeps/loongarch/cpu-tunables.c
deleted file mode 100644
index 8e9fab93..00000000
--- a/sysdeps/loongarch/cpu-tunables.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/* LoongArch CPU feature tuning.
- This file is part of the GNU C Library.
- Copyright (C) 2023 Free Software Foundation, Inc.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-# include <stdbool.h>
-# include <stdint.h>
-# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. */
-# include <elf/dl-tunables.h>
-# include <string.h>
-# include <cpu-features.h>
-# include <ldsodefs.h>
-# include <sys/auxv.h>
-
-# define HWCAP_LOONGARCH_IFUNC \
- (HWCAP_LOONGARCH_UAL | HWCAP_LOONGARCH_LSX | HWCAP_LOONGARCH_LASX)
-
-# define CHECK_GLIBC_IFUNC_CPU_OFF(f, name, len) \
- _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \
- if (!memcmp (f, #name, len) && \
- (GLRO (dl_hwcap) & HWCAP_LOONGARCH_##name)) \
- { \
- hwcap |= (HWCAP_LOONGARCH_##name | (~HWCAP_LOONGARCH_IFUNC)); \
- break; \
- } \
-
-attribute_hidden
-void
-TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
-{
- const char *p = valp->strval;
- size_t len;
- unsigned long hwcap = 0;
- const char *c;
-
- do {
- for (c = p; *c != ','; c++)
- if (*c == '\0')
- break;
-
- len = c - p;
-
- switch(len)
- {
- default:
- _dl_fatal_printf (
- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
- );
- break;
- case 3:
- {
- CHECK_GLIBC_IFUNC_CPU_OFF (p, LSX, 3);
- CHECK_GLIBC_IFUNC_CPU_OFF (p, UAL, 3);
- _dl_fatal_printf (
- "Some features are invalid or not supported on this machine!!\n"
- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
- );
- }
- break;
- case 4:
- {
- CHECK_GLIBC_IFUNC_CPU_OFF (p, LASX, 4);
- _dl_fatal_printf (
- "Some features are invalid or not supported on this machine!!\n"
- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n"
- );
- }
- break;
- }
-
- p += len + 1;
- }
- while (*c != '\0');
-
- GLRO (dl_larch_cpu_features).hwcap &= hwcap;
-}
diff --git a/sysdeps/loongarch/dl-get-cpu-features.c b/sysdeps/loongarch/dl-get-cpu-features.c
deleted file mode 100644
index 7cd9bc15..00000000
--- a/sysdeps/loongarch/dl-get-cpu-features.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/* Define _dl_larch_get_cpu_features.
- Copyright (C) 2023 Free Software Foundation, Inc.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-
-#include <ldsodefs.h>
-
-const struct cpu_features *
-_dl_larch_get_cpu_features (void)
-{
- return &GLRO(dl_larch_cpu_features);
-}
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index b395a928..57913cef 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -29,8 +29,6 @@
#include <dl-static-tls.h>
#include <dl-machine-rel.h>
-#include <cpu-features.c>
-
#ifndef _RTLD_PROLOGUE
# define _RTLD_PROLOGUE(entry) \
".globl\t" __STRING (entry) "\n\t" \
@@ -55,23 +53,6 @@
#define ELF_MACHINE_NO_REL 1
#define ELF_MACHINE_NO_RELA 0
-#define DL_PLATFORM_INIT dl_platform_init ()
-
-static inline void __attribute__ ((unused))
-dl_platform_init (void)
-{
- if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
- /* Avoid an empty string which would disturb us. */
- GLRO(dl_platform) = NULL;
-
-#ifdef SHARED
- /* init_cpu_features has been called early from __libc_start_main in
- static executable. */
- init_cpu_features (&GLRO(dl_larch_cpu_features));
-#endif
-}
-
-
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int
elf_machine_matches_host (const ElfW (Ehdr) *ehdr)
@@ -309,9 +290,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
if (profile != 0)
{
#if !defined __loongarch_soft_float
- if (RTLD_SUPPORT_LASX)
+ if (SUPPORT_LASX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx;
- else if (RTLD_SUPPORT_LSX)
+ else if (SUPPORT_LSX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx;
else
#endif
@@ -329,9 +310,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
indicated by the offset on the stack, and then jump to
the resolved address. */
#if !defined __loongarch_soft_float
- if (RTLD_SUPPORT_LASX)
+ if (SUPPORT_LASX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
- else if (RTLD_SUPPORT_LSX)
+ else if (SUPPORT_LSX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lsx;
else
#endif
diff --git a/sysdeps/loongarch/dl-tunables.list b/sysdeps/loongarch/dl-tunables.list
deleted file mode 100644
index 66b34275..00000000
--- a/sysdeps/loongarch/dl-tunables.list
+++ /dev/null
@@ -1,25 +0,0 @@
-# LoongArch specific tunables.
-# Copyright (C) 2023 Free Software Foundation, Inc.
-# This file is part of the GNU C Library.
-
-# The GNU C Library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-
-# The GNU C Library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-
-# You should have received a copy of the GNU Lesser General Public
-# License along with the GNU C Library; if not, see
-# <http://www.gnu.org/licenses/>.
-
-glibc {
- cpu {
- hwcaps {
- type: STRING
- }
- }
-}
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
deleted file mode 100644
index 1290c4ce..00000000
--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Initialize CPU feature data. LoongArch64 version.
- This file is part of the GNU C Library.
- Copyright (C) 2023 Free Software Foundation, Inc.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <cpu-features.h>
-#include <elf/dl-hwcaps.h>
-#include <elf/dl-tunables.h>
-extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) attribute_hidden;
-
-static inline void
-init_cpu_features (struct cpu_features *cpu_features)
-{
- GLRO (dl_larch_cpu_features).hwcap = GLRO (dl_hwcap);
- TUNABLE_GET (glibc, cpu, hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
-}
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
index 450963ce..d1a280a5 100644
--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
@@ -19,23 +19,13 @@
#ifndef _CPU_FEATURES_LOONGARCH64_H
#define _CPU_FEATURES_LOONGARCH64_H
-#include <stdint.h>
#include <sys/auxv.h>
-struct cpu_features
- {
- uint64_t hwcap;
- };
+#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL)
+#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
+#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
-/* Get a pointer to the CPU features structure. */
-extern const struct cpu_features *_dl_larch_get_cpu_features (void)
- __attribute__ ((pure));
-
-#define SUPPORT_UAL (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_UAL)
-#define SUPPORT_LSX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LSX)
-#define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LASX)
-#define RTLD_SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
-#define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
#define INIT_ARCH()
#endif /* _CPU_FEATURES_LOONGARCH64_H */
+
diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
deleted file mode 100644
index 6217fda9..00000000
--- a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/* Data for LoongArch64 version of processor capability information.
- Linux version.
- Copyright (C) 2023 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-/* If anything should be added here check whether the size of each string
- is still ok with the given array size.
-
- All the #ifdefs in the definitions are quite irritating but
- necessary if we want to avoid duplicating the information. There
- are three different modes:
-
- - PROCINFO_DECL is defined. This means we are only interested in
- declarations.
-
- - PROCINFO_DECL is not defined:
-
- + if SHARED is defined the file is included in an array
- initializer. The .element = { ... } syntax is needed.
-
- + if SHARED is not defined a normal array initialization is
- needed.
- */
-
-#ifndef PROCINFO_CLASS
-# define PROCINFO_CLASS
-#endif
-
-#if !IS_IN (ldconfig)
-# if !defined PROCINFO_DECL && defined SHARED
- ._dl_larch_cpu_features
-# else
-PROCINFO_CLASS struct cpu_features _dl_larch_cpu_features
-# endif
-# ifndef PROCINFO_DECL
-= { }
-# endif
-# if !defined SHARED || defined PROCINFO_DECL
-;
-# else
-,
-# endif
-#endif
-
-#undef PROCINFO_DECL
-#undef PROCINFO_CLASS
diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
deleted file mode 100644
index 455fd71a..00000000
--- a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Operating system support for run-time dynamic linker. LoongArch version.
- Copyright (C) 2023 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <config.h>
-#include <sysdeps/loongarch/cpu-tunables.c>
-#include <sysdeps/unix/sysv/linux/dl-sysdep.c>
diff --git a/sysdeps/unix/sysv/linux/loongarch/libc-start.c b/sysdeps/unix/sysv/linux/loongarch/libc-start.c
deleted file mode 100644
index f1346ece..00000000
--- a/sysdeps/unix/sysv/linux/loongarch/libc-start.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* Override csu/libc-start.c on LoongArch64.
- Copyright (C) 2023 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef SHARED
-
-/* Mark symbols hidden in static PIE for early self relocation to work. */
-# if BUILD_PIE_DEFAULT
-# pragma GCC visibility push(hidden)
-# endif
-
-# include <ldsodefs.h>
-# include <cpu-features.c>
-
-extern struct cpu_features _dl_larch_cpu_features;
-
-# define ARCH_INIT_CPU_FEATURES() init_cpu_features (&_dl_larch_cpu_features)
-
-#endif
-#include <csu/libc-start.c>
--
2.33.0
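
For context, the set_hwcaps tunable callback deleted by this revert walks the comma-separated glibc.cpu.hwcaps string and narrows the dynamic loader's hwcap mask. Below is a minimal standalone sketch of that parsing pattern; parse_hwcaps and the HW_* bit values are illustrative stand-ins, not glibc's own names.

#include <stdio.h>
#include <string.h>

/* Illustrative bits; the real HWCAP_LOONGARCH_* constants come from
   the kernel headers.  */
#define HW_UAL  (1u << 0)
#define HW_LSX  (1u << 1)
#define HW_LASX (1u << 2)

/* Accumulate a mask from a comma-separated list such as "LSX,UAL".  */
static unsigned int
parse_hwcaps (const char *p)
{
  unsigned int mask = 0;
  while (*p != '\0')
    {
      const char *c = p;
      while (*c != ',' && *c != '\0')
        c++;
      size_t len = c - p;
      if (len == 3 && memcmp (p, "UAL", 3) == 0)
        mask |= HW_UAL;
      else if (len == 3 && memcmp (p, "LSX", 3) == 0)
        mask |= HW_LSX;
      else if (len == 4 && memcmp (p, "LASX", 4) == 0)
        mask |= HW_LASX;
      else
        fprintf (stderr, "unknown feature: %.*s\n", (int) len, p);
      p = (*c == ',') ? c + 1 : c;
    }
  return mask;
}

int
main (void)
{
  /* Prints "mask = 0x3" (HW_UAL | HW_LSX).  */
  printf ("mask = %#x\n", parse_hwcaps ("LSX,UAL"));
  return 0;
}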

SUPPORTED

@ -1,496 +0,0 @@
# This file names the currently supported and somewhat tested locales.
# If you have any additions please file a glibc bug report.
SUPPORTED-LOCALES=\
C.UTF-8/UTF-8 \
aa_DJ.UTF-8/UTF-8 \
aa_DJ/ISO-8859-1 \
aa_ER/UTF-8 \
aa_ER@saaho/UTF-8 \
aa_ET/UTF-8 \
af_ZA.UTF-8/UTF-8 \
af_ZA/ISO-8859-1 \
agr_PE/UTF-8 \
ak_GH/UTF-8 \
am_ET/UTF-8 \
an_ES.UTF-8/UTF-8 \
an_ES/ISO-8859-15 \
anp_IN/UTF-8 \
ar_AE.UTF-8/UTF-8 \
ar_AE/ISO-8859-6 \
ar_BH.UTF-8/UTF-8 \
ar_BH/ISO-8859-6 \
ar_DZ.UTF-8/UTF-8 \
ar_DZ/ISO-8859-6 \
ar_EG.UTF-8/UTF-8 \
ar_EG/ISO-8859-6 \
ar_IN/UTF-8 \
ar_IQ.UTF-8/UTF-8 \
ar_IQ/ISO-8859-6 \
ar_JO.UTF-8/UTF-8 \
ar_JO/ISO-8859-6 \
ar_KW.UTF-8/UTF-8 \
ar_KW/ISO-8859-6 \
ar_LB.UTF-8/UTF-8 \
ar_LB/ISO-8859-6 \
ar_LY.UTF-8/UTF-8 \
ar_LY/ISO-8859-6 \
ar_MA.UTF-8/UTF-8 \
ar_MA/ISO-8859-6 \
ar_OM.UTF-8/UTF-8 \
ar_OM/ISO-8859-6 \
ar_QA.UTF-8/UTF-8 \
ar_QA/ISO-8859-6 \
ar_SA.UTF-8/UTF-8 \
ar_SA/ISO-8859-6 \
ar_SD.UTF-8/UTF-8 \
ar_SD/ISO-8859-6 \
ar_SS/UTF-8 \
ar_SY.UTF-8/UTF-8 \
ar_SY/ISO-8859-6 \
ar_TN.UTF-8/UTF-8 \
ar_TN/ISO-8859-6 \
ar_YE.UTF-8/UTF-8 \
ar_YE/ISO-8859-6 \
ayc_PE/UTF-8 \
az_AZ/UTF-8 \
az_IR/UTF-8 \
as_IN/UTF-8 \
ast_ES.UTF-8/UTF-8 \
ast_ES/ISO-8859-15 \
be_BY.UTF-8/UTF-8 \
be_BY/CP1251 \
be_BY@latin/UTF-8 \
bem_ZM/UTF-8 \
ber_DZ/UTF-8 \
ber_MA/UTF-8 \
bg_BG.UTF-8/UTF-8 \
bg_BG/CP1251 \
bhb_IN.UTF-8/UTF-8 \
bho_IN/UTF-8 \
bho_NP/UTF-8 \
bi_VU/UTF-8 \
bn_BD/UTF-8 \
bn_IN/UTF-8 \
bo_CN/UTF-8 \
bo_IN/UTF-8 \
br_FR.UTF-8/UTF-8 \
br_FR/ISO-8859-1 \
br_FR@euro/ISO-8859-15 \
brx_IN/UTF-8 \
bs_BA.UTF-8/UTF-8 \
bs_BA/ISO-8859-2 \
byn_ER/UTF-8 \
ca_AD.UTF-8/UTF-8 \
ca_AD/ISO-8859-15 \
ca_ES.UTF-8/UTF-8 \
ca_ES/ISO-8859-1 \
ca_ES@euro/ISO-8859-15 \
ca_ES@valencia/UTF-8 \
ca_FR.UTF-8/UTF-8 \
ca_FR/ISO-8859-15 \
ca_IT.UTF-8/UTF-8 \
ca_IT/ISO-8859-15 \
ce_RU/UTF-8 \
chr_US/UTF-8 \
cmn_TW/UTF-8 \
crh_UA/UTF-8 \
cs_CZ.UTF-8/UTF-8 \
cs_CZ/ISO-8859-2 \
csb_PL/UTF-8 \
cv_RU/UTF-8 \
cy_GB.UTF-8/UTF-8 \
cy_GB/ISO-8859-14 \
da_DK.UTF-8/UTF-8 \
da_DK/ISO-8859-1 \
da_DK.ISO-8859-15/ISO-8859-15 \
de_AT.UTF-8/UTF-8 \
de_AT/ISO-8859-1 \
de_AT@euro/ISO-8859-15 \
de_BE.UTF-8/UTF-8 \
de_BE/ISO-8859-1 \
de_BE@euro/ISO-8859-15 \
de_CH.UTF-8/UTF-8 \
de_CH/ISO-8859-1 \
de_DE.UTF-8/UTF-8 \
de_DE/ISO-8859-1 \
de_DE@euro/ISO-8859-15 \
de_IT.UTF-8/UTF-8 \
de_IT/ISO-8859-1 \
de_LI.UTF-8/UTF-8 \
de_LU.UTF-8/UTF-8 \
de_LU/ISO-8859-1 \
de_LU@euro/ISO-8859-15 \
doi_IN/UTF-8 \
dsb_DE/UTF-8 \
dv_MV/UTF-8 \
dz_BT/UTF-8 \
el_GR.UTF-8/UTF-8 \
el_GR/ISO-8859-7 \
el_GR@euro/ISO-8859-7 \
el_CY.UTF-8/UTF-8 \
el_CY/ISO-8859-7 \
en_AG/UTF-8 \
en_AU.UTF-8/UTF-8 \
en_AU/ISO-8859-1 \
en_BW.UTF-8/UTF-8 \
en_BW/ISO-8859-1 \
en_CA.UTF-8/UTF-8 \
en_CA/ISO-8859-1 \
en_DK.UTF-8/UTF-8 \
en_DK/ISO-8859-1 \
en_GB.UTF-8/UTF-8 \
en_GB/ISO-8859-1 \
en_GB.ISO-8859-15/ISO-8859-15 \
en_HK.UTF-8/UTF-8 \
en_HK/ISO-8859-1 \
en_IE.UTF-8/UTF-8 \
en_IE/ISO-8859-1 \
en_IE@euro/ISO-8859-15 \
en_IL/UTF-8 \
en_IN/UTF-8 \
en_NG/UTF-8 \
en_NZ.UTF-8/UTF-8 \
en_NZ/ISO-8859-1 \
en_PH.UTF-8/UTF-8 \
en_PH/ISO-8859-1 \
en_SC.UTF-8/UTF-8 \
en_SG.UTF-8/UTF-8 \
en_SG/ISO-8859-1 \
en_US.UTF-8/UTF-8 \
en_US/ISO-8859-1 \
en_US.ISO-8859-15/ISO-8859-15 \
en_US@ampm/UTF-8 \
en_US.UTF-8@ampm/UTF-8 \
en_ZA.UTF-8/UTF-8 \
en_ZA/ISO-8859-1 \
en_ZM/UTF-8 \
en_ZW.UTF-8/UTF-8 \
en_ZW/ISO-8859-1 \
eo/UTF-8 \
es_AR.UTF-8/UTF-8 \
es_AR/ISO-8859-1 \
es_BO.UTF-8/UTF-8 \
es_BO/ISO-8859-1 \
es_CL.UTF-8/UTF-8 \
es_CL/ISO-8859-1 \
es_CO.UTF-8/UTF-8 \
es_CO/ISO-8859-1 \
es_CR.UTF-8/UTF-8 \
es_CR/ISO-8859-1 \
es_CU/UTF-8 \
es_DO.UTF-8/UTF-8 \
es_DO/ISO-8859-1 \
es_EC.UTF-8/UTF-8 \
es_EC/ISO-8859-1 \
es_ES.UTF-8/UTF-8 \
es_ES/ISO-8859-1 \
es_ES@euro/ISO-8859-15 \
es_GT.UTF-8/UTF-8 \
es_GT/ISO-8859-1 \
es_HN.UTF-8/UTF-8 \
es_HN/ISO-8859-1 \
es_MX.UTF-8/UTF-8 \
es_MX/ISO-8859-1 \
es_NI.UTF-8/UTF-8 \
es_NI/ISO-8859-1 \
es_PA.UTF-8/UTF-8 \
es_PA/ISO-8859-1 \
es_PE.UTF-8/UTF-8 \
es_PE/ISO-8859-1 \
es_PR.UTF-8/UTF-8 \
es_PR/ISO-8859-1 \
es_PY.UTF-8/UTF-8 \
es_PY/ISO-8859-1 \
es_SV.UTF-8/UTF-8 \
es_SV/ISO-8859-1 \
es_US.UTF-8/UTF-8 \
es_US/ISO-8859-1 \
es_UY.UTF-8/UTF-8 \
es_UY/ISO-8859-1 \
es_VE.UTF-8/UTF-8 \
es_VE/ISO-8859-1 \
et_EE.UTF-8/UTF-8 \
et_EE/ISO-8859-1 \
et_EE.ISO-8859-15/ISO-8859-15 \
eu_ES.UTF-8/UTF-8 \
eu_ES/ISO-8859-1 \
eu_ES@euro/ISO-8859-15 \
fa_IR/UTF-8 \
ff_SN/UTF-8 \
fi_FI.UTF-8/UTF-8 \
fi_FI/ISO-8859-1 \
fi_FI@euro/ISO-8859-15 \
fil_PH/UTF-8 \
fo_FO.UTF-8/UTF-8 \
fo_FO/ISO-8859-1 \
fr_BE.UTF-8/UTF-8 \
fr_BE/ISO-8859-1 \
fr_BE@euro/ISO-8859-15 \
fr_CA.UTF-8/UTF-8 \
fr_CA/ISO-8859-1 \
fr_CH.UTF-8/UTF-8 \
fr_CH/ISO-8859-1 \
fr_FR.UTF-8/UTF-8 \
fr_FR/ISO-8859-1 \
fr_FR@euro/ISO-8859-15 \
fr_LU.UTF-8/UTF-8 \
fr_LU/ISO-8859-1 \
fr_LU@euro/ISO-8859-15 \
fur_IT/UTF-8 \
fy_NL/UTF-8 \
fy_DE/UTF-8 \
ga_IE.UTF-8/UTF-8 \
ga_IE/ISO-8859-1 \
ga_IE@euro/ISO-8859-15 \
gd_GB.UTF-8/UTF-8 \
gd_GB/ISO-8859-15 \
gez_ER/UTF-8 \
gez_ER@abegede/UTF-8 \
gez_ET/UTF-8 \
gez_ET@abegede/UTF-8 \
gl_ES.UTF-8/UTF-8 \
gl_ES/ISO-8859-1 \
gl_ES@euro/ISO-8859-15 \
gu_IN/UTF-8 \
gv_GB.UTF-8/UTF-8 \
gv_GB/ISO-8859-1 \
ha_NG/UTF-8 \
hak_TW/UTF-8 \
he_IL.UTF-8/UTF-8 \
he_IL/ISO-8859-8 \
hi_IN/UTF-8 \
hif_FJ/UTF-8 \
hne_IN/UTF-8 \
hr_HR.UTF-8/UTF-8 \
hr_HR/ISO-8859-2 \
hsb_DE/ISO-8859-2 \
hsb_DE.UTF-8/UTF-8 \
ht_HT/UTF-8 \
hu_HU.UTF-8/UTF-8 \
hu_HU/ISO-8859-2 \
hy_AM/UTF-8 \
hy_AM.ARMSCII-8/ARMSCII-8 \
ia_FR/UTF-8 \
id_ID.UTF-8/UTF-8 \
id_ID/ISO-8859-1 \
ig_NG/UTF-8 \
ik_CA/UTF-8 \
is_IS.UTF-8/UTF-8 \
is_IS/ISO-8859-1 \
it_CH.UTF-8/UTF-8 \
it_CH/ISO-8859-1 \
it_IT.UTF-8/UTF-8 \
it_IT/ISO-8859-1 \
it_IT@euro/ISO-8859-15 \
iu_CA/UTF-8 \
ja_JP.EUC-JP/EUC-JP \
ja_JP.UTF-8/UTF-8 \
ka_GE.UTF-8/UTF-8 \
ka_GE/GEORGIAN-PS \
kab_DZ/UTF-8 \
kk_KZ.UTF-8/UTF-8 \
kk_KZ/PT154 \
kl_GL.UTF-8/UTF-8 \
kl_GL/ISO-8859-1 \
km_KH/UTF-8 \
kn_IN/UTF-8 \
ko_KR.EUC-KR/EUC-KR \
ko_KR.UTF-8/UTF-8 \
kok_IN/UTF-8 \
ks_IN/UTF-8 \
ks_IN@devanagari/UTF-8 \
ku_TR.UTF-8/UTF-8 \
ku_TR/ISO-8859-9 \
kw_GB.UTF-8/UTF-8 \
kw_GB/ISO-8859-1 \
ky_KG/UTF-8 \
lb_LU/UTF-8 \
lg_UG.UTF-8/UTF-8 \
lg_UG/ISO-8859-10 \
li_BE/UTF-8 \
li_NL/UTF-8 \
lij_IT/UTF-8 \
ln_CD/UTF-8 \
lo_LA/UTF-8 \
lt_LT.UTF-8/UTF-8 \
lt_LT/ISO-8859-13 \
lv_LV.UTF-8/UTF-8 \
lv_LV/ISO-8859-13 \
lzh_TW/UTF-8 \
mag_IN/UTF-8 \
mai_IN/UTF-8 \
mai_NP/UTF-8 \
mfe_MU/UTF-8 \
mg_MG.UTF-8/UTF-8 \
mg_MG/ISO-8859-15 \
mhr_RU/UTF-8 \
mi_NZ.UTF-8/UTF-8 \
mi_NZ/ISO-8859-13 \
miq_NI/UTF-8 \
mjw_IN/UTF-8 \
mk_MK.UTF-8/UTF-8 \
mk_MK/ISO-8859-5 \
ml_IN/UTF-8 \
mn_MN/UTF-8 \
mni_IN/UTF-8 \
mr_IN/UTF-8 \
ms_MY.UTF-8/UTF-8 \
ms_MY/ISO-8859-1 \
mt_MT.UTF-8/UTF-8 \
mt_MT/ISO-8859-3 \
my_MM/UTF-8 \
nan_TW/UTF-8 \
nan_TW@latin/UTF-8 \
nb_NO.UTF-8/UTF-8 \
nb_NO/ISO-8859-1 \
nds_DE/UTF-8 \
nds_NL/UTF-8 \
ne_NP/UTF-8 \
nhn_MX/UTF-8 \
niu_NU/UTF-8 \
niu_NZ/UTF-8 \
nl_AW/UTF-8 \
nl_BE.UTF-8/UTF-8 \
nl_BE/ISO-8859-1 \
nl_BE@euro/ISO-8859-15 \
nl_NL.UTF-8/UTF-8 \
nl_NL/ISO-8859-1 \
nl_NL@euro/ISO-8859-15 \
nn_NO.UTF-8/UTF-8 \
nn_NO/ISO-8859-1 \
nr_ZA/UTF-8 \
nso_ZA/UTF-8 \
oc_FR.UTF-8/UTF-8 \
oc_FR/ISO-8859-1 \
om_ET/UTF-8 \
om_KE.UTF-8/UTF-8 \
om_KE/ISO-8859-1 \
or_IN/UTF-8 \
os_RU/UTF-8 \
pa_IN/UTF-8 \
pa_PK/UTF-8 \
pap_AW/UTF-8 \
pap_CW/UTF-8 \
pl_PL.UTF-8/UTF-8 \
pl_PL/ISO-8859-2 \
ps_AF/UTF-8 \
pt_BR.UTF-8/UTF-8 \
pt_BR/ISO-8859-1 \
pt_PT.UTF-8/UTF-8 \
pt_PT/ISO-8859-1 \
pt_PT@euro/ISO-8859-15 \
quz_PE/UTF-8 \
raj_IN/UTF-8 \
ro_RO.UTF-8/UTF-8 \
ro_RO/ISO-8859-2 \
ru_RU.KOI8-R/KOI8-R \
ru_RU.UTF-8/UTF-8 \
ru_RU/ISO-8859-5 \
ru_UA.UTF-8/UTF-8 \
ru_UA/KOI8-U \
rw_RW/UTF-8 \
sa_IN/UTF-8 \
sah_RU/UTF-8 \
sat_IN/UTF-8 \
sc_IT/UTF-8 \
sd_IN/UTF-8 \
sd_IN@devanagari/UTF-8 \
se_NO/UTF-8 \
sgs_LT/UTF-8 \
shn_MM/UTF-8 \
shs_CA/UTF-8 \
si_LK/UTF-8 \
sid_ET/UTF-8 \
sk_SK.UTF-8/UTF-8 \
sk_SK/ISO-8859-2 \
sl_SI.UTF-8/UTF-8 \
sl_SI/ISO-8859-2 \
sm_WS/UTF-8 \
so_DJ.UTF-8/UTF-8 \
so_DJ/ISO-8859-1 \
so_ET/UTF-8 \
so_KE.UTF-8/UTF-8 \
so_KE/ISO-8859-1 \
so_SO.UTF-8/UTF-8 \
so_SO/ISO-8859-1 \
sq_AL.UTF-8/UTF-8 \
sq_AL/ISO-8859-1 \
sq_MK/UTF-8 \
sr_ME/UTF-8 \
sr_RS/UTF-8 \
sr_RS@latin/UTF-8 \
ss_ZA/UTF-8 \
st_ZA.UTF-8/UTF-8 \
st_ZA/ISO-8859-1 \
sv_FI.UTF-8/UTF-8 \
sv_FI/ISO-8859-1 \
sv_FI@euro/ISO-8859-15 \
sv_SE.UTF-8/UTF-8 \
sv_SE/ISO-8859-1 \
sv_SE.ISO-8859-15/ISO-8859-15 \
sw_KE/UTF-8 \
sw_TZ/UTF-8 \
szl_PL/UTF-8 \
ta_IN/UTF-8 \
ta_LK/UTF-8 \
tcy_IN.UTF-8/UTF-8 \
te_IN/UTF-8 \
tg_TJ.UTF-8/UTF-8 \
tg_TJ/KOI8-T \
th_TH.UTF-8/UTF-8 \
th_TH/TIS-620 \
the_NP/UTF-8 \
ti_ER/UTF-8 \
ti_ET/UTF-8 \
tig_ER/UTF-8 \
tk_TM/UTF-8 \
tl_PH.UTF-8/UTF-8 \
tl_PH/ISO-8859-1 \
tn_ZA/UTF-8 \
to_TO/UTF-8 \
tpi_PG/UTF-8 \
tr_CY.UTF-8/UTF-8 \
tr_CY/ISO-8859-9 \
tr_TR.UTF-8/UTF-8 \
tr_TR/ISO-8859-9 \
ts_ZA/UTF-8 \
tt_RU/UTF-8 \
tt_RU@iqtelif/UTF-8 \
ug_CN/UTF-8 \
uk_UA.UTF-8/UTF-8 \
uk_UA/KOI8-U \
unm_US/UTF-8 \
ur_IN/UTF-8 \
ur_PK/UTF-8 \
uz_UZ.UTF-8/UTF-8 \
uz_UZ/ISO-8859-1 \
uz_UZ@cyrillic/UTF-8 \
ve_ZA/UTF-8 \
vi_VN/UTF-8 \
wa_BE/ISO-8859-1 \
wa_BE@euro/ISO-8859-15 \
wa_BE.UTF-8/UTF-8 \
wae_CH/UTF-8 \
wal_ET/UTF-8 \
wo_SN/UTF-8 \
xh_ZA.UTF-8/UTF-8 \
xh_ZA/ISO-8859-1 \
yi_US.UTF-8/UTF-8 \
yi_US/CP1255 \
yo_NG/UTF-8 \
yue_HK/UTF-8 \
yuw_PG/UTF-8 \
zh_CN.GB18030/GB18030 \
zh_CN.GBK/GBK \
zh_CN.UTF-8/UTF-8 \
zh_CN/GB2312 \
zh_HK.UTF-8/UTF-8 \
zh_HK/BIG5-HKSCS \
zh_SG.UTF-8/UTF-8 \
zh_SG.GBK/GBK \
zh_SG/GB2312 \
zh_TW.EUC-TW/EUC-TW \
zh_TW.UTF-8/UTF-8 \
zh_TW/BIG5 \
zu_ZA.UTF-8/UTF-8 \
zu_ZA/ISO-8859-1 \


@ -1,862 +0,0 @@
#define _GNU_SOURCE
#include <assert.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <locale.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include "../locale/hashval.h"
#define __LC_LAST 13
#include "../locale/locarchive.h"
#include "../crypt/md5.h"
const char *alias_file = DATADIR "/locale/locale.alias";
const char *locar_file = PREFIX "/lib/locale/locale-archive";
const char *tmpl_file = PREFIX "/lib/locale/locale-archive.tmpl";
const char *loc_path = PREFIX "/lib/locale/";
/* Flags set by `--verbose` option. */
int be_quiet = 1;
int verbose = 0;
int max_locarchive_open_retry = 10;
const char *output_prefix;
/* Endianness should have been taken care of by localedef. We don't need to do
additional swapping. We need this variable exported however, since
locarchive.c uses it to determine if it needs to swap endianness of a value
before writing to or reading from the archive. */
bool swap_endianness_p = false;
static const char *locnames[] =
{
#define DEFINE_CATEGORY(category, category_name, items, a) \
[category] = category_name,
#include "../locale/categories.def"
#undef DEFINE_CATEGORY
};
static int
is_prime (unsigned long candidate)
{
/* No even number and none less than 10 will be passed here. */
unsigned long int divn = 3;
unsigned long int sq = divn * divn;
while (sq < candidate && candidate % divn != 0)
{
++divn;
sq += 4 * divn;
++divn;
}
return candidate % divn != 0;
}
unsigned long
next_prime (unsigned long seed)
{
/* Make it definitely odd. */
seed |= 1;
while (!is_prime (seed))
seed += 2;
return seed;
}
void
error (int status, int errnum, const char *message, ...)
{
va_list args;
va_start (args, message);
fflush (stdout);
fprintf (stderr, "%s: ", program_invocation_name);
vfprintf (stderr, message, args);
va_end (args);
if (errnum)
fprintf (stderr, ": %s", strerror (errnum));
putc ('\n', stderr);
fflush (stderr);
if (status)
exit (errnum == EROFS ? 0 : status);
}
void *
xmalloc (size_t size)
{
void *p = malloc (size);
if (p == NULL)
error (EXIT_FAILURE, errno, "could not allocate %zd bytes of memory", size);
return p;
}
static void
open_tmpl_archive (struct locarhandle *ah)
{
struct stat64 st;
int fd;
struct locarhead head;
const char *archivefname = ah->fname == NULL ? tmpl_file : ah->fname;
/* Open the archive. We must have exclusive write access. */
fd = open64 (archivefname, O_RDONLY);
if (fd == -1)
error (EXIT_FAILURE, errno, "cannot open locale archive template file \"%s\"",
archivefname);
if (fstat64 (fd, &st) < 0)
error (EXIT_FAILURE, errno, "cannot stat locale archive template file \"%s\"",
archivefname);
/* Read the header. */
if (TEMP_FAILURE_RETRY (read (fd, &head, sizeof (head))) != sizeof (head))
error (EXIT_FAILURE, errno, "cannot read archive header");
ah->fd = fd;
ah->mmaped = (head.sumhash_offset
+ head.sumhash_size * sizeof (struct sumhashent));
if (ah->mmaped > (unsigned long) st.st_size)
error (EXIT_FAILURE, 0, "locale archive template file truncated");
ah->mmaped = st.st_size;
ah->reserved = st.st_size;
/* Now we know how large the administrative information part is.
Map all of it. */
ah->addr = mmap64 (NULL, ah->mmaped, PROT_READ, MAP_SHARED, fd, 0);
if (ah->addr == MAP_FAILED)
error (EXIT_FAILURE, errno, "cannot map archive header");
}
/* Open the locale archive. */
extern void open_archive (struct locarhandle *ah, bool readonly);
/* Close the locale archive. */
extern void close_archive (struct locarhandle *ah);
/* Add given locale data to the archive. */
extern int add_locale_to_archive (struct locarhandle *ah, const char *name,
locale_data_t data, bool replace);
extern void add_alias (struct locarhandle *ah, const char *alias,
bool replace, const char *oldname,
uint32_t *locrec_offset_p);
extern struct namehashent *
insert_name (struct locarhandle *ah,
const char *name, size_t name_len, bool replace);
struct nameent
{
char *name;
struct locrecent *locrec;
};
struct dataent
{
const unsigned char *sum;
uint32_t file_offset;
};
static int
nameentcmp (const void *a, const void *b)
{
struct locrecent *la = ((const struct nameent *) a)->locrec;
struct locrecent *lb = ((const struct nameent *) b)->locrec;
uint32_t start_a = -1, end_a = 0;
uint32_t start_b = -1, end_b = 0;
int cnt;
for (cnt = 0; cnt < __LC_LAST; ++cnt)
if (cnt != LC_ALL)
{
if (la->record[cnt].offset < start_a)
start_a = la->record[cnt].offset;
if (la->record[cnt].offset + la->record[cnt].len > end_a)
end_a = la->record[cnt].offset + la->record[cnt].len;
}
assert (start_a != (uint32_t)-1);
assert (end_a != 0);
for (cnt = 0; cnt < __LC_LAST; ++cnt)
if (cnt != LC_ALL)
{
if (lb->record[cnt].offset < start_b)
start_b = lb->record[cnt].offset;
if (lb->record[cnt].offset + lb->record[cnt].len > end_b)
end_b = lb->record[cnt].offset + lb->record[cnt].len;
}
assert (start_b != (uint32_t)-1);
assert (end_b != 0);
if (start_a != start_b)
return (int)start_a - (int)start_b;
return (int)end_a - (int)end_b;
}
static int
dataentcmp (const void *a, const void *b)
{
if (((const struct dataent *) a)->file_offset
< ((const struct dataent *) b)->file_offset)
return -1;
if (((const struct dataent *) a)->file_offset
> ((const struct dataent *) b)->file_offset)
return 1;
return 0;
}
static int
sumsearchfn (const void *key, const void *ent)
{
uint32_t keyn = *(uint32_t *)key;
uint32_t entn = ((struct dataent *)ent)->file_offset;
if (keyn < entn)
return -1;
if (keyn > entn)
return 1;
return 0;
}
static void
compute_data (struct locarhandle *ah, struct nameent *name, size_t sumused,
struct dataent *files, locale_data_t data)
{
int cnt;
struct locrecent *locrec = name->locrec;
struct dataent *file;
data[LC_ALL].addr = ((char *) ah->addr) + locrec->record[LC_ALL].offset;
data[LC_ALL].size = locrec->record[LC_ALL].len;
for (cnt = 0; cnt < __LC_LAST; ++cnt)
if (cnt != LC_ALL)
{
data[cnt].addr = ((char *) ah->addr) + locrec->record[cnt].offset;
data[cnt].size = locrec->record[cnt].len;
if (data[cnt].addr >= data[LC_ALL].addr
&& data[cnt].addr + data[cnt].size
<= data[LC_ALL].addr + data[LC_ALL].size)
__md5_buffer (data[cnt].addr, data[cnt].size, data[cnt].sum);
else
{
file = bsearch (&locrec->record[cnt].offset, files, sumused,
sizeof (*files), sumsearchfn);
if (file == NULL)
error (EXIT_FAILURE, 0, "inconsistent template file");
memcpy (data[cnt].sum, file->sum, sizeof (data[cnt].sum));
}
}
}
static int
fill_archive (struct locarhandle *tmpl_ah,
const char *fname,
size_t install_langs_count, char *install_langs_list[],
size_t nlist, char *list[],
const char *primary)
{
struct locarhandle ah;
struct locarhead *head;
int result = 0;
struct nameent *names;
struct namehashent *namehashtab;
size_t cnt, used;
struct dataent *files;
struct sumhashent *sumhashtab;
size_t sumused;
struct locrecent *primary_locrec = NULL;
struct nameent *primary_nameent = NULL;
head = tmpl_ah->addr;
names = (struct nameent *) malloc (head->namehash_used
* sizeof (struct nameent));
files = (struct dataent *) malloc (head->sumhash_used
* sizeof (struct dataent));
if (names == NULL || files == NULL)
error (EXIT_FAILURE, errno, "could not allocate tables");
namehashtab = (struct namehashent *) ((char *) tmpl_ah->addr
+ head->namehash_offset);
sumhashtab = (struct sumhashent *) ((char *) tmpl_ah->addr
+ head->sumhash_offset);
for (cnt = used = 0; cnt < head->namehash_size; ++cnt)
if (namehashtab[cnt].locrec_offset != 0)
{
char * name;
int i;
assert (used < head->namehash_used);
name = tmpl_ah->addr + namehashtab[cnt].name_offset;
if (install_langs_count == 0)
{
/* Always install the entry. */
names[used].name = name;
names[used++].locrec
= (struct locrecent *) ((char *) tmpl_ah->addr +
namehashtab[cnt].locrec_offset);
}
else
{
/* Only install the entry if the user asked for it via
--install-langs. */
for (i = 0; i < install_langs_count; i++)
{
/* Add one for "_" and one for the null terminator. */
size_t len = strlen (install_langs_list[i]) + 2;
char *install_lang = (char *)xmalloc (len);
strcpy (install_lang, install_langs_list[i]);
if (strchr (install_lang, '_') == NULL)
strcat (install_lang, "_");
if (strncmp (name, install_lang, strlen (install_lang)) == 0)
{
names[used].name = name;
names[used++].locrec
= (struct locrecent *) ((char *)tmpl_ah->addr
+ namehashtab[cnt].locrec_offset);
}
free (install_lang);
}
}
}
/* Sort the names. */
qsort (names, used, sizeof (struct nameent), nameentcmp);
for (cnt = sumused = 0; cnt < head->sumhash_size; ++cnt)
if (sumhashtab[cnt].file_offset != 0)
{
assert (sumused < head->sumhash_used);
files[sumused].sum = (const unsigned char *) sumhashtab[cnt].sum;
files[sumused++].file_offset = sumhashtab[cnt].file_offset;
}
/* Sort by file locations. */
qsort (files, sumused, sizeof (struct dataent), dataentcmp);
/* Open the archive. This call never returns if we cannot
successfully open the archive. */
ah.fname = NULL;
if (fname != NULL)
ah.fname = fname;
open_archive (&ah, false);
if (primary != NULL)
{
for (cnt = 0; cnt < used; ++cnt)
if (strcmp (names[cnt].name, primary) == 0)
break;
if (cnt < used)
{
locale_data_t data;
compute_data (tmpl_ah, &names[cnt], sumused, files, data);
result |= add_locale_to_archive (&ah, primary, data, 0);
primary_locrec = names[cnt].locrec;
primary_nameent = &names[cnt];
}
}
for (cnt = 0; cnt < used; ++cnt)
if (&names[cnt] == primary_nameent)
continue;
else if ((cnt > 0 && names[cnt - 1].locrec == names[cnt].locrec)
|| names[cnt].locrec == primary_locrec)
{
const char *oldname;
struct namehashent *namehashent;
uint32_t locrec_offset;
if (names[cnt].locrec == primary_locrec)
oldname = primary;
else
oldname = names[cnt - 1].name;
namehashent = insert_name (&ah, oldname, strlen (oldname), true);
assert (namehashent->name_offset != 0);
assert (namehashent->locrec_offset != 0);
locrec_offset = namehashent->locrec_offset;
add_alias (&ah, names[cnt].name, 0, oldname, &locrec_offset);
}
else
{
locale_data_t data;
compute_data (tmpl_ah, &names[cnt], sumused, files, data);
result |= add_locale_to_archive (&ah, names[cnt].name, data, 0);
}
while (nlist-- > 0)
{
const char *fname = *list++;
size_t fnamelen = strlen (fname);
struct stat64 st;
DIR *dirp;
struct dirent64 *d;
int seen;
locale_data_t data;
int cnt;
/* First see whether this really is a directory and whether it
contains all the required locale category files. */
if (stat64 (fname, &st) < 0)
{
error (0, 0, "stat of \"%s\" failed: %s: ignored", fname,
strerror (errno));
continue;
}
if (!S_ISDIR (st.st_mode))
{
error (0, 0, "\"%s\" is no directory; ignored", fname);
continue;
}
dirp = opendir (fname);
if (dirp == NULL)
{
error (0, 0, "cannot open directory \"%s\": %s: ignored",
fname, strerror (errno));
continue;
}
seen = 0;
while ((d = readdir64 (dirp)) != NULL)
{
for (cnt = 0; cnt < __LC_LAST; ++cnt)
if (cnt != LC_ALL)
if (strcmp (d->d_name, locnames[cnt]) == 0)
{
unsigned char d_type;
/* We have an object of the required name. If it's
a directory we have to look at a file with the
prefix "SYS_". Otherwise we have found what we
are looking for. */
#ifdef _DIRENT_HAVE_D_TYPE
d_type = d->d_type;
if (d_type != DT_REG)
#endif
{
char fullname[fnamelen + 2 * strlen (d->d_name) + 7];
#ifdef _DIRENT_HAVE_D_TYPE
if (d_type == DT_UNKNOWN || d_type == DT_LNK)
#endif
{
strcpy (stpcpy (stpcpy (fullname, fname), "/"),
d->d_name);
if (stat64 (fullname, &st) == -1)
/* We cannot stat the file, ignore it. */
break;
d_type = IFTODT (st.st_mode);
}
if (d_type == DT_DIR)
{
/* We have to do more tests. The file is a
directory and it therefore must contain a
regular file with the same name except a
"SYS_" prefix. */
char *t = stpcpy (stpcpy (fullname, fname), "/");
strcpy (stpcpy (stpcpy (t, d->d_name), "/SYS_"),
d->d_name);
if (stat64 (fullname, &st) == -1)
/* There is no SYS_* file or we cannot
access it. */
break;
d_type = IFTODT (st.st_mode);
}
}
/* If we found a regular file (eventually after
following a symlink) we are successful. */
if (d_type == DT_REG)
++seen;
break;
}
}
closedir (dirp);
if (seen != __LC_LAST - 1)
{
/* We don't have all locale category files. Ignore the name. */
error (0, 0, "incomplete set of locale files in \"%s\"",
fname);
continue;
}
/* Add the files to the archive. To do this we first compute
sizes and the MD5 sums of all the files. */
for (cnt = 0; cnt < __LC_LAST; ++cnt)
if (cnt != LC_ALL)
{
char fullname[fnamelen + 2 * strlen (locnames[cnt]) + 7];
int fd;
strcpy (stpcpy (stpcpy (fullname, fname), "/"), locnames[cnt]);
fd = open64 (fullname, O_RDONLY);
if (fd == -1 || fstat64 (fd, &st) == -1)
{
/* Cannot read the file. */
if (fd != -1)
close (fd);
break;
}
if (S_ISDIR (st.st_mode))
{
char *t;
close (fd);
t = stpcpy (stpcpy (fullname, fname), "/");
strcpy (stpcpy (stpcpy (t, locnames[cnt]), "/SYS_"),
locnames[cnt]);
fd = open64 (fullname, O_RDONLY);
if (fd == -1 || fstat64 (fd, &st) == -1
|| !S_ISREG (st.st_mode))
{
if (fd != -1)
close (fd);
break;
}
}
/* Map the file. */
data[cnt].addr = mmap64 (NULL, st.st_size, PROT_READ, MAP_SHARED,
fd, 0);
if (data[cnt].addr == MAP_FAILED)
{
/* Cannot map it. */
close (fd);
break;
}
data[cnt].size = st.st_size;
__md5_buffer (data[cnt].addr, st.st_size, data[cnt].sum);
/* We don't need the file descriptor anymore. */
close (fd);
}
if (cnt != __LC_LAST)
{
while (cnt-- > 0)
if (cnt != LC_ALL)
munmap (data[cnt].addr, data[cnt].size);
error (0, 0, "cannot read all files in \"%s\": ignored", fname);
continue;
}
result |= add_locale_to_archive (&ah, basename (fname), data, 0);
for (cnt = 0; cnt < __LC_LAST; ++cnt)
if (cnt != LC_ALL)
munmap (data[cnt].addr, data[cnt].size);
}
/* We are done. */
close_archive (&ah);
return result;
}
void usage()
{
printf ("\
Usage: build-locale-archive [OPTION]... [TEMPLATE-FILE] [ARCHIVE-FILE]\n\
Builds a locale archive from a template file.\n\
Options:\n\
-h, --help Print this usage message.\n\
-v, --verbose Verbose execution.\n\
-l, --install-langs=LIST Only include locales given in LIST into the \n\
locale archive. LIST is a colon separated list\n\
of locale prefixes, for example \"de:en:ja\".\n\
The special argument \"all\" means to install\n\
all languages and it must be present by itself.\n\
If \"all\" is present with any other language it\n\
will be treated as the name of a locale.\n\
If the --install-langs option is missing, all\n\
locales are installed. The colon separated list\n\
can contain any strings matching the beginning of\n\
locale names.\n\
If a string does not contain a \"_\", it is added.\n\
Examples:\n\
--install-langs=\"en\"\n\
installs en_US, en_US.iso88591,\n\
en_US.iso885915, en_US.utf8,\n\
en_GB ...\n\
--install-langs=\"en_US.utf8\"\n\
installs only en_US.utf8.\n\
--install-langs=\"ko\"\n\
installs ko_KR, ko_KR.euckr,\n\
ko_KR.utf8 but *not* kok_IN\n\
because \"ko\" does not contain\n\
\"_\" and it is silently added\n\
--install-langs\"ko:kok\"\n\
installs ko_KR, ko_KR.euckr,\n\
ko_KR.utf8, kok_IN, and\n\
kok_IN.utf8.\n\
--install-langs=\"POSIX\" will\n\
install *no* locales at all\n\
because POSIX matches none of\n\
the locales. Actually, any string\n\
matching nothing will do that.\n\
POSIX and C will always be\n\
available because they are\n\
builtin.\n\
Aliases are installed as well,\n\
i.e. --install-langs=\"de\"\n\
will install not only every locale starting with\n\
\"de\" but also the aliases \"deutsch\"\n\
and \"german\", although the latter does not\n\
start with \"de\".\n\
\n\
If the arguments TEMPLATE-FILE and ARCHIVE-FILE are not given, the default\n\
locations where the glibc in use expects these files are used.\n\
");
}
int main (int argc, char *argv[])
{
char path[4096];
DIR *dirp;
struct dirent64 *d;
struct stat64 st;
char *list[16384], *primary;
char *lang;
int install_langs_count = 0;
int i;
char *install_langs_arg, *ila_start;
char **install_langs_list = NULL;
unsigned int cnt = 0;
struct locarhandle tmpl_ah;
char *new_locar_fname = NULL;
size_t loc_path_len = strlen (loc_path);
while (1)
{
int c;
static struct option long_options[] =
{
{"help", no_argument, 0, 'h'},
{"verbose", no_argument, 0, 'v'},
{"install-langs", required_argument, 0, 'l'},
{0, 0, 0, 0}
};
/* getopt_long stores the option index here. */
int option_index = 0;
c = getopt_long (argc, argv, "vhl:",
long_options, &option_index);
/* Detect the end of the options. */
if (c == -1)
break;
switch (c)
{
case 0:
printf ("unknown option %s", long_options[option_index].name);
if (optarg)
printf (" with arg %s", optarg);
printf ("\n");
usage ();
exit (1);
case 'v':
verbose = 1;
be_quiet = 0;
break;
case 'h':
usage ();
exit (0);
case 'l':
install_langs_arg = ila_start = strdup (optarg);
/* If the argument to --install-langs is "all", do
not limit the list of languages to install and install
them all. We do not support installing a single locale
called "all". */
#define MAGIC_INSTALL_ALL "all"
if (install_langs_arg != NULL
&& install_langs_arg[0] != '\0'
&& !(strncmp(install_langs_arg, MAGIC_INSTALL_ALL,
strlen(MAGIC_INSTALL_ALL)) == 0
&& strlen (install_langs_arg) == 3))
{
/* Count the number of languages we will install. */
while (true)
{
lang = strtok(install_langs_arg, ":;,");
if (lang == NULL)
break;
install_langs_count++;
install_langs_arg = NULL;
}
free (ila_start);
/* Reject an entire string made up of delimiters. */
if (install_langs_count == 0)
break;
/* Copy the list. */
install_langs_list = (char **)xmalloc (sizeof(char *) * install_langs_count);
install_langs_arg = ila_start = strdup (optarg);
install_langs_count = 0;
while (true)
{
lang = strtok(install_langs_arg, ":;,");
if (lang == NULL)
break;
install_langs_list[install_langs_count] = lang;
install_langs_count++;
install_langs_arg = NULL;
}
}
break;
case '?':
/* getopt_long already printed an error message. */
usage ();
exit (0);
default:
abort ();
}
}
tmpl_ah.fname = NULL;
if (optind < argc)
tmpl_ah.fname = argv[optind];
if (optind + 1 < argc)
new_locar_fname = argv[optind + 1];
if (verbose)
{
if (tmpl_ah.fname)
printf("input archive file specified on command line: %s\n",
tmpl_ah.fname);
else
printf("using default input archive file.\n");
if (new_locar_fname)
printf("output archive file specified on command line: %s\n",
new_locar_fname);
else
printf("using default output archive file.\n");
}
dirp = opendir (loc_path);
if (dirp == NULL)
error (EXIT_FAILURE, errno, "cannot open directory \"%s\"", loc_path);
open_tmpl_archive (&tmpl_ah);
if (new_locar_fname)
unlink (new_locar_fname);
else
unlink (locar_file);
primary = getenv ("LC_ALL");
if (primary == NULL)
primary = getenv ("LANG");
if (primary != NULL)
{
if (strncmp (primary, "ja", 2) != 0
&& strncmp (primary, "ko", 2) != 0
&& strncmp (primary, "zh", 2) != 0)
{
char *ptr = malloc (strlen (primary) + strlen (".utf8") + 1), *p, *q;
/* This leads to invalid locales sometimes:
de_DE.iso885915@euro -> de_DE.utf8@euro */
if (ptr != NULL)
{
p = ptr;
q = primary;
while (*q && *q != '.' && *q != '@')
*p++ = *q++;
if (*q == '.')
while (*q && *q != '@')
q++;
p = stpcpy (p, ".utf8");
strcpy (p, q);
primary = ptr;
}
else
primary = NULL;
}
}
memcpy (path, loc_path, loc_path_len);
while ((d = readdir64 (dirp)) != NULL)
{
if (strcmp (d->d_name, ".") == 0 || strcmp (d->d_name, "..") == 0)
continue;
if (strchr (d->d_name, '_') == NULL)
continue;
size_t d_name_len = strlen (d->d_name);
if (loc_path_len + d_name_len + 1 > sizeof (path))
{
error (0, 0, "too long filename \"%s\"", d->d_name);
continue;
}
memcpy (path + loc_path_len, d->d_name, d_name_len + 1);
if (stat64 (path, &st) < 0)
{
error (0, errno, "cannot stat \"%s\"", path);
continue;
}
if (! S_ISDIR (st.st_mode))
continue;
if (cnt == 16384)
{
error (0, 0, "too many directories in \"%s\"", loc_path);
break;
}
list[cnt] = strdup (path);
if (list[cnt] == NULL)
{
error (0, errno, "cannot add file to list \"%s\"", path);
continue;
}
if (primary != NULL && cnt > 0 && strcmp (primary, d->d_name) == 0)
{
char *p = list[0];
list[0] = list[cnt];
list[cnt] = p;
}
cnt++;
}
closedir (dirp);
/* Store the archive to the file specified as the second argument on the
command line or the default locale archive. */
fill_archive (&tmpl_ah, new_locar_fname,
install_langs_count, install_langs_list,
cnt, list, primary);
close_archive (&tmpl_ah);
truncate (tmpl_file, 0);
if (install_langs_count > 0)
{
free (ila_start);
free (install_langs_list);
}
char *tz_argv[] = { "/usr/sbin/tzdata-update", NULL };
execve (tz_argv[0], (char *const *)tz_argv, (char *const *)&tz_argv[1]);
exit (0);
}
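
The prefix-matching rule spelled out in the usage text ("if a string does not contain a \"_\", it is added") is easy to check in isolation. Below is a small sketch of that normalization; lang_matches is an illustrative helper name, not part of the original program.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Return 1 if LOCALE is selected by the --install-langs entry LANG.  */
static int
lang_matches (const char *locale, const char *lang)
{
  /* Room for a trailing '_' and the NUL terminator.  */
  char *pattern = malloc (strlen (lang) + 2);
  if (pattern == NULL)
    return 0;
  strcpy (pattern, lang);
  if (strchr (pattern, '_') == NULL)
    strcat (pattern, "_");
  int match = strncmp (locale, pattern, strlen (pattern)) == 0;
  free (pattern);
  return match;
}

int
main (void)
{
  printf ("%d\n", lang_matches ("ko_KR.utf8", "ko"));      /* 1 */
  printf ("%d\n", lang_matches ("kok_IN", "ko"));          /* 0: "ko_" is used */
  printf ("%d\n", lang_matches ("en_US.utf8", "en_US"));   /* 1 */
  return 0;
}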

dist

@ -1 +0,0 @@
an8_10


@ -1 +0,0 @@
c81d2388896379997bc359d4f2084239 glibc-2.28.tar.xz


@ -0,0 +1,39 @@
From fc60db3cf29ba157d09ba4f4b92e3ab382b0339d Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Wed, 9 Aug 2023 19:12:54 +0800
Subject: [PATCH 04/29] elf: Add new LoongArch reloc types (101 to 108) into
elf.h
These reloc types are generated by GNU assembler >= 2.41 for relaxation
support.
Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=57a930e3
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
elf/elf.h | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/elf/elf.h b/elf/elf.h
index 89fc8021..d623bdeb 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -4205,6 +4205,14 @@ enum
#define R_LARCH_TLS_GD_HI20 98
#define R_LARCH_32_PCREL 99
#define R_LARCH_RELAX 100
+#define R_LARCH_DELETE 101
+#define R_LARCH_ALIGN 102
+#define R_LARCH_PCREL20_S2 103
+#define R_LARCH_CFA 104
+#define R_LARCH_ADD6 105
+#define R_LARCH_SUB6 106
+#define R_LARCH_ADD_ULEB128 107
+#define R_LARCH_SUB_ULEB128 108
/* ARC specific declarations. */
--
2.33.0
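
The new constants are plain numbers, so they can be sanity-checked outside glibc. Here is a small pretty-printer sketch; the values are copied from the patch above, and in a patched tree they would come from <elf.h> instead of local defines.

#include <stdio.h>

#define R_LARCH_DELETE      101
#define R_LARCH_ALIGN       102
#define R_LARCH_PCREL20_S2  103
#define R_LARCH_CFA         104
#define R_LARCH_ADD6        105
#define R_LARCH_SUB6        106
#define R_LARCH_ADD_ULEB128 107
#define R_LARCH_SUB_ULEB128 108

/* Map a relocation number added by the patch to its symbolic name.  */
static const char *
larch_reloc_name (unsigned int type)
{
  switch (type)
    {
    case R_LARCH_DELETE:      return "R_LARCH_DELETE";
    case R_LARCH_ALIGN:       return "R_LARCH_ALIGN";
    case R_LARCH_PCREL20_S2:  return "R_LARCH_PCREL20_S2";
    case R_LARCH_CFA:         return "R_LARCH_CFA";
    case R_LARCH_ADD6:        return "R_LARCH_ADD6";
    case R_LARCH_SUB6:        return "R_LARCH_SUB6";
    case R_LARCH_ADD_ULEB128: return "R_LARCH_ADD_ULEB128";
    case R_LARCH_SUB_ULEB128: return "R_LARCH_SUB_ULEB128";
    default:                  return "unknown";
    }
}

int
main (void)
{
  for (unsigned int t = 101; t <= 108; t++)
    printf ("%u -> %s\n", t, larch_reloc_name (t));
  return 0;
}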

File diff suppressed because it is too large.


@ -1,29 +0,0 @@
From dc2d26d52c129c47fa1f16bd0157cd20c6d9a958 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Wed, 21 Jun 2023 11:55:02 +0800
Subject: [PATCH 08/14] glibc-2.28: Add new struct user_fp_state in user.h
Change-Id: Idc233cc11c8f76b624dc2891b432f4d02a53cebc
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/unix/sysv/linux/loongarch/sys/user.h | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/sysdeps/unix/sysv/linux/loongarch/sys/user.h b/sysdeps/unix/sysv/linux/loongarch/sys/user.h
index f9108350..21e340f6 100644
--- a/sysdeps/unix/sysv/linux/loongarch/sys/user.h
+++ b/sysdeps/unix/sysv/linux/loongarch/sys/user.h
@@ -28,4 +28,10 @@ struct user_regs_struct
uint64_t reserved[11];
};
+struct user_fp_struct {
+ uint64_t fpr[32];
+ uint64_t fcc;
+ uint32_t fcsr;
+};
+
#endif /* _SYS_USER_H */
--
2.33.0
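
A quick layout check for the structure added above. This is a sketch, not part of the patch: the expected size assumes a typical LP64 ABI where uint64_t is 8-byte aligned, so the uint32_t fcsr is followed by 4 bytes of tail padding.

#include <assert.h>
#include <stdint.h>

/* Mirror of the definition added to sys/user.h above.  */
struct user_fp_struct {
  uint64_t fpr[32];
  uint64_t fcc;
  uint32_t fcsr;
};

/* 32 FP registers + fcc + fcsr, tail-padded to 8 bytes: 272 in total.  */
static_assert (sizeof (struct user_fp_struct) == 32 * 8 + 8 + 8,
               "unexpected user_fp_struct size");

int
main (void)
{
  return 0;
}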


@ -1,162 +0,0 @@
From 647a0a28e5c9aed2f1fa59bbb7595133e7a4e62f Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Mon, 24 Apr 2023 18:09:55 +0800
Subject: [PATCH 03/14] glibc-2.28: Fix ifunc str/mem functions xfail problems.
Change-Id: Ibff4229fcfef23c0b19fb94b21a4d17b49eceec6
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
.../lp64/multiarch/ifunc-impl-list.c | 76 +++++++++----------
1 file changed, 38 insertions(+), 38 deletions(-)
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index c2b6bbf7..fdeae797 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -36,105 +36,105 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
size_t i = 0;
IFUNC_IMPL (i, name, memcpy,
- IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_lasx)
- IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_lsx)
+ IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx)
+ IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LSX, __memcpy_lsx)
+ IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_UAL, __memcpy_unaligned)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_aligned)
- IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_unaligned)
)
IFUNC_IMPL (i, name, memmove,
- IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_lasx)
- IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_lsx)
+ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_LASX, __memmove_lasx)
+ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_LSX, __memmove_lsx)
+ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_UAL, __memmove_unaligned)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_aligned)
- IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_unaligned)
)
IFUNC_IMPL (i, name, memset,
- IFUNC_IMPL_ADD (array, i, memset, 1, __memset_lasx)
- IFUNC_IMPL_ADD (array, i, memset, 1, __memset_lsx)
+ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LASX, __memset_lasx)
+ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LSX, __memset_lsx)
+ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_UAL, __memset_unaligned)
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_aligned)
- IFUNC_IMPL_ADD (array, i, memset, 1, __memset_unaligned)
)
IFUNC_IMPL (i, name, memchr,
- IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_lasx)
- IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_lsx)
+ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LASX, __memchr_lasx)
+ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LSX, __memchr_lsx)
IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned)
)
IFUNC_IMPL (i, name, memrchr,
- IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_lasx)
- IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_lsx)
+ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LASX, __memrchr_lasx)
+ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LSX, __memrchr_lsx)
IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic)
)
IFUNC_IMPL (i, name, memcmp,
- IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_lasx)
- IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_lsx)
+ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LASX, __memcmp_lasx)
+ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LSX, __memcmp_lsx)
IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_aligned)
)
IFUNC_IMPL (i, name, rawmemchr,
- IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_lasx)
- IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_lsx)
+ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LASX, __rawmemchr_lasx)
+ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LSX, __rawmemchr_lsx)
IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned)
)
IFUNC_IMPL (i, name, strchr,
- IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_lasx)
- IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_lsx)
+ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx)
+ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LSX, __strchr_lsx)
+ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_UAL, __strchr_unaligned)
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_aligned)
- IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_unaligned)
)
IFUNC_IMPL (i, name, strrchr,
- IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_lasx)
- IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_lsx)
+ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LASX, __strrchr_lasx)
+ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LSX, __strrchr_lsx)
IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_aligned)
)
IFUNC_IMPL (i, name, strlen,
- IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_lasx)
- IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_lsx)
+ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LASX, __strlen_lasx)
+ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LSX, __strlen_lsx)
+ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_UAL, __strlen_unaligned)
IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned)
- IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_unaligned)
)
IFUNC_IMPL (i, name, strnlen,
- IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_lasx)
- IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_lsx)
+ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LASX, __strnlen_lasx)
+ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LSX, __strnlen_lsx)
+ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_UAL, __strnlen_unaligned)
IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_aligned)
- IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_unaligned)
)
IFUNC_IMPL (i, name, strchrnul,
- IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_lasx)
- IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_lsx)
+ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LASX, __strchrnul_lasx)
+ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LSX, __strchrnul_lsx)
+ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_UAL, __strchrnul_unaligned)
IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned)
- IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_unaligned)
)
IFUNC_IMPL (i, name, strncmp,
- IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_lsx)
+ IFUNC_IMPL_ADD (array, i, strncmp, SUPPORT_LSX, __strncmp_lsx)
+ IFUNC_IMPL_ADD (array, i, strncmp, SUPPORT_UAL, __strncmp_unaligned)
IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_aligned)
- IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_unaligned)
)
IFUNC_IMPL (i, name, strcpy,
- IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_lsx)
+ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LSX, __strcpy_lsx)
+ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_UAL, __strcpy_unaligned)
IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_aligned)
- IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_unaligned)
)
IFUNC_IMPL (i, name, stpcpy,
- IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_lsx)
+ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LSX, __stpcpy_lsx)
IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned)
)
IFUNC_IMPL (i, name, strcmp,
- IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_lsx)
+ IFUNC_IMPL_ADD (array, i, strcmp, SUPPORT_LSX, __strcmp_lsx)
+ IFUNC_IMPL_ADD (array, i, strcmp, SUPPORT_UAL, __strcmp_unaligned)
IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_aligned)
- IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_unaligned)
)
return i;
--
2.33.0
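
The point of the 1 -> SUPPORT_* change is that the third argument of IFUNC_IMPL_ADD tells the test harness whether a variant is actually usable on the running CPU; advertising the LSX/LASX/unaligned variants unconditionally is what made the str/mem tests xfail on hardware without those extensions. For readers unfamiliar with the mechanism, below is a minimal standalone sketch of a GNU ifunc whose resolver selects an implementation from the AT_HWCAP bits, much as SUPPORT_LSX/SUPPORT_LASX gate the variants above. It assumes a GCC/binutils toolchain with ifunc support; the function names and the tested bit are illustrative.

#include <stdio.h>
#include <sys/auxv.h>

static int impl_generic (void) { return 0; }
static int impl_fancy (void) { return 1; }

/* The resolver runs once, at relocation time, and returns the
   implementation to bind.  */
static int (*resolve_which (void)) (void)
{
  unsigned long hwcap = getauxval (AT_HWCAP);
  /* Bit 0 is an illustrative stand-in for a real feature bit such as
     HWCAP_LOONGARCH_LSX.  */
  return (hwcap & 1UL) ? impl_fancy : impl_generic;
}

int which (void) __attribute__ ((ifunc ("resolve_which")));

int
main (void)
{
  printf ("selected implementation: %d\n", which ());
  return 0;
}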


@ -1,57 +0,0 @@
From 00537d6945e71af8c9b0b1e7c2695f6a9a1ef1f5 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Sun, 25 Jun 2023 16:23:25 +0800
Subject: [PATCH 09/14] glibc-2.28: Redefine macro LEAF/ENTRY.
Both of the following usages of the LEAF/ENTRY macros are valid (a
standalone sketch of the default-argument trick follows this patch):
1. LEAF(fcn) -- fcn gets the default alignment, .align 3
2. LEAF(fcn, 6) -- fcn gets .align 6
Change-Id: Ie3df4df8dba5259b665bd0e4702aaab0a09a5f65
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/sys/asm.h | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
index 357a5ba3..734e45ae 100644
--- a/sysdeps/loongarch/sys/asm.h
+++ b/sysdeps/loongarch/sys/asm.h
@@ -26,16 +26,21 @@
#endif
-/* Declare leaf routine. */
-#define LEAF(symbol, aln) \
+/* Declare leaf routine.
+ The usage of macro LEAF/ENTRY is as follows:
+ 1. LEAF(fcn) -- the align value of fcn is .align 3 (default value)
+ 2. LEAF(fcn, 6) -- the align value of fcn is .align 6
+*/
+#define LEAF_IMPL(symbol, aln, ...) \
.text; \
.globl symbol; \
.align aln; \
.type symbol, @function; \
symbol: \
- cfi_startproc; \
+ cfi_startproc;
-# define ENTRY(symbol, aln) LEAF(symbol, aln)
+#define LEAF(...) LEAF_IMPL(__VA_ARGS__, 3)
+#define ENTRY(...) LEAF(__VA_ARGS__)
#define LEAF_NO_ALIGN(symbol) \
.text; \
@@ -44,7 +49,7 @@ symbol: \
symbol: \
cfi_startproc;
-# define ENTRY_NO_ALIGN(symbol) LEAF_NO_ALIGN(symbol)
+#define ENTRY_NO_ALIGN(symbol) LEAF_NO_ALIGN(symbol)
/* Mark end of function. */
#undef END
--
2.33.0
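
The default-argument trick behind the new LEAF/ENTRY definition also works as an ordinary C macro, which makes it easy to study in isolation. In the sketch below (ALIGN/ALIGN_IMPL are illustrative names), the trailing 3 in the wrapper supplies the default when the caller passes no alignment; note that an empty variadic argument list is a GNU extension accepted by GCC and only standardized in C23.

#include <stdio.h>

#define ALIGN_IMPL(name, aln, ...) printf ("%s: .align %d\n", name, aln)
#define ALIGN(...) ALIGN_IMPL (__VA_ARGS__, 3) /* trailing 3 is the default */

int
main (void)
{
  ALIGN ("fcn");    /* one argument:  prints "fcn: .align 3" */
  ALIGN ("fcn", 6); /* two arguments: prints "fcn: .align 6" */
  return 0;
}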


@ -1,306 +0,0 @@
From 27a004c9777340afd86fc0d129f6ffad508bf090 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Tue, 11 Jul 2023 16:09:55 +0800
Subject: [PATCH 12/14] glibc-2.28: Refactor code and fix bug in
_dl_runtime_resolve.
Change-Id: I4907e6643ef25b87d7862e957ce9bf6d201da816
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/dl-machine.h | 8 +-
sysdeps/loongarch/dl-trampoline.S | 7 ++
sysdeps/loongarch/dl-trampoline.h | 159 +++++++++++++-----------------
sysdeps/loongarch/sys/asm.h | 9 ++
4 files changed, 90 insertions(+), 93 deletions(-)
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index 6e9c6258..ff520a07 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -381,9 +381,13 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* If using PLTs, fill in the first two entries of .got.plt. */
if (l->l_info[DT_JMPREL])
{
- extern void _dl_runtime_resolve (void) __attribute__ ((visibility ("hidden")));
+
+#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float
extern void _dl_runtime_resolve_lasx (void) __attribute__ ((visibility ("hidden")));
extern void _dl_runtime_resolve_lsx (void) __attribute__ ((visibility ("hidden")));
+#endif
+ extern void _dl_runtime_resolve (void) __attribute__ ((visibility ("hidden")));
+
ElfW(Addr) *gotplt = (ElfW(Addr) *) D_PTR (l, l_info[DT_PLTGOT]);
/* If a library is prelinked but we have to relocate anyway,
we have to be able to undo the prelinking of .got.plt.
@@ -391,11 +395,13 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
if (gotplt[1])
l->l_mach.plt = gotplt[1] + l->l_addr;
+#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float
if (SUPPORT_LASX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
else if (SUPPORT_LSX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lsx;
else
+#endif
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve;
gotplt[1] = (ElfW(Addr)) l;
diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S
index 5f627a63..78d741f3 100644
--- a/sysdeps/loongarch/dl-trampoline.S
+++ b/sysdeps/loongarch/dl-trampoline.S
@@ -16,16 +16,23 @@
License along with the GNU C Library. If not, see
<http://www.gnu.org/licenses/>. */
+#include <sysdep.h>
+#include <sys/asm.h>
+
+#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float
#define USE_LASX
#define _dl_runtime_resolve _dl_runtime_resolve_lasx
#include "dl-trampoline.h"
+#undef FRAME_SIZE
#undef USE_LASX
#undef _dl_runtime_resolve
#define USE_LSX
#define _dl_runtime_resolve _dl_runtime_resolve_lsx
#include "dl-trampoline.h"
+#undef FRAME_SIZE
#undef USE_LSX
#undef _dl_runtime_resolve
+#endif
#include "dl-trampoline.h"
diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h
index 96f41f1d..9a6d9b6c 100644
--- a/sysdeps/loongarch/dl-trampoline.h
+++ b/sysdeps/loongarch/dl-trampoline.h
@@ -17,31 +17,24 @@
License along with the GNU C Library. If not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <sys/asm.h>
-
/* Assembler veneer called from the PLT header code for lazy loading.
The PLT header passes its own args in t0-t2. */
-
-#ifdef __loongarch_soft_float
-# define FRAME_SIZE (-((-10 * SZREG) & ALMASK))
+#ifdef USE_LASX
+# define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG - 8 * SZXREG) & ALMASK))
+#elif defined USE_LSX
+# define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG - 8 * SZVREG) & ALMASK))
+#elif !defined __loongarch_soft_float
+# define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG) & ALMASK))
#else
-# define FRAME_SIZE (-((-10 * SZREG - 8 * 256) & ALMASK))
+# define FRAME_SIZE (-((-9 * SZREG) & ALMASK))
#endif
ENTRY (_dl_runtime_resolve, 3)
- # Save arguments to stack.
-
-#ifdef __loongarch64
- li.d t3, -FRAME_SIZE
- add.d sp, sp, t3
-#elif defined __loongarch32
- li.w t3, -FRAME_SIZE
- add.w sp, sp, t3
-#endif
+ /* Save arguments to stack. */
+ ADDI sp, sp, -FRAME_SIZE
- REG_S ra, sp, 9*SZREG
+ REG_S ra, sp, 0*SZREG
REG_S a0, sp, 1*SZREG
REG_S a1, sp, 2*SZREG
REG_S a2, sp, 3*SZREG
@@ -51,55 +44,45 @@ ENTRY (_dl_runtime_resolve, 3)
REG_S a6, sp, 7*SZREG
REG_S a7, sp, 8*SZREG
-#ifndef __loongarch_soft_float
- FREG_S fa0, sp, 10*SZREG + 0*SZFREG
- FREG_S fa1, sp, 10*SZREG + 1*SZFREG
- FREG_S fa2, sp, 10*SZREG + 2*SZFREG
- FREG_S fa3, sp, 10*SZREG + 3*SZFREG
- FREG_S fa4, sp, 10*SZREG + 4*SZFREG
- FREG_S fa5, sp, 10*SZREG + 5*SZFREG
- FREG_S fa6, sp, 10*SZREG + 6*SZFREG
- FREG_S fa7, sp, 10*SZREG + 7*SZFREG
#ifdef USE_LASX
- xvst xr0, sp, 10*SZREG + 0*256
- xvst xr1, sp, 10*SZREG + 1*256
- xvst xr2, sp, 10*SZREG + 2*256
- xvst xr3, sp, 10*SZREG + 3*256
- xvst xr4, sp, 10*SZREG + 4*256
- xvst xr5, sp, 10*SZREG + 5*256
- xvst xr6, sp, 10*SZREG + 6*256
- xvst xr7, sp, 10*SZREG + 7*256
+ xvst xr0, sp, 9*SZREG + 8*SZFREG + 0*SZXREG
+ xvst xr1, sp, 9*SZREG + 8*SZFREG + 1*SZXREG
+ xvst xr2, sp, 9*SZREG + 8*SZFREG + 2*SZXREG
+ xvst xr3, sp, 9*SZREG + 8*SZFREG + 3*SZXREG
+ xvst xr4, sp, 9*SZREG + 8*SZFREG + 4*SZXREG
+ xvst xr5, sp, 9*SZREG + 8*SZFREG + 5*SZXREG
+ xvst xr6, sp, 9*SZREG + 8*SZFREG + 6*SZXREG
+ xvst xr7, sp, 9*SZREG + 8*SZFREG + 7*SZXREG
#elif defined USE_LSX
- vst vr0, sp, 10*SZREG + 0*128
- vst vr1, sp, 10*SZREG + 1*128
- vst vr2, sp, 10*SZREG + 2*128
- vst vr3, sp, 10*SZREG + 3*128
- vst vr4, sp, 10*SZREG + 4*128
- vst vr5, sp, 10*SZREG + 5*128
- vst vr6, sp, 10*SZREG + 6*128
- vst vr7, sp, 10*SZREG + 7*128
-#endif
+ vst vr0, sp, 9*SZREG + 8*SZFREG + 0*SZVREG
+ vst vr1, sp, 9*SZREG + 8*SZFREG + 1*SZVREG
+ vst vr2, sp, 9*SZREG + 8*SZFREG + 2*SZVREG
+ vst vr3, sp, 9*SZREG + 8*SZFREG + 3*SZVREG
+ vst vr4, sp, 9*SZREG + 8*SZFREG + 4*SZVREG
+ vst vr5, sp, 9*SZREG + 8*SZFREG + 5*SZVREG
+ vst vr6, sp, 9*SZREG + 8*SZFREG + 6*SZVREG
+ vst vr7, sp, 9*SZREG + 8*SZFREG + 7*SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_S fa0, sp, 9*SZREG + 0*SZFREG
+ FREG_S fa1, sp, 9*SZREG + 1*SZFREG
+ FREG_S fa2, sp, 9*SZREG + 2*SZFREG
+ FREG_S fa3, sp, 9*SZREG + 3*SZFREG
+ FREG_S fa4, sp, 9*SZREG + 4*SZFREG
+ FREG_S fa5, sp, 9*SZREG + 5*SZFREG
+ FREG_S fa6, sp, 9*SZREG + 6*SZFREG
+ FREG_S fa7, sp, 9*SZREG + 7*SZFREG
#endif
- # Update .got.plt and obtain runtime address of callee.
-#ifdef __loongarch64
- slli.d a1, t1, 1
+ /* Update .got.plt and obtain runtime address of callee */
+ SLLI a1, t1, 1
or a0, t0, zero
- add.d a1, a1, t1
+ ADD a1, a1, t1
la a2, _dl_fixup
jirl ra, a2, 0
or t1, v0, zero
-#elif defined __loongarch32
- slli.w a1, t1, 1
- or a0, t0, zero
- add.w a1, a1, t1
- la a2, _dl_fixup
- jirl ra, a2, 0
- or t1, v0, zero
-#endif
- # Restore arguments from stack.
- REG_L ra, sp, 9*SZREG
+ /* Restore arguments from stack. */
+ REG_L ra, sp, 0*SZREG
REG_L a0, sp, 1*SZREG
REG_L a1, sp, 2*SZREG
REG_L a2, sp, 3*SZREG
@@ -109,45 +92,37 @@ ENTRY (_dl_runtime_resolve, 3)
REG_L a6, sp, 7*SZREG
REG_L a7, sp, 8*SZREG
-#ifndef __loongarch_soft_float
- FREG_L fa0, sp, 10*SZREG + 0*SZFREG
- FREG_L fa1, sp, 10*SZREG + 1*SZFREG
- FREG_L fa2, sp, 10*SZREG + 2*SZFREG
- FREG_L fa3, sp, 10*SZREG + 3*SZFREG
- FREG_L fa4, sp, 10*SZREG + 4*SZFREG
- FREG_L fa5, sp, 10*SZREG + 5*SZFREG
- FREG_L fa6, sp, 10*SZREG + 6*SZFREG
- FREG_L fa7, sp, 10*SZREG + 7*SZFREG
#ifdef USE_LASX
- xvld xr0, sp, 10*SZREG + 0*256
- xvld xr1, sp, 10*SZREG + 1*256
- xvld xr2, sp, 10*SZREG + 2*256
- xvld xr3, sp, 10*SZREG + 3*256
- xvld xr4, sp, 10*SZREG + 4*256
- xvld xr5, sp, 10*SZREG + 5*256
- xvld xr6, sp, 10*SZREG + 6*256
- xvld xr7, sp, 10*SZREG + 7*256
+ xvld xr0, sp, 9*SZREG + 8*SZFREG + 0*SZXREG
+ xvld xr1, sp, 9*SZREG + 8*SZFREG + 1*SZXREG
+ xvld xr2, sp, 9*SZREG + 8*SZFREG + 2*SZXREG
+ xvld xr3, sp, 9*SZREG + 8*SZFREG + 3*SZXREG
+ xvld xr4, sp, 9*SZREG + 8*SZFREG + 4*SZXREG
+ xvld xr5, sp, 9*SZREG + 8*SZFREG + 5*SZXREG
+ xvld xr6, sp, 9*SZREG + 8*SZFREG + 6*SZXREG
+ xvld xr7, sp, 9*SZREG + 8*SZFREG + 7*SZXREG
#elif defined USE_LSX
- vld vr0, sp, 10*SZREG + 0*128
- vld vr1, sp, 10*SZREG + 1*128
- vld vr2, sp, 10*SZREG + 2*128
- vld vr3, sp, 10*SZREG + 3*128
- vld vr4, sp, 10*SZREG + 4*128
- vld vr5, sp, 10*SZREG + 5*128
- vld vr6, sp, 10*SZREG + 6*128
- vld vr7, sp, 10*SZREG + 7*128
-#endif
-#endif
-
-#ifdef __loongarch64
- li.d t3, FRAME_SIZE
- add.d sp, sp, t3
-#elif defined __loongarch32
- li.w t3, FRAME_SIZE
- addi.w sp, sp, FRAME_SIZE
+ vld vr0, sp, 9*SZREG + 8*SZFREG + 0*SZVREG
+ vld vr1, sp, 9*SZREG + 8*SZFREG + 1*SZVREG
+ vld vr2, sp, 9*SZREG + 8*SZFREG + 2*SZVREG
+ vld vr3, sp, 9*SZREG + 8*SZFREG + 3*SZVREG
+ vld vr4, sp, 9*SZREG + 8*SZFREG + 4*SZVREG
+ vld vr5, sp, 9*SZREG + 8*SZFREG + 5*SZVREG
+ vld vr6, sp, 9*SZREG + 8*SZFREG + 6*SZVREG
+ vld vr7, sp, 9*SZREG + 8*SZFREG + 7*SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_L fa0, sp, 9*SZREG + 0*SZFREG
+ FREG_L fa1, sp, 9*SZREG + 1*SZFREG
+ FREG_L fa2, sp, 9*SZREG + 2*SZFREG
+ FREG_L fa3, sp, 9*SZREG + 3*SZFREG
+ FREG_L fa4, sp, 9*SZREG + 4*SZFREG
+ FREG_L fa5, sp, 9*SZREG + 5*SZFREG
+ FREG_L fa6, sp, 9*SZREG + 6*SZFREG
+ FREG_L fa7, sp, 9*SZREG + 7*SZFREG
#endif
+ ADDI sp, sp, FRAME_SIZE
- # Invoke the callee.
+ /* Invoke the callee. */
jirl zero, t1, 0
END (_dl_runtime_resolve)
diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
index 734e45ae..e80c6245 100644
--- a/sysdeps/loongarch/sys/asm.h
+++ b/sysdeps/loongarch/sys/asm.h
@@ -9,8 +9,17 @@
# define PTRLOG 3
# define SZREG 8
# define SZFREG 8
+# define SZVREG 16
+# define SZXREG 32
# define REG_L ld.d
# define REG_S st.d
+# define SRLI srli.d
+# define SLLI slli.d
+# define ADDI addi.d
+# define ADD add.d
+# define SUB sub.d
+# define BSTRINS bstrins.d
+# define LI li.d
# define FREG_L fld.d
# define FREG_S fst.d
#elif defined __loongarch32
--
2.33.0
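
The FRAME_SIZE definitions above all use the idiom -((-x) & ALMASK) to round the register save area up to the stack-alignment granule. A quick check of the arithmetic, assuming the LP64 register sizes from sys/asm.h and 16-byte alignment (the ALMASK value here is an assumption, not quoted from the tree):

#include <stdio.h>

#define SZREG  8        /* general register, LP64 */
#define SZFREG 8        /* floating-point register */
#define SZXREG 32       /* LASX register */
#define ALMASK (~15L)   /* assumption: 16-byte stack alignment */

int
main (void)
{
  long lasx = -((-9 * SZREG - 8 * SZFREG - 8 * SZXREG) & ALMASK);
  long soft = -((-9 * SZREG) & ALMASK);
  printf ("LASX frame: %ld bytes\n", lasx);       /* 392 rounds up to 400 */
  printf ("soft-float frame: %ld bytes\n", soft); /* 72 rounds up to 80 */
  return 0;
}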

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -1,292 +0,0 @@
From e2dd1f13592fa3b99b70eb54cc61e9f98cdcb123 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Mon, 17 Apr 2023 17:20:04 +0800
Subject: [PATCH 01/14] glibc-2.28: Remove useless ANDROID_CHANGES and related
code.
Change-Id: Ib08e92d435126c7b56096ff6f24f1c6b5ea57f46
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/lp64/memchr.S | 6 ------
sysdeps/loongarch/lp64/memcpy.S | 13 -------------
sysdeps/loongarch/lp64/memset.S | 6 ------
sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S | 6 ------
.../loongarch/lp64/multiarch/memmove-unaligned.S | 6 ------
sysdeps/loongarch/lp64/multiarch/memset-unaligned.S | 7 -------
sysdeps/loongarch/lp64/multiarch/strchr-unaligned.S | 2 --
.../loongarch/lp64/multiarch/strchrnul-unaligned.S | 2 --
sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S | 2 --
sysdeps/loongarch/lp64/multiarch/strlen-unaligned.S | 2 --
.../loongarch/lp64/multiarch/strncmp-unaligned.S | 2 --
.../loongarch/lp64/multiarch/strnlen-unaligned.S | 2 --
12 files changed, 56 deletions(-)
diff --git a/sysdeps/loongarch/lp64/memchr.S b/sysdeps/loongarch/lp64/memchr.S
index ec34b1af..75c4e15c 100644
--- a/sysdeps/loongarch/lp64/memchr.S
+++ b/sysdeps/loongarch/lp64/memchr.S
@@ -11,11 +11,7 @@
#define MEMCHR_NAME memchr
#endif
-#ifdef ANDROID_CHANGES
-LEAF(MEMCHR_NAME, 0)
-#else
LEAF(MEMCHR_NAME)
-#endif
.align 6
beqz a2, L(out)
andi t1, a0, 0x7
@@ -92,8 +88,6 @@ L(out):
jr ra
END(MEMCHR_NAME)
-#ifndef ANDROID_CHANGES
#ifdef _LIBC
libc_hidden_builtin_def (MEMCHR_NAME)
#endif
-#endif
diff --git a/sysdeps/loongarch/lp64/memcpy.S b/sysdeps/loongarch/lp64/memcpy.S
index 1076e678..b6ca60a1 100644
--- a/sysdeps/loongarch/lp64/memcpy.S
+++ b/sysdeps/loongarch/lp64/memcpy.S
@@ -35,29 +35,18 @@
st.d t6, reg, n+48; \
st.d t7, reg, n+56;
-#ifdef ANDROID_CHANGES
-LEAF(MEMMOVE_NAME, 0)
-#else
LEAF(MEMMOVE_NAME)
-#endif
-
.align 6
sub.d t0, a0, a1
bltu t0, a2, L(copy_back)
END(MEMMOVE_NAME)
-#ifndef ANDROID_CHANGES
#ifdef _LIBC
libc_hidden_builtin_def (MEMMOVE_NAME)
#endif
-#endif
-#ifdef ANDROID_CHANGES
-LEAF(MEMCPY_NAME, 0)
-#else
LEAF(MEMCPY_NAME)
-#endif
srai.d a3, a2, 4
beqz a3, L(short_data) # less than 16 bytes
@@ -811,8 +800,6 @@ L(back_end):
END(MEMCPY_NAME)
-#ifndef ANDROID_CHANGES
#ifdef _LIBC
libc_hidden_builtin_def (MEMCPY_NAME)
#endif
-#endif
diff --git a/sysdeps/loongarch/lp64/memset.S b/sysdeps/loongarch/lp64/memset.S
index 9fe42b24..41629e7e 100644
--- a/sysdeps/loongarch/lp64/memset.S
+++ b/sysdeps/loongarch/lp64/memset.S
@@ -21,11 +21,7 @@
st.d a1, a0, n+48; \
st.d a1, a0, n+56;
-#ifdef ANDROID_CHANGES
-LEAF(MEMSET_NAME, 0)
-#else
LEAF(MEMSET_NAME)
-#endif
.align 6
move t0, a0
andi a3, a0, 0x7
@@ -166,8 +162,6 @@ L(short_0):
END(MEMSET_NAME)
-#ifndef ANDROID_CHANGES
#ifdef _LIBC
libc_hidden_builtin_def (MEMSET_NAME)
#endif
-#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S
index 5e38df0d..64b60244 100644
--- a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S
@@ -31,11 +31,7 @@
st.d t6, reg, n+48; \
st.d t7, reg, n+56;
-#ifdef ANDROID_CHANGES
-LEAF(MEMCPY_NAME, 0)
-#else
LEAF(MEMCPY_NAME)
-#endif
//1st var: dst ptr: void *a1 $r4 a0
//2nd var: src ptr: void *a2 $r5 a1
@@ -250,10 +246,8 @@ end_0_8_unalign:
END(MEMCPY_NAME)
-#ifndef ANDROID_CHANGES
#ifdef _LIBC
libc_hidden_builtin_def (MEMCPY_NAME)
#endif
-#endif
#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S
index 27ed0c9c..42920a1a 100644
--- a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S
@@ -100,11 +100,7 @@
LD_64(a4, -1024); \
ST_64(a3, -1024);
-#ifdef ANDROID_CHANGES
-LEAF(MEMMOVE_NAME, 0)
-#else
LEAF(MEMMOVE_NAME)
-#endif
//1st var: dest ptr: void *str1 $r4 a0
//2nd var: src ptr: void *str2 $r5 a1
@@ -469,10 +465,8 @@ end_unalign_proc_back:
END(MEMMOVE_NAME)
-#ifndef ANDROID_CHANGES
#ifdef _LIBC
libc_hidden_builtin_def (MEMMOVE_NAME)
#endif
-#endif
#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S
index 16ff2ef7..54e51546 100644
--- a/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S
@@ -33,12 +33,7 @@
//2nd var: int val $5 a1
//3rd var: size_t num $6 a2
-#ifdef ANDROID_CHANGES
-LEAF(MEMSET_NAME, 0)
-#else
LEAF(MEMSET_NAME)
-#endif
-
.align 6
bstrins.d a1, a1, 15, 8
add.d t7, a0, a2
@@ -168,10 +163,8 @@ end_0_8_unalign:
END(MEMSET_NAME)
-#ifndef ANDROID_CHANGES
#ifdef _LIBC
libc_hidden_builtin_def (MEMSET_NAME)
#endif
-#endif
#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-unaligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-unaligned.S
index 1d5e56c5..de6c7f4f 100644
--- a/sysdeps/loongarch/lp64/multiarch/strchr-unaligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strchr-unaligned.S
@@ -123,10 +123,8 @@ L(_mc8_a):
jr ra
END(STRCHR_NAME)
-#ifndef ANDROID_CHANGES
#ifdef _LIBC
libc_hidden_builtin_def (STRCHR_NAME)
#endif
-#endif
#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-unaligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-unaligned.S
index 6338d005..abc246ca 100644
--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-unaligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-unaligned.S
@@ -136,11 +136,9 @@ L(_mc8_a):
jr ra
END(STRCHRNUL_NAME)
-#ifndef ANDROID_CHANGES
#ifdef _LIBC
weak_alias(STRCHRNUL_NAME, strchrnul)
libc_hidden_builtin_def (STRCHRNUL_NAME)
#endif
-#endif
#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S
index 449733cb..c77dc1a9 100644
--- a/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S
@@ -190,10 +190,8 @@ strcpy_page_cross:
beqz has_nul, strcpy_page_cross_ok
b strcpy_end
END(STRCPY)
-#ifndef ANDROID_CHANGES
#ifdef _LIBC
libc_hidden_builtin_def (STRCPY)
#endif
-#endif
#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-unaligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-unaligned.S
index e9b7cf67..2fe0fb34 100644
--- a/sysdeps/loongarch/lp64/multiarch/strlen-unaligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strlen-unaligned.S
@@ -107,10 +107,8 @@ strlen_loop_noascii:
jr ra
END(STRLEN)
-#ifndef ANDROID_CHANGES
#ifdef _LIBC
libc_hidden_builtin_def (STRLEN)
#endif
-#endif
#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-unaligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-unaligned.S
index 558df29b..6ec107ca 100644
--- a/sysdeps/loongarch/lp64/multiarch/strncmp-unaligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strncmp-unaligned.S
@@ -248,10 +248,8 @@ strncmp_ret0:
then exchange(src1,src2). */
END(STRNCMP)
-#ifndef ANDROID_CHANGES
#ifdef _LIBC
libc_hidden_builtin_def (STRNCMP)
#endif
-#endif
#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-unaligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-unaligned.S
index 60eccf00..4a195b7c 100644
--- a/sysdeps/loongarch/lp64/multiarch/strnlen-unaligned.S
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-unaligned.S
@@ -136,10 +136,8 @@ L(_hit_limit):
move len, limit
jr ra
END(STRNLEN)
-#ifndef ANDROID_CHANGES
#ifdef _LIBC
libc_hidden_builtin_def (STRNLEN)
#endif
-#endif
#endif
--
2.33.0


@ -1,40 +0,0 @@
From f4041e5da609a9f5da966fa000c00b150788a948 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Sun, 23 Jul 2023 14:32:08 +0800
Subject: [PATCH 13/14] glibc-2.28: Remove useless LA{264,364,464} and
 IS_LA{264,364,464} macros.
Change-Id: Id9a573510e2a493151191372d651f381ec2aefe7
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/unix/sysv/linux/loongarch/cpu-features.h | 7 -------
1 file changed, 7 deletions(-)
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
index b46a8489..2703d4f7 100644
--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
@@ -22,10 +22,6 @@
#include <stdint.h>
#include <sys/auxv.h>
-#define LA264 0x14a000
-#define LA364 0x14b000
-#define LA464 0x14c011
-
struct cpu_features
{
uint64_t cpucfg_prid;
@@ -42,9 +38,6 @@ extern const struct cpu_features *_dl_larch_get_cpu_features (void)
:"=r"(ret) \
:"r"(index));
-#define IS_LA264(prid) (prid == LA264)
-#define IS_LA364(prid) (prid == LA364)
-#define IS_LA464(prid) (prid == LA464)
#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL)
#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
--
2.33.0


@ -1,123 +0,0 @@
From c94d9376e241dc52eb9f2a2107313b7836e0e9ad Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Wed, 6 Sep 2023 16:41:09 +0800
Subject: [PATCH 14/14] glibc-2.28: Use RTLD_SUPPORT_{LSX, LASX} to choose
_dl_runtime_resolve.
Key Points:
1. On lasx & lsx platforms, use _dl_runtime_resolve_{lsx, lasx} to save vector registers.
2. Via "tunables", users can choose str/mem functions with
`export GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX`.
Note: glibc.cpu.hwcaps doesn't affect _dl_runtime_resolve_{lsx, lasx} selection.
Usage Notes:
1. Only valid inputs: LASX, LSX, UAL. Case-sensitive, comma-separated, no spaces.
2. Example: `export GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,UAL` turns on LASX & UAL.
Unmentioned features are turned off. With the default ifunc order lasx > lsx >
unaligned > aligned > generic, the effective order becomes lasx > unaligned >
aligned > generic (lsx is disabled).
3. Incorrect GLIBC_TUNABLES settings will show error messages.
4. Valid input examples:
- GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX: lasx > aligned > generic.
- GLIBC_TUNABLES=glibc.cpu.hwcaps=LSX,UAL: lsx > unaligned > aligned > generic.
- GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,UAL,LASX,UAL,LSX,LASX,UAL: Repetitions
allowed but not recommended. Results in: lasx > lsx > unaligned > aligned >
generic.
Change-Id: I555ce2039bc36bf071fc9265d7b0bb7b93b96ae7
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
sysdeps/loongarch/cpu-tunables.c | 2 +-
sysdeps/loongarch/dl-machine.h | 11 ++++++-----
sysdeps/unix/sysv/linux/loongarch/cpu-features.c | 2 ++
sysdeps/unix/sysv/linux/loongarch/cpu-features.h | 10 +++++++---
4 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/sysdeps/loongarch/cpu-tunables.c b/sysdeps/loongarch/cpu-tunables.c
index 840c1b8c..e0799ca9 100644
--- a/sysdeps/loongarch/cpu-tunables.c
+++ b/sysdeps/loongarch/cpu-tunables.c
@@ -88,7 +88,7 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
}
while (*c != '\0');
- GLRO (dl_hwcap) &= hwcap;
+ GLRO (dl_larch_cpu_features).hwcap &= hwcap;
}
#endif
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index ff520a07..b5f43c84 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -75,13 +75,14 @@ dl_platform_init (void)
GLRO(dl_platform) = NULL;
#ifdef SHARED
+ /* init_cpu_features has been called early from __libc_start_main in
+ static executable. */
+ init_cpu_features (&GLRO(dl_larch_cpu_features));
#if HAVE_TUNABLES
TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
#endif
- /* init_cpu_features has been called early from __libc_start_main in
- static executable. */
- init_cpu_features (&GLRO(dl_larch_cpu_features));
+
#endif
}
@@ -396,9 +397,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
l->l_mach.plt = gotplt[1] + l->l_addr;
#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float
- if (SUPPORT_LASX)
+ if (RTLD_SUPPORT_LASX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
- else if (SUPPORT_LSX)
+ else if (RTLD_SUPPORT_LSX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lsx;
else
#endif
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
index 80870f3c..cf015011 100644
--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
@@ -29,4 +29,6 @@ init_cpu_features (struct cpu_features *cpu_features)
__cpucfg(cpucfg_word, 2);
cpu_features->cpucfg_word_idx2 = cpucfg_word;
+
+ GLRO (dl_larch_cpu_features).hwcap = GLRO (dl_hwcap);
}
diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
index 2703d4f7..17c9f5a7 100644
--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
@@ -26,6 +26,7 @@ struct cpu_features
{
uint64_t cpucfg_prid;
uint64_t cpucfg_word_idx2;
+ uint64_t hwcap;
};
/* Get a pointer to the CPU features structure. */
@@ -38,9 +39,12 @@ extern const struct cpu_features *_dl_larch_get_cpu_features (void)
:"=r"(ret) \
:"r"(index));
-#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL)
-#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
-#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
+#define SUPPORT_UAL (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_UAL)
+#define SUPPORT_LSX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LSX)
+#define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LASX)
+
+#define RTLD_SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
+#define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
#endif /* _CPU_FEATURES_LOONGARCH64_H */
--
2.33.0
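
The tunable callback above works by ANDing a mask built from the GLIBC_TUNABLES string into the saved hwcap, so unmentioned features drop out. A hedged sketch of that select-by-mask idea; the parsing loop and the HWCAP_* bit positions are placeholders, not glibc's actual parser or the kernel's bit assignments:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define HWCAP_LOONGARCH_UAL  (1 << 0)   /* placeholder bit values */
#define HWCAP_LOONGARCH_LSX  (1 << 1)
#define HWCAP_LOONGARCH_LASX (1 << 2)

static uint64_t
parse_hwcaps (const char *s)
{
  uint64_t mask = 0;
  char buf[64];
  strncpy (buf, s, sizeof buf - 1);
  buf[sizeof buf - 1] = '\0';
  for (char *tok = strtok (buf, ","); tok != NULL; tok = strtok (NULL, ","))
    {
      if (strcmp (tok, "UAL") == 0)  mask |= HWCAP_LOONGARCH_UAL;
      if (strcmp (tok, "LSX") == 0)  mask |= HWCAP_LOONGARCH_LSX;
      if (strcmp (tok, "LASX") == 0) mask |= HWCAP_LOONGARCH_LASX;
    }
  return mask;
}

int
main (void)
{
  uint64_t hwcap = HWCAP_LOONGARCH_UAL | HWCAP_LOONGARCH_LSX
                   | HWCAP_LOONGARCH_LASX;  /* as reported by the kernel */
  hwcap &= parse_hwcaps ("LASX,UAL");       /* tunable filter */
  printf ("LSX still enabled: %d\n", (int) !!(hwcap & HWCAP_LOONGARCH_LSX)); /* 0 */
  return 0;
}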


@ -1,91 +0,0 @@
From 58b1f882644f839259505dde3205e226a1c649f1 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Tue, 11 Jul 2023 15:42:26 +0800
Subject: [PATCH 10/14] glibc-2.28: config: Added HAVE_LOONGARCH_VEC_ASM.
Change-Id: Iea464ea0c975a351682a60f66251167f6c79385b
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
config.h.in | 5 +++++
sysdeps/loongarch/configure | 28 ++++++++++++++++++++++++++++
sysdeps/loongarch/configure.ac | 15 +++++++++++++++
3 files changed, 48 insertions(+)
diff --git a/config.h.in b/config.h.in
index 94d5ea36..fa53cc2d 100644
--- a/config.h.in
+++ b/config.h.in
@@ -123,6 +123,11 @@
/* RISC-V floating-point ABI for ld.so. */
#undef RISCV_ABI_FLEN
+/* Whether the assembler supports LoongArch LASX/LSX vector instructions.
+ This macro becomes obsolete once glibc raises the minimum
+ required version of GNU 'binutils' to 2.41 or later. */
+#define HAVE_LOONGARCH_VEC_ASM 0
+
/* Linux specific: minimum supported kernel version. */
#undef __LINUX_KERNEL_VERSION
diff --git a/sysdeps/loongarch/configure b/sysdeps/loongarch/configure
index 1e5abf81..0f0dae3a 100755
--- a/sysdeps/loongarch/configure
+++ b/sysdeps/loongarch/configure
@@ -2,3 +2,31 @@
# Local configure fragment for sysdeps/loongarch/elf.
#AC_DEFINE(PI_STATIC_AND_HIDDEN)
+
+# Check if the assembler supports vector instructions.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for vector support in assembler" >&5
+$as_echo_n "checking for vector support in assembler... " >&6; }
+if ${libc_cv_loongarch_vec_asm+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat > conftest.s <<\EOF
+ vld $vr0, $sp, 0
+EOF
+if { ac_try='${CC-cc} -c $CFLAGS conftest.s -o conftest 1>&5'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then
+ libc_cv_loongarch_vec_asm=yes
+else
+ libc_cv_loongarch_vec_asm=no
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_loongarch_vec_asm" >&5
+$as_echo "$libc_cv_loongarch_vec_asm" >&6; }
+if test $libc_cv_loongarch_vec_asm = yes; then
+ $as_echo "#define HAVE_LOONGARCH_VEC_ASM 1" >>confdefs.h
+
+fi
diff --git a/sysdeps/loongarch/configure.ac b/sysdeps/loongarch/configure.ac
index 67b46ce0..aac0efa9 100644
--- a/sysdeps/loongarch/configure.ac
+++ b/sysdeps/loongarch/configure.ac
@@ -4,3 +4,18 @@ GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
dnl It is always possible to access static and hidden symbols in an
dnl position independent way.
#AC_DEFINE(PI_STATIC_AND_HIDDEN)
+
+# Check if the assembler supports vector instructions.
+AC_CACHE_CHECK(for vector support in assembler, libc_cv_loongarch_vec_asm, [dnl
+cat > conftest.s <<\EOF
+ vld $vr0, $sp, 0
+EOF
+if AC_TRY_COMMAND(${CC-cc} -c $CFLAGS conftest.s -o conftest 1>&AS_MESSAGE_LOG_FD); then
+ libc_cv_loongarch_vec_asm=yes
+else
+ libc_cv_loongarch_vec_asm=no
+fi
+rm -f conftest*])
+if test $libc_cv_loongarch_vec_asm = yes; then
+ AC_DEFINE(HAVE_LOONGARCH_VEC_ASM)
+fi
--
2.33.0
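
The configure fragment compiles a one-line conftest.s to learn whether the assembler accepts LSX. A hand-run equivalent of the probe, meaningful only with a LoongArch toolchain:

/* Compiles only if the assembler understands the LSX vld instruction;
   the load itself just reads 16 bytes from the stack.  */
int
main (void)
{
  __asm__ volatile ("vld $vr0, $sp, 0");
  return 0;
}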


@ -1,75 +0,0 @@
From 0153532f680527c4378a10673518cabda2e02584 Mon Sep 17 00:00:00 2001
From: caiyinyu <caiyinyu@loongson.cn>
Date: Fri, 26 May 2023 14:58:39 +0800
Subject: [PATCH 05/14] glibc-2.28: remove ABILPX32 related code.
Change-Id: I73eb5bc4d4ca12e4d45ed6b533fa38d60a3a633f
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
elf/elf.h | 3 +--
sysdeps/loongarch/dl-machine.h | 2 --
sysdeps/loongarch/nptl/bits/pthreadtypes-arch.h | 2 +-
sysdeps/loongarch/sys/regdef.h | 4 +---
4 files changed, 3 insertions(+), 8 deletions(-)
diff --git a/elf/elf.h b/elf/elf.h
index 65d1fb46..4bfbad61 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -3933,10 +3933,9 @@ enum
#define R_NDS32_TLS_TPOFF 102
#define R_NDS32_TLS_DESC 119
-/* LoongISA ELF Flags */
+/* LoongArch ELF Flags */
#define EF_LARCH_ABI 0x0003
#define EF_LARCH_ABI_LP64 0x0003
-#define EF_LARCH_ABI_LPX32 0x0002
#define EF_LARCH_ABI_LP32 0x0001
/* Loongarch specific dynamic relocations. */
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index 2d527241..6e9c6258 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -96,8 +96,6 @@ elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
#ifdef _ABILP64
if ((ehdr->e_flags & EF_LARCH_ABI) != EF_LARCH_ABI_LP64)
-#elif defined _ABILPX32
- if ((ehdr->e_flags & EF_LARCH_ABI) != EF_LARCH_ABI_LPX32)
#elif defined _ABILP32
if ((ehdr->e_flags & EF_LARCH_ABI) != EF_LARCH_ABI_LP32)
#else
diff --git a/sysdeps/loongarch/nptl/bits/pthreadtypes-arch.h b/sysdeps/loongarch/nptl/bits/pthreadtypes-arch.h
index 5a761355..aa63bce1 100644
--- a/sysdeps/loongarch/nptl/bits/pthreadtypes-arch.h
+++ b/sysdeps/loongarch/nptl/bits/pthreadtypes-arch.h
@@ -32,7 +32,7 @@
# define __SIZEOF_PTHREAD_BARRIER_T 32
# define __SIZEOF_PTHREAD_BARRIERATTR_T 4
#else
-# error "rv32i-based systems are not supported"
+# error "32-bit based systems are not supported"
#endif
#define __PTHREAD_COMPAT_PADDING_MID
diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
index 769784b8..36f00939 100644
--- a/sysdeps/loongarch/sys/regdef.h
+++ b/sysdeps/loongarch/sys/regdef.h
@@ -72,10 +72,8 @@
# define fs6 $f30
# define fs7 $f31
-#elif _LOONGARCH_SIM == _ABILPX32
-# error ABILPX32
#elif _LOONGARCH_SIM == _ABILP32
-# error ABILP32
+# error ABILP32 not supported yet
#else
# error noABI
#endif
--
2.33.0

File diff suppressed because it is too large

glibc-2.38.tar.xz (new binary file, contents not shown)


@ -1,29 +0,0 @@
From 4e32231c73cb7a9b004fb160d03c26498ec3860d Mon Sep 17 00:00:00 2001
From: Zhao Hang <wb-zh951434@alibaba-inc.com>
Date: Tue, 28 May 2024 10:10:11 +0800
Subject: [PATCH] Add Hygon Support
Signed-off-by: Zhao Hang <wb-zh951434@alibaba-inc.com>
---
sysdeps/x86/cpu-features.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 1248a702..c72dc2f3 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -546,8 +546,9 @@ init_cpu_features (struct cpu_features *cpu_features)
|= bit_arch_Prefer_AVX2_STRCMP;
}
}
- /* This spells out "AuthenticAMD". */
- else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
+ /* This spells out "AuthenticAMD" or "HygonGenuine". */
+ else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)||(ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e))
+
{
unsigned int extended_model;
--
2.31.1
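
The magic constants in the new condition are the CPUID leaf-0 vendor string, which the CPU returns four little-endian bytes at a time in EBX, EDX, ECX (in that order). A short decoding of the Hygon values from the hunk, assuming a little-endian host:

#include <stdio.h>
#include <string.h>

int
main (void)
{
  /* The three register values compared in the patch.  */
  unsigned int ebx = 0x6f677948, edx = 0x6e65476e, ecx = 0x656e6975;
  char vendor[13];
  memcpy (vendor + 0, &ebx, 4);
  memcpy (vendor + 4, &edx, 4);
  memcpy (vendor + 8, &ecx, 4);
  vendor[12] = '\0';
  printf ("%s\n", vendor);  /* prints "HygonGenuine" */
  return 0;
}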


@ -1,147 +0,0 @@
From 58f93dff514cc0bdf3c72eff590dcf5fe5bf9e00 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Wed, 19 Jul 2023 23:09:09 +0800
Subject: [PATCH 3/6] Add a testcase to check alignment of PT_LOAD segment [BZ
#28676]
Backport from master commit: fc2334a
Signed-off-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
---
elf/Makefile | 13 ++++++++++++-
elf/tst-align3.c | 38 ++++++++++++++++++++++++++++++++++++++
elf/tst-alignmod3.c | 32 ++++++++++++++++++++++++++++++++
3 files changed, 82 insertions(+), 1 deletion(-)
create mode 100644 elf/tst-align3.c
create mode 100644 elf/tst-alignmod3.c
diff --git a/elf/Makefile b/elf/Makefile
index 634c3113..442817ca 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -331,6 +331,7 @@ tests += \
tst-addr1 \
tst-align \
tst-align2 \
+ tst-align3 \
tst-audit-tlsdesc \
tst-audit-tlsdesc-dlopen \
tst-audit1 \
@@ -466,7 +467,9 @@ endif
test-srcs = \
tst-pathopt
# tests-srcs
-
+ifeq (yes,$(have-fpie))
+tests-pie += tst-align3
+endif
selinux-enabled := $(shell cat /selinux/enforce 2> /dev/null)
ifneq ($(selinux-enabled),1)
@@ -647,6 +650,7 @@ modules-names = \
tst-absolute-zero-lib \
tst-alignmod \
tst-alignmod2 \
+ tst-alignmod3 \
tst-array2dep \
tst-array5dep \
tst-audit-tlsdesc-mod1 \
@@ -1669,6 +1673,13 @@ CFLAGS-tst-alignmod2.c += $(stack-align-test-flags)
$(objpfx)tst-align: $(libdl)
$(objpfx)tst-align.out: $(objpfx)tst-alignmod.so
$(objpfx)tst-align2: $(objpfx)tst-alignmod2.so
+$(objpfx)tst-align3: $(objpfx)tst-alignmod3.so
+ifeq (yes,$(have-fpie))
+CFLAGS-tst-align3.c += $(PIE-ccflag)
+endif
+LDFLAGS-tst-align3 += -Wl,-z,max-page-size=0x200000
+LDFLAGS-tst-alignmod3.so += -Wl,-z,max-page-size=0x200000
+$(objpfx)tst-alignmod3.so: $(libsupport)
$(objpfx)unload3: $(libdl)
$(objpfx)unload3.out: $(objpfx)unload3mod1.so $(objpfx)unload3mod2.so \
diff --git a/elf/tst-align3.c b/elf/tst-align3.c
new file mode 100644
index 00000000..ac86d623
--- /dev/null
+++ b/elf/tst-align3.c
@@ -0,0 +1,38 @@
+/* Check alignment of PT_LOAD segment in a shared library.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <support/check.h>
+#include <tst-stack-align.h>
+
+/* This should cover all possible page sizes we currently support. */
+#define ALIGN 0x200000
+
+int bar __attribute__ ((aligned (ALIGN))) = 1;
+
+extern int do_load_test (void);
+
+static int
+do_test (void)
+{
+ printf ("bar: %p\n", &bar);
+ TEST_VERIFY (is_aligned (&bar, ALIGN) == 0);
+
+ return do_load_test ();
+}
+
+#include <support/test-driver.c>
diff --git a/elf/tst-alignmod3.c b/elf/tst-alignmod3.c
new file mode 100644
index 00000000..0d33f237
--- /dev/null
+++ b/elf/tst-alignmod3.c
@@ -0,0 +1,32 @@
+/* Check alignment of PT_LOAD segment in a shared library.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <support/check.h>
+#include <tst-stack-align.h>
+
+/* This should cover all possible page sizes we currently support. */
+#define ALIGN 0x200000
+
+int foo __attribute__ ((aligned (ALIGN))) = 1;
+
+void
+do_load_test (void)
+{
+ printf ("foo: %p\n", &foo);
+ TEST_VERIFY (is_aligned (&foo, ALIGN) == 0);
+}
--
2.27.0
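
tst-align3 links with -Wl,-z,max-page-size=0x200000 so that a 2 MiB-aligned variable really lands on a 2 MiB boundary at load time. A standalone version of the same check (the link flag is still needed on ELF targets for the alignment to hold):

#include <stdint.h>
#include <stdio.h>

#define ALIGN 0x200000

/* Mirrors the bar variable in tst-align3.c.  */
static int bar __attribute__ ((aligned (ALIGN))) = 1;

int
main (void)
{
  printf ("bar: %p\n", (void *) &bar);
  return (((uintptr_t) &bar) & (ALIGN - 1)) != 0;  /* 0 when aligned */
}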


@ -1,325 +0,0 @@
From 6152628751bf13f74c9336263a9c22f29ccd8ffb Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Wed, 19 Jul 2023 23:01:53 +0800
Subject: [PATCH 1/6] Properly check stack alignment [BZ #27901]
1. Replace
if ((((uintptr_t) &_d) & (__alignof (double) - 1)) != 0)
which may be optimized out by compiler, with
int
__attribute__ ((weak, noclone, noinline))
is_aligned (void *p, int align)
{
return (((uintptr_t) p) & (align - 1)) != 0;
}
2. Add TEST_STACK_ALIGN_INIT to TEST_STACK_ALIGN.
3. Add a common TEST_STACK_ALIGN_INIT to check 16-byte stack alignment
for both i386 and x86-64.
4. Update powerpc to use TEST_STACK_ALIGN_INIT.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
Signed-off-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
---
sysdeps/generic/tst-stack-align.h | 40 ++++++++++++++++---------
sysdeps/i386/i686/tst-stack-align.h | 44 ---------------------------
sysdeps/i386/tst-stack-align.h | 41 -------------------------
sysdeps/powerpc/tst-stack-align.h | 27 +++++------------
sysdeps/x86/tst-stack-align.h | 28 ++++++++++++++++++
sysdeps/x86_64/tst-stack-align.h | 46 -----------------------------
6 files changed, 61 insertions(+), 165 deletions(-)
delete mode 100644 sysdeps/i386/i686/tst-stack-align.h
delete mode 100644 sysdeps/i386/tst-stack-align.h
create mode 100644 sysdeps/x86/tst-stack-align.h
delete mode 100644 sysdeps/x86_64/tst-stack-align.h
diff --git a/sysdeps/generic/tst-stack-align.h b/sysdeps/generic/tst-stack-align.h
index e5cb3310..e6050901 100644
--- a/sysdeps/generic/tst-stack-align.h
+++ b/sysdeps/generic/tst-stack-align.h
@@ -1,4 +1,5 @@
-/* Copyright (C) 2003-2018 Free Software Foundation, Inc.
+/* Check stack alignment. Generic version.
+ Copyright (C) 2003-2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -18,17 +19,28 @@
#include <stdio.h>
#include <stdint.h>
+int
+__attribute__ ((weak, noclone, noinline))
+is_aligned (void *p, int align)
+{
+ return (((uintptr_t) p) & (align - 1)) != 0;
+}
+
+#ifndef TEST_STACK_ALIGN_INIT
+# define TEST_STACK_ALIGN_INIT() 0
+#endif
+
#define TEST_STACK_ALIGN() \
- ({ \
- double _d = 12.0; \
- long double _ld = 15.0; \
- int _ret = 0; \
- printf ("double: %g %p %zu\n", _d, &_d, __alignof (double)); \
- if ((((uintptr_t) &_d) & (__alignof (double) - 1)) != 0) \
- _ret = 1; \
- \
- printf ("ldouble: %Lg %p %zu\n", _ld, &_ld, __alignof (long double)); \
- if ((((uintptr_t) &_ld) & (__alignof (long double) - 1)) != 0) \
- _ret = 1; \
- _ret; \
- })
+ ({ \
+ double _d = 12.0; \
+ long double _ld = 15.0; \
+ int _ret = TEST_STACK_ALIGN_INIT (); \
+ \
+ printf ("double: %g %p %zu\n", _d, &_d, __alignof (double)); \
+ _ret += is_aligned (&_d, __alignof (double)); \
+ \
+ printf ("ldouble: %Lg %p %zu\n", _ld, &_ld, \
+ __alignof (long double)); \
+ _ret += is_aligned (&_ld, __alignof (long double)); \
+ _ret; \
+ })
diff --git a/sysdeps/i386/i686/tst-stack-align.h b/sysdeps/i386/i686/tst-stack-align.h
deleted file mode 100644
index 975f26ef..00000000
--- a/sysdeps/i386/i686/tst-stack-align.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* Copyright (C) 2003-2018 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <stdio.h>
-#include <stdint.h>
-#ifndef __SSE__
-#include_next <tst-stack-align.h>
-#else
-#include <xmmintrin.h>
-
-#define TEST_STACK_ALIGN() \
- ({ \
- __m128 _m; \
- double _d = 12.0; \
- long double _ld = 15.0; \
- int _ret = 0; \
- printf ("__m128: %p %zu\n", &_m, __alignof (__m128)); \
- if ((((uintptr_t) &_m) & (__alignof (__m128) - 1)) != 0) \
- _ret = 1; \
- \
- printf ("double: %g %p %zu\n", _d, &_d, __alignof (double)); \
- if ((((uintptr_t) &_d) & (__alignof (double) - 1)) != 0) \
- _ret = 1; \
- \
- printf ("ldouble: %Lg %p %zu\n", _ld, &_ld, __alignof (long double)); \
- if ((((uintptr_t) &_ld) & (__alignof (long double) - 1)) != 0) \
- _ret = 1; \
- _ret; \
- })
-#endif
diff --git a/sysdeps/i386/tst-stack-align.h b/sysdeps/i386/tst-stack-align.h
deleted file mode 100644
index 394ff773..00000000
--- a/sysdeps/i386/tst-stack-align.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* Copyright (C) 2004-2018 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <stdio.h>
-#include <stdint.h>
-
-typedef struct { int i[4]; } int_al16 __attribute__((aligned (16)));
-
-#define TEST_STACK_ALIGN() \
- ({ \
- int_al16 _m; \
- double _d = 12.0; \
- long double _ld = 15.0; \
- int _ret = 0; \
- printf ("int_al16: %p %zu\n", &_m, __alignof (int_al16)); \
- if ((((uintptr_t) &_m) & (__alignof (int_al16) - 1)) != 0) \
- _ret = 1; \
- \
- printf ("double: %g %p %zu\n", _d, &_d, __alignof (double)); \
- if ((((uintptr_t) &_d) & (__alignof (double) - 1)) != 0) \
- _ret = 1; \
- \
- printf ("ldouble: %Lg %p %zu\n", _ld, &_ld, __alignof (long double)); \
- if ((((uintptr_t) &_ld) & (__alignof (long double) - 1)) != 0) \
- _ret = 1; \
- _ret; \
- })
diff --git a/sysdeps/powerpc/tst-stack-align.h b/sysdeps/powerpc/tst-stack-align.h
index 7fd7013b..d7400b28 100644
--- a/sysdeps/powerpc/tst-stack-align.h
+++ b/sysdeps/powerpc/tst-stack-align.h
@@ -1,4 +1,5 @@
-/* Copyright (C) 2005-2018 Free Software Foundation, Inc.
+/* Check stack alignment. PowerPC version.
+ Copyright (C) 2005-2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -15,10 +16,7 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <stdio.h>
-#include <stdint.h>
-
-#define TEST_STACK_ALIGN() \
+#define TEST_STACK_ALIGN_INIT() \
({ \
/* Altivec __vector int etc. needs 16byte aligned stack. \
Instead of using altivec.h here, use aligned attribute instead. */ \
@@ -27,20 +25,9 @@
int _i __attribute__((aligned (16))); \
int _j[3]; \
} _s = { ._i = 18, ._j[0] = 19, ._j[1] = 20, ._j[2] = 21 }; \
- double _d = 12.0; \
- long double _ld = 15.0; \
- int _ret = 0; \
printf ("__vector int: { %d, %d, %d, %d } %p %zu\n", _s._i, _s._j[0], \
_s._j[1], _s._j[2], &_s, __alignof (_s)); \
- if ((((uintptr_t) &_s) & (__alignof (_s) - 1)) != 0) \
- _ret = 1; \
- \
- printf ("double: %g %p %zu\n", _d, &_d, __alignof (double)); \
- if ((((uintptr_t) &_d) & (__alignof (double) - 1)) != 0) \
- _ret = 1; \
- \
- printf ("ldouble: %Lg %p %zu\n", _ld, &_ld, __alignof (long double)); \
- if ((((uintptr_t) &_ld) & (__alignof (long double) - 1)) != 0) \
- _ret = 1; \
- _ret; \
- })
+ is_aligned (&_s, __alignof (_s)); \
+ })
+
+#include_next <tst-stack-align.h>
diff --git a/sysdeps/x86/tst-stack-align.h b/sysdeps/x86/tst-stack-align.h
new file mode 100644
index 00000000..02ecc72d
--- /dev/null
+++ b/sysdeps/x86/tst-stack-align.h
@@ -0,0 +1,28 @@
+/* Check stack alignment. X86 version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+typedef struct { int i[16]; } int_al16 __attribute__((aligned (16)));
+
+#define TEST_STACK_ALIGN_INIT() \
+ ({ \
+ int_al16 _m; \
+ printf ("int_al16: %p %zu\n", &_m, __alignof (int_al16)); \
+ is_aligned (&_m, __alignof (int_al16)); \
+ })
+
+#include_next <tst-stack-align.h>
diff --git a/sysdeps/x86_64/tst-stack-align.h b/sysdeps/x86_64/tst-stack-align.h
deleted file mode 100644
index b2ef77f6..00000000
--- a/sysdeps/x86_64/tst-stack-align.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* Copyright (C) 2003-2018 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <stdio.h>
-#include <stdint.h>
-
-#define TEST_STACK_ALIGN() \
- ({ \
- /* AMD64 ABI mandates 16byte aligned stack. \
- Unfortunately, current GCC doesn't support __int128 or __float128 \
- types, so use aligned attribute instead. */ \
- struct _S \
- { \
- int _i __attribute__((aligned (16))); \
- int _pad[3]; \
- } _s = { ._i = 18 }; \
- double _d = 12.0; \
- long double _ld = 15.0; \
- int _ret = 0; \
- printf ("__int128: %d %p %zu\n", _s._i, &_s, __alignof (_s)); \
- if ((((uintptr_t) &_s) & (__alignof (_s) - 1)) != 0) \
- _ret = 1; \
- \
- printf ("double: %g %p %zu\n", _d, &_d, __alignof (double)); \
- if ((((uintptr_t) &_d) & (__alignof (double) - 1)) != 0) \
- _ret = 1; \
- \
- printf ("ldouble: %Lg %p %zu\n", _ld, &_ld, __alignof (long double)); \
- if ((((uintptr_t) &_ld) & (__alignof (long double) - 1)) != 0) \
- _ret = 1; \
- _ret; \
- })
--
2.27.0
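
The heart of the fix is a weak, noinline predicate that the compiler cannot constant-fold away, unlike the inline bit-test it replaces. A self-contained sketch built around the function the patch adds:

#include <stdint.h>
#include <stdio.h>

/* Same shape as the patch's helper: returns 0 when P is ALIGN-aligned.  */
int
__attribute__ ((weak, noclone, noinline))
is_aligned (void *p, int align)
{
  return (((uintptr_t) p) & (align - 1)) != 0;
}

int
main (void)
{
  double d = 12.0;
  printf ("double: %g %p %zu\n", d, (void *) &d, __alignof (double));
  return is_aligned (&d, __alignof (double));
}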


@ -1,112 +0,0 @@
commit 849274d48fc59bfa6db3c713c8ced8026b20f3b7
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Nov 16 19:55:35 2023 +0100
elf: Fix force_first handling in dlclose (bug 30981)
The force_first parameter was ineffective because the dlclose'd
object was not necessarily the first in the maps array. Also
enable force_first handling unconditionally, regardless of namespace.
The initial object in a namespace should be destructed first, too.
The _dl_sort_maps_dfs function had early returns for relocation
dependency processing which broke force_first handling, too, and
this is fixed in this change as well.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
diff --git a/elf/dl-close.c b/elf/dl-close.c
index 66524b6708c59f29..8107c2d5f6ad2bc6 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -182,6 +182,16 @@ _dl_close_worker (struct link_map *map, bool force)
}
assert (idx == nloaded);
+ /* Put the dlclose'd map first, so that its destructor runs first.
+ The map variable is NULL after a retry. */
+ if (map != NULL)
+ {
+ maps[map->l_idx] = maps[0];
+ maps[map->l_idx]->l_idx = map->l_idx;
+ maps[0] = map;
+ maps[0]->l_idx = 0;
+ }
+
/* Keep track of the lowest index link map we have covered already. */
int done_index = -1;
while (++done_index < nloaded)
@@ -255,9 +265,10 @@ _dl_close_worker (struct link_map *map, bool force)
}
}
- /* Sort the entries. We can skip looking for the binary itself which is
- at the front of the search list for the main namespace. */
- _dl_sort_maps (maps, nloaded, (nsid == LM_ID_BASE), true);
+ /* Sort the entries. Unless retrying, the maps[0] object (the
+ original argument to dlclose) needs to remain first, so that its
+ destructor runs first. */
+ _dl_sort_maps (maps, nloaded, /* force_first */ map != NULL, true);
/* Call all termination functions at once. */
bool unload_any = false;
@@ -768,7 +779,11 @@ _dl_close_worker (struct link_map *map, bool force)
/* Recheck if we need to retry, release the lock. */
out:
if (dl_close_state == rerun)
- goto retry;
+ {
+ /* The map may have been deallocated. */
+ map = NULL;
+ goto retry;
+ }
dl_close_state = not_pending;
}
diff --git a/elf/dl-sort-maps.c b/elf/dl-sort-maps.c
index aeb79b40b45054c0..c17ac325eca658ef 100644
--- a/elf/dl-sort-maps.c
+++ b/elf/dl-sort-maps.c
@@ -260,13 +260,12 @@ _dl_sort_maps_dfs (struct link_map **maps, unsigned int nmaps,
The below memcpy is not needed in the do_reldeps case here,
since we wrote back to maps[] during DFS traversal. */
if (maps_head == maps)
- return;
+ break;
}
assert (maps_head == maps);
- return;
}
-
- memcpy (maps, rpo, sizeof (struct link_map *) * nmaps);
+ else
+ memcpy (maps, rpo, sizeof (struct link_map *) * nmaps);
/* Skipping the first object at maps[0] is not valid in general,
since traversing along object dependency-links may "find" that
diff --git a/elf/dso-sort-tests-1.def b/elf/dso-sort-tests-1.def
index 4bf9052db16fb352..cf6453e9eb85ac65 100644
--- a/elf/dso-sort-tests-1.def
+++ b/elf/dso-sort-tests-1.def
@@ -56,14 +56,16 @@ output: b>a>{}<a<b
# relocation(dynamic) dependencies. While this is technically unspecified, the
# presumed reasonable practical behavior is for the destructor order to respect
# the static DT_NEEDED links (here this means the a->b->c->d order).
-# The older dynamic_sort=1 algorithm does not achieve this, while the DFS-based
-# dynamic_sort=2 algorithm does, although it is still arguable whether going
-# beyond spec to do this is the right thing to do.
+# The older dynamic_sort=1 algorithm originally did not achieve this,
+# but this was a bug in the way _dl_sort_maps was called from _dl_close_worker,
+# effectively disabling proper force_first handling.
+# The new dynamic_sort=2 algorithm shows the effect of the simpler force_first
+# handling: the a object is simply moved to the front.
# The below expected outputs are what the two algorithms currently produce
# respectively, for regression testing purposes.
tst-bz15311: {+a;+e;+f;+g;+d;%d;-d;-g;-f;-e;-a};a->b->c->d;d=>[ba];c=>a;b=>e=>a;c=>f=>b;d=>g=>c
-output(glibc.rtld.dynamic_sort=1): {+a[d>c>b>a>];+e[e>];+f[f>];+g[g>];+d[];%d(b(e(a()))a()g(c(a()f(b(e(a()))))));-d[];-g[];-f[];-e[];-a[<a<c<d<g<f<b<e];}
-output(glibc.rtld.dynamic_sort=2): {+a[d>c>b>a>];+e[e>];+f[f>];+g[g>];+d[];%d(b(e(a()))a()g(c(a()f(b(e(a()))))));-d[];-g[];-f[];-e[];-a[<g<f<a<b<c<d<e];}
+output(glibc.rtld.dynamic_sort=1): {+a[d>c>b>a>];+e[e>];+f[f>];+g[g>];+d[];%d(b(e(a()))a()g(c(a()f(b(e(a()))))));-d[];-g[];-f[];-e[];-a[<a<b<c<d<g<f<e];}
+output(glibc.rtld.dynamic_sort=2): {+a[d>c>b>a>];+e[e>];+f[f>];+g[g>];+d[];%d(b(e(a()))a()g(c(a()f(b(e(a()))))));-d[];-g[];-f[];-e[];-a[<a<g<f<b<c<d<e];}
# Test that even in the presence of dependency loops involving dlopen'ed
# object, that object is initialized last (and not unloaded prematurely).
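
The maps[] fixup above swaps the dlclose'd object to index 0 while keeping every map's recorded l_idx consistent with its slot. A toy demonstration of that swap with made-up map names:

#include <stdio.h>

struct link_map { int l_idx; const char *name; };

int
main (void)
{
  struct link_map a = { 0, "app" }, b = { 1, "libfoo" }, c = { 2, "libbar" };
  struct link_map *maps[] = { &a, &b, &c };
  struct link_map *map = &c;   /* the object passed to dlclose */

  /* Same four statements as the patch.  */
  maps[map->l_idx] = maps[0];
  maps[map->l_idx]->l_idx = map->l_idx;
  maps[0] = map;
  maps[0]->l_idx = 0;

  for (int i = 0; i < 3; ++i)
    printf ("%d: %s (l_idx=%d)\n", i, maps[i]->name, maps[i]->l_idx);
  return 0;  /* libbar now sorts (and destructs) first */
}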


@ -1,83 +0,0 @@
commit c00b984fcd53f679ca2dafcd1aee2c89836e6e73
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue Aug 29 08:28:31 2023 +0200
nscd: Skip unusable entries in first pass in prune_cache (bug 30800)
Previously, if an entry was marked unusable for any reason, but had
not timed out yet, the assert would trigger.
One way to get into such state is if a data change is detected during
re-validation of an entry. This causes the entry to be marked as not
usable. If nscd exits soon after that, the clock jumps backwards, and
nscd is restarted, then the cache re-validation run after startup
triggers the removed assert.
The change is more complicated than just the removal of the assert
because entries marked as not usable should be garbage-collected in
the second pass. To make this happen, it is necessary to update some
book-keeping data.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/nscd/cache.c b/nscd/cache.c
index efe4214d953edb30..2fd3f78ebb567bbe 100644
--- a/nscd/cache.c
+++ b/nscd/cache.c
@@ -371,8 +371,11 @@ prune_cache (struct database_dyn *table, time_t now, int fd)
serv2str[runp->type], str, dh->timeout);
}
- /* Check whether the entry timed out. */
- if (dh->timeout < now)
+ /* Check whether the entry timed out. Timed out entries
+ will be revalidated. For unusable records, it is still
+ necessary to record that the bucket needs to be scanned
+ again below. */
+ if (dh->timeout < now || !dh->usable)
{
/* This hash bucket could contain entries which need to
be looked at. */
@@ -384,7 +387,7 @@ prune_cache (struct database_dyn *table, time_t now, int fd)
/* We only have to look at the data of the first entries
since the count information is kept in the data part
which is shared. */
- if (runp->first)
+ if (runp->first && dh->usable)
{
/* At this point there are two choices: we reload the
@@ -400,9 +403,6 @@ prune_cache (struct database_dyn *table, time_t now, int fd)
{
/* Remove the value. */
dh->usable = false;
-
- /* We definitely have some garbage entries now. */
- any = true;
}
else
{
@@ -414,18 +414,15 @@ prune_cache (struct database_dyn *table, time_t now, int fd)
time_t timeout = readdfcts[runp->type] (table, runp, dh);
next_timeout = MIN (next_timeout, timeout);
-
- /* If the entry has been replaced, we might need
- cleanup. */
- any |= !dh->usable;
}
}
+
+ /* If the entry has been replaced, we might need cleanup. */
+ any |= !dh->usable;
}
else
- {
- assert (dh->usable);
- next_timeout = MIN (next_timeout, dh->timeout);
- }
+ /* Entry has not timed out and is usable. */
+ next_timeout = MIN (next_timeout, dh->timeout);
run = runp->next;
}
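
The reworked first pass flags a bucket for the second, garbage-collecting pass whenever an entry is unusable, even if it has not timed out yet. A toy two-pass sweep showing just that bookkeeping (the data values are made up):

#include <stdbool.h>
#include <stdio.h>

struct entry { int timeout; bool usable; };

int
main (void)
{
  /* Entry 1 is unusable but has NOT timed out: the case that used to
     trip the assert.  */
  struct entry cache[] = { { 100, true }, { 100, false }, { 200, true } };
  int now = 60;
  bool any = false;

  /* Pass 1: an entry needs attention if it timed out OR is unusable.  */
  for (unsigned i = 0; i < 3; ++i)
    if (cache[i].timeout < now || !cache[i].usable)
      any |= !cache[i].usable;

  /* Pass 2 (the actual collection) runs only if something was flagged.  */
  printf ("gc needed: %d\n", (int) any);  /* 1: entry 1 is unusable */
  return 0;
}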


@ -1,72 +0,0 @@
commit 2aa0974d2573441bffd596b07bff8698b1f2f18c
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Oct 20 14:29:50 2023 +0200
elf: ldconfig should skip temporary files created by package managers
This avoids crashes due to partially written files, after a package
update is interrupted.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Conflicts:
elf/ldconfig.c
(missing alloca removal downstream)
diff --git a/elf/ldconfig.c b/elf/ldconfig.c
index 8c66d7e5426d8cc4..51de08f91fbaf093 100644
--- a/elf/ldconfig.c
+++ b/elf/ldconfig.c
@@ -771,6 +771,31 @@ struct dlib_entry
struct dlib_entry *next;
};
+/* Skip some temporary DSO files. These files may be partially written
+ and lead to ldconfig crashes when examined. */
+static bool
+skip_dso_based_on_name (const char *name, size_t len)
+{
+ /* Skip temporary files created by the prelink program. Files with
+ names like these are never really DSOs we want to look at. */
+ if (len >= sizeof (".#prelink#") - 1)
+ {
+ if (strcmp (name + len - sizeof (".#prelink#") + 1,
+ ".#prelink#") == 0)
+ return true;
+ if (len >= sizeof (".#prelink#.XXXXXX") - 1
+ && memcmp (name + len - sizeof (".#prelink#.XXXXXX")
+ + 1, ".#prelink#.", sizeof (".#prelink#.") - 1) == 0)
+ return true;
+ }
+ /* Skip temporary files created by RPM. */
+ if (memchr (name, len, ';') != NULL)
+ return true;
+ /* Skip temporary files created by dpkg. */
+ if (len > 4 && memcmp (name + len - 4, ".tmp", 4) == 0)
+ return true;
+ return false;
+}
static void
search_dir (const struct dir_entry *entry)
@@ -849,18 +874,8 @@ search_dir (const struct dir_entry *entry)
continue;
size_t len = strlen (direntry->d_name);
- /* Skip temporary files created by the prelink program. Files with
- names like these are never really DSOs we want to look at. */
- if (len >= sizeof (".#prelink#") - 1)
- {
- if (strcmp (direntry->d_name + len - sizeof (".#prelink#") + 1,
- ".#prelink#") == 0)
- continue;
- if (len >= sizeof (".#prelink#.XXXXXX") - 1
- && memcmp (direntry->d_name + len - sizeof (".#prelink#.XXXXXX")
- + 1, ".#prelink#.", sizeof (".#prelink#.") - 1) == 0)
- continue;
- }
+ if (skip_dso_based_on_name (direntry->d_name, len))
+ continue;
len += strlen (entry->path) + 2;
if (len > file_name_len)
{


@ -1,61 +0,0 @@
commit cfb5a97a93ea656e3b2263e42142a4032986d9ba
Author: Florian Weimer <fweimer@redhat.com>
Date: Mon Oct 23 12:53:16 2023 +0200
ldconfig: Fixes for skipping temporary files.
Arguments to a memchr call were swapped, causing incorrect skipping
of files.
Files related to dpkg have different names: they actually end in
.dpkg-new and .dpkg-tmp, not .tmp as I mistakenly assumed.
Fixes commit 2aa0974d2573441bffd59 ("elf: ldconfig should skip
temporary files created by package managers").
diff --git a/elf/ldconfig.c b/elf/ldconfig.c
index 51de08f91fbaf093..fb19dd68d41c07a4 100644
--- a/elf/ldconfig.c
+++ b/elf/ldconfig.c
@@ -771,6 +771,17 @@ struct dlib_entry
struct dlib_entry *next;
};
+/* Return true if the N bytes at NAME end with the characters in
+ the string SUFFIX. (NAME[N + 1] does not have to be a null byte.)
+ Expected to be called with a string literal for SUFFIX. */
+static inline bool
+endswithn (const char *name, size_t n, const char *suffix)
+{
+ return (n >= strlen (suffix)
+ && memcmp (name + n - strlen (suffix), suffix,
+ strlen (suffix)) == 0);
+}
+
/* Skip some temporary DSO files. These files may be partially written
and lead to ldconfig crashes when examined. */
static bool
@@ -780,8 +791,7 @@ skip_dso_based_on_name (const char *name, size_t len)
names like these are never really DSOs we want to look at. */
if (len >= sizeof (".#prelink#") - 1)
{
- if (strcmp (name + len - sizeof (".#prelink#") + 1,
- ".#prelink#") == 0)
+ if (endswithn (name, len, ".#prelink#"))
return true;
if (len >= sizeof (".#prelink#.XXXXXX") - 1
&& memcmp (name + len - sizeof (".#prelink#.XXXXXX")
@@ -789,10 +799,11 @@ skip_dso_based_on_name (const char *name, size_t len)
return true;
}
/* Skip temporary files created by RPM. */
- if (memchr (name, len, ';') != NULL)
+ if (memchr (name, ';', len) != NULL)
return true;
/* Skip temporary files created by dpkg. */
- if (len > 4 && memcmp (name + len - 4, ".tmp", 4) == 0)
+ if (endswithn (name, len, ".dpkg-new")
+ || endswithn (name, len, ".dpkg-tmp"))
return true;
return false;
}
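
After the fix, suffix tests go through endswithn and memchr gets its arguments in the right order (the character second, the length third). A hedged sketch exercising both checks on a made-up RPM-style file name:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Same shape as the helper the patch adds to ldconfig.c.  */
static bool
endswithn (const char *name, size_t n, const char *suffix)
{
  return (n >= strlen (suffix)
          && memcmp (name + n - strlen (suffix), suffix,
                     strlen (suffix)) == 0);
}

int
main (void)
{
  const char *name = "libfoo.so.1;5c9f2e";  /* made-up RPM temp name */
  size_t len = strlen (name);
  printf ("rpm temp: %d\n", memchr (name, ';', len) != NULL);      /* 1 */
  printf ("dpkg temp: %d\n", endswithn (name, len, ".dpkg-new"));  /* 0 */
  return 0;
}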


@ -1,259 +0,0 @@
From 97700a34f36721b11a754cf37a1cc40695ece1fd Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Mon, 21 Jan 2019 11:23:59 -0800
Subject: [PATCH] x86-64 memchr/wmemchr: Properly handle the length parameter
[BZ# 24097]
Content-type: text/plain; charset=UTF-8
On x32, the size_t parameter may be passed in the lower 32 bits of a
64-bit register with the non-zero upper 32 bits. The string/memory
functions written in assembly can only use the lower 32 bits of a
64-bit register as length or must clear the upper 32 bits before using
the full 64-bit register for length.
This patch fixes memchr/wmemchr for x32. Tested on x86-64 and x32. On
x86-64, libc.so is the same with and without the fix.
[BZ# 24097]
CVE-2019-6488
* sysdeps/x86_64/memchr.S: Use RDX_LP for length. Clear the
upper 32 bits of RDX register.
* sysdeps/x86_64/multiarch/memchr-avx2.S: Likewise.
* sysdeps/x86_64/x32/Makefile (tests): Add tst-size_t-memchr and
tst-size_t-wmemchr.
* sysdeps/x86_64/x32/test-size_t.h: New file.
* sysdeps/x86_64/x32/tst-size_t-memchr.c: Likewise.
* sysdeps/x86_64/x32/tst-size_t-wmemchr.c: Likewise.
---
sysdeps/x86_64/memchr.S | 10 ++--
sysdeps/x86_64/multiarch/memchr-avx2.S | 8 ++-
sysdeps/x86_64/x32/Makefile | 8 +++
sysdeps/x86_64/x32/test-size_t.h | 35 ++++++++++++
sysdeps/x86_64/x32/tst-size_t-memchr.c | 72 +++++++++++++++++++++++++
sysdeps/x86_64/x32/tst-size_t-wmemchr.c | 20 +++++++
6 files changed, 148 insertions(+), 5 deletions(-)
create mode 100644 sysdeps/x86_64/x32/test-size_t.h
create mode 100644 sysdeps/x86_64/x32/tst-size_t-memchr.c
create mode 100644 sysdeps/x86_64/x32/tst-size_t-wmemchr.c
Conflicts:
ChangeLog
(removed)
NEWS
(removed)
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index feef5d4f..cb320257 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -34,12 +34,16 @@ ENTRY(MEMCHR)
mov %edi, %ecx
#ifdef USE_AS_WMEMCHR
- test %rdx, %rdx
+ test %RDX_LP, %RDX_LP
jz L(return_null)
- shl $2, %rdx
+ shl $2, %RDX_LP
#else
+# ifdef __ILP32__
+ /* Clear the upper 32 bits. */
+ movl %edx, %edx
+# endif
punpcklbw %xmm1, %xmm1
- test %rdx, %rdx
+ test %RDX_LP, %RDX_LP
jz L(return_null)
punpcklbw %xmm1, %xmm1
#endif
diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S
index 5f5e7725..c81da19b 100644
--- a/sysdeps/x86_64/multiarch/memchr-avx2.S
+++ b/sysdeps/x86_64/multiarch/memchr-avx2.S
@@ -40,16 +40,20 @@
ENTRY (MEMCHR)
# ifndef USE_AS_RAWMEMCHR
/* Check for zero length. */
- testq %rdx, %rdx
+ test %RDX_LP, %RDX_LP
jz L(null)
# endif
movl %edi, %ecx
/* Broadcast CHAR to YMM0. */
vmovd %esi, %xmm0
# ifdef USE_AS_WMEMCHR
- shl $2, %rdx
+ shl $2, %RDX_LP
vpbroadcastd %xmm0, %ymm0
# else
+# ifdef __ILP32__
+ /* Clear the upper 32 bits. */
+ movl %edx, %edx
+# endif
vpbroadcastb %xmm0, %ymm0
# endif
/* Check if we may cross page boundary with one vector load. */
diff --git a/sysdeps/x86_64/x32/Makefile b/sysdeps/x86_64/x32/Makefile
index f2ebc24f..7d528889 100644
--- a/sysdeps/x86_64/x32/Makefile
+++ b/sysdeps/x86_64/x32/Makefile
@@ -4,3 +4,11 @@ ifeq ($(subdir),math)
# 64-bit llround. Add -fno-builtin-lround to silence the compiler.
CFLAGS-s_llround.c += -fno-builtin-lround
endif
+
+ifeq ($(subdir),string)
+tests += tst-size_t-memchr
+endif
+
+ifeq ($(subdir),wcsmbs)
+tests += tst-size_t-wmemchr
+endif
diff --git a/sysdeps/x86_64/x32/test-size_t.h b/sysdeps/x86_64/x32/test-size_t.h
new file mode 100644
index 00000000..78a94086
--- /dev/null
+++ b/sysdeps/x86_64/x32/test-size_t.h
@@ -0,0 +1,35 @@
+/* Test string/memory functions with size_t in the lower 32 bits of
+ 64-bit register.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define TEST_MAIN
+#include <string/test-string.h>
+
+/* On x32, parameter_t may be passed in a 64-bit register with the LEN
+ field in the lower 32 bits. When the LEN field of a 64-bit register
+ is passed to a string/memory function as the size_t parameter, only
+ the lower 32 bits can be used. */
+typedef struct
+{
+ union
+ {
+ size_t len;
+ void (*fn) (void);
+ };
+ void *p;
+} parameter_t;
diff --git a/sysdeps/x86_64/x32/tst-size_t-memchr.c b/sysdeps/x86_64/x32/tst-size_t-memchr.c
new file mode 100644
index 00000000..29a3daf1
--- /dev/null
+++ b/sysdeps/x86_64/x32/tst-size_t-memchr.c
@@ -0,0 +1,72 @@
+/* Test memchr with size_t in the lower 32 bits of 64-bit register.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef WIDE
+# define TEST_NAME "memchr"
+#else
+# define TEST_NAME "wmemchr"
+#endif /* WIDE */
+#include "test-size_t.h"
+
+#ifndef WIDE
+# define MEMCHR memchr
+# define CHAR char
+# define UCHAR unsigned char
+#else
+# include <wchar.h>
+# define MEMCHR wmemchr
+# define CHAR wchar_t
+# define UCHAR wchar_t
+#endif /* WIDE */
+
+IMPL (MEMCHR, 1)
+
+typedef CHAR * (*proto_t) (const CHAR*, int, size_t);
+
+static CHAR *
+__attribute__ ((noinline, noclone))
+do_memchr (parameter_t a, parameter_t b)
+{
+ return CALL (&b, a.p, (uintptr_t) b.p, a.len);
+}
+
+static int
+test_main (void)
+{
+ test_init ();
+
+ parameter_t src = { { page_size / sizeof (CHAR) }, buf2 };
+ parameter_t c = { { 0 }, (void *) (uintptr_t) 0x12 };
+
+ int ret = 0;
+ FOR_EACH_IMPL (impl, 0)
+ {
+ c.fn = impl->fn;
+ CHAR *res = do_memchr (src, c);
+ if (res)
+ {
+ error (0, 0, "Wrong result in function %s: %p != NULL",
+ impl->name, res);
+ ret = 1;
+ }
+ }
+
+ return ret ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/x86_64/x32/tst-size_t-wmemchr.c b/sysdeps/x86_64/x32/tst-size_t-wmemchr.c
new file mode 100644
index 00000000..877801d6
--- /dev/null
+++ b/sysdeps/x86_64/x32/tst-size_t-wmemchr.c
@@ -0,0 +1,20 @@
+/* Test wmemchr with size_t in the lower 32 bits of 64-bit register.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define WIDE 1
+#include "tst-size_t-memchr.c"
--
GitLab

View file

@ -1,41 +0,0 @@
From ddf0992cf57a93200e0c782e2a94d0733a5a0b87 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Sun, 9 Jan 2022 16:02:21 -0600
Subject: [PATCH] x86: Fix __wcsncmp_avx2 in strcmp-avx2.S [BZ# 28755]
Content-type: text/plain; charset=UTF-8
Fixes [BZ# 28755] for wcsncmp by redirecting length >= 2^56 to
__wcscmp_avx2. For x86_64 this covers the entire address range, so any
larger length could not possibly be used to bound `s1` or `s2`.
test-strcmp, test-strncmp, test-wcscmp, and test-wcsncmp all pass.
Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
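In C terms, the guard added below amounts to roughly the following (a sketch only; the actual change is the assembly hunk that follows):

```c
#include <wchar.h>

/* Sketch of the added guard: a length with any of the top 8 bits set
   (>= 2^56) cannot bound a real object once multiplied by
   sizeof (wchar_t), so fall back to the unbounded comparison.
   Mirrors `shrq $56, %rcx; jnz __wcscmp_avx2`.  */
static int
wcsncmp_guarded (const wchar_t *s1, const wchar_t *s2, size_t n)
{
  if (n >> 56)
    return wcscmp (s1, s2);
  return wcsncmp (s1, s2, n);
}
```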
---
sysdeps/x86_64/multiarch/strcmp-avx2.S | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
index 156c1949..8fb8eedc 100644
--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
+++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
@@ -83,6 +83,16 @@ ENTRY (STRCMP)
je L(char0)
jb L(zero)
# ifdef USE_AS_WCSCMP
+# ifndef __ILP32__
+ movq %rdx, %rcx
+ /* Check if length could overflow when multiplied by
+ sizeof(wchar_t). Checking the top 8 bits will cover all potential
+ overflow cases as well as redirect cases where it is impossible for
+ the length to bound a valid memory region. In these cases just use
+ 'wcscmp'. */
+ shrq $56, %rcx
+ jnz __wcscmp_avx2
+# endif
/* Convert units: from wide to byte char. */
shl $2, %RDX_LP
# endif
--
GitLab

View file

@ -1,257 +0,0 @@
From 244b415d386487521882debb845a040a4758cb18 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Fri, 25 Mar 2022 17:13:33 -0500
Subject: [PATCH] x86: Small improvements for wcslen
Content-type: text/plain; charset=UTF-8
Just a few QOL changes.
1. Prefer `add` over `lea`, as `add` can run on more execution
units.
2. Don't break macro-fusion between `test` and `jcc`.
3. Reduce code size by removing gratuitous padding bytes (-90
bytes).
geometric_mean(N=20) of all benchmarks New / Original: 0.959
All string/memory tests pass.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
---
sysdeps/x86_64/wcslen.S | 86 ++++++++++++++++++++---------------------
1 file changed, 41 insertions(+), 45 deletions(-)
diff --git a/sysdeps/x86_64/wcslen.S b/sysdeps/x86_64/wcslen.S
index 9f5f7232..254bb030 100644
--- a/sysdeps/x86_64/wcslen.S
+++ b/sysdeps/x86_64/wcslen.S
@@ -41,82 +41,82 @@ ENTRY (__wcslen)
pxor %xmm0, %xmm0
lea 32(%rdi), %rax
- lea 16(%rdi), %rcx
+ addq $16, %rdi
and $-16, %rax
pcmpeqd (%rax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm3
pmovmskb %xmm3, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm0
pmovmskb %xmm0, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm1
pmovmskb %xmm1, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm2
pmovmskb %xmm2, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm3
pmovmskb %xmm3, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm0
pmovmskb %xmm0, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm1
pmovmskb %xmm1, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm2
pmovmskb %xmm2, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm3
pmovmskb %xmm3, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
and $-0x40, %rax
@@ -133,104 +133,100 @@ L(aligned_64_loop):
pminub %xmm0, %xmm2
pcmpeqd %xmm3, %xmm2
pmovmskb %xmm2, %edx
+ addq $64, %rax
test %edx, %edx
- lea 64(%rax), %rax
jz L(aligned_64_loop)
pcmpeqd -64(%rax), %xmm3
pmovmskb %xmm3, %edx
+ addq $48, %rdi
test %edx, %edx
- lea 48(%rcx), %rcx
jnz L(exit)
pcmpeqd %xmm1, %xmm3
pmovmskb %xmm3, %edx
+ addq $-16, %rdi
test %edx, %edx
- lea -16(%rcx), %rcx
jnz L(exit)
pcmpeqd -32(%rax), %xmm3
pmovmskb %xmm3, %edx
+ addq $-16, %rdi
test %edx, %edx
- lea -16(%rcx), %rcx
jnz L(exit)
pcmpeqd %xmm6, %xmm3
pmovmskb %xmm3, %edx
+ addq $-16, %rdi
test %edx, %edx
- lea -16(%rcx), %rcx
- jnz L(exit)
-
- jmp L(aligned_64_loop)
+ jz L(aligned_64_loop)
.p2align 4
L(exit):
- sub %rcx, %rax
+ sub %rdi, %rax
shr $2, %rax
test %dl, %dl
jz L(exit_high)
- mov %dl, %cl
- and $15, %cl
+ andl $15, %edx
jz L(exit_1)
ret
- .p2align 4
+ /* No align here. Naturally aligned % 16 == 1. */
L(exit_high):
- mov %dh, %ch
- and $15, %ch
+ andl $(15 << 8), %edx
jz L(exit_3)
add $2, %rax
ret
- .p2align 4
+ .p2align 3
L(exit_1):
add $1, %rax
ret
- .p2align 4
+ .p2align 3
L(exit_3):
add $3, %rax
ret
- .p2align 4
+ .p2align 3
L(exit_tail0):
- xor %rax, %rax
+ xorl %eax, %eax
ret
- .p2align 4
+ .p2align 3
L(exit_tail1):
- mov $1, %rax
+ movl $1, %eax
ret
- .p2align 4
+ .p2align 3
L(exit_tail2):
- mov $2, %rax
+ movl $2, %eax
ret
- .p2align 4
+ .p2align 3
L(exit_tail3):
- mov $3, %rax
+ movl $3, %eax
ret
- .p2align 4
+ .p2align 3
L(exit_tail4):
- mov $4, %rax
+ movl $4, %eax
ret
- .p2align 4
+ .p2align 3
L(exit_tail5):
- mov $5, %rax
+ movl $5, %eax
ret
- .p2align 4
+ .p2align 3
L(exit_tail6):
- mov $6, %rax
+ movl $6, %eax
ret
- .p2align 4
+ .p2align 3
L(exit_tail7):
- mov $7, %rax
+ movl $7, %eax
ret
END (__wcslen)
--
GitLab

View file

@ -1,964 +0,0 @@
From 7cbc03d03091d5664060924789afe46d30a5477e Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Fri, 15 Apr 2022 12:28:00 -0500
Subject: [PATCH] x86: Remove memcmp-sse4.S
Content-type: text/plain; charset=UTF-8
Code didn't actually use any sse4 instructions since `ptest` was
removed in:
commit 2f9062d7171850451e6044ef78d91ff8c017b9c0
Author: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Wed Nov 10 16:18:56 2021 -0600
x86: Shrink memcmp-sse4.S code size
The new memcmp-sse2 implementation is also faster.
geometric_mean(N=20) of page cross cases SSE2 / SSE4: 0.905
Note there are two regressions preferring SSE2 for Size = 1 and Size =
65.
Size = 1:
size, align0, align1, ret, New Time/Old Time
1, 1, 1, 0, 1.2
1, 1, 1, 1, 1.197
1, 1, 1, -1, 1.2
This is intentional. Size == 1 is significantly less hot based on
profiles of GCC11 and Python3 than sizes [4, 8] (which are made
hotter).
Python3 Size = 1 -> 13.64%
Python3 Size = [4, 8] -> 60.92%
GCC11 Size = 1 -> 1.29%
GCC11 Size = [4, 8] -> 33.86%
size, align0, align1, ret, New Time/Old Time
4, 4, 4, 0, 0.622
4, 4, 4, 1, 0.797
4, 4, 4, -1, 0.805
5, 5, 5, 0, 0.623
5, 5, 5, 1, 0.777
5, 5, 5, -1, 0.802
6, 6, 6, 0, 0.625
6, 6, 6, 1, 0.813
6, 6, 6, -1, 0.788
7, 7, 7, 0, 0.625
7, 7, 7, 1, 0.799
7, 7, 7, -1, 0.795
8, 8, 8, 0, 0.625
8, 8, 8, 1, 0.848
8, 8, 8, -1, 0.914
9, 9, 9, 0, 0.625
Size = 65:
size, align0, align1, ret, New Time/Old Time
65, 0, 0, 0, 1.103
65, 0, 0, 1, 1.216
65, 0, 0, -1, 1.227
65, 65, 0, 0, 1.091
65, 0, 65, 1, 1.19
65, 65, 65, -1, 1.215
This is because A) the checks in range [65, 96] are now unrolled 2x
and B) because smaller values <= 16 are now given a hotter path. By
contrast the SSE4 version has a branch for Size = 80. The unrolled
version gets better performance for returns which need both
comparisons.
size, align0, align1, ret, New Time/Old Time
128, 4, 8, 0, 0.858
128, 4, 8, 1, 0.879
128, 4, 8, -1, 0.888
As well, outside of microbenchmark environments, which are not fully
predictable, the branch will have a real cost.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
---
sysdeps/x86_64/multiarch/Makefile | 2 -
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 4 -
sysdeps/x86_64/multiarch/ifunc-memcmp.h | 4 -
sysdeps/x86_64/multiarch/memcmp-sse4.S | 804 ---------------------
4 files changed, 814 deletions(-)
delete mode 100644 sysdeps/x86_64/multiarch/memcmp-sse4.S
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index bca82e38..b503e4b8 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -11,7 +11,6 @@ sysdep_routines += \
memcmp-avx2-movbe-rtm \
memcmp-evex-movbe \
memcmp-sse2 \
- memcmp-sse4 \
memcmp-ssse3 \
memcpy-ssse3 \
memcpy-ssse3-back \
@@ -174,7 +173,6 @@ sysdep_routines += \
wmemcmp-avx2-movbe-rtm \
wmemcmp-c \
wmemcmp-evex-movbe \
- wmemcmp-sse4 \
wmemcmp-ssse3 \
# sysdep_routines
endif
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 14314367..450a2917 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -78,8 +78,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (MOVBE)),
__memcmp_evex_movbe)
- IFUNC_IMPL_ADD (array, i, memcmp, CPU_FEATURE_USABLE (SSE4_1),
- __memcmp_sse4_1)
IFUNC_IMPL_ADD (array, i, memcmp, CPU_FEATURE_USABLE (SSSE3),
__memcmp_ssse3)
IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_sse2))
@@ -824,8 +822,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (MOVBE)),
__wmemcmp_evex_movbe)
- IFUNC_IMPL_ADD (array, i, wmemcmp, CPU_FEATURE_USABLE (SSE4_1),
- __wmemcmp_sse4_1)
IFUNC_IMPL_ADD (array, i, wmemcmp, CPU_FEATURE_USABLE (SSSE3),
__wmemcmp_ssse3)
IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_sse2))
diff --git a/sysdeps/x86_64/multiarch/ifunc-memcmp.h b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
index 690dffe8..0bc47a7f 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memcmp.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
@@ -21,7 +21,6 @@
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe_rtm) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_movbe) attribute_hidden;
@@ -47,9 +46,6 @@ IFUNC_SELECTOR (void)
return OPTIMIZE (avx2_movbe);
}
- if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
- return OPTIMIZE (sse4_1);
-
if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
return OPTIMIZE (ssse3);
diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S
deleted file mode 100644
index 50060006..00000000
--- a/sysdeps/x86_64/multiarch/memcmp-sse4.S
+++ /dev/null
@@ -1,804 +0,0 @@
-/* memcmp with SSE4.1, wmemcmp with SSE4.1
- Copyright (C) 2010-2018 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# ifndef MEMCMP
-# define MEMCMP __memcmp_sse4_1
-# endif
-
-#ifdef USE_AS_WMEMCMP
-# define CMPEQ pcmpeqd
-# define CHAR_SIZE 4
-#else
-# define CMPEQ pcmpeqb
-# define CHAR_SIZE 1
-#endif
-
-
-/* Warning!
- wmemcmp has to use SIGNED comparison for elements.
- memcmp has to use UNSIGNED comparison for elemnts.
-*/
-
- .section .text.sse4.1,"ax",@progbits
-ENTRY (MEMCMP)
-# ifdef USE_AS_WMEMCMP
- shl $2, %RDX_LP
-# elif defined __ILP32__
- /* Clear the upper 32 bits. */
- mov %edx, %edx
-# endif
- cmp $79, %RDX_LP
- ja L(79bytesormore)
-
- cmp $CHAR_SIZE, %RDX_LP
- jbe L(firstbyte)
-
- /* N in (CHAR_SIZE, 79) bytes. */
- cmpl $32, %edx
- ja L(more_32_bytes)
-
- cmpl $16, %edx
- jae L(16_to_32_bytes)
-
-# ifndef USE_AS_WMEMCMP
- cmpl $8, %edx
- jae L(8_to_16_bytes)
-
- cmpl $4, %edx
- jb L(2_to_3_bytes)
-
- movl (%rdi), %eax
- movl (%rsi), %ecx
-
- bswap %eax
- bswap %ecx
-
- shlq $32, %rax
- shlq $32, %rcx
-
- movl -4(%rdi, %rdx), %edi
- movl -4(%rsi, %rdx), %esi
-
- bswap %edi
- bswap %esi
-
- orq %rdi, %rax
- orq %rsi, %rcx
- subq %rcx, %rax
- cmovne %edx, %eax
- sbbl %ecx, %ecx
- orl %ecx, %eax
- ret
-
- .p2align 4,, 8
-L(2_to_3_bytes):
- movzwl (%rdi), %eax
- movzwl (%rsi), %ecx
- shll $8, %eax
- shll $8, %ecx
- bswap %eax
- bswap %ecx
- movzbl -1(%rdi, %rdx), %edi
- movzbl -1(%rsi, %rdx), %esi
- orl %edi, %eax
- orl %esi, %ecx
- subl %ecx, %eax
- ret
-
- .p2align 4,, 8
-L(8_to_16_bytes):
- movq (%rdi), %rax
- movq (%rsi), %rcx
-
- bswap %rax
- bswap %rcx
-
- subq %rcx, %rax
- jne L(8_to_16_bytes_done)
-
- movq -8(%rdi, %rdx), %rax
- movq -8(%rsi, %rdx), %rcx
-
- bswap %rax
- bswap %rcx
-
- subq %rcx, %rax
-
-L(8_to_16_bytes_done):
- cmovne %edx, %eax
- sbbl %ecx, %ecx
- orl %ecx, %eax
- ret
-# else
- xorl %eax, %eax
- movl (%rdi), %ecx
- cmpl (%rsi), %ecx
- jne L(8_to_16_bytes_done)
- movl 4(%rdi), %ecx
- cmpl 4(%rsi), %ecx
- jne L(8_to_16_bytes_done)
- movl -4(%rdi, %rdx), %ecx
- cmpl -4(%rsi, %rdx), %ecx
- jne L(8_to_16_bytes_done)
- ret
-# endif
-
- .p2align 4,, 3
-L(ret_zero):
- xorl %eax, %eax
-L(zero):
- ret
-
- .p2align 4,, 8
-L(firstbyte):
- jb L(ret_zero)
-# ifdef USE_AS_WMEMCMP
- xorl %eax, %eax
- movl (%rdi), %ecx
- cmpl (%rsi), %ecx
- je L(zero)
-L(8_to_16_bytes_done):
- setg %al
- leal -1(%rax, %rax), %eax
-# else
- movzbl (%rdi), %eax
- movzbl (%rsi), %ecx
- sub %ecx, %eax
-# endif
- ret
-
- .p2align 4
-L(vec_return_begin_48):
- addq $16, %rdi
- addq $16, %rsi
-L(vec_return_begin_32):
- bsfl %eax, %eax
-# ifdef USE_AS_WMEMCMP
- movl 32(%rdi, %rax), %ecx
- xorl %edx, %edx
- cmpl 32(%rsi, %rax), %ecx
- setg %dl
- leal -1(%rdx, %rdx), %eax
-# else
- movzbl 32(%rsi, %rax), %ecx
- movzbl 32(%rdi, %rax), %eax
- subl %ecx, %eax
-# endif
- ret
-
- .p2align 4
-L(vec_return_begin_16):
- addq $16, %rdi
- addq $16, %rsi
-L(vec_return_begin):
- bsfl %eax, %eax
-# ifdef USE_AS_WMEMCMP
- movl (%rdi, %rax), %ecx
- xorl %edx, %edx
- cmpl (%rsi, %rax), %ecx
- setg %dl
- leal -1(%rdx, %rdx), %eax
-# else
- movzbl (%rsi, %rax), %ecx
- movzbl (%rdi, %rax), %eax
- subl %ecx, %eax
-# endif
- ret
-
- .p2align 4
-L(vec_return_end_16):
- subl $16, %edx
-L(vec_return_end):
- bsfl %eax, %eax
- addl %edx, %eax
-# ifdef USE_AS_WMEMCMP
- movl -16(%rdi, %rax), %ecx
- xorl %edx, %edx
- cmpl -16(%rsi, %rax), %ecx
- setg %dl
- leal -1(%rdx, %rdx), %eax
-# else
- movzbl -16(%rsi, %rax), %ecx
- movzbl -16(%rdi, %rax), %eax
- subl %ecx, %eax
-# endif
- ret
-
- .p2align 4,, 8
-L(more_32_bytes):
- movdqu (%rdi), %xmm0
- movdqu (%rsi), %xmm1
- CMPEQ %xmm0, %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin)
-
- movdqu 16(%rdi), %xmm0
- movdqu 16(%rsi), %xmm1
- CMPEQ %xmm0, %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_16)
-
- cmpl $64, %edx
- jbe L(32_to_64_bytes)
- movdqu 32(%rdi), %xmm0
- movdqu 32(%rsi), %xmm1
- CMPEQ %xmm0, %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_32)
-
- .p2align 4,, 6
-L(32_to_64_bytes):
- movdqu -32(%rdi, %rdx), %xmm0
- movdqu -32(%rsi, %rdx), %xmm1
- CMPEQ %xmm0, %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_end_16)
-
- movdqu -16(%rdi, %rdx), %xmm0
- movdqu -16(%rsi, %rdx), %xmm1
- CMPEQ %xmm0, %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_end)
- ret
-
- .p2align 4
-L(16_to_32_bytes):
- movdqu (%rdi), %xmm0
- movdqu (%rsi), %xmm1
- CMPEQ %xmm0, %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin)
-
- movdqu -16(%rdi, %rdx), %xmm0
- movdqu -16(%rsi, %rdx), %xmm1
- CMPEQ %xmm0, %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_end)
- ret
-
-
- .p2align 4
-L(79bytesormore):
- movdqu (%rdi), %xmm0
- movdqu (%rsi), %xmm1
- CMPEQ %xmm0, %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin)
-
-
- mov %rsi, %rcx
- and $-16, %rsi
- add $16, %rsi
- sub %rsi, %rcx
-
- sub %rcx, %rdi
- add %rcx, %rdx
- test $0xf, %rdi
- jz L(2aligned)
-
- cmp $128, %rdx
- ja L(128bytesormore)
-
- .p2align 4,, 6
-L(less128bytes):
- movdqu (%rdi), %xmm1
- CMPEQ (%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin)
-
- movdqu 16(%rdi), %xmm1
- CMPEQ 16(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_16)
-
- movdqu 32(%rdi), %xmm1
- CMPEQ 32(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_32)
-
- movdqu 48(%rdi), %xmm1
- CMPEQ 48(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_48)
-
- cmp $96, %rdx
- jb L(32_to_64_bytes)
-
- addq $64, %rdi
- addq $64, %rsi
- subq $64, %rdx
-
- .p2align 4,, 6
-L(last_64_bytes):
- movdqu (%rdi), %xmm1
- CMPEQ (%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin)
-
- movdqu 16(%rdi), %xmm1
- CMPEQ 16(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_16)
-
- movdqu -32(%rdi, %rdx), %xmm0
- movdqu -32(%rsi, %rdx), %xmm1
- CMPEQ %xmm0, %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_end_16)
-
- movdqu -16(%rdi, %rdx), %xmm0
- movdqu -16(%rsi, %rdx), %xmm1
- CMPEQ %xmm0, %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_end)
- ret
-
- .p2align 4
-L(128bytesormore):
- cmp $256, %rdx
- ja L(unaligned_loop)
-L(less256bytes):
- movdqu (%rdi), %xmm1
- CMPEQ (%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin)
-
- movdqu 16(%rdi), %xmm1
- CMPEQ 16(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_16)
-
- movdqu 32(%rdi), %xmm1
- CMPEQ 32(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_32)
-
- movdqu 48(%rdi), %xmm1
- CMPEQ 48(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_48)
-
- addq $64, %rdi
- addq $64, %rsi
-
- movdqu (%rdi), %xmm1
- CMPEQ (%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin)
-
- movdqu 16(%rdi), %xmm1
- CMPEQ 16(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_16)
-
- movdqu 32(%rdi), %xmm1
- CMPEQ 32(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_32)
-
- movdqu 48(%rdi), %xmm1
- CMPEQ 48(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_48)
-
- addq $-128, %rdx
- subq $-64, %rsi
- subq $-64, %rdi
-
- cmp $64, %rdx
- ja L(less128bytes)
-
- cmp $32, %rdx
- ja L(last_64_bytes)
-
- movdqu -32(%rdi, %rdx), %xmm0
- movdqu -32(%rsi, %rdx), %xmm1
- CMPEQ %xmm0, %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_end_16)
-
- movdqu -16(%rdi, %rdx), %xmm0
- movdqu -16(%rsi, %rdx), %xmm1
- CMPEQ %xmm0, %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_end)
- ret
-
- .p2align 4
-L(unaligned_loop):
-# ifdef DATA_CACHE_SIZE_HALF
- mov $DATA_CACHE_SIZE_HALF, %R8_LP
-# else
- mov __x86_data_cache_size_half(%rip), %R8_LP
-# endif
- movq %r8, %r9
- addq %r8, %r8
- addq %r9, %r8
- cmpq %r8, %rdx
- ja L(L2_L3_cache_unaligned)
- sub $64, %rdx
- .p2align 4
-L(64bytesormore_loop):
- movdqu (%rdi), %xmm0
- movdqu 16(%rdi), %xmm1
- movdqu 32(%rdi), %xmm2
- movdqu 48(%rdi), %xmm3
-
- CMPEQ (%rsi), %xmm0
- CMPEQ 16(%rsi), %xmm1
- CMPEQ 32(%rsi), %xmm2
- CMPEQ 48(%rsi), %xmm3
-
- pand %xmm0, %xmm1
- pand %xmm2, %xmm3
- pand %xmm1, %xmm3
-
- pmovmskb %xmm3, %eax
- incw %ax
- jnz L(64bytesormore_loop_end)
-
- add $64, %rsi
- add $64, %rdi
- sub $64, %rdx
- ja L(64bytesormore_loop)
-
- .p2align 4,, 6
-L(loop_tail):
- addq %rdx, %rdi
- movdqu (%rdi), %xmm0
- movdqu 16(%rdi), %xmm1
- movdqu 32(%rdi), %xmm2
- movdqu 48(%rdi), %xmm3
-
- addq %rdx, %rsi
- movdqu (%rsi), %xmm4
- movdqu 16(%rsi), %xmm5
- movdqu 32(%rsi), %xmm6
- movdqu 48(%rsi), %xmm7
-
- CMPEQ %xmm4, %xmm0
- CMPEQ %xmm5, %xmm1
- CMPEQ %xmm6, %xmm2
- CMPEQ %xmm7, %xmm3
-
- pand %xmm0, %xmm1
- pand %xmm2, %xmm3
- pand %xmm1, %xmm3
-
- pmovmskb %xmm3, %eax
- incw %ax
- jnz L(64bytesormore_loop_end)
- ret
-
-L(L2_L3_cache_unaligned):
- subq $64, %rdx
- .p2align 4
-L(L2_L3_unaligned_128bytes_loop):
- prefetchnta 0x1c0(%rdi)
- prefetchnta 0x1c0(%rsi)
-
- movdqu (%rdi), %xmm0
- movdqu 16(%rdi), %xmm1
- movdqu 32(%rdi), %xmm2
- movdqu 48(%rdi), %xmm3
-
- CMPEQ (%rsi), %xmm0
- CMPEQ 16(%rsi), %xmm1
- CMPEQ 32(%rsi), %xmm2
- CMPEQ 48(%rsi), %xmm3
-
- pand %xmm0, %xmm1
- pand %xmm2, %xmm3
- pand %xmm1, %xmm3
-
- pmovmskb %xmm3, %eax
- incw %ax
- jnz L(64bytesormore_loop_end)
-
- add $64, %rsi
- add $64, %rdi
- sub $64, %rdx
- ja L(L2_L3_unaligned_128bytes_loop)
- jmp L(loop_tail)
-
-
- /* This case is for machines which are sensitive for unaligned
- * instructions. */
- .p2align 4
-L(2aligned):
- cmp $128, %rdx
- ja L(128bytesormorein2aligned)
-L(less128bytesin2aligned):
- movdqa (%rdi), %xmm1
- CMPEQ (%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin)
-
- movdqa 16(%rdi), %xmm1
- CMPEQ 16(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_16)
-
- movdqa 32(%rdi), %xmm1
- CMPEQ 32(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_32)
-
- movdqa 48(%rdi), %xmm1
- CMPEQ 48(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_48)
-
- cmp $96, %rdx
- jb L(32_to_64_bytes)
-
- addq $64, %rdi
- addq $64, %rsi
- subq $64, %rdx
-
- .p2align 4,, 6
-L(aligned_last_64_bytes):
- movdqa (%rdi), %xmm1
- CMPEQ (%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin)
-
- movdqa 16(%rdi), %xmm1
- CMPEQ 16(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_16)
-
- movdqu -32(%rdi, %rdx), %xmm0
- movdqu -32(%rsi, %rdx), %xmm1
- CMPEQ %xmm0, %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_end_16)
-
- movdqu -16(%rdi, %rdx), %xmm0
- movdqu -16(%rsi, %rdx), %xmm1
- CMPEQ %xmm0, %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_end)
- ret
-
- .p2align 4
-L(128bytesormorein2aligned):
- cmp $256, %rdx
- ja L(aligned_loop)
-L(less256bytesin2alinged):
- movdqa (%rdi), %xmm1
- CMPEQ (%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin)
-
- movdqa 16(%rdi), %xmm1
- CMPEQ 16(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_16)
-
- movdqa 32(%rdi), %xmm1
- CMPEQ 32(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_32)
-
- movdqa 48(%rdi), %xmm1
- CMPEQ 48(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_48)
-
- addq $64, %rdi
- addq $64, %rsi
-
- movdqa (%rdi), %xmm1
- CMPEQ (%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin)
-
- movdqa 16(%rdi), %xmm1
- CMPEQ 16(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_16)
-
- movdqa 32(%rdi), %xmm1
- CMPEQ 32(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_32)
-
- movdqa 48(%rdi), %xmm1
- CMPEQ 48(%rsi), %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_begin_48)
-
- addq $-128, %rdx
- subq $-64, %rsi
- subq $-64, %rdi
-
- cmp $64, %rdx
- ja L(less128bytesin2aligned)
-
- cmp $32, %rdx
- ja L(aligned_last_64_bytes)
-
- movdqu -32(%rdi, %rdx), %xmm0
- movdqu -32(%rsi, %rdx), %xmm1
- CMPEQ %xmm0, %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_end_16)
-
- movdqu -16(%rdi, %rdx), %xmm0
- movdqu -16(%rsi, %rdx), %xmm1
- CMPEQ %xmm0, %xmm1
- pmovmskb %xmm1, %eax
- incw %ax
- jnz L(vec_return_end)
- ret
-
- .p2align 4
-L(aligned_loop):
-# ifdef DATA_CACHE_SIZE_HALF
- mov $DATA_CACHE_SIZE_HALF, %R8_LP
-# else
- mov __x86_data_cache_size_half(%rip), %R8_LP
-# endif
- movq %r8, %r9
- addq %r8, %r8
- addq %r9, %r8
- cmpq %r8, %rdx
- ja L(L2_L3_cache_aligned)
-
- sub $64, %rdx
- .p2align 4
-L(64bytesormore_loopin2aligned):
- movdqa (%rdi), %xmm0
- movdqa 16(%rdi), %xmm1
- movdqa 32(%rdi), %xmm2
- movdqa 48(%rdi), %xmm3
-
- CMPEQ (%rsi), %xmm0
- CMPEQ 16(%rsi), %xmm1
- CMPEQ 32(%rsi), %xmm2
- CMPEQ 48(%rsi), %xmm3
-
- pand %xmm0, %xmm1
- pand %xmm2, %xmm3
- pand %xmm1, %xmm3
-
- pmovmskb %xmm3, %eax
- incw %ax
- jnz L(64bytesormore_loop_end)
- add $64, %rsi
- add $64, %rdi
- sub $64, %rdx
- ja L(64bytesormore_loopin2aligned)
- jmp L(loop_tail)
-
-L(L2_L3_cache_aligned):
- subq $64, %rdx
- .p2align 4
-L(L2_L3_aligned_128bytes_loop):
- prefetchnta 0x1c0(%rdi)
- prefetchnta 0x1c0(%rsi)
- movdqa (%rdi), %xmm0
- movdqa 16(%rdi), %xmm1
- movdqa 32(%rdi), %xmm2
- movdqa 48(%rdi), %xmm3
-
- CMPEQ (%rsi), %xmm0
- CMPEQ 16(%rsi), %xmm1
- CMPEQ 32(%rsi), %xmm2
- CMPEQ 48(%rsi), %xmm3
-
- pand %xmm0, %xmm1
- pand %xmm2, %xmm3
- pand %xmm1, %xmm3
-
- pmovmskb %xmm3, %eax
- incw %ax
- jnz L(64bytesormore_loop_end)
-
- addq $64, %rsi
- addq $64, %rdi
- subq $64, %rdx
- ja L(L2_L3_aligned_128bytes_loop)
- jmp L(loop_tail)
-
- .p2align 4
-L(64bytesormore_loop_end):
- pmovmskb %xmm0, %ecx
- incw %cx
- jnz L(loop_end_ret)
-
- pmovmskb %xmm1, %ecx
- notw %cx
- sall $16, %ecx
- jnz L(loop_end_ret)
-
- pmovmskb %xmm2, %ecx
- notw %cx
- shlq $32, %rcx
- jnz L(loop_end_ret)
-
- addq $48, %rdi
- addq $48, %rsi
- movq %rax, %rcx
-
- .p2align 4,, 6
-L(loop_end_ret):
- bsfq %rcx, %rcx
-# ifdef USE_AS_WMEMCMP
- movl (%rdi, %rcx), %eax
- xorl %edx, %edx
- cmpl (%rsi, %rcx), %eax
- setg %dl
- leal -1(%rdx, %rdx), %eax
-# else
- movzbl (%rdi, %rcx), %eax
- movzbl (%rsi, %rcx), %ecx
- subl %ecx, %eax
-# endif
- ret
-END (MEMCMP)
-#endif
--
GitLab

View file

@ -1,263 +0,0 @@
From 23102686ec67b856a2d4fd25ddaa1c0b8d175c4f Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Fri, 15 Apr 2022 12:28:01 -0500
Subject: [PATCH] x86: Cleanup page cross code in memcmp-avx2-movbe.S
Content-type: text/plain; charset=UTF-8
The old code was both inefficient and wasted code size. The new code is
smaller (-62 bytes) and has comparable or better performance in the
page cross case.
geometric_mean(N=20) of page cross cases New / Original: 0.960
size, align0, align1, ret, New Time/Old Time
1, 4095, 0, 0, 1.001
1, 4095, 0, 1, 0.999
1, 4095, 0, -1, 1.0
2, 4094, 0, 0, 1.0
2, 4094, 0, 1, 1.0
2, 4094, 0, -1, 1.0
3, 4093, 0, 0, 1.0
3, 4093, 0, 1, 1.0
3, 4093, 0, -1, 1.0
4, 4092, 0, 0, 0.987
4, 4092, 0, 1, 1.0
4, 4092, 0, -1, 1.0
5, 4091, 0, 0, 0.984
5, 4091, 0, 1, 1.002
5, 4091, 0, -1, 1.005
6, 4090, 0, 0, 0.993
6, 4090, 0, 1, 1.001
6, 4090, 0, -1, 1.003
7, 4089, 0, 0, 0.991
7, 4089, 0, 1, 1.0
7, 4089, 0, -1, 1.001
8, 4088, 0, 0, 0.875
8, 4088, 0, 1, 0.881
8, 4088, 0, -1, 0.888
9, 4087, 0, 0, 0.872
9, 4087, 0, 1, 0.879
9, 4087, 0, -1, 0.883
10, 4086, 0, 0, 0.878
10, 4086, 0, 1, 0.886
10, 4086, 0, -1, 0.873
11, 4085, 0, 0, 0.878
11, 4085, 0, 1, 0.881
11, 4085, 0, -1, 0.879
12, 4084, 0, 0, 0.873
12, 4084, 0, 1, 0.889
12, 4084, 0, -1, 0.875
13, 4083, 0, 0, 0.873
13, 4083, 0, 1, 0.863
13, 4083, 0, -1, 0.863
14, 4082, 0, 0, 0.838
14, 4082, 0, 1, 0.869
14, 4082, 0, -1, 0.877
15, 4081, 0, 0, 0.841
15, 4081, 0, 1, 0.869
15, 4081, 0, -1, 0.876
16, 4080, 0, 0, 0.988
16, 4080, 0, 1, 0.99
16, 4080, 0, -1, 0.989
17, 4079, 0, 0, 0.978
17, 4079, 0, 1, 0.981
17, 4079, 0, -1, 0.98
18, 4078, 0, 0, 0.981
18, 4078, 0, 1, 0.98
18, 4078, 0, -1, 0.985
19, 4077, 0, 0, 0.977
19, 4077, 0, 1, 0.979
19, 4077, 0, -1, 0.986
20, 4076, 0, 0, 0.977
20, 4076, 0, 1, 0.986
20, 4076, 0, -1, 0.984
21, 4075, 0, 0, 0.977
21, 4075, 0, 1, 0.983
21, 4075, 0, -1, 0.988
22, 4074, 0, 0, 0.983
22, 4074, 0, 1, 0.994
22, 4074, 0, -1, 0.993
23, 4073, 0, 0, 0.98
23, 4073, 0, 1, 0.992
23, 4073, 0, -1, 0.995
24, 4072, 0, 0, 0.989
24, 4072, 0, 1, 0.989
24, 4072, 0, -1, 0.991
25, 4071, 0, 0, 0.99
25, 4071, 0, 1, 0.999
25, 4071, 0, -1, 0.996
26, 4070, 0, 0, 0.993
26, 4070, 0, 1, 0.995
26, 4070, 0, -1, 0.998
27, 4069, 0, 0, 0.993
27, 4069, 0, 1, 0.999
27, 4069, 0, -1, 1.0
28, 4068, 0, 0, 0.997
28, 4068, 0, 1, 1.0
28, 4068, 0, -1, 0.999
29, 4067, 0, 0, 0.996
29, 4067, 0, 1, 0.999
29, 4067, 0, -1, 0.999
30, 4066, 0, 0, 0.991
30, 4066, 0, 1, 1.001
30, 4066, 0, -1, 0.999
31, 4065, 0, 0, 0.988
31, 4065, 0, 1, 0.998
31, 4065, 0, -1, 0.998
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
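The new branch-free [4, 7]-byte path can be read as the following C sketch (names hypothetical; assumes a little-endian host, where `movbe` performs the byte swap):

```c
#include <stdint.h>
#include <string.h>

/* Overlapping big-endian loads: read the first and last 4 bytes of
   each buffer (the two loads overlap for n < 8), byte-swap so that
   lexicographic order matches integer order, pack into one 64-bit
   value per buffer, and compare once -- no branch on the length.  */
static uint32_t
load32_be (const unsigned char *p)
{
  uint32_t v;
  memcpy (&v, p, sizeof v);
  return __builtin_bswap32 (v);
}

static int
memcmp_4_to_7 (const void *a, const void *b, size_t n)
{
  const unsigned char *pa = a, *pb = b;
  uint64_t x = ((uint64_t) load32_be (pa) << 32) | load32_be (pa + n - 4);
  uint64_t y = ((uint64_t) load32_be (pb) << 32) | load32_be (pb + n - 4);
  return (x > y) - (x < y);
}
```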
---
sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S | 98 ++++++++++++--------
1 file changed, 61 insertions(+), 37 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
index 16fc673e..99258cf5 100644
--- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
+++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
@@ -429,22 +429,21 @@ L(page_cross_less_vec):
# ifndef USE_AS_WMEMCMP
cmpl $8, %edx
jae L(between_8_15)
+ /* Fall through for [4, 7]. */
cmpl $4, %edx
- jae L(between_4_7)
+ jb L(between_2_3)
- /* Load as big endian to avoid branches. */
- movzwl (%rdi), %eax
- movzwl (%rsi), %ecx
- shll $8, %eax
- shll $8, %ecx
- bswap %eax
- bswap %ecx
- movzbl -1(%rdi, %rdx), %edi
- movzbl -1(%rsi, %rdx), %esi
- orl %edi, %eax
- orl %esi, %ecx
- /* Subtraction is okay because the upper 8 bits are zero. */
- subl %ecx, %eax
+ movbe (%rdi), %eax
+ movbe (%rsi), %ecx
+ shlq $32, %rax
+ shlq $32, %rcx
+ movbe -4(%rdi, %rdx), %edi
+ movbe -4(%rsi, %rdx), %esi
+ orq %rdi, %rax
+ orq %rsi, %rcx
+ subq %rcx, %rax
+ /* Fast path for return zero. */
+ jnz L(ret_nonzero)
/* No ymm register was touched. */
ret
@@ -457,9 +456,33 @@ L(one_or_less):
/* No ymm register was touched. */
ret
+ .p2align 4,, 5
+L(ret_nonzero):
+ sbbl %eax, %eax
+ orl $1, %eax
+ /* No ymm register was touched. */
+ ret
+
+ .p2align 4,, 2
+L(zero):
+ xorl %eax, %eax
+ /* No ymm register was touched. */
+ ret
+
.p2align 4
L(between_8_15):
-# endif
+ movbe (%rdi), %rax
+ movbe (%rsi), %rcx
+ subq %rcx, %rax
+ jnz L(ret_nonzero)
+ movbe -8(%rdi, %rdx), %rax
+ movbe -8(%rsi, %rdx), %rcx
+ subq %rcx, %rax
+ /* Fast path for return zero. */
+ jnz L(ret_nonzero)
+ /* No ymm register was touched. */
+ ret
+# else
/* If USE_AS_WMEMCMP fall through into 8-15 byte case. */
vmovq (%rdi), %xmm1
vmovq (%rsi), %xmm2
@@ -475,16 +498,13 @@ L(between_8_15):
VPCMPEQ %xmm1, %xmm2, %xmm2
vpmovmskb %xmm2, %eax
subl $0xffff, %eax
+ /* Fast path for return zero. */
jnz L(return_vec_0)
/* No ymm register was touched. */
ret
+# endif
- .p2align 4
-L(zero):
- xorl %eax, %eax
- ret
-
- .p2align 4
+ .p2align 4,, 10
L(between_16_31):
/* From 16 to 31 bytes. No branch when size == 16. */
vmovdqu (%rsi), %xmm2
@@ -501,11 +521,17 @@ L(between_16_31):
VPCMPEQ (%rdi), %xmm2, %xmm2
vpmovmskb %xmm2, %eax
subl $0xffff, %eax
+ /* Fast path for return zero. */
jnz L(return_vec_0)
/* No ymm register was touched. */
ret
# ifdef USE_AS_WMEMCMP
+ .p2align 4,, 2
+L(zero):
+ xorl %eax, %eax
+ ret
+
.p2align 4
L(one_or_less):
jb L(zero)
@@ -520,22 +546,20 @@ L(one_or_less):
# else
.p2align 4
-L(between_4_7):
- /* Load as big endian with overlapping movbe to avoid branches.
- */
- movbe (%rdi), %eax
- movbe (%rsi), %ecx
- shlq $32, %rax
- shlq $32, %rcx
- movbe -4(%rdi, %rdx), %edi
- movbe -4(%rsi, %rdx), %esi
- orq %rdi, %rax
- orq %rsi, %rcx
- subq %rcx, %rax
- jz L(zero_4_7)
- sbbl %eax, %eax
- orl $1, %eax
-L(zero_4_7):
+L(between_2_3):
+ /* Load as big endian to avoid branches. */
+ movzwl (%rdi), %eax
+ movzwl (%rsi), %ecx
+ bswap %eax
+ bswap %ecx
+ shrl %eax
+ shrl %ecx
+ movzbl -1(%rdi, %rdx), %edi
+ movzbl -1(%rsi, %rdx), %esi
+ orl %edi, %eax
+ orl %esi, %ecx
+ /* Subtraction is okay because the upper bit is zero. */
+ subl %ecx, %eax
/* No ymm register was touched. */
ret
# endif
--
GitLab

View file

@ -1,876 +0,0 @@
From 5307aa9c1800f36a64c183c091c9af392c1fa75c Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Thu, 21 Apr 2022 20:52:28 -0500
Subject: [PATCH] x86: Optimize {str|wcs}rchr-sse2
Content-type: text/plain; charset=UTF-8
The new code unrolls the main loop slightly without adding too much
overhead and minimizes the comparisons for the search CHAR.
Geometric Mean of all benchmarks New / Old: 0.741
See email for all results.
Full xcheck passes on x86_64 with and without multiarch enabled.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
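The strategy, in both the old and new code, reduces to the following scalar sketch: remember the most recent match and resolve it only when the terminator is found. The SIMD code below does the same per 16-byte vector, carrying bitmasks instead of single positions:

```c
#include <stddef.h>

/* Scalar sketch of the vectorized strategy (illustrative only).  */
static char *
strrchr_sketch (const char *s, int c)
{
  const char *last = NULL;
  for (;; s++)
    {
      if (*s == (char) c)
        last = s;               /* remember the most recent match */
      if (*s == '\0')
        return (char *) last;   /* resolve once the terminator is hit */
    }
}
```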
---
sysdeps/x86_64/multiarch/strrchr-sse2.S | 2 +-
sysdeps/x86_64/multiarch/wcsrchr-sse2.S | 3 +-
sysdeps/x86_64/strrchr.S | 510 +++++++++++++++---------
sysdeps/x86_64/wcsrchr.S | 266 +-----------
4 files changed, 338 insertions(+), 443 deletions(-)
Conflicts:
sysdeps/x86_64/wcsrchr.S
(copyright header)
diff --git a/sysdeps/x86_64/multiarch/strrchr-sse2.S b/sysdeps/x86_64/multiarch/strrchr-sse2.S
index 0ec76fe9..6bb1284b 100644
--- a/sysdeps/x86_64/multiarch/strrchr-sse2.S
+++ b/sysdeps/x86_64/multiarch/strrchr-sse2.S
@@ -17,7 +17,7 @@
<http://www.gnu.org/licenses/>. */
#if IS_IN (libc)
-# define strrchr __strrchr_sse2
+# define STRRCHR __strrchr_sse2
# undef weak_alias
# define weak_alias(strrchr, rindex)
diff --git a/sysdeps/x86_64/multiarch/wcsrchr-sse2.S b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S
index d015e953..f26d53b5 100644
--- a/sysdeps/x86_64/multiarch/wcsrchr-sse2.S
+++ b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S
@@ -17,7 +17,6 @@
<http://www.gnu.org/licenses/>. */
#if IS_IN (libc)
-# define wcsrchr __wcsrchr_sse2
+# define STRRCHR __wcsrchr_sse2
#endif
-
#include "../wcsrchr.S"
diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S
index aca98e7e..a58cc220 100644
--- a/sysdeps/x86_64/strrchr.S
+++ b/sysdeps/x86_64/strrchr.S
@@ -19,210 +19,360 @@
#include <sysdep.h>
+#ifndef STRRCHR
+# define STRRCHR strrchr
+#endif
+
+#ifdef USE_AS_WCSRCHR
+# define PCMPEQ pcmpeqd
+# define CHAR_SIZE 4
+# define PMINU pminud
+#else
+# define PCMPEQ pcmpeqb
+# define CHAR_SIZE 1
+# define PMINU pminub
+#endif
+
+#define PAGE_SIZE 4096
+#define VEC_SIZE 16
+
.text
-ENTRY (strrchr)
- movd %esi, %xmm1
+ENTRY(STRRCHR)
+ movd %esi, %xmm0
movq %rdi, %rax
- andl $4095, %eax
- punpcklbw %xmm1, %xmm1
- cmpq $4032, %rax
- punpcklwd %xmm1, %xmm1
- pshufd $0, %xmm1, %xmm1
+ andl $(PAGE_SIZE - 1), %eax
+#ifndef USE_AS_WCSRCHR
+ punpcklbw %xmm0, %xmm0
+ punpcklwd %xmm0, %xmm0
+#endif
+ pshufd $0, %xmm0, %xmm0
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
ja L(cross_page)
- movdqu (%rdi), %xmm0
+
+L(cross_page_continue):
+ movups (%rdi), %xmm1
pxor %xmm2, %xmm2
- movdqa %xmm0, %xmm3
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
- pmovmskb %xmm0, %ecx
- pmovmskb %xmm3, %edx
- testq %rdx, %rdx
- je L(next_48_bytes)
- leaq -1(%rdx), %rax
- xorq %rdx, %rax
- andq %rcx, %rax
- je L(exit)
- bsrq %rax, %rax
+ PCMPEQ %xmm1, %xmm2
+ pmovmskb %xmm2, %ecx
+ testl %ecx, %ecx
+ jz L(aligned_more)
+
+ PCMPEQ %xmm0, %xmm1
+ pmovmskb %xmm1, %eax
+ leal -1(%rcx), %edx
+ xorl %edx, %ecx
+ andl %ecx, %eax
+ jz L(ret0)
+ bsrl %eax, %eax
addq %rdi, %rax
+ /* We are off by 3 for wcsrchr if search CHAR is non-zero. If
+ search CHAR is zero we are correct. Either way `andq
+ -CHAR_SIZE, %rax` gets the correct result. */
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
+L(ret0):
ret
+ /* Returns for first vec x1/x2 have hard coded backward search
+ path for earlier matches. */
.p2align 4
-L(next_48_bytes):
- movdqu 16(%rdi), %xmm4
- movdqa %xmm4, %xmm5
- movdqu 32(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm4
- pcmpeqb %xmm2, %xmm5
- movdqu 48(%rdi), %xmm0
- pmovmskb %xmm5, %edx
- movdqa %xmm3, %xmm5
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm2, %xmm5
- pcmpeqb %xmm0, %xmm2
- salq $16, %rdx
- pmovmskb %xmm3, %r8d
- pmovmskb %xmm5, %eax
- pmovmskb %xmm2, %esi
- salq $32, %r8
- salq $32, %rax
- pcmpeqb %xmm1, %xmm0
- orq %rdx, %rax
- movq %rsi, %rdx
- pmovmskb %xmm4, %esi
- salq $48, %rdx
- salq $16, %rsi
- orq %r8, %rsi
- orq %rcx, %rsi
- pmovmskb %xmm0, %ecx
- salq $48, %rcx
- orq %rcx, %rsi
- orq %rdx, %rax
- je L(loop_header2)
- leaq -1(%rax), %rcx
- xorq %rax, %rcx
- andq %rcx, %rsi
- je L(exit)
- bsrq %rsi, %rsi
- leaq (%rdi,%rsi), %rax
+L(first_vec_x0_test):
+ PCMPEQ %xmm0, %xmm1
+ pmovmskb %xmm1, %eax
+ testl %eax, %eax
+ jz L(ret0)
+ bsrl %eax, %eax
+ addq %r8, %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
ret
.p2align 4
-L(loop_header2):
- testq %rsi, %rsi
- movq %rdi, %rcx
- je L(no_c_found)
-L(loop_header):
- addq $64, %rdi
- pxor %xmm7, %xmm7
- andq $-64, %rdi
- jmp L(loop_entry)
+L(first_vec_x1):
+ PCMPEQ %xmm0, %xmm2
+ pmovmskb %xmm2, %eax
+ leal -1(%rcx), %edx
+ xorl %edx, %ecx
+ andl %ecx, %eax
+ jz L(first_vec_x0_test)
+ bsrl %eax, %eax
+ leaq (VEC_SIZE)(%rdi, %rax), %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
+ ret
.p2align 4
-L(loop64):
- testq %rdx, %rdx
- cmovne %rdx, %rsi
- cmovne %rdi, %rcx
- addq $64, %rdi
-L(loop_entry):
- movdqa 32(%rdi), %xmm3
- pxor %xmm6, %xmm6
- movdqa 48(%rdi), %xmm2
- movdqa %xmm3, %xmm0
- movdqa 16(%rdi), %xmm4
- pminub %xmm2, %xmm0
- movdqa (%rdi), %xmm5
- pminub %xmm4, %xmm0
- pminub %xmm5, %xmm0
- pcmpeqb %xmm7, %xmm0
- pmovmskb %xmm0, %eax
- movdqa %xmm5, %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %r9d
- movdqa %xmm4, %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %edx
- movdqa %xmm3, %xmm0
- pcmpeqb %xmm1, %xmm0
- salq $16, %rdx
- pmovmskb %xmm0, %r10d
- movdqa %xmm2, %xmm0
- pcmpeqb %xmm1, %xmm0
- salq $32, %r10
- orq %r10, %rdx
- pmovmskb %xmm0, %r8d
- orq %r9, %rdx
- salq $48, %r8
- orq %r8, %rdx
+L(first_vec_x1_test):
+ PCMPEQ %xmm0, %xmm2
+ pmovmskb %xmm2, %eax
testl %eax, %eax
- je L(loop64)
- pcmpeqb %xmm6, %xmm4
- pcmpeqb %xmm6, %xmm3
- pcmpeqb %xmm6, %xmm5
- pmovmskb %xmm4, %eax
- pmovmskb %xmm3, %r10d
- pcmpeqb %xmm6, %xmm2
- pmovmskb %xmm5, %r9d
- salq $32, %r10
- salq $16, %rax
- pmovmskb %xmm2, %r8d
- orq %r10, %rax
- orq %r9, %rax
- salq $48, %r8
- orq %r8, %rax
- leaq -1(%rax), %r8
- xorq %rax, %r8
- andq %r8, %rdx
- cmovne %rdi, %rcx
- cmovne %rdx, %rsi
- bsrq %rsi, %rsi
- leaq (%rcx,%rsi), %rax
+ jz L(first_vec_x0_test)
+ bsrl %eax, %eax
+ leaq (VEC_SIZE)(%rdi, %rax), %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
+ ret
+
+ .p2align 4
+L(first_vec_x2):
+ PCMPEQ %xmm0, %xmm3
+ pmovmskb %xmm3, %eax
+ leal -1(%rcx), %edx
+ xorl %edx, %ecx
+ andl %ecx, %eax
+ jz L(first_vec_x1_test)
+ bsrl %eax, %eax
+ leaq (VEC_SIZE * 2)(%rdi, %rax), %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
+ ret
+
+ .p2align 4
+L(aligned_more):
+ /* Save original pointer if match was in VEC 0. */
+ movq %rdi, %r8
+ andq $-VEC_SIZE, %rdi
+
+ movaps VEC_SIZE(%rdi), %xmm2
+ pxor %xmm3, %xmm3
+ PCMPEQ %xmm2, %xmm3
+ pmovmskb %xmm3, %ecx
+ testl %ecx, %ecx
+ jnz L(first_vec_x1)
+
+ movaps (VEC_SIZE * 2)(%rdi), %xmm3
+ pxor %xmm4, %xmm4
+ PCMPEQ %xmm3, %xmm4
+ pmovmskb %xmm4, %ecx
+ testl %ecx, %ecx
+ jnz L(first_vec_x2)
+
+ addq $VEC_SIZE, %rdi
+ /* Save pointer again before realigning. */
+ movq %rdi, %rsi
+ andq $-(VEC_SIZE * 2), %rdi
+ .p2align 4
+L(first_loop):
+ /* Do 2x VEC at a time. */
+ movaps (VEC_SIZE * 2)(%rdi), %xmm4
+ movaps (VEC_SIZE * 3)(%rdi), %xmm5
+ /* Since SSE2 has no pminud, wcsrchr needs separate logic for
+ detecting zero. Note if this is found to be a bottleneck it
+ may be worth adding an SSE4.1 wcsrchr implementation. */
+#ifdef USE_AS_WCSRCHR
+ movaps %xmm5, %xmm6
+ pxor %xmm8, %xmm8
+
+ PCMPEQ %xmm8, %xmm5
+ PCMPEQ %xmm4, %xmm8
+ por %xmm5, %xmm8
+#else
+ movaps %xmm5, %xmm6
+ PMINU %xmm4, %xmm5
+#endif
+
+ movaps %xmm4, %xmm9
+ PCMPEQ %xmm0, %xmm4
+ PCMPEQ %xmm0, %xmm6
+ movaps %xmm6, %xmm7
+ por %xmm4, %xmm6
+#ifndef USE_AS_WCSRCHR
+ pxor %xmm8, %xmm8
+ PCMPEQ %xmm5, %xmm8
+#endif
+ pmovmskb %xmm8, %ecx
+ pmovmskb %xmm6, %eax
+
+ addq $(VEC_SIZE * 2), %rdi
+ /* Use `addl` 1) so we can undo it with `subl` and 2) it can
+ macro-fuse with `jz`. */
+ addl %ecx, %eax
+ jz L(first_loop)
+
+ /* Check if there is zero match. */
+ testl %ecx, %ecx
+ jz L(second_loop_match)
+
+ /* Check if there was a match in last iteration. */
+ subl %ecx, %eax
+ jnz L(new_match)
+
+L(first_loop_old_match):
+ PCMPEQ %xmm0, %xmm2
+ PCMPEQ %xmm0, %xmm3
+ pmovmskb %xmm2, %ecx
+ pmovmskb %xmm3, %eax
+ addl %eax, %ecx
+ jz L(first_vec_x0_test)
+ /* NB: We could move this shift to before the branch and save a
+ bit of code size / performance on the fall through. The
+ branch leads to the null case which generally seems hotter
+ than char in first 3x VEC. */
+ sall $16, %eax
+ orl %ecx, %eax
+
+ bsrl %eax, %eax
+ addq %rsi, %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
+ ret
+
+ .p2align 4
+L(new_match):
+ pxor %xmm6, %xmm6
+ PCMPEQ %xmm9, %xmm6
+ pmovmskb %xmm6, %eax
+ sall $16, %ecx
+ orl %eax, %ecx
+
+ /* We can't reuse either of the old comparisons since we mask
+ off zeros after the first zero (instead of using the full
+ comparison), so we can't guarantee no interference between a
+ match after the end of the string and a valid match. */
+ pmovmskb %xmm4, %eax
+ pmovmskb %xmm7, %edx
+ sall $16, %edx
+ orl %edx, %eax
+
+ leal -1(%ecx), %edx
+ xorl %edx, %ecx
+ andl %ecx, %eax
+ jz L(first_loop_old_match)
+ bsrl %eax, %eax
+ addq %rdi, %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
ret
+ /* Save minimum state for getting most recent match. We can
+ throw out all previous work. */
.p2align 4
-L(no_c_found):
- movl $1, %esi
- xorl %ecx, %ecx
- jmp L(loop_header)
+L(second_loop_match):
+ movq %rdi, %rsi
+ movaps %xmm4, %xmm2
+ movaps %xmm7, %xmm3
.p2align 4
-L(exit):
- xorl %eax, %eax
+L(second_loop):
+ movaps (VEC_SIZE * 2)(%rdi), %xmm4
+ movaps (VEC_SIZE * 3)(%rdi), %xmm5
+ /* Since SSE2 has no pminud, wcsrchr needs separate logic for
+ detecting zero. Note if this is found to be a bottleneck it
+ may be worth adding an SSE4.1 wcsrchr implementation. */
+#ifdef USE_AS_WCSRCHR
+ movaps %xmm5, %xmm6
+ pxor %xmm8, %xmm8
+
+ PCMPEQ %xmm8, %xmm5
+ PCMPEQ %xmm4, %xmm8
+ por %xmm5, %xmm8
+#else
+ movaps %xmm5, %xmm6
+ PMINU %xmm4, %xmm5
+#endif
+
+ movaps %xmm4, %xmm9
+ PCMPEQ %xmm0, %xmm4
+ PCMPEQ %xmm0, %xmm6
+ movaps %xmm6, %xmm7
+ por %xmm4, %xmm6
+#ifndef USE_AS_WCSRCHR
+ pxor %xmm8, %xmm8
+ PCMPEQ %xmm5, %xmm8
+#endif
+
+ pmovmskb %xmm8, %ecx
+ pmovmskb %xmm6, %eax
+
+ addq $(VEC_SIZE * 2), %rdi
+ /* Either null term or new occurrence of CHAR. */
+ addl %ecx, %eax
+ jz L(second_loop)
+
+ /* No null term so it must be a new occurrence of CHAR. */
+ testl %ecx, %ecx
+ jz L(second_loop_match)
+
+
+ subl %ecx, %eax
+ jnz L(second_loop_new_match)
+
+L(second_loop_old_match):
+ pmovmskb %xmm2, %ecx
+ pmovmskb %xmm3, %eax
+ sall $16, %eax
+ orl %ecx, %eax
+ bsrl %eax, %eax
+ addq %rsi, %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
ret
.p2align 4
+L(second_loop_new_match):
+ pxor %xmm6, %xmm6
+ PCMPEQ %xmm9, %xmm6
+ pmovmskb %xmm6, %eax
+ sall $16, %ecx
+ orl %eax, %ecx
+
+ /* We can't reuse either of the old comparisons since we mask
+ off zeros after the first zero (instead of using the full
+ comparison), so we can't guarantee no interference between a
+ match after the end of the string and a valid match. */
+ pmovmskb %xmm4, %eax
+ pmovmskb %xmm7, %edx
+ sall $16, %edx
+ orl %edx, %eax
+
+ leal -1(%ecx), %edx
+ xorl %edx, %ecx
+ andl %ecx, %eax
+ jz L(second_loop_old_match)
+ bsrl %eax, %eax
+ addq %rdi, %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
+ ret
+
+ .p2align 4,, 4
L(cross_page):
- movq %rdi, %rax
- pxor %xmm0, %xmm0
- andq $-64, %rax
- movdqu (%rax), %xmm5
- movdqa %xmm5, %xmm6
- movdqu 16(%rax), %xmm4
- pcmpeqb %xmm1, %xmm5
- pcmpeqb %xmm0, %xmm6
- movdqu 32(%rax), %xmm3
- pmovmskb %xmm6, %esi
- movdqa %xmm4, %xmm6
- movdqu 48(%rax), %xmm2
- pcmpeqb %xmm1, %xmm4
- pcmpeqb %xmm0, %xmm6
- pmovmskb %xmm6, %edx
- movdqa %xmm3, %xmm6
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm0, %xmm6
- pcmpeqb %xmm2, %xmm0
- salq $16, %rdx
- pmovmskb %xmm3, %r9d
- pmovmskb %xmm6, %r8d
- pmovmskb %xmm0, %ecx
- salq $32, %r9
- salq $32, %r8
- pcmpeqb %xmm1, %xmm2
- orq %r8, %rdx
- salq $48, %rcx
- pmovmskb %xmm5, %r8d
- orq %rsi, %rdx
- pmovmskb %xmm4, %esi
- orq %rcx, %rdx
- pmovmskb %xmm2, %ecx
- salq $16, %rsi
- salq $48, %rcx
- orq %r9, %rsi
- orq %r8, %rsi
- orq %rcx, %rsi
+ movq %rdi, %rsi
+ andq $-VEC_SIZE, %rsi
+ movaps (%rsi), %xmm1
+ pxor %xmm2, %xmm2
+ PCMPEQ %xmm1, %xmm2
+ pmovmskb %xmm2, %edx
movl %edi, %ecx
- subl %eax, %ecx
- shrq %cl, %rdx
- shrq %cl, %rsi
- testq %rdx, %rdx
- je L(loop_header2)
- leaq -1(%rdx), %rax
- xorq %rdx, %rax
- andq %rax, %rsi
- je L(exit)
- bsrq %rsi, %rax
+ andl $(VEC_SIZE - 1), %ecx
+ sarl %cl, %edx
+ jz L(cross_page_continue)
+ PCMPEQ %xmm0, %xmm1
+ pmovmskb %xmm1, %eax
+ sarl %cl, %eax
+ leal -1(%rdx), %ecx
+ xorl %edx, %ecx
+ andl %ecx, %eax
+ jz L(ret1)
+ bsrl %eax, %eax
addq %rdi, %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
+L(ret1):
ret
-END (strrchr)
+END(STRRCHR)
-weak_alias (strrchr, rindex)
-libc_hidden_builtin_def (strrchr)
+#ifndef USE_AS_WCSRCHR
+ weak_alias (STRRCHR, rindex)
+ libc_hidden_builtin_def (STRRCHR)
+#endif
diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S
index 2f388537..ae3cfa7d 100644
--- a/sysdeps/x86_64/wcsrchr.S
+++ b/sysdeps/x86_64/wcsrchr.S
@@ -17,266 +17,12 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
- .text
-ENTRY (wcsrchr)
+#define USE_AS_WCSRCHR 1
+#define NO_PMINU 1
- movd %rsi, %xmm1
- mov %rdi, %rcx
- punpckldq %xmm1, %xmm1
- pxor %xmm2, %xmm2
- punpckldq %xmm1, %xmm1
- and $63, %rcx
- cmp $48, %rcx
- ja L(crosscache)
+#ifndef STRRCHR
+# define STRRCHR wcsrchr
+#endif
- movdqu (%rdi), %xmm0
- pcmpeqd %xmm0, %xmm2
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm0, %rax
- add $16, %rdi
-
- test %rax, %rax
- jnz L(unaligned_match1)
-
- test %rcx, %rcx
- jnz L(return_null)
-
- and $-16, %rdi
- xor %r8, %r8
- jmp L(loop)
-
- .p2align 4
-L(unaligned_match1):
- test %rcx, %rcx
- jnz L(prolog_find_zero_1)
-
- mov %rax, %r8
- mov %rdi, %rsi
- and $-16, %rdi
- jmp L(loop)
-
- .p2align 4
-L(crosscache):
- and $15, %rcx
- and $-16, %rdi
- pxor %xmm3, %xmm3
- movdqa (%rdi), %xmm0
- pcmpeqd %xmm0, %xmm3
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm3, %rdx
- pmovmskb %xmm0, %rax
- shr %cl, %rdx
- shr %cl, %rax
- add $16, %rdi
-
- test %rax, %rax
- jnz L(unaligned_match)
-
- test %rdx, %rdx
- jnz L(return_null)
-
- xor %r8, %r8
- jmp L(loop)
-
- .p2align 4
-L(unaligned_match):
- test %rdx, %rdx
- jnz L(prolog_find_zero)
-
- mov %rax, %r8
- lea (%rdi, %rcx), %rsi
-
-/* Loop start on aligned string. */
- .p2align 4
-L(loop):
- movdqa (%rdi), %xmm0
- pcmpeqd %xmm0, %xmm2
- add $16, %rdi
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm0, %rax
- or %rax, %rcx
- jnz L(matches)
-
- movdqa (%rdi), %xmm3
- pcmpeqd %xmm3, %xmm2
- add $16, %rdi
- pcmpeqd %xmm1, %xmm3
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm3, %rax
- or %rax, %rcx
- jnz L(matches)
-
- movdqa (%rdi), %xmm4
- pcmpeqd %xmm4, %xmm2
- add $16, %rdi
- pcmpeqd %xmm1, %xmm4
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm4, %rax
- or %rax, %rcx
- jnz L(matches)
-
- movdqa (%rdi), %xmm5
- pcmpeqd %xmm5, %xmm2
- add $16, %rdi
- pcmpeqd %xmm1, %xmm5
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm5, %rax
- or %rax, %rcx
- jz L(loop)
-
- .p2align 4
-L(matches):
- test %rax, %rax
- jnz L(match)
-L(return_value):
- test %r8, %r8
- jz L(return_null)
- mov %r8, %rax
- mov %rsi, %rdi
-
- test $15 << 4, %ah
- jnz L(match_fourth_wchar)
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(match):
- pmovmskb %xmm2, %rcx
- test %rcx, %rcx
- jnz L(find_zero)
- mov %rax, %r8
- mov %rdi, %rsi
- jmp L(loop)
-
- .p2align 4
-L(find_zero):
- test $15, %cl
- jnz L(find_zero_in_first_wchar)
- test %cl, %cl
- jnz L(find_zero_in_second_wchar)
- test $15, %ch
- jnz L(find_zero_in_third_wchar)
-
- and $1 << 13 - 1, %rax
- jz L(return_value)
-
- test $15 << 4, %ah
- jnz L(match_fourth_wchar)
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(find_zero_in_first_wchar):
- test $1, %rax
- jz L(return_value)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(find_zero_in_second_wchar):
- and $1 << 5 - 1, %rax
- jz L(return_value)
-
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(find_zero_in_third_wchar):
- and $1 << 9 - 1, %rax
- jz L(return_value)
-
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(prolog_find_zero):
- add %rcx, %rdi
- mov %rdx, %rcx
-L(prolog_find_zero_1):
- test $15, %cl
- jnz L(prolog_find_zero_in_first_wchar)
- test %cl, %cl
- jnz L(prolog_find_zero_in_second_wchar)
- test $15, %ch
- jnz L(prolog_find_zero_in_third_wchar)
-
- and $1 << 13 - 1, %rax
- jz L(return_null)
-
- test $15 << 4, %ah
- jnz L(match_fourth_wchar)
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(prolog_find_zero_in_first_wchar):
- test $1, %rax
- jz L(return_null)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(prolog_find_zero_in_second_wchar):
- and $1 << 5 - 1, %rax
- jz L(return_null)
-
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(prolog_find_zero_in_third_wchar):
- and $1 << 9 - 1, %rax
- jz L(return_null)
-
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(match_second_wchar):
- lea -12(%rdi), %rax
- ret
-
- .p2align 4
-L(match_third_wchar):
- lea -8(%rdi), %rax
- ret
-
- .p2align 4
-L(match_fourth_wchar):
- lea -4(%rdi), %rax
- ret
-
- .p2align 4
-L(return_null):
- xor %rax, %rax
- ret
-
-END (wcsrchr)
+#include "../strrchr.S"
--
GitLab
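The "SSE2 has no pminud" comments in the patch above refer to the zero-detection trick sketched here with intrinsics (a hedged illustration, not code from the patch): for byte elements, one `pminub` lets a single compare test two vectors for a NUL, while the wide-character path must compare each vector separately and OR the results.

```c
#include <emmintrin.h>   /* SSE2 intrinsics */

/* Byte case: min(a, b) has a zero byte iff a or b does (pminub).  */
static int
has_zero_byte_pair (__m128i a, __m128i b)
{
  __m128i m = _mm_min_epu8 (a, b);
  return _mm_movemask_epi8 (_mm_cmpeq_epi8 (m, _mm_setzero_si128 ())) != 0;
}

/* Dword case: no unsigned dword minimum in SSE2 (pminud is SSE4.1),
   so compare each vector against zero and OR the results.  */
static int
has_zero_dword_pair (__m128i a, __m128i b)
{
  __m128i za = _mm_cmpeq_epi32 (a, _mm_setzero_si128 ());
  __m128i zb = _mm_cmpeq_epi32 (b, _mm_setzero_si128 ());
  return _mm_movemask_epi8 (_mm_or_si128 (za, zb)) != 0;
}
```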

View file

@ -1,501 +0,0 @@
From df7e295d18ffa34f629578c0017a9881af7620f6 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Thu, 21 Apr 2022 20:52:29 -0500
Subject: [PATCH] x86: Optimize {str|wcs}rchr-avx2
Content-type: text/plain; charset=UTF-8
The new code unrolls the main loop slightly without adding too much
overhead and minimizes the comparisons for the search CHAR.
Geometric Mean of all benchmarks New / Old: 0.832
See email for all results.
Full xcheck passes on x86_64 with and without multiarch enabled.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
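A recurring idiom in this patch is `blsmskl %ecx, %ecx; andl %ecx, %eax`, which keeps only the search-CHAR matches at or before the first NUL in a vector. In C (a sketch; names are hypothetical):

```c
#include <stdint.h>

/* blsmsk(x) == x ^ (x - 1): all bits up to and including the lowest
   set bit of x.  Given a bitmask of NUL positions (zmask) and a
   bitmask of CHAR matches (cmask) for one 32-byte vector, keep only
   the matches that occur at or before the first NUL.  If zmask is 0,
   the mask is all-ones and every match is kept.  */
static uint32_t
matches_before_first_nul (uint32_t cmask, uint32_t zmask)
{
  return cmask & (zmask ^ (zmask - 1));
}
```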
---
sysdeps/x86_64/multiarch/strrchr-avx2.S | 426 +++++++++++++++---------
1 file changed, 269 insertions(+), 157 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S
index c949410b..3d26fad4 100644
--- a/sysdeps/x86_64/multiarch/strrchr-avx2.S
+++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S
@@ -27,9 +27,13 @@
# ifdef USE_AS_WCSRCHR
# define VPBROADCAST vpbroadcastd
# define VPCMPEQ vpcmpeqd
+# define VPMIN vpminud
+# define CHAR_SIZE 4
# else
# define VPBROADCAST vpbroadcastb
# define VPCMPEQ vpcmpeqb
+# define VPMIN vpminub
+# define CHAR_SIZE 1
# endif
# ifndef VZEROUPPER
@@ -41,196 +45,304 @@
# endif
# define VEC_SIZE 32
+# define PAGE_SIZE 4096
- .section SECTION(.text),"ax",@progbits
-ENTRY (STRRCHR)
- movd %esi, %xmm4
- movl %edi, %ecx
+ .section SECTION(.text), "ax", @progbits
+ENTRY(STRRCHR)
+ movd %esi, %xmm7
+ movl %edi, %eax
/* Broadcast CHAR to YMM4. */
- VPBROADCAST %xmm4, %ymm4
+ VPBROADCAST %xmm7, %ymm7
vpxor %xmm0, %xmm0, %xmm0
- /* Check if we may cross page boundary with one vector load. */
- andl $(2 * VEC_SIZE - 1), %ecx
- cmpl $VEC_SIZE, %ecx
- ja L(cros_page_boundary)
+ /* Shift here instead of `andl` to save code size (saves a fetch
+ block). */
+ sall $20, %eax
+ cmpl $((PAGE_SIZE - VEC_SIZE) << 20), %eax
+ ja L(cross_page)
+L(page_cross_continue):
vmovdqu (%rdi), %ymm1
- VPCMPEQ %ymm1, %ymm0, %ymm2
- VPCMPEQ %ymm1, %ymm4, %ymm3
- vpmovmskb %ymm2, %ecx
- vpmovmskb %ymm3, %eax
- addq $VEC_SIZE, %rdi
+ /* Check end of string match. */
+ VPCMPEQ %ymm1, %ymm0, %ymm6
+ vpmovmskb %ymm6, %ecx
+ testl %ecx, %ecx
+ jz L(aligned_more)
+
+ /* Only check match with search CHAR if needed. */
+ VPCMPEQ %ymm1, %ymm7, %ymm1
+ vpmovmskb %ymm1, %eax
+ /* Check if match before first zero. */
+ blsmskl %ecx, %ecx
+ andl %ecx, %eax
+ jz L(ret0)
+ bsrl %eax, %eax
+ addq %rdi, %rax
+ /* We are off by 3 for wcsrchr if search CHAR is non-zero. If
+ search CHAR is zero we are correct. Either way `andq
+ -CHAR_SIZE, %rax` gets the correct result. */
+# ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+# endif
+L(ret0):
+L(return_vzeroupper):
+ ZERO_UPPER_VEC_REGISTERS_RETURN
+
+ /* Returns for first vec x1/x2 have hard coded backward search
+ path for earlier matches. */
+ .p2align 4,, 10
+L(first_vec_x1):
+ VPCMPEQ %ymm2, %ymm7, %ymm6
+ vpmovmskb %ymm6, %eax
+ blsmskl %ecx, %ecx
+ andl %ecx, %eax
+ jnz L(first_vec_x1_return)
+
+ .p2align 4,, 4
+L(first_vec_x0_test):
+ VPCMPEQ %ymm1, %ymm7, %ymm6
+ vpmovmskb %ymm6, %eax
+ testl %eax, %eax
+ jz L(ret1)
+ bsrl %eax, %eax
+ addq %r8, %rax
+# ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+# endif
+L(ret1):
+ VZEROUPPER_RETURN
+ .p2align 4,, 10
+L(first_vec_x0_x1_test):
+ VPCMPEQ %ymm2, %ymm7, %ymm6
+ vpmovmskb %ymm6, %eax
+ /* Check ymm2 for search CHAR match. If no match then check ymm1
+ before returning. */
testl %eax, %eax
- jnz L(first_vec)
+ jz L(first_vec_x0_test)
+ .p2align 4,, 4
+L(first_vec_x1_return):
+ bsrl %eax, %eax
+ leaq 1(%rdi, %rax), %rax
+# ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+# endif
+ VZEROUPPER_RETURN
- testl %ecx, %ecx
- jnz L(return_null)
- andq $-VEC_SIZE, %rdi
- xorl %edx, %edx
- jmp L(aligned_loop)
+ .p2align 4,, 10
+L(first_vec_x2):
+ VPCMPEQ %ymm3, %ymm7, %ymm6
+ vpmovmskb %ymm6, %eax
+ blsmskl %ecx, %ecx
+ /* If no in-range search CHAR match in ymm3 then need to check
+ ymm1/ymm2 for an earlier match (we delay checking search
+ CHAR matches until needed). */
+ andl %ecx, %eax
+ jz L(first_vec_x0_x1_test)
+ bsrl %eax, %eax
+ leaq (VEC_SIZE + 1)(%rdi, %rax), %rax
+# ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+# endif
+ VZEROUPPER_RETURN
+
.p2align 4
-L(first_vec):
- /* Check if there is a nul CHAR. */
+L(aligned_more):
+ /* Save original pointer if match was in VEC 0. */
+ movq %rdi, %r8
+
+ /* Align src. */
+ orq $(VEC_SIZE - 1), %rdi
+ vmovdqu 1(%rdi), %ymm2
+ VPCMPEQ %ymm2, %ymm0, %ymm6
+ vpmovmskb %ymm6, %ecx
testl %ecx, %ecx
- jnz L(char_and_nul_in_first_vec)
+ jnz L(first_vec_x1)
- /* Remember the match and keep searching. */
- movl %eax, %edx
- movq %rdi, %rsi
- andq $-VEC_SIZE, %rdi
- jmp L(aligned_loop)
+ vmovdqu (VEC_SIZE + 1)(%rdi), %ymm3
+ VPCMPEQ %ymm3, %ymm0, %ymm6
+ vpmovmskb %ymm6, %ecx
+ testl %ecx, %ecx
+ jnz L(first_vec_x2)
+ /* Save pointer again before realigning. */
+ movq %rdi, %rsi
+ addq $(VEC_SIZE + 1), %rdi
+ andq $-(VEC_SIZE * 2), %rdi
.p2align 4
-L(cros_page_boundary):
- andl $(VEC_SIZE - 1), %ecx
- andq $-VEC_SIZE, %rdi
- vmovdqa (%rdi), %ymm1
- VPCMPEQ %ymm1, %ymm0, %ymm2
- VPCMPEQ %ymm1, %ymm4, %ymm3
- vpmovmskb %ymm2, %edx
- vpmovmskb %ymm3, %eax
- shrl %cl, %edx
- shrl %cl, %eax
- addq $VEC_SIZE, %rdi
-
- /* Check if there is a CHAR. */
+L(first_aligned_loop):
+ /* Do 2x VEC at a time. Any more and the cost of finding the
+	   match outweighs the loop benefit.  */
+ vmovdqa (VEC_SIZE * 0)(%rdi), %ymm4
+ vmovdqa (VEC_SIZE * 1)(%rdi), %ymm5
+
+ VPCMPEQ %ymm4, %ymm7, %ymm6
+ VPMIN %ymm4, %ymm5, %ymm8
+ VPCMPEQ %ymm5, %ymm7, %ymm10
+ vpor %ymm6, %ymm10, %ymm5
+ VPCMPEQ %ymm8, %ymm0, %ymm8
+ vpor %ymm5, %ymm8, %ymm9
+
+ vpmovmskb %ymm9, %eax
+ addq $(VEC_SIZE * 2), %rdi
+ /* No zero or search CHAR. */
testl %eax, %eax
- jnz L(found_char)
-
- testl %edx, %edx
- jnz L(return_null)
+ jz L(first_aligned_loop)
- jmp L(aligned_loop)
-
- .p2align 4
-L(found_char):
- testl %edx, %edx
- jnz L(char_and_nul)
+ /* If no zero CHAR then go to second loop (this allows us to
+ throw away all prior work). */
+ vpmovmskb %ymm8, %ecx
+ testl %ecx, %ecx
+ jz L(second_aligned_loop_prep)
- /* Remember the match and keep searching. */
- movl %eax, %edx
- leaq (%rdi, %rcx), %rsi
+ /* Search char could be zero so we need to get the true match.
+ */
+ vpmovmskb %ymm5, %eax
+ testl %eax, %eax
+ jnz L(first_aligned_loop_return)
- .p2align 4
-L(aligned_loop):
- vmovdqa (%rdi), %ymm1
- VPCMPEQ %ymm1, %ymm0, %ymm2
- addq $VEC_SIZE, %rdi
- VPCMPEQ %ymm1, %ymm4, %ymm3
- vpmovmskb %ymm2, %ecx
- vpmovmskb %ymm3, %eax
- orl %eax, %ecx
- jnz L(char_nor_null)
-
- vmovdqa (%rdi), %ymm1
- VPCMPEQ %ymm1, %ymm0, %ymm2
- add $VEC_SIZE, %rdi
- VPCMPEQ %ymm1, %ymm4, %ymm3
- vpmovmskb %ymm2, %ecx
+ .p2align 4,, 4
+L(first_vec_x1_or_x2):
+ VPCMPEQ %ymm3, %ymm7, %ymm3
+ VPCMPEQ %ymm2, %ymm7, %ymm2
vpmovmskb %ymm3, %eax
- orl %eax, %ecx
- jnz L(char_nor_null)
-
- vmovdqa (%rdi), %ymm1
- VPCMPEQ %ymm1, %ymm0, %ymm2
- addq $VEC_SIZE, %rdi
- VPCMPEQ %ymm1, %ymm4, %ymm3
- vpmovmskb %ymm2, %ecx
- vpmovmskb %ymm3, %eax
- orl %eax, %ecx
- jnz L(char_nor_null)
-
- vmovdqa (%rdi), %ymm1
- VPCMPEQ %ymm1, %ymm0, %ymm2
- addq $VEC_SIZE, %rdi
- VPCMPEQ %ymm1, %ymm4, %ymm3
- vpmovmskb %ymm2, %ecx
- vpmovmskb %ymm3, %eax
- orl %eax, %ecx
- jz L(aligned_loop)
-
- .p2align 4
-L(char_nor_null):
- /* Find a CHAR or a nul CHAR in a loop. */
- testl %eax, %eax
- jnz L(match)
-L(return_value):
- testl %edx, %edx
- jz L(return_null)
- movl %edx, %eax
- movq %rsi, %rdi
+ vpmovmskb %ymm2, %edx
+ /* Use add for macro-fusion. */
+ addq %rax, %rdx
+ jz L(first_vec_x0_test)
+ /* NB: We could move this shift to before the branch and save a
+ bit of code size / performance on the fall through. The
+ branch leads to the null case which generally seems hotter
+ than char in first 3x VEC. */
+ salq $32, %rax
+ addq %rdx, %rax
+ bsrq %rax, %rax
+ leaq 1(%rsi, %rax), %rax
+# ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+# endif
+ VZEROUPPER_RETURN
+ .p2align 4,, 8
+L(first_aligned_loop_return):
+ VPCMPEQ %ymm4, %ymm0, %ymm4
+ vpmovmskb %ymm4, %edx
+ salq $32, %rcx
+ orq %rdx, %rcx
+
+ vpmovmskb %ymm10, %eax
+ vpmovmskb %ymm6, %edx
+ salq $32, %rax
+ orq %rdx, %rax
+ blsmskq %rcx, %rcx
+ andq %rcx, %rax
+ jz L(first_vec_x1_or_x2)
+
+ bsrq %rax, %rax
+ leaq -(VEC_SIZE * 2)(%rdi, %rax), %rax
# ifdef USE_AS_WCSRCHR
- /* Keep the first bit for each matching CHAR for bsr. */
- andl $0x11111111, %eax
+ andq $-CHAR_SIZE, %rax
# endif
- bsrl %eax, %eax
- leaq -VEC_SIZE(%rdi, %rax), %rax
-L(return_vzeroupper):
- ZERO_UPPER_VEC_REGISTERS_RETURN
+ VZEROUPPER_RETURN
+ /* Search char cannot be zero. */
.p2align 4
-L(match):
- /* Find a CHAR. Check if there is a nul CHAR. */
- vpmovmskb %ymm2, %ecx
- testl %ecx, %ecx
- jnz L(find_nul)
-
- /* Remember the match and keep searching. */
- movl %eax, %edx
+L(second_aligned_loop_set_furthest_match):
+ /* Save VEC and pointer from most recent match. */
+L(second_aligned_loop_prep):
movq %rdi, %rsi
- jmp L(aligned_loop)
+ vmovdqu %ymm6, %ymm2
+ vmovdqu %ymm10, %ymm3
.p2align 4
-L(find_nul):
-# ifdef USE_AS_WCSRCHR
- /* Keep the first bit for each matching CHAR for bsr. */
- andl $0x11111111, %ecx
- andl $0x11111111, %eax
-# endif
- /* Mask out any matching bits after the nul CHAR. */
- movl %ecx, %r8d
- subl $1, %r8d
- xorl %ecx, %r8d
- andl %r8d, %eax
+L(second_aligned_loop):
+	/* Search 2x at a time.  */
+ vmovdqa (VEC_SIZE * 0)(%rdi), %ymm4
+ vmovdqa (VEC_SIZE * 1)(%rdi), %ymm5
+
+ VPCMPEQ %ymm4, %ymm7, %ymm6
+ VPMIN %ymm4, %ymm5, %ymm1
+ VPCMPEQ %ymm5, %ymm7, %ymm10
+ vpor %ymm6, %ymm10, %ymm5
+ VPCMPEQ %ymm1, %ymm0, %ymm1
+ vpor %ymm5, %ymm1, %ymm9
+
+ vpmovmskb %ymm9, %eax
+ addq $(VEC_SIZE * 2), %rdi
testl %eax, %eax
- /* If there is no CHAR here, return the remembered one. */
- jz L(return_value)
- bsrl %eax, %eax
- leaq -VEC_SIZE(%rdi, %rax), %rax
- VZEROUPPER_RETURN
-
- .p2align 4
-L(char_and_nul):
- /* Find both a CHAR and a nul CHAR. */
- addq %rcx, %rdi
- movl %edx, %ecx
-L(char_and_nul_in_first_vec):
-# ifdef USE_AS_WCSRCHR
- /* Keep the first bit for each matching CHAR for bsr. */
- andl $0x11111111, %ecx
- andl $0x11111111, %eax
-# endif
- /* Mask out any matching bits after the nul CHAR. */
- movl %ecx, %r8d
- subl $1, %r8d
- xorl %ecx, %r8d
- andl %r8d, %eax
+ jz L(second_aligned_loop)
+ vpmovmskb %ymm1, %ecx
+ testl %ecx, %ecx
+ jz L(second_aligned_loop_set_furthest_match)
+ vpmovmskb %ymm5, %eax
testl %eax, %eax
- /* Return null pointer if the nul CHAR comes first. */
- jz L(return_null)
- bsrl %eax, %eax
- leaq -VEC_SIZE(%rdi, %rax), %rax
+ jnz L(return_new_match)
+
+	/* This is the hot path.  We know CHAR is in bounds and that
+	   ymm3/ymm2 have the latest match.  */
+ .p2align 4,, 4
+L(return_old_match):
+ vpmovmskb %ymm3, %eax
+ vpmovmskb %ymm2, %edx
+ salq $32, %rax
+ orq %rdx, %rax
+ bsrq %rax, %rax
+ /* Search char cannot be zero so safe to just use lea for
+ wcsrchr. */
+ leaq (VEC_SIZE * -2 -(CHAR_SIZE - 1))(%rsi, %rax), %rax
VZEROUPPER_RETURN
- .p2align 4
-L(return_null):
- xorl %eax, %eax
+ /* Last iteration also potentially has a match. */
+ .p2align 4,, 8
+L(return_new_match):
+ VPCMPEQ %ymm4, %ymm0, %ymm4
+ vpmovmskb %ymm4, %edx
+ salq $32, %rcx
+ orq %rdx, %rcx
+
+ vpmovmskb %ymm10, %eax
+ vpmovmskb %ymm6, %edx
+ salq $32, %rax
+ orq %rdx, %rax
+ blsmskq %rcx, %rcx
+ andq %rcx, %rax
+ jz L(return_old_match)
+ bsrq %rax, %rax
+ /* Search char cannot be zero so safe to just use lea for
+ wcsrchr. */
+ leaq (VEC_SIZE * -2 -(CHAR_SIZE - 1))(%rdi, %rax), %rax
VZEROUPPER_RETURN
-END (STRRCHR)
+ .p2align 4,, 4
+L(cross_page):
+ movq %rdi, %rsi
+ andq $-VEC_SIZE, %rsi
+ vmovdqu (%rsi), %ymm1
+ VPCMPEQ %ymm1, %ymm0, %ymm6
+ vpmovmskb %ymm6, %ecx
+	/* Shift out zero CHAR matches that are before the beginning of
+ src (rdi). */
+ shrxl %edi, %ecx, %ecx
+ testl %ecx, %ecx
+ jz L(page_cross_continue)
+ VPCMPEQ %ymm1, %ymm7, %ymm1
+ vpmovmskb %ymm1, %eax
+
+	/* Shift out search CHAR matches that are before the beginning of
+ src (rdi). */
+ shrxl %edi, %eax, %eax
+ blsmskl %ecx, %ecx
+ /* Check if any search CHAR match in range. */
+ andl %ecx, %eax
+ jz L(ret2)
+ bsrl %eax, %eax
+ addq %rdi, %rax
+# ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+# endif
+L(ret2):
+ VZEROUPPER_RETURN
+END(STRRCHR)
#endif
--
GitLab
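
The control flow above is easier to follow against a scalar model. Below is a minimal portable C sketch of the strategy the unrolled loop implements: scan forward in fixed-size blocks, remember the most recent block position that held the search character, and resolve the exact answer only once the terminating NUL is found. BLOCK stands in for VEC_SIZE and the helper is a scalar stand-in for VPCMPEQ/vpmovmskb; none of this is glibc code.

    #include <stddef.h>

    #define BLOCK 32   /* stand-in for VEC_SIZE */

    /* Scalar stand-in for VPCMPEQ + vpmovmskb + bsr: last match in a block.  */
    static const char *
    last_match_in_block (const char *p, size_t n, char c)
    {
      const char *last = NULL;
      for (size_t i = 0; i < n; i++)
        if (p[i] == c)
          last = p + i;
      return last;
    }

    char *
    strrchr_sketch (const char *s, int c)
    {
      const char *saved = NULL;   /* furthest match seen so far */
      for (;;)
        {
          /* Find the terminator within this block, if any.  */
          size_t len = 0;
          while (len < BLOCK && s[len] != '\0')
            len++;
          if (len < BLOCK)
            {
              /* NUL found: the last in-range match wins (include the NUL
                 itself when searching for '\0').  */
              const char *m = last_match_in_block (s, len + (c == 0), (char) c);
              return (char *) (m != NULL ? m : saved);
            }
          /* No NUL yet: remember any match and keep going.  The assembly
             defers locating the exact position until it is needed; this
             sketch computes it eagerly for clarity.  */
          const char *m = last_match_in_block (s, BLOCK, (char) c);
          if (m != NULL)
            saved = m;
          s += BLOCK;
        }
    }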
View file
@@ -1,558 +0,0 @@
From c966099cdc3e0fdf92f63eac09b22fa7e5f5f02d Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Thu, 21 Apr 2022 20:52:30 -0500
Subject: [PATCH] x86: Optimize {str|wcs}rchr-evex
Content-type: text/plain; charset=UTF-8
The new code unrolls the main loop slightly without adding too much
overhead and minimizes the comparisons for the search CHAR.
Geometric Mean of all benchmarks New / Old: 0.755
See email for all results.
Full xcheck passes on x86_64 with and without multiarch enabled.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
---
sysdeps/x86_64/multiarch/strrchr-evex.S | 471 +++++++++++++++---------
1 file changed, 290 insertions(+), 181 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/strrchr-evex.S b/sysdeps/x86_64/multiarch/strrchr-evex.S
index f920b5a5..f5b6d755 100644
--- a/sysdeps/x86_64/multiarch/strrchr-evex.S
+++ b/sysdeps/x86_64/multiarch/strrchr-evex.S
@@ -24,242 +24,351 @@
# define STRRCHR __strrchr_evex
# endif
-# define VMOVU vmovdqu64
-# define VMOVA vmovdqa64
+# define VMOVU vmovdqu64
+# define VMOVA vmovdqa64
# ifdef USE_AS_WCSRCHR
+# define SHIFT_REG esi
+
+# define kunpck kunpckbw
+# define kmov_2x kmovd
+# define maskz_2x ecx
+# define maskm_2x eax
+# define CHAR_SIZE 4
+# define VPMIN vpminud
+# define VPTESTN vptestnmd
# define VPBROADCAST vpbroadcastd
-# define VPCMP vpcmpd
-# define SHIFT_REG r8d
+# define VPCMP vpcmpd
# else
+# define SHIFT_REG edi
+
+# define kunpck kunpckdq
+# define kmov_2x kmovq
+# define maskz_2x rcx
+# define maskm_2x rax
+
+# define CHAR_SIZE 1
+# define VPMIN vpminub
+# define VPTESTN vptestnmb
# define VPBROADCAST vpbroadcastb
-# define VPCMP vpcmpb
-# define SHIFT_REG ecx
+# define VPCMP vpcmpb
# endif
# define XMMZERO xmm16
# define YMMZERO ymm16
# define YMMMATCH ymm17
-# define YMM1 ymm18
+# define YMMSAVE ymm18
+
+# define YMM1 ymm19
+# define YMM2 ymm20
+# define YMM3 ymm21
+# define YMM4 ymm22
+# define YMM5 ymm23
+# define YMM6 ymm24
+# define YMM7 ymm25
+# define YMM8 ymm26
-# define VEC_SIZE 32
- .section .text.evex,"ax",@progbits
-ENTRY (STRRCHR)
- movl %edi, %ecx
+# define VEC_SIZE 32
+# define PAGE_SIZE 4096
+ .section .text.evex, "ax", @progbits
+ENTRY(STRRCHR)
+ movl %edi, %eax
/* Broadcast CHAR to YMMMATCH. */
VPBROADCAST %esi, %YMMMATCH
- vpxorq %XMMZERO, %XMMZERO, %XMMZERO
-
- /* Check if we may cross page boundary with one vector load. */
- andl $(2 * VEC_SIZE - 1), %ecx
- cmpl $VEC_SIZE, %ecx
- ja L(cros_page_boundary)
+ andl $(PAGE_SIZE - 1), %eax
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
+ jg L(cross_page_boundary)
+L(page_cross_continue):
VMOVU (%rdi), %YMM1
-
- /* Each bit in K0 represents a null byte in YMM1. */
- VPCMP $0, %YMMZERO, %YMM1, %k0
- /* Each bit in K1 represents a CHAR in YMM1. */
- VPCMP $0, %YMMMATCH, %YMM1, %k1
+ /* k0 has a 1 for each zero CHAR in YMM1. */
+ VPTESTN %YMM1, %YMM1, %k0
kmovd %k0, %ecx
- kmovd %k1, %eax
-
- addq $VEC_SIZE, %rdi
-
- testl %eax, %eax
- jnz L(first_vec)
-
testl %ecx, %ecx
- jnz L(return_null)
-
- andq $-VEC_SIZE, %rdi
- xorl %edx, %edx
- jmp L(aligned_loop)
-
- .p2align 4
-L(first_vec):
- /* Check if there is a null byte. */
- testl %ecx, %ecx
- jnz L(char_and_nul_in_first_vec)
-
- /* Remember the match and keep searching. */
- movl %eax, %edx
- movq %rdi, %rsi
- andq $-VEC_SIZE, %rdi
- jmp L(aligned_loop)
-
- .p2align 4
-L(cros_page_boundary):
- andl $(VEC_SIZE - 1), %ecx
- andq $-VEC_SIZE, %rdi
+ jz L(aligned_more)
+ /* fallthrough: zero CHAR in first VEC. */
+ /* K1 has a 1 for each search CHAR match in YMM1. */
+ VPCMP $0, %YMMMATCH, %YMM1, %k1
+ kmovd %k1, %eax
+	/* Build mask up until first zero CHAR (used to mask off
+ potential search CHAR matches past the end of the string).
+ */
+ blsmskl %ecx, %ecx
+ andl %ecx, %eax
+ jz L(ret0)
+ /* Get last match (the `andl` removed any out of bounds
+ matches). */
+ bsrl %eax, %eax
# ifdef USE_AS_WCSRCHR
- /* NB: Divide shift count by 4 since each bit in K1 represent 4
- bytes. */
- movl %ecx, %SHIFT_REG
- sarl $2, %SHIFT_REG
+ leaq (%rdi, %rax, CHAR_SIZE), %rax
+# else
+ addq %rdi, %rax
# endif
+L(ret0):
+ ret
- VMOVA (%rdi), %YMM1
-
- /* Each bit in K0 represents a null byte in YMM1. */
- VPCMP $0, %YMMZERO, %YMM1, %k0
- /* Each bit in K1 represents a CHAR in YMM1. */
+ /* Returns for first vec x1/x2/x3 have hard coded backward
+ search path for earlier matches. */
+ .p2align 4,, 6
+L(first_vec_x1):
+ VPCMP $0, %YMMMATCH, %YMM2, %k1
+ kmovd %k1, %eax
+ blsmskl %ecx, %ecx
+ /* eax non-zero if search CHAR in range. */
+ andl %ecx, %eax
+ jnz L(first_vec_x1_return)
+
+ /* fallthrough: no match in YMM2 then need to check for earlier
+ matches (in YMM1). */
+ .p2align 4,, 4
+L(first_vec_x0_test):
VPCMP $0, %YMMMATCH, %YMM1, %k1
- kmovd %k0, %edx
kmovd %k1, %eax
-
- shrxl %SHIFT_REG, %edx, %edx
- shrxl %SHIFT_REG, %eax, %eax
- addq $VEC_SIZE, %rdi
-
- /* Check if there is a CHAR. */
testl %eax, %eax
- jnz L(found_char)
-
- testl %edx, %edx
- jnz L(return_null)
-
- jmp L(aligned_loop)
-
- .p2align 4
-L(found_char):
- testl %edx, %edx
- jnz L(char_and_nul)
-
- /* Remember the match and keep searching. */
- movl %eax, %edx
- leaq (%rdi, %rcx), %rsi
+ jz L(ret1)
+ bsrl %eax, %eax
+# ifdef USE_AS_WCSRCHR
+ leaq (%rsi, %rax, CHAR_SIZE), %rax
+# else
+ addq %rsi, %rax
+# endif
+L(ret1):
+ ret
- .p2align 4
-L(aligned_loop):
- VMOVA (%rdi), %YMM1
- addq $VEC_SIZE, %rdi
+ .p2align 4,, 10
+L(first_vec_x1_or_x2):
+ VPCMP $0, %YMM3, %YMMMATCH, %k3
+ VPCMP $0, %YMM2, %YMMMATCH, %k2
+	/* K2 and K3 have 1 for any search CHAR match.  Test whether
+	   either of them has any matches.  Otherwise check YMM1.  */
+ kortestd %k2, %k3
+ jz L(first_vec_x0_test)
+
+	/* Guaranteed that YMM2 and YMM3 are within range so merge the
+ two bitmasks then get last result. */
+ kunpck %k2, %k3, %k3
+ kmovq %k3, %rax
+ bsrq %rax, %rax
+ leaq (VEC_SIZE)(%r8, %rax, CHAR_SIZE), %rax
+ ret
- /* Each bit in K0 represents a null byte in YMM1. */
- VPCMP $0, %YMMZERO, %YMM1, %k0
- /* Each bit in K1 represents a CHAR in YMM1. */
- VPCMP $0, %YMMMATCH, %YMM1, %k1
- kmovd %k0, %ecx
+ .p2align 4,, 6
+L(first_vec_x3):
+ VPCMP $0, %YMMMATCH, %YMM4, %k1
kmovd %k1, %eax
- orl %eax, %ecx
- jnz L(char_nor_null)
+ blsmskl %ecx, %ecx
+ /* If no search CHAR match in range check YMM1/YMM2/YMM3. */
+ andl %ecx, %eax
+ jz L(first_vec_x1_or_x2)
+ bsrl %eax, %eax
+ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
+ ret
- VMOVA (%rdi), %YMM1
- add $VEC_SIZE, %rdi
+ .p2align 4,, 6
+L(first_vec_x0_x1_test):
+ VPCMP $0, %YMMMATCH, %YMM2, %k1
+ kmovd %k1, %eax
+ /* Check YMM2 for last match first. If no match try YMM1. */
+ testl %eax, %eax
+ jz L(first_vec_x0_test)
+ .p2align 4,, 4
+L(first_vec_x1_return):
+ bsrl %eax, %eax
+ leaq (VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %rax
+ ret
- /* Each bit in K0 represents a null byte in YMM1. */
- VPCMP $0, %YMMZERO, %YMM1, %k0
- /* Each bit in K1 represents a CHAR in YMM1. */
- VPCMP $0, %YMMMATCH, %YMM1, %k1
- kmovd %k0, %ecx
+ .p2align 4,, 10
+L(first_vec_x2):
+ VPCMP $0, %YMMMATCH, %YMM3, %k1
kmovd %k1, %eax
- orl %eax, %ecx
- jnz L(char_nor_null)
+ blsmskl %ecx, %ecx
+ /* Check YMM3 for last match first. If no match try YMM2/YMM1.
+ */
+ andl %ecx, %eax
+ jz L(first_vec_x0_x1_test)
+ bsrl %eax, %eax
+ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+ ret
- VMOVA (%rdi), %YMM1
- addq $VEC_SIZE, %rdi
- /* Each bit in K0 represents a null byte in YMM1. */
- VPCMP $0, %YMMZERO, %YMM1, %k0
- /* Each bit in K1 represents a CHAR in YMM1. */
- VPCMP $0, %YMMMATCH, %YMM1, %k1
+ .p2align 4
+L(aligned_more):
+	/* Need to keep original pointer in case YMM1 has the last match.  */
+ movq %rdi, %rsi
+ andq $-VEC_SIZE, %rdi
+ VMOVU VEC_SIZE(%rdi), %YMM2
+ VPTESTN %YMM2, %YMM2, %k0
kmovd %k0, %ecx
- kmovd %k1, %eax
- orl %eax, %ecx
- jnz L(char_nor_null)
+ testl %ecx, %ecx
+ jnz L(first_vec_x1)
- VMOVA (%rdi), %YMM1
- addq $VEC_SIZE, %rdi
+ VMOVU (VEC_SIZE * 2)(%rdi), %YMM3
+ VPTESTN %YMM3, %YMM3, %k0
+ kmovd %k0, %ecx
+ testl %ecx, %ecx
+ jnz L(first_vec_x2)
- /* Each bit in K0 represents a null byte in YMM1. */
- VPCMP $0, %YMMZERO, %YMM1, %k0
- /* Each bit in K1 represents a CHAR in YMM1. */
- VPCMP $0, %YMMMATCH, %YMM1, %k1
+ VMOVU (VEC_SIZE * 3)(%rdi), %YMM4
+ VPTESTN %YMM4, %YMM4, %k0
kmovd %k0, %ecx
- kmovd %k1, %eax
- orl %eax, %ecx
- jz L(aligned_loop)
+ movq %rdi, %r8
+ testl %ecx, %ecx
+ jnz L(first_vec_x3)
+ andq $-(VEC_SIZE * 2), %rdi
.p2align 4
-L(char_nor_null):
- /* Find a CHAR or a null byte in a loop. */
+L(first_aligned_loop):
+	/* Preserve YMM1, YMM2, YMM3, and YMM4 until we can guarantee
+ they don't store a match. */
+ VMOVA (VEC_SIZE * 4)(%rdi), %YMM5
+ VMOVA (VEC_SIZE * 5)(%rdi), %YMM6
+
+ VPCMP $0, %YMM5, %YMMMATCH, %k2
+ vpxord %YMM6, %YMMMATCH, %YMM7
+
+ VPMIN %YMM5, %YMM6, %YMM8
+ VPMIN %YMM8, %YMM7, %YMM7
+
+ VPTESTN %YMM7, %YMM7, %k1
+ subq $(VEC_SIZE * -2), %rdi
+ kortestd %k1, %k2
+ jz L(first_aligned_loop)
+
+ VPCMP $0, %YMM6, %YMMMATCH, %k3
+ VPTESTN %YMM8, %YMM8, %k1
+ ktestd %k1, %k1
+ jz L(second_aligned_loop_prep)
+
+ kortestd %k2, %k3
+ jnz L(return_first_aligned_loop)
+
+ .p2align 4,, 6
+L(first_vec_x1_or_x2_or_x3):
+ VPCMP $0, %YMM4, %YMMMATCH, %k4
+ kmovd %k4, %eax
testl %eax, %eax
- jnz L(match)
-L(return_value):
- testl %edx, %edx
- jz L(return_null)
- movl %edx, %eax
- movq %rsi, %rdi
+ jz L(first_vec_x1_or_x2)
bsrl %eax, %eax
-# ifdef USE_AS_WCSRCHR
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- leaq -VEC_SIZE(%rdi, %rax, 4), %rax
-# else
- leaq -VEC_SIZE(%rdi, %rax), %rax
-# endif
+ leaq (VEC_SIZE * 3)(%r8, %rax, CHAR_SIZE), %rax
ret
- .p2align 4
-L(match):
- /* Find a CHAR. Check if there is a null byte. */
- kmovd %k0, %ecx
- testl %ecx, %ecx
- jnz L(find_nul)
+ .p2align 4,, 8
+L(return_first_aligned_loop):
+ VPTESTN %YMM5, %YMM5, %k0
+ kunpck %k0, %k1, %k0
+ kmov_2x %k0, %maskz_2x
+
+ blsmsk %maskz_2x, %maskz_2x
+ kunpck %k2, %k3, %k3
+ kmov_2x %k3, %maskm_2x
+ and %maskz_2x, %maskm_2x
+ jz L(first_vec_x1_or_x2_or_x3)
- /* Remember the match and keep searching. */
- movl %eax, %edx
+ bsr %maskm_2x, %maskm_2x
+ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+ ret
+
+ .p2align 4
+ /* We can throw away the work done for the first 4x checks here
+	   as we have a later match.  This is the 'fast' path, so to speak.
+ */
+L(second_aligned_loop_prep):
+L(second_aligned_loop_set_furthest_match):
movq %rdi, %rsi
- jmp L(aligned_loop)
+ kunpck %k2, %k3, %k4
.p2align 4
-L(find_nul):
- /* Mask out any matching bits after the null byte. */
- movl %ecx, %r8d
- subl $1, %r8d
- xorl %ecx, %r8d
- andl %r8d, %eax
- testl %eax, %eax
- /* If there is no CHAR here, return the remembered one. */
- jz L(return_value)
- bsrl %eax, %eax
+L(second_aligned_loop):
+ VMOVU (VEC_SIZE * 4)(%rdi), %YMM1
+ VMOVU (VEC_SIZE * 5)(%rdi), %YMM2
+
+ VPCMP $0, %YMM1, %YMMMATCH, %k2
+ vpxord %YMM2, %YMMMATCH, %YMM3
+
+ VPMIN %YMM1, %YMM2, %YMM4
+ VPMIN %YMM3, %YMM4, %YMM3
+
+ VPTESTN %YMM3, %YMM3, %k1
+ subq $(VEC_SIZE * -2), %rdi
+ kortestd %k1, %k2
+ jz L(second_aligned_loop)
+
+ VPCMP $0, %YMM2, %YMMMATCH, %k3
+ VPTESTN %YMM4, %YMM4, %k1
+ ktestd %k1, %k1
+ jz L(second_aligned_loop_set_furthest_match)
+
+ kortestd %k2, %k3
+	/* Branch here because there is a significant advantage in terms
+	   of output dependency chains in using edx.  */
+ jnz L(return_new_match)
+L(return_old_match):
+ kmovq %k4, %rax
+ bsrq %rax, %rax
+ leaq (VEC_SIZE * 2)(%rsi, %rax, CHAR_SIZE), %rax
+ ret
+
+L(return_new_match):
+ VPTESTN %YMM1, %YMM1, %k0
+ kunpck %k0, %k1, %k0
+ kmov_2x %k0, %maskz_2x
+
+ blsmsk %maskz_2x, %maskz_2x
+ kunpck %k2, %k3, %k3
+ kmov_2x %k3, %maskm_2x
+ and %maskz_2x, %maskm_2x
+ jz L(return_old_match)
+
+ bsr %maskm_2x, %maskm_2x
+ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+ ret
+
+L(cross_page_boundary):
+	/* eax contains all the page offset bits of src (rdi).  `xor rdi,
+	   rax` sets the pointer with all page offset bits cleared, so an
+	   offset of (PAGE_SIZE - VEC_SIZE) gets the last aligned VEC
+	   before the page cross (guaranteed to be safe to read).  Doing
+	   this as opposed to `movq %rdi, %rax; andq $-VEC_SIZE, %rax`
+	   saves a bit of code size.  */
+ xorq %rdi, %rax
+ VMOVU (PAGE_SIZE - VEC_SIZE)(%rax), %YMM1
+ VPTESTN %YMM1, %YMM1, %k0
+ kmovd %k0, %ecx
+
+	/* Shift out zero CHAR matches that are before the beginning of
+ src (rdi). */
# ifdef USE_AS_WCSRCHR
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- leaq -VEC_SIZE(%rdi, %rax, 4), %rax
-# else
- leaq -VEC_SIZE(%rdi, %rax), %rax
+ movl %edi, %esi
+ andl $(VEC_SIZE - 1), %esi
+ shrl $2, %esi
# endif
- ret
+ shrxl %SHIFT_REG, %ecx, %ecx
- .p2align 4
-L(char_and_nul):
- /* Find both a CHAR and a null byte. */
- addq %rcx, %rdi
- movl %edx, %ecx
-L(char_and_nul_in_first_vec):
- /* Mask out any matching bits after the null byte. */
- movl %ecx, %r8d
- subl $1, %r8d
- xorl %ecx, %r8d
- andl %r8d, %eax
- testl %eax, %eax
- /* Return null pointer if the null byte comes first. */
- jz L(return_null)
+ testl %ecx, %ecx
+ jz L(page_cross_continue)
+
+ /* Found zero CHAR so need to test for search CHAR. */
+ VPCMP $0, %YMMMATCH, %YMM1, %k1
+ kmovd %k1, %eax
+	/* Shift out search CHAR matches that are before the beginning of
+ src (rdi). */
+ shrxl %SHIFT_REG, %eax, %eax
+
+ /* Check if any search CHAR match in range. */
+ blsmskl %ecx, %ecx
+ andl %ecx, %eax
+ jz L(ret3)
bsrl %eax, %eax
# ifdef USE_AS_WCSRCHR
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- leaq -VEC_SIZE(%rdi, %rax, 4), %rax
+ leaq (%rdi, %rax, CHAR_SIZE), %rax
# else
- leaq -VEC_SIZE(%rdi, %rax), %rax
+ addq %rdi, %rax
# endif
+L(ret3):
ret
- .p2align 4
-L(return_null):
- xorl %eax, %eax
- ret
-
-END (STRRCHR)
+END(STRRCHR)
#endif
--
GitLab
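
Both the AVX2 and EVEX versions rely on the same bit trick: blsmsk turns the zero-CHAR bitmask into a mask covering every position up to and including the first NUL, and ANDing that with the match bitmask discards matches that lie past the end of the string. A small scalar illustration, with made-up mask values:

    #include <stdint.h>
    #include <stdio.h>

    /* blsmsk (x) == x ^ (x - 1): sets every bit up to and including the
       lowest set bit of x (all bits if x == 0).  */
    static uint32_t
    blsmsk (uint32_t x)
    {
      return x ^ (x - 1);
    }

    int
    main (void)
    {
      uint32_t zero_mask  = 0x00000100;   /* NUL at byte 8 (example) */
      uint32_t match_mask = 0x00010011;   /* matches at bytes 0, 4, 16 */

      uint32_t in_range = match_mask & blsmsk (zero_mask);
      printf ("in-range matches: %#x\n", in_range);   /* 0x11: byte 16 dropped */
      return 0;
    }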
View file
@@ -1,73 +0,0 @@
From 911c63a51c690dd1a97dfc587097277029baf00f Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Wed, 27 Apr 2022 15:13:02 -0500
Subject: [PATCH] sysdeps: Add 'get_fast_jitter' interface in fast-jitter.h
Content-type: text/plain; charset=UTF-8
'get_fast_jitter' is meant to be used purely for performance
purposes. In all cases where it's used, it should be acceptable to get no
randomness (see default case). An example use case is in setting
jitter for retries between threads at a lock. There is a
performance benefit to having jitter, but only if the jitter can
be generated very quickly and ultimately there is no serious issue
if no jitter is generated.
The implementation generally uses 'HP_TIMING_NOW' iff it is
inlined (avoid any potential syscall paths).
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
---
sysdeps/generic/fast-jitter.h | 42 +++++++++++++++++++++++++++++++++++
1 file changed, 42 insertions(+)
create mode 100644 sysdeps/generic/fast-jitter.h
diff --git a/sysdeps/generic/fast-jitter.h b/sysdeps/generic/fast-jitter.h
new file mode 100644
index 00000000..4dd53e34
--- /dev/null
+++ b/sysdeps/generic/fast-jitter.h
@@ -0,0 +1,42 @@
+/* Fallback for fast jitter: just return 0.
+ Copyright (C) 2019-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _FAST_JITTER_H
+# define _FAST_JITTER_H
+
+# include <stdint.h>
+# include <hp-timing.h>
+
+/* Baseline: just return 0.  We could create jitter using a clock or
+ 'random_bits' but that may imply a syscall and the goal of
+ 'get_fast_jitter' is minimal overhead "randomness" when such
+   randomness helps performance.  Adding high overhead to the function
+ defeats the purpose. */
+static inline uint32_t
+get_fast_jitter (void)
+{
+# if HP_TIMING_INLINE
+ hp_timing_t jitter;
+ HP_TIMING_NOW (jitter);
+ return (uint32_t) jitter;
+# else
+ return 0;
+# endif
+}
+
+#endif
--
GitLab
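
As a rough usage sketch, the helper slots into a spin-retry loop like the one below; the lock here is a stand-in written with C11 atomics, not a glibc API, and only get_fast_jitter comes from the patch.

    #include <stdatomic.h>
    #include <stdint.h>

    extern uint32_t get_fast_jitter (void);   /* from <fast-jitter.h> */

    static void
    spin_lock_with_jitter (atomic_int *lock)
    {
      unsigned int jitter = get_fast_jitter ();
      int expected = 0;
      while (!atomic_compare_exchange_weak (lock, &expected, 1))
        {
          expected = 0;
          /* Spin a slightly different number of iterations per thread so
             the retries do not hammer the cache line in lockstep.  Zero
             jitter (the generic fallback) is still correct, just
             unstaggered.  */
          for (unsigned int i = 0; i < 16 + (jitter & 15); i++)
            __asm__ volatile ("");   /* stand-in for a pause/spin nop */
        }
    }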
View file
@@ -1,226 +0,0 @@
From 8162147872491bb5b48e91543b19c49a29ae6b6d Mon Sep 17 00:00:00 2001
From: Wangyang Guo <wangyang.guo@intel.com>
Date: Fri, 6 May 2022 01:50:10 +0000
Subject: [PATCH] nptl: Add backoff mechanism to spinlock loop
Content-type: text/plain; charset=UTF-8
When multiple threads are waiting for a lock at the same time, once the
lock owner releases it, the waiters all see the lock as available and try
to acquire it, which may cause an expensive CAS storm.
Binary exponential backoff with random jitter is introduced. As try-lock
attempts increase, it is more likely that a larger number of threads are
competing for the adaptive mutex lock, so the wait time increases
exponentially. Random jitter is also added to avoid synchronized
try-locks from other threads.
v2: Remove read-check before try-lock for performance.
v3:
1. Restore read-check since it works well on some platforms.
2. Make backoff arch dependent, and enable it for x86_64.
3. Limit max backoff to reduce latency in large critical sections.
v4: Fix strict-prototypes error in sysdeps/nptl/pthread_mutex_backoff.h
v5: Commit log updated for the regression in large critical sections.
Result of pthread-mutex-locks bench
Test Platform: Xeon 8280L (2 socket, 112 CPUs in total)
First Row: thread number
First Col: critical section length
Values: backoff vs upstream, time based, low is better
non-critical-length: 1
1 2 4 8 16 32 64 112 140
0 0.99 0.58 0.52 0.49 0.43 0.44 0.46 0.52 0.54
1 0.98 0.43 0.56 0.50 0.44 0.45 0.50 0.56 0.57
2 0.99 0.41 0.57 0.51 0.45 0.47 0.48 0.60 0.61
4 0.99 0.45 0.59 0.53 0.48 0.49 0.52 0.64 0.65
8 1.00 0.66 0.71 0.63 0.56 0.59 0.66 0.72 0.71
16 0.97 0.78 0.91 0.73 0.67 0.70 0.79 0.80 0.80
32 0.95 1.17 0.98 0.87 0.82 0.86 0.89 0.90 0.90
64 0.96 0.95 1.01 1.01 0.98 1.00 1.03 0.99 0.99
128 0.99 1.01 1.01 1.17 1.08 1.12 1.02 0.97 1.02
non-critical-length: 32
1 2 4 8 16 32 64 112 140
0 1.03 0.97 0.75 0.65 0.58 0.58 0.56 0.70 0.70
1 0.94 0.95 0.76 0.65 0.58 0.58 0.61 0.71 0.72
2 0.97 0.96 0.77 0.66 0.58 0.59 0.62 0.74 0.74
4 0.99 0.96 0.78 0.66 0.60 0.61 0.66 0.76 0.77
8 0.99 0.99 0.84 0.70 0.64 0.66 0.71 0.80 0.80
16 0.98 0.97 0.95 0.76 0.70 0.73 0.81 0.85 0.84
32 1.04 1.12 1.04 0.89 0.82 0.86 0.93 0.91 0.91
64 0.99 1.15 1.07 1.00 0.99 1.01 1.05 0.99 0.99
128 1.00 1.21 1.20 1.22 1.25 1.31 1.12 1.10 0.99
non-critical-length: 128
1 2 4 8 16 32 64 112 140
0 1.02 1.00 0.99 0.67 0.61 0.61 0.61 0.74 0.73
1 0.95 0.99 1.00 0.68 0.61 0.60 0.60 0.74 0.74
2 1.00 1.04 1.00 0.68 0.59 0.61 0.65 0.76 0.76
4 1.00 0.96 0.98 0.70 0.63 0.63 0.67 0.78 0.77
8 1.01 1.02 0.89 0.73 0.65 0.67 0.71 0.81 0.80
16 0.99 0.96 0.96 0.79 0.71 0.73 0.80 0.84 0.84
32 0.99 0.95 1.05 0.89 0.84 0.85 0.94 0.92 0.91
64 1.00 0.99 1.16 1.04 1.00 1.02 1.06 0.99 0.99
128 1.00 1.06 0.98 1.14 1.39 1.26 1.08 1.02 0.98
There is a regression for large critical sections, but the adaptive mutex
is aimed at "quick" locks: small critical sections are more common when
users choose the adaptive pthread_mutex.
Signed-off-by: Wangyang Guo <wangyang.guo@intel.com>
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
Conflicts:
pthreadP.h
(had been moved)
nptl/pthread_mutex_lock.c
(max_adaptive_count renamed)
---
nptl/pthreadP.h | 1 +
nptl/pthread_mutex_lock.c | 16 +++++++--
sysdeps/nptl/pthread_mutex_backoff.h | 35 ++++++++++++++++++
sysdeps/x86_64/nptl/pthread_mutex_backoff.h | 39 +++++++++++++++++++++
4 files changed, 89 insertions(+), 2 deletions(-)
create mode 100644 sysdeps/nptl/pthread_mutex_backoff.h
create mode 100644 sysdeps/x86_64/nptl/pthread_mutex_backoff.h
diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
index 7ddc166c..1550e3b6 100644
--- a/nptl/pthreadP.h
+++ b/nptl/pthreadP.h
@@ -33,6 +33,7 @@
#include <kernel-features.h>
#include <errno.h>
#include <internal-signals.h>
+#include <pthread_mutex_backoff.h>
/* Atomic operations on TLS memory. */
diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
index d96a9933..c7770fc9 100644
--- a/nptl/pthread_mutex_lock.c
+++ b/nptl/pthread_mutex_lock.c
@@ -133,14 +133,26 @@ __pthread_mutex_lock (pthread_mutex_t *mutex)
int cnt = 0;
int max_cnt = MIN (MAX_ADAPTIVE_COUNT,
mutex->__data.__spins * 2 + 10);
+ int spin_count, exp_backoff = 1;
+ unsigned int jitter = get_jitter ();
do
{
- if (cnt++ >= max_cnt)
+ /* In each loop, spin count is exponential backoff plus
+ random jitter, random range is [0, exp_backoff-1]. */
+ spin_count = exp_backoff + (jitter & (exp_backoff - 1));
+ cnt += spin_count;
+ if (cnt >= max_cnt)
{
+ /* If cnt exceeds max spin count, just go to wait
+ queue. */
LLL_MUTEX_LOCK (mutex);
break;
}
- atomic_spin_nop ();
+ do
+ atomic_spin_nop ();
+ while (--spin_count > 0);
+ /* Prepare for next loop. */
+ exp_backoff = get_next_backoff (exp_backoff);
}
while (LLL_MUTEX_READ_LOCK (mutex) != 0
|| LLL_MUTEX_TRYLOCK (mutex) != 0);
diff --git a/sysdeps/nptl/pthread_mutex_backoff.h b/sysdeps/nptl/pthread_mutex_backoff.h
new file mode 100644
index 00000000..5b26c22a
--- /dev/null
+++ b/sysdeps/nptl/pthread_mutex_backoff.h
@@ -0,0 +1,35 @@
+/* Pthread mutex backoff configuration.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+#ifndef _PTHREAD_MUTEX_BACKOFF_H
+#define _PTHREAD_MUTEX_BACKOFF_H 1
+
+static inline unsigned int
+get_jitter (void)
+{
+  /* Arch-dependent random jitter; returning 0 disables randomness.  */
+ return 0;
+}
+
+static inline int
+get_next_backoff (int backoff)
+{
+  /* Next backoff; returning 1 disables mutex backoff.  */
+ return 1;
+}
+
+#endif
diff --git a/sysdeps/x86_64/nptl/pthread_mutex_backoff.h b/sysdeps/x86_64/nptl/pthread_mutex_backoff.h
new file mode 100644
index 00000000..ec74c3d9
--- /dev/null
+++ b/sysdeps/x86_64/nptl/pthread_mutex_backoff.h
@@ -0,0 +1,39 @@
+/* Pthread mutex backoff configuration.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+#ifndef _PTHREAD_MUTEX_BACKOFF_H
+#define _PTHREAD_MUTEX_BACKOFF_H 1
+
+#include <fast-jitter.h>
+
+static inline unsigned int
+get_jitter (void)
+{
+ return get_fast_jitter ();
+}
+
+#define MAX_BACKOFF 16
+
+static inline int
+get_next_backoff (int backoff)
+{
+  /* Binary exponential backoff.  Limiting the max backoff
+     can reduce latency in large critical sections.  */
+ return (backoff < MAX_BACKOFF) ? backoff << 1 : backoff;
+}
+
+#endif
--
GitLab
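
To see how the two hooks combine, the standalone sketch below prints the spin budget per retry round. get_next_backoff mirrors the x86_64 header above; the fixed jitter value and the round count are invented for the demo.

    #include <stdio.h>

    #define MAX_BACKOFF 16

    /* Mirrors the x86_64 header: binary exponential growth, capped so a
       long critical section does not push waiters into huge waits.  */
    static int
    get_next_backoff (int backoff)
    {
      return (backoff < MAX_BACKOFF) ? backoff << 1 : backoff;
    }

    int
    main (void)
    {
      unsigned int jitter = 5;   /* stand-in for get_fast_jitter () */
      int exp_backoff = 1, cnt = 0;

      /* Per-round spin count = backoff + jitter in [0, backoff - 1].  */
      for (int round = 0; round < 8; round++)
        {
          int spin_count = exp_backoff + (int) (jitter & (exp_backoff - 1));
          cnt += spin_count;
          printf ("round %d: spin %2d (total %3d)\n", round, spin_count, cnt);
          exp_backoff = get_next_backoff (exp_backoff);
        }
      return 0;
    }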
View file
@@ -1,55 +0,0 @@
From c6272098323153db373f2986c67786ea8c85f1cf Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Tue, 15 Feb 2022 08:18:15 -0600
Subject: [PATCH] x86: Fallback {str|wcs}cmp RTM in the ncmp overflow case [BZ
#28896]
Content-type: text/plain; charset=UTF-8
In the overflow fallback strncmp-avx2-rtm and wcsncmp-avx2-rtm would
call strcmp-avx2 and wcscmp-avx2 respectively. This would not
have checks around vzeroupper and would trigger spurious
aborts. This commit fixes that.
test-strcmp, test-strncmp, test-wcscmp, and test-wcsncmp all pass on
AVX2 machines with and without RTM.
Co-authored-by: H.J. Lu <hjl.tools@gmail.com>
---
sysdeps/x86_64/multiarch/strcmp-avx2.S | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
Conflicts:
sysdeps/x86_64/multiarch/strcmp-avx2.S
(split into two patches due to upstream bug differences)
diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
index 28cc98b6..e267c6cb 100644
--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
+++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
@@ -345,10 +345,10 @@ L(one_or_less):
movq %LOCALE_REG, %rdx
# endif
jb L(ret_zero)
-# ifdef USE_AS_WCSCMP
/* 'nbe' covers the case where length is negative (large
unsigned). */
- jnbe __wcscmp_avx2
+ jnbe OVERFLOW_STRCMP
+# ifdef USE_AS_WCSCMP
movl (%rdi), %edx
xorl %eax, %eax
cmpl (%rsi), %edx
@@ -357,10 +357,6 @@ L(one_or_less):
negl %eax
orl $1, %eax
# else
- /* 'nbe' covers the case where length is negative (large
- unsigned). */
-
- jnbe __strcmp_avx2
movzbl (%rdi), %eax
movzbl (%rsi), %ecx
TOLOWER_gpr (%rax, %eax)
--
GitLab
View file
@@ -1,60 +0,0 @@
From 259a17cc98058d2576511201f85d28cb5d9de2a2 Mon Sep 17 00:00:00 2001
From: Stefan Liebler <stli@linux.ibm.com>
Date: Mon, 28 Jun 2021 13:01:07 +0200
Subject: s390x: Update math: redirect roundeven function
After recent commit
447954a206837b5f153869cfeeeab44631c3fac9
"math: redirect roundeven function", building on
s390x fails with:
Error: symbol `__roundevenl' is already defined
Similar to the aarch64/riscv fix, this patch redirects the
target-specific functions for s390x:
commit 3213ed770cbc5821920d16caa93c85e92dd7b9f6
"Update math: redirect roundeven function"
diff --git a/sysdeps/s390/fpu/s_roundeven.c b/sysdeps/s390/fpu/s_roundeven.c
index 40b07e054b..0773adfed0 100644
--- a/sysdeps/s390/fpu/s_roundeven.c
+++ b/sysdeps/s390/fpu/s_roundeven.c
@@ -18,6 +18,7 @@
<https://www.gnu.org/licenses/>. */
#ifdef HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT
+# define NO_MATH_REDIRECT
# include <math.h>
# include <libm-alias-double.h>
@@ -31,7 +32,6 @@ __roundeven (double x)
__asm__ ("fidbra %0,4,%1,4" : "=f" (y) : "f" (x));
return y;
}
-hidden_def (__roundeven)
libm_alias_double (__roundeven, roundeven)
#else
diff --git a/sysdeps/s390/fpu/s_roundevenf.c b/sysdeps/s390/fpu/s_roundevenf.c
index d2fbf3d2b6..289785bc4a 100644
--- a/sysdeps/s390/fpu/s_roundevenf.c
+++ b/sysdeps/s390/fpu/s_roundevenf.c
@@ -18,6 +18,7 @@
<https://www.gnu.org/licenses/>. */
#ifdef HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT
+# define NO_MATH_REDIRECT
# include <math.h>
# include <libm-alias-float.h>
diff --git a/sysdeps/s390/fpu/s_roundevenl.c b/sysdeps/s390/fpu/s_roundevenl.c
index 29ab7a8616..94b6459ab4 100644
--- a/sysdeps/s390/fpu/s_roundevenl.c
+++ b/sysdeps/s390/fpu/s_roundevenl.c
@@ -18,6 +18,7 @@
<https://www.gnu.org/licenses/>. */
#ifdef HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT
+# define NO_MATH_REDIRECT
# include <math.h>
# include <math_private.h>
# include <libm-alias-ldouble.h>
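
For reference, roundeven rounds to the nearest integer with ties going to the even neighbor, which matches the fidbra rounding mode used above. Under the default FE_TONEAREST mode, C99 rint shows the same tie handling, so a quick illustration looks like this (compile with -lm; not from the patch):

    #include <fenv.h>
    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      fesetround (FE_TONEAREST);
      printf ("%.1f %.1f %.1f %.1f\n",
              rint (0.5), rint (1.5), rint (2.5), rint (-2.5));
      /* Prints 0.0 2.0 2.0 -2.0: ties land on the even integer.  */
      return 0;
    }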
View file
@@ -1,74 +0,0 @@
From 1da50d4bda07f04135dca39f40e79fc9eabed1f8 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 26 Feb 2021 05:36:59 -0800
Subject: [PATCH] x86: Set Prefer_No_VZEROUPPER and add Prefer_AVX2_STRCMP
Content-type: text/plain; charset=UTF-8
1. Set Prefer_No_VZEROUPPER if RTM is usable to avoid RTM abort triggered
by VZEROUPPER inside a transactionally executing RTM region.
2. Since to compare 2 32-byte strings, 256-bit EVEX strcmp requires 2
loads, 3 VPCMPs and 2 KORDs while AVX2 strcmp requires 1 load, 2 VPCMPEQs,
1 VPMINU and 1 VPMOVMSKB, AVX2 strcmp is faster than EVEX strcmp. Add
Prefer_AVX2_STRCMP to prefer AVX2 strcmp family functions.
---
sysdeps/x86/cpu-features.c | 20 +++++++++++++++++--
sysdeps/x86/cpu-tunables.c | 2 ++
...cpu-features-preferred_feature_index_1.def | 1 +
3 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 91042505..3610ee5c 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -524,8 +524,24 @@ init_cpu_features (struct cpu_features *cpu_features)
cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
|= bit_arch_Prefer_No_VZEROUPPER;
else
- cpu_features->preferred[index_arch_Prefer_No_AVX512]
- |= bit_arch_Prefer_No_AVX512;
+ {
+ cpu_features->preferred[index_arch_Prefer_No_AVX512]
+ |= bit_arch_Prefer_No_AVX512;
+
+ /* Avoid RTM abort triggered by VZEROUPPER inside a
+ transactionally executing RTM region. */
+ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
+ cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
+ |= bit_arch_Prefer_No_VZEROUPPER;
+
+ /* Since to compare 2 32-byte strings, 256-bit EVEX strcmp
+ requires 2 loads, 3 VPCMPs and 2 KORDs while AVX2 strcmp
+ requires 1 load, 2 VPCMPEQs, 1 VPMINU and 1 VPMOVMSKB,
+ AVX2 strcmp is faster than EVEX strcmp. */
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
+ cpu_features->preferred[index_arch_Prefer_AVX2_STRCMP]
+ |= bit_arch_Prefer_AVX2_STRCMP;
+ }
}
/* This spells out "AuthenticAMD". */
else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c
index 3173b2b9..73adbaba 100644
--- a/sysdeps/x86/cpu-tunables.c
+++ b/sysdeps/x86/cpu-tunables.c
@@ -239,6 +239,8 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features,
Fast_Copy_Backward,
disable, 18);
+ CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH
+ (n, cpu_features, Prefer_AVX2_STRCMP, AVX2, disable, 18);
}
break;
case 19:
diff --git a/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
index 17a5cc42..4ca70b40 100644
--- a/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
+++ b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
@@ -32,3 +32,4 @@ BIT (Prefer_ERMS)
BIT (Prefer_FSRM)
BIT (Prefer_No_AVX512)
BIT (MathVec_Prefer_No_AVX512)
+BIT (Prefer_AVX2_STRCMP)
--
GitLab
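
The preferred[index_arch_*] updates above are plain bit-array bookkeeping: each preference is one bit that later ifunc selectors test. A tiny self-contained sketch of the pattern, with invented bit names rather than glibc's:

    #include <stdio.h>

    enum
    {
      bit_prefer_no_vzeroupper = 1u << 0,   /* invented names */
      bit_prefer_avx2_strcmp   = 1u << 1
    };

    int
    main (void)
    {
      unsigned int preferred = 0;
      int rtm_usable = 1, avx2_usable = 1;   /* stand-ins for CPUID probes */

      if (rtm_usable)
        preferred |= bit_prefer_no_vzeroupper;   /* avoid RTM aborts */
      if (avx2_usable)
        preferred |= bit_prefer_avx2_strcmp;     /* AVX2 strcmp is cheaper */

      printf ("prefer AVX2 strcmp: %d\n",
              (preferred & bit_prefer_avx2_strcmp) != 0);
      return 0;
    }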
View file
@@ -1,26 +0,0 @@
From 3213ed770cbc5821920d16caa93c85e92dd7b9f6 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Wed, 23 Jun 2021 13:29:41 -0700
Subject: Update math: redirect roundeven function
Redirect target specific roundeven functions for aarch64, ldbl-128ibm
and riscv.
Conflicts:
sysdeps/aarch64/*
(not needed)
sysdeps/riscv/*
(not supported)
diff --git a/sysdeps/ieee754/ldbl-128ibm/s_roundevenl.c b/sysdeps/ieee754/ldbl-128ibm/s_roundevenl.c
index 6701970f4a..90eecf496b 100644
--- a/sysdeps/ieee754/ldbl-128ibm/s_roundevenl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/s_roundevenl.c
@@ -17,6 +17,7 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#define NO_MATH_REDIRECT
#include <math.h>
#include <math_private.h>
File diff suppressed because it is too large.
File diff suppressed because it is too large.
View file
@@ -1,242 +0,0 @@
From 63ad43566f7a25d140dc723598aeb441ad657eed Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 5 Mar 2021 06:46:08 -0800
Subject: [PATCH] x86-64: Add memmove family functions with 256-bit EVEX
Content-type: text/plain; charset=UTF-8
Update ifunc-memmove.h to select the function optimized with 256-bit EVEX
instructions using YMM16-YMM31 registers to avoid RTM abort with usable
AVX512VL since VZEROUPPER isn't needed at function exit.
---
sysdeps/x86_64/multiarch/Makefile | 1 +
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 36 +++++++++++++++++++
sysdeps/x86_64/multiarch/ifunc-memmove.h | 21 +++++++++--
.../multiarch/memmove-evex-unaligned-erms.S | 33 +++++++++++++++++
.../multiarch/memmove-vec-unaligned-erms.S | 24 ++++++++-----
5 files changed, 104 insertions(+), 11 deletions(-)
create mode 100644 sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 46783cd1..4563fc56 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -41,6 +41,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c \
memset-avx2-unaligned-erms \
memset-avx512-unaligned-erms \
memchr-evex \
+ memmove-evex-unaligned-erms \
memrchr-evex \
rawmemchr-evex \
stpcpy-evex \
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 082e4da3..6bd3abfc 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -80,6 +80,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, __memmove_chk,
CPU_FEATURE_USABLE (AVX),
__memmove_chk_avx_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memmove_chk_evex_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memmove_chk_evex_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __memmove_chk,
CPU_FEATURE_USABLE (SSSE3),
__memmove_chk_ssse3_back)
@@ -102,6 +108,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memmove,
CPU_FEATURE_USABLE (AVX),
__memmove_avx_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memmove,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memmove_evex_unaligned)
+ IFUNC_IMPL_ADD (array, i, memmove,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memmove_evex_unaligned_erms)
IFUNC_IMPL_ADD (array, i, memmove,
CPU_FEATURE_USABLE (AVX512F),
__memmove_avx512_no_vzeroupper)
@@ -565,6 +577,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
CPU_FEATURE_USABLE (AVX),
__memcpy_chk_avx_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memcpy_chk_evex_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memcpy_chk_evex_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
CPU_FEATURE_USABLE (SSSE3),
__memcpy_chk_ssse3_back)
@@ -587,6 +605,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memcpy,
CPU_FEATURE_USABLE (AVX),
__memcpy_avx_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memcpy,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memcpy_evex_unaligned)
+ IFUNC_IMPL_ADD (array, i, memcpy,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memcpy_evex_unaligned_erms)
IFUNC_IMPL_ADD (array, i, memcpy, CPU_FEATURE_USABLE (SSSE3),
__memcpy_ssse3_back)
IFUNC_IMPL_ADD (array, i, memcpy, CPU_FEATURE_USABLE (SSSE3),
@@ -623,6 +647,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
CPU_FEATURE_USABLE (AVX),
__mempcpy_chk_avx_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __mempcpy_chk_evex_unaligned)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __mempcpy_chk_evex_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
CPU_FEATURE_USABLE (SSSE3),
__mempcpy_chk_ssse3_back)
@@ -654,6 +684,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, mempcpy,
CPU_FEATURE_USABLE (AVX),
__mempcpy_avx_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __mempcpy_evex_unaligned)
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __mempcpy_evex_unaligned_erms)
IFUNC_IMPL_ADD (array, i, mempcpy, CPU_FEATURE_USABLE (SSSE3),
__mempcpy_ssse3_back)
IFUNC_IMPL_ADD (array, i, mempcpy, CPU_FEATURE_USABLE (SSSE3),
diff --git a/sysdeps/x86_64/multiarch/ifunc-memmove.h b/sysdeps/x86_64/multiarch/ifunc-memmove.h
index 5e5f0299..6f8bce5f 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memmove.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memmove.h
@@ -29,6 +29,10 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3_back) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms)
attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
+ attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
@@ -59,10 +63,21 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
- return OPTIMIZE (avx_unaligned_erms);
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
+ {
+ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+ return OPTIMIZE (evex_unaligned_erms);
+
+ return OPTIMIZE (evex_unaligned);
+ }
+
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ {
+ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+ return OPTIMIZE (avx_unaligned_erms);
- return OPTIMIZE (avx_unaligned);
+ return OPTIMIZE (avx_unaligned);
+ }
}
if (!CPU_FEATURE_USABLE_P (cpu_features, SSSE3)
diff --git a/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S
new file mode 100644
index 00000000..0cbce8f9
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S
@@ -0,0 +1,33 @@
+#if IS_IN (libc)
+# define VEC_SIZE 32
+# define XMM0 xmm16
+# define XMM1 xmm17
+# define YMM0 ymm16
+# define YMM1 ymm17
+# define VEC0 ymm16
+# define VEC1 ymm17
+# define VEC2 ymm18
+# define VEC3 ymm19
+# define VEC4 ymm20
+# define VEC5 ymm21
+# define VEC6 ymm22
+# define VEC7 ymm23
+# define VEC8 ymm24
+# define VEC9 ymm25
+# define VEC10 ymm26
+# define VEC11 ymm27
+# define VEC12 ymm28
+# define VEC13 ymm29
+# define VEC14 ymm30
+# define VEC15 ymm31
+# define VEC(i) VEC##i
+# define VMOVNT vmovntdq
+# define VMOVU vmovdqu64
+# define VMOVA vmovdqa64
+# define VZEROUPPER
+
+# define SECTION(p) p##.evex
+# define MEMMOVE_SYMBOL(p,s) p##_evex_##s
+
+# include "memmove-vec-unaligned-erms.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index 274aa1c7..08e21692 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -48,6 +48,14 @@
# define MEMMOVE_CHK_SYMBOL(p,s) MEMMOVE_SYMBOL(p, s)
#endif
+#ifndef XMM0
+# define XMM0 xmm0
+#endif
+
+#ifndef YMM0
+# define YMM0 ymm0
+#endif
+
#ifndef VZEROUPPER
# if VEC_SIZE > 16
# define VZEROUPPER vzeroupper
@@ -277,20 +285,20 @@ L(less_vec):
#if VEC_SIZE > 32
L(between_32_63):
/* From 32 to 63. No branch when size == 32. */
- vmovdqu (%rsi), %ymm0
- vmovdqu -32(%rsi,%rdx), %ymm1
- vmovdqu %ymm0, (%rdi)
- vmovdqu %ymm1, -32(%rdi,%rdx)
+ VMOVU (%rsi), %YMM0
+ VMOVU -32(%rsi,%rdx), %YMM1
+ VMOVU %YMM0, (%rdi)
+ VMOVU %YMM1, -32(%rdi,%rdx)
VZEROUPPER
ret
#endif
#if VEC_SIZE > 16
/* From 16 to 31. No branch when size == 16. */
L(between_16_31):
- vmovdqu (%rsi), %xmm0
- vmovdqu -16(%rsi,%rdx), %xmm1
- vmovdqu %xmm0, (%rdi)
- vmovdqu %xmm1, -16(%rdi,%rdx)
+ VMOVU (%rsi), %XMM0
+ VMOVU -16(%rsi,%rdx), %XMM1
+ VMOVU %XMM0, (%rdi)
+ VMOVU %XMM1, -16(%rdi,%rdx)
ret
#endif
L(between_8_15):
--
GitLab
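
The selector added to ifunc-memmove.h follows the standard GNU ifunc pattern: a resolver runs once at load time, and the symbol is then bound directly to the chosen implementation with no per-call dispatch. A generic sketch of that pattern (GCC/ELF extension; the feature probe and variant bodies are placeholders):

    #include <stddef.h>
    #include <string.h>

    typedef void *(*memmove_fn) (void *, const void *, size_t);

    static void *
    memmove_baseline (void *d, const void *s, size_t n)
    {
      return memmove (d, s, n);
    }

    static void *
    memmove_fancy (void *d, const void *s, size_t n)
    {
      return memmove (d, s, n);   /* imagine the EVEX body here */
    }

    static int
    cpu_has_fancy_simd (void)
    {
      return 0;   /* a real resolver would probe CPUID here */
    }

    /* Runs once at load time; keep it self-contained, since it may run
       before all relocations are applied.  */
    static memmove_fn
    resolve_my_memmove (void)
    {
      return cpu_has_fancy_simd () ? memmove_fancy : memmove_baseline;
    }

    void *my_memmove (void *, const void *, size_t)
      __attribute__ ((ifunc ("resolve_my_memmove")));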
View file
@@ -1,254 +0,0 @@
From 1b968b6b9b3aac702ac2f133e0dd16cfdbb415ee Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 5 Mar 2021 07:15:03 -0800
Subject: [PATCH] x86-64: Add memset family functions with 256-bit EVEX
Content-type: text/plain; charset=UTF-8
Update ifunc-memset.h/ifunc-wmemset.h to select the function optimized
with 256-bit EVEX instructions using YMM16-YMM31 registers to avoid RTM
abort with usable AVX512VL and AVX512BW since VZEROUPPER isn't needed at
function exit.
---
sysdeps/x86_64/multiarch/Makefile | 1 +
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 22 +++++++++++++++++
sysdeps/x86_64/multiarch/ifunc-memset.h | 24 +++++++++++++++----
sysdeps/x86_64/multiarch/ifunc-wmemset.h | 13 ++++++----
.../multiarch/memset-evex-unaligned-erms.S | 24 +++++++++++++++++++
.../multiarch/memset-vec-unaligned-erms.S | 20 +++++++++++-----
6 files changed, 90 insertions(+), 14 deletions(-)
create mode 100644 sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 4563fc56..1cc0a10e 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -43,6 +43,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c \
memchr-evex \
memmove-evex-unaligned-erms \
memrchr-evex \
+ memset-evex-unaligned-erms \
rawmemchr-evex \
stpcpy-evex \
stpncpy-evex \
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 6bd3abfc..7cf83485 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -160,6 +160,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, __memset_chk,
CPU_FEATURE_USABLE (AVX2),
__memset_chk_avx2_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
+ __memset_chk_evex_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
+ __memset_chk_evex_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __memset_chk,
CPU_FEATURE_USABLE (AVX512F),
__memset_chk_avx512_unaligned_erms)
@@ -185,6 +193,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memset,
CPU_FEATURE_USABLE (AVX2),
__memset_avx2_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memset,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
+ __memset_evex_unaligned)
+ IFUNC_IMPL_ADD (array, i, memset,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
+ __memset_evex_unaligned_erms)
IFUNC_IMPL_ADD (array, i, memset,
CPU_FEATURE_USABLE (AVX512F),
__memset_avx512_unaligned_erms)
@@ -555,6 +571,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, wmemset,
CPU_FEATURE_USABLE (AVX2),
__wmemset_avx2_unaligned)
+ IFUNC_IMPL_ADD (array, i, wmemset,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __wmemset_evex_unaligned)
IFUNC_IMPL_ADD (array, i, wmemset,
CPU_FEATURE_USABLE (AVX512F),
__wmemset_avx512_unaligned))
@@ -723,6 +742,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, __wmemset_chk,
CPU_FEATURE_USABLE (AVX2),
__wmemset_chk_avx2_unaligned)
+ IFUNC_IMPL_ADD (array, i, __wmemset_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __wmemset_chk_evex_unaligned)
IFUNC_IMPL_ADD (array, i, __wmemset_chk,
CPU_FEATURE_USABLE (AVX512F),
__wmemset_chk_avx512_unaligned))
diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h
index 708bd72e..6f31f4dc 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memset.h
@@ -27,6 +27,10 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms)
attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
+ attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
@@ -56,10 +60,22 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
- return OPTIMIZE (avx2_unaligned_erms);
- else
- return OPTIMIZE (avx2_unaligned);
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+ {
+ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+ return OPTIMIZE (evex_unaligned_erms);
+
+ return OPTIMIZE (evex_unaligned);
+ }
+
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ {
+ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+ return OPTIMIZE (avx2_unaligned_erms);
+
+ return OPTIMIZE (avx2_unaligned);
+ }
}
if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
index eb242210..9290c4bf 100644
--- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
@@ -20,6 +20,7 @@
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden;
static inline void *
@@ -27,14 +28,18 @@ IFUNC_SELECTOR (void)
{
const struct cpu_features* cpu_features = __get_cpu_features ();
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
{
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
- && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
+ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
return OPTIMIZE (avx512_unaligned);
- else
+
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
+ return OPTIMIZE (evex_unaligned);
+
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
return OPTIMIZE (avx2_unaligned);
}
diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
new file mode 100644
index 00000000..ae0a4d6e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
@@ -0,0 +1,24 @@
+#if IS_IN (libc)
+# define VEC_SIZE 32
+# define XMM0 xmm16
+# define YMM0 ymm16
+# define VEC0 ymm16
+# define VEC(i) VEC##i
+# define VMOVU vmovdqu64
+# define VMOVA vmovdqa64
+# define VZEROUPPER
+
+# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+ movq r, %rax; \
+ vpbroadcastb d, %VEC0
+
+# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+ movq r, %rax; \
+ vpbroadcastd d, %VEC0
+
+# define SECTION(p) p##.evex
+# define MEMSET_SYMBOL(p,s) p##_evex_##s
+# define WMEMSET_SYMBOL(p,s) p##_evex_##s
+
+# include "memset-vec-unaligned-erms.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
index 9a0fd818..71e91a8f 100644
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -34,6 +34,14 @@
# define WMEMSET_CHK_SYMBOL(p,s) WMEMSET_SYMBOL(p, s)
#endif
+#ifndef XMM0
+# define XMM0 xmm0
+#endif
+
+#ifndef YMM0
+# define YMM0 ymm0
+#endif
+
#ifndef VZEROUPPER
# if VEC_SIZE > 16
# define VZEROUPPER vzeroupper
@@ -67,7 +75,7 @@
ENTRY (__bzero)
mov %RDI_LP, %RAX_LP /* Set return value. */
mov %RSI_LP, %RDX_LP /* Set n. */
- pxor %xmm0, %xmm0
+ pxor %XMM0, %XMM0
jmp L(entry_from_bzero)
END (__bzero)
weak_alias (__bzero, bzero)
@@ -223,7 +231,7 @@ L(less_vec):
cmpb $16, %dl
jae L(between_16_31)
# endif
- MOVQ %xmm0, %rcx
+ MOVQ %XMM0, %rcx
cmpb $8, %dl
jae L(between_8_15)
cmpb $4, %dl
@@ -238,16 +246,16 @@ L(less_vec):
# if VEC_SIZE > 32
/* From 32 to 63. No branch when size == 32. */
L(between_32_63):
- vmovdqu %ymm0, -32(%rdi,%rdx)
- vmovdqu %ymm0, (%rdi)
+ VMOVU %YMM0, -32(%rdi,%rdx)
+ VMOVU %YMM0, (%rdi)
VZEROUPPER
ret
# endif
# if VEC_SIZE > 16
/* From 16 to 31. No branch when size == 16. */
L(between_16_31):
- vmovdqu %xmm0, -16(%rdi,%rdx)
- vmovdqu %xmm0, (%rdi)
+ VMOVU %XMM0, -16(%rdi,%rdx)
+ VMOVU %XMM0, (%rdi)
VZEROUPPER
ret
# endif
--
GitLab
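
The hunks above rewrite the memset/wmemset IFUNC selectors so the EVEX variants win whenever AVX512VL and AVX512BW are usable, and the AVX2 variants are skipped when VZEROUPPER is discouraged. Because the EVEX code uses ymm16-ymm31, which have no legacy SSE aliasing, it needs no VZEROUPPER at exit and therefore cannot abort an RTM transaction. A simplified C sketch of that decision order; has_feature()/prefer() and the function names are hypothetical stand-ins for glibc's CPU_FEATURE_USABLE_P/CPU_FEATURES_ARCH_P macros and the real IFUNC symbols:

#include <stddef.h>

/* Hypothetical predicates standing in for glibc's feature macros.  */
extern int has_feature (const char *name);
extern int prefer (const char *arch_hint);

typedef void *(*memset_fn) (void *, int, size_t);
extern void *memset_evex_unaligned (void *, int, size_t);
extern void *memset_evex_unaligned_erms (void *, int, size_t);
extern void *memset_avx2_unaligned (void *, int, size_t);
extern void *memset_avx2_unaligned_erms (void *, int, size_t);
extern void *memset_sse2_unaligned (void *, int, size_t);

static memset_fn
select_memset (void)
{
  if (has_feature ("AVX2"))
    {
      /* EVEX variant uses ymm16-ymm31: no VZEROUPPER needed, so it is
         safe even inside an RTM transaction.  */
      if (has_feature ("AVX512VL") && has_feature ("AVX512BW"))
        return has_feature ("ERMS") ? memset_evex_unaligned_erms
                                    : memset_evex_unaligned;
      /* AVX2 variants end with VZEROUPPER; avoid them where that is
         discouraged.  */
      if (!prefer ("No_VZEROUPPER"))
        return has_feature ("ERMS") ? memset_avx2_unaligned_erms
                                    : memset_avx2_unaligned;
    }
  return memset_sse2_unaligned;  /* baseline (glibc also checks ERMS here) */
}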


@ -1,561 +0,0 @@
From 91264fe3577fe887b4860923fa6142b5274c8965 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 5 Mar 2021 07:20:28 -0800
Subject: [PATCH] x86-64: Add memcmp family functions with 256-bit EVEX
Content-type: text/plain; charset=UTF-8
Update ifunc-memcmp.h to select the function optimized with 256-bit EVEX
instructions using YMM16-YMM31 registers to avoid RTM abort with usable
AVX512VL, AVX512BW and MOVBE since VZEROUPPER isn't needed at function
exit.
---
sysdeps/x86_64/multiarch/Makefile | 4 +-
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 10 +
sysdeps/x86_64/multiarch/ifunc-memcmp.h | 13 +-
sysdeps/x86_64/multiarch/memcmp-evex-movbe.S | 440 ++++++++++++++++++
sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S | 4 +
5 files changed, 467 insertions(+), 4 deletions(-)
create mode 100644 sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
create mode 100644 sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 1cc0a10e..9d79b138 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -41,6 +41,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c \
memset-avx2-unaligned-erms \
memset-avx512-unaligned-erms \
memchr-evex \
+ memcmp-evex-movbe \
memmove-evex-unaligned-erms \
memrchr-evex \
memset-evex-unaligned-erms \
@@ -81,7 +82,8 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \
wcsncmp-evex \
wcsnlen-evex \
wcsrchr-evex \
- wmemchr-evex
+ wmemchr-evex \
+ wmemcmp-evex-movbe
endif
ifeq ($(subdir),debug)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 7cf83485..c8da910e 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -56,6 +56,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (MOVBE)),
__memcmp_avx2_movbe)
+ IFUNC_IMPL_ADD (array, i, memcmp,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (MOVBE)),
+ __memcmp_evex_movbe)
IFUNC_IMPL_ADD (array, i, memcmp, CPU_FEATURE_USABLE (SSE4_1),
__memcmp_sse4_1)
IFUNC_IMPL_ADD (array, i, memcmp, CPU_FEATURE_USABLE (SSSE3),
@@ -558,6 +563,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (MOVBE)),
__wmemcmp_avx2_movbe)
+ IFUNC_IMPL_ADD (array, i, wmemcmp,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (MOVBE)),
+ __wmemcmp_evex_movbe)
IFUNC_IMPL_ADD (array, i, wmemcmp, CPU_FEATURE_USABLE (SSE4_1),
__wmemcmp_sse4_1)
IFUNC_IMPL_ADD (array, i, wmemcmp, CPU_FEATURE_USABLE (SSSE3),
diff --git a/sysdeps/x86_64/multiarch/ifunc-memcmp.h b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
index 6c1f3153..3ca1f0a6 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memcmp.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
@@ -23,17 +23,24 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_movbe) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
const struct cpu_features* cpu_features = __get_cpu_features ();
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
- return OPTIMIZE (avx2_movbe);
+ {
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+ return OPTIMIZE (evex_movbe);
+
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ return OPTIMIZE (avx2_movbe);
+ }
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
return OPTIMIZE (sse4_1);
diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
new file mode 100644
index 00000000..9c093972
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
@@ -0,0 +1,440 @@
+/* memcmp/wmemcmp optimized with 256-bit EVEX instructions.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+/* memcmp/wmemcmp is implemented as:
+ 1. For size from 2 to 7 bytes, load as big endian with movbe and bswap
+ to avoid branches.
+ 2. Use overlapping compare to avoid branch.
+ 3. Use vector compare when size >= 4 bytes for memcmp or size >= 8
+ bytes for wmemcmp.
+ 4. If size is 8 * VEC_SIZE or less, unroll the loop.
+ 5. Compare 4 * VEC_SIZE at a time with the aligned first memory
+ area.
+ 6. Use 2 vector compares when size is 2 * VEC_SIZE or less.
+ 7. Use 4 vector compares when size is 4 * VEC_SIZE or less.
+ 8. Use 8 vector compares when size is 8 * VEC_SIZE or less. */
+
+# include <sysdep.h>
+
+# ifndef MEMCMP
+# define MEMCMP __memcmp_evex_movbe
+# endif
+
+# define VMOVU vmovdqu64
+
+# ifdef USE_AS_WMEMCMP
+# define VPCMPEQ vpcmpeqd
+# else
+# define VPCMPEQ vpcmpeqb
+# endif
+
+# define XMM1 xmm17
+# define XMM2 xmm18
+# define YMM1 ymm17
+# define YMM2 ymm18
+# define YMM3 ymm19
+# define YMM4 ymm20
+# define YMM5 ymm21
+# define YMM6 ymm22
+
+# define VEC_SIZE 32
+# ifdef USE_AS_WMEMCMP
+# define VEC_MASK 0xff
+# define XMM_MASK 0xf
+# else
+# define VEC_MASK 0xffffffff
+# define XMM_MASK 0xffff
+# endif
+
+/* Warning!
+ wmemcmp has to use SIGNED comparison for elements.
+ memcmp has to use UNSIGNED comparison for elements.
+*/
+
+ .section .text.evex,"ax",@progbits
+ENTRY (MEMCMP)
+# ifdef USE_AS_WMEMCMP
+ shl $2, %RDX_LP
+# elif defined __ILP32__
+ /* Clear the upper 32 bits. */
+ movl %edx, %edx
+# endif
+ cmp $VEC_SIZE, %RDX_LP
+ jb L(less_vec)
+
+ /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */
+ VMOVU (%rsi), %YMM2
+ VPCMPEQ (%rdi), %YMM2, %k1
+ kmovd %k1, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+
+ cmpq $(VEC_SIZE * 2), %rdx
+ jbe L(last_vec)
+
+ /* More than 2 * VEC. */
+ cmpq $(VEC_SIZE * 8), %rdx
+ ja L(more_8x_vec)
+ cmpq $(VEC_SIZE * 4), %rdx
+ jb L(last_4x_vec)
+
+ /* From 4 * VEC to 8 * VEC, inclusively. */
+ VMOVU (%rsi), %YMM1
+ VPCMPEQ (%rdi), %YMM1, %k1
+
+ VMOVU VEC_SIZE(%rsi), %YMM2
+ VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2
+
+ VMOVU (VEC_SIZE * 2)(%rsi), %YMM3
+ VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3
+
+ VMOVU (VEC_SIZE * 3)(%rsi), %YMM4
+ VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4
+
+ kandd %k1, %k2, %k5
+ kandd %k3, %k4, %k6
+ kandd %k5, %k6, %k6
+
+ kmovd %k6, %eax
+ cmpl $VEC_MASK, %eax
+ jne L(4x_vec_end)
+
+ leaq -(4 * VEC_SIZE)(%rdi, %rdx), %rdi
+ leaq -(4 * VEC_SIZE)(%rsi, %rdx), %rsi
+ VMOVU (%rsi), %YMM1
+ VPCMPEQ (%rdi), %YMM1, %k1
+
+ VMOVU VEC_SIZE(%rsi), %YMM2
+ VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2
+ kandd %k1, %k2, %k5
+
+ VMOVU (VEC_SIZE * 2)(%rsi), %YMM3
+ VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3
+ kandd %k3, %k5, %k5
+
+ VMOVU (VEC_SIZE * 3)(%rsi), %YMM4
+ VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4
+ kandd %k4, %k5, %k5
+
+ kmovd %k5, %eax
+ cmpl $VEC_MASK, %eax
+ jne L(4x_vec_end)
+ xorl %eax, %eax
+ ret
+
+ .p2align 4
+L(last_2x_vec):
+ /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */
+ VMOVU (%rsi), %YMM2
+ VPCMPEQ (%rdi), %YMM2, %k2
+ kmovd %k2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+
+L(last_vec):
+ /* Use overlapping loads to avoid branches. */
+ leaq -VEC_SIZE(%rdi, %rdx), %rdi
+ leaq -VEC_SIZE(%rsi, %rdx), %rsi
+ VMOVU (%rsi), %YMM2
+ VPCMPEQ (%rdi), %YMM2, %k2
+ kmovd %k2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+ ret
+
+ .p2align 4
+L(first_vec):
+ /* A byte or int32 is different within 16 or 32 bytes. */
+ tzcntl %eax, %ecx
+# ifdef USE_AS_WMEMCMP
+ xorl %eax, %eax
+ movl (%rdi, %rcx, 4), %edx
+ cmpl (%rsi, %rcx, 4), %edx
+L(wmemcmp_return):
+ setl %al
+ negl %eax
+ orl $1, %eax
+# else
+ movzbl (%rdi, %rcx), %eax
+ movzbl (%rsi, %rcx), %edx
+ sub %edx, %eax
+# endif
+ ret
+
+# ifdef USE_AS_WMEMCMP
+ .p2align 4
+L(4):
+ xorl %eax, %eax
+ movl (%rdi), %edx
+ cmpl (%rsi), %edx
+ jne L(wmemcmp_return)
+ ret
+# else
+ .p2align 4
+L(between_4_7):
+ /* Load as big endian with overlapping movbe to avoid branches. */
+ movbe (%rdi), %eax
+ movbe (%rsi), %ecx
+ shlq $32, %rax
+ shlq $32, %rcx
+ movbe -4(%rdi, %rdx), %edi
+ movbe -4(%rsi, %rdx), %esi
+ orq %rdi, %rax
+ orq %rsi, %rcx
+ subq %rcx, %rax
+ je L(exit)
+ sbbl %eax, %eax
+ orl $1, %eax
+ ret
+
+ .p2align 4
+L(exit):
+ ret
+
+ .p2align 4
+L(between_2_3):
+ /* Load as big endian to avoid branches. */
+ movzwl (%rdi), %eax
+ movzwl (%rsi), %ecx
+ shll $8, %eax
+ shll $8, %ecx
+ bswap %eax
+ bswap %ecx
+ movb -1(%rdi, %rdx), %al
+ movb -1(%rsi, %rdx), %cl
+ /* Subtraction is okay because the upper 8 bits are zero. */
+ subl %ecx, %eax
+ ret
+
+ .p2align 4
+L(1):
+ movzbl (%rdi), %eax
+ movzbl (%rsi), %ecx
+ subl %ecx, %eax
+ ret
+# endif
+
+ .p2align 4
+L(zero):
+ xorl %eax, %eax
+ ret
+
+ .p2align 4
+L(less_vec):
+# ifdef USE_AS_WMEMCMP
+ /* It can only be 0, 4, 8, 12, 16, 20, 24, 28 bytes. */
+ cmpb $4, %dl
+ je L(4)
+ jb L(zero)
+# else
+ cmpb $1, %dl
+ je L(1)
+ jb L(zero)
+ cmpb $4, %dl
+ jb L(between_2_3)
+ cmpb $8, %dl
+ jb L(between_4_7)
+# endif
+ cmpb $16, %dl
+ jae L(between_16_31)
+ /* It is between 8 and 15 bytes. */
+ vmovq (%rdi), %XMM1
+ vmovq (%rsi), %XMM2
+ VPCMPEQ %XMM1, %XMM2, %k2
+ kmovw %k2, %eax
+ subl $XMM_MASK, %eax
+ jnz L(first_vec)
+ /* Use overlapping loads to avoid branches. */
+ leaq -8(%rdi, %rdx), %rdi
+ leaq -8(%rsi, %rdx), %rsi
+ vmovq (%rdi), %XMM1
+ vmovq (%rsi), %XMM2
+ VPCMPEQ %XMM1, %XMM2, %k2
+ kmovw %k2, %eax
+ subl $XMM_MASK, %eax
+ jnz L(first_vec)
+ ret
+
+ .p2align 4
+L(between_16_31):
+ /* From 16 to 31 bytes. No branch when size == 16. */
+ VMOVU (%rsi), %XMM2
+ VPCMPEQ (%rdi), %XMM2, %k2
+ kmovw %k2, %eax
+ subl $XMM_MASK, %eax
+ jnz L(first_vec)
+
+ /* Use overlapping loads to avoid branches. */
+ leaq -16(%rdi, %rdx), %rdi
+ leaq -16(%rsi, %rdx), %rsi
+ VMOVU (%rsi), %XMM2
+ VPCMPEQ (%rdi), %XMM2, %k2
+ kmovw %k2, %eax
+ subl $XMM_MASK, %eax
+ jnz L(first_vec)
+ ret
+
+ .p2align 4
+L(more_8x_vec):
+ /* More than 8 * VEC. Check the first VEC. */
+ VMOVU (%rsi), %YMM2
+ VPCMPEQ (%rdi), %YMM2, %k2
+ kmovd %k2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+
+ /* Align the first memory area for aligned loads in the loop.
+ Compute how much the first memory area is misaligned. */
+ movq %rdi, %rcx
+ andl $(VEC_SIZE - 1), %ecx
+ /* Get the negative of offset for alignment. */
+ subq $VEC_SIZE, %rcx
+ /* Adjust the second memory area. */
+ subq %rcx, %rsi
+ /* Adjust the first memory area which should be aligned now. */
+ subq %rcx, %rdi
+ /* Adjust length. */
+ addq %rcx, %rdx
+
+L(loop_4x_vec):
+ /* Compare 4 * VEC at a time forward. */
+ VMOVU (%rsi), %YMM1
+ VPCMPEQ (%rdi), %YMM1, %k1
+
+ VMOVU VEC_SIZE(%rsi), %YMM2
+ VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2
+ kandd %k2, %k1, %k5
+
+ VMOVU (VEC_SIZE * 2)(%rsi), %YMM3
+ VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3
+ kandd %k3, %k5, %k5
+
+ VMOVU (VEC_SIZE * 3)(%rsi), %YMM4
+ VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4
+ kandd %k4, %k5, %k5
+
+ kmovd %k5, %eax
+ cmpl $VEC_MASK, %eax
+ jne L(4x_vec_end)
+
+ addq $(VEC_SIZE * 4), %rdi
+ addq $(VEC_SIZE * 4), %rsi
+
+ subq $(VEC_SIZE * 4), %rdx
+ cmpq $(VEC_SIZE * 4), %rdx
+ jae L(loop_4x_vec)
+
+ /* Less than 4 * VEC. */
+ cmpq $VEC_SIZE, %rdx
+ jbe L(last_vec)
+ cmpq $(VEC_SIZE * 2), %rdx
+ jbe L(last_2x_vec)
+
+L(last_4x_vec):
+ /* From 2 * VEC to 4 * VEC. */
+ VMOVU (%rsi), %YMM2
+ VPCMPEQ (%rdi), %YMM2, %k2
+ kmovd %k2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+
+ addq $VEC_SIZE, %rdi
+ addq $VEC_SIZE, %rsi
+ VMOVU (%rsi), %YMM2
+ VPCMPEQ (%rdi), %YMM2, %k2
+ kmovd %k2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+
+ /* Use overlapping loads to avoid branches. */
+ leaq -(3 * VEC_SIZE)(%rdi, %rdx), %rdi
+ leaq -(3 * VEC_SIZE)(%rsi, %rdx), %rsi
+ VMOVU (%rsi), %YMM2
+ VPCMPEQ (%rdi), %YMM2, %k2
+ kmovd %k2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+
+ addq $VEC_SIZE, %rdi
+ addq $VEC_SIZE, %rsi
+ VMOVU (%rsi), %YMM2
+ VPCMPEQ (%rdi), %YMM2, %k2
+ kmovd %k2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+ ret
+
+ .p2align 4
+L(4x_vec_end):
+ kmovd %k1, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+ kmovd %k2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec_x1)
+ kmovd %k3, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec_x2)
+ kmovd %k4, %eax
+ subl $VEC_MASK, %eax
+ tzcntl %eax, %ecx
+# ifdef USE_AS_WMEMCMP
+ xorl %eax, %eax
+ movl (VEC_SIZE * 3)(%rdi, %rcx, 4), %edx
+ cmpl (VEC_SIZE * 3)(%rsi, %rcx, 4), %edx
+ jmp L(wmemcmp_return)
+# else
+ movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax
+ movzbl (VEC_SIZE * 3)(%rsi, %rcx), %edx
+ sub %edx, %eax
+# endif
+ ret
+
+ .p2align 4
+L(first_vec_x1):
+ tzcntl %eax, %ecx
+# ifdef USE_AS_WMEMCMP
+ xorl %eax, %eax
+ movl VEC_SIZE(%rdi, %rcx, 4), %edx
+ cmpl VEC_SIZE(%rsi, %rcx, 4), %edx
+ jmp L(wmemcmp_return)
+# else
+ movzbl VEC_SIZE(%rdi, %rcx), %eax
+ movzbl VEC_SIZE(%rsi, %rcx), %edx
+ sub %edx, %eax
+# endif
+ ret
+
+ .p2align 4
+L(first_vec_x2):
+ tzcntl %eax, %ecx
+# ifdef USE_AS_WMEMCMP
+ xorl %eax, %eax
+ movl (VEC_SIZE * 2)(%rdi, %rcx, 4), %edx
+ cmpl (VEC_SIZE * 2)(%rsi, %rcx, 4), %edx
+ jmp L(wmemcmp_return)
+# else
+ movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax
+ movzbl (VEC_SIZE * 2)(%rsi, %rcx), %edx
+ sub %edx, %eax
+# endif
+ ret
+END (MEMCMP)
+#endif
diff --git a/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S
new file mode 100644
index 00000000..4726d74a
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S
@@ -0,0 +1,4 @@
+#define MEMCMP __wmemcmp_evex_movbe
+#define USE_AS_WMEMCMP 1
+
+#include "memcmp-evex-movbe.S"
--
GitLab
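
The L(between_4_7) path in the patch above is worth unpacking: two overlapping 4-byte loads, converted to big-endian, cover any length from 4 to 7 bytes without a branch. A C sketch of the same idea (illustrative only; memcpy stands in for the unaligned loads and GCC's __builtin_bswap32 for movbe):

#include <stdint.h>
#include <string.h>

/* Branchless compare for 4 <= n <= 7, mirroring L(between_4_7): the
   first four bytes go in the high half, the last four (overlapping)
   in the low half, both byte-swapped so that numeric order matches
   memory order.  */
static int
cmp_4_7 (const unsigned char *s1, const unsigned char *s2, size_t n)
{
  uint32_t a, b, c, d;
  memcpy (&a, s1, 4);              /* movbe (%rdi), %eax */
  memcpy (&b, s2, 4);
  memcpy (&c, s1 + n - 4, 4);      /* movbe -4(%rdi, %rdx), %edi */
  memcpy (&d, s2 + n - 4, 4);
  uint64_t x = ((uint64_t) __builtin_bswap32 (a) << 32)
               | __builtin_bswap32 (c);
  uint64_t y = ((uint64_t) __builtin_bswap32 (b) << 32)
               | __builtin_bswap32 (d);
  return x < y ? -1 : x > y;       /* matches the sbbl/orl sign fixup */
}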

File diff suppressed because it is too large


@ -1,735 +0,0 @@
From 4bd660be40967cd69072f69ebc2ad32bfcc1f206 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Tue, 23 Feb 2021 06:33:10 -0800
Subject: [PATCH] x86: Add string/memory function tests in RTM region
Content-type: text/plain; charset=UTF-8
At function exit, AVX optimized string/memory functions have VZEROUPPER
which triggers RTM abort. When such functions are called inside a
transactionally executing RTM region, RTM abort causes severe performance
degradation. Add tests to verify that string/memory functions won't
cause RTM abort in RTM region.
---
sysdeps/x86/Makefile | 23 +++++++++++
sysdeps/x86/tst-memchr-rtm.c | 54 ++++++++++++++++++++++++++
sysdeps/x86/tst-memcmp-rtm.c | 52 +++++++++++++++++++++++++
sysdeps/x86/tst-memmove-rtm.c | 53 ++++++++++++++++++++++++++
sysdeps/x86/tst-memrchr-rtm.c | 54 ++++++++++++++++++++++++++
sysdeps/x86/tst-memset-rtm.c | 45 ++++++++++++++++++++++
sysdeps/x86/tst-strchr-rtm.c | 54 ++++++++++++++++++++++++++
sysdeps/x86/tst-strcpy-rtm.c | 53 ++++++++++++++++++++++++++
sysdeps/x86/tst-string-rtm.h | 72 +++++++++++++++++++++++++++++++++++
sysdeps/x86/tst-strlen-rtm.c | 53 ++++++++++++++++++++++++++
sysdeps/x86/tst-strncmp-rtm.c | 52 +++++++++++++++++++++++++
sysdeps/x86/tst-strrchr-rtm.c | 53 ++++++++++++++++++++++++++
12 files changed, 618 insertions(+)
create mode 100644 sysdeps/x86/tst-memchr-rtm.c
create mode 100644 sysdeps/x86/tst-memcmp-rtm.c
create mode 100644 sysdeps/x86/tst-memmove-rtm.c
create mode 100644 sysdeps/x86/tst-memrchr-rtm.c
create mode 100644 sysdeps/x86/tst-memset-rtm.c
create mode 100644 sysdeps/x86/tst-strchr-rtm.c
create mode 100644 sysdeps/x86/tst-strcpy-rtm.c
create mode 100644 sysdeps/x86/tst-string-rtm.h
create mode 100644 sysdeps/x86/tst-strlen-rtm.c
create mode 100644 sysdeps/x86/tst-strncmp-rtm.c
create mode 100644 sysdeps/x86/tst-strrchr-rtm.c
diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
index 59e928e9..5be71ada 100644
--- a/sysdeps/x86/Makefile
+++ b/sysdeps/x86/Makefile
@@ -17,6 +17,29 @@ endif
ifeq ($(subdir),string)
sysdep_routines += cacheinfo
+
+tests += \
+ tst-memchr-rtm \
+ tst-memcmp-rtm \
+ tst-memmove-rtm \
+ tst-memrchr-rtm \
+ tst-memset-rtm \
+ tst-strchr-rtm \
+ tst-strcpy-rtm \
+ tst-strlen-rtm \
+ tst-strncmp-rtm \
+ tst-strrchr-rtm
+
+CFLAGS-tst-memchr-rtm.c += -mrtm
+CFLAGS-tst-memcmp-rtm.c += -mrtm
+CFLAGS-tst-memmove-rtm.c += -mrtm
+CFLAGS-tst-memrchr-rtm.c += -mrtm
+CFLAGS-tst-memset-rtm.c += -mrtm
+CFLAGS-tst-strchr-rtm.c += -mrtm
+CFLAGS-tst-strcpy-rtm.c += -mrtm
+CFLAGS-tst-strlen-rtm.c += -mrtm
+CFLAGS-tst-strncmp-rtm.c += -mrtm
+CFLAGS-tst-strrchr-rtm.c += -mrtm
endif
ifneq ($(enable-cet),no)
diff --git a/sysdeps/x86/tst-memchr-rtm.c b/sysdeps/x86/tst-memchr-rtm.c
new file mode 100644
index 00000000..e4749401
--- /dev/null
+++ b/sysdeps/x86/tst-memchr-rtm.c
@@ -0,0 +1,54 @@
+/* Test case for memchr inside a transactionally executing RTM region.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <tst-string-rtm.h>
+
+#define LOOP 3000
+#define STRING_SIZE 1024
+char string1[STRING_SIZE];
+
+__attribute__ ((noinline, noclone))
+static int
+prepare (void)
+{
+ memset (string1, 'a', STRING_SIZE);
+ string1[100] = 'c';
+ string1[STRING_SIZE - 100] = 'c';
+ char *p = memchr (string1, 'c', STRING_SIZE);
+ if (p == &string1[100])
+ return EXIT_SUCCESS;
+ else
+ return EXIT_FAILURE;
+}
+
+__attribute__ ((noinline, noclone))
+static int
+function (void)
+{
+ char *p = memchr (string1, 'c', STRING_SIZE);
+ if (p == &string1[100])
+ return 0;
+ else
+ return 1;
+}
+
+static int
+do_test (void)
+{
+ return do_test_1 ("memchr", LOOP, prepare, function);
+}
diff --git a/sysdeps/x86/tst-memcmp-rtm.c b/sysdeps/x86/tst-memcmp-rtm.c
new file mode 100644
index 00000000..e4c8a623
--- /dev/null
+++ b/sysdeps/x86/tst-memcmp-rtm.c
@@ -0,0 +1,52 @@
+/* Test case for memcmp inside a transactionally executing RTM region.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <tst-string-rtm.h>
+
+#define LOOP 3000
+#define STRING_SIZE 1024
+char string1[STRING_SIZE];
+char string2[STRING_SIZE];
+
+__attribute__ ((noinline, noclone))
+static int
+prepare (void)
+{
+ memset (string1, 'a', STRING_SIZE);
+ memset (string2, 'a', STRING_SIZE);
+ if (memcmp (string1, string2, STRING_SIZE) == 0)
+ return EXIT_SUCCESS;
+ else
+ return EXIT_FAILURE;
+}
+
+__attribute__ ((noinline, noclone))
+static int
+function (void)
+{
+ if (memcmp (string1, string2, STRING_SIZE) == 0)
+ return 0;
+ else
+ return 1;
+}
+
+static int
+do_test (void)
+{
+ return do_test_1 ("memcmp", LOOP, prepare, function);
+}
diff --git a/sysdeps/x86/tst-memmove-rtm.c b/sysdeps/x86/tst-memmove-rtm.c
new file mode 100644
index 00000000..4bf97ef1
--- /dev/null
+++ b/sysdeps/x86/tst-memmove-rtm.c
@@ -0,0 +1,53 @@
+/* Test case for memmove inside a transactionally executing RTM region.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <tst-string-rtm.h>
+
+#define LOOP 3000
+#define STRING_SIZE 1024
+char string1[STRING_SIZE];
+char string2[STRING_SIZE];
+
+__attribute__ ((noinline, noclone))
+static int
+prepare (void)
+{
+ memset (string1, 'a', STRING_SIZE);
+ if (memmove (string2, string1, STRING_SIZE) == string2
+ && memcmp (string2, string1, STRING_SIZE) == 0)
+ return EXIT_SUCCESS;
+ else
+ return EXIT_FAILURE;
+}
+
+__attribute__ ((noinline, noclone))
+static int
+function (void)
+{
+ if (memmove (string2, string1, STRING_SIZE) == string2
+ && memcmp (string2, string1, STRING_SIZE) == 0)
+ return 0;
+ else
+ return 1;
+}
+
+static int
+do_test (void)
+{
+ return do_test_1 ("memmove", LOOP, prepare, function);
+}
diff --git a/sysdeps/x86/tst-memrchr-rtm.c b/sysdeps/x86/tst-memrchr-rtm.c
new file mode 100644
index 00000000..a57a5a8e
--- /dev/null
+++ b/sysdeps/x86/tst-memrchr-rtm.c
@@ -0,0 +1,54 @@
+/* Test case for memrchr inside a transactionally executing RTM region.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <tst-string-rtm.h>
+
+#define LOOP 3000
+#define STRING_SIZE 1024
+char string1[STRING_SIZE];
+
+__attribute__ ((noinline, noclone))
+static int
+prepare (void)
+{
+ memset (string1, 'a', STRING_SIZE);
+ string1[100] = 'c';
+ string1[STRING_SIZE - 100] = 'c';
+ char *p = memrchr (string1, 'c', STRING_SIZE);
+ if (p == &string1[STRING_SIZE - 100])
+ return EXIT_SUCCESS;
+ else
+ return EXIT_FAILURE;
+}
+
+__attribute__ ((noinline, noclone))
+static int
+function (void)
+{
+ char *p = memrchr (string1, 'c', STRING_SIZE);
+ if (p == &string1[STRING_SIZE - 100])
+ return 0;
+ else
+ return 1;
+}
+
+static int
+do_test (void)
+{
+ return do_test_1 ("memrchr", LOOP, prepare, function);
+}
diff --git a/sysdeps/x86/tst-memset-rtm.c b/sysdeps/x86/tst-memset-rtm.c
new file mode 100644
index 00000000..bf343a4d
--- /dev/null
+++ b/sysdeps/x86/tst-memset-rtm.c
@@ -0,0 +1,45 @@
+/* Test case for memset inside a transactionally executing RTM region.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <tst-string-rtm.h>
+
+#define LOOP 3000
+#define STRING_SIZE 1024
+char string1[STRING_SIZE];
+
+__attribute__ ((noinline, noclone))
+static int
+prepare (void)
+{
+ memset (string1, 'a', STRING_SIZE);
+ return EXIT_SUCCESS;
+}
+
+__attribute__ ((noinline, noclone))
+static int
+function (void)
+{
+ memset (string1, 'a', STRING_SIZE);
+ return 0;
+}
+
+static int
+do_test (void)
+{
+ return do_test_1 ("memset", LOOP, prepare, function);
+}
diff --git a/sysdeps/x86/tst-strchr-rtm.c b/sysdeps/x86/tst-strchr-rtm.c
new file mode 100644
index 00000000..a82e29c0
--- /dev/null
+++ b/sysdeps/x86/tst-strchr-rtm.c
@@ -0,0 +1,54 @@
+/* Test case for strchr inside a transactionally executing RTM region.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <tst-string-rtm.h>
+
+#define LOOP 3000
+#define STRING_SIZE 1024
+char string1[STRING_SIZE];
+
+__attribute__ ((noinline, noclone))
+static int
+prepare (void)
+{
+ memset (string1, 'a', STRING_SIZE - 1);
+ string1[100] = 'c';
+ string1[STRING_SIZE - 100] = 'c';
+ char *p = strchr (string1, 'c');
+ if (p == &string1[100])
+ return EXIT_SUCCESS;
+ else
+ return EXIT_FAILURE;
+}
+
+__attribute__ ((noinline, noclone))
+static int
+function (void)
+{
+ char *p = strchr (string1, 'c');
+ if (p == &string1[100])
+ return 0;
+ else
+ return 1;
+}
+
+static int
+do_test (void)
+{
+ return do_test_1 ("strchr", LOOP, prepare, function);
+}
diff --git a/sysdeps/x86/tst-strcpy-rtm.c b/sysdeps/x86/tst-strcpy-rtm.c
new file mode 100644
index 00000000..2b2a583f
--- /dev/null
+++ b/sysdeps/x86/tst-strcpy-rtm.c
@@ -0,0 +1,53 @@
+/* Test case for strcpy inside a transactionally executing RTM region.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <tst-string-rtm.h>
+
+#define LOOP 3000
+#define STRING_SIZE 1024
+char string1[STRING_SIZE];
+char string2[STRING_SIZE];
+
+__attribute__ ((noinline, noclone))
+static int
+prepare (void)
+{
+ memset (string1, 'a', STRING_SIZE - 1);
+ if (strcpy (string2, string1) == string2
+ && strcmp (string2, string1) == 0)
+ return EXIT_SUCCESS;
+ else
+ return EXIT_FAILURE;
+}
+
+__attribute__ ((noinline, noclone))
+static int
+function (void)
+{
+ if (strcpy (string2, string1) == string2
+ && strcmp (string2, string1) == 0)
+ return 0;
+ else
+ return 1;
+}
+
+static int
+do_test (void)
+{
+ return do_test_1 ("strcpy", LOOP, prepare, function);
+}
diff --git a/sysdeps/x86/tst-string-rtm.h b/sysdeps/x86/tst-string-rtm.h
new file mode 100644
index 00000000..d2470afa
--- /dev/null
+++ b/sysdeps/x86/tst-string-rtm.h
@@ -0,0 +1,72 @@
+/* Test string function in a transactionally executing RTM region.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <string.h>
+#include <x86intrin.h>
+#include <sys/platform/x86.h>
+#include <support/check.h>
+#include <support/test-driver.h>
+
+static int
+do_test_1 (const char *name, unsigned int loop, int (*prepare) (void),
+ int (*function) (void))
+{
+ if (!CPU_FEATURE_USABLE (RTM))
+ return EXIT_UNSUPPORTED;
+
+ int status = prepare ();
+ if (status != EXIT_SUCCESS)
+ return status;
+
+ unsigned int i;
+ unsigned int naborts = 0;
+ unsigned int failed = 0;
+ for (i = 0; i < loop; i++)
+ {
+ failed |= function ();
+ if (_xbegin() == _XBEGIN_STARTED)
+ {
+ failed |= function ();
+ _xend();
+ }
+ else
+ {
+ failed |= function ();
+ ++naborts;
+ }
+ }
+
+ if (failed)
+ FAIL_EXIT1 ("%s() failed", name);
+
+ if (naborts)
+ {
+ /* NB: Low single digit (<= 5%) noise-level aborts are normal for
+ TSX. */
+ double rate = 100 * ((double) naborts) / ((double) loop);
+ if (rate > 5)
+ FAIL_EXIT1 ("TSX abort rate: %.2f%% (%d out of %d)",
+ rate, naborts, loop);
+ }
+
+ return EXIT_SUCCESS;
+}
+
+static int do_test (void);
+
+#include <support/test-driver.c>
diff --git a/sysdeps/x86/tst-strlen-rtm.c b/sysdeps/x86/tst-strlen-rtm.c
new file mode 100644
index 00000000..0dcf14db
--- /dev/null
+++ b/sysdeps/x86/tst-strlen-rtm.c
@@ -0,0 +1,53 @@
+/* Test case for strlen inside a transactionally executing RTM region.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <tst-string-rtm.h>
+
+#define LOOP 3000
+#define STRING_SIZE 1024
+char string1[STRING_SIZE];
+
+__attribute__ ((noinline, noclone))
+static int
+prepare (void)
+{
+ memset (string1, 'a', STRING_SIZE - 1);
+ string1[STRING_SIZE - 100] = '\0';
+ size_t len = strlen (string1);
+ if (len == STRING_SIZE - 100)
+ return EXIT_SUCCESS;
+ else
+ return EXIT_FAILURE;
+}
+
+__attribute__ ((noinline, noclone))
+static int
+function (void)
+{
+ size_t len = strlen (string1);
+ if (len == STRING_SIZE - 100)
+ return 0;
+ else
+ return 1;
+}
+
+static int
+do_test (void)
+{
+ return do_test_1 ("strlen", LOOP, prepare, function);
+}
diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c
new file mode 100644
index 00000000..236ad951
--- /dev/null
+++ b/sysdeps/x86/tst-strncmp-rtm.c
@@ -0,0 +1,52 @@
+/* Test case for strncmp inside a transactionally executing RTM region.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <tst-string-rtm.h>
+
+#define LOOP 3000
+#define STRING_SIZE 1024
+char string1[STRING_SIZE];
+char string2[STRING_SIZE];
+
+__attribute__ ((noinline, noclone))
+static int
+prepare (void)
+{
+ memset (string1, 'a', STRING_SIZE - 1);
+ memset (string2, 'a', STRING_SIZE - 1);
+ if (strncmp (string1, string2, STRING_SIZE) == 0)
+ return EXIT_SUCCESS;
+ else
+ return EXIT_FAILURE;
+}
+
+__attribute__ ((noinline, noclone))
+static int
+function (void)
+{
+ if (strncmp (string1, string2, STRING_SIZE) == 0)
+ return 0;
+ else
+ return 1;
+}
+
+static int
+do_test (void)
+{
+ return do_test_1 ("strncmp", LOOP, prepare, function);
+}
diff --git a/sysdeps/x86/tst-strrchr-rtm.c b/sysdeps/x86/tst-strrchr-rtm.c
new file mode 100644
index 00000000..e32bfaf5
--- /dev/null
+++ b/sysdeps/x86/tst-strrchr-rtm.c
@@ -0,0 +1,53 @@
+/* Test case for strrchr inside a transactionally executing RTM region.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <tst-string-rtm.h>
+
+#define LOOP 3000
+#define STRING_SIZE 1024
+char string1[STRING_SIZE];
+
+__attribute__ ((noinline, noclone))
+static int
+prepare (void)
+{
+ memset (string1, 'a', STRING_SIZE - 1);
+ string1[STRING_SIZE - 100] = 'c';
+ char *p = strrchr (string1, 'c');
+ if (p == &string1[STRING_SIZE - 100])
+ return EXIT_SUCCESS;
+ else
+ return EXIT_FAILURE;
+}
+
+__attribute__ ((noinline, noclone))
+static int
+function (void)
+{
+ char *p = strrchr (string1, 'c');
+ if (p == &string1[STRING_SIZE - 100])
+ return 0;
+ else
+ return 1;
+}
+
+static int
+do_test (void)
+{
+ return do_test_1 ("strrchr", LOOP, prepare, function);
+}
--
GitLab
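
The driver in tst-string-rtm.h above centers on one idiom: run the function under test inside _xbegin()/_xend() and count how often the transaction aborts. Reduced to its core (compile with -mrtm; the real do_test_1 additionally gates on CPU_FEATURE_USABLE (RTM) and applies the 5% noise threshold shown above), a sketch:

#include <immintrin.h>

/* Execute FN transactionally LOOP times and report how many
   transactions aborted.  A string function that ends in VZEROUPPER
   aborts the transaction every time, which is what these tests
   catch.  */
static unsigned int
count_rtm_aborts (int (*fn) (void), unsigned int loop)
{
  unsigned int naborts = 0;
  for (unsigned int i = 0; i < loop; i++)
    {
      if (_xbegin () == _XBEGIN_STARTED)
        {
          fn ();
          _xend ();
        }
      else
        ++naborts;   /* control resumes here after an abort */
    }
  return naborts;
}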


@ -1,148 +0,0 @@
From 4e2d8f352774b56078c34648b14a2412c38384f4 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sun, 7 Mar 2021 09:44:18 -0800
Subject: [PATCH] x86-64: Use ZMM16-ZMM31 in AVX512 memset family functions
Content-type: text/plain; charset=UTF-8
Update ifunc-memset.h/ifunc-wmemset.h to select the function optimized
with AVX512 instructions using ZMM16-ZMM31 registers to avoid RTM abort
with usable AVX512VL and AVX512BW since VZEROUPPER isn't needed at
function exit.
---
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 14 +++++++++-----
sysdeps/x86_64/multiarch/ifunc-memset.h | 13 ++++++++-----
sysdeps/x86_64/multiarch/ifunc-wmemset.h | 12 ++++++------
.../multiarch/memset-avx512-unaligned-erms.S | 16 ++++++++--------
4 files changed, 31 insertions(+), 24 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index c1efeec0..d969a156 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -211,10 +211,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (AVX512BW)),
__memset_chk_evex_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __memset_chk,
- CPU_FEATURE_USABLE (AVX512F),
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
__memset_chk_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __memset_chk,
- CPU_FEATURE_USABLE (AVX512F),
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
__memset_chk_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, __memset_chk,
CPU_FEATURE_USABLE (AVX512F),
@@ -252,10 +254,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (AVX512BW)),
__memset_evex_unaligned_erms)
IFUNC_IMPL_ADD (array, i, memset,
- CPU_FEATURE_USABLE (AVX512F),
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
__memset_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, memset,
- CPU_FEATURE_USABLE (AVX512F),
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
__memset_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, memset,
CPU_FEATURE_USABLE (AVX512F),
@@ -719,7 +723,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
CPU_FEATURE_USABLE (AVX512VL),
__wmemset_evex_unaligned)
IFUNC_IMPL_ADD (array, i, wmemset,
- CPU_FEATURE_USABLE (AVX512F),
+ CPU_FEATURE_USABLE (AVX512VL),
__wmemset_avx512_unaligned))
#ifdef SHARED
diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h
index 6f3375cc..19795938 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memset.h
@@ -53,13 +53,16 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
&& !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
{
- if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
- return OPTIMIZE (avx512_no_vzeroupper);
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+ {
+ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+ return OPTIMIZE (avx512_unaligned_erms);
- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
- return OPTIMIZE (avx512_unaligned_erms);
+ return OPTIMIZE (avx512_unaligned);
+ }
- return OPTIMIZE (avx512_unaligned);
+ return OPTIMIZE (avx512_no_vzeroupper);
}
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
index bdc94c6c..98c5d406 100644
--- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
@@ -33,13 +33,13 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
- && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
- && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
- return OPTIMIZE (avx512_unaligned);
-
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
- return OPTIMIZE (evex_unaligned);
+ {
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+ return OPTIMIZE (avx512_unaligned);
+
+ return OPTIMIZE (evex_unaligned);
+ }
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (avx2_unaligned_rtm);
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
index 0783979c..22e7b187 100644
--- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
@@ -1,22 +1,22 @@
#if IS_IN (libc)
# define VEC_SIZE 64
-# define VEC(i) zmm##i
+# define XMM0 xmm16
+# define YMM0 ymm16
+# define VEC0 zmm16
+# define VEC(i) VEC##i
# define VMOVU vmovdqu64
# define VMOVA vmovdqa64
+# define VZEROUPPER
# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
- vmovd d, %xmm0; \
movq r, %rax; \
- vpbroadcastb %xmm0, %xmm0; \
- vpbroadcastq %xmm0, %zmm0
+ vpbroadcastb d, %VEC0
# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
- vmovd d, %xmm0; \
movq r, %rax; \
- vpbroadcastd %xmm0, %xmm0; \
- vpbroadcastq %xmm0, %zmm0
+ vpbroadcastd d, %VEC0
-# define SECTION(p) p##.avx512
+# define SECTION(p) p##.evex512
# define MEMSET_SYMBOL(p,s) p##_avx512_##s
# define WMEMSET_SYMBOL(p,s) p##_avx512_##s
--
GitLab


@ -1,230 +0,0 @@
From b304fc201d2f6baf52ea790df8643e99772243cd Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Mon, 21 Jan 2019 11:25:56 -0800
Subject: [PATCH] x86-64 memcmp/wmemcmp: Properly handle the length parameter
[BZ# 24097]
Content-type: text/plain; charset=UTF-8
On x32, the size_t parameter may be passed in the lower 32 bits of a
64-bit register with the non-zero upper 32 bits. The string/memory
functions written in assembly can only use the lower 32 bits of a
64-bit register as length or must clear the upper 32 bits before using
the full 64-bit register for length.
This patch fixes memcmp/wmemcmp for x32. Tested on x86-64 and x32. On
x86-64, libc.so is the same with and without the fix.
[BZ# 24097]
CVE-2019-6488
* sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S: Use RDX_LP for
length. Clear the upper 32 bits of RDX register.
* sysdeps/x86_64/multiarch/memcmp-sse4.S: Likewise.
* sysdeps/x86_64/multiarch/memcmp-ssse3.S: Likewise.
* sysdeps/x86_64/x32/Makefile (tests): Add tst-size_t-memcmp and
tst-size_t-wmemcmp.
* sysdeps/x86_64/x32/tst-size_t-memcmp.c: New file.
* sysdeps/x86_64/x32/tst-size_t-wmemcmp.c: Likewise.
---
sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S | 7 +-
sysdeps/x86_64/multiarch/memcmp-sse4.S | 9 ++-
sysdeps/x86_64/multiarch/memcmp-ssse3.S | 7 +-
sysdeps/x86_64/x32/Makefile | 4 +-
sysdeps/x86_64/x32/tst-size_t-memcmp.c | 76 ++++++++++++++++++++
sysdeps/x86_64/x32/tst-size_t-wmemcmp.c | 20 ++++++
6 files changed, 114 insertions(+), 9 deletions(-)
create mode 100644 sysdeps/x86_64/x32/tst-size_t-memcmp.c
create mode 100644 sysdeps/x86_64/x32/tst-size_t-wmemcmp.c
Conflicts:
ChangeLog
(removed)
diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
index 30f764c3..e3a35b89 100644
--- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
+++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
@@ -58,9 +58,12 @@
.section .text.avx,"ax",@progbits
ENTRY (MEMCMP)
# ifdef USE_AS_WMEMCMP
- shl $2, %rdx
+ shl $2, %RDX_LP
+# elif defined __ILP32__
+ /* Clear the upper 32 bits. */
+ movl %edx, %edx
# endif
- cmpq $VEC_SIZE, %rdx
+ cmp $VEC_SIZE, %RDX_LP
jb L(less_vec)
/* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */
diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S
index 8e164f2c..302900f5 100644
--- a/sysdeps/x86_64/multiarch/memcmp-sse4.S
+++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S
@@ -42,13 +42,16 @@
.section .text.sse4.1,"ax",@progbits
ENTRY (MEMCMP)
# ifdef USE_AS_WMEMCMP
- shl $2, %rdx
+ shl $2, %RDX_LP
+# elif defined __ILP32__
+ /* Clear the upper 32 bits. */
+ mov %edx, %edx
# endif
pxor %xmm0, %xmm0
- cmp $79, %rdx
+ cmp $79, %RDX_LP
ja L(79bytesormore)
# ifndef USE_AS_WMEMCMP
- cmp $1, %rdx
+ cmp $1, %RDX_LP
je L(firstbyte)
# endif
add %rdx, %rsi
diff --git a/sysdeps/x86_64/multiarch/memcmp-ssse3.S b/sysdeps/x86_64/multiarch/memcmp-ssse3.S
index 6f76c641..69d030fc 100644
--- a/sysdeps/x86_64/multiarch/memcmp-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcmp-ssse3.S
@@ -33,9 +33,12 @@
atom_text_section
ENTRY (MEMCMP)
# ifdef USE_AS_WMEMCMP
- shl $2, %rdx
- test %rdx, %rdx
+ shl $2, %RDX_LP
+ test %RDX_LP, %RDX_LP
jz L(equal)
+# elif defined __ILP32__
+ /* Clear the upper 32 bits. */
+ mov %edx, %edx
# endif
mov %rdx, %rcx
mov %rdi, %rdx
diff --git a/sysdeps/x86_64/x32/Makefile b/sysdeps/x86_64/x32/Makefile
index 7d528889..ddec7f04 100644
--- a/sysdeps/x86_64/x32/Makefile
+++ b/sysdeps/x86_64/x32/Makefile
@@ -6,9 +6,9 @@ CFLAGS-s_llround.c += -fno-builtin-lround
endif
ifeq ($(subdir),string)
-tests += tst-size_t-memchr
+tests += tst-size_t-memchr tst-size_t-memcmp
endif
ifeq ($(subdir),wcsmbs)
-tests += tst-size_t-wmemchr
+tests += tst-size_t-wmemchr tst-size_t-wmemcmp
endif
diff --git a/sysdeps/x86_64/x32/tst-size_t-memcmp.c b/sysdeps/x86_64/x32/tst-size_t-memcmp.c
new file mode 100644
index 00000000..9bd6fdb4
--- /dev/null
+++ b/sysdeps/x86_64/x32/tst-size_t-memcmp.c
@@ -0,0 +1,76 @@
+/* Test memcmp with size_t in the lower 32 bits of 64-bit register.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define TEST_MAIN
+#ifdef WIDE
+# define TEST_NAME "wmemcmp"
+#else
+# define TEST_NAME "memcmp"
+#endif
+
+#include "test-size_t.h"
+
+#ifdef WIDE
+# include <inttypes.h>
+# include <wchar.h>
+
+# define MEMCMP wmemcmp
+# define CHAR wchar_t
+#else
+# define MEMCMP memcmp
+# define CHAR char
+#endif
+
+IMPL (MEMCMP, 1)
+
+typedef int (*proto_t) (const CHAR *, const CHAR *, size_t);
+
+static int
+__attribute__ ((noinline, noclone))
+do_memcmp (parameter_t a, parameter_t b)
+{
+ return CALL (&b, a.p, b.p, a.len);
+}
+
+static int
+test_main (void)
+{
+ test_init ();
+
+ parameter_t dest = { { page_size / sizeof (CHAR) }, buf1 };
+ parameter_t src = { { 0 }, buf2 };
+
+ memcpy (buf1, buf2, page_size);
+
+ int ret = 0;
+ FOR_EACH_IMPL (impl, 0)
+ {
+ src.fn = impl->fn;
+ int res = do_memcmp (dest, src);
+ if (res)
+ {
+ error (0, 0, "Wrong result in function %s: %i != 0",
+ impl->name, res);
+ ret = 1;
+ }
+ }
+
+ return ret ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/x86_64/x32/tst-size_t-wmemcmp.c b/sysdeps/x86_64/x32/tst-size_t-wmemcmp.c
new file mode 100644
index 00000000..e8b5ffd0
--- /dev/null
+++ b/sysdeps/x86_64/x32/tst-size_t-wmemcmp.c
@@ -0,0 +1,20 @@
+/* Test wmemcmp with size_t in the lower 32 bits of 64-bit register.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define WIDE 1
+#include "tst-size_t-memcmp.c"
--
GitLab
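
The x32 bug class fixed above is easy to restate in C: under ILP32, size_t is 32 bits wide, but it arrives in a 64-bit register whose upper half is not guaranteed to be zero. The one-instruction fix, movl %edx, %edx, is simply a zero-extension. An illustrative sketch (RAW is a hypothetical model of the incoming register, not real glibc code):

#include <stdint.h>

/* Using the full 64-bit register directly (the bug) reads 32 bits of
   garbage; truncating first (the fix) does not.  */
static uint64_t
length_from_register (uint64_t raw)
{
  return (uint32_t) raw;   /* zero-extends, like movl %edx, %edx */
}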


@ -1,164 +0,0 @@
From e4fda4631017e49d4ee5a2755db34289b6860fa4 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sun, 7 Mar 2021 09:45:23 -0800
Subject: [PATCH] x86-64: Use ZMM16-ZMM31 in AVX512 memmove family functions
Content-type: text/plain; charset=UTF-8
Update ifunc-memmove.h to select the function optimized with AVX512
instructions using ZMM16-ZMM31 registers to avoid RTM abort with usable
AVX512VL since VZEROUPPER isn't needed at function exit.
---
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 24 +++++++++---------
sysdeps/x86_64/multiarch/ifunc-memmove.h | 12 +++++----
.../multiarch/memmove-avx512-unaligned-erms.S | 25 +++++++++++++++++--
3 files changed, 42 insertions(+), 19 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index d969a156..fec384f6 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -83,10 +83,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
CPU_FEATURE_USABLE (AVX512F),
__memmove_chk_avx512_no_vzeroupper)
IFUNC_IMPL_ADD (array, i, __memmove_chk,
- CPU_FEATURE_USABLE (AVX512F),
+ CPU_FEATURE_USABLE (AVX512VL),
__memmove_chk_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, __memmove_chk,
- CPU_FEATURE_USABLE (AVX512F),
+ CPU_FEATURE_USABLE (AVX512VL),
__memmove_chk_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __memmove_chk,
CPU_FEATURE_USABLE (AVX),
@@ -148,10 +148,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
CPU_FEATURE_USABLE (AVX512F),
__memmove_avx512_no_vzeroupper)
IFUNC_IMPL_ADD (array, i, memmove,
- CPU_FEATURE_USABLE (AVX512F),
+ CPU_FEATURE_USABLE (AVX512VL),
__memmove_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, memmove,
- CPU_FEATURE_USABLE (AVX512F),
+ CPU_FEATURE_USABLE (AVX512VL),
__memmove_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, memmove, CPU_FEATURE_USABLE (SSSE3),
__memmove_ssse3_back)
@@ -733,10 +733,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
CPU_FEATURE_USABLE (AVX512F),
__memcpy_chk_avx512_no_vzeroupper)
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
- CPU_FEATURE_USABLE (AVX512F),
+ CPU_FEATURE_USABLE (AVX512VL),
__memcpy_chk_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
- CPU_FEATURE_USABLE (AVX512F),
+ CPU_FEATURE_USABLE (AVX512VL),
__memcpy_chk_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
CPU_FEATURE_USABLE (AVX),
@@ -802,10 +802,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
CPU_FEATURE_USABLE (AVX512F),
__memcpy_avx512_no_vzeroupper)
IFUNC_IMPL_ADD (array, i, memcpy,
- CPU_FEATURE_USABLE (AVX512F),
+ CPU_FEATURE_USABLE (AVX512VL),
__memcpy_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, memcpy,
- CPU_FEATURE_USABLE (AVX512F),
+ CPU_FEATURE_USABLE (AVX512VL),
__memcpy_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, memcpy, 1,
@@ -819,10 +819,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
CPU_FEATURE_USABLE (AVX512F),
__mempcpy_chk_avx512_no_vzeroupper)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
- CPU_FEATURE_USABLE (AVX512F),
+ CPU_FEATURE_USABLE (AVX512VL),
__mempcpy_chk_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
- CPU_FEATURE_USABLE (AVX512F),
+ CPU_FEATURE_USABLE (AVX512VL),
__mempcpy_chk_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
CPU_FEATURE_USABLE (AVX),
@@ -864,10 +864,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
CPU_FEATURE_USABLE (AVX512F),
__mempcpy_avx512_no_vzeroupper)
IFUNC_IMPL_ADD (array, i, mempcpy,
- CPU_FEATURE_USABLE (AVX512F),
+ CPU_FEATURE_USABLE (AVX512VL),
__mempcpy_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, mempcpy,
- CPU_FEATURE_USABLE (AVX512F),
+ CPU_FEATURE_USABLE (AVX512VL),
__mempcpy_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, mempcpy,
CPU_FEATURE_USABLE (AVX),
diff --git a/sysdeps/x86_64/multiarch/ifunc-memmove.h b/sysdeps/x86_64/multiarch/ifunc-memmove.h
index fa09b9fb..014e95c7 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memmove.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memmove.h
@@ -56,13 +56,15 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
&& !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
{
- if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
- return OPTIMIZE (avx512_no_vzeroupper);
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
+ {
+ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+ return OPTIMIZE (avx512_unaligned_erms);
- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
- return OPTIMIZE (avx512_unaligned_erms);
+ return OPTIMIZE (avx512_unaligned);
+ }
- return OPTIMIZE (avx512_unaligned);
+ return OPTIMIZE (avx512_no_vzeroupper);
}
if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
index aac1515c..848848ab 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
@@ -1,11 +1,32 @@
#if IS_IN (libc)
# define VEC_SIZE 64
-# define VEC(i) zmm##i
+# define XMM0 xmm16
+# define XMM1 xmm17
+# define YMM0 ymm16
+# define YMM1 ymm17
+# define VEC0 zmm16
+# define VEC1 zmm17
+# define VEC2 zmm18
+# define VEC3 zmm19
+# define VEC4 zmm20
+# define VEC5 zmm21
+# define VEC6 zmm22
+# define VEC7 zmm23
+# define VEC8 zmm24
+# define VEC9 zmm25
+# define VEC10 zmm26
+# define VEC11 zmm27
+# define VEC12 zmm28
+# define VEC13 zmm29
+# define VEC14 zmm30
+# define VEC15 zmm31
+# define VEC(i) VEC##i
# define VMOVNT vmovntdq
# define VMOVU vmovdqu64
# define VMOVA vmovdqa64
+# define VZEROUPPER
-# define SECTION(p) p##.avx512
+# define SECTION(p) p##.evex512
# define MEMMOVE_SYMBOL(p,s) p##_avx512_##s
# include "memmove-vec-unaligned-erms.S"
--
GitLab


@ -1,71 +0,0 @@
From 595c22ecd8e87a27fd19270ed30fdbae9ad25426 Mon Sep 17 00:00:00 2001
From: Sunil K Pandey <skpgkp2@gmail.com>
Date: Thu, 1 Apr 2021 15:47:04 -0700
Subject: [PATCH] x86-64: Fix ifdef indentation in strlen-evex.S
Content-type: text/plain; charset=UTF-8
Fix the indentation of some ifdef directives in strlen-evex.S, which
are off by one and confusing to read.
---
sysdeps/x86_64/multiarch/strlen-evex.S | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/strlen-evex.S b/sysdeps/x86_64/multiarch/strlen-evex.S
index cd022509..05838190 100644
--- a/sysdeps/x86_64/multiarch/strlen-evex.S
+++ b/sysdeps/x86_64/multiarch/strlen-evex.S
@@ -276,10 +276,10 @@ L(last_2x_vec):
.p2align 4
L(first_vec_x0_check):
tzcntl %eax, %eax
-# ifdef USE_AS_WCSLEN
+# ifdef USE_AS_WCSLEN
/* NB: Multiply wchar_t count by 4 to get the number of bytes. */
sall $2, %eax
-# endif
+# endif
/* Check the end of data. */
cmpq %rax, %rsi
jbe L(max)
@@ -293,10 +293,10 @@ L(first_vec_x0_check):
.p2align 4
L(first_vec_x1_check):
tzcntl %eax, %eax
-# ifdef USE_AS_WCSLEN
+# ifdef USE_AS_WCSLEN
/* NB: Multiply wchar_t count by 4 to get the number of bytes. */
sall $2, %eax
-# endif
+# endif
/* Check the end of data. */
cmpq %rax, %rsi
jbe L(max)
@@ -311,10 +311,10 @@ L(first_vec_x1_check):
.p2align 4
L(first_vec_x2_check):
tzcntl %eax, %eax
-# ifdef USE_AS_WCSLEN
+# ifdef USE_AS_WCSLEN
/* NB: Multiply wchar_t count by 4 to get the number of bytes. */
sall $2, %eax
-# endif
+# endif
/* Check the end of data. */
cmpq %rax, %rsi
jbe L(max)
@@ -329,10 +329,10 @@ L(first_vec_x2_check):
.p2align 4
L(first_vec_x3_check):
tzcntl %eax, %eax
-# ifdef USE_AS_WCSLEN
+# ifdef USE_AS_WCSLEN
/* NB: Multiply wchar_t count by 4 to get the number of bytes. */
sall $2, %eax
-# endif
+# endif
/* Check the end of data. */
cmpq %rax, %rsi
jbe L(max)
--
GitLab
View file
@ -1,51 +0,0 @@
From 55bf411b451c13f0fb7ff3d3bf9a820020b45df1 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Mon, 19 Apr 2021 07:07:21 -0700
Subject: [PATCH] x86-64: Require BMI2 for __strlen_evex and __strnlen_evex
Content-type: text/plain; charset=UTF-8
Since __strlen_evex and __strnlen_evex added by
commit 1fd8c163a83d96ace1ff78fa6bac7aee084f6f77
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Fri Mar 5 06:24:52 2021 -0800
x86-64: Add ifunc-avx2.h functions with 256-bit EVEX
use sarx:
c4 e2 6a f7 c0 sarx %edx,%eax,%eax
require BMI2 for __strlen_evex and __strnlen_evex in ifunc-impl-list.c.
ifunc-avx2.h already requires BMI2 for EVEX implementation.
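For intuition, the requirement this patch adds can be sketched in plain C
with GCC's CPU-feature builtins (a sketch only; resolve_strlen and
strlen_evex_variant are illustrative names, not glibc interfaces):

    #include <string.h>

    /* Stand-in for the EVEX implementation; hypothetical name. */
    extern size_t strlen_evex_variant (const char *);

    /* ifunc-style resolver: pick the EVEX path only when every
       extension it uses is available.  sarx is a BMI2 instruction,
       so AVX512VL and AVX512BW alone are not sufficient.  */
    static size_t (*resolve_strlen (void)) (const char *)
    {
      __builtin_cpu_init ();
      if (__builtin_cpu_supports ("avx512vl")
          && __builtin_cpu_supports ("avx512bw")
          && __builtin_cpu_supports ("bmi2"))
        return strlen_evex_variant;
      return strlen;
    }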
---
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index fec384f6..cbfc1a5d 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -293,7 +293,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strlen_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strlen,
(CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
__strlen_evex)
IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2))
@@ -308,7 +309,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strnlen_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strnlen,
(CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
__strnlen_evex)
IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2))
--
GitLab
View file
@ -1,584 +0,0 @@
From acfd088a1963ba51cd83c78f95c0ab25ead79e04 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Mon, 3 May 2021 03:01:58 -0400
Subject: [PATCH] x86: Optimize memchr-avx2.S
Content-type: text/plain; charset=UTF-8
No bug. This commit optimizes memchr-avx2.S. The optimizations include
replacing some branches with cmovcc, avoiding some branches entirely
in the less_4x_vec case, making the page cross logic less strict,
and saving a few instructions in the loop return path. test-memchr,
test-rawmemchr, and test-wmemchr are all passing.
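The "less strict" page-cross logic reduces to a single offset test: a
VEC_SIZE load only needs the slow path when it would actually span a
4096-byte page. A minimal C model of the new check (illustrative, not
the patched code; may_cross_page is a hypothetical name):

    #include <stdbool.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096
    #define VEC_SIZE  32

    /* Mirrors the new andl $(PAGE_SIZE - 1) / cmpl $(PAGE_SIZE -
       VEC_SIZE) / ja L(cross_page_boundary) sequence: a 32-byte load
       at p stays within one page iff the page offset of p is at most
       PAGE_SIZE - VEC_SIZE.  */
    static bool
    may_cross_page (const void *p)
    {
      return ((uintptr_t) p & (PAGE_SIZE - 1)) > PAGE_SIZE - VEC_SIZE;
    }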
Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
---
sysdeps/x86_64/multiarch/memchr-avx2.S | 425 ++++++++++++++-----------
1 file changed, 247 insertions(+), 178 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S
index cf893e77..b377f22e 100644
--- a/sysdeps/x86_64/multiarch/memchr-avx2.S
+++ b/sysdeps/x86_64/multiarch/memchr-avx2.S
@@ -26,8 +26,22 @@
# ifdef USE_AS_WMEMCHR
# define VPCMPEQ vpcmpeqd
+# define VPBROADCAST vpbroadcastd
+# define CHAR_SIZE 4
# else
# define VPCMPEQ vpcmpeqb
+# define VPBROADCAST vpbroadcastb
+# define CHAR_SIZE 1
+# endif
+
+# ifdef USE_AS_RAWMEMCHR
+# define ERAW_PTR_REG ecx
+# define RRAW_PTR_REG rcx
+# define ALGN_PTR_REG rdi
+# else
+# define ERAW_PTR_REG edi
+# define RRAW_PTR_REG rdi
+# define ALGN_PTR_REG rcx
# endif
# ifndef VZEROUPPER
@@ -39,6 +53,7 @@
# endif
# define VEC_SIZE 32
+# define PAGE_SIZE 4096
.section SECTION(.text),"ax",@progbits
ENTRY (MEMCHR)
@@ -47,295 +62,349 @@ ENTRY (MEMCHR)
test %RDX_LP, %RDX_LP
jz L(null)
# endif
- movl %edi, %ecx
- /* Broadcast CHAR to YMM0. */
- vmovd %esi, %xmm0
# ifdef USE_AS_WMEMCHR
shl $2, %RDX_LP
- vpbroadcastd %xmm0, %ymm0
# else
# ifdef __ILP32__
/* Clear the upper 32 bits. */
movl %edx, %edx
# endif
- vpbroadcastb %xmm0, %ymm0
# endif
+ /* Broadcast CHAR to YMMMATCH. */
+ vmovd %esi, %xmm0
+ VPBROADCAST %xmm0, %ymm0
/* Check if we may cross page boundary with one vector load. */
- andl $(2 * VEC_SIZE - 1), %ecx
- cmpl $VEC_SIZE, %ecx
- ja L(cros_page_boundary)
+ movl %edi, %eax
+ andl $(PAGE_SIZE - 1), %eax
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
+ ja L(cross_page_boundary)
/* Check the first VEC_SIZE bytes. */
- VPCMPEQ (%rdi), %ymm0, %ymm1
+ VPCMPEQ (%rdi), %ymm0, %ymm1
vpmovmskb %ymm1, %eax
- testl %eax, %eax
-
# ifndef USE_AS_RAWMEMCHR
- jnz L(first_vec_x0_check)
- /* Adjust length and check the end of data. */
- subq $VEC_SIZE, %rdx
- jbe L(zero)
-# else
- jnz L(first_vec_x0)
+ /* If length < CHAR_PER_VEC handle special. */
+ cmpq $VEC_SIZE, %rdx
+ jbe L(first_vec_x0)
# endif
-
- /* Align data for aligned loads in the loop. */
- addq $VEC_SIZE, %rdi
- andl $(VEC_SIZE - 1), %ecx
- andq $-VEC_SIZE, %rdi
+ testl %eax, %eax
+ jz L(aligned_more)
+ tzcntl %eax, %eax
+ addq %rdi, %rax
+ VZEROUPPER_RETURN
# ifndef USE_AS_RAWMEMCHR
- /* Adjust length. */
- addq %rcx, %rdx
+ .p2align 5
+L(first_vec_x0):
+ /* Check if first match was before length. */
+ tzcntl %eax, %eax
+ xorl %ecx, %ecx
+ cmpl %eax, %edx
+ leaq (%rdi, %rax), %rax
+ cmovle %rcx, %rax
+ VZEROUPPER_RETURN
- subq $(VEC_SIZE * 4), %rdx
- jbe L(last_4x_vec_or_less)
+L(null):
+ xorl %eax, %eax
+ ret
# endif
- jmp L(more_4x_vec)
-
.p2align 4
-L(cros_page_boundary):
- andl $(VEC_SIZE - 1), %ecx
- andq $-VEC_SIZE, %rdi
- VPCMPEQ (%rdi), %ymm0, %ymm1
+L(cross_page_boundary):
+ /* Save pointer before aligning as its original value is necessary
+ for computer return address if byte is found or adjusting length
+ if it is not and this is memchr. */
+ movq %rdi, %rcx
+ /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr and
+ rdi for rawmemchr. */
+ orq $(VEC_SIZE - 1), %ALGN_PTR_REG
+ VPCMPEQ -(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1
vpmovmskb %ymm1, %eax
+# ifndef USE_AS_RAWMEMCHR
+ /* Calculate length until end of page (length checked for a
+ match). */
+ leaq 1(%ALGN_PTR_REG), %rsi
+ subq %RRAW_PTR_REG, %rsi
+# endif
/* Remove the leading bytes. */
- sarl %cl, %eax
- testl %eax, %eax
- jz L(aligned_more)
- tzcntl %eax, %eax
+ sarxl %ERAW_PTR_REG, %eax, %eax
# ifndef USE_AS_RAWMEMCHR
/* Check the end of data. */
- cmpq %rax, %rdx
- jbe L(zero)
+ cmpq %rsi, %rdx
+ jbe L(first_vec_x0)
# endif
- addq %rdi, %rax
- addq %rcx, %rax
+ testl %eax, %eax
+ jz L(cross_page_continue)
+ tzcntl %eax, %eax
+ addq %RRAW_PTR_REG, %rax
L(return_vzeroupper):
ZERO_UPPER_VEC_REGISTERS_RETURN
.p2align 4
-L(aligned_more):
-# ifndef USE_AS_RAWMEMCHR
- /* Calculate "rdx + rcx - VEC_SIZE" with "rdx - (VEC_SIZE - rcx)"
- instead of "(rdx + rcx) - VEC_SIZE" to void possible addition
- overflow. */
- negq %rcx
- addq $VEC_SIZE, %rcx
+L(first_vec_x1):
+ tzcntl %eax, %eax
+ incq %rdi
+ addq %rdi, %rax
+ VZEROUPPER_RETURN
- /* Check the end of data. */
- subq %rcx, %rdx
- jbe L(zero)
-# endif
+ .p2align 4
+L(first_vec_x2):
+ tzcntl %eax, %eax
+ addq $(VEC_SIZE + 1), %rdi
+ addq %rdi, %rax
+ VZEROUPPER_RETURN
+
+ .p2align 4
+L(first_vec_x3):
+ tzcntl %eax, %eax
+ addq $(VEC_SIZE * 2 + 1), %rdi
+ addq %rdi, %rax
+ VZEROUPPER_RETURN
- addq $VEC_SIZE, %rdi
-# ifndef USE_AS_RAWMEMCHR
- subq $(VEC_SIZE * 4), %rdx
- jbe L(last_4x_vec_or_less)
-# endif
+ .p2align 4
+L(first_vec_x4):
+ tzcntl %eax, %eax
+ addq $(VEC_SIZE * 3 + 1), %rdi
+ addq %rdi, %rax
+ VZEROUPPER_RETURN
-L(more_4x_vec):
+ .p2align 4
+L(aligned_more):
/* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
since data is only aligned to VEC_SIZE. */
- VPCMPEQ (%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
- testl %eax, %eax
- jnz L(first_vec_x0)
- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+# ifndef USE_AS_RAWMEMCHR
+L(cross_page_continue):
+ /* Align data to VEC_SIZE - 1. */
+ xorl %ecx, %ecx
+ subl %edi, %ecx
+ orq $(VEC_SIZE - 1), %rdi
+ /* esi is for adjusting length to see if near the end. */
+ leal (VEC_SIZE * 4 + 1)(%rdi, %rcx), %esi
+# else
+ orq $(VEC_SIZE - 1), %rdi
+L(cross_page_continue):
+# endif
+ /* Load first VEC regardless. */
+ VPCMPEQ 1(%rdi), %ymm0, %ymm1
vpmovmskb %ymm1, %eax
+# ifndef USE_AS_RAWMEMCHR
+ /* Adjust length. If near end handle specially. */
+ subq %rsi, %rdx
+ jbe L(last_4x_vec_or_less)
+# endif
testl %eax, %eax
jnz L(first_vec_x1)
- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
+ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
vpmovmskb %ymm1, %eax
testl %eax, %eax
jnz L(first_vec_x2)
- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
vpmovmskb %ymm1, %eax
testl %eax, %eax
jnz L(first_vec_x3)
- addq $(VEC_SIZE * 4), %rdi
+ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x4)
# ifndef USE_AS_RAWMEMCHR
+ /* Check if at last VEC_SIZE * 4 length. */
subq $(VEC_SIZE * 4), %rdx
- jbe L(last_4x_vec_or_less)
-# endif
-
- /* Align data to 4 * VEC_SIZE. */
- movq %rdi, %rcx
- andl $(4 * VEC_SIZE - 1), %ecx
- andq $-(4 * VEC_SIZE), %rdi
-
-# ifndef USE_AS_RAWMEMCHR
- /* Adjust length. */
+ jbe L(last_4x_vec_or_less_cmpeq)
+ /* Align data to VEC_SIZE * 4 - 1 for the loop and readjust
+ length. */
+ incq %rdi
+ movl %edi, %ecx
+ orq $(VEC_SIZE * 4 - 1), %rdi
+ andl $(VEC_SIZE * 4 - 1), %ecx
addq %rcx, %rdx
+# else
+ /* Align data to VEC_SIZE * 4 - 1 for loop. */
+ incq %rdi
+ orq $(VEC_SIZE * 4 - 1), %rdi
# endif
+ /* Compare 4 * VEC at a time forward. */
.p2align 4
L(loop_4x_vec):
- /* Compare 4 * VEC at a time forward. */
- VPCMPEQ (%rdi), %ymm0, %ymm1
- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm2
- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm3
- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm4
-
+ VPCMPEQ 1(%rdi), %ymm0, %ymm1
+ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm2
+ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm3
+ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm4
vpor %ymm1, %ymm2, %ymm5
vpor %ymm3, %ymm4, %ymm6
vpor %ymm5, %ymm6, %ymm5
- vpmovmskb %ymm5, %eax
- testl %eax, %eax
- jnz L(4x_vec_end)
-
- addq $(VEC_SIZE * 4), %rdi
-
+ vpmovmskb %ymm5, %ecx
# ifdef USE_AS_RAWMEMCHR
- jmp L(loop_4x_vec)
+ subq $-(VEC_SIZE * 4), %rdi
+ testl %ecx, %ecx
+ jz L(loop_4x_vec)
# else
- subq $(VEC_SIZE * 4), %rdx
- ja L(loop_4x_vec)
+ testl %ecx, %ecx
+ jnz L(loop_4x_vec_end)
-L(last_4x_vec_or_less):
- /* Less than 4 * VEC and aligned to VEC_SIZE. */
- addl $(VEC_SIZE * 2), %edx
- jle L(last_2x_vec)
+ subq $-(VEC_SIZE * 4), %rdi
- VPCMPEQ (%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
- testl %eax, %eax
- jnz L(first_vec_x0)
+ subq $(VEC_SIZE * 4), %rdx
+ ja L(loop_4x_vec)
- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+ /* Fall through into less than 4 remaining vectors of length case.
+ */
+ VPCMPEQ (VEC_SIZE * 0 + 1)(%rdi), %ymm0, %ymm1
vpmovmskb %ymm1, %eax
+ .p2align 4
+L(last_4x_vec_or_less):
+ /* Check if first VEC contained match. */
testl %eax, %eax
- jnz L(first_vec_x1)
+ jnz L(first_vec_x1_check)
- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
- testl %eax, %eax
+ /* If remaining length > VEC_SIZE * 2. */
+ addl $(VEC_SIZE * 2), %edx
+ jg L(last_4x_vec)
- jnz L(first_vec_x2_check)
- subl $VEC_SIZE, %edx
- jle L(zero)
+L(last_2x_vec):
+ /* If remaining length < VEC_SIZE. */
+ addl $VEC_SIZE, %edx
+ jle L(zero_end)
- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+ /* Check VEC2 and compare any match with remaining length. */
+ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
vpmovmskb %ymm1, %eax
- testl %eax, %eax
-
- jnz L(first_vec_x3_check)
- xorl %eax, %eax
+ tzcntl %eax, %eax
+ cmpl %eax, %edx
+ jbe L(set_zero_end)
+ addq $(VEC_SIZE + 1), %rdi
+ addq %rdi, %rax
+L(zero_end):
VZEROUPPER_RETURN
.p2align 4
-L(last_2x_vec):
- addl $(VEC_SIZE * 2), %edx
- VPCMPEQ (%rdi), %ymm0, %ymm1
+L(loop_4x_vec_end):
+# endif
+ /* rawmemchr will fall through into this if match was found in
+ loop. */
+
vpmovmskb %ymm1, %eax
testl %eax, %eax
+ jnz L(last_vec_x1_return)
- jnz L(first_vec_x0_check)
- subl $VEC_SIZE, %edx
- jle L(zero)
-
- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+ vpmovmskb %ymm2, %eax
testl %eax, %eax
- jnz L(first_vec_x1_check)
- xorl %eax, %eax
- VZEROUPPER_RETURN
+ jnz L(last_vec_x2_return)
- .p2align 4
-L(first_vec_x0_check):
- tzcntl %eax, %eax
- /* Check the end of data. */
- cmpq %rax, %rdx
- jbe L(zero)
+ vpmovmskb %ymm3, %eax
+ /* Combine VEC3 matches (eax) with VEC4 matches (ecx). */
+ salq $32, %rcx
+ orq %rcx, %rax
+ tzcntq %rax, %rax
+# ifdef USE_AS_RAWMEMCHR
+ subq $(VEC_SIZE * 2 - 1), %rdi
+# else
+ subq $-(VEC_SIZE * 2 + 1), %rdi
+# endif
addq %rdi, %rax
VZEROUPPER_RETURN
+# ifndef USE_AS_RAWMEMCHR
.p2align 4
L(first_vec_x1_check):
tzcntl %eax, %eax
- /* Check the end of data. */
- cmpq %rax, %rdx
- jbe L(zero)
- addq $VEC_SIZE, %rax
+ /* Adjust length. */
+ subl $-(VEC_SIZE * 4), %edx
+ /* Check if match within remaining length. */
+ cmpl %eax, %edx
+ jbe L(set_zero_end)
+ incq %rdi
addq %rdi, %rax
VZEROUPPER_RETURN
+ .p2align 4
+L(set_zero_end):
+ xorl %eax, %eax
+ VZEROUPPER_RETURN
+# endif
.p2align 4
-L(first_vec_x2_check):
+L(last_vec_x1_return):
tzcntl %eax, %eax
- /* Check the end of data. */
- cmpq %rax, %rdx
- jbe L(zero)
- addq $(VEC_SIZE * 2), %rax
+# ifdef USE_AS_RAWMEMCHR
+ subq $(VEC_SIZE * 4 - 1), %rdi
+# else
+ incq %rdi
+# endif
addq %rdi, %rax
VZEROUPPER_RETURN
.p2align 4
-L(first_vec_x3_check):
+L(last_vec_x2_return):
tzcntl %eax, %eax
- /* Check the end of data. */
- cmpq %rax, %rdx
- jbe L(zero)
- addq $(VEC_SIZE * 3), %rax
+# ifdef USE_AS_RAWMEMCHR
+ subq $(VEC_SIZE * 3 - 1), %rdi
+# else
+ subq $-(VEC_SIZE + 1), %rdi
+# endif
addq %rdi, %rax
VZEROUPPER_RETURN
+# ifndef USE_AS_RAWMEMCHR
.p2align 4
-L(zero):
- xorl %eax, %eax
- jmp L(return_vzeroupper)
+L(last_4x_vec_or_less_cmpeq):
+ VPCMPEQ (VEC_SIZE * 4 + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ subq $-(VEC_SIZE * 4), %rdi
+ /* Check first VEC regardless. */
+ testl %eax, %eax
+ jnz L(first_vec_x1_check)
+ /* If remaining length <= CHAR_PER_VEC * 2. */
+ addl $(VEC_SIZE * 2), %edx
+ jle L(last_2x_vec)
.p2align 4
-L(null):
- xorl %eax, %eax
- ret
-# endif
+L(last_4x_vec):
+ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x2_return)
- .p2align 4
-L(first_vec_x0):
- tzcntl %eax, %eax
- addq %rdi, %rax
- VZEROUPPER_RETURN
+ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
- .p2align 4
-L(first_vec_x1):
- tzcntl %eax, %eax
- addq $VEC_SIZE, %rax
- addq %rdi, %rax
- VZEROUPPER_RETURN
+ /* Create mask for possible matches within remaining length. */
+ movq $-1, %rcx
+ bzhiq %rdx, %rcx, %rcx
- .p2align 4
-L(first_vec_x2):
+ /* Test matches in data against length match. */
+ andl %ecx, %eax
+ jnz L(last_vec_x3)
+
+ /* if remaining length <= VEC_SIZE * 3 (Note this is after
+ remaining length was found to be > VEC_SIZE * 2. */
+ subl $VEC_SIZE, %edx
+ jbe L(zero_end2)
+
+ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ /* Shift remaining length mask for last VEC. */
+ shrq $32, %rcx
+ andl %ecx, %eax
+ jz L(zero_end2)
tzcntl %eax, %eax
- addq $(VEC_SIZE * 2), %rax
+ addq $(VEC_SIZE * 3 + 1), %rdi
addq %rdi, %rax
+L(zero_end2):
VZEROUPPER_RETURN
.p2align 4
-L(4x_vec_end):
- vpmovmskb %ymm1, %eax
- testl %eax, %eax
- jnz L(first_vec_x0)
- vpmovmskb %ymm2, %eax
- testl %eax, %eax
- jnz L(first_vec_x1)
- vpmovmskb %ymm3, %eax
- testl %eax, %eax
- jnz L(first_vec_x2)
- vpmovmskb %ymm4, %eax
- testl %eax, %eax
-L(first_vec_x3):
+L(last_vec_x3):
tzcntl %eax, %eax
- addq $(VEC_SIZE * 3), %rax
+ subq $-(VEC_SIZE * 2 + 1), %rdi
addq %rdi, %rax
VZEROUPPER_RETURN
+# endif
END (MEMCHR)
#endif
--
GitLab
View file
@ -1,388 +0,0 @@
From 645a158978f9520e74074e8c14047503be4db0f0 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Wed, 9 Jun 2021 16:25:32 -0400
Subject: [PATCH] x86: Fix overflow bug with wmemchr-sse2 and wmemchr-avx2 [BZ
#27974]
Content-type: text/plain; charset=UTF-8
This commit fixes the bug mentioned in the previous commit.
The previous implementations of wmemchr in these files relied
on n * sizeof(wchar_t) not overflowing, which the standard does not guarantee.
The new overflow tests added in the previous commit now
pass (as well as all the other tests).
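The overflow is easy to reproduce in plain C (a sketch assuming LP64
Linux, where wchar_t is 4 bytes; not glibc code):

    #include <stddef.h>
    #include <stdio.h>

    int
    main (void)
    {
      /* Any n above SIZE_MAX / sizeof (wchar_t) overflows the byte
         conversion: 2^62 * 4 wraps to 0, so the old shl $2 prologue
         turned a huge element count into an empty search.  The fix
         keeps the length in characters (CHAR_PER_VEC) instead.  */
      size_t n = (size_t) 1 << 62;
      size_t bytes = n * sizeof (wchar_t);  /* wraps to 0 */
      printf ("n = %zu, bytes = %zu\n", n, bytes);
      return 0;
    }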
Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
---
sysdeps/x86_64/memchr.S | 77 +++++++++++++++++++-------
sysdeps/x86_64/multiarch/memchr-avx2.S | 58 +++++++++++++------
2 files changed, 98 insertions(+), 37 deletions(-)
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index cb320257..24f9a0c5 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -21,9 +21,11 @@
#ifdef USE_AS_WMEMCHR
# define MEMCHR wmemchr
# define PCMPEQ pcmpeqd
+# define CHAR_PER_VEC 4
#else
# define MEMCHR memchr
# define PCMPEQ pcmpeqb
+# define CHAR_PER_VEC 16
#endif
/* fast SSE2 version with using pmaxub and 64 byte loop */
@@ -33,15 +35,14 @@ ENTRY(MEMCHR)
movd %esi, %xmm1
mov %edi, %ecx
+#ifdef __ILP32__
+ /* Clear the upper 32 bits. */
+ movl %edx, %edx
+#endif
#ifdef USE_AS_WMEMCHR
test %RDX_LP, %RDX_LP
jz L(return_null)
- shl $2, %RDX_LP
#else
-# ifdef __ILP32__
- /* Clear the upper 32 bits. */
- movl %edx, %edx
-# endif
punpcklbw %xmm1, %xmm1
test %RDX_LP, %RDX_LP
jz L(return_null)
@@ -60,13 +61,16 @@ ENTRY(MEMCHR)
test %eax, %eax
jnz L(matches_1)
- sub $16, %rdx
+ sub $CHAR_PER_VEC, %rdx
jbe L(return_null)
add $16, %rdi
and $15, %ecx
and $-16, %rdi
+#ifdef USE_AS_WMEMCHR
+ shr $2, %ecx
+#endif
add %rcx, %rdx
- sub $64, %rdx
+ sub $(CHAR_PER_VEC * 4), %rdx
jbe L(exit_loop)
jmp L(loop_prolog)
@@ -77,16 +81,21 @@ L(crosscache):
movdqa (%rdi), %xmm0
PCMPEQ %xmm1, %xmm0
-/* Check if there is a match. */
+ /* Check if there is a match. */
pmovmskb %xmm0, %eax
-/* Remove the leading bytes. */
+ /* Remove the leading bytes. */
sar %cl, %eax
test %eax, %eax
je L(unaligned_no_match)
-/* Check which byte is a match. */
+ /* Check which byte is a match. */
bsf %eax, %eax
-
+#ifdef USE_AS_WMEMCHR
+ mov %eax, %esi
+ shr $2, %esi
+ sub %rsi, %rdx
+#else
sub %rax, %rdx
+#endif
jbe L(return_null)
add %rdi, %rax
add %rcx, %rax
@@ -94,15 +103,18 @@ L(crosscache):
.p2align 4
L(unaligned_no_match):
- /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using
+ /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using
"rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to void
possible addition overflow. */
neg %rcx
add $16, %rcx
+#ifdef USE_AS_WMEMCHR
+ shr $2, %ecx
+#endif
sub %rcx, %rdx
jbe L(return_null)
add $16, %rdi
- sub $64, %rdx
+ sub $(CHAR_PER_VEC * 4), %rdx
jbe L(exit_loop)
.p2align 4
@@ -135,7 +147,7 @@ L(loop_prolog):
test $0x3f, %rdi
jz L(align64_loop)
- sub $64, %rdx
+ sub $(CHAR_PER_VEC * 4), %rdx
jbe L(exit_loop)
movdqa (%rdi), %xmm0
@@ -167,11 +179,14 @@ L(loop_prolog):
mov %rdi, %rcx
and $-64, %rdi
and $63, %ecx
+#ifdef USE_AS_WMEMCHR
+ shr $2, %ecx
+#endif
add %rcx, %rdx
.p2align 4
L(align64_loop):
- sub $64, %rdx
+ sub $(CHAR_PER_VEC * 4), %rdx
jbe L(exit_loop)
movdqa (%rdi), %xmm0
movdqa 16(%rdi), %xmm2
@@ -218,7 +233,7 @@ L(align64_loop):
.p2align 4
L(exit_loop):
- add $32, %edx
+ add $(CHAR_PER_VEC * 2), %edx
jle L(exit_loop_32)
movdqa (%rdi), %xmm0
@@ -238,7 +253,7 @@ L(exit_loop):
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32_1)
- sub $16, %edx
+ sub $CHAR_PER_VEC, %edx
jle L(return_null)
PCMPEQ 48(%rdi), %xmm1
@@ -250,13 +265,13 @@ L(exit_loop):
.p2align 4
L(exit_loop_32):
- add $32, %edx
+ add $(CHAR_PER_VEC * 2), %edx
movdqa (%rdi), %xmm0
PCMPEQ %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches_1)
- sub $16, %edx
+ sub $CHAR_PER_VEC, %edx
jbe L(return_null)
PCMPEQ 16(%rdi), %xmm1
@@ -293,7 +308,13 @@ L(matches32):
.p2align 4
L(matches_1):
bsf %eax, %eax
+#ifdef USE_AS_WMEMCHR
+ mov %eax, %esi
+ shr $2, %esi
+ sub %rsi, %rdx
+#else
sub %rax, %rdx
+#endif
jbe L(return_null)
add %rdi, %rax
ret
@@ -301,7 +322,13 @@ L(matches_1):
.p2align 4
L(matches16_1):
bsf %eax, %eax
+#ifdef USE_AS_WMEMCHR
+ mov %eax, %esi
+ shr $2, %esi
+ sub %rsi, %rdx
+#else
sub %rax, %rdx
+#endif
jbe L(return_null)
lea 16(%rdi, %rax), %rax
ret
@@ -309,7 +336,13 @@ L(matches16_1):
.p2align 4
L(matches32_1):
bsf %eax, %eax
+#ifdef USE_AS_WMEMCHR
+ mov %eax, %esi
+ shr $2, %esi
+ sub %rsi, %rdx
+#else
sub %rax, %rdx
+#endif
jbe L(return_null)
lea 32(%rdi, %rax), %rax
ret
@@ -317,7 +350,13 @@ L(matches32_1):
.p2align 4
L(matches48_1):
bsf %eax, %eax
+#ifdef USE_AS_WMEMCHR
+ mov %eax, %esi
+ shr $2, %esi
+ sub %rsi, %rdx
+#else
sub %rax, %rdx
+#endif
jbe L(return_null)
lea 48(%rdi, %rax), %rax
ret
diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S
index b377f22e..16027abb 100644
--- a/sysdeps/x86_64/multiarch/memchr-avx2.S
+++ b/sysdeps/x86_64/multiarch/memchr-avx2.S
@@ -54,21 +54,19 @@
# define VEC_SIZE 32
# define PAGE_SIZE 4096
+# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
.section SECTION(.text),"ax",@progbits
ENTRY (MEMCHR)
# ifndef USE_AS_RAWMEMCHR
/* Check for zero length. */
- test %RDX_LP, %RDX_LP
- jz L(null)
-# endif
-# ifdef USE_AS_WMEMCHR
- shl $2, %RDX_LP
-# else
# ifdef __ILP32__
- /* Clear the upper 32 bits. */
- movl %edx, %edx
+ /* Clear upper bits. */
+ and %RDX_LP, %RDX_LP
+# else
+ test %RDX_LP, %RDX_LP
# endif
+ jz L(null)
# endif
/* Broadcast CHAR to YMMMATCH. */
vmovd %esi, %xmm0
@@ -84,7 +82,7 @@ ENTRY (MEMCHR)
vpmovmskb %ymm1, %eax
# ifndef USE_AS_RAWMEMCHR
/* If length < CHAR_PER_VEC handle special. */
- cmpq $VEC_SIZE, %rdx
+ cmpq $CHAR_PER_VEC, %rdx
jbe L(first_vec_x0)
# endif
testl %eax, %eax
@@ -98,6 +96,10 @@ ENTRY (MEMCHR)
L(first_vec_x0):
/* Check if first match was before length. */
tzcntl %eax, %eax
+# ifdef USE_AS_WMEMCHR
+ /* NB: Multiply length by 4 to get byte count. */
+ sall $2, %edx
+# endif
xorl %ecx, %ecx
cmpl %eax, %edx
leaq (%rdi, %rax), %rax
@@ -110,12 +112,12 @@ L(null):
# endif
.p2align 4
L(cross_page_boundary):
- /* Save pointer before aligning as its original value is necessary
- for computer return address if byte is found or adjusting length
- if it is not and this is memchr. */
+ /* Save pointer before aligning as its original value is
+ necessary for computer return address if byte is found or
+ adjusting length if it is not and this is memchr. */
movq %rdi, %rcx
- /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr and
- rdi for rawmemchr. */
+ /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr
+ and rdi for rawmemchr. */
orq $(VEC_SIZE - 1), %ALGN_PTR_REG
VPCMPEQ -(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1
vpmovmskb %ymm1, %eax
@@ -124,6 +126,10 @@ L(cross_page_boundary):
match). */
leaq 1(%ALGN_PTR_REG), %rsi
subq %RRAW_PTR_REG, %rsi
+# ifdef USE_AS_WMEMCHR
+ /* NB: Divide bytes by 4 to get wchar_t count. */
+ shrl $2, %esi
+# endif
# endif
/* Remove the leading bytes. */
sarxl %ERAW_PTR_REG, %eax, %eax
@@ -181,6 +187,10 @@ L(cross_page_continue):
orq $(VEC_SIZE - 1), %rdi
/* esi is for adjusting length to see if near the end. */
leal (VEC_SIZE * 4 + 1)(%rdi, %rcx), %esi
+# ifdef USE_AS_WMEMCHR
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarl $2, %esi
+# endif
# else
orq $(VEC_SIZE - 1), %rdi
L(cross_page_continue):
@@ -213,7 +223,7 @@ L(cross_page_continue):
# ifndef USE_AS_RAWMEMCHR
/* Check if at last VEC_SIZE * 4 length. */
- subq $(VEC_SIZE * 4), %rdx
+ subq $(CHAR_PER_VEC * 4), %rdx
jbe L(last_4x_vec_or_less_cmpeq)
/* Align data to VEC_SIZE * 4 - 1 for the loop and readjust
length. */
@@ -221,6 +231,10 @@ L(cross_page_continue):
movl %edi, %ecx
orq $(VEC_SIZE * 4 - 1), %rdi
andl $(VEC_SIZE * 4 - 1), %ecx
+# ifdef USE_AS_WMEMCHR
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarl $2, %ecx
+# endif
addq %rcx, %rdx
# else
/* Align data to VEC_SIZE * 4 - 1 for loop. */
@@ -250,15 +264,19 @@ L(loop_4x_vec):
subq $-(VEC_SIZE * 4), %rdi
- subq $(VEC_SIZE * 4), %rdx
+ subq $(CHAR_PER_VEC * 4), %rdx
ja L(loop_4x_vec)
- /* Fall through into less than 4 remaining vectors of length case.
- */
+ /* Fall through into less than 4 remaining vectors of length
+ case. */
VPCMPEQ (VEC_SIZE * 0 + 1)(%rdi), %ymm0, %ymm1
vpmovmskb %ymm1, %eax
.p2align 4
L(last_4x_vec_or_less):
+# ifdef USE_AS_WMEMCHR
+ /* NB: Multiply length by 4 to get byte count. */
+ sall $2, %edx
+# endif
/* Check if first VEC contained match. */
testl %eax, %eax
jnz L(first_vec_x1_check)
@@ -355,6 +373,10 @@ L(last_vec_x2_return):
L(last_4x_vec_or_less_cmpeq):
VPCMPEQ (VEC_SIZE * 4 + 1)(%rdi), %ymm0, %ymm1
vpmovmskb %ymm1, %eax
+# ifdef USE_AS_WMEMCHR
+ /* NB: Multiply length by 4 to get byte count. */
+ sall $2, %edx
+# endif
subq $-(VEC_SIZE * 4), %rdi
/* Check first VEC regardless. */
testl %eax, %eax
--
GitLab
View file
@ -1,767 +0,0 @@
From aaa23c35071537e2dcf5807e956802ed215210aa Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Mon, 19 Apr 2021 19:36:07 -0400
Subject: [PATCH] x86: Optimize strlen-avx2.S
Content-type: text/plain; charset=UTF-8
No bug. This commit optimizes strlen-avx2.S. The optimizations are
mostly small things but they add up to roughly 10-30% performance
improvement for strlen. The results for strnlen are bit more
ambiguous. test-strlen, test-strnlen, test-wcslen, and test-wcsnlen
are all passing.
Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
---
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 16 +-
sysdeps/x86_64/multiarch/strlen-avx2.S | 532 +++++++++++++--------
2 files changed, 334 insertions(+), 214 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index cbfc1a5d..f1a6460a 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -285,10 +285,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strlen.c. */
IFUNC_IMPL (i, name, strlen,
IFUNC_IMPL_ADD (array, i, strlen,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__strlen_avx2)
IFUNC_IMPL_ADD (array, i, strlen,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strlen_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strlen,
@@ -301,10 +303,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strnlen.c. */
IFUNC_IMPL (i, name, strnlen,
IFUNC_IMPL_ADD (array, i, strnlen,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__strnlen_avx2)
IFUNC_IMPL_ADD (array, i, strnlen,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strnlen_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strnlen,
@@ -640,10 +644,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/wcslen.c. */
IFUNC_IMPL (i, name, wcslen,
IFUNC_IMPL_ADD (array, i, wcslen,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__wcslen_avx2)
IFUNC_IMPL_ADD (array, i, wcslen,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wcslen_avx2_rtm)
IFUNC_IMPL_ADD (array, i, wcslen,
@@ -656,10 +662,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/wcsnlen.c. */
IFUNC_IMPL (i, name, wcsnlen,
IFUNC_IMPL_ADD (array, i, wcsnlen,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__wcsnlen_avx2)
IFUNC_IMPL_ADD (array, i, wcsnlen,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wcsnlen_avx2_rtm)
IFUNC_IMPL_ADD (array, i, wcsnlen,
diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S
index 82826e10..be8a5db5 100644
--- a/sysdeps/x86_64/multiarch/strlen-avx2.S
+++ b/sysdeps/x86_64/multiarch/strlen-avx2.S
@@ -27,9 +27,11 @@
# ifdef USE_AS_WCSLEN
# define VPCMPEQ vpcmpeqd
# define VPMINU vpminud
+# define CHAR_SIZE 4
# else
# define VPCMPEQ vpcmpeqb
# define VPMINU vpminub
+# define CHAR_SIZE 1
# endif
# ifndef VZEROUPPER
@@ -41,349 +43,459 @@
# endif
# define VEC_SIZE 32
+# define PAGE_SIZE 4096
.section SECTION(.text),"ax",@progbits
ENTRY (STRLEN)
# ifdef USE_AS_STRNLEN
- /* Check for zero length. */
+ /* Check zero length. */
test %RSI_LP, %RSI_LP
jz L(zero)
+ /* Store max len in R8_LP before adjusting if using WCSLEN. */
+ mov %RSI_LP, %R8_LP
# ifdef USE_AS_WCSLEN
shl $2, %RSI_LP
# elif defined __ILP32__
/* Clear the upper 32 bits. */
movl %esi, %esi
# endif
- mov %RSI_LP, %R8_LP
# endif
- movl %edi, %ecx
+ movl %edi, %eax
movq %rdi, %rdx
vpxor %xmm0, %xmm0, %xmm0
-
+ /* Clear high bits from edi. Only keeping bits relevant to page
+ cross check. */
+ andl $(PAGE_SIZE - 1), %eax
/* Check if we may cross page boundary with one vector load. */
- andl $(2 * VEC_SIZE - 1), %ecx
- cmpl $VEC_SIZE, %ecx
- ja L(cros_page_boundary)
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
+ ja L(cross_page_boundary)
/* Check the first VEC_SIZE bytes. */
- VPCMPEQ (%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
- testl %eax, %eax
-
+ VPCMPEQ (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
# ifdef USE_AS_STRNLEN
- jnz L(first_vec_x0_check)
- /* Adjust length and check the end of data. */
- subq $VEC_SIZE, %rsi
- jbe L(max)
-# else
- jnz L(first_vec_x0)
+ /* If length < VEC_SIZE handle special. */
+ cmpq $VEC_SIZE, %rsi
+ jbe L(first_vec_x0)
# endif
-
- /* Align data for aligned loads in the loop. */
- addq $VEC_SIZE, %rdi
- andl $(VEC_SIZE - 1), %ecx
- andq $-VEC_SIZE, %rdi
+ /* If empty continue to aligned_more. Otherwise return bit
+ position of first match. */
+ testl %eax, %eax
+ jz L(aligned_more)
+ tzcntl %eax, %eax
+# ifdef USE_AS_WCSLEN
+ shrl $2, %eax
+# endif
+ VZEROUPPER_RETURN
# ifdef USE_AS_STRNLEN
- /* Adjust length. */
- addq %rcx, %rsi
+L(zero):
+ xorl %eax, %eax
+ ret
- subq $(VEC_SIZE * 4), %rsi
- jbe L(last_4x_vec_or_less)
+ .p2align 4
+L(first_vec_x0):
+ /* Set bit for max len so that tzcnt will return min of max len
+ and position of first match. */
+ btsq %rsi, %rax
+ tzcntl %eax, %eax
+# ifdef USE_AS_WCSLEN
+ shrl $2, %eax
+# endif
+ VZEROUPPER_RETURN
# endif
- jmp L(more_4x_vec)
.p2align 4
-L(cros_page_boundary):
- andl $(VEC_SIZE - 1), %ecx
- andq $-VEC_SIZE, %rdi
- VPCMPEQ (%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
- /* Remove the leading bytes. */
- sarl %cl, %eax
- testl %eax, %eax
- jz L(aligned_more)
+L(first_vec_x1):
tzcntl %eax, %eax
+ /* Safe to use 32 bit instructions as these are only called for
+ size = [1, 159]. */
# ifdef USE_AS_STRNLEN
- /* Check the end of data. */
- cmpq %rax, %rsi
- jbe L(max)
+ /* Use ecx which was computed earlier to compute correct value.
+ */
+ subl $(VEC_SIZE * 4 + 1), %ecx
+ addl %ecx, %eax
+# else
+ subl %edx, %edi
+ incl %edi
+ addl %edi, %eax
# endif
- addq %rdi, %rax
- addq %rcx, %rax
- subq %rdx, %rax
# ifdef USE_AS_WCSLEN
- shrq $2, %rax
+ shrl $2, %eax
# endif
-L(return_vzeroupper):
- ZERO_UPPER_VEC_REGISTERS_RETURN
+ VZEROUPPER_RETURN
.p2align 4
-L(aligned_more):
+L(first_vec_x2):
+ tzcntl %eax, %eax
+ /* Safe to use 32 bit instructions as these are only called for
+ size = [1, 159]. */
# ifdef USE_AS_STRNLEN
- /* "rcx" is less than VEC_SIZE. Calculate "rdx + rcx - VEC_SIZE"
- with "rdx - (VEC_SIZE - rcx)" instead of "(rdx + rcx) - VEC_SIZE"
- to void possible addition overflow. */
- negq %rcx
- addq $VEC_SIZE, %rcx
-
- /* Check the end of data. */
- subq %rcx, %rsi
- jbe L(max)
+ /* Use ecx which was computed earlier to compute correct value.
+ */
+ subl $(VEC_SIZE * 3 + 1), %ecx
+ addl %ecx, %eax
+# else
+ subl %edx, %edi
+ addl $(VEC_SIZE + 1), %edi
+ addl %edi, %eax
# endif
+# ifdef USE_AS_WCSLEN
+ shrl $2, %eax
+# endif
+ VZEROUPPER_RETURN
- addq $VEC_SIZE, %rdi
+ .p2align 4
+L(first_vec_x3):
+ tzcntl %eax, %eax
+ /* Safe to use 32 bit instructions as these are only called for
+ size = [1, 159]. */
+# ifdef USE_AS_STRNLEN
+ /* Use ecx which was computed earlier to compute correct value.
+ */
+ subl $(VEC_SIZE * 2 + 1), %ecx
+ addl %ecx, %eax
+# else
+ subl %edx, %edi
+ addl $(VEC_SIZE * 2 + 1), %edi
+ addl %edi, %eax
+# endif
+# ifdef USE_AS_WCSLEN
+ shrl $2, %eax
+# endif
+ VZEROUPPER_RETURN
+ .p2align 4
+L(first_vec_x4):
+ tzcntl %eax, %eax
+ /* Safe to use 32 bit instructions as these are only called for
+ size = [1, 159]. */
# ifdef USE_AS_STRNLEN
- subq $(VEC_SIZE * 4), %rsi
- jbe L(last_4x_vec_or_less)
+ /* Use ecx which was computed earlier to compute correct value.
+ */
+ subl $(VEC_SIZE + 1), %ecx
+ addl %ecx, %eax
+# else
+ subl %edx, %edi
+ addl $(VEC_SIZE * 3 + 1), %edi
+ addl %edi, %eax
# endif
+# ifdef USE_AS_WCSLEN
+ shrl $2, %eax
+# endif
+ VZEROUPPER_RETURN
-L(more_4x_vec):
+ .p2align 5
+L(aligned_more):
+ /* Align data to VEC_SIZE - 1. This is the same number of
+ instructions as using andq with -VEC_SIZE but saves 4 bytes of
+ code on the x4 check. */
+ orq $(VEC_SIZE - 1), %rdi
+L(cross_page_continue):
/* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
since data is only aligned to VEC_SIZE. */
- VPCMPEQ (%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
- testl %eax, %eax
- jnz L(first_vec_x0)
-
- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+# ifdef USE_AS_STRNLEN
+ /* + 1 because rdi is aligned to VEC_SIZE - 1. + CHAR_SIZE because
+ it simplies the logic in last_4x_vec_or_less. */
+ leaq (VEC_SIZE * 4 + CHAR_SIZE + 1)(%rdi), %rcx
+ subq %rdx, %rcx
+# endif
+ /* Load first VEC regardless. */
+ VPCMPEQ 1(%rdi), %ymm0, %ymm1
+# ifdef USE_AS_STRNLEN
+ /* Adjust length. If near end handle specially. */
+ subq %rcx, %rsi
+ jb L(last_4x_vec_or_less)
+# endif
+ vpmovmskb %ymm1, %eax
testl %eax, %eax
jnz L(first_vec_x1)
- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
testl %eax, %eax
jnz L(first_vec_x2)
- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
testl %eax, %eax
jnz L(first_vec_x3)
- addq $(VEC_SIZE * 4), %rdi
-
-# ifdef USE_AS_STRNLEN
- subq $(VEC_SIZE * 4), %rsi
- jbe L(last_4x_vec_or_less)
-# endif
-
- /* Align data to 4 * VEC_SIZE. */
- movq %rdi, %rcx
- andl $(4 * VEC_SIZE - 1), %ecx
- andq $-(4 * VEC_SIZE), %rdi
+ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x4)
+ /* Align data to VEC_SIZE * 4 - 1. */
# ifdef USE_AS_STRNLEN
- /* Adjust length. */
+ /* Before adjusting length check if at last VEC_SIZE * 4. */
+ cmpq $(VEC_SIZE * 4 - 1), %rsi
+ jbe L(last_4x_vec_or_less_load)
+ incq %rdi
+ movl %edi, %ecx
+ orq $(VEC_SIZE * 4 - 1), %rdi
+ andl $(VEC_SIZE * 4 - 1), %ecx
+ /* Readjust length. */
addq %rcx, %rsi
+# else
+ incq %rdi
+ orq $(VEC_SIZE * 4 - 1), %rdi
# endif
-
+ /* Compare 4 * VEC at a time forward. */
.p2align 4
L(loop_4x_vec):
- /* Compare 4 * VEC at a time forward. */
- vmovdqa (%rdi), %ymm1
- vmovdqa VEC_SIZE(%rdi), %ymm2
- vmovdqa (VEC_SIZE * 2)(%rdi), %ymm3
- vmovdqa (VEC_SIZE * 3)(%rdi), %ymm4
- VPMINU %ymm1, %ymm2, %ymm5
- VPMINU %ymm3, %ymm4, %ymm6
- VPMINU %ymm5, %ymm6, %ymm5
-
- VPCMPEQ %ymm5, %ymm0, %ymm5
- vpmovmskb %ymm5, %eax
- testl %eax, %eax
- jnz L(4x_vec_end)
-
- addq $(VEC_SIZE * 4), %rdi
-
-# ifndef USE_AS_STRNLEN
- jmp L(loop_4x_vec)
-# else
+# ifdef USE_AS_STRNLEN
+ /* Break if at end of length. */
subq $(VEC_SIZE * 4), %rsi
- ja L(loop_4x_vec)
-
-L(last_4x_vec_or_less):
- /* Less than 4 * VEC and aligned to VEC_SIZE. */
- addl $(VEC_SIZE * 2), %esi
- jle L(last_2x_vec)
+ jb L(last_4x_vec_or_less_cmpeq)
+# endif
+ /* Save some code size by microfusing VPMINU with the load. Since
+ the matches in ymm2/ymm4 can only be returned if there where no
+ matches in ymm1/ymm3 respectively there is no issue with overlap.
+ */
+ vmovdqa 1(%rdi), %ymm1
+ VPMINU (VEC_SIZE + 1)(%rdi), %ymm1, %ymm2
+ vmovdqa (VEC_SIZE * 2 + 1)(%rdi), %ymm3
+ VPMINU (VEC_SIZE * 3 + 1)(%rdi), %ymm3, %ymm4
+
+ VPMINU %ymm2, %ymm4, %ymm5
+ VPCMPEQ %ymm5, %ymm0, %ymm5
+ vpmovmskb %ymm5, %ecx
- VPCMPEQ (%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
- testl %eax, %eax
- jnz L(first_vec_x0)
+ subq $-(VEC_SIZE * 4), %rdi
+ testl %ecx, %ecx
+ jz L(loop_4x_vec)
- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
- testl %eax, %eax
- jnz L(first_vec_x1)
- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+ VPCMPEQ %ymm1, %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ subq %rdx, %rdi
testl %eax, %eax
+ jnz L(last_vec_return_x0)
- jnz L(first_vec_x2_check)
- subl $VEC_SIZE, %esi
- jle L(max)
-
- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+ VPCMPEQ %ymm2, %ymm0, %ymm2
+ vpmovmskb %ymm2, %eax
testl %eax, %eax
-
- jnz L(first_vec_x3_check)
- movq %r8, %rax
-# ifdef USE_AS_WCSLEN
+ jnz L(last_vec_return_x1)
+
+ /* Combine last 2 VEC. */
+ VPCMPEQ %ymm3, %ymm0, %ymm3
+ vpmovmskb %ymm3, %eax
+ /* rcx has combined result from all 4 VEC. It will only be used if
+ the first 3 other VEC all did not contain a match. */
+ salq $32, %rcx
+ orq %rcx, %rax
+ tzcntq %rax, %rax
+ subq $(VEC_SIZE * 2 - 1), %rdi
+ addq %rdi, %rax
+# ifdef USE_AS_WCSLEN
shrq $2, %rax
-# endif
+# endif
VZEROUPPER_RETURN
+
+# ifdef USE_AS_STRNLEN
.p2align 4
-L(last_2x_vec):
- addl $(VEC_SIZE * 2), %esi
- VPCMPEQ (%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
- testl %eax, %eax
+L(last_4x_vec_or_less_load):
+ /* Depending on entry adjust rdi / prepare first VEC in ymm1. */
+ subq $-(VEC_SIZE * 4), %rdi
+L(last_4x_vec_or_less_cmpeq):
+ VPCMPEQ 1(%rdi), %ymm0, %ymm1
+L(last_4x_vec_or_less):
- jnz L(first_vec_x0_check)
- subl $VEC_SIZE, %esi
- jle L(max)
+ vpmovmskb %ymm1, %eax
+ /* If remaining length > VEC_SIZE * 2. This works if esi is off by
+ VEC_SIZE * 4. */
+ testl $(VEC_SIZE * 2), %esi
+ jnz L(last_4x_vec)
- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+ /* length may have been negative or positive by an offset of
+ VEC_SIZE * 4 depending on where this was called from. This fixes
+ that. */
+ andl $(VEC_SIZE * 4 - 1), %esi
testl %eax, %eax
- jnz L(first_vec_x1_check)
- movq %r8, %rax
-# ifdef USE_AS_WCSLEN
- shrq $2, %rax
-# endif
- VZEROUPPER_RETURN
+ jnz L(last_vec_x1_check)
- .p2align 4
-L(first_vec_x0_check):
+ subl $VEC_SIZE, %esi
+ jb L(max)
+
+ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
tzcntl %eax, %eax
/* Check the end of data. */
- cmpq %rax, %rsi
- jbe L(max)
+ cmpl %eax, %esi
+ jb L(max)
+ subq %rdx, %rdi
+ addl $(VEC_SIZE + 1), %eax
addq %rdi, %rax
- subq %rdx, %rax
# ifdef USE_AS_WCSLEN
shrq $2, %rax
# endif
VZEROUPPER_RETURN
+# endif
.p2align 4
-L(first_vec_x1_check):
+L(last_vec_return_x0):
tzcntl %eax, %eax
- /* Check the end of data. */
- cmpq %rax, %rsi
- jbe L(max)
- addq $VEC_SIZE, %rax
+ subq $(VEC_SIZE * 4 - 1), %rdi
addq %rdi, %rax
- subq %rdx, %rax
-# ifdef USE_AS_WCSLEN
+# ifdef USE_AS_WCSLEN
shrq $2, %rax
-# endif
+# endif
VZEROUPPER_RETURN
.p2align 4
-L(first_vec_x2_check):
+L(last_vec_return_x1):
tzcntl %eax, %eax
- /* Check the end of data. */
- cmpq %rax, %rsi
- jbe L(max)
- addq $(VEC_SIZE * 2), %rax
+ subq $(VEC_SIZE * 3 - 1), %rdi
addq %rdi, %rax
- subq %rdx, %rax
-# ifdef USE_AS_WCSLEN
+# ifdef USE_AS_WCSLEN
shrq $2, %rax
-# endif
+# endif
VZEROUPPER_RETURN
+# ifdef USE_AS_STRNLEN
.p2align 4
-L(first_vec_x3_check):
+L(last_vec_x1_check):
+
tzcntl %eax, %eax
/* Check the end of data. */
- cmpq %rax, %rsi
- jbe L(max)
- addq $(VEC_SIZE * 3), %rax
+ cmpl %eax, %esi
+ jb L(max)
+ subq %rdx, %rdi
+ incl %eax
addq %rdi, %rax
- subq %rdx, %rax
# ifdef USE_AS_WCSLEN
shrq $2, %rax
# endif
VZEROUPPER_RETURN
- .p2align 4
L(max):
movq %r8, %rax
+ VZEROUPPER_RETURN
+
+ .p2align 4
+L(last_4x_vec):
+ /* Test first 2x VEC normally. */
+ testl %eax, %eax
+ jnz L(last_vec_x1)
+
+ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x2)
+
+ /* Normalize length. */
+ andl $(VEC_SIZE * 4 - 1), %esi
+ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x3)
+
+ subl $(VEC_SIZE * 3), %esi
+ jb L(max)
+
+ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ tzcntl %eax, %eax
+ /* Check the end of data. */
+ cmpl %eax, %esi
+ jb L(max)
+ subq %rdx, %rdi
+ addl $(VEC_SIZE * 3 + 1), %eax
+ addq %rdi, %rax
# ifdef USE_AS_WCSLEN
shrq $2, %rax
# endif
VZEROUPPER_RETURN
- .p2align 4
-L(zero):
- xorl %eax, %eax
- ret
-# endif
.p2align 4
-L(first_vec_x0):
+L(last_vec_x1):
+ /* essentially duplicates of first_vec_x1 but use 64 bit
+ instructions. */
tzcntl %eax, %eax
+ subq %rdx, %rdi
+ incl %eax
addq %rdi, %rax
- subq %rdx, %rax
-# ifdef USE_AS_WCSLEN
+# ifdef USE_AS_WCSLEN
shrq $2, %rax
-# endif
+# endif
VZEROUPPER_RETURN
.p2align 4
-L(first_vec_x1):
+L(last_vec_x2):
+ /* essentially duplicates of first_vec_x1 but use 64 bit
+ instructions. */
tzcntl %eax, %eax
- addq $VEC_SIZE, %rax
+ subq %rdx, %rdi
+ addl $(VEC_SIZE + 1), %eax
addq %rdi, %rax
- subq %rdx, %rax
-# ifdef USE_AS_WCSLEN
+# ifdef USE_AS_WCSLEN
shrq $2, %rax
-# endif
+# endif
VZEROUPPER_RETURN
.p2align 4
-L(first_vec_x2):
+L(last_vec_x3):
tzcntl %eax, %eax
- addq $(VEC_SIZE * 2), %rax
+ subl $(VEC_SIZE * 2), %esi
+ /* Check the end of data. */
+ cmpl %eax, %esi
+ jb L(max_end)
+ subq %rdx, %rdi
+ addl $(VEC_SIZE * 2 + 1), %eax
addq %rdi, %rax
- subq %rdx, %rax
-# ifdef USE_AS_WCSLEN
+# ifdef USE_AS_WCSLEN
shrq $2, %rax
-# endif
+# endif
+ VZEROUPPER_RETURN
+L(max_end):
+ movq %r8, %rax
VZEROUPPER_RETURN
+# endif
+ /* Cold case for crossing page with first load. */
.p2align 4
-L(4x_vec_end):
- VPCMPEQ %ymm1, %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
- testl %eax, %eax
- jnz L(first_vec_x0)
- VPCMPEQ %ymm2, %ymm0, %ymm2
- vpmovmskb %ymm2, %eax
+L(cross_page_boundary):
+ /* Align data to VEC_SIZE - 1. */
+ orq $(VEC_SIZE - 1), %rdi
+ VPCMPEQ -(VEC_SIZE - 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ /* Remove the leading bytes. sarxl only uses bits [5:0] of COUNT
+ so no need to manually mod rdx. */
+ sarxl %edx, %eax, %eax
+# ifdef USE_AS_STRNLEN
testl %eax, %eax
- jnz L(first_vec_x1)
- VPCMPEQ %ymm3, %ymm0, %ymm3
- vpmovmskb %ymm3, %eax
+ jnz L(cross_page_less_vec)
+ leaq 1(%rdi), %rcx
+ subq %rdx, %rcx
+ /* Check length. */
+ cmpq %rsi, %rcx
+ jb L(cross_page_continue)
+ movq %r8, %rax
+# else
testl %eax, %eax
- jnz L(first_vec_x2)
- VPCMPEQ %ymm4, %ymm0, %ymm4
- vpmovmskb %ymm4, %eax
-L(first_vec_x3):
+ jz L(cross_page_continue)
tzcntl %eax, %eax
- addq $(VEC_SIZE * 3), %rax
- addq %rdi, %rax
- subq %rdx, %rax
-# ifdef USE_AS_WCSLEN
- shrq $2, %rax
+# ifdef USE_AS_WCSLEN
+ shrl $2, %eax
+# endif
# endif
+L(return_vzeroupper):
+ ZERO_UPPER_VEC_REGISTERS_RETURN
+
+# ifdef USE_AS_STRNLEN
+ .p2align 4
+L(cross_page_less_vec):
+ tzcntl %eax, %eax
+ cmpq %rax, %rsi
+ cmovb %esi, %eax
+# ifdef USE_AS_WCSLEN
+ shrl $2, %eax
+# endif
VZEROUPPER_RETURN
+# endif
END (STRLEN)
#endif
--
GitLab
View file
@ -1,701 +0,0 @@
From 2a76821c3081d2c0231ecd2618f52662cb48fccd Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Mon, 3 May 2021 03:03:19 -0400
Subject: [PATCH] x86: Optimize memchr-evex.S
Content-type: text/plain; charset=UTF-8
No bug. This commit optimizes memchr-evex.S. The optimizations include
replacing some branches with cmovcc, avoiding some branches entirely
in the less_4x_vec case, making the page cross logic less strict,
saving some ALU in the alignment process, and most importantly
increasing ILP in the 4x loop. test-memchr, test-rawmemchr, and
test-wmemchr are all passing.
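The ILP gain in the 4x loop can be modeled in scalar C: compute all four
per-vector match tests independently, combine them, and branch once per
iteration instead of testing each vector in turn (an illustrative model,
not the patched code; BLOCK and block_has_match stand in for VEC_SIZE
and the VPCMP/VPMINU reduction):

    #include <stddef.h>
    #include <string.h>

    #define BLOCK 32

    static int
    block_has_match (const unsigned char *p, unsigned char c)
    {
      return memchr (p, c, BLOCK) != NULL;
    }

    /* Four independent tests per iteration keep the execution units
       busy; the single combined branch is what the rewritten loop's
       min/kortest reduction achieves with vector registers.  */
    static const unsigned char *
    find_block_4x (const unsigned char *p, unsigned char c, size_t iters)
    {
      for (size_t i = 0; i < iters; i++, p += 4 * BLOCK)
        {
          int m0 = block_has_match (p, c);
          int m1 = block_has_match (p + BLOCK, c);
          int m2 = block_has_match (p + 2 * BLOCK, c);
          int m3 = block_has_match (p + 3 * BLOCK, c);
          if (m0 | m1 | m2 | m3)
            return p;  /* caller re-scans these four blocks */
        }
      return NULL;
    }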
Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
---
sysdeps/x86_64/multiarch/memchr-evex.S | 547 +++++++++++++++----------
1 file changed, 322 insertions(+), 225 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/memchr-evex.S b/sysdeps/x86_64/multiarch/memchr-evex.S
index 6dd5d67b..81d5cd64 100644
--- a/sysdeps/x86_64/multiarch/memchr-evex.S
+++ b/sysdeps/x86_64/multiarch/memchr-evex.S
@@ -26,14 +26,28 @@
# ifdef USE_AS_WMEMCHR
# define VPBROADCAST vpbroadcastd
-# define VPCMP vpcmpd
-# define SHIFT_REG r8d
+# define VPMINU vpminud
+# define VPCMP vpcmpd
+# define VPCMPEQ vpcmpeqd
+# define CHAR_SIZE 4
# else
# define VPBROADCAST vpbroadcastb
-# define VPCMP vpcmpb
-# define SHIFT_REG ecx
+# define VPMINU vpminub
+# define VPCMP vpcmpb
+# define VPCMPEQ vpcmpeqb
+# define CHAR_SIZE 1
# endif
+# ifdef USE_AS_RAWMEMCHR
+# define RAW_PTR_REG rcx
+# define ALGN_PTR_REG rdi
+# else
+# define RAW_PTR_REG rdi
+# define ALGN_PTR_REG rcx
+# endif
+
+# define XMMZERO xmm23
+# define YMMZERO ymm23
# define XMMMATCH xmm16
# define YMMMATCH ymm16
# define YMM1 ymm17
@@ -44,6 +58,8 @@
# define YMM6 ymm22
# define VEC_SIZE 32
+# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
+# define PAGE_SIZE 4096
.section .text.evex,"ax",@progbits
ENTRY (MEMCHR)
@@ -51,11 +67,7 @@ ENTRY (MEMCHR)
/* Check for zero length. */
test %RDX_LP, %RDX_LP
jz L(zero)
-# endif
- movl %edi, %ecx
-# ifdef USE_AS_WMEMCHR
- shl $2, %RDX_LP
-# else
+
# ifdef __ILP32__
/* Clear the upper 32 bits. */
movl %edx, %edx
@@ -64,318 +76,403 @@ ENTRY (MEMCHR)
/* Broadcast CHAR to YMMMATCH. */
VPBROADCAST %esi, %YMMMATCH
/* Check if we may cross page boundary with one vector load. */
- andl $(2 * VEC_SIZE - 1), %ecx
- cmpl $VEC_SIZE, %ecx
- ja L(cros_page_boundary)
+ movl %edi, %eax
+ andl $(PAGE_SIZE - 1), %eax
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
+ ja L(cross_page_boundary)
/* Check the first VEC_SIZE bytes. */
- VPCMP $0, (%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
- testl %eax, %eax
-
+ VPCMP $0, (%rdi), %YMMMATCH, %k0
+ kmovd %k0, %eax
# ifndef USE_AS_RAWMEMCHR
- jnz L(first_vec_x0_check)
- /* Adjust length and check the end of data. */
- subq $VEC_SIZE, %rdx
- jbe L(zero)
+ /* If length < CHAR_PER_VEC handle special. */
+ cmpq $CHAR_PER_VEC, %rdx
+ jbe L(first_vec_x0)
+# endif
+ testl %eax, %eax
+ jz L(aligned_more)
+ tzcntl %eax, %eax
+# ifdef USE_AS_WMEMCHR
+ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ leaq (%rdi, %rax, CHAR_SIZE), %rax
# else
- jnz L(first_vec_x0)
+ addq %rdi, %rax
# endif
-
- /* Align data for aligned loads in the loop. */
- addq $VEC_SIZE, %rdi
- andl $(VEC_SIZE - 1), %ecx
- andq $-VEC_SIZE, %rdi
+ ret
# ifndef USE_AS_RAWMEMCHR
- /* Adjust length. */
- addq %rcx, %rdx
-
- subq $(VEC_SIZE * 4), %rdx
- jbe L(last_4x_vec_or_less)
-# endif
- jmp L(more_4x_vec)
+L(zero):
+ xorl %eax, %eax
+ ret
+ .p2align 5
+L(first_vec_x0):
+ /* Check if first match was before length. */
+ tzcntl %eax, %eax
+ xorl %ecx, %ecx
+ cmpl %eax, %edx
+ leaq (%rdi, %rax, CHAR_SIZE), %rax
+ cmovle %rcx, %rax
+ ret
+# else
+ /* NB: first_vec_x0 is 17 bytes which will leave
+ cross_page_boundary (which is relatively cold) close enough
+ to ideal alignment. So only realign L(cross_page_boundary) if
+ rawmemchr. */
.p2align 4
-L(cros_page_boundary):
- andl $(VEC_SIZE - 1), %ecx
+# endif
+L(cross_page_boundary):
+ /* Save pointer before aligning as its original value is
+ necessary for computer return address if byte is found or
+ adjusting length if it is not and this is memchr. */
+ movq %rdi, %rcx
+ /* Align data to VEC_SIZE. ALGN_PTR_REG is rcx for memchr and rdi
+ for rawmemchr. */
+ andq $-VEC_SIZE, %ALGN_PTR_REG
+ VPCMP $0, (%ALGN_PTR_REG), %YMMMATCH, %k0
+ kmovd %k0, %r8d
# ifdef USE_AS_WMEMCHR
- /* NB: Divide shift count by 4 since each bit in K1 represent 4
+ /* NB: Divide shift count by 4 since each bit in K0 represent 4
bytes. */
- movl %ecx, %SHIFT_REG
- sarl $2, %SHIFT_REG
+ sarl $2, %eax
+# endif
+# ifndef USE_AS_RAWMEMCHR
+ movl $(PAGE_SIZE / CHAR_SIZE), %esi
+ subl %eax, %esi
# endif
- andq $-VEC_SIZE, %rdi
- VPCMP $0, (%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
- /* Remove the leading bytes. */
- sarxl %SHIFT_REG, %eax, %eax
- testl %eax, %eax
- jz L(aligned_more)
- tzcntl %eax, %eax
# ifdef USE_AS_WMEMCHR
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %eax
+ andl $(CHAR_PER_VEC - 1), %eax
# endif
+ /* Remove the leading bytes. */
+ sarxl %eax, %r8d, %eax
# ifndef USE_AS_RAWMEMCHR
/* Check the end of data. */
- cmpq %rax, %rdx
- jbe L(zero)
+ cmpq %rsi, %rdx
+ jbe L(first_vec_x0)
+# endif
+ testl %eax, %eax
+ jz L(cross_page_continue)
+ tzcntl %eax, %eax
+# ifdef USE_AS_WMEMCHR
+ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ leaq (%RAW_PTR_REG, %rax, CHAR_SIZE), %rax
+# else
+ addq %RAW_PTR_REG, %rax
# endif
- addq %rdi, %rax
- addq %rcx, %rax
ret
.p2align 4
-L(aligned_more):
-# ifndef USE_AS_RAWMEMCHR
- /* Calculate "rdx + rcx - VEC_SIZE" with "rdx - (VEC_SIZE - rcx)"
- instead of "(rdx + rcx) - VEC_SIZE" to void possible addition
- overflow. */
- negq %rcx
- addq $VEC_SIZE, %rcx
+L(first_vec_x1):
+ tzcntl %eax, %eax
+ leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax
+ ret
- /* Check the end of data. */
- subq %rcx, %rdx
- jbe L(zero)
-# endif
+ .p2align 4
+L(first_vec_x2):
+ tzcntl %eax, %eax
+ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+ ret
- addq $VEC_SIZE, %rdi
+ .p2align 4
+L(first_vec_x3):
+ tzcntl %eax, %eax
+ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
+ ret
-# ifndef USE_AS_RAWMEMCHR
- subq $(VEC_SIZE * 4), %rdx
- jbe L(last_4x_vec_or_less)
-# endif
+ .p2align 4
+L(first_vec_x4):
+ tzcntl %eax, %eax
+ leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax
+ ret
-L(more_4x_vec):
+ .p2align 5
+L(aligned_more):
/* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
since data is only aligned to VEC_SIZE. */
- VPCMP $0, (%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
- testl %eax, %eax
- jnz L(first_vec_x0)
- VPCMP $0, VEC_SIZE(%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
+# ifndef USE_AS_RAWMEMCHR
+ /* Align data to VEC_SIZE. */
+L(cross_page_continue):
+ xorl %ecx, %ecx
+ subl %edi, %ecx
+ andq $-VEC_SIZE, %rdi
+ /* esi is for adjusting length to see if near the end. */
+ leal (VEC_SIZE * 5)(%rdi, %rcx), %esi
+# ifdef USE_AS_WMEMCHR
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarl $2, %esi
+# endif
+# else
+ andq $-VEC_SIZE, %rdi
+L(cross_page_continue):
+# endif
+ /* Load first VEC regardless. */
+ VPCMP $0, (VEC_SIZE)(%rdi), %YMMMATCH, %k0
+ kmovd %k0, %eax
+# ifndef USE_AS_RAWMEMCHR
+ /* Adjust length. If near end handle specially. */
+ subq %rsi, %rdx
+ jbe L(last_4x_vec_or_less)
+# endif
testl %eax, %eax
jnz L(first_vec_x1)
- VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
+ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0
+ kmovd %k0, %eax
testl %eax, %eax
jnz L(first_vec_x2)
- VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
+ VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k0
+ kmovd %k0, %eax
testl %eax, %eax
jnz L(first_vec_x3)
- addq $(VEC_SIZE * 4), %rdi
+ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0
+ kmovd %k0, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x4)
+
# ifndef USE_AS_RAWMEMCHR
- subq $(VEC_SIZE * 4), %rdx
- jbe L(last_4x_vec_or_less)
-# endif
+ /* Check if at last CHAR_PER_VEC * 4 length. */
+ subq $(CHAR_PER_VEC * 4), %rdx
+ jbe L(last_4x_vec_or_less_cmpeq)
+ addq $VEC_SIZE, %rdi
- /* Align data to 4 * VEC_SIZE. */
- movq %rdi, %rcx
- andl $(4 * VEC_SIZE - 1), %ecx
+ /* Align data to VEC_SIZE * 4 for the loop and readjust length.
+ */
+# ifdef USE_AS_WMEMCHR
+ movl %edi, %ecx
andq $-(4 * VEC_SIZE), %rdi
-
-# ifndef USE_AS_RAWMEMCHR
- /* Adjust length. */
+ andl $(VEC_SIZE * 4 - 1), %ecx
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarl $2, %ecx
addq %rcx, %rdx
+# else
+ addq %rdi, %rdx
+ andq $-(4 * VEC_SIZE), %rdi
+ subq %rdi, %rdx
+# endif
+# else
+ addq $VEC_SIZE, %rdi
+ andq $-(4 * VEC_SIZE), %rdi
# endif
+ vpxorq %XMMZERO, %XMMZERO, %XMMZERO
+
+ /* Compare 4 * VEC at a time forward. */
.p2align 4
L(loop_4x_vec):
- /* Compare 4 * VEC at a time forward. */
- VPCMP $0, (%rdi), %YMMMATCH, %k1
- VPCMP $0, VEC_SIZE(%rdi), %YMMMATCH, %k2
- kord %k1, %k2, %k5
- VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k3
- VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k4
-
- kord %k3, %k4, %k6
- kortestd %k5, %k6
- jnz L(4x_vec_end)
-
- addq $(VEC_SIZE * 4), %rdi
-
+ /* It would be possible to save some instructions using 4x VPCMP
+ but bottleneck on port 5 makes it not woth it. */
+ VPCMP $4, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k1
+ /* xor will set bytes match esi to zero. */
+ vpxorq (VEC_SIZE * 5)(%rdi), %YMMMATCH, %YMM2
+ vpxorq (VEC_SIZE * 6)(%rdi), %YMMMATCH, %YMM3
+ VPCMP $0, (VEC_SIZE * 7)(%rdi), %YMMMATCH, %k3
+ /* Reduce VEC2 / VEC3 with min and VEC1 with zero mask. */
+ VPMINU %YMM2, %YMM3, %YMM3 {%k1} {z}
+ VPCMP $0, %YMM3, %YMMZERO, %k2
# ifdef USE_AS_RAWMEMCHR
- jmp L(loop_4x_vec)
+ subq $-(VEC_SIZE * 4), %rdi
+ kortestd %k2, %k3
+ jz L(loop_4x_vec)
# else
- subq $(VEC_SIZE * 4), %rdx
+ kortestd %k2, %k3
+ jnz L(loop_4x_vec_end)
+
+ subq $-(VEC_SIZE * 4), %rdi
+
+ subq $(CHAR_PER_VEC * 4), %rdx
ja L(loop_4x_vec)
+ /* Fall through into less than 4 remaining vectors of length case.
+ */
+ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0
+ kmovd %k0, %eax
+ addq $(VEC_SIZE * 3), %rdi
+ .p2align 4
L(last_4x_vec_or_less):
- /* Less than 4 * VEC and aligned to VEC_SIZE. */
- addl $(VEC_SIZE * 2), %edx
- jle L(last_2x_vec)
-
- VPCMP $0, (%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
+ /* Check if first VEC contained match. */
testl %eax, %eax
- jnz L(first_vec_x0)
+ jnz L(first_vec_x1_check)
- VPCMP $0, VEC_SIZE(%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
- testl %eax, %eax
- jnz L(first_vec_x1)
+ /* If remaining length > CHAR_PER_VEC * 2. */
+ addl $(CHAR_PER_VEC * 2), %edx
+ jg L(last_4x_vec)
- VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
- testl %eax, %eax
+L(last_2x_vec):
+ /* If remaining length < CHAR_PER_VEC. */
+ addl $CHAR_PER_VEC, %edx
+ jle L(zero_end)
- jnz L(first_vec_x2_check)
- subl $VEC_SIZE, %edx
- jle L(zero)
+ /* Check VEC2 and compare any match with remaining length. */
+ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0
+ kmovd %k0, %eax
+ tzcntl %eax, %eax
+ cmpl %eax, %edx
+ jbe L(set_zero_end)
+ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+L(zero_end):
+ ret
- VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
- testl %eax, %eax
- jnz L(first_vec_x3_check)
+ .p2align 4
+L(first_vec_x1_check):
+ tzcntl %eax, %eax
+ /* Adjust length. */
+ subl $-(CHAR_PER_VEC * 4), %edx
+ /* Check if match within remaining length. */
+ cmpl %eax, %edx
+ jbe L(set_zero_end)
+ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax
+ ret
+L(set_zero_end):
xorl %eax, %eax
ret
.p2align 4
-L(last_2x_vec):
- addl $(VEC_SIZE * 2), %edx
- VPCMP $0, (%rdi), %YMMMATCH, %k1
+L(loop_4x_vec_end):
+# endif
+ /* rawmemchr will fall through into this if match was found in
+ loop. */
+
+ /* k1 has not of matches with VEC1. */
kmovd %k1, %eax
- testl %eax, %eax
+# ifdef USE_AS_WMEMCHR
+ subl $((1 << CHAR_PER_VEC) - 1), %eax
+# else
+ incl %eax
+# endif
+ jnz L(last_vec_x1_return)
- jnz L(first_vec_x0_check)
- subl $VEC_SIZE, %edx
- jle L(zero)
+ VPCMP $0, %YMM2, %YMMZERO, %k0
+ kmovd %k0, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x2_return)
- VPCMP $0, VEC_SIZE(%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
+ kmovd %k2, %eax
testl %eax, %eax
- jnz L(first_vec_x1_check)
- xorl %eax, %eax
- ret
+ jnz L(last_vec_x3_return)
- .p2align 4
-L(first_vec_x0_check):
+ kmovd %k3, %eax
tzcntl %eax, %eax
-# ifdef USE_AS_WMEMCHR
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %eax
+# ifdef USE_AS_RAWMEMCHR
+ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
+# else
+ leaq (VEC_SIZE * 7)(%rdi, %rax, CHAR_SIZE), %rax
# endif
- /* Check the end of data. */
- cmpq %rax, %rdx
- jbe L(zero)
- addq %rdi, %rax
ret
.p2align 4
-L(first_vec_x1_check):
+L(last_vec_x1_return):
tzcntl %eax, %eax
-# ifdef USE_AS_WMEMCHR
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %eax
-# endif
- /* Check the end of data. */
- cmpq %rax, %rdx
- jbe L(zero)
- addq $VEC_SIZE, %rax
+# ifdef USE_AS_RAWMEMCHR
+# ifdef USE_AS_WMEMCHR
+ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ leaq (%rdi, %rax, CHAR_SIZE), %rax
+# else
addq %rdi, %rax
- ret
-
- .p2align 4
-L(first_vec_x2_check):
- tzcntl %eax, %eax
-# ifdef USE_AS_WMEMCHR
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %eax
+# endif
+# else
+ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax
# endif
- /* Check the end of data. */
- cmpq %rax, %rdx
- jbe L(zero)
- addq $(VEC_SIZE * 2), %rax
- addq %rdi, %rax
ret
.p2align 4
-L(first_vec_x3_check):
+L(last_vec_x2_return):
tzcntl %eax, %eax
-# ifdef USE_AS_WMEMCHR
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %eax
+# ifdef USE_AS_RAWMEMCHR
+ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax
+# else
+ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ leaq (VEC_SIZE * 5)(%rdi, %rax, CHAR_SIZE), %rax
# endif
- /* Check the end of data. */
- cmpq %rax, %rdx
- jbe L(zero)
- addq $(VEC_SIZE * 3), %rax
- addq %rdi, %rax
ret
.p2align 4
-L(zero):
- xorl %eax, %eax
- ret
-# endif
-
- .p2align 4
-L(first_vec_x0):
+L(last_vec_x3_return):
tzcntl %eax, %eax
-# ifdef USE_AS_WMEMCHR
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- leaq (%rdi, %rax, 4), %rax
+# ifdef USE_AS_RAWMEMCHR
+ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
# else
- addq %rdi, %rax
+ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ leaq (VEC_SIZE * 6)(%rdi, %rax, CHAR_SIZE), %rax
# endif
ret
+
+# ifndef USE_AS_RAWMEMCHR
+L(last_4x_vec_or_less_cmpeq):
+ VPCMP $0, (VEC_SIZE * 5)(%rdi), %YMMMATCH, %k0
+ kmovd %k0, %eax
+ subq $-(VEC_SIZE * 4), %rdi
+ /* Check first VEC regardless. */
+ testl %eax, %eax
+ jnz L(first_vec_x1_check)
+
+ /* If remaining length <= CHAR_PER_VEC * 2. */
+ addl $(CHAR_PER_VEC * 2), %edx
+ jle L(last_2x_vec)
+
.p2align 4
-L(first_vec_x1):
+L(last_4x_vec):
+ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0
+ kmovd %k0, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x2)
+
+
+ VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k0
+ kmovd %k0, %eax
+ /* Create mask for possible matches within remaining length. */
+# ifdef USE_AS_WMEMCHR
+ movl $((1 << (CHAR_PER_VEC * 2)) - 1), %ecx
+ bzhil %edx, %ecx, %ecx
+# else
+ movq $-1, %rcx
+ bzhiq %rdx, %rcx, %rcx
+# endif
+ /* Test matches in data against length match. */
+ andl %ecx, %eax
+ jnz L(last_vec_x3)
+
+ /* If remaining length <= CHAR_PER_VEC * 3 (note this is after
+ remaining length was found to be > CHAR_PER_VEC * 2). */
+ subl $CHAR_PER_VEC, %edx
+ jbe L(zero_end2)
+
+
+ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0
+ kmovd %k0, %eax
+ /* Shift remaining length mask for last VEC. */
+# ifdef USE_AS_WMEMCHR
+ shrl $CHAR_PER_VEC, %ecx
+# else
+ shrq $CHAR_PER_VEC, %rcx
+# endif
+ andl %ecx, %eax
+ jz L(zero_end2)
tzcntl %eax, %eax
-# ifdef USE_AS_WMEMCHR
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- leaq VEC_SIZE(%rdi, %rax, 4), %rax
-# else
- addq $VEC_SIZE, %rax
- addq %rdi, %rax
-# endif
+ leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax
+L(zero_end2):
ret
- .p2align 4
-L(first_vec_x2):
+L(last_vec_x2):
tzcntl %eax, %eax
-# ifdef USE_AS_WMEMCHR
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- leaq (VEC_SIZE * 2)(%rdi, %rax, 4), %rax
-# else
- addq $(VEC_SIZE * 2), %rax
- addq %rdi, %rax
-# endif
+ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
ret
.p2align 4
-L(4x_vec_end):
- kmovd %k1, %eax
- testl %eax, %eax
- jnz L(first_vec_x0)
- kmovd %k2, %eax
- testl %eax, %eax
- jnz L(first_vec_x1)
- kmovd %k3, %eax
- testl %eax, %eax
- jnz L(first_vec_x2)
- kmovd %k4, %eax
- testl %eax, %eax
-L(first_vec_x3):
+L(last_vec_x3):
tzcntl %eax, %eax
-# ifdef USE_AS_WMEMCHR
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- leaq (VEC_SIZE * 3)(%rdi, %rax, 4), %rax
-# else
- addq $(VEC_SIZE * 3), %rax
- addq %rdi, %rax
-# endif
+ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
ret
+# endif
END (MEMCHR)
#endif
--
GitLab

View file

@ -1,30 +0,0 @@
From 6ea916adfa0ab9af6e7dc6adcf6f977dfe017835 Mon Sep 17 00:00:00 2001
From: Alice Xu <alice.d.xu@gmail.com>
Date: Fri, 7 May 2021 19:03:21 -0700
Subject: [PATCH] x86-64: Fix an unknown vector operation in memchr-evex.S
Content-type: text/plain; charset=UTF-8
Commit 2a76821c308 introduced an instruction that the assembler
rejects with an "unknown vector operation" error. Fixed it by writing
the masked operand as "ymm{k1}{z}" rather than "ymm {k1} {z}".
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
---
sysdeps/x86_64/multiarch/memchr-evex.S | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sysdeps/x86_64/multiarch/memchr-evex.S b/sysdeps/x86_64/multiarch/memchr-evex.S
index 81d5cd64..f3fdad4f 100644
--- a/sysdeps/x86_64/multiarch/memchr-evex.S
+++ b/sysdeps/x86_64/multiarch/memchr-evex.S
@@ -271,7 +271,7 @@ L(loop_4x_vec):
vpxorq (VEC_SIZE * 6)(%rdi), %YMMMATCH, %YMM3
VPCMP $0, (VEC_SIZE * 7)(%rdi), %YMMMATCH, %k3
/* Reduce VEC2 / VEC3 with min and VEC1 with zero mask. */
- VPMINU %YMM2, %YMM3, %YMM3 {%k1} {z}
+ VPMINU %YMM2, %YMM3, %YMM3{%k1}{z}
VPCMP $0, %YMM3, %YMMZERO, %k2
# ifdef USE_AS_RAWMEMCHR
subq $-(VEC_SIZE * 4), %rdi
--
GitLab

View file

@ -1,566 +0,0 @@
From a0db678071c60b6c47c468d231dd0b3694ba7a98 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Tue, 22 Jun 2021 20:42:10 -0700
Subject: [PATCH] x86-64: Move strlen.S to multiarch/strlen-vec.S
Content-type: text/plain; charset=UTF-8
Since strlen.S contains SSE2 version of strlen/strnlen and SSE4.1
version of wcslen/wcsnlen, move strlen.S to multiarch/strlen-vec.S
and include multiarch/strlen-vec.S from SSE2 and SSE4.1 variants.
This also removes the unused symbols __GI___strlen_sse2 and
__GI___wcsnlen_sse4_1.
---
sysdeps/x86_64/multiarch/strlen-sse2.S | 2 +-
sysdeps/x86_64/multiarch/strlen-vec.S | 257 ++++++++++++++++++++++
sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S | 2 +-
sysdeps/x86_64/strlen.S | 243 +-------------------
4 files changed, 262 insertions(+), 242 deletions(-)
create mode 100644 sysdeps/x86_64/multiarch/strlen-vec.S
Conflicts:
sysdeps/x86_64/strlen.S
(Copyright dates, URL)
diff --git a/sysdeps/x86_64/multiarch/strlen-sse2.S b/sysdeps/x86_64/multiarch/strlen-sse2.S
index 7bc57b8d..449c8a7f 100644
--- a/sysdeps/x86_64/multiarch/strlen-sse2.S
+++ b/sysdeps/x86_64/multiarch/strlen-sse2.S
@@ -20,4 +20,4 @@
# define strlen __strlen_sse2
#endif
-#include "../strlen.S"
+#include "strlen-vec.S"
diff --git a/sysdeps/x86_64/multiarch/strlen-vec.S b/sysdeps/x86_64/multiarch/strlen-vec.S
new file mode 100644
index 00000000..8f660bb9
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strlen-vec.S
@@ -0,0 +1,257 @@
+/* SSE2 version of strlen and SSE4.1 version of wcslen.
+ Copyright (C) 2012-2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#ifdef AS_WCSLEN
+# define PMINU pminud
+# define PCMPEQ pcmpeqd
+# define SHIFT_RETURN shrq $2, %rax
+#else
+# define PMINU pminub
+# define PCMPEQ pcmpeqb
+# define SHIFT_RETURN
+#endif
+
+/* Long lived register in strlen(s), strnlen(s, n) are:
+
+ %xmm3 - zero
+ %rdi - s
+ %r10 (s+n) & (~(64-1))
+ %r11 s+n
+*/
+
+
+.text
+ENTRY(strlen)
+
+/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */
+#define FIND_ZERO \
+ PCMPEQ (%rax), %xmm0; \
+ PCMPEQ 16(%rax), %xmm1; \
+ PCMPEQ 32(%rax), %xmm2; \
+ PCMPEQ 48(%rax), %xmm3; \
+ pmovmskb %xmm0, %esi; \
+ pmovmskb %xmm1, %edx; \
+ pmovmskb %xmm2, %r8d; \
+ pmovmskb %xmm3, %ecx; \
+ salq $16, %rdx; \
+ salq $16, %rcx; \
+ orq %rsi, %rdx; \
+ orq %r8, %rcx; \
+ salq $32, %rcx; \
+ orq %rcx, %rdx;
+
+#ifdef AS_STRNLEN
+/* Do not read anything when n==0. */
+ test %RSI_LP, %RSI_LP
+ jne L(n_nonzero)
+ xor %rax, %rax
+ ret
+L(n_nonzero):
+# ifdef AS_WCSLEN
+ shl $2, %RSI_LP
+# endif
+
+/* Initialize long lived registers. */
+
+ add %RDI_LP, %RSI_LP
+ mov %RSI_LP, %R10_LP
+ and $-64, %R10_LP
+ mov %RSI_LP, %R11_LP
+#endif
+
+ pxor %xmm0, %xmm0
+ pxor %xmm1, %xmm1
+ pxor %xmm2, %xmm2
+ pxor %xmm3, %xmm3
+ movq %rdi, %rax
+ movq %rdi, %rcx
+ andq $4095, %rcx
+/* Offsets 4032-4047 will be aligned into 4032 thus fit into page. */
+ cmpq $4047, %rcx
+/* We cannot unify this branching as it would be ~6 cycles slower. */
+ ja L(cross_page)
+
+#ifdef AS_STRNLEN
+/* Test if end is among first 64 bytes. */
+# define STRNLEN_PROLOG \
+ mov %r11, %rsi; \
+ subq %rax, %rsi; \
+ andq $-64, %rax; \
+ testq $-64, %rsi; \
+ je L(strnlen_ret)
+#else
+# define STRNLEN_PROLOG andq $-64, %rax;
+#endif
+
+/* Ignore bits in mask that come before start of string. */
+#define PROLOG(lab) \
+ movq %rdi, %rcx; \
+ xorq %rax, %rcx; \
+ STRNLEN_PROLOG; \
+ sarq %cl, %rdx; \
+ test %rdx, %rdx; \
+ je L(lab); \
+ bsfq %rdx, %rax; \
+ SHIFT_RETURN; \
+ ret
+
+#ifdef AS_STRNLEN
+ andq $-16, %rax
+ FIND_ZERO
+#else
+ /* Test first 16 bytes unaligned. */
+ movdqu (%rax), %xmm4
+ PCMPEQ %xmm0, %xmm4
+ pmovmskb %xmm4, %edx
+ test %edx, %edx
+ je L(next48_bytes)
+ bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. */
+ SHIFT_RETURN
+ ret
+
+L(next48_bytes):
+/* Same as FIND_ZERO except we do not check first 16 bytes. */
+ andq $-16, %rax
+ PCMPEQ 16(%rax), %xmm1
+ PCMPEQ 32(%rax), %xmm2
+ PCMPEQ 48(%rax), %xmm3
+ pmovmskb %xmm1, %edx
+ pmovmskb %xmm2, %r8d
+ pmovmskb %xmm3, %ecx
+ salq $16, %rdx
+ salq $16, %rcx
+ orq %r8, %rcx
+ salq $32, %rcx
+ orq %rcx, %rdx
+#endif
+
+ /* When no zero byte is found xmm1-3 are zero so we do not have to
+ zero them. */
+ PROLOG(loop)
+
+ .p2align 4
+L(cross_page):
+ andq $-64, %rax
+ FIND_ZERO
+ PROLOG(loop_init)
+
+#ifdef AS_STRNLEN
+/* We must do this check to correctly handle strnlen (s, -1). */
+L(strnlen_ret):
+ bts %rsi, %rdx
+ sarq %cl, %rdx
+ test %rdx, %rdx
+ je L(loop_init)
+ bsfq %rdx, %rax
+ SHIFT_RETURN
+ ret
+#endif
+ .p2align 4
+L(loop_init):
+ pxor %xmm1, %xmm1
+ pxor %xmm2, %xmm2
+ pxor %xmm3, %xmm3
+#ifdef AS_STRNLEN
+ .p2align 4
+L(loop):
+
+ addq $64, %rax
+ cmpq %rax, %r10
+ je L(exit_end)
+
+ movdqa (%rax), %xmm0
+ PMINU 16(%rax), %xmm0
+ PMINU 32(%rax), %xmm0
+ PMINU 48(%rax), %xmm0
+ PCMPEQ %xmm3, %xmm0
+ pmovmskb %xmm0, %edx
+ testl %edx, %edx
+ jne L(exit)
+ jmp L(loop)
+
+ .p2align 4
+L(exit_end):
+ cmp %rax, %r11
+ je L(first) /* Do not read when end is at page boundary. */
+ pxor %xmm0, %xmm0
+ FIND_ZERO
+
+L(first):
+ bts %r11, %rdx
+ bsfq %rdx, %rdx
+ addq %rdx, %rax
+ subq %rdi, %rax
+ SHIFT_RETURN
+ ret
+
+ .p2align 4
+L(exit):
+ pxor %xmm0, %xmm0
+ FIND_ZERO
+
+ bsfq %rdx, %rdx
+ addq %rdx, %rax
+ subq %rdi, %rax
+ SHIFT_RETURN
+ ret
+
+#else
+
+ /* Main loop. Unrolled twice to improve L2 cache performance on core2. */
+ .p2align 4
+L(loop):
+
+ movdqa 64(%rax), %xmm0
+ PMINU 80(%rax), %xmm0
+ PMINU 96(%rax), %xmm0
+ PMINU 112(%rax), %xmm0
+ PCMPEQ %xmm3, %xmm0
+ pmovmskb %xmm0, %edx
+ testl %edx, %edx
+ jne L(exit64)
+
+ subq $-128, %rax
+
+ movdqa (%rax), %xmm0
+ PMINU 16(%rax), %xmm0
+ PMINU 32(%rax), %xmm0
+ PMINU 48(%rax), %xmm0
+ PCMPEQ %xmm3, %xmm0
+ pmovmskb %xmm0, %edx
+ testl %edx, %edx
+ jne L(exit0)
+ jmp L(loop)
+
+ .p2align 4
+L(exit64):
+ addq $64, %rax
+L(exit0):
+ pxor %xmm0, %xmm0
+ FIND_ZERO
+
+ bsfq %rdx, %rdx
+ addq %rdx, %rax
+ subq %rdi, %rax
+ SHIFT_RETURN
+ ret
+
+#endif
+
+END(strlen)
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
index a8cab0cb..5fa51fe0 100644
--- a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
+++ b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
@@ -2,4 +2,4 @@
#define AS_STRNLEN
#define strlen __wcsnlen_sse4_1
-#include "../strlen.S"
+#include "strlen-vec.S"
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index f845f3d4..ad047d84 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -1,5 +1,5 @@
-/* SSE2 version of strlen/wcslen.
- Copyright (C) 2012-2018 Free Software Foundation, Inc.
+/* SSE2 version of strlen.
+ Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,243 +16,6 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
+#include "multiarch/strlen-vec.S"
-#ifdef AS_WCSLEN
-# define PMINU pminud
-# define PCMPEQ pcmpeqd
-# define SHIFT_RETURN shrq $2, %rax
-#else
-# define PMINU pminub
-# define PCMPEQ pcmpeqb
-# define SHIFT_RETURN
-#endif
-
-/* Long lived register in strlen(s), strnlen(s, n) are:
-
- %xmm3 - zero
- %rdi - s
- %r10 (s+n) & (~(64-1))
- %r11 s+n
-*/
-
-
-.text
-ENTRY(strlen)
-
-/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */
-#define FIND_ZERO \
- PCMPEQ (%rax), %xmm0; \
- PCMPEQ 16(%rax), %xmm1; \
- PCMPEQ 32(%rax), %xmm2; \
- PCMPEQ 48(%rax), %xmm3; \
- pmovmskb %xmm0, %esi; \
- pmovmskb %xmm1, %edx; \
- pmovmskb %xmm2, %r8d; \
- pmovmskb %xmm3, %ecx; \
- salq $16, %rdx; \
- salq $16, %rcx; \
- orq %rsi, %rdx; \
- orq %r8, %rcx; \
- salq $32, %rcx; \
- orq %rcx, %rdx;
-
-#ifdef AS_STRNLEN
-/* Do not read anything when n==0. */
- test %RSI_LP, %RSI_LP
- jne L(n_nonzero)
- xor %rax, %rax
- ret
-L(n_nonzero):
-# ifdef AS_WCSLEN
- shl $2, %RSI_LP
-# endif
-
-/* Initialize long lived registers. */
-
- add %RDI_LP, %RSI_LP
- mov %RSI_LP, %R10_LP
- and $-64, %R10_LP
- mov %RSI_LP, %R11_LP
-#endif
-
- pxor %xmm0, %xmm0
- pxor %xmm1, %xmm1
- pxor %xmm2, %xmm2
- pxor %xmm3, %xmm3
- movq %rdi, %rax
- movq %rdi, %rcx
- andq $4095, %rcx
-/* Offsets 4032-4047 will be aligned into 4032 thus fit into page. */
- cmpq $4047, %rcx
-/* We cannot unify this branching as it would be ~6 cycles slower. */
- ja L(cross_page)
-
-#ifdef AS_STRNLEN
-/* Test if end is among first 64 bytes. */
-# define STRNLEN_PROLOG \
- mov %r11, %rsi; \
- subq %rax, %rsi; \
- andq $-64, %rax; \
- testq $-64, %rsi; \
- je L(strnlen_ret)
-#else
-# define STRNLEN_PROLOG andq $-64, %rax;
-#endif
-
-/* Ignore bits in mask that come before start of string. */
-#define PROLOG(lab) \
- movq %rdi, %rcx; \
- xorq %rax, %rcx; \
- STRNLEN_PROLOG; \
- sarq %cl, %rdx; \
- test %rdx, %rdx; \
- je L(lab); \
- bsfq %rdx, %rax; \
- SHIFT_RETURN; \
- ret
-
-#ifdef AS_STRNLEN
- andq $-16, %rax
- FIND_ZERO
-#else
- /* Test first 16 bytes unaligned. */
- movdqu (%rax), %xmm4
- PCMPEQ %xmm0, %xmm4
- pmovmskb %xmm4, %edx
- test %edx, %edx
- je L(next48_bytes)
- bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. */
- SHIFT_RETURN
- ret
-
-L(next48_bytes):
-/* Same as FIND_ZERO except we do not check first 16 bytes. */
- andq $-16, %rax
- PCMPEQ 16(%rax), %xmm1
- PCMPEQ 32(%rax), %xmm2
- PCMPEQ 48(%rax), %xmm3
- pmovmskb %xmm1, %edx
- pmovmskb %xmm2, %r8d
- pmovmskb %xmm3, %ecx
- salq $16, %rdx
- salq $16, %rcx
- orq %r8, %rcx
- salq $32, %rcx
- orq %rcx, %rdx
-#endif
-
- /* When no zero byte is found xmm1-3 are zero so we do not have to
- zero them. */
- PROLOG(loop)
-
- .p2align 4
-L(cross_page):
- andq $-64, %rax
- FIND_ZERO
- PROLOG(loop_init)
-
-#ifdef AS_STRNLEN
-/* We must do this check to correctly handle strnlen (s, -1). */
-L(strnlen_ret):
- bts %rsi, %rdx
- sarq %cl, %rdx
- test %rdx, %rdx
- je L(loop_init)
- bsfq %rdx, %rax
- SHIFT_RETURN
- ret
-#endif
- .p2align 4
-L(loop_init):
- pxor %xmm1, %xmm1
- pxor %xmm2, %xmm2
- pxor %xmm3, %xmm3
-#ifdef AS_STRNLEN
- .p2align 4
-L(loop):
-
- addq $64, %rax
- cmpq %rax, %r10
- je L(exit_end)
-
- movdqa (%rax), %xmm0
- PMINU 16(%rax), %xmm0
- PMINU 32(%rax), %xmm0
- PMINU 48(%rax), %xmm0
- PCMPEQ %xmm3, %xmm0
- pmovmskb %xmm0, %edx
- testl %edx, %edx
- jne L(exit)
- jmp L(loop)
-
- .p2align 4
-L(exit_end):
- cmp %rax, %r11
- je L(first) /* Do not read when end is at page boundary. */
- pxor %xmm0, %xmm0
- FIND_ZERO
-
-L(first):
- bts %r11, %rdx
- bsfq %rdx, %rdx
- addq %rdx, %rax
- subq %rdi, %rax
- SHIFT_RETURN
- ret
-
- .p2align 4
-L(exit):
- pxor %xmm0, %xmm0
- FIND_ZERO
-
- bsfq %rdx, %rdx
- addq %rdx, %rax
- subq %rdi, %rax
- SHIFT_RETURN
- ret
-
-#else
-
- /* Main loop. Unrolled twice to improve L2 cache performance on core2. */
- .p2align 4
-L(loop):
-
- movdqa 64(%rax), %xmm0
- PMINU 80(%rax), %xmm0
- PMINU 96(%rax), %xmm0
- PMINU 112(%rax), %xmm0
- PCMPEQ %xmm3, %xmm0
- pmovmskb %xmm0, %edx
- testl %edx, %edx
- jne L(exit64)
-
- subq $-128, %rax
-
- movdqa (%rax), %xmm0
- PMINU 16(%rax), %xmm0
- PMINU 32(%rax), %xmm0
- PMINU 48(%rax), %xmm0
- PCMPEQ %xmm3, %xmm0
- pmovmskb %xmm0, %edx
- testl %edx, %edx
- jne L(exit0)
- jmp L(loop)
-
- .p2align 4
-L(exit64):
- addq $64, %rax
-L(exit0):
- pxor %xmm0, %xmm0
- FIND_ZERO
-
- bsfq %rdx, %rdx
- addq %rdx, %rax
- subq %rdi, %rax
- SHIFT_RETURN
- ret
-
-#endif
-
-END(strlen)
libc_hidden_builtin_def (strlen)
--
GitLab

View file

@ -1,181 +0,0 @@
From 6f573a27b6c8b4236445810a44660612323f5a73 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Wed, 23 Jun 2021 01:19:34 -0400
Subject: [PATCH] x86-64: Add wcslen optimize for sse4.1
Content-type: text/plain; charset=UTF-8
No bug. This commit adds the ifunc / build infrastructure
necessary for wcslen to prefer the sse4.1 implementation
in strlen-vec.S. test-wcslen.c is passing.
Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
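
As context for what "ifunc / build infrastructure" means here, a rough
standalone sketch of the mechanism using the GNU ifunc attribute
directly (hypothetical names; glibc itself goes through its internal
libc_ifunc_redirected macro and __get_cpu_features, not this
simplified form):

#include <stddef.h>

/* Baseline implementation the resolver can always fall back to.  */
static size_t
my_wcslen_generic (const wchar_t *s)
{
  size_t n = 0;
  while (s[n] != L'\0')
    n++;
  return n;
}

/* Resolver: runs once at load time and returns the chosen function.
   glibc's IFUNC_SELECTOR would consult the CPU features here; this
   sketch unconditionally picks the generic version.  */
static size_t (*resolve_my_wcslen (void)) (const wchar_t *)
{
  return my_wcslen_generic;
}

/* Every later call to my_wcslen jumps straight to the resolved
   target.  */
size_t my_wcslen (const wchar_t *s)
     __attribute__ ((ifunc ("resolve_my_wcslen")));
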
---
sysdeps/x86_64/multiarch/Makefile | 4 +-
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 3 ++
sysdeps/x86_64/multiarch/ifunc-wcslen.h | 52 ++++++++++++++++++++++
sysdeps/x86_64/multiarch/wcslen-sse4_1.S | 4 ++
sysdeps/x86_64/multiarch/wcslen.c | 2 +-
sysdeps/x86_64/multiarch/wcsnlen.c | 34 +-------------
6 files changed, 63 insertions(+), 36 deletions(-)
create mode 100644 sysdeps/x86_64/multiarch/ifunc-wcslen.h
create mode 100644 sysdeps/x86_64/multiarch/wcslen-sse4_1.S
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 491c7698..65fde4eb 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -93,8 +93,8 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \
wcscpy-ssse3 wcscpy-c \
wcschr-sse2 wcschr-avx2 \
wcsrchr-sse2 wcsrchr-avx2 \
- wcsnlen-sse4_1 wcsnlen-c \
- wcslen-sse2 wcslen-avx2 wcsnlen-avx2 \
+ wcslen-sse2 wcslen-sse4_1 wcslen-avx2 \
+ wcsnlen-c wcsnlen-sse4_1 wcsnlen-avx2 \
wcschr-avx2-rtm \
wcscmp-avx2-rtm \
wcslen-avx2-rtm \
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index f1a6460a..580913ca 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -657,6 +657,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcslen_evex)
+ IFUNC_IMPL_ADD (array, i, wcsnlen,
+ CPU_FEATURE_USABLE (SSE4_1),
+ __wcsnlen_sse4_1)
IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2))
/* Support sysdeps/x86_64/multiarch/wcsnlen.c. */
diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
new file mode 100644
index 00000000..39e33473
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
@@ -0,0 +1,52 @@
+/* Common definition for ifunc selections for wcslen and wcsnlen
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ {
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+ return OPTIMIZE (evex);
+
+ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
+ return OPTIMIZE (avx2_rtm);
+
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ return OPTIMIZE (avx2);
+ }
+
+ if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
+ return OPTIMIZE (sse4_1);
+
+ return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
new file mode 100644
index 00000000..7e62621a
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
@@ -0,0 +1,4 @@
+#define AS_WCSLEN
+#define strlen __wcslen_sse4_1
+
+#include "strlen-vec.S"
diff --git a/sysdeps/x86_64/multiarch/wcslen.c b/sysdeps/x86_64/multiarch/wcslen.c
index 6d06e47c..3b04b75b 100644
--- a/sysdeps/x86_64/multiarch/wcslen.c
+++ b/sysdeps/x86_64/multiarch/wcslen.c
@@ -24,7 +24,7 @@
# undef __wcslen
# define SYMBOL_NAME wcslen
-# include "ifunc-avx2.h"
+# include "ifunc-wcslen.h"
libc_ifunc_redirected (__redirect_wcslen, __wcslen, IFUNC_SELECTOR ());
weak_alias (__wcslen, wcslen);
diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
index 20b731ae..06736410 100644
--- a/sysdeps/x86_64/multiarch/wcsnlen.c
+++ b/sysdeps/x86_64/multiarch/wcsnlen.c
@@ -24,39 +24,7 @@
# undef __wcsnlen
# define SYMBOL_NAME wcsnlen
-# include <init-arch.h>
-
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
-
-static inline void *
-IFUNC_SELECTOR (void)
-{
- const struct cpu_features* cpu_features = __get_cpu_features ();
-
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
- && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
- {
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
- return OPTIMIZE (evex);
-
- if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
- return OPTIMIZE (avx2_rtm);
-
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
- return OPTIMIZE (avx2);
- }
-
- if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
- return OPTIMIZE (sse4_1);
-
- return OPTIMIZE (sse2);
-}
+# include "ifunc-wcslen.h"
libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
weak_alias (__wcsnlen, wcsnlen);
--
GitLab

View file

@ -1,396 +0,0 @@
From 231c56760c1e2ded21ad96bbb860b1f08c556c7a Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Mon, 21 Jan 2019 11:27:25 -0800
Subject: [PATCH] x86-64 memcpy: Properly handle the length parameter [BZ#
24097]
Content-type: text/plain; charset=UTF-8
On x32, the size_t parameter may be passed in the lower 32 bits of a
64-bit register with non-zero upper 32 bits. The string/memory
functions written in assembly must either use only the lower 32 bits
of a 64-bit register as the length or clear the upper 32 bits before
using the full 64-bit register for the length.
This patch fixes memcpy for x32. Tested on x86-64 and x32. On x86-64,
libc.so is the same with and without the fix.
[BZ# 24097]
CVE-2019-6488
* sysdeps/x86_64/multiarch/memcpy-ssse3-back.S: Use RDX_LP for
length. Clear the upper 32 bits of RDX register.
* sysdeps/x86_64/multiarch/memcpy-ssse3.S: Likewise.
* sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S:
Likewise.
* sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:
Likewise.
* sysdeps/x86_64/x32/Makefile (tests): Add tst-size_t-memcpy.
* sysdeps/x86_64/x32/tst-size_t-memcpy.c: New file.
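
A rough C model of the failure mode the new test exercises (sketch
with hypothetical names, loosely following the test-size_t.h harness
pattern): on x32, size_t and pointers are 4 bytes each, so the 8-byte
struct below travels in a single 64-bit register, with the length in
the low half and unrelated pointer bits in the high half.

#include <stddef.h>
#include <string.h>

/* On x32 this struct is passed in one 64-bit register: len occupies
   bits 0-31 and p occupies bits 32-63.  On x86-64 proper it spans two
   registers, so the upper bits of the length are naturally zero.  */
struct arg_with_len
{
  size_t len;   /* The intended length: low 32 bits only.  */
  void *p;      /* Unrelated pointer bits sharing the register.  */
};

static void *
__attribute__ ((noinline))
do_copy (struct arg_with_len dst, struct arg_with_len src)
{
  /* A conforming memcpy must use only the low 32 bits of the length
     register here; reading all 64 bits would request a huge copy.  */
  return memcpy (dst.p, src.p, dst.len);
}
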
---
sysdeps/x86_64/multiarch/memcpy-ssse3-back.S | 17 ++++--
sysdeps/x86_64/multiarch/memcpy-ssse3.S | 17 ++++--
.../multiarch/memmove-avx512-no-vzeroupper.S | 16 +++--
.../multiarch/memmove-vec-unaligned-erms.S | 54 +++++++++--------
sysdeps/x86_64/x32/Makefile | 2 +-
sysdeps/x86_64/x32/tst-size_t-memcpy.c | 58 +++++++++++++++++++
6 files changed, 122 insertions(+), 42 deletions(-)
create mode 100644 sysdeps/x86_64/x32/tst-size_t-memcpy.c
Conflicts:
ChangeLog
(removed)
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index 3cd11233..568eebd3 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -45,28 +45,33 @@
.section .text.ssse3,"ax",@progbits
#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
ENTRY (MEMPCPY_CHK)
- cmpq %rdx, %rcx
+ cmp %RDX_LP, %RCX_LP
jb HIDDEN_JUMPTARGET (__chk_fail)
END (MEMPCPY_CHK)
ENTRY (MEMPCPY)
- movq %rdi, %rax
- addq %rdx, %rax
+ mov %RDI_LP, %RAX_LP
+ add %RDX_LP, %RAX_LP
jmp L(start)
END (MEMPCPY)
#endif
#if !defined USE_AS_BCOPY
ENTRY (MEMCPY_CHK)
- cmpq %rdx, %rcx
+ cmp %RDX_LP, %RCX_LP
jb HIDDEN_JUMPTARGET (__chk_fail)
END (MEMCPY_CHK)
#endif
ENTRY (MEMCPY)
- mov %rdi, %rax
+ mov %RDI_LP, %RAX_LP
#ifdef USE_AS_MEMPCPY
- add %rdx, %rax
+ add %RDX_LP, %RAX_LP
+#endif
+
+#ifdef __ILP32__
+ /* Clear the upper 32 bits. */
+ mov %edx, %edx
#endif
#ifdef USE_AS_MEMMOVE
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
index 0240bfa3..0bd5ee99 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
@@ -45,28 +45,33 @@
.section .text.ssse3,"ax",@progbits
#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
ENTRY (MEMPCPY_CHK)
- cmpq %rdx, %rcx
+ cmp %RDX_LP, %RCX_LP
jb HIDDEN_JUMPTARGET (__chk_fail)
END (MEMPCPY_CHK)
ENTRY (MEMPCPY)
- movq %rdi, %rax
- addq %rdx, %rax
+ mov %RDI_LP, %RAX_LP
+ add %RDX_LP, %RAX_LP
jmp L(start)
END (MEMPCPY)
#endif
#if !defined USE_AS_BCOPY
ENTRY (MEMCPY_CHK)
- cmpq %rdx, %rcx
+ cmp %RDX_LP, %RCX_LP
jb HIDDEN_JUMPTARGET (__chk_fail)
END (MEMCPY_CHK)
#endif
ENTRY (MEMCPY)
- mov %rdi, %rax
+ mov %RDI_LP, %RAX_LP
#ifdef USE_AS_MEMPCPY
- add %rdx, %rax
+ add %RDX_LP, %RAX_LP
+#endif
+
+#ifdef __ILP32__
+ /* Clear the upper 32 bits. */
+ mov %edx, %edx
#endif
#ifdef USE_AS_MEMMOVE
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
index effc3ac2..6ca2bbc9 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
@@ -24,27 +24,31 @@
.section .text.avx512,"ax",@progbits
ENTRY (__mempcpy_chk_avx512_no_vzeroupper)
- cmpq %rdx, %rcx
+ cmp %RDX_LP, %RCX_LP
jb HIDDEN_JUMPTARGET (__chk_fail)
END (__mempcpy_chk_avx512_no_vzeroupper)
ENTRY (__mempcpy_avx512_no_vzeroupper)
- movq %rdi, %rax
- addq %rdx, %rax
+ mov %RDI_LP, %RAX_LP
+ add %RDX_LP, %RAX_LP
jmp L(start)
END (__mempcpy_avx512_no_vzeroupper)
ENTRY (__memmove_chk_avx512_no_vzeroupper)
- cmpq %rdx, %rcx
+ cmp %RDX_LP, %RCX_LP
jb HIDDEN_JUMPTARGET (__chk_fail)
END (__memmove_chk_avx512_no_vzeroupper)
ENTRY (__memmove_avx512_no_vzeroupper)
- mov %rdi, %rax
+ mov %RDI_LP, %RAX_LP
# ifdef USE_AS_MEMPCPY
- add %rdx, %rax
+ add %RDX_LP, %RAX_LP
# endif
L(start):
+# ifdef __ILP32__
+ /* Clear the upper 32 bits. */
+ mov %edx, %edx
+# endif
lea (%rsi, %rdx), %rcx
lea (%rdi, %rdx), %r9
cmp $512, %rdx
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index c952576c..274aa1c7 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -95,20 +95,20 @@
.section SECTION(.text),"ax",@progbits
#if defined SHARED && IS_IN (libc)
ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned))
- cmpq %rdx, %rcx
+ cmp %RDX_LP, %RCX_LP
jb HIDDEN_JUMPTARGET (__chk_fail)
END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned))
#endif
ENTRY (MEMPCPY_SYMBOL (__mempcpy, unaligned))
- movq %rdi, %rax
- addq %rdx, %rax
+ mov %RDI_LP, %RAX_LP
+ add %RDX_LP, %RAX_LP
jmp L(start)
END (MEMPCPY_SYMBOL (__mempcpy, unaligned))
#if defined SHARED && IS_IN (libc)
ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned))
- cmpq %rdx, %rcx
+ cmp %RDX_LP, %RCX_LP
jb HIDDEN_JUMPTARGET (__chk_fail)
END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned))
#endif
@@ -116,9 +116,13 @@ END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned))
ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned))
movq %rdi, %rax
L(start):
- cmpq $VEC_SIZE, %rdx
+# ifdef __ILP32__
+ /* Clear the upper 32 bits. */
+ movl %edx, %edx
+# endif
+ cmp $VEC_SIZE, %RDX_LP
jb L(less_vec)
- cmpq $(VEC_SIZE * 2), %rdx
+ cmp $(VEC_SIZE * 2), %RDX_LP
ja L(more_2x_vec)
#if !defined USE_MULTIARCH || !IS_IN (libc)
L(last_2x_vec):
@@ -138,38 +142,38 @@ END (MEMMOVE_SYMBOL (__memmove, unaligned))
# if VEC_SIZE == 16
ENTRY (__mempcpy_chk_erms)
- cmpq %rdx, %rcx
+ cmp %RDX_LP, %RCX_LP
jb HIDDEN_JUMPTARGET (__chk_fail)
END (__mempcpy_chk_erms)
/* Only used to measure performance of REP MOVSB. */
ENTRY (__mempcpy_erms)
- movq %rdi, %rax
+ mov %RDI_LP, %RAX_LP
/* Skip zero length. */
- testq %rdx, %rdx
+ test %RDX_LP, %RDX_LP
jz 2f
- addq %rdx, %rax
+ add %RDX_LP, %RAX_LP
jmp L(start_movsb)
END (__mempcpy_erms)
ENTRY (__memmove_chk_erms)
- cmpq %rdx, %rcx
+ cmp %RDX_LP, %RCX_LP
jb HIDDEN_JUMPTARGET (__chk_fail)
END (__memmove_chk_erms)
ENTRY (__memmove_erms)
movq %rdi, %rax
/* Skip zero length. */
- testq %rdx, %rdx
+ test %RDX_LP, %RDX_LP
jz 2f
L(start_movsb):
- movq %rdx, %rcx
- cmpq %rsi, %rdi
+ mov %RDX_LP, %RCX_LP
+ cmp %RSI_LP, %RDI_LP
jb 1f
/* Source == destination is less common. */
je 2f
- leaq (%rsi,%rcx), %rdx
- cmpq %rdx, %rdi
+ lea (%rsi,%rcx), %RDX_LP
+ cmp %RDX_LP, %RDI_LP
jb L(movsb_backward)
1:
rep movsb
@@ -189,20 +193,20 @@ strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
# ifdef SHARED
ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
- cmpq %rdx, %rcx
+ cmp %RDX_LP, %RCX_LP
jb HIDDEN_JUMPTARGET (__chk_fail)
END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
# endif
ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
- movq %rdi, %rax
- addq %rdx, %rax
+ mov %RDI_LP, %RAX_LP
+ add %RDX_LP, %RAX_LP
jmp L(start_erms)
END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
# ifdef SHARED
ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
- cmpq %rdx, %rcx
+ cmp %RDX_LP, %RCX_LP
jb HIDDEN_JUMPTARGET (__chk_fail)
END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
# endif
@@ -210,9 +214,13 @@ END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
movq %rdi, %rax
L(start_erms):
- cmpq $VEC_SIZE, %rdx
+# ifdef __ILP32__
+ /* Clear the upper 32 bits. */
+ movl %edx, %edx
+# endif
+ cmp $VEC_SIZE, %RDX_LP
jb L(less_vec)
- cmpq $(VEC_SIZE * 2), %rdx
+ cmp $(VEC_SIZE * 2), %RDX_LP
ja L(movsb_more_2x_vec)
L(last_2x_vec):
/* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
@@ -236,7 +244,7 @@ L(movsb):
/* Avoid slow backward REP MOVSB. */
jb L(more_8x_vec_backward)
1:
- movq %rdx, %rcx
+ mov %RDX_LP, %RCX_LP
rep movsb
L(nop):
ret
diff --git a/sysdeps/x86_64/x32/Makefile b/sysdeps/x86_64/x32/Makefile
index ddec7f04..2fe1e5ac 100644
--- a/sysdeps/x86_64/x32/Makefile
+++ b/sysdeps/x86_64/x32/Makefile
@@ -6,7 +6,7 @@ CFLAGS-s_llround.c += -fno-builtin-lround
endif
ifeq ($(subdir),string)
-tests += tst-size_t-memchr tst-size_t-memcmp
+tests += tst-size_t-memchr tst-size_t-memcmp tst-size_t-memcpy
endif
ifeq ($(subdir),wcsmbs)
diff --git a/sysdeps/x86_64/x32/tst-size_t-memcpy.c b/sysdeps/x86_64/x32/tst-size_t-memcpy.c
new file mode 100644
index 00000000..66b71e17
--- /dev/null
+++ b/sysdeps/x86_64/x32/tst-size_t-memcpy.c
@@ -0,0 +1,58 @@
+/* Test memcpy with size_t in the lower 32 bits of 64-bit register.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define TEST_NAME "memcpy"
+#include "test-size_t.h"
+
+IMPL (memcpy, 1)
+
+typedef void *(*proto_t) (void *, const void *, size_t);
+
+static void *
+__attribute__ ((noinline, noclone))
+do_memcpy (parameter_t a, parameter_t b)
+{
+ return CALL (&b, a.p, b.p, a.len);
+}
+
+static int
+test_main (void)
+{
+ test_init ();
+
+ parameter_t dest = { { page_size }, buf1 };
+ parameter_t src = { { 0 }, buf2 };
+
+ int ret = 0;
+ FOR_EACH_IMPL (impl, 0)
+ {
+ src.fn = impl->fn;
+ do_memcpy (dest, src);
+ int res = memcmp (dest.p, src.p, dest.len);
+ if (res)
+ {
+ error (0, 0, "Wrong result in function %s: %i != 0",
+ impl->name, res);
+ ret = 1;
+ }
+ }
+
+ return ret ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
+#include <support/test-driver.c>
--
GitLab

View file

@ -1,497 +0,0 @@
From a775a7a3eb1e85b54af0b4ee5ff4dcf66772a1fb Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Wed, 23 Jun 2021 01:56:29 -0400
Subject: [PATCH] x86: Fix overflow bug in wcsnlen-sse4_1 and wcsnlen-avx2 [BZ
#27974]
Content-type: text/plain; charset=UTF-8
This commit fixes the bug mentioned in the previous commit.
The previous wcsnlen implementations in these files relied on
maxlen * sizeof(wchar_t) not overflowing, which is not guaranteed
by the standard. The new overflow tests added in the previous
commit now pass (as well as all the other tests).
Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
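
In C terms the guard added to strlen-vec.S amounts to the dispatch
below (sketch with hypothetical names; the assembly realizes the test
as sar $62 on the length register, which is nonzero exactly when
maxlen >= 2^62, i.e. when scaling by glibc's 4-byte wchar_t cannot
fit in 64 bits):

#include <stdint.h>
#include <wchar.h>

/* If scaling maxlen to a byte count would wrap, the buffer cannot
   really hold maxlen wide characters, so a terminating L'\0' must lie
   in valid memory and the unbounded scan gives the same answer.  */
static size_t
wcsnlen_checked (const wchar_t *s, size_t maxlen)
{
  if (maxlen > SIZE_MAX / sizeof (wchar_t))
    return wcslen (s);           /* maxlen * sizeof (wchar_t) wraps.  */
  return wcsnlen (s, maxlen);    /* Byte bound is safe to form.  */
}
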
---
sysdeps/x86_64/multiarch/strlen-avx2.S | 130 ++++++++++++++++++-------
sysdeps/x86_64/multiarch/strlen-vec.S | 15 ++-
2 files changed, 107 insertions(+), 38 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S
index be8a5db5..37688966 100644
--- a/sysdeps/x86_64/multiarch/strlen-avx2.S
+++ b/sysdeps/x86_64/multiarch/strlen-avx2.S
@@ -44,21 +44,21 @@
# define VEC_SIZE 32
# define PAGE_SIZE 4096
+# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
.section SECTION(.text),"ax",@progbits
ENTRY (STRLEN)
# ifdef USE_AS_STRNLEN
/* Check zero length. */
+# ifdef __ILP32__
+ /* Clear upper bits. */
+ and %RSI_LP, %RSI_LP
+# else
test %RSI_LP, %RSI_LP
+# endif
jz L(zero)
/* Store max len in R8_LP before adjusting if using WCSLEN. */
mov %RSI_LP, %R8_LP
-# ifdef USE_AS_WCSLEN
- shl $2, %RSI_LP
-# elif defined __ILP32__
- /* Clear the upper 32 bits. */
- movl %esi, %esi
-# endif
# endif
movl %edi, %eax
movq %rdi, %rdx
@@ -72,10 +72,10 @@ ENTRY (STRLEN)
/* Check the first VEC_SIZE bytes. */
VPCMPEQ (%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+ vpmovmskb %ymm1, %eax
# ifdef USE_AS_STRNLEN
/* If length < VEC_SIZE handle special. */
- cmpq $VEC_SIZE, %rsi
+ cmpq $CHAR_PER_VEC, %rsi
jbe L(first_vec_x0)
# endif
/* If empty continue to aligned_more. Otherwise return bit
@@ -84,6 +84,7 @@ ENTRY (STRLEN)
jz L(aligned_more)
tzcntl %eax, %eax
# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get wchar_t count. */
shrl $2, %eax
# endif
VZEROUPPER_RETURN
@@ -97,9 +98,14 @@ L(zero):
L(first_vec_x0):
/* Set bit for max len so that tzcnt will return min of max len
and position of first match. */
+# ifdef USE_AS_WCSLEN
+ /* NB: Multiply length by 4 to get byte count. */
+ sall $2, %esi
+# endif
btsq %rsi, %rax
tzcntl %eax, %eax
# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get wchar_t count. */
shrl $2, %eax
# endif
VZEROUPPER_RETURN
@@ -113,14 +119,19 @@ L(first_vec_x1):
# ifdef USE_AS_STRNLEN
/* Use ecx which was computed earlier to compute correct value.
*/
+# ifdef USE_AS_WCSLEN
+ leal -(VEC_SIZE * 4 + 1)(%rax, %rcx, 4), %eax
+# else
subl $(VEC_SIZE * 4 + 1), %ecx
addl %ecx, %eax
+# endif
# else
subl %edx, %edi
incl %edi
addl %edi, %eax
# endif
# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get wchar_t count. */
shrl $2, %eax
# endif
VZEROUPPER_RETURN
@@ -133,14 +144,19 @@ L(first_vec_x2):
# ifdef USE_AS_STRNLEN
/* Use ecx which was computed earlier to compute correct value.
*/
+# ifdef USE_AS_WCSLEN
+ leal -(VEC_SIZE * 3 + 1)(%rax, %rcx, 4), %eax
+# else
subl $(VEC_SIZE * 3 + 1), %ecx
addl %ecx, %eax
+# endif
# else
subl %edx, %edi
addl $(VEC_SIZE + 1), %edi
addl %edi, %eax
# endif
# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get wchar_t count. */
shrl $2, %eax
# endif
VZEROUPPER_RETURN
@@ -153,14 +169,19 @@ L(first_vec_x3):
# ifdef USE_AS_STRNLEN
/* Use ecx which was computed earlier to compute correct value.
*/
+# ifdef USE_AS_WCSLEN
+ leal -(VEC_SIZE * 2 + 1)(%rax, %rcx, 4), %eax
+# else
subl $(VEC_SIZE * 2 + 1), %ecx
addl %ecx, %eax
+# endif
# else
subl %edx, %edi
addl $(VEC_SIZE * 2 + 1), %edi
addl %edi, %eax
# endif
# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get wchar_t count. */
shrl $2, %eax
# endif
VZEROUPPER_RETURN
@@ -173,14 +194,19 @@ L(first_vec_x4):
# ifdef USE_AS_STRNLEN
/* Use ecx which was computed earlier to compute correct value.
*/
+# ifdef USE_AS_WCSLEN
+ leal -(VEC_SIZE * 1 + 1)(%rax, %rcx, 4), %eax
+# else
subl $(VEC_SIZE + 1), %ecx
addl %ecx, %eax
+# endif
# else
subl %edx, %edi
addl $(VEC_SIZE * 3 + 1), %edi
addl %edi, %eax
# endif
# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get wchar_t count. */
shrl $2, %eax
# endif
VZEROUPPER_RETURN
@@ -195,10 +221,14 @@ L(cross_page_continue):
/* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
since data is only aligned to VEC_SIZE. */
# ifdef USE_AS_STRNLEN
- /* + 1 because rdi is aligned to VEC_SIZE - 1. + CHAR_SIZE because
- it simplies the logic in last_4x_vec_or_less. */
+ /* + 1 because rdi is aligned to VEC_SIZE - 1. + CHAR_SIZE
+ because it simplifies the logic in last_4x_vec_or_less. */
leaq (VEC_SIZE * 4 + CHAR_SIZE + 1)(%rdi), %rcx
subq %rdx, %rcx
+# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarl $2, %ecx
+# endif
# endif
/* Load first VEC regardless. */
VPCMPEQ 1(%rdi), %ymm0, %ymm1
@@ -207,34 +237,38 @@ L(cross_page_continue):
subq %rcx, %rsi
jb L(last_4x_vec_or_less)
# endif
- vpmovmskb %ymm1, %eax
+ vpmovmskb %ymm1, %eax
testl %eax, %eax
jnz L(first_vec_x1)
VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+ vpmovmskb %ymm1, %eax
testl %eax, %eax
jnz L(first_vec_x2)
VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+ vpmovmskb %ymm1, %eax
testl %eax, %eax
jnz L(first_vec_x3)
VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+ vpmovmskb %ymm1, %eax
testl %eax, %eax
jnz L(first_vec_x4)
/* Align data to VEC_SIZE * 4 - 1. */
# ifdef USE_AS_STRNLEN
/* Before adjusting length check if at last VEC_SIZE * 4. */
- cmpq $(VEC_SIZE * 4 - 1), %rsi
+ cmpq $(CHAR_PER_VEC * 4 - 1), %rsi
jbe L(last_4x_vec_or_less_load)
incq %rdi
movl %edi, %ecx
orq $(VEC_SIZE * 4 - 1), %rdi
andl $(VEC_SIZE * 4 - 1), %ecx
+# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarl $2, %ecx
+# endif
/* Readjust length. */
addq %rcx, %rsi
# else
@@ -246,13 +280,13 @@ L(cross_page_continue):
L(loop_4x_vec):
# ifdef USE_AS_STRNLEN
/* Break if at end of length. */
- subq $(VEC_SIZE * 4), %rsi
+ subq $(CHAR_PER_VEC * 4), %rsi
jb L(last_4x_vec_or_less_cmpeq)
# endif
- /* Save some code size by microfusing VPMINU with the load. Since
- the matches in ymm2/ymm4 can only be returned if there where no
- matches in ymm1/ymm3 respectively there is no issue with overlap.
- */
+ /* Save some code size by microfusing VPMINU with the load.
+ Since the matches in ymm2/ymm4 can only be returned if there
+ were no matches in ymm1/ymm3 respectively there is no issue
+ with overlap. */
vmovdqa 1(%rdi), %ymm1
VPMINU (VEC_SIZE + 1)(%rdi), %ymm1, %ymm2
vmovdqa (VEC_SIZE * 2 + 1)(%rdi), %ymm3
@@ -260,7 +294,7 @@ L(loop_4x_vec):
VPMINU %ymm2, %ymm4, %ymm5
VPCMPEQ %ymm5, %ymm0, %ymm5
- vpmovmskb %ymm5, %ecx
+ vpmovmskb %ymm5, %ecx
subq $-(VEC_SIZE * 4), %rdi
testl %ecx, %ecx
@@ -268,27 +302,28 @@ L(loop_4x_vec):
VPCMPEQ %ymm1, %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+ vpmovmskb %ymm1, %eax
subq %rdx, %rdi
testl %eax, %eax
jnz L(last_vec_return_x0)
VPCMPEQ %ymm2, %ymm0, %ymm2
- vpmovmskb %ymm2, %eax
+ vpmovmskb %ymm2, %eax
testl %eax, %eax
jnz L(last_vec_return_x1)
/* Combine last 2 VEC. */
VPCMPEQ %ymm3, %ymm0, %ymm3
- vpmovmskb %ymm3, %eax
- /* rcx has combined result from all 4 VEC. It will only be used if
- the first 3 other VEC all did not contain a match. */
+ vpmovmskb %ymm3, %eax
+ /* rcx has combined result from all 4 VEC. It will only be used
+ if the first 3 other VEC all did not contain a match. */
salq $32, %rcx
orq %rcx, %rax
tzcntq %rax, %rax
subq $(VEC_SIZE * 2 - 1), %rdi
addq %rdi, %rax
# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get wchar_t count. */
shrq $2, %rax
# endif
VZEROUPPER_RETURN
@@ -297,15 +332,19 @@ L(loop_4x_vec):
# ifdef USE_AS_STRNLEN
.p2align 4
L(last_4x_vec_or_less_load):
- /* Depending on entry adjust rdi / prepare first VEC in ymm1. */
+ /* Depending on entry adjust rdi / prepare first VEC in ymm1.
+ */
subq $-(VEC_SIZE * 4), %rdi
L(last_4x_vec_or_less_cmpeq):
VPCMPEQ 1(%rdi), %ymm0, %ymm1
L(last_4x_vec_or_less):
-
- vpmovmskb %ymm1, %eax
- /* If remaining length > VEC_SIZE * 2. This works if esi is off by
- VEC_SIZE * 4. */
+# ifdef USE_AS_WCSLEN
+ /* NB: Multiply length by 4 to get byte count. */
+ sall $2, %esi
+# endif
+ vpmovmskb %ymm1, %eax
+ /* If remaining length > VEC_SIZE * 2. This works if esi is off
+ by VEC_SIZE * 4. */
testl $(VEC_SIZE * 2), %esi
jnz L(last_4x_vec)
@@ -320,7 +359,7 @@ L(last_4x_vec_or_less):
jb L(max)
VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+ vpmovmskb %ymm1, %eax
tzcntl %eax, %eax
/* Check the end of data. */
cmpl %eax, %esi
@@ -329,6 +368,7 @@ L(last_4x_vec_or_less):
addl $(VEC_SIZE + 1), %eax
addq %rdi, %rax
# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get wchar_t count. */
shrq $2, %rax
# endif
VZEROUPPER_RETURN
@@ -340,6 +380,7 @@ L(last_vec_return_x0):
subq $(VEC_SIZE * 4 - 1), %rdi
addq %rdi, %rax
# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get wchar_t count. */
shrq $2, %rax
# endif
VZEROUPPER_RETURN
@@ -350,6 +391,7 @@ L(last_vec_return_x1):
subq $(VEC_SIZE * 3 - 1), %rdi
addq %rdi, %rax
# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get wchar_t count. */
shrq $2, %rax
# endif
VZEROUPPER_RETURN
@@ -366,6 +408,7 @@ L(last_vec_x1_check):
incl %eax
addq %rdi, %rax
# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get wchar_t count. */
shrq $2, %rax
# endif
VZEROUPPER_RETURN
@@ -381,14 +424,14 @@ L(last_4x_vec):
jnz L(last_vec_x1)
VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+ vpmovmskb %ymm1, %eax
testl %eax, %eax
jnz L(last_vec_x2)
/* Normalize length. */
andl $(VEC_SIZE * 4 - 1), %esi
VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+ vpmovmskb %ymm1, %eax
testl %eax, %eax
jnz L(last_vec_x3)
@@ -396,7 +439,7 @@ L(last_4x_vec):
jb L(max)
VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+ vpmovmskb %ymm1, %eax
tzcntl %eax, %eax
/* Check the end of data. */
cmpl %eax, %esi
@@ -405,6 +448,7 @@ L(last_4x_vec):
addl $(VEC_SIZE * 3 + 1), %eax
addq %rdi, %rax
# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get wchar_t count. */
shrq $2, %rax
# endif
VZEROUPPER_RETURN
@@ -419,6 +463,7 @@ L(last_vec_x1):
incl %eax
addq %rdi, %rax
# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get wchar_t count. */
shrq $2, %rax
# endif
VZEROUPPER_RETURN
@@ -432,6 +477,7 @@ L(last_vec_x2):
addl $(VEC_SIZE + 1), %eax
addq %rdi, %rax
# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get wchar_t count. */
shrq $2, %rax
# endif
VZEROUPPER_RETURN
@@ -447,6 +493,7 @@ L(last_vec_x3):
addl $(VEC_SIZE * 2 + 1), %eax
addq %rdi, %rax
# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get wchar_t count. */
shrq $2, %rax
# endif
VZEROUPPER_RETURN
@@ -455,13 +502,13 @@ L(max_end):
VZEROUPPER_RETURN
# endif
- /* Cold case for crossing page with first load. */
+ /* Cold case for crossing page with first load. */
.p2align 4
L(cross_page_boundary):
/* Align data to VEC_SIZE - 1. */
orq $(VEC_SIZE - 1), %rdi
VPCMPEQ -(VEC_SIZE - 1)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
+ vpmovmskb %ymm1, %eax
/* Remove the leading bytes. sarxl only uses bits [5:0] of COUNT
so no need to manually mod rdx. */
sarxl %edx, %eax, %eax
@@ -470,6 +517,10 @@ L(cross_page_boundary):
jnz L(cross_page_less_vec)
leaq 1(%rdi), %rcx
subq %rdx, %rcx
+# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get wchar_t count. */
+ shrl $2, %ecx
+# endif
/* Check length. */
cmpq %rsi, %rcx
jb L(cross_page_continue)
@@ -479,6 +530,7 @@ L(cross_page_boundary):
jz L(cross_page_continue)
tzcntl %eax, %eax
# ifdef USE_AS_WCSLEN
+ /* NB: Divide length by 4 to get wchar_t count. */
shrl $2, %eax
# endif
# endif
@@ -489,6 +541,10 @@ L(return_vzeroupper):
.p2align 4
L(cross_page_less_vec):
tzcntl %eax, %eax
+# ifdef USE_AS_WCSLEN
+ /* NB: Multiply length by 4 to get byte count. */
+ sall $2, %esi
+# endif
cmpq %rax, %rsi
cmovb %esi, %eax
# ifdef USE_AS_WCSLEN
diff --git a/sysdeps/x86_64/multiarch/strlen-vec.S b/sysdeps/x86_64/multiarch/strlen-vec.S
index 8f660bb9..439e486a 100644
--- a/sysdeps/x86_64/multiarch/strlen-vec.S
+++ b/sysdeps/x86_64/multiarch/strlen-vec.S
@@ -65,12 +65,25 @@ ENTRY(strlen)
ret
L(n_nonzero):
# ifdef AS_WCSLEN
- shl $2, %RSI_LP
+/* Check for overflow from maxlen * sizeof(wchar_t). If it would
+ overflow, the only way this program doesn't have undefined behavior
+ is if there is a null terminator in valid memory, so wcslen will
+ suffice. */
+ mov %RSI_LP, %R10_LP
+ sar $62, %R10_LP
+ test %R10_LP, %R10_LP
+ jnz __wcslen_sse4_1
+ sal $2, %RSI_LP
# endif
+
/* Initialize long lived registers. */
add %RDI_LP, %RSI_LP
+# ifdef AS_WCSLEN
+/* Check for overflow again from s + maxlen * sizeof(wchar_t). */
+ jbe __wcslen_sse4_1
+# endif
mov %RSI_LP, %R10_LP
and $-64, %R10_LP
mov %RSI_LP, %R11_LP
--
GitLab

View file

@ -1,745 +0,0 @@
From 4ba65586847751372520a36757c17f114588794e Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Mon, 19 Apr 2021 19:36:06 -0400
Subject: [PATCH] x86: Optimize strlen-evex.S
Content-type: text/plain; charset=UTF-8
No bug. This commit optimizes strlen-evex.S. The
optimizations are mostly small things but they add up to roughly
10-30% performance improvement for strlen. The results for strnlen are
a bit more ambiguous. test-strlen, test-strnlen, test-wcslen, and
test-wcsnlen are all passing.
Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
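
One of those small things is visible in the new first_vec_x0 path:
setting the bit at index maxlen in the match mask before tzcnt makes a
single instruction return min(maxlen, position of first match). A C
sketch of the idea (hypothetical helper; assumes maxlen < 64):

#include <stdint.h>

/* btsq %rsi, %rax; tzcnt -- with the bit at index maxlen forced on,
   the lowest set bit is either a real match or the length limit,
   whichever comes first.  */
static unsigned int
first_match_or_limit (uint64_t match_mask, unsigned int maxlen)
{
  match_mask |= UINT64_C (1) << maxlen;
  return (unsigned int) __builtin_ctzll (match_mask);
}
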
---
sysdeps/x86_64/multiarch/strlen-evex.S | 581 ++++++++++++++-----------
1 file changed, 317 insertions(+), 264 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/strlen-evex.S b/sysdeps/x86_64/multiarch/strlen-evex.S
index 05838190..4bf6874b 100644
--- a/sysdeps/x86_64/multiarch/strlen-evex.S
+++ b/sysdeps/x86_64/multiarch/strlen-evex.S
@@ -29,11 +29,13 @@
# ifdef USE_AS_WCSLEN
# define VPCMP vpcmpd
# define VPMINU vpminud
-# define SHIFT_REG r9d
+# define SHIFT_REG ecx
+# define CHAR_SIZE 4
# else
# define VPCMP vpcmpb
# define VPMINU vpminub
-# define SHIFT_REG ecx
+# define SHIFT_REG edx
+# define CHAR_SIZE 1
# endif
# define XMMZERO xmm16
@@ -46,132 +48,165 @@
# define YMM6 ymm22
# define VEC_SIZE 32
+# define PAGE_SIZE 4096
+# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
.section .text.evex,"ax",@progbits
ENTRY (STRLEN)
# ifdef USE_AS_STRNLEN
- /* Check for zero length. */
+ /* Check zero length. */
test %RSI_LP, %RSI_LP
jz L(zero)
-# ifdef USE_AS_WCSLEN
- shl $2, %RSI_LP
-# elif defined __ILP32__
+# ifdef __ILP32__
/* Clear the upper 32 bits. */
movl %esi, %esi
# endif
mov %RSI_LP, %R8_LP
# endif
- movl %edi, %ecx
- movq %rdi, %rdx
+ movl %edi, %eax
vpxorq %XMMZERO, %XMMZERO, %XMMZERO
-
+ /* Clear high bits from edi. Only keeping bits relevant to page
+ cross check. */
+ andl $(PAGE_SIZE - 1), %eax
/* Check if we may cross page boundary with one vector load. */
- andl $(2 * VEC_SIZE - 1), %ecx
- cmpl $VEC_SIZE, %ecx
- ja L(cros_page_boundary)
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
+ ja L(cross_page_boundary)
/* Check the first VEC_SIZE bytes. Each bit in K0 represents a
null byte. */
VPCMP $0, (%rdi), %YMMZERO, %k0
kmovd %k0, %eax
- testl %eax, %eax
-
# ifdef USE_AS_STRNLEN
- jnz L(first_vec_x0_check)
- /* Adjust length and check the end of data. */
- subq $VEC_SIZE, %rsi
- jbe L(max)
-# else
- jnz L(first_vec_x0)
+ /* If length < CHAR_PER_VEC handle special. */
+ cmpq $CHAR_PER_VEC, %rsi
+ jbe L(first_vec_x0)
# endif
-
- /* Align data for aligned loads in the loop. */
- addq $VEC_SIZE, %rdi
- andl $(VEC_SIZE - 1), %ecx
- andq $-VEC_SIZE, %rdi
-
+ testl %eax, %eax
+ jz L(aligned_more)
+ tzcntl %eax, %eax
+ ret
# ifdef USE_AS_STRNLEN
- /* Adjust length. */
- addq %rcx, %rsi
+L(zero):
+ xorl %eax, %eax
+ ret
- subq $(VEC_SIZE * 4), %rsi
- jbe L(last_4x_vec_or_less)
+ .p2align 4
+L(first_vec_x0):
+ /* Set bit for max len so that tzcnt will return min of max len
+ and position of first match. */
+ btsq %rsi, %rax
+ tzcntl %eax, %eax
+ ret
# endif
- jmp L(more_4x_vec)
.p2align 4
-L(cros_page_boundary):
- andl $(VEC_SIZE - 1), %ecx
- andq $-VEC_SIZE, %rdi
-
-# ifdef USE_AS_WCSLEN
- /* NB: Divide shift count by 4 since each bit in K0 represent 4
- bytes. */
- movl %ecx, %SHIFT_REG
- sarl $2, %SHIFT_REG
+L(first_vec_x1):
+ tzcntl %eax, %eax
+ /* Safe to use 32 bit instructions as these are only called for
+ size = [1, 159]. */
+# ifdef USE_AS_STRNLEN
+ /* Use ecx which was computed earlier to compute correct value.
+ */
+ leal -(CHAR_PER_VEC * 4 + 1)(%rcx, %rax), %eax
+# else
+ subl %edx, %edi
+# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarl $2, %edi
+# endif
+ leal CHAR_PER_VEC(%rdi, %rax), %eax
# endif
- VPCMP $0, (%rdi), %YMMZERO, %k0
- kmovd %k0, %eax
+ ret
- /* Remove the leading bytes. */
- sarxl %SHIFT_REG, %eax, %eax
- testl %eax, %eax
- jz L(aligned_more)
+ .p2align 4
+L(first_vec_x2):
tzcntl %eax, %eax
-# ifdef USE_AS_WCSLEN
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %eax
-# endif
+ /* Safe to use 32 bit instructions as these are only called for
+ size = [1, 159]. */
# ifdef USE_AS_STRNLEN
- /* Check the end of data. */
- cmpq %rax, %rsi
- jbe L(max)
-# endif
- addq %rdi, %rax
- addq %rcx, %rax
- subq %rdx, %rax
-# ifdef USE_AS_WCSLEN
- shrq $2, %rax
+ /* Use ecx which was computed earlier to compute correct value.
+ */
+ leal -(CHAR_PER_VEC * 3 + 1)(%rcx, %rax), %eax
+# else
+ subl %edx, %edi
+# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarl $2, %edi
+# endif
+ leal (CHAR_PER_VEC * 2)(%rdi, %rax), %eax
# endif
ret
.p2align 4
-L(aligned_more):
+L(first_vec_x3):
+ tzcntl %eax, %eax
+ /* Safe to use 32 bit instructions as these are only called for
+ size = [1, 159]. */
# ifdef USE_AS_STRNLEN
- /* "rcx" is less than VEC_SIZE. Calculate "rdx + rcx - VEC_SIZE"
- with "rdx - (VEC_SIZE - rcx)" instead of "(rdx + rcx) - VEC_SIZE"
- to void possible addition overflow. */
- negq %rcx
- addq $VEC_SIZE, %rcx
-
- /* Check the end of data. */
- subq %rcx, %rsi
- jbe L(max)
+ /* Use ecx which was computed earlier to compute correct value.
+ */
+ leal -(CHAR_PER_VEC * 2 + 1)(%rcx, %rax), %eax
+# else
+ subl %edx, %edi
+# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarl $2, %edi
+# endif
+ leal (CHAR_PER_VEC * 3)(%rdi, %rax), %eax
# endif
+ ret
- addq $VEC_SIZE, %rdi
-
+ .p2align 4
+L(first_vec_x4):
+ tzcntl %eax, %eax
+ /* Safe to use 32 bit instructions as these are only called for
+ size = [1, 159]. */
# ifdef USE_AS_STRNLEN
- subq $(VEC_SIZE * 4), %rsi
- jbe L(last_4x_vec_or_less)
+ /* Use ecx which was computed earlier to compute correct value.
+ */
+ leal -(CHAR_PER_VEC + 1)(%rcx, %rax), %eax
+# else
+ subl %edx, %edi
+# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarl $2, %edi
+# endif
+ leal (CHAR_PER_VEC * 4)(%rdi, %rax), %eax
# endif
+ ret
-L(more_4x_vec):
+ .p2align 5
+L(aligned_more):
+ movq %rdi, %rdx
+ /* Align data to VEC_SIZE. */
+ andq $-(VEC_SIZE), %rdi
+L(cross_page_continue):
/* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
since data is only aligned to VEC_SIZE. */
- VPCMP $0, (%rdi), %YMMZERO, %k0
- kmovd %k0, %eax
- testl %eax, %eax
- jnz L(first_vec_x0)
-
+# ifdef USE_AS_STRNLEN
+ /* + CHAR_SIZE because it simplifies the logic in
+ last_4x_vec_or_less. */
+ leaq (VEC_SIZE * 5 + CHAR_SIZE)(%rdi), %rcx
+ subq %rdx, %rcx
+# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarl $2, %ecx
+# endif
+# endif
+ /* Load first VEC regardless. */
VPCMP $0, VEC_SIZE(%rdi), %YMMZERO, %k0
+# ifdef USE_AS_STRNLEN
+ /* Adjust length. If near end handle specially. */
+ subq %rcx, %rsi
+ jb L(last_4x_vec_or_less)
+# endif
kmovd %k0, %eax
testl %eax, %eax
jnz L(first_vec_x1)
VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0
kmovd %k0, %eax
- testl %eax, %eax
+ test %eax, %eax
jnz L(first_vec_x2)
VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0
@@ -179,258 +214,276 @@ L(more_4x_vec):
testl %eax, %eax
jnz L(first_vec_x3)
- addq $(VEC_SIZE * 4), %rdi
-
-# ifdef USE_AS_STRNLEN
- subq $(VEC_SIZE * 4), %rsi
- jbe L(last_4x_vec_or_less)
-# endif
-
- /* Align data to 4 * VEC_SIZE. */
- movq %rdi, %rcx
- andl $(4 * VEC_SIZE - 1), %ecx
- andq $-(4 * VEC_SIZE), %rdi
+ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMZERO, %k0
+ kmovd %k0, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x4)
+ addq $VEC_SIZE, %rdi
# ifdef USE_AS_STRNLEN
- /* Adjust length. */
+ /* Check if at last VEC_SIZE * 4 length. */
+ cmpq $(CHAR_PER_VEC * 4 - 1), %rsi
+ jbe L(last_4x_vec_or_less_load)
+ movl %edi, %ecx
+ andl $(VEC_SIZE * 4 - 1), %ecx
+# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarl $2, %ecx
+# endif
+ /* Readjust length. */
addq %rcx, %rsi
# endif
+ /* Align data to VEC_SIZE * 4. */
+ andq $-(VEC_SIZE * 4), %rdi
+ /* Compare 4 * VEC at a time forward. */
.p2align 4
L(loop_4x_vec):
- /* Compare 4 * VEC at a time forward. */
- VMOVA (%rdi), %YMM1
- VMOVA VEC_SIZE(%rdi), %YMM2
- VMOVA (VEC_SIZE * 2)(%rdi), %YMM3
- VMOVA (VEC_SIZE * 3)(%rdi), %YMM4
-
- VPMINU %YMM1, %YMM2, %YMM5
- VPMINU %YMM3, %YMM4, %YMM6
+ /* Load first VEC regardless. */
+ VMOVA (VEC_SIZE * 4)(%rdi), %YMM1
+# ifdef USE_AS_STRNLEN
+ /* Break if at end of length. */
+ subq $(CHAR_PER_VEC * 4), %rsi
+ jb L(last_4x_vec_or_less_cmpeq)
+# endif
+ /* Save some code size by microfusing VPMINU with the load. Since
+ the matches in ymm2/ymm4 can only be returned if there were no
+ matches in ymm1/ymm3 respectively there is no issue with overlap.
+ */
+ VPMINU (VEC_SIZE * 5)(%rdi), %YMM1, %YMM2
+ VMOVA (VEC_SIZE * 6)(%rdi), %YMM3
+ VPMINU (VEC_SIZE * 7)(%rdi), %YMM3, %YMM4
+
+ VPCMP $0, %YMM2, %YMMZERO, %k0
+ VPCMP $0, %YMM4, %YMMZERO, %k1
+ subq $-(VEC_SIZE * 4), %rdi
+ kortestd %k0, %k1
+ jz L(loop_4x_vec)
+
+ /* Check if end was in first half. */
+ kmovd %k0, %eax
+ subq %rdx, %rdi
+# ifdef USE_AS_WCSLEN
+ shrq $2, %rdi
+# endif
+ testl %eax, %eax
+ jz L(second_vec_return)
- VPMINU %YMM5, %YMM6, %YMM5
- VPCMP $0, %YMM5, %YMMZERO, %k0
- ktestd %k0, %k0
- jnz L(4x_vec_end)
+ VPCMP $0, %YMM1, %YMMZERO, %k2
+ kmovd %k2, %edx
+ /* Combine VEC1 matches (edx) with VEC2 matches (eax). */
+# ifdef USE_AS_WCSLEN
+ sall $CHAR_PER_VEC, %eax
+ orl %edx, %eax
+ tzcntl %eax, %eax
+# else
+ salq $CHAR_PER_VEC, %rax
+ orq %rdx, %rax
+ tzcntq %rax, %rax
+# endif
+ addq %rdi, %rax
+ ret
- addq $(VEC_SIZE * 4), %rdi
-# ifndef USE_AS_STRNLEN
- jmp L(loop_4x_vec)
-# else
- subq $(VEC_SIZE * 4), %rsi
- ja L(loop_4x_vec)
+# ifdef USE_AS_STRNLEN
+L(last_4x_vec_or_less_load):
+ /* Depending on entry adjust rdi / prepare first VEC in YMM1. */
+ VMOVA (VEC_SIZE * 4)(%rdi), %YMM1
+L(last_4x_vec_or_less_cmpeq):
+ VPCMP $0, %YMM1, %YMMZERO, %k0
+ addq $(VEC_SIZE * 3), %rdi
L(last_4x_vec_or_less):
- /* Less than 4 * VEC and aligned to VEC_SIZE. */
- addl $(VEC_SIZE * 2), %esi
- jle L(last_2x_vec)
-
- VPCMP $0, (%rdi), %YMMZERO, %k0
kmovd %k0, %eax
+ /* If remaining length > VEC_SIZE * 2. This works if esi is off by
+ VEC_SIZE * 4. */
+ testl $(CHAR_PER_VEC * 2), %esi
+ jnz L(last_4x_vec)
+
+	/* The length may be off by CHAR_PER_VEC * 4 in either direction,
+	   depending on where this was called from.  Masking it to
+	   CHAR_PER_VEC * 4 - 1 fixes that.  */
+ andl $(CHAR_PER_VEC * 4 - 1), %esi
testl %eax, %eax
- jnz L(first_vec_x0)
+ jnz L(last_vec_x1_check)
- VPCMP $0, VEC_SIZE(%rdi), %YMMZERO, %k0
- kmovd %k0, %eax
- testl %eax, %eax
- jnz L(first_vec_x1)
+ /* Check the end of data. */
+ subl $CHAR_PER_VEC, %esi
+ jb L(max)
VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0
kmovd %k0, %eax
- testl %eax, %eax
- jnz L(first_vec_x2_check)
- subl $VEC_SIZE, %esi
- jle L(max)
+ tzcntl %eax, %eax
+ /* Check the end of data. */
+ cmpl %eax, %esi
+ jb L(max)
- VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0
- kmovd %k0, %eax
- testl %eax, %eax
- jnz L(first_vec_x3_check)
+ subq %rdx, %rdi
+# ifdef USE_AS_WCSLEN
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarq $2, %rdi
+# endif
+ leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax
+ ret
+L(max):
movq %r8, %rax
+ ret
+# endif
+
+	/* Placed here in strnlen so that the jcc to
+	   L(last_4x_vec_or_less) in the 4x VEC loop can use a 2-byte
+	   encoding.  */
+ .p2align 4
+L(second_vec_return):
+ VPCMP $0, %YMM3, %YMMZERO, %k0
+ /* Combine YMM3 matches (k0) with YMM4 matches (k1). */
+# ifdef USE_AS_WCSLEN
+ kunpckbw %k0, %k1, %k0
+ kmovd %k0, %eax
+ tzcntl %eax, %eax
+# else
+ kunpckdq %k0, %k1, %k0
+ kmovq %k0, %rax
+ tzcntq %rax, %rax
+# endif
+ leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax
+ ret
+
+
+# ifdef USE_AS_STRNLEN
+L(last_vec_x1_check):
+ tzcntl %eax, %eax
+ /* Check the end of data. */
+ cmpl %eax, %esi
+ jb L(max)
+ subq %rdx, %rdi
# ifdef USE_AS_WCSLEN
- shrq $2, %rax
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarq $2, %rdi
# endif
+ leaq (CHAR_PER_VEC)(%rdi, %rax), %rax
ret
.p2align 4
-L(last_2x_vec):
- addl $(VEC_SIZE * 2), %esi
+L(last_4x_vec):
+ /* Test first 2x VEC normally. */
+ testl %eax, %eax
+ jnz L(last_vec_x1)
- VPCMP $0, (%rdi), %YMMZERO, %k0
+ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0
kmovd %k0, %eax
testl %eax, %eax
- jnz L(first_vec_x0_check)
- subl $VEC_SIZE, %esi
- jle L(max)
+ jnz L(last_vec_x2)
- VPCMP $0, VEC_SIZE(%rdi), %YMMZERO, %k0
+ /* Normalize length. */
+ andl $(CHAR_PER_VEC * 4 - 1), %esi
+ VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0
kmovd %k0, %eax
testl %eax, %eax
- jnz L(first_vec_x1_check)
- movq %r8, %rax
-# ifdef USE_AS_WCSLEN
- shrq $2, %rax
-# endif
- ret
+ jnz L(last_vec_x3)
- .p2align 4
-L(first_vec_x0_check):
+ /* Check the end of data. */
+ subl $(CHAR_PER_VEC * 3), %esi
+ jb L(max)
+
+ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMZERO, %k0
+ kmovd %k0, %eax
tzcntl %eax, %eax
-# ifdef USE_AS_WCSLEN
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %eax
-# endif
/* Check the end of data. */
- cmpq %rax, %rsi
- jbe L(max)
- addq %rdi, %rax
- subq %rdx, %rax
+ cmpl %eax, %esi
+ jb L(max_end)
+
+ subq %rdx, %rdi
# ifdef USE_AS_WCSLEN
- shrq $2, %rax
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarq $2, %rdi
# endif
+ leaq (CHAR_PER_VEC * 4)(%rdi, %rax), %rax
ret
.p2align 4
-L(first_vec_x1_check):
+L(last_vec_x1):
tzcntl %eax, %eax
+ subq %rdx, %rdi
# ifdef USE_AS_WCSLEN
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %eax
-# endif
- /* Check the end of data. */
- cmpq %rax, %rsi
- jbe L(max)
- addq $VEC_SIZE, %rax
- addq %rdi, %rax
- subq %rdx, %rax
-# ifdef USE_AS_WCSLEN
- shrq $2, %rax
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarq $2, %rdi
# endif
+ leaq (CHAR_PER_VEC)(%rdi, %rax), %rax
ret
.p2align 4
-L(first_vec_x2_check):
+L(last_vec_x2):
tzcntl %eax, %eax
+ subq %rdx, %rdi
# ifdef USE_AS_WCSLEN
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %eax
-# endif
- /* Check the end of data. */
- cmpq %rax, %rsi
- jbe L(max)
- addq $(VEC_SIZE * 2), %rax
- addq %rdi, %rax
- subq %rdx, %rax
-# ifdef USE_AS_WCSLEN
- shrq $2, %rax
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarq $2, %rdi
# endif
+ leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax
ret
.p2align 4
-L(first_vec_x3_check):
+L(last_vec_x3):
tzcntl %eax, %eax
-# ifdef USE_AS_WCSLEN
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %eax
-# endif
+ subl $(CHAR_PER_VEC * 2), %esi
/* Check the end of data. */
- cmpq %rax, %rsi
- jbe L(max)
- addq $(VEC_SIZE * 3), %rax
- addq %rdi, %rax
- subq %rdx, %rax
+ cmpl %eax, %esi
+ jb L(max_end)
+ subq %rdx, %rdi
# ifdef USE_AS_WCSLEN
- shrq $2, %rax
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
+ sarq $2, %rdi
# endif
+ leaq (CHAR_PER_VEC * 3)(%rdi, %rax), %rax
ret
-
- .p2align 4
-L(max):
+L(max_end):
movq %r8, %rax
-# ifdef USE_AS_WCSLEN
- shrq $2, %rax
-# endif
- ret
-
- .p2align 4
-L(zero):
- xorl %eax, %eax
ret
# endif
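A note on the subq %rdx, %rdi / sarq $2, %rdi pattern repeated in the return paths above and below: rdi is first turned into the byte offset of the current VEC group from the start of the string, divided by 4 for wcslen (wchar_t is 4 bytes on this target), and the leaq then adds the vector's character base plus the tzcnt bit index. A hedged C sketch of that computation; CHAR_PER_VEC's value and all names here are illustrative:

#include <stddef.h>

#define CHAR_PER_VEC 16	/* example; the real value depends on VEC_SIZE.  */

/* Result of the L(last_vec_xN) paths: convert the byte offset for the
   wide case ("sarq $2, %rdi"), then add the vector base and the index
   of the first null bit ("leaq (CHAR_PER_VEC * N)(%rdi, %rax)").  */
static size_t
last_vec_result (size_t byte_offset, unsigned vec_no,
		 unsigned first_null_bit, int wide)
{
  size_t base = wide ? byte_offset >> 2 : byte_offset;
  return base + (size_t) CHAR_PER_VEC * vec_no + first_null_bit;
}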
+	/* Cold case: the first load crosses a page boundary.  */
.p2align 4
-L(first_vec_x0):
- tzcntl %eax, %eax
-# ifdef USE_AS_WCSLEN
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %eax
-# endif
- addq %rdi, %rax
- subq %rdx, %rax
+L(cross_page_boundary):
+ movq %rdi, %rdx
+ /* Align data to VEC_SIZE. */
+ andq $-VEC_SIZE, %rdi
+ VPCMP $0, (%rdi), %YMMZERO, %k0
+ kmovd %k0, %eax
+ /* Remove the leading bytes. */
# ifdef USE_AS_WCSLEN
- shrq $2, %rax
+	/* NB: Divide the shift count by 4 since each bit in K0 represents
+	   4 bytes.  */
+ movl %edx, %ecx
+ shrl $2, %ecx
+ andl $(CHAR_PER_VEC - 1), %ecx
# endif
- ret
-
- .p2align 4
-L(first_vec_x1):
+ /* SHIFT_REG is ecx for USE_AS_WCSLEN and edx otherwise. */
+ sarxl %SHIFT_REG, %eax, %eax
+ testl %eax, %eax
+# ifndef USE_AS_STRNLEN
+ jz L(cross_page_continue)
tzcntl %eax, %eax
-# ifdef USE_AS_WCSLEN
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %eax
-# endif
- addq $VEC_SIZE, %rax
- addq %rdi, %rax
- subq %rdx, %rax
-# ifdef USE_AS_WCSLEN
- shrq $2, %rax
-# endif
ret
-
- .p2align 4
-L(first_vec_x2):
- tzcntl %eax, %eax
-# ifdef USE_AS_WCSLEN
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %eax
-# endif
- addq $(VEC_SIZE * 2), %rax
- addq %rdi, %rax
- subq %rdx, %rax
-# ifdef USE_AS_WCSLEN
- shrq $2, %rax
-# endif
+# else
+ jnz L(cross_page_less_vec)
+# ifndef USE_AS_WCSLEN
+ movl %edx, %ecx
+ andl $(CHAR_PER_VEC - 1), %ecx
+# endif
+ movl $CHAR_PER_VEC, %eax
+ subl %ecx, %eax
+ /* Check the end of data. */
+ cmpq %rax, %rsi
+ ja L(cross_page_continue)
+ movl %esi, %eax
ret
-
- .p2align 4
-L(4x_vec_end):
- VPCMP $0, %YMM1, %YMMZERO, %k0
- kmovd %k0, %eax
- testl %eax, %eax
- jnz L(first_vec_x0)
- VPCMP $0, %YMM2, %YMMZERO, %k1
- kmovd %k1, %eax
- testl %eax, %eax
- jnz L(first_vec_x1)
- VPCMP $0, %YMM3, %YMMZERO, %k2
- kmovd %k2, %eax
- testl %eax, %eax
- jnz L(first_vec_x2)
- VPCMP $0, %YMM4, %YMMZERO, %k3
- kmovd %k3, %eax
-L(first_vec_x3):
+L(cross_page_less_vec):
tzcntl %eax, %eax
-# ifdef USE_AS_WCSLEN
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %eax
-# endif
- addq $(VEC_SIZE * 3), %rax
- addq %rdi, %rax
- subq %rdx, %rax
-# ifdef USE_AS_WCSLEN
- shrq $2, %rax
-# endif
+ /* Select min of length and position of first null. */
+ cmpq %rax, %rsi
+ cmovb %esi, %eax
ret
+# endif
END (STRLEN)
#endif
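For L(cross_page_boundary) above: an aligned VEC_SIZE load can never fault across a page boundary, so the code aligns the pointer down, compares the full vector against zero, and shifts the match mask right (sarxl) to drop the lanes that precede the real start. A scalar C emulation of the idea, with illustrative names; the deliberately out-of-bounds-looking read mirrors what the aligned vector load does and is safe only at page granularity, a guarantee standard C cannot express:

#include <stddef.h>
#include <stdint.h>

#define VEC_SIZE 32

/* Bit i of the result is set when byte i of the aligned block is 0.  */
static uint32_t
zero_mask (const unsigned char *aligned)
{
  uint32_t m = 0;
  for (size_t i = 0; i < VEC_SIZE; i++)
    m |= (uint32_t) (aligned[i] == 0) << i;
  return m;
}

/* Null mask for the bytes at or after S, computed without touching
   the next page: align down ("andq $-VEC_SIZE, %rdi"), scan the whole
   block, then shift out the leading lanes ("sarxl %SHIFT_REG").  */
static uint32_t
safe_head_mask (const unsigned char *s)
{
  uintptr_t misalign = (uintptr_t) s & (VEC_SIZE - 1);
  const unsigned char *aligned
    = (const unsigned char *) ((uintptr_t) s - misalign);
  return zero_mask (aligned) >> misalign;
}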
--
GitLab
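The core of the rewritten 4x VEC loop in this patch is the VPMINU fold: a zero lane survives an unsigned minimum, so min-combining a freshly loaded vector into another lets a single VPCMP/kortestd test both at once. A small C emulation on byte lanes; the lane count and names are illustrative:

#include <stdbool.h>
#include <stddef.h>

#define LANES 4	/* example; the real code uses 32/64-byte EVEX vectors.  */

/* VPCMP $0 against zero: does any lane equal 0?  */
static bool
has_zero_lane (const unsigned char *v)
{
  for (size_t i = 0; i < LANES; i++)
    if (v[i] == 0)
      return true;
  return false;
}

/* min (A, B) has a zero lane iff A or B does, so one zero test covers
   both vectors (VPMINU ...; VPCMP $0 ...; kortestd).  */
static bool
any_zero (const unsigned char *a, const unsigned char *b)
{
  unsigned char m[LANES];
  for (size_t i = 0; i < LANES; i++)
    m[i] = a[i] < b[i] ? a[i] : b[i];
  return has_zero_lane (m);
}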
Some files were not shown because too many files have changed in this diff.