update to glibc-2.28-251.2.src.rpm
Signed-off-by: Zhao Hang <wb-zh951434@alibaba-inc.com>
This commit is contained in:
parent
e44d0a27bf
commit
bd4ad22e2b
160 changed files with 43782 additions and 39677 deletions
143
glibc-RHEL-15696-96.patch
Normal file
143
glibc-RHEL-15696-96.patch
Normal file
|
@ -0,0 +1,143 @@
|
|||
From d154758e618ec9324f5d339c46db0aa27e8b1226 Mon Sep 17 00:00:00 2001
|
||||
From: Noah Goldstein <goldstein.w.n@gmail.com>
|
||||
Date: Wed, 23 Mar 2022 16:57:38 -0500
|
||||
Subject: [PATCH] x86: Optimize str{n}casecmp TOLOWER logic in strcmp-sse42.S
|
||||
Content-type: text/plain; charset=UTF-8
|
||||
|
||||
Slightly faster method of doing TOLOWER that saves an
|
||||
instruction.
|
||||
|
||||
Also replace the hard-coded 5-byte NOP with .p2align 4. On builds with
|
||||
CET enabled, this misaligned the entry to strcasecmp.
|
||||
|
||||
geometric_mean(N=40) of all benchmarks New / Original: .920
|
||||
|
||||
All string/memory tests pass.
|
||||
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
|
||||
---
|
||||
sysdeps/x86_64/multiarch/strcmp-sse42.S | 83 +++++++++++--------------
|
||||
1 file changed, 35 insertions(+), 48 deletions(-)
|
||||
|
||||
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
|
||||
index d8fdeb3a..59e8ddfc 100644
|
||||
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
|
||||
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
|
||||
@@ -89,9 +89,8 @@ ENTRY (GLABEL(__strcasecmp))
|
||||
movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
|
||||
mov %fs:(%rax),%RDX_LP
|
||||
|
||||
- // XXX 5 byte should be before the function
|
||||
- /* 5-byte NOP. */
|
||||
- .byte 0x0f,0x1f,0x44,0x00,0x00
|
||||
+ /* Either 1 or 5 bytes (dependeing if CET is enabled). */
|
||||
+ .p2align 4
|
||||
END (GLABEL(__strcasecmp))
|
||||
/* FALLTHROUGH to strcasecmp_l. */
|
||||
#endif
|
||||
@@ -100,9 +99,8 @@ ENTRY (GLABEL(__strncasecmp))
|
||||
movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
|
||||
mov %fs:(%rax),%RCX_LP
|
||||
|
||||
- // XXX 5 byte should be before the function
|
||||
- /* 5-byte NOP. */
|
||||
- .byte 0x0f,0x1f,0x44,0x00,0x00
|
||||
+ /* Either 1 or 5 bytes (dependeing if CET is enabled). */
|
||||
+ .p2align 4
|
||||
END (GLABEL(__strncasecmp))
|
||||
/* FALLTHROUGH to strncasecmp_l. */
|
||||
#endif
|
||||
@@ -170,27 +168,22 @@ STRCMP_SSE42:
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
.section .rodata.cst16,"aM",@progbits,16
|
||||
.align 16
|
||||
-LABEL(belowupper):
|
||||
- .quad 0x4040404040404040
|
||||
- .quad 0x4040404040404040
|
||||
-LABEL(topupper):
|
||||
-# ifdef USE_AVX
|
||||
- .quad 0x5a5a5a5a5a5a5a5a
|
||||
- .quad 0x5a5a5a5a5a5a5a5a
|
||||
-# else
|
||||
- .quad 0x5b5b5b5b5b5b5b5b
|
||||
- .quad 0x5b5b5b5b5b5b5b5b
|
||||
-# endif
|
||||
-LABEL(touppermask):
|
||||
+LABEL(lcase_min):
|
||||
+ .quad 0x3f3f3f3f3f3f3f3f
|
||||
+ .quad 0x3f3f3f3f3f3f3f3f
|
||||
+LABEL(lcase_max):
|
||||
+ .quad 0x9999999999999999
|
||||
+ .quad 0x9999999999999999
|
||||
+LABEL(case_add):
|
||||
.quad 0x2020202020202020
|
||||
.quad 0x2020202020202020
|
||||
.previous
|
||||
- movdqa LABEL(belowupper)(%rip), %xmm4
|
||||
-# define UCLOW_reg %xmm4
|
||||
- movdqa LABEL(topupper)(%rip), %xmm5
|
||||
-# define UCHIGH_reg %xmm5
|
||||
- movdqa LABEL(touppermask)(%rip), %xmm6
|
||||
-# define LCQWORD_reg %xmm6
|
||||
+ movdqa LABEL(lcase_min)(%rip), %xmm4
|
||||
+# define LCASE_MIN_reg %xmm4
|
||||
+ movdqa LABEL(lcase_max)(%rip), %xmm5
|
||||
+# define LCASE_MAX_reg %xmm5
|
||||
+ movdqa LABEL(case_add)(%rip), %xmm6
|
||||
+# define CASE_ADD_reg %xmm6
|
||||
#endif
|
||||
cmp $0x30, %ecx
|
||||
ja LABEL(crosscache)/* rsi: 16-byte load will cross cache line */
|
||||
@@ -201,32 +194,26 @@ LABEL(touppermask):
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
# ifdef USE_AVX
|
||||
# define TOLOWER(reg1, reg2) \
|
||||
- vpcmpgtb UCLOW_reg, reg1, %xmm7; \
|
||||
- vpcmpgtb UCHIGH_reg, reg1, %xmm8; \
|
||||
- vpcmpgtb UCLOW_reg, reg2, %xmm9; \
|
||||
- vpcmpgtb UCHIGH_reg, reg2, %xmm10; \
|
||||
- vpandn %xmm7, %xmm8, %xmm8; \
|
||||
- vpandn %xmm9, %xmm10, %xmm10; \
|
||||
- vpand LCQWORD_reg, %xmm8, %xmm8; \
|
||||
- vpand LCQWORD_reg, %xmm10, %xmm10; \
|
||||
- vpor reg1, %xmm8, reg1; \
|
||||
- vpor reg2, %xmm10, reg2
|
||||
+ vpaddb LCASE_MIN_reg, reg1, %xmm7; \
|
||||
+ vpaddb LCASE_MIN_reg, reg2, %xmm8; \
|
||||
+ vpcmpgtb LCASE_MAX_reg, %xmm7, %xmm7; \
|
||||
+ vpcmpgtb LCASE_MAX_reg, %xmm8, %xmm8; \
|
||||
+ vpandn CASE_ADD_reg, %xmm7, %xmm7; \
|
||||
+ vpandn CASE_ADD_reg, %xmm8, %xmm8; \
|
||||
+ vpaddb %xmm7, reg1, reg1; \
|
||||
+ vpaddb %xmm8, reg2, reg2
|
||||
# else
|
||||
# define TOLOWER(reg1, reg2) \
|
||||
- movdqa reg1, %xmm7; \
|
||||
- movdqa UCHIGH_reg, %xmm8; \
|
||||
- movdqa reg2, %xmm9; \
|
||||
- movdqa UCHIGH_reg, %xmm10; \
|
||||
- pcmpgtb UCLOW_reg, %xmm7; \
|
||||
- pcmpgtb reg1, %xmm8; \
|
||||
- pcmpgtb UCLOW_reg, %xmm9; \
|
||||
- pcmpgtb reg2, %xmm10; \
|
||||
- pand %xmm8, %xmm7; \
|
||||
- pand %xmm10, %xmm9; \
|
||||
- pand LCQWORD_reg, %xmm7; \
|
||||
- pand LCQWORD_reg, %xmm9; \
|
||||
- por %xmm7, reg1; \
|
||||
- por %xmm9, reg2
|
||||
+ movdqa LCASE_MIN_reg, %xmm7; \
|
||||
+ movdqa LCASE_MIN_reg, %xmm8; \
|
||||
+ paddb reg1, %xmm7; \
|
||||
+ paddb reg2, %xmm8; \
|
||||
+ pcmpgtb LCASE_MAX_reg, %xmm7; \
|
||||
+ pcmpgtb LCASE_MAX_reg, %xmm8; \
|
||||
+ pandn CASE_ADD_reg, %xmm7; \
|
||||
+ pandn CASE_ADD_reg, %xmm8; \
|
||||
+ paddb %xmm7, reg1; \
|
||||
+ paddb %xmm8, reg2
|
||||
# endif
|
||||
TOLOWER (%xmm1, %xmm2)
|
||||
#else
|
||||
--
|
||||
GitLab
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue