303 lines
13 KiB
Diff
303 lines
13 KiB
Diff
--- ./embed.fnc.orig 2017-09-07 19:19:52.000000000 +0000
|
|
+++ ./embed.fnc 2020-06-09 16:21:37.259095322 +0000
|
|
@@ -2397,7 +2397,8 @@ Es |SSize_t|study_chunk |NN RExC_state_t
|
|
|NULLOK struct scan_data_t *data \
|
|
|I32 stopparen|U32 recursed_depth \
|
|
|NULLOK regnode_ssc *and_withp \
|
|
- |U32 flags|U32 depth
|
|
+ |U32 flags|U32 depth|bool was_mutate_ok
|
|
+Es |void |rck_elide_nothing|NN regnode *node
|
|
EsRn |U32 |add_data |NN RExC_state_t* const pRExC_state \
|
|
|NN const char* const s|const U32 n
|
|
rs |void |re_croak2 |bool utf8|NN const char* pat1|NN const char* pat2|...
|
|
--- ./embed.h.orig 2017-07-18 22:59:59.000000000 +0000
|
|
+++ ./embed.h 2020-06-09 16:21:37.263095314 +0000
|
|
@@ -1046,6 +1046,7 @@
|
|
#define output_or_return_posix_warnings(a,b,c) S_output_or_return_posix_warnings(aTHX_ a,b,c)
|
|
#define parse_lparen_question_flags(a) S_parse_lparen_question_flags(aTHX_ a)
|
|
#define populate_ANYOF_from_invlist(a,b) S_populate_ANYOF_from_invlist(aTHX_ a,b)
|
|
+#define rck_elide_nothing(a) S_rck_elide_nothing(aTHX_ a)
|
|
#define reg(a,b,c,d) S_reg(aTHX_ a,b,c,d)
|
|
#define reg2Lanode(a,b,c,d) S_reg2Lanode(aTHX_ a,b,c,d)
|
|
#define reg_node(a,b) S_reg_node(aTHX_ a,b)
|
|
@@ -1075,7 +1076,7 @@
|
|
#define ssc_is_cp_posixl_init S_ssc_is_cp_posixl_init
|
|
#define ssc_or(a,b,c) S_ssc_or(aTHX_ a,b,c)
|
|
#define ssc_union(a,b,c) S_ssc_union(aTHX_ a,b,c)
|
|
-#define study_chunk(a,b,c,d,e,f,g,h,i,j,k) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k)
|
|
+#define study_chunk(a,b,c,d,e,f,g,h,i,j,k,l) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k,l)
|
|
# endif
|
|
# if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C)
|
|
#define _invlist_dump(a,b,c,d) Perl__invlist_dump(aTHX_ a,b,c,d)
|
|
--- ./proto.h.orig 2017-09-07 19:19:52.000000000 +0000
|
|
+++ ./proto.h 2020-06-09 16:21:52.091061856 +0000
|
|
@@ -5150,6 +5150,9 @@ STATIC void S_parse_lparen_question_flag
|
|
STATIC void S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr);
|
|
#define PERL_ARGS_ASSERT_POPULATE_ANYOF_FROM_INVLIST \
|
|
assert(node); assert(invlist_ptr)
|
|
+STATIC void S_rck_elide_nothing(pTHX_ regnode *node);
|
|
+#define PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING \
|
|
+ assert(node)
|
|
PERL_STATIC_NO_RET void S_re_croak2(pTHX_ bool utf8, const char* pat1, const char* pat2, ...)
|
|
__attribute__noreturn__;
|
|
#define PERL_ARGS_ASSERT_RE_CROAK2 \
|
|
@@ -5258,7 +5261,7 @@ PERL_STATIC_INLINE void S_ssc_union(pTHX
|
|
#define PERL_ARGS_ASSERT_SSC_UNION \
|
|
assert(ssc); assert(invlist)
|
|
#endif
|
|
-STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth);
|
|
+STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth, bool was_mutate_ok);
|
|
#define PERL_ARGS_ASSERT_STUDY_CHUNK \
|
|
assert(pRExC_state); assert(scanp); assert(minlenp); assert(deltap); assert(last)
|
|
#endif
|
|
--- ./regcomp.c.orig 2020-06-09 16:21:32.515106026 +0000
|
|
+++ ./regcomp.c 2020-06-09 16:21:37.263095314 +0000
|
|
@@ -110,6 +110,7 @@ typedef struct scan_frame {
|
|
regnode *next_regnode; /* next node to process when last is reached */
|
|
U32 prev_recursed_depth;
|
|
I32 stopparen; /* what stopparen do we use */
|
|
+ bool in_gosub; /* this or an outer frame is for GOSUB */
|
|
U32 is_top_frame; /* what flags do we use? */
|
|
|
|
struct scan_frame *this_prev_frame; /* this previous frame */
|
|
@@ -4093,6 +4094,44 @@ S_unwind_scan_frames(pTHX_ const void *p
|
|
} while (f);
|
|
}
|
|
|
|
+/* Follow the next-chain of 'node' and fold the offsets of the NOTHING and
|
|
+ LONGJMP nodes found there into node's own offset (ARG or NEXT_OFF), while
|
|
+ the sum stays below that field's limit. CURLYX nodes are left alone. */
|
|
+STATIC void
|
|
+S_rck_elide_nothing(pTHX_ regnode *node)
|
|
+{
|
|
+ dVAR;
|
|
+
|
|
+ PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING;
|
|
+
|
|
+ if (OP(node) != CURLYX) {
|
|
+ const int max = (reg_off_by_arg[OP(node)]
|
|
+ ? I32_MAX
|
|
+ /* I32 may be smaller than U16 on CRAYs! */
|
|
+ : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
|
|
+ int off = (reg_off_by_arg[OP(node)] ? ARG(node) : NEXT_OFF(node));
|
|
+ int noff;
|
|
+ regnode *n = node;
|
|
+
|
|
+ /* Skip NOTHING and LONGJMP, adding each one's offset to 'off'. */
|
|
+ while (
|
|
+ (n = regnext(n))
|
|
+ && (
|
|
+ (PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
|
|
+ || ((OP(n) == LONGJMP) && (noff = ARG(n)))
|
|
+ )
|
|
+ && off + noff < max
|
|
+ ) {
|
|
+ off += noff;
|
|
+ }
|
|
+ if (reg_off_by_arg[OP(node)])
|
|
+ ARG(node) = off;
|
|
+ else
|
|
+ NEXT_OFF(node) = off;
|
|
+ }
|
|
+ return;
|
|
+}
|
|
+
|
|
|
|
STATIC SSize_t
|
|
S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
|
|
@@ -4102,7 +4141,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
|
|
I32 stopparen,
|
|
U32 recursed_depth,
|
|
regnode_ssc *and_withp,
|
|
- U32 flags, U32 depth)
|
|
+ U32 flags, U32 depth, bool was_mutate_ok)
|
|
/* scanp: Start here (read-write). */
|
|
/* deltap: Write maxlen-minlen here. */
|
|
/* last: Stop before this one. */
|
|
@@ -4179,6 +4218,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
|
|
node length to get a real minimum (because
|
|
the folded version may be shorter) */
|
|
bool unfolded_multi_char = FALSE;
|
|
+ /* avoid mutating ops if we are anywhere within the recursed or
|
|
+ * enframed handling for a GOSUB: the outermost level will handle it.
|
|
+ */
|
|
+ bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
|
|
/* Peephole optimizer: */
|
|
DEBUG_STUDYDATA("Peep:", data, depth);
|
|
DEBUG_PEEP("Peep", scan, depth);
|
|
@@ -4189,30 +4232,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
|
|
* parsing code, as each (?:..) is handled by a different invocation of
|
|
* reg() -- Yves
|
|
*/
|
|
- JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
|
|
+ if (mutate_ok)
|
|
+ JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
|
|
|
|
/* Follow the next-chain of the current node and optimize
|
|
away all the NOTHINGs from it. */
|
|
- if (OP(scan) != CURLYX) {
|
|
- const int max = (reg_off_by_arg[OP(scan)]
|
|
- ? I32_MAX
|
|
- /* I32 may be smaller than U16 on CRAYs! */
|
|
- : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
|
|
- int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
|
|
- int noff;
|
|
- regnode *n = scan;
|
|
-
|
|
- /* Skip NOTHING and LONGJMP. */
|
|
- while ((n = regnext(n))
|
|
- && ((PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
|
|
- || ((OP(n) == LONGJMP) && (noff = ARG(n))))
|
|
- && off + noff < max)
|
|
- off += noff;
|
|
- if (reg_off_by_arg[OP(scan)])
|
|
- ARG(scan) = off;
|
|
- else
|
|
- NEXT_OFF(scan) = off;
|
|
- }
|
|
+ rck_elide_nothing(scan);
|
|
|
|
/* The principal pseudo-switch. Cannot be a switch, since we
|
|
look into several different things. */
|
|
@@ -4238,7 +4263,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
|
|
* NOTE we dont use the return here! */
|
|
(void)study_chunk(pRExC_state, &scan, &minlen,
|
|
&deltanext, next, &data_fake, stopparen,
|
|
- recursed_depth, NULL, f, depth+1);
|
|
+ recursed_depth, NULL, f, depth+1, mutate_ok);
|
|
|
|
scan = next;
|
|
} else
|
|
@@ -4305,7 +4330,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
|
|
/* we suppose the run is continuous, last=next...*/
|
|
minnext = study_chunk(pRExC_state, &scan, minlenp,
|
|
&deltanext, next, &data_fake, stopparen,
|
|
- recursed_depth, NULL, f,depth+1);
|
|
+ recursed_depth, NULL, f,depth+1, mutate_ok);
|
|
|
|
if (min1 > minnext)
|
|
min1 = minnext;
|
|
@@ -4373,7 +4398,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
|
|
}
|
|
|
|
if (PERL_ENABLE_TRIE_OPTIMISATION &&
|
|
- OP( startbranch ) == BRANCH )
|
|
+ OP( startbranch ) == BRANCH && mutate_ok)
|
|
{
|
|
/* demq.
|
|
|
|
@@ -4825,6 +4850,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
|
|
newframe->stopparen = stopparen;
|
|
newframe->prev_recursed_depth = recursed_depth;
|
|
newframe->this_prev_frame= frame;
|
|
+ newframe->in_gosub = ((frame && frame->in_gosub) || OP(scan) == GOSUB);
|
|
|
|
DEBUG_STUDYDATA("frame-new:",data,depth);
|
|
DEBUG_PEEP("fnew", scan, depth);
|
|
@@ -5043,7 +5069,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
|
|
(mincount == 0
|
|
? (f & ~SCF_DO_SUBSTR)
|
|
: f)
|
|
- ,depth+1);
|
|
+ ,depth+1, mutate_ok);
|
|
|
|
if (flags & SCF_DO_STCLASS)
|
|
data->start_class = oclass;
|
|
@@ -5091,6 +5117,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
|
|
(void)ReREFCNT_inc(RExC_rx_sv);
|
|
}
|
|
|
|
+ if ( ( minnext > 0 && mincount >= SSize_t_MAX / minnext )
|
|
+ || min >= SSize_t_MAX - minnext * mincount )
|
|
+ {
|
|
+ FAIL("Regexp out of space");
|
|
+ }
|
|
+
|
|
min += minnext * mincount;
|
|
is_inf_internal |= deltanext == SSize_t_MAX
|
|
|| (maxcount == REG_INFTY && minnext + deltanext > 0);
|
|
@@ -5105,7 +5137,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
|
|
if ( OP(oscan) == CURLYX && data
|
|
&& data->flags & SF_IN_PAR
|
|
&& !(data->flags & SF_HAS_EVAL)
|
|
- && !deltanext && minnext == 1 ) {
|
|
+ && !deltanext && minnext == 1
|
|
+ && mutate_ok ) {
|
|
/* Try to optimize to CURLYN. */
|
|
regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
|
|
regnode * const nxt1 = nxt;
|
|
@@ -5155,6 +5188,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
|
|
/* Nor characters whose fold at run-time may be
|
|
* multi-character */
|
|
&& ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN)
|
|
+ && mutate_ok
|
|
) {
|
|
/* XXXX How to optimize if data == 0? */
|
|
/* Optimize to a simpler form. */
|
|
@@ -5201,7 +5235,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
|
|
#endif
|
|
/* Optimize again: */
|
|
study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
|
|
- NULL, stopparen, recursed_depth, NULL, 0,depth+1);
|
|
+ NULL, stopparen, recursed_depth, NULL, 0,depth+1, mutate_ok);
|
|
}
|
|
else
|
|
oscan->flags = 0;
|
|
@@ -5328,11 +5362,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RH
|
|
if (data && (fl & SF_HAS_EVAL))
|
|
data->flags |= SF_HAS_EVAL;
|
|
optimize_curly_tail:
|
|
- if (OP(oscan) != CURLYX) {
|
|
- while (PL_regkind[OP(next = regnext(oscan))] == NOTHING
|
|
- && NEXT_OFF(next))
|
|
- NEXT_OFF(oscan) += NEXT_OFF(next);
|
|
- }
|
|
+ rck_elide_nothing(oscan);
|
|
continue;
|
|
|
|
default:
|
|
@@ -5592,7 +5622,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RH
|
|
nscan = NEXTOPER(NEXTOPER(scan));
|
|
minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
|
|
last, &data_fake, stopparen,
|
|
- recursed_depth, NULL, f, depth+1);
|
|
+ recursed_depth, NULL, f, depth+1, mutate_ok);
|
|
if (scan->flags) {
|
|
if (deltanext) {
|
|
FAIL("Variable length lookbehind not implemented");
|
|
@@ -5681,7 +5711,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RH
|
|
*minnextp = study_chunk(pRExC_state, &nscan, minnextp,
|
|
&deltanext, last, &data_fake,
|
|
stopparen, recursed_depth, NULL,
|
|
- f,depth+1);
|
|
+ f,depth+1, mutate_ok);
|
|
if (scan->flags) {
|
|
if (deltanext) {
|
|
FAIL("Variable length lookbehind not implemented");
|
|
@@ -5841,7 +5871,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RH
|
|
branches even though they arent otherwise used. */
|
|
minnext = study_chunk(pRExC_state, &scan, minlenp,
|
|
&deltanext, (regnode *)nextbranch, &data_fake,
|
|
- stopparen, recursed_depth, NULL, f,depth+1);
|
|
+ stopparen, recursed_depth, NULL, f,depth+1, mutate_ok);
|
|
}
|
|
if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
|
|
nextbranch= regnext((regnode*)nextbranch);
|
|
@@ -7520,7 +7550,7 @@ Perl_re_op_compile(pTHX_ SV ** const pat
|
|
&data, -1, 0, NULL,
|
|
SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag
|
|
| (restudied ? SCF_TRIE_DOING_RESTUDY : 0),
|
|
- 0);
|
|
+ 0, TRUE);
|
|
|
|
|
|
CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
|
|
@@ -7666,7 +7696,7 @@ Perl_re_op_compile(pTHX_ SV ** const pat
|
|
SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
|
|
? SCF_TRIE_DOING_RESTUDY
|
|
: 0),
|
|
- 0);
|
|
+ 0, TRUE);
|
|
|
|
CHECK_RESTUDY_GOTO_butfirst(NOOP);
|
|
|