#ifdef PERL_EXT_RE_BUILD
# ifndef PERL_IN_XSUB_RE
# define PERL_IN_XSUB_RE
# endif
# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
# define DEBUGGING
# endif
#endif
#ifdef PERL_IN_XSUB_RE
# define Perl_regexec_flags my_regexec
# define Perl_regdump my_regdump
# define Perl_regprop my_regprop
# define Perl_re_intuit_start my_re_intuit_start
# define Perl_pregexec my_pregexec
# define Perl_reginitcolors my_reginitcolors
# define Perl_regclass_swash my_regclass_swash
# define PERL_NO_GET_CONTEXT
#endif
#include "EXTERN.h"
#define PERL_IN_REGEXEC_C
#include "perl.h"
#include "regcomp.h"
#define RF_tainted 1
#define RF_warned 2
#define RF_evaled 4
#define RF_utf8 8
#define UTF ((PL_reg_flags & RF_utf8) != 0)
#define RS_init 1
#define RS_set 2
#ifndef STATIC
#define STATIC static
#endif
#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
#define HOPc(pos,off) ((char*)HOP(pos,off))
#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
#define HOPBACK(pos, off) ( \
(PL_reg_match_utf8) \
? reghopmaybe((U8*)pos, -off) \
: (pos - off >= PL_bostr) \
? (U8*)(pos - off) \
: (U8*)NULL \
)
#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) { ENTER; save_re_context(); (void)CAT2(is_utf8_, a)((U8*)b); LEAVE; } } STMT_END
#define JUMPABLE(rn) ( \
OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
OP(rn) == PLUS || OP(rn) == MINMOD || \
(PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
)
#define HAS_TEXT(rn) ( \
PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
)
#define FIND_NEXT_IMPT(rn) STMT_START { \
while (JUMPABLE(rn)) \
if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
rn = NEXTOPER(NEXTOPER(rn)); \
else if (OP(rn) == PLUS) \
rn = NEXTOPER(rn); \
else if (OP(rn) == IFMATCH) \
rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
else rn += NEXT_OFF(rn); \
} STMT_END
static void restore_pos(pTHX_ void *arg);
STATIC CHECKPOINT
S_regcppush(pTHX_ I32 parenfloor)
{
int retval = PL_savestack_ix;
#define REGCP_PAREN_ELEMS 4
int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
int p;
if (paren_elems_to_push < 0)
Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
#define REGCP_OTHER_ELEMS 6
SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
for (p = PL_regsize; p > parenfloor; p--) {
SSPUSHINT(PL_regendp[p]);
SSPUSHINT(PL_regstartp[p]);
SSPUSHPTR(PL_reg_start_tmp[p]);
SSPUSHINT(p);
}
SSPUSHINT(PL_regsize);
SSPUSHINT(*PL_reglastparen);
SSPUSHINT(*PL_reglastcloseparen);
SSPUSHPTR(PL_reginput);
#define REGCP_FRAME_ELEMS 2
SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
SSPUSHINT(SAVEt_REGCONTEXT);
return retval;
}
# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
" Setting an EVAL scope, savestack=%"IVdf"\n", \
(IV)PL_savestack_ix)); cp = PL_savestack_ix
# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
PerlIO_printf(Perl_debug_log, \
" Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
(IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
STATIC char *
S_regcppop(pTHX)
{
I32 i;
U32 paren = 0;
char *input;
I32 tmps;
i = SSPOPINT;
assert(i == SAVEt_REGCONTEXT);
i = SSPOPINT;
input = (char *) SSPOPPTR;
*PL_reglastcloseparen = SSPOPINT;
*PL_reglastparen = SSPOPINT;
PL_regsize = SSPOPINT;
for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
i > 0; i -= REGCP_PAREN_ELEMS) {
paren = (U32)SSPOPINT;
PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
PL_regstartp[paren] = SSPOPINT;
tmps = SSPOPINT;
if (paren <= *PL_reglastparen)
PL_regendp[paren] = tmps;
DEBUG_r(
PerlIO_printf(Perl_debug_log,
" restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
(UV)paren, (IV)PL_regstartp[paren],
(IV)(PL_reg_start_tmp[paren] - PL_bostr),
(IV)PL_regendp[paren],
(paren > *PL_reglastparen ? "(no)" : ""));
);
}
DEBUG_r(
if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
PerlIO_printf(Perl_debug_log,
" restoring \\%"IVdf"..\\%"IVdf" to undef\n",
(IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
}
);
#if 1
for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
if ((I32)paren > PL_regsize)
PL_regstartp[paren] = -1;
PL_regendp[paren] = -1;
}
#endif
return input;
}
STATIC char *
S_regcp_set_to(pTHX_ I32 ss)
{
I32 tmp = PL_savestack_ix;
PL_savestack_ix = ss;
regcppop();
PL_savestack_ix = tmp;
return Nullch;
}
typedef struct re_cc_state
{
I32 ss;
regnode *node;
struct re_cc_state *prev;
CURCUR *cc;
regexp *re;
} re_cc_state;
#define regcpblow(cp) LEAVE_SCOPE(cp)
#define TRYPAREN(paren, n, input) { \
if (paren) { \
if (n) { \
PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
PL_regendp[paren] = input - PL_bostr; \
} \
else \
PL_regendp[paren] = -1; \
} \
if (regmatch(next)) \
sayYES; \
if (paren && n) \
PL_regendp[paren] = -1; \
}
I32
Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
char *strbeg, I32 minend, SV *screamer, U32 nosave)
{
return
regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
nosave ? 0 : REXEC_COPY_STR);
}
STATIC void
S_cache_re(pTHX_ regexp *prog)
{
PL_regprecomp = prog->precomp;
#ifdef DEBUGGING
PL_regprogram = prog->program;
#endif
PL_regnpar = prog->nparens;
PL_regdata = prog->data;
PL_reg_re = prog;
}
char *
Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
char *strend, U32 flags, re_scream_pos_data *data)
{
register I32 start_shift = 0;
register I32 end_shift = 0;
register char *s;
register SV *check;
char *strbeg;
char *t;
int do_utf8 = sv ? SvUTF8(sv) : 0;
I32 ml_anch;
register char *other_last = Nullch;
char *check_at = Nullch;
#ifdef DEBUGGING
char *i_strpos = strpos;
SV *dsv = PERL_DEBUG_PAD_ZERO(0);
#endif
RX_MATCH_UTF8_set(prog,do_utf8);
if (prog->reganch & ROPT_UTF8) {
DEBUG_r(PerlIO_printf(Perl_debug_log,
"UTF-8 regex...\n"));
PL_reg_flags |= RF_utf8;
}
DEBUG_r({
char *s = PL_reg_match_utf8 ?
sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
strpos;
int len = PL_reg_match_utf8 ?
strlen(s) : strend - strpos;
if (!PL_colorset)
reginitcolors();
if (PL_reg_match_utf8)
DEBUG_r(PerlIO_printf(Perl_debug_log,
"UTF-8 target...\n"));
PerlIO_printf(Perl_debug_log,
"%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
PL_colors[4],PL_colors[5],PL_colors[0],
prog->precomp,
PL_colors[1],
(strlen(prog->precomp) > 60 ? "..." : ""),
PL_colors[0],
(int)(len > 60 ? 60 : len),
s, PL_colors[1],
(len > 60 ? "..." : "")
);
});
if (prog->minlen > strend - strpos) {
DEBUG_r(PerlIO_printf(Perl_debug_log,
"String too short... [re_intuit_start]\n"));
goto fail;
}
strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
PL_regeol = strend;
if (do_utf8) {
if (!prog->check_utf8 && prog->check_substr)
to_utf8_substr(prog);
check = prog->check_utf8;
} else {
if (!prog->check_substr && prog->check_utf8)
to_byte_substr(prog);
check = prog->check_substr;
}
if (check == &PL_sv_undef) {
DEBUG_r(PerlIO_printf(Perl_debug_log,
"Non-utf string cannot match utf check string\n"));
goto fail;
}
if (prog->reganch & ROPT_ANCH) {
ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
|| ( (prog->reganch & ROPT_ANCH_BOL)
&& !PL_multiline ) );
if (!ml_anch) {
if ( !(prog->reganch & (ROPT_ANCH_GPOS
| ROPT_IMPLICIT))
&& sv && !SvROK(sv)
&& (strpos != strbeg)) {
DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
goto fail;
}
if (prog->check_offset_min == prog->check_offset_max &&
!(prog->reganch & ROPT_CANY_SEEN)) {
I32 slen;
s = HOP3c(strpos, prog->check_offset_min, strend);
if (SvTAIL(check)) {
slen = SvCUR(check);
if ( strend - s > slen || strend - s < slen - 1
|| (strend - s == slen && strend[-1] != '\n')) {
DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
goto fail_finish;
}
slen--;
if (slen && (*SvPVX(check) != *s
|| (slen > 1
&& memNE(SvPVX(check), s, slen)))) {
report_neq:
DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
goto fail_finish;
}
}
else if (*SvPVX(check) != *s
|| ((slen = SvCUR(check)) > 1
&& memNE(SvPVX(check), s, slen)))
goto report_neq;
goto success_at_start;
}
}
s = strpos;
start_shift = prog->check_offset_min;
end_shift = prog->minlen - start_shift -
CHR_SVLEN(check) + (SvTAIL(check) != 0);
if (!ml_anch) {
I32 end = prog->check_offset_max + CHR_SVLEN(check)
- (SvTAIL(check) != 0);
I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
if (end_shift < eshift)
end_shift = eshift;
}
}
else {
ml_anch = 0;
s = strpos;
start_shift = prog->check_offset_min;
end_shift = prog->minlen - start_shift -
CHR_SVLEN(check) + (SvTAIL(check) != 0);
}
#ifdef DEBUGGING
if (end_shift < 0)
Perl_croak(aTHX_ "panic: end_shift");
#endif
restart:
if (flags & REXEC_SCREAM) {
I32 p = -1;
I32 *pp = data ? data->scream_pos : &p;
if (PL_screamfirst[BmRARE(check)] >= 0
|| ( BmRARE(check) == '\n'
&& (BmPREVIOUS(check) == SvCUR(check) - 1)
&& SvTAIL(check) ))
s = screaminstr(sv, check,
start_shift + (s - strbeg), end_shift, pp, 0);
else
goto fail_finish;
if (s && RX_MATCH_COPIED(prog))
s = strbeg + (s - SvPVX(sv));
if (data)
*data->scream_olds = s;
}
else if (prog->reganch & ROPT_CANY_SEEN)
s = fbm_instr((U8*)(s + start_shift),
(U8*)(strend - end_shift),
check, PL_multiline ? FBMrf_MULTILINE : 0);
else
s = fbm_instr(HOP3(s, start_shift, strend),
HOP3(strend, -end_shift, strbeg),
check, PL_multiline ? FBMrf_MULTILINE : 0);
DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
(s ? "Found" : "Did not find"),
(check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
PL_colors[0],
(int)(SvCUR(check) - (SvTAIL(check)!=0)),
SvPVX(check),
PL_colors[1], (SvTAIL(check) ? "$" : ""),
(s ? " at offset " : "...\n") ) );
if (!s)
goto fail_finish;
check_at = s;
DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
if (!other_last)
other_last = strpos;
if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
do_other_anchored:
{
char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
char *s1 = s;
SV* must;
t = s - prog->check_offset_max;
if (s - strpos > prog->check_offset_max
&& (!do_utf8
|| ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
&& t > strpos)))
;
else
t = strpos;
t = HOP3c(t, prog->anchored_offset, strend);
if (t < other_last)
t = other_last;
last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
if (last < last1)
last1 = last;
must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
if (must == &PL_sv_undef) {
s = (char*)NULL;
DEBUG_r(must = prog->anchored_utf8);
}
else
s = fbm_instr(
(unsigned char*)t,
HOP3(HOP3(last1, prog->anchored_offset, strend)
+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
must,
PL_multiline ? FBMrf_MULTILINE : 0
);
DEBUG_r(PerlIO_printf(Perl_debug_log,
"%s anchored substr `%s%.*s%s'%s",
(s ? "Found" : "Contradicts"),
PL_colors[0],
(int)(SvCUR(must)
- (SvTAIL(must)!=0)),
SvPVX(must),
PL_colors[1], (SvTAIL(must) ? "$" : "")));
if (!s) {
if (last1 >= last2) {
DEBUG_r(PerlIO_printf(Perl_debug_log,
", giving up...\n"));
goto fail_finish;
}
DEBUG_r(PerlIO_printf(Perl_debug_log,
", trying floating at offset %ld...\n",
(long)(HOP3c(s1, 1, strend) - i_strpos)));
other_last = HOP3c(last1, prog->anchored_offset+1, strend);
s = HOP3c(last, 1, strend);
goto restart;
}
else {
DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
(long)(s - i_strpos)));
t = HOP3c(s, -prog->anchored_offset, strbeg);
other_last = HOP3c(s, 1, strend);
s = s1;
if (t == strpos)
goto try_at_start;
goto try_at_offset;
}
}
}
else {
char *last, *last1;
char *s1 = s;
SV* must;
t = HOP3c(s, -start_shift, strbeg);
last1 = last =
HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
last = HOP3c(t, prog->float_max_offset, strend);
s = HOP3c(t, prog->float_min_offset, strend);
if (s < other_last)
s = other_last;
must = do_utf8 ? prog->float_utf8 : prog->float_substr;
if (must == &PL_sv_undef) {
s = (char*)NULL;
DEBUG_r(must = prog->float_utf8);
}
else
s = fbm_instr((unsigned char*)s,
(unsigned char*)last + SvCUR(must)
- (SvTAIL(must)!=0),
must, PL_multiline ? FBMrf_MULTILINE : 0);
DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
(s ? "Found" : "Contradicts"),
PL_colors[0],
(int)(SvCUR(must) - (SvTAIL(must)!=0)),
SvPVX(must),
PL_colors[1], (SvTAIL(must) ? "$" : "")));
if (!s) {
if (last1 == last) {
DEBUG_r(PerlIO_printf(Perl_debug_log,
", giving up...\n"));
goto fail_finish;
}
DEBUG_r(PerlIO_printf(Perl_debug_log,
", trying anchored starting at offset %ld...\n",
(long)(s1 + 1 - i_strpos)));
other_last = last;
s = HOP3c(t, 1, strend);
goto restart;
}
else {
DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
(long)(s - i_strpos)));
other_last = s;
s = s1;
if (t == strpos)
goto try_at_start;
goto try_at_offset;
}
}
}
t = s - prog->check_offset_max;
if (s - strpos > prog->check_offset_max
&& (!do_utf8
|| ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
&& t > strpos))) {
try_at_offset:
if (ml_anch && t[-1] != '\n') {
find_anchor:
while (t < strend - prog->minlen) {
if (*t == '\n') {
if (t < check_at - prog->check_offset_min) {
if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
strpos = t + 1;
DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
goto do_other_anchored;
}
s = t + 1;
DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
goto set_useful;
}
DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
other_last = strpos = s = t + 1;
goto restart;
}
t++;
}
DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
PL_colors[0],PL_colors[1]));
goto fail_finish;
}
else {
DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
PL_colors[0],PL_colors[1]));
}
s = t;
set_useful:
++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);
}
else {
try_at_start:
if (ml_anch && sv && !SvROK(sv)
&& (strpos != strbeg) && strpos[-1] != '\n'
&& !(prog->reganch & ROPT_IMPLICIT))
{
t = strpos;
goto find_anchor;
}
DEBUG_r( if (ml_anch)
PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
(long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
);
success_at_start:
if (!(prog->reganch & ROPT_NAUGHTY)
&& (do_utf8 ? (
prog->check_utf8
&& --BmUSEFUL(prog->check_utf8) < 0
&& (prog->check_utf8 == prog->float_utf8)
) : (
prog->check_substr
&& --BmUSEFUL(prog->check_substr) < 0
&& (prog->check_substr == prog->float_substr)
)))
{
DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
if (do_utf8 ? prog->check_substr : prog->check_utf8)
SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
prog->check_substr = prog->check_utf8 = Nullsv;
prog->float_substr = prog->float_utf8 = Nullsv;
check = Nullsv;
s = strpos;
prog->reganch &= ~RE_USE_INTUIT;
}
else
s = strpos;
}
if (prog->regstclass) {
U8* str = (U8*)STRING(prog->regstclass);
int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
? CHR_DIST(str+STR_LEN(prog->regstclass), str)
: 1);
char *endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
: (prog->float_substr || prog->float_utf8
? HOP3c(HOP3c(check_at, -start_shift, strbeg),
cl_l, strend)
: strend);
char *startpos = strbeg;
t = s;
cache_re(prog);
s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
if (!s) {
#ifdef DEBUGGING
char *what = 0;
#endif
if (endpos == strend) {
DEBUG_r( PerlIO_printf(Perl_debug_log,
"Could not match STCLASS...\n") );
goto fail;
}
DEBUG_r( PerlIO_printf(Perl_debug_log,
"This position contradicts STCLASS...\n") );
if ((prog->reganch & ROPT_ANCH) && !ml_anch)
goto fail;
if (prog->anchored_substr || prog->anchored_utf8) {
if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
DEBUG_r( what = "anchored" );
hop_and_restart:
s = HOP3c(t, 1, strend);
if (s + start_shift + end_shift > strend) {
DEBUG_r( PerlIO_printf(Perl_debug_log,
"Could not match STCLASS...\n") );
goto fail;
}
if (!check)
goto giveup;
DEBUG_r( PerlIO_printf(Perl_debug_log,
"Looking for %s substr starting at offset %ld...\n",
what, (long)(s + start_shift - i_strpos)) );
goto restart;
}
if (t + start_shift >= check_at)
goto retry_floating_check;
s = check_at;
if (!check)
goto giveup;
DEBUG_r( PerlIO_printf(Perl_debug_log,
"Looking for anchored substr starting at offset %ld...\n",
(long)(other_last - i_strpos)) );
goto do_other_anchored;
}
if (ml_anch) {
s = t = t + 1;
if (!check)
goto giveup;
DEBUG_r( PerlIO_printf(Perl_debug_log,
"Looking for /%s^%s/m starting at offset %ld...\n",
PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
goto try_at_offset;
}
if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
goto fail;
retry_floating_check:
t = check_at - start_shift;
DEBUG_r( what = "floating" );
goto hop_and_restart;
}
if (t != s) {
DEBUG_r(PerlIO_printf(Perl_debug_log,
"By STCLASS: moving %ld --> %ld\n",
(long)(t - i_strpos), (long)(s - i_strpos))
);
}
else {
DEBUG_r(PerlIO_printf(Perl_debug_log,
"Does not contradict STCLASS...\n");
);
}
}
giveup:
DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
PL_colors[4], (check ? "Guessed" : "Giving up"),
PL_colors[5], (long)(s - i_strpos)) );
return s;
fail_finish:
if (prog->check_substr || prog->check_utf8)
BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5;
fail:
DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
PL_colors[4],PL_colors[5]));
return Nullch;
}
STATIC char *
S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
{
I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
char *m;
STRLEN ln;
unsigned int c1;
unsigned int c2;
char *e;
register I32 tmp = 1;
register bool do_utf8 = PL_reg_match_utf8;
switch (OP(c)) {
case ANYOF:
if (do_utf8) {
while (s < strend) {
if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
!UTF8_IS_INVARIANT((U8)s[0]) ?
reginclass(c, (U8*)s, 0, do_utf8) :
REGINCLASS(c, (U8*)s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s += UTF8SKIP(s);
}
}
else {
while (s < strend) {
STRLEN skip = 1;
if (REGINCLASS(c, (U8*)s) ||
(ANYOF_FOLD_SHARP_S(c, s, strend) &&
(skip = SHARP_S_SKIP))) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s += skip;
}
}
break;
case CANY:
while (s < strend) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
s++;
}
break;
case EXACTF:
m = STRING(c);
ln = STR_LEN(c);
if (UTF) {
STRLEN ulen1, ulen2;
U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN_UCLC,
0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN_UCLC,
0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
}
else {
c1 = *(U8*)m;
c2 = PL_fold[c1];
}
goto do_exactf;
case EXACTFL:
m = STRING(c);
ln = STR_LEN(c);
c1 = *(U8*)m;
c2 = PL_fold_locale[c1];
do_exactf:
e = HOP3c(strend, -(I32)ln, s);
if (norun && e < s)
e = s;
if (do_utf8) {
UV c, f;
U8 tmpbuf [UTF8_MAXLEN+1];
U8 foldbuf[UTF8_MAXLEN_FOLD+1];
STRLEN len, foldlen;
if (c1 == c2) {
while (s <= e) {
c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
ckWARN(WARN_UTF8) ?
0 : UTF8_ALLOW_ANY);
if ( c == c1
&& (ln == len ||
ibcmp_utf8(s, (char **)0, 0, do_utf8,
m, (char **)0, ln, (bool)UTF))
&& (norun || regtry(prog, s)) )
goto got_it;
else {
uvchr_to_utf8(tmpbuf, c);
f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
if ( f != c
&& (f == c1 || f == c2)
&& (ln == foldlen ||
!ibcmp_utf8((char *) foldbuf,
(char **)0, foldlen, do_utf8,
m,
(char **)0, ln, (bool)UTF))
&& (norun || regtry(prog, s)) )
goto got_it;
}
s += len;
}
}
else {
while (s <= e) {
c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
ckWARN(WARN_UTF8) ?
0 : UTF8_ALLOW_ANY);
if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
if ( (c == c1 || c == c2)
&& (ln == len ||
ibcmp_utf8(s, (char **)0, 0, do_utf8,
m, (char **)0, ln, (bool)UTF))
&& (norun || regtry(prog, s)) )
goto got_it;
else {
uvchr_to_utf8(tmpbuf, c);
f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
if ( f != c
&& (f == c1 || f == c2)
&& (ln == foldlen ||
!ibcmp_utf8((char *) foldbuf,
(char **)0, foldlen, do_utf8,
m,
(char **)0, ln, (bool)UTF))
&& (norun || regtry(prog, s)) )
goto got_it;
}
s += len;
}
}
}
else {
if (c1 == c2)
while (s <= e) {
if ( *(U8*)s == c1
&& (ln == 1 || !(OP(c) == EXACTF
? ibcmp(s, m, ln)
: ibcmp_locale(s, m, ln)))
&& (norun || regtry(prog, s)) )
goto got_it;
s++;
}
else
while (s <= e) {
if ( (*(U8*)s == c1 || *(U8*)s == c2)
&& (ln == 1 || !(OP(c) == EXACTF
? ibcmp(s, m, ln)
: ibcmp_locale(s, m, ln)))
&& (norun || regtry(prog, s)) )
goto got_it;
s++;
}
}
break;
case BOUNDL:
PL_reg_flags |= RF_tainted;
case BOUND:
if (do_utf8) {
if (s == PL_bostr)
tmp = '\n';
else {
U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
}
tmp = ((OP(c) == BOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
LOAD_UTF8_CHARCLASS(alnum,"a");
while (s < strend) {
if (tmp == !(OP(c) == BOUND ?
swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
{
tmp = !tmp;
if ((norun || regtry(prog, s)))
goto got_it;
}
s += UTF8SKIP(s);
}
}
else {
tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
while (s < strend) {
if (tmp ==
!(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
tmp = !tmp;
if ((norun || regtry(prog, s)))
goto got_it;
}
s++;
}
}
if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
goto got_it;
break;
case NBOUNDL:
PL_reg_flags |= RF_tainted;
case NBOUND:
if (do_utf8) {
if (s == PL_bostr)
tmp = '\n';
else {
U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
}
tmp = ((OP(c) == NBOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
LOAD_UTF8_CHARCLASS(alnum,"a");
while (s < strend) {
if (tmp == !(OP(c) == NBOUND ?
swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
tmp = !tmp;
else if ((norun || regtry(prog, s)))
goto got_it;
s += UTF8SKIP(s);
}
}
else {
tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
tmp = ((OP(c) == NBOUND ?
isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
while (s < strend) {
if (tmp ==
!(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
tmp = !tmp;
else if ((norun || regtry(prog, s)))
goto got_it;
s++;
}
}
if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
goto got_it;
break;
case ALNUM:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(alnum,"a");
while (s < strend) {
if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s += UTF8SKIP(s);
}
}
else {
while (s < strend) {
if (isALNUM(*s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s++;
}
}
break;
case ALNUML:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
while (s < strend) {
if (isALNUM_LC_utf8((U8*)s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s += UTF8SKIP(s);
}
}
else {
while (s < strend) {
if (isALNUM_LC(*s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s++;
}
}
break;
case NALNUM:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(alnum,"a");
while (s < strend) {
if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s += UTF8SKIP(s);
}
}
else {
while (s < strend) {
if (!isALNUM(*s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s++;
}
}
break;
case NALNUML:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
while (s < strend) {
if (!isALNUM_LC_utf8((U8*)s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s += UTF8SKIP(s);
}
}
else {
while (s < strend) {
if (!isALNUM_LC(*s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s++;
}
}
break;
case SPACE:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(space," ");
while (s < strend) {
if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s += UTF8SKIP(s);
}
}
else {
while (s < strend) {
if (isSPACE(*s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s++;
}
}
break;
case SPACEL:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
while (s < strend) {
if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s += UTF8SKIP(s);
}
}
else {
while (s < strend) {
if (isSPACE_LC(*s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s++;
}
}
break;
case NSPACE:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(space," ");
while (s < strend) {
if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s += UTF8SKIP(s);
}
}
else {
while (s < strend) {
if (!isSPACE(*s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s++;
}
}
break;
case NSPACEL:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
while (s < strend) {
if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s += UTF8SKIP(s);
}
}
else {
while (s < strend) {
if (!isSPACE_LC(*s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s++;
}
}
break;
case DIGIT:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(digit,"0");
while (s < strend) {
if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s += UTF8SKIP(s);
}
}
else {
while (s < strend) {
if (isDIGIT(*s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s++;
}
}
break;
case DIGITL:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
while (s < strend) {
if (isDIGIT_LC_utf8((U8*)s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s += UTF8SKIP(s);
}
}
else {
while (s < strend) {
if (isDIGIT_LC(*s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s++;
}
}
break;
case NDIGIT:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(digit,"0");
while (s < strend) {
if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s += UTF8SKIP(s);
}
}
else {
while (s < strend) {
if (!isDIGIT(*s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s++;
}
}
break;
case NDIGITL:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
while (s < strend) {
if (!isDIGIT_LC_utf8((U8*)s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s += UTF8SKIP(s);
}
}
else {
while (s < strend) {
if (!isDIGIT_LC(*s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
}
else
tmp = 1;
s++;
}
}
break;
default:
Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
break;
}
return 0;
got_it:
return s;
}
I32
Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
{
register char *s;
register regnode *c;
register char *startpos = stringarg;
I32 minlen;
I32 dontbother = 0;
I32 end_shift = 0;
I32 scream_pos = -1;
char *scream_olds;
SV* oreplsv = GvSV(PL_replgv);
bool do_utf8 = DO_UTF8(sv);
#ifdef DEBUGGING
SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
#endif
RX_MATCH_UTF8_set(prog,do_utf8);
PL_regcc = 0;
cache_re(prog);
#ifdef DEBUGGING
PL_regnarrate = DEBUG_r_TEST;
#endif
if (prog == NULL || startpos == NULL) {
Perl_croak(aTHX_ "NULL regexp parameter");
return 0;
}
minlen = prog->minlen;
if (strend - startpos < minlen) {
DEBUG_r(PerlIO_printf(Perl_debug_log,
"String too short [regexec_flags]...\n"));
goto phooey;
}
if (UCHARAT(prog->program) != REG_MAGIC) {
Perl_croak(aTHX_ "corrupted regexp program");
}
PL_reg_flags = 0;
PL_reg_eval_set = 0;
PL_reg_maxiter = 0;
if (prog->reganch & ROPT_UTF8)
PL_reg_flags |= RF_utf8;
PL_regbol = startpos;
PL_bostr = strbeg;
PL_reg_sv = sv;
PL_regeol = strend;
PL_regtill = startpos+minend;
PL_reg_call_cc = 0;
s = startpos;
if (prog->reganch & ROPT_GPOS_SEEN) {
MAGIC *mg;
if (flags & REXEC_IGNOREPOS)
PL_reg_ganch = startpos;
else if (sv && SvTYPE(sv) >= SVt_PVMG
&& SvMAGIC(sv)
&& (mg = mg_find(sv, PERL_MAGIC_regex_global))
&& mg->mg_len >= 0) {
PL_reg_ganch = strbeg + mg->mg_len;
if (prog->reganch & ROPT_ANCH_GPOS) {
if (s > PL_reg_ganch)
goto phooey;
s = PL_reg_ganch;
}
}
else
PL_reg_ganch = strbeg;
}
if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
re_scream_pos_data d;
d.scream_olds = &scream_olds;
d.scream_pos = &scream_pos;
s = re_intuit_start(prog, sv, s, strend, flags, &d);
if (!s) {
DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
goto phooey;
}
}
DEBUG_r({
char *s0 = UTF ?
pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
UNI_DISPLAY_REGEX) :
prog->precomp;
int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
char *s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
UNI_DISPLAY_REGEX) : startpos;
int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
if (!PL_colorset)
reginitcolors();
PerlIO_printf(Perl_debug_log,
"%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
PL_colors[4],PL_colors[5],PL_colors[0],
len0, len0, s0,
PL_colors[1],
len0 > 60 ? "..." : "",
PL_colors[0],
(int)(len1 > 60 ? 60 : len1),
s1, PL_colors[1],
(len1 > 60 ? "..." : "")
);
});
if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
if (s == startpos && regtry(prog, startpos))
goto got_it;
else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
|| (prog->reganch & ROPT_ANCH_MBOL))
{
char *end;
if (minlen)
dontbother = minlen - 1;
end = HOP3c(strend, -dontbother, strbeg) - 1;
if (prog->check_substr || prog->check_utf8) {
if (s == startpos)
goto after_try;
while (1) {
if (regtry(prog, s))
goto got_it;
after_try:
if (s >= end)
goto phooey;
if (prog->reganch & RE_USE_INTUIT) {
s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
if (!s)
goto phooey;
}
else
s++;
}
} else {
if (s > startpos)
s--;
while (s < end) {
if (*s++ == '\n') {
if (regtry(prog, s))
goto got_it;
}
}
}
}
goto phooey;
} else if (prog->reganch & ROPT_ANCH_GPOS) {
if (regtry(prog, PL_reg_ganch))
goto got_it;
goto phooey;
}
if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
char ch;
#ifdef DEBUGGING
int did_match = 0;
#endif
if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
ch = SvPVX(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
if (do_utf8) {
while (s < strend) {
if (*s == ch) {
DEBUG_r( did_match = 1 );
if (regtry(prog, s)) goto got_it;
s += UTF8SKIP(s);
while (s < strend && *s == ch)
s += UTF8SKIP(s);
}
s += UTF8SKIP(s);
}
}
else {
while (s < strend) {
if (*s == ch) {
DEBUG_r( did_match = 1 );
if (regtry(prog, s)) goto got_it;
s++;
while (s < strend && *s == ch)
s++;
}
s++;
}
}
DEBUG_r(if (!did_match)
PerlIO_printf(Perl_debug_log,
"Did not find anchored character...\n")
);
}
else if (prog->anchored_substr != Nullsv
|| prog->anchored_utf8 != Nullsv
|| ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
&& prog->float_max_offset < strend - s)) {
SV *must;
I32 back_max;
I32 back_min;
char *last;
char *last1;
#ifdef DEBUGGING
int did_match = 0;
#endif
if (prog->anchored_substr || prog->anchored_utf8) {
if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
back_max = back_min = prog->anchored_offset;
} else {
if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
must = do_utf8 ? prog->float_utf8 : prog->float_substr;
back_max = prog->float_max_offset;
back_min = prog->float_min_offset;
}
if (must == &PL_sv_undef)
goto phooey;
last = HOP3c(strend,
-(I32)(CHR_SVLEN(must)
- (SvTAIL(must) != 0) + back_min), strbeg);
if (s > PL_bostr)
last1 = HOPc(s, -1);
else
last1 = s - 1;
scream_pos = -1;
dontbother = end_shift;
strend = HOPc(strend, -dontbother);
while ( (s <= last) &&
((flags & REXEC_SCREAM)
? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
end_shift, &scream_pos, 0))
: (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
(unsigned char*)strend, must,
PL_multiline ? FBMrf_MULTILINE : 0))) ) {
if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
s = strbeg + (s - SvPVX(sv));
DEBUG_r( did_match = 1 );
if (HOPc(s, -back_max) > last1) {
last1 = HOPc(s, -back_min);
s = HOPc(s, -back_max);
}
else {
char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
last1 = HOPc(s, -back_min);
s = t;
}
if (do_utf8) {
while (s <= last1) {
if (regtry(prog, s))
goto got_it;
s += UTF8SKIP(s);
}
}
else {
while (s <= last1) {
if (regtry(prog, s))
goto got_it;
s++;
}
}
}
DEBUG_r(if (!did_match)
PerlIO_printf(Perl_debug_log,
"Did not find %s substr `%s%.*s%s'%s...\n",
((must == prog->anchored_substr || must == prog->anchored_utf8)
? "anchored" : "floating"),
PL_colors[0],
(int)(SvCUR(must) - (SvTAIL(must)!=0)),
SvPVX(must),
PL_colors[1], (SvTAIL(must) ? "$" : ""))
);
goto phooey;
}
else if ((c = prog->regstclass)) {
if (minlen) {
I32 op = (U8)OP(prog->regstclass);
if (PL_regkind[op] != EXACT && op != CANY)
strend = HOPc(strend, -(minlen - 1));
}
DEBUG_r({
SV *prop = sv_newmortal();
char *s0;
char *s1;
int len0;
int len1;
regprop(prop, c);
s0 = UTF ?
pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
UNI_DISPLAY_REGEX) :
SvPVX(prop);
len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
s1 = UTF ?
sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
len1 = UTF ? SvCUR(dsv1) : strend - s;
PerlIO_printf(Perl_debug_log,
"Matching stclass `%*.*s' against `%*.*s'\n",
len0, len0, s0,
len1, len1, s1);
});
if (find_byclass(prog, c, s, strend, startpos, 0))
goto got_it;
DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
}
else {
dontbother = 0;
if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
char *last;
SV* float_real;
if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
if (flags & REXEC_SCREAM) {
last = screaminstr(sv, float_real, s - strbeg,
end_shift, &scream_pos, 1);
if (!last)
last = scream_olds;
else if (RX_MATCH_COPIED(prog))
s = strbeg + (s - SvPVX(sv));
}
else {
STRLEN len;
char *little = SvPV(float_real, len);
if (SvTAIL(float_real)) {
if (memEQ(strend - len + 1, little, len - 1))
last = strend - len + 1;
else if (!PL_multiline)
last = memEQ(strend - len, little, len)
? strend - len : Nullch;
else
goto find_last;
} else {
find_last:
if (len)
last = rninstr(s, strend, little, little + len);
else
last = strend;
}
}
if (last == NULL) {
DEBUG_r(PerlIO_printf(Perl_debug_log,
"%sCan't trim the tail, match fails (should not happen)%s\n",
PL_colors[4],PL_colors[5]));
goto phooey;
}
dontbother = strend - last + prog->float_min_offset;
}
if (minlen && (dontbother < minlen))
dontbother = minlen - 1;
strend -= dontbother;
if (do_utf8) {
for (;;) {
if (regtry(prog, s))
goto got_it;
if (s >= strend)
break;
s += UTF8SKIP(s);
};
}
else {
do {
if (regtry(prog, s))
goto got_it;
} while (s++ < strend);
}
}
goto phooey;
got_it:
RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
if (PL_reg_eval_set) {
if (oreplsv != GvSV(PL_replgv))
sv_setsv(oreplsv, GvSV(PL_replgv));
restore_pos(aTHX_ 0);
}
if ( !(flags & REXEC_NOT_FIRST) ) {
if (RX_MATCH_COPIED(prog)) {
Safefree(prog->subbeg);
RX_MATCH_COPIED_off(prog);
}
if (flags & REXEC_COPY_STR) {
I32 i = PL_regeol - startpos + (stringarg - strbeg);
s = savepvn(strbeg, i);
prog->subbeg = s;
prog->sublen = i;
RX_MATCH_COPIED_on(prog);
}
else {
prog->subbeg = strbeg;
prog->sublen = PL_regeol - strbeg;
}
}
return 1;
phooey:
DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
PL_colors[4],PL_colors[5]));
if (PL_reg_eval_set)
restore_pos(aTHX_ 0);
return 0;
}
STATIC I32
S_regtry(pTHX_ regexp *prog, char *startpos)
{
register I32 i;
register I32 *sp;
register I32 *ep;
CHECKPOINT lastcp;
#ifdef DEBUGGING
PL_regindent = 0;
#endif
if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
MAGIC *mg;
PL_reg_eval_set = RS_init;
DEBUG_r(DEBUG_s(
PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
(IV)(PL_stack_sp - PL_stack_base));
));
SAVEI32(cxstack[cxstack_ix].blk_oldsp);
cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
SAVETMPS;
if (PL_reg_sv) {
if (PL_reg_sv != DEFSV) {
SAVESPTR(DEFSV);
DEFSV = PL_reg_sv;
}
if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
&& (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
sv_magic(PL_reg_sv, (SV*)0,
PERL_MAGIC_regex_global, Nullch, 0);
mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
mg->mg_len = -1;
}
PL_reg_magic = mg;
PL_reg_oldpos = mg->mg_len;
SAVEDESTRUCTOR_X(restore_pos, 0);
}
if (!PL_reg_curpm) {
Newz(22,PL_reg_curpm, 1, PMOP);
#ifdef USE_ITHREADS
{
SV* repointer = newSViv(0);
SvFLAGS(repointer) |= SVf_BREAK;
av_push(PL_regex_padav,repointer);
PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
PL_regex_pad = AvARRAY(PL_regex_padav);
}
#endif
}
PM_SETRE(PL_reg_curpm, prog);
PL_reg_oldcurpm = PL_curpm;
PL_curpm = PL_reg_curpm;
if (RX_MATCH_COPIED(prog)) {
PL_reg_oldsaved = prog->subbeg;
PL_reg_oldsavedlen = prog->sublen;
RX_MATCH_COPIED_off(prog);
}
else
PL_reg_oldsaved = Nullch;
prog->subbeg = PL_bostr;
prog->sublen = PL_regeol - PL_bostr;
}
prog->startp[0] = startpos - PL_bostr;
PL_reginput = startpos;
PL_regstartp = prog->startp;
PL_regendp = prog->endp;
PL_reglastparen = &prog->lastparen;
PL_reglastcloseparen = &prog->lastcloseparen;
prog->lastparen = 0;
prog->lastcloseparen = 0;
PL_regsize = 0;
DEBUG_r(PL_reg_starttry = startpos);
if (PL_reg_start_tmpl <= prog->nparens) {
PL_reg_start_tmpl = prog->nparens*3/2 + 3;
if(PL_reg_start_tmp)
Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
else
New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
}
#if 1
sp = prog->startp;
ep = prog->endp;
if (prog->nparens) {
for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
*++sp = -1;
*++ep = -1;
}
}
#endif
REGCP_SET(lastcp);
if (regmatch(prog->program + 1)) {
prog->endp[0] = PL_reginput - PL_bostr;
return 1;
}
REGCP_UNWIND(lastcp);
return 0;
}
#define RE_UNWIND_BRANCH 1
#define RE_UNWIND_BRANCHJ 2
union re_unwind_t;
typedef struct {
I32 type;
I32 prev;
CHECKPOINT lastcp;
} re_unwind_generic_t;
typedef struct {
I32 type;
I32 prev;
CHECKPOINT lastcp;
I32 lastparen;
regnode *next;
char *locinput;
I32 nextchr;
#ifdef DEBUGGING
int regindent;
#endif
} re_unwind_branch_t;
typedef union re_unwind_t {
I32 type;
re_unwind_generic_t generic;
re_unwind_branch_t branch;
} re_unwind_t;
#define sayYES goto yes
#define sayNO goto no
#define sayNO_ANYOF goto no_anyof
#define sayYES_FINAL goto yes_final
#define sayYES_LOUD goto yes_loud
#define sayNO_FINAL goto no_final
#define sayNO_SILENT goto do_no
#define saySAME(x) if (x) goto yes; else goto no
#define REPORT_CODE_OFF 24
STATIC I32
S_regmatch(pTHX_ regnode *prog)
{
register regnode *scan;
regnode *next;
regnode *inner;
register I32 nextchr;
register I32 n;
register I32 ln = 0;
register char *s = Nullch;
register char *locinput = PL_reginput;
register I32 c1 = 0, c2 = 0, paren;
int minmod = 0, sw = 0, logical = 0;
I32 unwind = 0;
#if 0
I32 firstcp = PL_savestack_ix;
#endif
register bool do_utf8 = PL_reg_match_utf8;
#ifdef DEBUGGING
SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
#endif
#ifdef DEBUGGING
PL_regindent++;
#endif
nextchr = UCHARAT(locinput);
scan = prog;
while (scan != NULL) {
DEBUG_r( {
SV *prop = sv_newmortal();
int docolor = *PL_colors[0];
int taill = (docolor ? 10 : 7);
int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
int pref_len = (locinput - PL_bostr) > (5 + taill) - l
? (5 + taill) - l : locinput - PL_bostr;
int pref0_len;
while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
pref_len++;
pref0_len = pref_len - (locinput - PL_reg_starttry);
if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
l = ( PL_regeol - locinput > (5 + taill) - pref_len
? (5 + taill) - pref_len : PL_regeol - locinput);
while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
l--;
if (pref0_len < 0)
pref0_len = 0;
if (pref0_len > pref_len)
pref0_len = pref_len;
regprop(prop, scan);
{
char *s0 =
do_utf8 && OP(scan) != CANY ?
pv_uni_display(dsv0, (U8*)(locinput - pref_len),
pref0_len, 60, UNI_DISPLAY_REGEX) :
locinput - pref_len;
int len0 = do_utf8 ? strlen(s0) : pref0_len;
char *s1 = do_utf8 && OP(scan) != CANY ?
pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
locinput - pref_len + pref0_len;
int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
char *s2 = do_utf8 && OP(scan) != CANY ?
pv_uni_display(dsv2, (U8*)locinput,
PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
locinput;
int len2 = do_utf8 ? strlen(s2) : l;
PerlIO_printf(Perl_debug_log,
"%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
(IV)(locinput - PL_bostr),
PL_colors[4],
len0, s0,
PL_colors[5],
PL_colors[2],
len1, s1,
PL_colors[3],
(docolor ? "" : "> <"),
PL_colors[0],
len2, s2,
PL_colors[1],
15 - l - pref_len + 1,
"",
(IV)(scan - PL_regprogram), PL_regindent*2, "",
SvPVX(prop));
}
});
next = scan + NEXT_OFF(scan);
if (next == scan)
next = NULL;
switch (OP(scan)) {
case BOL:
if (locinput == PL_bostr || (PL_multiline &&
(nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
{
break;
}
sayNO;
case MBOL:
if (locinput == PL_bostr ||
((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
{
break;
}
sayNO;
case SBOL:
if (locinput == PL_bostr)
break;
sayNO;
case GPOS:
if (locinput == PL_reg_ganch)
break;
sayNO;
case EOL:
if (PL_multiline)
goto meol;
else
goto seol;
case MEOL:
meol:
if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
sayNO;
break;
case SEOL:
seol:
if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
sayNO;
if (PL_regeol - locinput > 1)
sayNO;
break;
case EOS:
if (PL_regeol != locinput)
sayNO;
break;
case SANY:
if (!nextchr && locinput >= PL_regeol)
sayNO;
if (do_utf8) {
locinput += PL_utf8skip[nextchr];
if (locinput > PL_regeol)
sayNO;
nextchr = UCHARAT(locinput);
}
else
nextchr = UCHARAT(++locinput);
break;
case CANY:
if (!nextchr && locinput >= PL_regeol)
sayNO;
nextchr = UCHARAT(++locinput);
break;
case REG_ANY:
if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
sayNO;
if (do_utf8) {
locinput += PL_utf8skip[nextchr];
if (locinput > PL_regeol)
sayNO;
nextchr = UCHARAT(locinput);
}
else
nextchr = UCHARAT(++locinput);
break;
case EXACT:
s = STRING(scan);
ln = STR_LEN(scan);
if (do_utf8 != UTF) {
char *l = locinput;
char *e = s + ln;
STRLEN ulen;
if (do_utf8) {
while (s < e) {
if (l >= PL_regeol)
sayNO;
if (NATIVE_TO_UNI(*(U8*)s) !=
utf8n_to_uvuni((U8*)l, UTF8_MAXLEN, &ulen,
ckWARN(WARN_UTF8) ?
0 : UTF8_ALLOW_ANY))
sayNO;
l += ulen;
s ++;
}
}
else {
while (s < e) {
if (l >= PL_regeol)
sayNO;
if (NATIVE_TO_UNI(*((U8*)l)) !=
utf8n_to_uvuni((U8*)s, UTF8_MAXLEN, &ulen,
ckWARN(WARN_UTF8) ?
0 : UTF8_ALLOW_ANY))
sayNO;
s += ulen;
l ++;
}
}
locinput = l;
nextchr = UCHARAT(locinput);
break;
}
if (UCHARAT(s) != nextchr)
sayNO;
if (PL_regeol - locinput < ln)
sayNO;
if (ln > 1 && memNE(s, locinput, ln))
sayNO;
locinput += ln;
nextchr = UCHARAT(locinput);
break;
case EXACTFL:
PL_reg_flags |= RF_tainted;
case EXACTF:
s = STRING(scan);
ln = STR_LEN(scan);
if (do_utf8 || UTF) {
char *l = locinput;
char *e = PL_regeol;
if (ibcmp_utf8(s, 0, ln, (bool)UTF,
l, &e, 0, do_utf8)) {
if (!(do_utf8 &&
toLOWER(s[0]) == 's' &&
ln >= 2 &&
toLOWER(s[1]) == 's' &&
(U8)l[0] == 0xC3 &&
e - l >= 2 &&
(U8)l[1] == 0x9F))
sayNO;
}
locinput = e;
nextchr = UCHARAT(locinput);
break;
}
if (UCHARAT(s) != nextchr &&
UCHARAT(s) != ((OP(scan) == EXACTF)
? PL_fold : PL_fold_locale)[nextchr])
sayNO;
if (PL_regeol - locinput < ln)
sayNO;
if (ln > 1 && (OP(scan) == EXACTF
? ibcmp(s, locinput, ln)
: ibcmp_locale(s, locinput, ln)))
sayNO;
locinput += ln;
nextchr = UCHARAT(locinput);
break;
case ANYOF:
if (do_utf8) {
STRLEN inclasslen = PL_regeol - locinput;
if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
sayNO_ANYOF;
if (locinput >= PL_regeol)
sayNO;
locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
nextchr = UCHARAT(locinput);
break;
}
else {
if (nextchr < 0)
nextchr = UCHARAT(locinput);
if (!REGINCLASS(scan, (U8*)locinput))
sayNO_ANYOF;
if (!nextchr && locinput >= PL_regeol)
sayNO;
nextchr = UCHARAT(++locinput);
break;
}
no_anyof:
if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
locinput += SHARP_S_SKIP;
nextchr = UCHARAT(locinput);
}
else
sayNO;
break;
case ALNUML:
PL_reg_flags |= RF_tainted;
case ALNUM:
if (!nextchr)
sayNO;
if (do_utf8) {
LOAD_UTF8_CHARCLASS(alnum,"a");
if (!(OP(scan) == ALNUM
? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
: isALNUM_LC_utf8((U8*)locinput)))
{
sayNO;
}
locinput += PL_utf8skip[nextchr];
nextchr = UCHARAT(locinput);
break;
}
if (!(OP(scan) == ALNUM
? isALNUM(nextchr) : isALNUM_LC(nextchr)))
sayNO;
nextchr = UCHARAT(++locinput);
break;
case NALNUML:
PL_reg_flags |= RF_tainted;
case NALNUM:
if (!nextchr && locinput >= PL_regeol)
sayNO;
if (do_utf8) {
LOAD_UTF8_CHARCLASS(alnum,"a");
if (OP(scan) == NALNUM
? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
: isALNUM_LC_utf8((U8*)locinput))
{
sayNO;
}
locinput += PL_utf8skip[nextchr];
nextchr = UCHARAT(locinput);
break;
}
if (OP(scan) == NALNUM
? isALNUM(nextchr) : isALNUM_LC(nextchr))
sayNO;
nextchr = UCHARAT(++locinput);
break;
case BOUNDL:
case NBOUNDL:
PL_reg_flags |= RF_tainted;
case BOUND:
case NBOUND:
if (do_utf8) {
if (locinput == PL_bostr)
ln = '\n';
else {
U8 *r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
}
if (OP(scan) == BOUND || OP(scan) == NBOUND) {
ln = isALNUM_uni(ln);
LOAD_UTF8_CHARCLASS(alnum,"a");
n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
}
else {
ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
n = isALNUM_LC_utf8((U8*)locinput);
}
}
else {
ln = (locinput != PL_bostr) ?
UCHARAT(locinput - 1) : '\n';
if (OP(scan) == BOUND || OP(scan) == NBOUND) {
ln = isALNUM(ln);
n = isALNUM(nextchr);
}
else {
ln = isALNUM_LC(ln);
n = isALNUM_LC(nextchr);
}
}
if (((!ln) == (!n)) == (OP(scan) == BOUND ||
OP(scan) == BOUNDL))
sayNO;
break;
case SPACEL:
PL_reg_flags |= RF_tainted;
case SPACE:
if (!nextchr)
sayNO;
if (do_utf8) {
if (UTF8_IS_CONTINUED(nextchr)) {
LOAD_UTF8_CHARCLASS(space," ");
if (!(OP(scan) == SPACE
? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
: isSPACE_LC_utf8((U8*)locinput)))
{
sayNO;
}
locinput += PL_utf8skip[nextchr];
nextchr = UCHARAT(locinput);
break;
}
if (!(OP(scan) == SPACE
? isSPACE(nextchr) : isSPACE_LC(nextchr)))
sayNO;
nextchr = UCHARAT(++locinput);
}
else {
if (!(OP(scan) == SPACE
? isSPACE(nextchr) : isSPACE_LC(nextchr)))
sayNO;
nextchr = UCHARAT(++locinput);
}
break;
case NSPACEL:
PL_reg_flags |= RF_tainted;
case NSPACE:
if (!nextchr && locinput >= PL_regeol)
sayNO;
if (do_utf8) {
LOAD_UTF8_CHARCLASS(space," ");
if (OP(scan) == NSPACE
? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
: isSPACE_LC_utf8((U8*)locinput))
{
sayNO;
}
locinput += PL_utf8skip[nextchr];
nextchr = UCHARAT(locinput);
break;
}
if (OP(scan) == NSPACE
? isSPACE(nextchr) : isSPACE_LC(nextchr))
sayNO;
nextchr = UCHARAT(++locinput);
break;
case DIGITL:
PL_reg_flags |= RF_tainted;
case DIGIT:
if (!nextchr)
sayNO;
if (do_utf8) {
LOAD_UTF8_CHARCLASS(digit,"0");
if (!(OP(scan) == DIGIT
? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
: isDIGIT_LC_utf8((U8*)locinput)))
{
sayNO;
}
locinput += PL_utf8skip[nextchr];
nextchr = UCHARAT(locinput);
break;
}
if (!(OP(scan) == DIGIT
? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
sayNO;
nextchr = UCHARAT(++locinput);
break;
case NDIGITL:
PL_reg_flags |= RF_tainted;
case NDIGIT:
if (!nextchr && locinput >= PL_regeol)
sayNO;
if (do_utf8) {
LOAD_UTF8_CHARCLASS(digit,"0");
if (OP(scan) == NDIGIT
? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
: isDIGIT_LC_utf8((U8*)locinput))
{
sayNO;
}
locinput += PL_utf8skip[nextchr];
nextchr = UCHARAT(locinput);
break;
}
if (OP(scan) == NDIGIT
? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
sayNO;
nextchr = UCHARAT(++locinput);
break;
case CLUMP:
if (locinput >= PL_regeol)
sayNO;
if (do_utf8) {
LOAD_UTF8_CHARCLASS(mark,"~");
if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
sayNO;
locinput += PL_utf8skip[nextchr];
while (locinput < PL_regeol &&
swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
locinput += UTF8SKIP(locinput);
if (locinput > PL_regeol)
sayNO;
}
else
locinput++;
nextchr = UCHARAT(locinput);
break;
case REFFL:
PL_reg_flags |= RF_tainted;
case REF:
case REFF:
n = ARG(scan);
ln = PL_regstartp[n];
PL_reg_leftiter = PL_reg_maxiter;
if ((I32)*PL_reglastparen < n || ln == -1)
sayNO;
if (ln == PL_regendp[n])
break;
s = PL_bostr + ln;
if (do_utf8 && OP(scan) != REF) {
char *l = locinput;
char *e = PL_bostr + PL_regendp[n];
if (OP(scan) == REFF) {
STRLEN ulen1, ulen2;
U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
while (s < e) {
if (l >= PL_regeol)
sayNO;
toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
sayNO;
s += ulen1;
l += ulen2;
}
}
locinput = l;
nextchr = UCHARAT(locinput);
break;
}
if (UCHARAT(s) != nextchr &&
(OP(scan) == REF ||
(UCHARAT(s) != ((OP(scan) == REFF
? PL_fold : PL_fold_locale)[nextchr]))))
sayNO;
ln = PL_regendp[n] - ln;
if (locinput + ln > PL_regeol)
sayNO;
if (ln > 1 && (OP(scan) == REF
? memNE(s, locinput, ln)
: (OP(scan) == REFF
? ibcmp(s, locinput, ln)
: ibcmp_locale(s, locinput, ln))))
sayNO;
locinput += ln;
nextchr = UCHARAT(locinput);
break;
case NOTHING:
case TAIL:
break;
case BACK:
break;
case EVAL:
{
dSP;
OP_4tree *oop = PL_op;
COP *ocurcop = PL_curcop;
PAD *old_comppad;
SV *ret;
n = ARG(scan);
PL_op = (OP_4tree*)PL_regdata->data[n];
DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
{
SV **before = SP;
CALLRUNOPS(aTHX);
SPAGAIN;
if (SP == before)
ret = &PL_sv_undef;
else {
ret = POPs;
PUTBACK;
}
}
PL_op = oop;
PAD_RESTORE_LOCAL(old_comppad);
PL_curcop = ocurcop;
if (logical) {
if (logical == 2) {
regexp *re;
MAGIC *mg = Null(MAGIC*);
re_cc_state state;
CHECKPOINT cp, lastcp;
int toggleutf;
register SV *sv;
if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
mg = mg_find(sv, PERL_MAGIC_qr);
else if (SvSMAGICAL(ret)) {
if (SvGMAGICAL(ret))
sv_unmagic(ret, PERL_MAGIC_qr);
else
mg = mg_find(ret, PERL_MAGIC_qr);
}
if (mg) {
re = (regexp *)mg->mg_obj;
(void)ReREFCNT_inc(re);
}
else {
STRLEN len;
char *t = SvPV(ret, len);
PMOP pm;
char *oprecomp = PL_regprecomp;
I32 osize = PL_regsize;
I32 onpar = PL_regnpar;
Zero(&pm, 1, PMOP);
if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
re = CALLREGCOMP(aTHX_ t, t + len, &pm);
if (!(SvFLAGS(ret)
& (SVs_TEMP | SVs_PADTMP | SVf_READONLY
| SVs_GMG)))
sv_magic(ret,(SV*)ReREFCNT_inc(re),
PERL_MAGIC_qr,0,0);
PL_regprecomp = oprecomp;
PL_regsize = osize;
PL_regnpar = onpar;
}
DEBUG_r(
PerlIO_printf(Perl_debug_log,
"Entering embedded `%s%.60s%s%s'\n",
PL_colors[0],
re->precomp,
PL_colors[1],
(strlen(re->precomp) > 60 ? "..." : ""))
);
state.node = next;
state.prev = PL_reg_call_cc;
state.cc = PL_regcc;
state.re = PL_reg_re;
PL_regcc = 0;
cp = regcppush(0);
REGCP_SET(lastcp);
cache_re(re);
state.ss = PL_savestack_ix;
*PL_reglastparen = 0;
*PL_reglastcloseparen = 0;
PL_reg_call_cc = &state;
PL_reginput = locinput;
toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
((re->reganch & ROPT_UTF8) != 0);
if (toggleutf) PL_reg_flags ^= RF_utf8;
PL_reg_maxiter = 0;
if (regmatch(re->program + 1)) {
PL_reg_call_cc = state.prev;
PL_regcc = state.cc;
PL_reg_re = state.re;
cache_re(PL_reg_re);
if (toggleutf) PL_reg_flags ^= RF_utf8;
PL_reg_maxiter = 0;
ReREFCNT_dec(re);
regcpblow(cp);
sayYES;
}
ReREFCNT_dec(re);
REGCP_UNWIND(lastcp);
regcppop();
PL_reg_call_cc = state.prev;
PL_regcc = state.cc;
PL_reg_re = state.re;
cache_re(PL_reg_re);
if (toggleutf) PL_reg_flags ^= RF_utf8;
PL_reg_maxiter = 0;
logical = 0;
sayNO;
}
sw = SvTRUE(ret);
logical = 0;
}
else
sv_setsv(save_scalar(PL_replgv), ret);
break;
}
case OPEN:
n = ARG(scan);
PL_reg_start_tmp[n] = locinput;
if (n > PL_regsize)
PL_regsize = n;
break;
case CLOSE:
n = ARG(scan);
PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
PL_regendp[n] = locinput - PL_bostr;
if (n > (I32)*PL_reglastparen)
*PL_reglastparen = n;
*PL_reglastcloseparen = n;
break;
case GROUPP:
n = ARG(scan);
sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
break;
case IFTHEN:
PL_reg_leftiter = PL_reg_maxiter;
if (sw)
next = NEXTOPER(NEXTOPER(scan));
else {
next = scan + ARG(scan);
if (OP(next) == IFTHEN)
next = NEXTOPER(NEXTOPER(next));
}
break;
case LOGICAL:
logical = scan->flags;
break;
case CURLYX: {
CURCUR cc;
CHECKPOINT cp = PL_savestack_ix;
I32 parenfloor = scan->flags;
if (OP(PREVOPER(next)) == NOTHING)
next += ARG(next);
cc.oldcc = PL_regcc;
PL_regcc = &cc;
if (parenfloor > (I32)*PL_reglastparen)
parenfloor = *PL_reglastparen;
cc.parenfloor = parenfloor;
cc.cur = -1;
cc.min = ARG1(scan);
cc.max = ARG2(scan);
cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
cc.next = next;
cc.minmod = minmod;
cc.lastloc = 0;
PL_reginput = locinput;
n = regmatch(PREVOPER(next));
regcpblow(cp);
PL_regcc = cc.oldcc;
saySAME(n);
}
case WHILEM: {
CHECKPOINT cp, lastcp;
CURCUR* cc = PL_regcc;
char *lastloc = cc->lastloc;
n = cc->cur + 1;
PL_reginput = locinput;
DEBUG_r(
PerlIO_printf(Perl_debug_log,
"%*s %ld out of %ld..%ld cc=%"UVxf"\n",
REPORT_CODE_OFF+PL_regindent*2, "",
(long)n, (long)cc->min,
(long)cc->max, PTR2UV(cc))
);
if (locinput == cc->lastloc && n >= cc->min) {
PL_regcc = cc->oldcc;
if (PL_regcc)
ln = PL_regcc->cur;
DEBUG_r(
PerlIO_printf(Perl_debug_log,
"%*s empty match detected, try continuation...\n",
REPORT_CODE_OFF+PL_regindent*2, "")
);
if (regmatch(cc->next))
sayYES;
if (PL_regcc)
PL_regcc->cur = ln;
PL_regcc = cc;
sayNO;
}
if (n < cc->min) {
cc->cur = n;
cc->lastloc = locinput;
if (regmatch(cc->scan))
sayYES;
cc->cur = n - 1;
cc->lastloc = lastloc;
sayNO;
}
if (scan->flags) {
if (!PL_reg_maxiter) {
PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
PL_reg_leftiter = PL_reg_maxiter;
}
if (PL_reg_leftiter-- == 0) {
I32 size = (PL_reg_maxiter + 7)/8;
if (PL_reg_poscache) {
if ((I32)PL_reg_poscache_size < size) {
Renew(PL_reg_poscache, size, char);
PL_reg_poscache_size = size;
}
Zero(PL_reg_poscache, size, char);
}
else {
PL_reg_poscache_size = size;
Newz(29, PL_reg_poscache, size, char);
}
DEBUG_r(
PerlIO_printf(Perl_debug_log,
"%sDetected a super-linear match, switching on caching%s...\n",
PL_colors[4], PL_colors[5])
);
}
if (PL_reg_leftiter < 0) {
I32 o = locinput - PL_bostr, b;
o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
b = o % 8;
o /= 8;
if (PL_reg_poscache[o] & (1<<b)) {
DEBUG_r(
PerlIO_printf(Perl_debug_log,
"%*s already tried at this position...\n",
REPORT_CODE_OFF+PL_regindent*2, "")
);
sayNO_SILENT;
}
PL_reg_poscache[o] |= (1<<b);
}
}
if (cc->minmod) {
PL_regcc = cc->oldcc;
if (PL_regcc)
ln = PL_regcc->cur;
cp = regcppush(cc->parenfloor);
REGCP_SET(lastcp);
if (regmatch(cc->next)) {
regcpblow(cp);
sayYES;
}
REGCP_UNWIND(lastcp);
regcppop();
if (PL_regcc)
PL_regcc->cur = ln;
PL_regcc = cc;
if (n >= cc->max) {
if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
&& !(PL_reg_flags & RF_warned)) {
PL_reg_flags |= RF_warned;
Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
"Complex regular subexpression recursion",
REG_INFTY - 1);
}
sayNO;
}
DEBUG_r(
PerlIO_printf(Perl_debug_log,
"%*s trying longer...\n",
REPORT_CODE_OFF+PL_regindent*2, "")
);
PL_reginput = locinput;
cc->cur = n;
cc->lastloc = locinput;
cp = regcppush(cc->parenfloor);
REGCP_SET(lastcp);
if (regmatch(cc->scan)) {
regcpblow(cp);
sayYES;
}
REGCP_UNWIND(lastcp);
regcppop();
cc->cur = n - 1;
cc->lastloc = lastloc;
sayNO;
}
if (n < cc->max) {
cp = regcppush(cc->parenfloor);
cc->cur = n;
cc->lastloc = locinput;
REGCP_SET(lastcp);
if (regmatch(cc->scan)) {
regcpblow(cp);
sayYES;
}
REGCP_UNWIND(lastcp);
regcppop();
PL_reginput = locinput;
DEBUG_r(
PerlIO_printf(Perl_debug_log,
"%*s failed, try continuation...\n",
REPORT_CODE_OFF+PL_regindent*2, "")
);
}
if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
&& !(PL_reg_flags & RF_warned)) {
PL_reg_flags |= RF_warned;
Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
"Complex regular subexpression recursion",
REG_INFTY - 1);
}
PL_regcc = cc->oldcc;
if (PL_regcc)
ln = PL_regcc->cur;
if (regmatch(cc->next))
sayYES;
if (PL_regcc)
PL_regcc->cur = ln;
PL_regcc = cc;
cc->cur = n - 1;
cc->lastloc = lastloc;
sayNO;
}
case BRANCHJ:
next = scan + ARG(scan);
if (next == scan)
next = NULL;
inner = NEXTOPER(NEXTOPER(scan));
goto do_branch;
case BRANCH:
inner = NEXTOPER(scan);
do_branch:
{
c1 = OP(scan);
if (OP(next) != c1)
next = inner;
else {
I32 lastparen = *PL_reglastparen;
I32 unwind1;
re_unwind_branch_t *uw;
unwind1 = SSNEWt(1,re_unwind_branch_t);
uw = SSPTRt(unwind1,re_unwind_branch_t);
uw->prev = unwind;
unwind = unwind1;
uw->type = ((c1 == BRANCH)
? RE_UNWIND_BRANCH
: RE_UNWIND_BRANCHJ);
uw->lastparen = lastparen;
uw->next = next;
uw->locinput = locinput;
uw->nextchr = nextchr;
#ifdef DEBUGGING
uw->regindent = ++PL_regindent;
#endif
REGCP_SET(uw->lastcp);
next = inner;
}
}
break;
case MINMOD:
minmod = 1;
break;
case CURLYM:
{
I32 l = 0;
CHECKPOINT lastcp;
ln = ARG1(scan);
n = ARG2(scan);
paren = scan->flags;
if (paren) {
if (paren > PL_regsize)
PL_regsize = paren;
if (paren > (I32)*PL_reglastparen)
*PL_reglastparen = paren;
}
scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
if (paren)
scan += NEXT_OFF(scan);
PL_reginput = locinput;
if (minmod) {
minmod = 0;
if (ln && regrepeat_hard(scan, ln, &l) < ln)
sayNO;
if (ln && l == 0)
n = ln;
locinput = PL_reginput;
if (HAS_TEXT(next) || JUMPABLE(next)) {
regnode *text_node = next;
if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
else {
if (PL_regkind[(U8)OP(text_node)] == REF) {
I32 n, ln;
n = ARG(text_node);
ln = PL_regstartp[n];
if (
(I32)*PL_reglastparen < n ||
ln == -1 ||
ln == PL_regendp[n]
) {
c1 = c2 = -1000;
goto assume_ok_MM;
}
c1 = *(PL_bostr + ln);
}
else { c1 = (U8)*STRING(text_node); }
if (OP(text_node) == EXACTF || OP(text_node) == REFF)
c2 = PL_fold[c1];
else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
c2 = PL_fold_locale[c1];
else
c2 = c1;
}
}
else
c1 = c2 = -1000;
assume_ok_MM:
REGCP_SET(lastcp);
while (n >= ln || (n == REG_INFTY && ln > 0 && l)) {
if (c1 == -1000 ||
UCHARAT(PL_reginput) == c1 ||
UCHARAT(PL_reginput) == c2)
{
if (paren) {
if (ln) {
PL_regstartp[paren] =
HOPc(PL_reginput, -l) - PL_bostr;
PL_regendp[paren] = PL_reginput - PL_bostr;
}
else
PL_regendp[paren] = -1;
}
if (regmatch(next))
sayYES;
REGCP_UNWIND(lastcp);
}
PL_reginput = locinput;
if (regrepeat_hard(scan, 1, &l)) {
ln++;
locinput = PL_reginput;
}
else
sayNO;
}
}
else {
n = regrepeat_hard(scan, n, &l);
if (n != 0 && l == 0 && !(paren && ln == 0))
ln = n;
locinput = PL_reginput;
DEBUG_r(
PerlIO_printf(Perl_debug_log,
"%*s matched %"IVdf" times, len=%"IVdf"...\n",
(int)(REPORT_CODE_OFF+PL_regindent*2), "",
(IV) n, (IV)l)
);
if (n >= ln) {
if (HAS_TEXT(next) || JUMPABLE(next)) {
regnode *text_node = next;
if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
else {
if (PL_regkind[(U8)OP(text_node)] == REF) {
I32 n, ln;
n = ARG(text_node);
ln = PL_regstartp[n];
if (
(I32)*PL_reglastparen < n ||
ln == -1 ||
ln == PL_regendp[n]
) {
c1 = c2 = -1000;
goto assume_ok_REG;
}
c1 = *(PL_bostr + ln);
}
else { c1 = (U8)*STRING(text_node); }
if (OP(text_node) == EXACTF || OP(text_node) == REFF)
c2 = PL_fold[c1];
else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
c2 = PL_fold_locale[c1];
else
c2 = c1;
}
}
else
c1 = c2 = -1000;
}
assume_ok_REG:
REGCP_SET(lastcp);
while (n >= ln) {
if (c1 == -1000 ||
UCHARAT(PL_reginput) == c1 ||
UCHARAT(PL_reginput) == c2)
{
DEBUG_r(
PerlIO_printf(Perl_debug_log,
"%*s trying tail with n=%"IVdf"...\n",
(int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
);
if (paren) {
if (n) {
PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
PL_regendp[paren] = PL_reginput - PL_bostr;
}
else
PL_regendp[paren] = -1;
}
if (regmatch(next))
sayYES;
REGCP_UNWIND(lastcp);
}
n--;
locinput = HOPc(locinput, -l);
PL_reginput = locinput;
}
}
sayNO;
break;
}
case CURLYN:
paren = scan->flags;
if (paren > PL_regsize)
PL_regsize = paren;
if (paren > (I32)*PL_reglastparen)
*PL_reglastparen = paren;
ln = ARG1(scan);
n = ARG2(scan);
scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
goto repeat;
case CURLY:
paren = 0;
ln = ARG1(scan);
n = ARG2(scan);
scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
goto repeat;
case STAR:
ln = 0;
n = REG_INFTY;
scan = NEXTOPER(scan);
paren = 0;
goto repeat;
case PLUS:
ln = 1;
n = REG_INFTY;
scan = NEXTOPER(scan);
paren = 0;
repeat:
if (HAS_TEXT(next) || JUMPABLE(next)) {
U8 *s;
regnode *text_node = next;
if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
else {
if (PL_regkind[(U8)OP(text_node)] == REF) {
I32 n, ln;
n = ARG(text_node);
ln = PL_regstartp[n];
if (
(I32)*PL_reglastparen < n ||
ln == -1 ||
ln == PL_regendp[n]
) {
c1 = c2 = -1000;
goto assume_ok_easy;
}
s = (U8*)PL_bostr + ln;
}
else { s = (U8*)STRING(text_node); }
if (!UTF) {
c2 = c1 = *s;
if (OP(text_node) == EXACTF || OP(text_node) == REFF)
c2 = PL_fold[c1];
else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
c2 = PL_fold_locale[c1];
}
else {
if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
STRLEN ulen1, ulen2;
U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXLEN, 0,
ckWARN(WARN_UTF8) ?
0 : UTF8_ALLOW_ANY);
c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXLEN, 0,
ckWARN(WARN_UTF8) ?
0 : UTF8_ALLOW_ANY);
}
else {
c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXLEN, 0,
ckWARN(WARN_UTF8) ?
0 : UTF8_ALLOW_ANY);
}
}
}
}
else
c1 = c2 = -1000;
assume_ok_easy:
PL_reginput = locinput;
if (minmod) {
CHECKPOINT lastcp;
minmod = 0;
if (ln && regrepeat(scan, ln) < ln)
sayNO;
locinput = PL_reginput;
REGCP_SET(lastcp);
if (c1 != -1000) {
char *e;
char *old = locinput;
int count = 0;
if (n == REG_INFTY) {
e = PL_regeol - 1;
if (do_utf8)
while (UTF8_IS_CONTINUATION(*(U8*)e))
e--;
}
else if (do_utf8) {
int m = n - ln;
for (e = locinput;
m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
e += UTF8SKIP(e);
}
else {
e = locinput + n - ln;
if (e >= PL_regeol)
e = PL_regeol - 1;
}
while (1) {
if (!do_utf8) {
if (c1 == c2) {
while (locinput <= e &&
UCHARAT(locinput) != c1)
locinput++;
} else {
while (locinput <= e
&& UCHARAT(locinput) != c1
&& UCHARAT(locinput) != c2)
locinput++;
}
count = locinput - old;
}
else {
STRLEN len;
if (c1 == c2) {
while (locinput <= e &&
utf8n_to_uvchr((U8*)locinput,
UTF8_MAXLEN, &len,
ckWARN(WARN_UTF8) ?
0 : UTF8_ALLOW_ANY) != (UV)c1) {
locinput += len;
count++;
}
} else {
while (locinput <= e) {
UV c = utf8n_to_uvchr((U8*)locinput,
UTF8_MAXLEN, &len,
ckWARN(WARN_UTF8) ?
0 : UTF8_ALLOW_ANY);
if (c == (UV)c1 || c == (UV)c2)
break;
locinput += len;
count++;
}
}
}
if (locinput > e)
sayNO;
if (locinput != old) {
ln = 1;
if (regrepeat(scan, count) < count)
sayNO;
}
TRYPAREN(paren, ln, locinput);
PL_reginput = locinput;
REGCP_UNWIND(lastcp);
old = locinput;
if (do_utf8)
locinput += UTF8SKIP(locinput);
else
locinput++;
count = 1;
}
}
else
while (n >= ln || (n == REG_INFTY && ln > 0)) {
UV c;
if (c1 != -1000) {
if (do_utf8)
c = utf8n_to_uvchr((U8*)PL_reginput,
UTF8_MAXLEN, 0,
ckWARN(WARN_UTF8) ?
0 : UTF8_ALLOW_ANY);
else
c = UCHARAT(PL_reginput);
if (c == (UV)c1 || c == (UV)c2)
{
TRYPAREN(paren, ln, PL_reginput);
REGCP_UNWIND(lastcp);
}
}
else if (c1 == -1000)
{
TRYPAREN(paren, ln, PL_reginput);
REGCP_UNWIND(lastcp);
}
PL_reginput = locinput;
if (regrepeat(scan, 1)) {
ln++;
locinput = PL_reginput;
}
else
sayNO;
}
}
else {
CHECKPOINT lastcp;
n = regrepeat(scan, n);
locinput = PL_reginput;
if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
((!PL_multiline && OP(next) != MEOL) ||
OP(next) == SEOL || OP(next) == EOS))
{
ln = n;
if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
ln--;
}
REGCP_SET(lastcp);
if (paren) {
UV c = 0;
while (n >= ln) {
if (c1 != -1000) {
if (do_utf8)
c = utf8n_to_uvchr((U8*)PL_reginput,
UTF8_MAXLEN, 0,
ckWARN(WARN_UTF8) ?
0 : UTF8_ALLOW_ANY);
else
c = UCHARAT(PL_reginput);
}
if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
{
TRYPAREN(paren, n, PL_reginput);
REGCP_UNWIND(lastcp);
}
n--;
PL_reginput = locinput = HOPc(locinput, -1);
}
}
else {
UV c = 0;
while (n >= ln) {
if (c1 != -1000) {
if (do_utf8)
c = utf8n_to_uvchr((U8*)PL_reginput,
UTF8_MAXLEN, 0,
ckWARN(WARN_UTF8) ?
0 : UTF8_ALLOW_ANY);
else
c = UCHARAT(PL_reginput);
}
if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
{
TRYPAREN(paren, n, PL_reginput);
REGCP_UNWIND(lastcp);
}
n--;
PL_reginput = locinput = HOPc(locinput, -1);
}
}
}
sayNO;
break;
case END:
if (PL_reg_call_cc) {
re_cc_state *cur_call_cc = PL_reg_call_cc;
CURCUR *cctmp = PL_regcc;
regexp *re = PL_reg_re;
CHECKPOINT cp, lastcp;
cp = regcppush(0);
REGCP_SET(lastcp);
regcp_set_to(PL_reg_call_cc->ss);
PL_reginput = locinput;
cache_re(PL_reg_call_cc->re);
PL_regcc = PL_reg_call_cc->cc;
PL_reg_call_cc = PL_reg_call_cc->prev;
if (regmatch(cur_call_cc->node)) {
PL_reg_call_cc = cur_call_cc;
regcpblow(cp);
sayYES;
}
REGCP_UNWIND(lastcp);
regcppop();
PL_reg_call_cc = cur_call_cc;
PL_regcc = cctmp;
PL_reg_re = re;
cache_re(re);
DEBUG_r(
PerlIO_printf(Perl_debug_log,
"%*s continuation failed...\n",
REPORT_CODE_OFF+PL_regindent*2, "")
);
sayNO_SILENT;
}
if (locinput < PL_regtill) {
DEBUG_r(PerlIO_printf(Perl_debug_log,
"%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
PL_colors[4],
(long)(locinput - PL_reg_starttry),
(long)(PL_regtill - PL_reg_starttry),
PL_colors[5]));
sayNO_FINAL;
}
PL_reginput = locinput;
sayYES_FINAL;
case SUCCEED:
PL_reginput = locinput;
sayYES_LOUD;
case SUSPEND:
n = 1;
PL_reginput = locinput;
goto do_ifmatch;
case UNLESSM:
n = 0;
if (scan->flags) {
s = HOPBACKc(locinput, scan->flags);
if (!s)
goto say_yes;
PL_reginput = s;
}
else
PL_reginput = locinput;
goto do_ifmatch;
case IFMATCH:
n = 1;
if (scan->flags) {
s = HOPBACKc(locinput, scan->flags);
if (!s)
goto say_no;
PL_reginput = s;
}
else
PL_reginput = locinput;
do_ifmatch:
inner = NEXTOPER(NEXTOPER(scan));
if (regmatch(inner) != n) {
say_no:
if (logical) {
logical = 0;
sw = 0;
goto do_longjump;
}
else
sayNO;
}
say_yes:
if (logical) {
logical = 0;
sw = 1;
}
if (OP(scan) == SUSPEND) {
locinput = PL_reginput;
nextchr = UCHARAT(locinput);
}
case LONGJMP:
do_longjump:
next = scan + ARG(scan);
if (next == scan)
next = NULL;
break;
default:
PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
PTR2UV(scan), OP(scan));
Perl_croak(aTHX_ "regexp memory corruption");
}
reenter:
scan = next;
}
Perl_croak(aTHX_ "corrupted regexp pointers");
sayNO;
yes_loud:
DEBUG_r(
PerlIO_printf(Perl_debug_log,
"%*s %scould match...%s\n",
REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
);
goto yes;
yes_final:
DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
PL_colors[4],PL_colors[5]));
yes:
#ifdef DEBUGGING
PL_regindent--;
#endif
#if 0
if (unwind)
regcpblow(firstcp);
#endif
return 1;
no:
DEBUG_r(
PerlIO_printf(Perl_debug_log,
"%*s %sfailed...%s\n",
REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
);
goto do_no;
no_final:
do_no:
if (unwind) {
re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
switch (uw->type) {
case RE_UNWIND_BRANCH:
case RE_UNWIND_BRANCHJ:
{
re_unwind_branch_t *uwb = &(uw->branch);
I32 lastparen = uwb->lastparen;
REGCP_UNWIND(uwb->lastcp);
for (n = *PL_reglastparen; n > lastparen; n--)
PL_regendp[n] = -1;
*PL_reglastparen = n;
scan = next = uwb->next;
if ( !scan ||
OP(scan) != (uwb->type == RE_UNWIND_BRANCH
? BRANCH : BRANCHJ) ) {
unwind = uwb->prev;
#ifdef DEBUGGING
PL_regindent--;
#endif
goto do_no;
}
if ((n = (uwb->type == RE_UNWIND_BRANCH
? NEXT_OFF(next) : ARG(next))))
next += n;
else
next = NULL;
uwb->next = next;
next = NEXTOPER(scan);
if (uwb->type == RE_UNWIND_BRANCHJ)
next = NEXTOPER(next);
locinput = uwb->locinput;
nextchr = uwb->nextchr;
#ifdef DEBUGGING
PL_regindent = uwb->regindent;
#endif
goto reenter;
}
default:
Perl_croak(aTHX_ "regexp unwind memory corruption");
}
}
#ifdef DEBUGGING
PL_regindent--;
#endif
return 0;
}
STATIC I32
S_regrepeat(pTHX_ regnode *p, I32 max)
{
register char *scan;
register I32 c;
register char *loceol = PL_regeol;
register I32 hardcount = 0;
register bool do_utf8 = PL_reg_match_utf8;
scan = PL_reginput;
if (max == REG_INFTY)
max = I32_MAX;
else if (max < loceol - scan)
loceol = scan + max;
switch (OP(p)) {
case REG_ANY:
if (do_utf8) {
loceol = PL_regeol;
while (scan < loceol && hardcount < max && *scan != '\n') {
scan += UTF8SKIP(scan);
hardcount++;
}
} else {
while (scan < loceol && *scan != '\n')
scan++;
}
break;
case SANY:
if (do_utf8) {
loceol = PL_regeol;
while (scan < loceol && hardcount < max) {
scan += UTF8SKIP(scan);
hardcount++;
}
}
else
scan = loceol;
break;
case CANY:
scan = loceol;
break;
case EXACT:
c = (U8)*STRING(p);
while (scan < loceol && UCHARAT(scan) == c)
scan++;
break;
case EXACTF:
c = (U8)*STRING(p);
while (scan < loceol &&
(UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
scan++;
break;
case EXACTFL:
PL_reg_flags |= RF_tainted;
c = (U8)*STRING(p);
while (scan < loceol &&
(UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
scan++;
break;
case ANYOF:
if (do_utf8) {
loceol = PL_regeol;
while (hardcount < max && scan < loceol &&
reginclass(p, (U8*)scan, 0, do_utf8)) {
scan += UTF8SKIP(scan);
hardcount++;
}
} else {
while (scan < loceol && REGINCLASS(p, (U8*)scan))
scan++;
}
break;
case ALNUM:
if (do_utf8) {
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(alnum,"a");
while (hardcount < max && scan < loceol &&
swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
scan += UTF8SKIP(scan);
hardcount++;
}
} else {
while (scan < loceol && isALNUM(*scan))
scan++;
}
break;
case ALNUML:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
loceol = PL_regeol;
while (hardcount < max && scan < loceol &&
isALNUM_LC_utf8((U8*)scan)) {
scan += UTF8SKIP(scan);
hardcount++;
}
} else {
while (scan < loceol && isALNUM_LC(*scan))
scan++;
}
break;
case NALNUM:
if (do_utf8) {
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(alnum,"a");
while (hardcount < max && scan < loceol &&
!swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
scan += UTF8SKIP(scan);
hardcount++;
}
} else {
while (scan < loceol && !isALNUM(*scan))
scan++;
}
break;
case NALNUML:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
loceol = PL_regeol;
while (hardcount < max && scan < loceol &&
!isALNUM_LC_utf8((U8*)scan)) {
scan += UTF8SKIP(scan);
hardcount++;
}
} else {
while (scan < loceol && !isALNUM_LC(*scan))
scan++;
}
break;
case SPACE:
if (do_utf8) {
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(space," ");
while (hardcount < max && scan < loceol &&
(*scan == ' ' ||
swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
scan += UTF8SKIP(scan);
hardcount++;
}
} else {
while (scan < loceol && isSPACE(*scan))
scan++;
}
break;
case SPACEL:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
loceol = PL_regeol;
while (hardcount < max && scan < loceol &&
(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
scan += UTF8SKIP(scan);
hardcount++;
}
} else {
while (scan < loceol && isSPACE_LC(*scan))
scan++;
}
break;
case NSPACE:
if (do_utf8) {
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(space," ");
while (hardcount < max && scan < loceol &&
!(*scan == ' ' ||
swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
scan += UTF8SKIP(scan);
hardcount++;
}
} else {
while (scan < loceol && !isSPACE(*scan))
scan++;
break;
}
case NSPACEL:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
loceol = PL_regeol;
while (hardcount < max && scan < loceol &&
!(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
scan += UTF8SKIP(scan);
hardcount++;
}
} else {
while (scan < loceol && !isSPACE_LC(*scan))
scan++;
}
break;
case DIGIT:
if (do_utf8) {
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(digit,"0");
while (hardcount < max && scan < loceol &&
swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
scan += UTF8SKIP(scan);
hardcount++;
}
} else {
while (scan < loceol && isDIGIT(*scan))
scan++;
}
break;
case NDIGIT:
if (do_utf8) {
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(digit,"0");
while (hardcount < max && scan < loceol &&
!swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
scan += UTF8SKIP(scan);
hardcount++;
}
} else {
while (scan < loceol && !isDIGIT(*scan))
scan++;
}
break;
default:
break;
}
if (hardcount)
c = hardcount;
else
c = scan - PL_reginput;
PL_reginput = scan;
DEBUG_r(
{
SV *prop = sv_newmortal();
regprop(prop, p);
PerlIO_printf(Perl_debug_log,
"%*s %s can match %"IVdf" times out of %"IVdf"...\n",
REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
});
return(c);
}
STATIC I32
S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
{
register char *scan = Nullch;
register char *start;
register char *loceol = PL_regeol;
I32 l = 0;
I32 count = 0, res = 1;
if (!max)
return 0;
start = PL_reginput;
if (PL_reg_match_utf8) {
while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
if (!count++) {
l = 0;
while (start < PL_reginput) {
l++;
start += UTF8SKIP(start);
}
*lp = l;
if (l == 0)
return max;
}
if (count == max)
return count;
}
}
else {
while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
if (!count++) {
*lp = l = PL_reginput - start;
if (max != REG_INFTY && l*max < loceol - scan)
loceol = scan + l*max;
if (l == 0)
return max;
}
}
}
if (!res)
PL_reginput = scan;
return count;
}
SV *
Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
{
SV *sw = NULL;
SV *si = NULL;
SV *alt = NULL;
if (PL_regdata && PL_regdata->count) {
U32 n = ARG(node);
if (PL_regdata->what[n] == 's') {
SV *rv = (SV*)PL_regdata->data[n];
AV *av = (AV*)SvRV((SV*)rv);
SV **ary = AvARRAY(av);
SV **a, **b;
si = *ary;
a = SvTYPE(ary[1]) == SVt_RV ? &ary[1] : 0;
b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
if (a)
sw = *a;
else if (si && doinit) {
sw = swash_init("utf8", "", si, 1, 0);
(void)av_store(av, 1, sw);
}
if (b)
alt = *b;
}
}
if (listsvp)
*listsvp = si;
if (altsvp)
*altsvp = alt;
return sw;
}
STATIC bool
S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
{
char flags = ANYOF_FLAGS(n);
bool match = FALSE;
UV c = *p;
STRLEN len = 0;
STRLEN plen;
if (do_utf8 && !UTF8_IS_INVARIANT(c))
c = utf8n_to_uvchr(p, UTF8_MAXLEN, &len,
ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
if (do_utf8 || (flags & ANYOF_UNICODE)) {
if (lenp)
*lenp = 0;
if (do_utf8 && !ANYOF_RUNTIME(n)) {
if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
match = TRUE;
}
if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
match = TRUE;
if (!match) {
AV *av;
SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
if (sw) {
if (swash_fetch(sw, p, do_utf8))
match = TRUE;
else if (flags & ANYOF_FOLD) {
if (!match && lenp && av) {
I32 i;
for (i = 0; i <= av_len(av); i++) {
SV* sv = *av_fetch(av, i, FALSE);
STRLEN len;
char *s = SvPV(sv, len);
if (len <= plen && memEQ(s, (char*)p, len)) {
*lenp = len;
match = TRUE;
break;
}
}
}
if (!match) {
U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
STRLEN tmplen;
to_utf8_fold(p, tmpbuf, &tmplen);
if (swash_fetch(sw, tmpbuf, do_utf8))
match = TRUE;
}
}
}
}
if (match && lenp && *lenp == 0)
*lenp = UNISKIP(NATIVE_TO_UNI(c));
}
if (!match && c < 256) {
if (ANYOF_BITMAP_TEST(n, c))
match = TRUE;
else if (flags & ANYOF_FOLD) {
U8 f;
if (flags & ANYOF_LOCALE) {
PL_reg_flags |= RF_tainted;
f = PL_fold_locale[c];
}
else
f = PL_fold[c];
if (f != c && ANYOF_BITMAP_TEST(n, f))
match = TRUE;
}
if (!match && (flags & ANYOF_CLASS)) {
PL_reg_flags |= RF_tainted;
if (
(ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
)
{
match = TRUE;
}
}
}
return (flags & ANYOF_INVERT) ? !match : match;
}
STATIC U8 *
S_reghop(pTHX_ U8 *s, I32 off)
{
return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
}
STATIC U8 *
S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
{
if (off >= 0) {
while (off-- && s < lim) {
s += UTF8SKIP(s);
}
}
else {
while (off++) {
if (s > lim) {
s--;
if (UTF8_IS_CONTINUED(*s)) {
while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
s--;
}
}
}
}
return s;
}
STATIC U8 *
S_reghopmaybe(pTHX_ U8 *s, I32 off)
{
return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
}
STATIC U8 *
S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
{
if (off >= 0) {
while (off-- && s < lim) {
s += UTF8SKIP(s);
}
if (off >= 0)
return 0;
}
else {
while (off++) {
if (s > lim) {
s--;
if (UTF8_IS_CONTINUED(*s)) {
while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
s--;
}
}
else
break;
}
if (off <= 0)
return 0;
}
return s;
}
static void
restore_pos(pTHX_ void *arg)
{
if (PL_reg_eval_set) {
if (PL_reg_oldsaved) {
PL_reg_re->subbeg = PL_reg_oldsaved;
PL_reg_re->sublen = PL_reg_oldsavedlen;
RX_MATCH_COPIED_on(PL_reg_re);
}
PL_reg_magic->mg_len = PL_reg_oldpos;
PL_reg_eval_set = 0;
PL_curpm = PL_reg_oldcurpm;
}
}
STATIC void
S_to_utf8_substr(pTHX_ register regexp *prog)
{
SV* sv;
if (prog->float_substr && !prog->float_utf8) {
prog->float_utf8 = sv = NEWSV(117, 0);
SvSetSV(sv, prog->float_substr);
sv_utf8_upgrade(sv);
if (SvTAIL(prog->float_substr))
SvTAIL_on(sv);
if (prog->float_substr == prog->check_substr)
prog->check_utf8 = sv;
}
if (prog->anchored_substr && !prog->anchored_utf8) {
prog->anchored_utf8 = sv = NEWSV(118, 0);
SvSetSV(sv, prog->anchored_substr);
sv_utf8_upgrade(sv);
if (SvTAIL(prog->anchored_substr))
SvTAIL_on(sv);
if (prog->anchored_substr == prog->check_substr)
prog->check_utf8 = sv;
}
}
STATIC void
S_to_byte_substr(pTHX_ register regexp *prog)
{
SV* sv;
if (prog->float_utf8 && !prog->float_substr) {
prog->float_substr = sv = NEWSV(117, 0);
SvSetSV(sv, prog->float_utf8);
if (sv_utf8_downgrade(sv, TRUE)) {
if (SvTAIL(prog->float_utf8))
SvTAIL_on(sv);
} else {
SvREFCNT_dec(sv);
prog->float_substr = sv = &PL_sv_undef;
}
if (prog->float_utf8 == prog->check_utf8)
prog->check_substr = sv;
}
if (prog->anchored_utf8 && !prog->anchored_substr) {
prog->anchored_substr = sv = NEWSV(118, 0);
SvSetSV(sv, prog->anchored_utf8);
if (sv_utf8_downgrade(sv, TRUE)) {
if (SvTAIL(prog->anchored_utf8))
SvTAIL_on(sv);
} else {
SvREFCNT_dec(sv);
prog->anchored_substr = sv = &PL_sv_undef;
}
if (prog->anchored_utf8 == prog->check_utf8)
prog->check_substr = sv;
}
}