Skip to content

Commit

Permalink
add new operator \J (skip search)
Browse files Browse the repository at this point in the history
  • Loading branch information
kkos committed Jun 7, 2024
1 parent f2f9c69 commit 8860d24
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 5 deletions.
65 changes: 61 additions & 4 deletions src/regexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
regexec.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2022 K.Kosako
* Copyright (c) 2002-2024 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -177,6 +177,9 @@ typedef struct {
#ifdef USE_CALL
unsigned long subexp_call_in_search_counter;
#endif
#ifdef USE_SKIP_SEARCH
UChar* skip_search;
#endif
} MatchArg;


Expand Down Expand Up @@ -1261,6 +1264,7 @@ struct OnigCalloutArgsStruct {
#endif

#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#ifdef USE_SKIP_SEARCH
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
(msa).stack_p = (void* )0;\
(msa).options = (arg_option)|(reg)->options;\
Expand All @@ -1272,6 +1276,35 @@ struct OnigCalloutArgsStruct {
(msa).mp = mpv;\
(msa).best_len = ONIG_MISMATCH;\
(msa).ptr_num = PTR_NUM_SIZE(reg);\
(msa).skip_search = (UChar* )(arg_start);\
} while(0)
#else
/*
 * MATCH_ARG_INIT -- variant selected when USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
 * is defined and USE_SKIP_SEARCH is not.
 *
 * Initializes the fields of `msa` (presumably a MatchArg -- confirm at the
 * call sites):
 *   stack_p            null pointer; MATCH_ARG_FREE only frees a non-null
 *                      stack_p, so a freshly initialized msa has nothing
 *                      to release
 *   options            arg_option OR-ed with reg->options
 *   region, start      copied from arg_region / arg_start
 *   match_stack_limit  copied from mpv->match_stack_limit
 *   mp                 mpv itself
 *   best_len           ONIG_MISMATCH
 *   ptr_num            PTR_NUM_SIZE(reg)
 *
 * RETRY_IN_MATCH_ARG_INIT and SUBEXP_CALL_IN_MATCH_ARG_INIT are defined
 * elsewhere and are spliced in without a trailing semicolon, so they must
 * supply their own statement terminators (or expand to nothing).
 *
 * `msa`, `reg` and `mpv` are each expanded more than once: pass plain
 * lvalues/pointers, not expressions with side effects.
 * This variant does not touch msa.skip_search.
 */
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
(msa).stack_p = (void* )0;\
(msa).options = (arg_option)|(reg)->options;\
(msa).region = (arg_region);\
(msa).start = (arg_start);\
(msa).match_stack_limit = (mpv)->match_stack_limit;\
RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\
(msa).mp = mpv;\
(msa).best_len = ONIG_MISMATCH;\
(msa).ptr_num = PTR_NUM_SIZE(reg);\
} while(0)
#endif
#else
#ifdef USE_SKIP_SEARCH
/*
 * MATCH_ARG_INIT -- variant selected when USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
 * is not defined and USE_SKIP_SEARCH is defined.
 *
 * Initializes the fields of `msa` (presumably a MatchArg -- confirm at the
 * call sites):
 *   stack_p            null pointer; MATCH_ARG_FREE only frees a non-null
 *                      stack_p, so a freshly initialized msa has nothing
 *                      to release
 *   options            arg_option OR-ed with reg->options
 *   region, start      copied from arg_region / arg_start
 *   match_stack_limit  copied from mpv->match_stack_limit
 *   mp                 mpv itself
 *   ptr_num            PTR_NUM_SIZE(reg)
 *   skip_search        arg_start cast to UChar*, i.e. the skip position
 *                      starts out equal to the start position
 *
 * Unlike the USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE variants, best_len is not
 * assigned here.
 *
 * RETRY_IN_MATCH_ARG_INIT and SUBEXP_CALL_IN_MATCH_ARG_INIT are defined
 * elsewhere and are spliced in without a trailing semicolon, so they must
 * supply their own statement terminators (or expand to nothing).
 *
 * `msa`, `reg`, `mpv` and `arg_start` are each expanded more than once: pass
 * plain lvalues/pointers, not expressions with side effects.
 */
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
(msa).stack_p = (void* )0;\
(msa).options = (arg_option)|(reg)->options;\
(msa).region = (arg_region);\
(msa).start = (arg_start);\
(msa).match_stack_limit = (mpv)->match_stack_limit;\
RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\
(msa).mp = mpv;\
(msa).ptr_num = PTR_NUM_SIZE(reg);\
(msa).skip_search = (UChar* )(arg_start);\
} while(0)
#else
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
Expand All @@ -1286,6 +1319,7 @@ struct OnigCalloutArgsStruct {
(msa).ptr_num = PTR_NUM_SIZE(reg);\
} while(0)
#endif
#endif

#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)

Expand Down Expand Up @@ -4335,6 +4369,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case UPDATE_VAR_RIGHT_RANGE_INIT:
INIT_RIGHT_RANGE;
break;
#ifdef USE_SKIP_SEARCH
case UPDATE_VAR_SKIP_SEARCH:
if (s > msa->skip_search) msa->skip_search = s;
break;
#endif
}
}
INC_OP;
Expand Down Expand Up @@ -5629,6 +5668,9 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
while (s <= high) {
MATCH_AND_RETURN_CHECK(data_range);
s += enclen(reg->enc, s);
#ifdef USE_SKIP_SEARCH
if (s < msa.skip_search) s = msa.skip_search;
#endif
}
} while (s < range);
goto mismatch;
Expand All @@ -5646,10 +5688,18 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
prev = s;
s += enclen(reg->enc, s);

while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
prev = s;
s += enclen(reg->enc, s);
#ifdef USE_SKIP_SEARCH
if (s < msa.skip_search) s = msa.skip_search;
else {
#endif
while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) &&
s < range) {
prev = s;
s += enclen(reg->enc, s);
}
#ifdef USE_SKIP_SEARCH
}
#endif
}
goto mismatch;
}
Expand All @@ -5660,6 +5710,13 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
MATCH_AND_RETURN_CHECK(data_range);
if (s >= range) break;
s += enclen(reg->enc, s);

#ifdef USE_SKIP_SEARCH
if (s < msa.skip_search) {
s = msa.skip_search;
if (s > range) break;
}
#endif
}
}
else { /* backward search */
Expand Down
6 changes: 5 additions & 1 deletion src/regint.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
regint.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2023 K.Kosako
* Copyright (c) 2002-2024 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -62,6 +62,7 @@
#define USE_REGSET
#define USE_CALL
#define USE_CALLOUT
#define USE_SKIP_SEARCH
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
#define USE_WHOLE_OPTIONS
#define USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */
Expand Down Expand Up @@ -584,6 +585,9 @@ enum UpdateVarType {
UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK = 3,
UPDATE_VAR_RIGHT_RANGE_TO_S = 4,
UPDATE_VAR_RIGHT_RANGE_INIT = 5,
#ifdef USE_SKIP_SEARCH
UPDATE_VAR_SKIP_SEARCH = 6,
#endif
};

enum CheckPositionType {
Expand Down
27 changes: 27 additions & 0 deletions src/regparse.c
Original file line number Diff line number Diff line change
Expand Up @@ -2781,6 +2781,16 @@ node_new_keep(Node** node, ParseEnv* env)
return ONIG_NORMAL;
}

#ifdef USE_SKIP_SEARCH
/*
 * Create the parse node for the \J (skip search) token.
 *
 * The node is an update-var gimmick of type UPDATE_VAR_SKIP_SEARCH with
 * id 0, stored through *node.  The status code of
 * node_new_update_var_gimmick() is passed back unchanged; the caller
 * treats a negative value as an error.
 */
static int
node_new_skip_search(Node** node, ParseEnv* env)
{
  return node_new_update_var_gimmick(node, UPDATE_VAR_SKIP_SEARCH, 0, env);
}
#endif

#ifdef USE_CALLOUT

extern void
Expand Down Expand Up @@ -4526,6 +4536,9 @@ enum TokenSyms {
TK_QUOTE_OPEN,
TK_CHAR_PROPERTY, /* \p{...}, \P{...} */
TK_KEEP, /* \K */
#ifdef USE_SKIP_SEARCH
TK_SKIP_SEARCH, /* \J */
#endif
TK_GENERAL_NEWLINE, /* \R */
TK_NO_NEWLINE, /* \N */
TK_TRUE_ANYCHAR, /* \O */
Expand Down Expand Up @@ -5743,6 +5756,13 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ParseEnv* env)
tok->type = TK_KEEP;
break;

#ifdef USE_SKIP_SEARCH
case 'J':
if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;
tok->type = TK_SKIP_SEARCH;
break;
#endif

case 'R':
if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;
tok->type = TK_GENERAL_NEWLINE;
Expand Down Expand Up @@ -9093,6 +9113,13 @@ prs_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (r < 0) return r;
break;

#ifdef USE_SKIP_SEARCH
case TK_SKIP_SEARCH:
r = node_new_skip_search(np, env);
if (r < 0) return r;
break;
#endif

case TK_GENERAL_NEWLINE:
r = node_new_general_newline(np, env);
if (r < 0) return r;
Expand Down

0 comments on commit 8860d24

Please sign in to comment.