From 39ac84afcedcf2d2569f900dbb33cd395247266c Mon Sep 17 00:00:00 2001
From: Lourisvaldo Figueredo Junior
Date: Sat, 6 Aug 2022 00:12:36 -0300
Subject: [PATCH] Migration to new version of PCRE library (PCRE2)

Build against the system PCRE2 library (8-bit code units, -lpcre2-8)
instead of the bundled PCRE 1 sources:

- Makefile: define HAVE_PCRE, link with -lpcre2-8 and stop building the
  pcre subdirectory.
- anlghea3.h: define PCRE2_CODE_UNIT_WIDTH and include <pcre2.h>; the
  alias and include structures now hold a pcre2_code pointer.
- init2.c, utils.c: replace pcre_compile/pcre_fullinfo/pcre_exec with
  pcre2_compile/pcre2_pattern_info/pcre2_match.

pcre2_compile() reports an error number rather than a message, so the
text handed to warn() is fetched with pcre2_get_error_message().
matchq() copies the offsets PCRE2 produced (bounded by PMATCH_SIZE) into
pmatch and releases the match data before returning.
---
Notes (not part of the commit message):
- Whitespace inside context lines was reconstructed; apply with
  "patch -l" or "git apply --ignore-whitespace".
- The "index" hashes below are the ones from the first submission and do
  not describe the post-image of this revision.
- The author's e-mail address is missing from the From: header and must be
  restored before using "git am".

 src/Makefile   | 17 +++++++----------
 src/anlghea3.h |  7 ++++---
 src/init2.c    | 30 ++++++++++++++++++++----------
 src/utils.c    | 48 +++++++++++++++++++++++++++++++++++-------------
 4 files changed, 66 insertions(+), 36 deletions(-)

diff --git a/src/Makefile b/src/Makefile
index 2d68400..b32d1a9 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -11,7 +11,7 @@ CFLAGS = -O2 # options, e.g. for optimisation or ANSI compilation.
 # BeOS needs CFLAGS = -O2 -Wl,-L/boot/home/config/lib
 # BS2000/OSD needs CFLAGS = -XLLML -XLLMK
 # NeXTSTEP needs CFLAGS = -O2 -pipe -no-precomp
-DEFS = # any combination of -DNOPIPES -DNODNS -DNODIRENT -DNOGLOB ...
+DEFS = -DHAVE_PCRE # any combination of -DNOPIPES -DNODNS -DNODIRENT -DNOGLOB ...
 # ... -DNOOPEN -DNOFOLLOW -DNOALARM -DNOGRAPHICS -DNOGMTIME ...
 # ... -DEBCDIC -DUSE_PLAIN_SETJMP ...
 # ... -DHAVE_GD -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_PCRE ...
@@ -30,12 +30,12 @@ DEFS = # any combination of -DNOPIPES -DNODNS -DNODIRENT -DNOGLOB ...
 #
 OS = UNIX # Operating system: UNIX, DOS, WIN32, MAC, OS2, OSX, VMS
 # RISCOS, BEOS, NEXTSTEP, MPEIX, BS2000, AS400, OS390
-LIBS = -lm # extra libraries needed; most platforms (but not OS X or BeOS)
+LIBS = -lm -lpcre2-8 # extra libraries needed; most platforms (but not OS X or BeOS)
 # need -lm LAST
 # if you defined HAVE_GD above you also need -lgd -lpng -ljpeg -lz
 # if you defined HAVE_ZLIB above you also need -lz
 # if you defined HAVE_BZLIB above you also need -lbz2
-# if you defined HAVE_PCRE above you also need -lpcre
+# if you defined HAVE_PCRE above you also need -lpcre2-8
 # Also Solaris 2 (SunOS 5+) needs LIBS = -lnsl -lm
 # SCO Unix needs LIBS = -lsocket -lm
 # IRIX needs LIBS = -lc -lm
@@ -54,12 +54,12 @@ OBJS = alias.o analog.o cache.o dates.o globals.o hash.o init.o init2.o \
 	input.o macinput.o macstuff.o output.o output2.o outcro.o outhtml.o \
 	outlatex.o outplain.o outxhtml.o outxml.o process.o settings.o sort.o \
 	tree.o utils.o win32.o
-SUBDIRS = bzip2 libgd libpng pcre unzip zlib
+SUBDIRS = bzip2 libgd libpng unzip zlib
 SUBDIROBJS = libgd/gd.o libgd/gd_io.o libgd/gd_io_file.o libgd/gd_png.o \
 	libgd/gdfontf.o libgd/gdfonts.o libgd/gdtables.o \
 	libpng/png.o libpng/pngerror.o libpng/pngmem.o libpng/pngset.o \
 	libpng/pngtrans.o libpng/pngwio.o libpng/pngwrite.o \
-	libpng/pngwtran.o libpng/pngwutil.o pcre/pcre.o \
+	libpng/pngwtran.o libpng/pngwutil.o \
 	zlib/adler32.o zlib/compress.o zlib/crc32.o zlib/deflate.o \
 	zlib/gzio.o zlib/infblock.o zlib/infcodes.o zlib/inffast.o \
 	zlib/inflate.o zlib/inftrees.o zlib/infutil.o zlib/trees.o \
@@ -67,7 +67,7 @@ SUBDIROBJS = libgd/gd.o libgd/gd_io.o libgd/gd_io_file.o libgd/gd_png.o \
 	bzip2/bzlib.o bzip2/blocksort.o bzip2/compress.o bzip2/crctable.o \
 	bzip2/decompress.o bzip2/huffman.o bzip2/randtable.o
 HEADERS = anlghead.h anlghea2.h anlghea3.h anlghea4.h macdir.h \
-	pcre/pcre.h libgd/gd.h libgd/gdfontf.h libgd/gdfonts.h unzip/unzip.h \
+	libgd/gd.h libgd/gdfontf.h libgd/gdfonts.h unzip/unzip.h \
 	zlib/zlib.h bzip2/bzlib.h
 ALLCFLAGS = $(CFLAGS) $(DEFS) -D$(OS)
 ALLOBJS = $(OBJS) $(SUBDIROBJS)
@@ -86,9 +86,6 @@ libgd: ALWAYS
 libpng: ALWAYS
 	cd libpng && $(MAKE) 'CC=$(CC)' 'ALLCFLAGS=$(ALLCFLAGS)'
 
-pcre: ALWAYS
-	cd pcre && $(MAKE) 'CC=$(CC)' 'ALLCFLAGS=$(ALLCFLAGS)'
-
 unzip: ALWAYS
 	cd unzip && $(MAKE) 'CC=$(CC)' 'ALLCFLAGS=$(ALLCFLAGS)'
 
@@ -217,7 +214,7 @@ ALWAYS:
 # to include "-lbz2" in the LIBS above.
 # HAVE_PCRE means you already have libpcre, and you want to use your version
 # not mine. This is likely to reduce the size of the binary.
-# You will also need to include "-lpcre" in the LIBS.
+# You will also need to include "-lpcre2-8" in the LIBS.
 # EBCDIC is needed if your server uses the EBCDIC character set, not ASCII.
 # USE_PLAIN_SETJMP uses ANSI setjmp() instead of POSIX sigsetjmp().
 # NEED_STRCMP, NEED_MEMMOVE, NEED_STRTOUL and NEED_DIFFTIME provide
diff --git a/src/anlghea3.h b/src/anlghea3.h
index fbf2964..f756ee5 100644
--- a/src/anlghea3.h
+++ b/src/anlghea3.h
@@ -16,6 +16,7 @@
 
 #ifndef ANLGHEA3_H
 #define ANLGHEA3_H
+#define PCRE2_CODE_UNIT_WIDTH 8
 
 /*** OS specific things ***/
 
@@ -261,7 +262,7 @@ extern void Win32Cleanup(void);
 #include "bzip2/bzlib.h"
 #endif
 
-#include "pcre/pcre.h"
+#include <pcre2.h>
 
 #include "libgd/gd.h"
 #include "libgd/gdfontf.h"
@@ -759,7 +760,7 @@ typedef struct aliasto {
 
 typedef struct alias {
   char *from;
-  pcre *pattern;
+  pcre2_code *pattern;
   AliasTo *to;
   logical isregex;
   struct alias *next;
@@ -889,7 +890,7 @@ typedef struct {
 
 typedef struct include {
   char *name;
-  pcre *pattern;
+  pcre2_code *pattern;
   unsigned long minaddr, maxaddr;
   choice type;
   struct include *next;
diff --git a/src/init2.c b/src/init2.c
index 767065a..6a540c1 100644
--- a/src/init2.c
+++ b/src/init2.c
@@ -1469,9 +1469,11 @@ void configalias(void *opt, char *cmd, char *arg1, char *arg2, int rc) {
   int maxrightstar;
   char starchar;
   logical is_regex = FALSE;
-  pcre *pattern = NULL;
   char *errstr;
-  int erroffset;
+  pcre2_code *pattern = NULL;
+  PCRE2_UCHAR errbuf[256];
+  int err;
+  PCRE2_SIZE erroffset;
 
   if (rc == 0) {
     shortwarn(cmd, arg1, rc);
@@ -1489,15 +1491,18 @@ void configalias(void *opt, char *cmd, char *arg1, char *arg2, int rc) {
 
   if (headcasematch(arg1, "REGEXP:") || headcasematch(arg1, "REGEXPI:")) {
     if ((pattern =
-         pcre_compile(arg1 + 7 + (arg1[6] != ':'),
-                      PCRE_DOTALL | ((arg1[6] == ':')?0:PCRE_CASELESS),
-                      (const char **)(&errstr), &erroffset, NULL)) == NULL) {
+         pcre2_compile((PCRE2_SPTR) arg1 + 7 + (arg1[6] != ':'), PCRE2_ZERO_TERMINATED,
+                      PCRE2_DOTALL | ((arg1[6] == ':')?0:PCRE2_CASELESS),
+                      &err, &erroffset, NULL)) == NULL) {
+      if (pcre2_get_error_message(err, errbuf, sizeof(errbuf)) == PCRE2_ERROR_BADDATA)
+        errbuf[0] = '\0';  /* unknown error number: print an empty reason */
+      errstr = (char *)errbuf;
       badwarn(cmd, FALSE, arg1, arg2, rc);
       warn('C', CONTINUATION, " (%s in regular expression)", errstr);
       return;
     }
     starchar = '(';
-    pcre_fullinfo(pattern, NULL, PCRE_INFO_CAPTURECOUNT, (void *)&leftstars);
+    pcre2_pattern_info(pattern, PCRE2_INFO_CAPTURECOUNT, (void *)&leftstars);
     is_regex = TRUE;
   }
   else {
@@ -1617,8 +1622,10 @@ void configinex(void *opt, char *cmd, char *arg1, char *arg2, int rc,
   Include **include = (Include **)opt;
   Include *ip;
   char *errstr;
-  int erroffset;
   char *t;
+  PCRE2_UCHAR errbuf[256];
+  int err;
+  PCRE2_SIZE erroffset;
 
   if (rc == 0) {
     shortwarn(cmd, arg1, rc);
@@ -1652,9 +1659,12 @@ void configinex(void *opt, char *cmd, char *arg1, char *arg2, int rc,
   (*include)->next = ip; /* save name even for regex: might l.c. */
   if (headcasematch(arg1, "REGEXP:") || headcasematch(arg1, "REGEXPI:")) {
     if (((*include)->pattern =
-         pcre_compile(arg1 + 7 + (arg1[6] != ':'),
-                      PCRE_DOTALL | ((arg1[6] == ':')?0:PCRE_CASELESS),
-                      (const char **)(&errstr), &erroffset, NULL)) == NULL) {
+         pcre2_compile((PCRE2_SPTR) arg1 + 7 + (arg1[6] != ':'), PCRE2_ZERO_TERMINATED,
+                      PCRE2_DOTALL | ((arg1[6] == ':')?0:PCRE2_CASELESS),
+                      &err, &erroffset, NULL)) == NULL) {
+      if (pcre2_get_error_message(err, errbuf, sizeof(errbuf)) == PCRE2_ERROR_BADDATA)
+        errbuf[0] = '\0';  /* unknown error number: print an empty reason */
+      errstr = (char *)errbuf;
       *include = ip;
       badwarn(cmd, FALSE, arg1, arg2, rc);
       warn('C', CONTINUATION, " (%s in regular expression)", errstr);
diff --git a/src/utils.c b/src/utils.c
index bc89a14..c95f6f7 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -112,13 +112,14 @@ void strtolowerx(char *n) { /* lower-case filename: only used in next 3 fns. */
 
 void toloweralias(Alias *ap, logical both) {
   AliasTo *tp;
-  char *errstr;
-  int erroffset;
+  int err;
+  PCRE2_SIZE erroffset;
 
   for ( ; ap != NULL; TO_NEXT(ap)) {
     if (headcasematch(ap->from, "REGEXP:"))
-      ap->pattern = pcre_compile(ap->from + 7, PCRE_CASELESS | PCRE_DOTALL,
-                                 (const char **)(&errstr), &erroffset, NULL);
+      ap->pattern = pcre2_compile((PCRE2_SPTR) ap->from + 7, PCRE2_ZERO_TERMINATED,
+                                 PCRE2_CASELESS | PCRE2_DOTALL,
+                                 &err, &erroffset, NULL);
     /* if original regcomp succeeded, this one should too */
     else if (!headcasematch(ap->from, "REGEXPI:"))
       /* nothing to do for REGEXPI: */
@@ -132,13 +133,14 @@ void toloweralias(Alias *ap, logical both) {
 
 void tolowerinc(Include *ip) {
   /* see comments in previous function */
-  char *errstr;
-  int erroffset;
+  int err;
+  PCRE2_SIZE erroffset;
 
   for ( ; ip != NULL; TO_NEXT(ip)) {
     if (headcasematch(ip->name, "REGEXP:"))
-      ip->pattern = pcre_compile(ip->name + 7, PCRE_CASELESS | PCRE_DOTALL,
-                                 (const char **)(&errstr), &erroffset, NULL);
+      ip->pattern = pcre2_compile((PCRE2_SPTR) ip->name + 7, PCRE2_ZERO_TERMINATED,
+                                 PCRE2_CASELESS | PCRE2_DOTALL,
+                                 &err, &erroffset, NULL);
     else if (!headcasematch(ip->name, "REGEXPI:"))
       strtolowerx((char *)(ip->name));
   }
@@ -740,11 +742,31 @@ logical wildmatch(char *s, char *p, char *whole, int nmatch, int *pmatch) {
 }
 
 logical matchq(char *s, void *p, logical is_regex, int *pmatch) {
-  if (is_regex)
-    return(pcre_exec((pcre *)p, NULL, s, (int)strlen(s), 0, 0, pmatch,
-                     (pmatch == NULL)?0:PMATCH_SIZE) >= 0);
-  else
-    return(wildmatch(s, (char *)p, s, (pmatch == NULL)?0:PMATCH_SIZE, pmatch));
+  if (is_regex) {
+    pcre2_match_data *match_data;
+    PCRE2_SIZE *ovector;
+    int rc, i, n;
+
+    match_data = pcre2_match_data_create_from_pattern((pcre2_code *)p, NULL);
+    if (match_data == NULL)
+      return(FALSE);
+    rc = pcre2_match((pcre2_code *)p, (PCRE2_SPTR)s, (PCRE2_SIZE)strlen(s),
+                     0, 0, match_data, NULL);
+    if (rc >= 0 && pmatch != NULL) {
+      /* copy every (start, end) offset PCRE2 produced, but never more than
+         the PMATCH_SIZE ints passed for pmatch elsewhere in this function */
+      ovector = pcre2_get_ovector_pointer(match_data);
+      n = 2 * (int)pcre2_get_ovector_count(match_data);
+      if (n > PMATCH_SIZE)
+        n = PMATCH_SIZE;
+      for (i = 0; i < n; i++)
+        pmatch[i] = (ovector[i] == PCRE2_UNSET)?(-1):(int)ovector[i];
+    }
+    pcre2_match_data_free(match_data);
+    return(rc >= 0);
+  }
+  else
+    return(wildmatch(s, (char *)p, s, (pmatch == NULL)?0:PMATCH_SIZE, pmatch));
 }
 
 logical genwildmatch(char *s, char *s2, char *p) {