Skip to content

Commit 4878175

Browse files
committed
import
0 parents  commit 4878175

23 files changed

+5899
-0
lines changed

COPYRIGHT

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
Copyright 1992, 1993, 1994, 1997 Henry Spencer. All rights reserved.
2+
This software is not subject to any license of the American Telephone
3+
and Telegraph Company or of the Regents of the University of California.
4+
5+
Permission is granted to anyone to use this software for any purpose on
6+
any computer system, and to alter it and redistribute it, subject
7+
to the following restrictions:
8+
9+
1. The author is not responsible for the consequences of use of this
10+
software, no matter how awful, even if they arise from flaws in it.
11+
12+
2. The origin of this software must not be misrepresented, either by
13+
explicit claim or by omission. Since few users ever read sources,
14+
credits must appear in the documentation.
15+
16+
3. Altered versions must be plainly marked as such, and must not be
17+
misrepresented as being the original software. Since few users
18+
ever read sources, credits must appear in the documentation.
19+
20+
4. This notice may not be removed or altered.

Makefile

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# You probably want to take -DREDEBUG out of CFLAGS, and put something like
2+
# -O in, *after* testing (-DREDEBUG strengthens testing by enabling a lot of
3+
# internal assertion checking and some debugging facilities).
4+
# Put -Dconst= in for a pre-ANSI compiler.
5+
# Do not take -DPOSIX_MISTAKE out.
6+
# REGCFLAGS isn't important to you (it's for my use in some special contexts).
7+
# -I. puts this directory on the compiler's include path, so headers kept
# here (including regex.h, which the $(REGEXH) rule generates) can be found.
CFLAGS=-I. -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS)
8+
9+
# If you have a pre-ANSI compiler, put -o into MKHFLAGS. If you want
10+
# the Berkeley __P macro, put -b in.
11+
MKHFLAGS=
12+
13+
# Flags for linking but not compiling, if any.
14+
LDFLAGS=
15+
16+
# Extra libraries for linking, if any.
17+
LIBS=
18+
19+
# Internal stuff, should not need changing.
#   OBJPRODN  the library objects; "make lib" archives these into libregex.a
#   OBJS      OBJPRODN plus the extra objects linked into the "re" tester
#   H         internal headers; no rule in this Makefile references $(H)
#             (DTRH, defined with the dtr target, repeats the same list)
#   REGSRC    library sources, also fed to mkh when generating regex.h
#   ALLSRC    every C source file; used only in the POST distribution list
20+
OBJPRODN=regcomp.o regexec.o regerror.o regfree.o
21+
OBJS=$(OBJPRODN) split.o debug.o main.o
22+
H=cclass.h cname.h regex2.h utils.h
23+
REGSRC=regcomp.c regerror.c regexec.c regfree.c
24+
ALLSRC=$(REGSRC) engine.c debug.c main.c split.c
25+
26+
# Stuff that matters only if you're trying to lint the package.
#   LINTFLAGS flags handed to lint; -Dstatic= and -Dconst= define both
#             keywords away to nothing
#   LINTC     the sources the "l" target runs lint over
#   JUNKLINT  egrep alternation of lint messages the "l" target filters out
27+
LINTFLAGS=-I. -Dstatic= -Dconst= -DREDEBUG
28+
LINTC=regcomp.c regexec.c regerror.c regfree.c debug.c main.c
29+
JUNKLINT=possible pointer alignment|null effect
30+
31+
# arrangements to build forward-reference header files
32+
# Register .ih and .h as known suffixes, then define the suffix rule that
# turns foo.c into foo.ih: run the mkh script over the source with -p and
# capture its standard output as the target.
# NOTE(review): what -p selects is defined by mkh itself, which is not
# visible from this Makefile.
.SUFFIXES: .ih .h
33+
.c.ih:
34+
sh ./mkh $(MKHFLAGS) -p $< >$@
35+
36+
# First ordinary target in the file, so a bare "make" builds the tester and
# runs the regression test (target "r").
default: r
37+
38+
# Build libregex.a from the library objects only.  "purge" is listed first
# so that every existing .o is deleted before $(OBJPRODN) is rebuilt.
# NOTE(review): this depends on prerequisites being processed in the order
# written; a parallel make could run purge while objects are being built.
lib: purge $(OBJPRODN)
39+
rm -f libregex.a
40+
ar crv libregex.a $(OBJPRODN)
41+
42+
# Remove every object file in this directory.
purge:
43+
rm -f *.o
44+
45+
# stuff to build regex.h
46+
# regex.h is generated: mkh is run over regex2.h and the library sources,
# and its output goes to a scratch file first.
REGEXH=regex.h
47+
REGEXHSRC=regex2.h $(REGSRC)
48+
$(REGEXH): $(REGEXHSRC) mkh
49+
# NOTE(review): -i _REGEX_H_ presumably names the include-guard macro; confirm in mkh.
sh ./mkh $(MKHFLAGS) -i _REGEX_H_ $(REGEXHSRC) >regex.tmp
50+
# Copy over regex.h only when the content differs (or regex.h is missing),
# so an unchanged header keeps its old timestamp.  The file name is written
# out literally here rather than as $(REGEXH) or $@.
cmp -s regex.tmp regex.h 2>/dev/null || cp regex.tmp regex.h
51+
rm -f regex.tmp
52+
53+
# dependencies
54+
# Headers shared by every library object and by debug.o.
$(OBJPRODN) debug.o: utils.h regex.h regex2.h
55+
# Per-object extras.  The .ih files are produced by the .c.ih suffix rule.
regcomp.o: cclass.h cname.h regcomp.ih
56+
# engine.c is a prerequisite here and has no object of its own in $(OBJS).
# NOTE(review): presumably regexec.c #includes engine.c -- confirm in the source.
regexec.o: engine.c engine.ih
57+
regerror.o: regerror.ih
58+
debug.o: debug.ih
59+
main.o: main.ih
60+
61+
# tester
62+
# Link all of $(OBJS) (library objects plus split.o, debug.o, main.o) into
# the test driver "re".
re: $(OBJS)
63+
$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@
64+
65+
# regression test
66+
# Feeds the "tests" file to the driver three times: with no options, with
# -el, and with -er.  No line has a "-" prefix, so the first run that exits
# non-zero stops make.
# NOTE(review): the meaning of -el / -er is defined by the driver (main.c).
r: re tests
67+
./re <tests
68+
./re -el <tests
69+
./re -er <tests
70+
71+
# 57 variants, and other stuff, for development use -- not useful to you
72+
# Same three runs as "r", but each command has a "-" prefix so make carries
# on after a failing run instead of stopping at the first one.
# The prerequisite is written "re", matching the link rule's target name and
# the "r" rule; spelled "./re" it names a target for which no rule exists in
# make implementations that do not canonicalise a leading "./".
ra: re tests
73+
-./re <tests
74+
-./re -el <tests
75+
-./re -er <tests
76+
77+
# The "r" runs again with -x added to every invocation; stops at the first
# failure.
# NOTE(review): the meaning of -x is defined by the driver (main.c).
rx: re tests
78+
./re -x <tests
79+
./re -x -el <tests
80+
./re -x -er <tests
81+
82+
# Timing runs: with and without -cs, with and without -el.  Failures are
# ignored ("-" prefix) so all four timings are always attempted.
t: re tests
83+
-time ./re <tests
84+
-time ./re -cs <tests
85+
-time ./re -el <tests
86+
-time ./re -cs -el <tests
87+
88+
# Run lint over $(LINTC), merge stderr into stdout, drop the messages matched
# by $(JUNKLINT), and both show the remainder and save it in a file named
# "lint" (removed again by "tidy").
l: $(LINTC)
89+
lint $(LINTFLAGS) -h $(LINTC) 2>&1 | egrep -v '$(JUNKLINT)' | tee lint
90+
91+
# Print everything: docs, all headers, all C sources, and both manual pages.
# NOTE(review): "ti" and "list" are external commands not defined in this
# Makefile -- presumably the author's own printing tools.
fullprint:
92+
ti README WHATSNEW notes todo | list
93+
ti *.h | list
94+
list *.c
95+
list regex.3 regex.7
96+
97+
# Shorter printout: docs, headers, and only reg*.c plus engine.c.
print:
98+
ti README WHATSNEW notes todo | list
99+
ti *.h | list
100+
list reg*.c engine.c
101+
102+
103+
# mf.tmp is a cleaned copy of this Makefile for distribution: the REGEXH=
# line is forced to "=regex.h", and every line ending in "#DEL" is deleted
# ("$$" is make's escape for a literal "$", i.e. the regex end-of-line anchor).
mf.tmp: Makefile
104+
sed '/^REGEXH=/s/=.*/=regex.h/' Makefile | sed '/#DEL$$/d' >$@
105+
106+
# File lists for packaging.  FILES is the real on-disk set; DTR is the same
# list with "Makefile=mf.tmp" in place of Makefile.
# NOTE(review): presumably makedtr reads "name=file" as "store file under
# name" -- confirm against makedtr.
DTRH=cclass.h cname.h regex2.h utils.h
107+
PRE=COPYRIGHT README WHATSNEW
108+
POST=mkh regex.3 regex.7 tests $(DTRH) $(ALLSRC) fake/*.[ch]
109+
FILES=$(PRE) Makefile $(POST)
110+
DTR=$(PRE) Makefile=mf.tmp $(POST)
# Build the distribution file "dtr" from $(DTR), then discard mf.tmp.
111+
dtr: $(FILES) mf.tmp
112+
makedtr $(DTR) >$@
113+
rm mf.tmp
114+
115+
# Run the external "cio" command over every distributed file.
# NOTE(review): presumably an RCS check-in wrapper; not defined here.
cio: $(FILES)
116+
cio $(FILES)
117+
118+
# Context diffs of every distributed file against RCS, piped through "p"
# (an external command, presumably a pager).
rdf: $(FILES)
119+
rcsdiff -c $(FILES) 2>&1 | p
120+
121+
# various forms of cleanup
122+
# Lightest cleanup: scratch files, core dumps, the "dtr" distribution file,
# any *.tmp leftovers (regex.tmp, mf.tmp) and the "lint" output file.
tidy:
123+
rm -f junk* core core.* *.core dtr *.tmp lint
124+
125+
# tidy, plus everything the build produces: objects, assembler output, the
# generated .ih headers, the "re" tester and the library archive.
clean: tidy
126+
rm -f *.o *.s *.ih re libregex.a
127+
128+
# don't do this one unless you know what you're doing
# (it removes mkh, which every .ih file and regex.h are generated with, and
# which this Makefile has no rule to recreate, along with regex.h itself)
129+
spotless: clean
130+
rm -f mkh regex.h

README

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
alpha3.8 release.
2+
Tue Aug 10 15:51:48 EDT 1999
3+
4+
5+
See WHATSNEW for change listing.
6+
7+
installation notes:
8+
--------
9+
Read the comments at the beginning of Makefile before running.
10+
11+
utils.h contains some things that just might have to be modified on
12+
some systems, as well as a nested include (ugh) of <assert.h>.
13+
14+
The "fake" directory contains quick-and-dirty fakes for some header
15+
files and routines that old systems may not have. Note also that
16+
-DUSEBCOPY will make utils.h substitute bcopy() for memmove().
17+
18+
After that, "make r" will build regcomp.o, regexec.o, regfree.o,
19+
and regerror.o (the actual routines), bundle them together into a test
20+
program, and run regression tests on them. No output is good output.
21+
22+
"make lib" builds just the .o files for the actual routines (when
23+
you're happy with testing and have adjusted CFLAGS for production),
24+
and puts them together into libregex.a. You can pick up either the
25+
library or *.o ("make lib" makes sure there are no other .o files left
26+
around to confuse things).
27+
28+
main.c, debug.c, and split.c are used for regression testing but are not part
29+
of the RE routines themselves.
30+
31+
regex.h goes in /usr/include. All other .h files are internal only.
32+
--------

WHATSNEW

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
New in alpha3.8: Bug fix for signed/unsigned mixup, found and fixed
2+
by the FreeBSD folks.
3+
4+
New in alpha3.7: A bit of cleanup aimed at maximizing portability,
5+
possibly at slight cost in efficiency. "ul" suffixes and "unsigned long"
6+
no longer appear, in particular.
7+
8+
New in alpha3.6: A couple more portability glitches fixed.
9+
10+
New in alpha3.5: Active development of this code has been stopped --
11+
I'm working on a complete reimplementation -- but folks have found some
12+
minor portability glitches and the like, hence this release to fix them.
13+
One penalty: slightly reduced compatibility with old compilers, because
14+
the ANSI C `unsigned long' type and `ul' constant suffix are used in a
15+
few places (I could avoid this but it would be considerably more work).
16+
17+
New in alpha3.4: The complex bug alluded to below has been fixed (in a
18+
slightly kludgey temporary way that may hurt efficiency a bit; this is
19+
another "get it out the door for 4.4" release). The tests at the end of
20+
the tests file have accordingly been uncommented. The primary sign of
21+
the bug was that something like a?b matching ab matched b rather than ab.
22+
(The bug was essentially specific to this exact situation, else it would
23+
have shown up earlier.)
24+
25+
New in alpha3.3: The definition of word boundaries has been altered
26+
slightly, to more closely match the usual programming notion that "_"
27+
is an alphabetic. Stuff used for pre-ANSI systems is now in a subdir,
28+
and the makefile no longer alludes to it in mysterious ways. The
29+
makefile has generally been cleaned up some. Fixes have been made
30+
(again!) so that the regression test will run without -DREDEBUG, at
31+
the cost of weaker checking. A workaround for a bug in some folks'
32+
<assert.h> has been added. And some more things have been added to
33+
tests, including a couple right at the end which are commented out
34+
because the code currently flunks them (complex bug; fix coming).
35+
Plus the usual minor cleanup.
36+
37+
New in alpha3.2: Assorted bits of cleanup and portability improvement
38+
(the development base is now a BSDI system using GCC instead of an ancient
39+
Sun system, and the newer compiler exposed some glitches). Fix for a
40+
serious bug that affected REs using many [] (including REG_ICASE REs
41+
because of the way they are implemented), *sometimes*, depending on
42+
memory-allocation patterns. The header-file prototypes no longer name
43+
the parameters, avoiding possible name conflicts. The possibility that
44+
some clot has defined CHAR_MIN as (say) `-128' instead of `(-128)' is
45+
now handled gracefully. "uchar" is no longer used as an internal type
46+
name (too many people have the same idea). Still the same old lousy
47+
performance, alas.
48+
49+
New in alpha3.1: Basically nothing, this release is just a bookkeeping
50+
convenience. Stay tuned.
51+
52+
New in alpha3.0: Performance is no better, alas, but some fixes have been
53+
made and some functionality has been added. (This is basically the "get
54+
it out the door in time for 4.4" release.) One bug fix: regfree() didn't
55+
free the main internal structure (how embarrassing). It is now possible
56+
to put NULs in either the RE or the target string, using (resp.) a new
57+
REG_PEND flag and the old REG_STARTEND flag. The REG_NOSPEC flag to
58+
regcomp() makes all characters ordinary, so you can match a literal
59+
string easily (this will become more useful when performance improves!).
60+
There are now primitives to match beginnings and ends of words, although
61+
the syntax is disgusting and so is the implementation. The REG_ATOI
62+
debugging interface has changed a bit. And there has been considerable
63+
internal cleanup of various kinds.
64+
65+
New in alpha2.3: Split change list out of README, and moved flags notes
66+
into Makefile. Macro-ized the name of regex(7) in regex(3), since it has
67+
to change for 4.4BSD. Cleanup work in engine.c, and some new regression
68+
tests to catch tricky cases thereof.
69+
70+
New in alpha2.2: Out-of-date manpages updated. Regerror() acquires two
71+
small extensions -- REG_ITOA and REG_ATOI -- which avoid debugging kludges
72+
in my own test program and might be useful to others for similar purposes.
73+
The regression test will now compile (and run) without REDEBUG. The
74+
BRE \$ bug is fixed. Most uses of "uchar" are gone; it's all chars now.
75+
Char/uchar parameters are now written int/unsigned, to avoid possible
76+
portability problems with unpromoted parameters. Some unsigned casts have
77+
been introduced to minimize portability problems with shifting into sign
78+
bits.
79+
80+
New in alpha2.1: Lots of little stuff, cleanup and fixes. The one big
81+
thing is that regex.h is now generated, using mkh, rather than being
82+
supplied in the distribution; due to circularities in dependencies,
83+
you have to build regex.h explicitly by "make h". The two known bugs
84+
have been fixed (and the regression test now checks for them), as has a
85+
problem with assertions not being suppressed in the absence of REDEBUG.
86+
No performance work yet.
87+
88+
New in alpha2: Backslash-anything is an ordinary character, not an
89+
error (except, of course, for the handful of backslashed metacharacters
90+
in BREs), which should reduce script breakage. The regression test
91+
checks *where* null strings are supposed to match, and has generally
92+
been tightened up somewhat. Small bug fixes in parameter passing (not
93+
harmful, but technically errors) and some other areas. Debugging
94+
invoked by defining REDEBUG rather than not defining NDEBUG.
95+
96+
New in alpha+3: full prototyping for internal routines, using a little
97+
helper program, mkh, which extracts prototypes given in stylized comments.
98+
More minor cleanup. Buglet fix: it's CHAR_BIT, not CHAR_BITS. Simple
99+
pre-screening of input when a literal string is known to be part of the
100+
RE; this does wonders for performance.
101+
102+
New in alpha+2: minor bits of cleanup. Notably, the number "32" for the
103+
word width isn't hardwired into regexec.c any more, the public header
104+
file prototypes the functions if __STDC__ is defined, and some small typos
105+
in the manpages have been fixed.
106+
107+
New in alpha+1: improvements to the manual pages, and an important
108+
extension, the REG_STARTEND option to regexec().

cclass.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/* character-class table */
/*
 * Each entry pairs a class name with a string listing every character in
 * that class.  The table ends with an entry whose name is NULL.  Entries
 * are written without inner braces, and long strings are continued with
 * backslash-newline rather than adjacent-literal concatenation.
 * NULL is not defined here; the including file must provide it.
 */
2+
static struct cclass {
3+
char *name;	/* class name, e.g. "alnum" */
4+
char *chars;	/* the member characters, as a NUL-terminated string */
5+
char *multis;	/* "" in every entry here; presumably multi-character members -- TODO confirm */
6+
} cclasses[] = {
7+
"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
8+
0123456789", "",
9+
"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
10+
"",
11+
"blank", " \t", "",
12+
/* cntrl: codes 1 through 037 (in scrambled order) plus 0177; NUL cannot appear in a C string */
"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
13+
\25\26\27\30\31\32\33\34\35\36\37\177", "",
14+
"digit", "0123456789", "",
15+
"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
16+
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
17+
"",
18+
"lower", "abcdefghijklmnopqrstuvwxyz",
19+
"",
20+
/* print: the same string as graph with a trailing space added */
"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
21+
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
22+
"",
23+
"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
24+
"",
25+
"space", "\t\n\v\f\r ", "",
26+
"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
27+
"",
28+
"xdigit", "0123456789ABCDEFabcdef",
29+
"",
30+
/* end-of-table sentinel: NULL name, null chars pointer */
NULL, 0, ""
31+
};

0 commit comments

Comments
 (0)