Skip to content

Commit ba02786

Browse files
committed
Initial rust support
1 parent 2e76f7b commit ba02786

26 files changed

+3300
-4
lines changed

doc/ragel-guide.tex

+1-1
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ \section{Abstract}
134134
regular expression. The single-expression model affords concise and elegant
135135
descriptions of languages and the generation of very simple, fast and robust
136136
code. Ragel compiles executable finite state machines from a high level regular language
137-
notation. Ragel targets C, C++, Objective-C, D, Go, Java and Ruby.
137+
notation. Ragel targets C, C++, Objective-C, D, Go, Java, Ruby, O'Caml and Rust.
138138

139139
In addition to building state machines from regular expressions, Ragel allows
140140
the programmer to directly specify state machines with state charts. These two

examples/rust/.gitignore

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
/clang
2+
/concurrent
3+
/gotocallret
4+
/url
5+
/rpn
6+
/scan2
7+
/atoi
8+
/*.rs
9+
/*.dSYM

examples/rust/Makefile

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
2+
.SUFFIXES:
3+
4+
RAGEL = ../../ragel/ragel
5+
MODE =
6+
7+
all: url concurrent rpn atoi clang gotocallret scan1 scan2
8+
9+
url: url.rl
10+
$(RAGEL) $(MODE) --host-lang=rust url.rl -o url.rs
11+
$(RAGEL) $(MODE) --host-lang=rust url_authority.rl -o url_authority.rs
12+
rustc --test url.rc
13+
14+
concurrent: concurrent.rl
15+
$(RAGEL) $(MODE) --host-lang=rust -o concurrent.rs concurrent.rl
16+
rustc concurrent.rs
17+
18+
rpn: rpn.rl
19+
$(RAGEL) $(MODE) --host-lang=rust -o rpn.rs rpn.rl
20+
rustc --test rpn.rs
21+
22+
atoi: atoi.rl
23+
$(RAGEL) $(MODE) --host-lang=rust -o atoi.rs atoi.rl
24+
rustc atoi.rs
25+
26+
clang: clang.rl
27+
$(RAGEL) $(MODE) --host-lang=rust -o clang.rs clang.rl
28+
rustc clang.rs
29+
30+
gotocallret: gotocallret.rl
31+
$(RAGEL) $(MODE) --host-lang=rust -o gotocallret.rs gotocallret.rl
32+
rustc gotocallret.rs
33+
34+
scan1: scan1.rl
35+
$(RAGEL) $(MODE) --host-lang=rust -o scan1.rs scan1.rl
36+
rustc scan1.rs
37+
38+
scan2: scan2.rl
39+
$(RAGEL) $(MODE) --host-lang=rust -o scan2.rs scan2.rl
40+
rustc scan2.rs
41+
42+
.PHONY: clean
43+
clean:
44+
rm -f *.o
45+
rm -f $(subst .rl,.rs,$(wildcard *.rl))
46+
rm -f $(subst .rl,,$(wildcard *.rl))

examples/rust/README

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
2+
ragel examples for Rust codegen
3+
===============================
4+
5+
`make` will generate and compile all of the examples.
6+
`make url` will generate and compile the url parser tests.
7+
8+
Examples were taken from examples/go/ and examples/ and
9+
converted to Rust, thanks to the original authors.
10+
11+
--
12+
ygrek
13+

examples/rust/atoi.rl

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
2+
%%{
3+
machine atoi;
4+
write data;
5+
}%%
6+
7+
8+
fn atoi(data: ~str) -> option<int> {
9+
let mut cs: int;
10+
let mut p = 0;
11+
let mut pe = data.len();
12+
let mut neg = false;
13+
let mut res = 0;
14+
15+
%%{
16+
action see_neg { neg = true; }
17+
action add_digit { res = res * 10 + (fc as int - '0' as int); }
18+
19+
main :=
20+
( '-' @see_neg | '+' )? ( digit @add_digit )+
21+
'\n'?
22+
;
23+
24+
write init;
25+
write exec;
26+
}%%
27+
28+
if neg { res = -1 * res; }
29+
30+
if cs < atoi_first_final {
31+
none
32+
} else {
33+
some(res)
34+
}
35+
}
36+
37+
fn main() {
38+
assert atoi(~"7") == some(7);
39+
assert atoi(~"666") == some(666);
40+
assert atoi(~"-666") == some(-666);
41+
assert atoi(~"+666") == some(666);
42+
assert atoi(~"123456789") == some(123456789);
43+
assert atoi(~"+123456789\n") == some(123456789);
44+
assert atoi(~"+ 1234567890") == none;
45+
}

examples/rust/clang.rl

+130
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/*
2+
* A mini C-like language scanner.
3+
*/
4+
5+
%%{
6+
machine clang;
7+
8+
newline = '\n' @{ curlin += 1; };
9+
any_count_line = any | newline;
10+
11+
# Consume a C comment.
12+
c_comment := any_count_line* :>> '*/' @{ fgoto main; };
13+
14+
main := |*
15+
16+
# Alphanumeric characters or underscore.
17+
alnum_u = alnum | '_';
18+
19+
# Alphabetic characters or underscore.
20+
alpha_u = alpha | '_';
21+
22+
# Symbols. Upon entering clear the buffer. On all transitions
23+
# buffer a character. Upon leaving dump the symbol.
24+
( punct - [_'"] ) {
25+
io::println(#fmt("symbol(%i): %c", curlin, data[ts] as char));
26+
};
27+
28+
# Identifier. Upon entering clear the buffer. On all transitions
29+
# buffer a character. Upon leaving, dump the identifier.
30+
alpha_u alnum_u* {
31+
io::println(#fmt("ident(%i): %s", curlin, str::from_bytes(data.slice(ts, te))));
32+
};
33+
34+
# Single Quote.
35+
sliteralChar = [^'\\] | newline | ( '\\' . any_count_line );
36+
'\'' . sliteralChar* . '\'' {
37+
io::println(#fmt("single_lit(%i): %s", curlin, str::from_bytes(data.slice(ts, te))));
38+
};
39+
40+
# Double Quote.
41+
dliteralChar = [^"\\] | newline | ( '\\' any_count_line );
42+
'"' . dliteralChar* . '"' {
43+
io::println(#fmt("double_lit(%i): %s", curlin, str::from_bytes(data.slice(ts, te))));
44+
};
45+
46+
# Whitespace is standard ws, newlines and control codes.
47+
any_count_line - 0x21..0x7e;
48+
49+
# Describe both c style comments and c++ style comments. The
50+
# priority bump on the terminator of the comments brings us
51+
# out of the extend* which matches everything.
52+
'//' [^\n]* newline;
53+
54+
'/*' { fgoto c_comment; };
55+
56+
# Match an integer. We don't bother clearing the buf or filling it.
57+
# The float machine overlaps with int and it will do it.
58+
digit+ {
59+
io::println(#fmt("int(%i): %s", curlin, str::from_bytes(data.slice(ts, te))));
60+
};
61+
62+
# Match a float. Upon entering the machine clear the buf, buffer
63+
# characters on every trans and dump the float upon leaving.
64+
digit+ '.' digit+ {
65+
io::println(#fmt("float(%i): %s", curlin, str::from_bytes(data.slice(ts, te))));
66+
};
67+
68+
# Match a hex. Upon entering the hex part, clear the buf, buffer characters
69+
# on every trans and dump the hex on leaving transitions.
70+
'0x' xdigit+ {
71+
io::println(#fmt("hex(%i): %s", curlin, str::from_bytes(data.slice(ts, te))));
72+
};
73+
74+
*|;
75+
}%%
76+
77+
%% write data nofinal;
78+
79+
const BUFSIZE: uint = 2048;
80+
81+
fn main() {
82+
let mut data = vec::to_mut(vec::from_elem(BUFSIZE, 0));
83+
84+
let mut cs = 0;
85+
let mut act = 0;
86+
let mut have = 0;
87+
let mut curlin = 1;
88+
let mut ts = 0;
89+
let mut te = 0;
90+
let mut done = false;
91+
92+
%% write init;
93+
94+
while !done {
95+
let mut p = have;
96+
let space = BUFSIZE - have;
97+
let mut eof = -1;
98+
99+
if space == 0 {
100+
/* We've used up the entire buffer storing an already-parsed token
101+
* prefix that must be preserved. */
102+
fail ~"OUT OF BUFFER SPACE";
103+
}
104+
105+
let pe = io::stdin().read(vec::mut_view(data, have, data.len()),
106+
space);
107+
108+
/* Check if this is the end of file. */
109+
if pe < space {
110+
eof = pe;
111+
done = true;
112+
}
113+
114+
%% write exec;
115+
116+
if cs == clang_error {
117+
fail ~"PARSE ERROR";
118+
}
119+
120+
if ts == -1 {
121+
have = 0;
122+
} else {
123+
/* There is a prefix to preserve, shift it over. */
124+
have = pe - ts;
125+
vec::u8::memmove(data, vec::view(data, ts, pe), have);
126+
te = te - ts;
127+
ts = 0;
128+
}
129+
}
130+
}

examples/rust/concurrent.rl

+117
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
/*
2+
* Show off concurrent abilities.
3+
*/
4+
5+
%%{
6+
machine concurrent;
7+
8+
action next_char {
9+
self.cur_char += 1;
10+
}
11+
12+
action start_word {
13+
self.start_word = self.cur_char;
14+
}
15+
action end_word {
16+
io::println(#fmt("word: %i %i", self.start_word, self.cur_char - 1));
17+
}
18+
19+
action start_comment {
20+
self.start_comment = self.cur_char;
21+
}
22+
action end_comment {
23+
io::println(#fmt("comment: %i %i", self.start_comment,
24+
self.cur_char - 1));
25+
}
26+
27+
action start_literal {
28+
self.start_literal = self.cur_char;
29+
}
30+
action end_literal {
31+
io::println(#fmt("literal: %i %i", self.start_literal,
32+
self.cur_char - 1));
33+
}
34+
35+
# Count characters.
36+
chars = ( any @next_char )*;
37+
38+
# Words are non-whitespace.
39+
word = ( any-space )+ >start_word %end_word;
40+
words = ( ( word | space ) $1 %0 )*;
41+
42+
# Finds C style comments.
43+
comment = ( '/*' any* :>> '*/' ) >start_comment %end_comment;
44+
comments = ( comment | any )**;
45+
46+
# Finds single quoted strings.
47+
literal_char = ( any - ['\\] ) | ( '\\' . any );
48+
literal = ('\'' literal_char* '\'' ) >start_literal %end_literal;
49+
literals = ( ( literal | (any-'\'') ) $1 %0 )*;
50+
51+
main := chars | words | comments | literals;
52+
}%%
53+
54+
%% write data;
55+
56+
const BUFSIZE: uint = 2048u;
57+
58+
class concurrent {
59+
let mut cur_char: int;
60+
let mut start_word: int;
61+
let mut start_comment: int;
62+
let mut start_literal: int;
63+
64+
let mut cs: int;
65+
66+
new() {
67+
let mut cs: int;
68+
%% write init;
69+
self.cs = cs;
70+
self.cur_char = 0;
71+
self.start_word = 0;
72+
self.start_comment = 0;
73+
self.start_literal = 0;
74+
}
75+
76+
fn execute(data: ~[const u8], len: uint, is_eof: bool) -> int {
77+
let mut p = 0;
78+
let pe = len;
79+
let eof = if is_eof { pe } else { 0 };
80+
81+
let mut cs = self.cs;
82+
83+
%% write exec;
84+
85+
self.cs = cs;
86+
87+
self.finish()
88+
}
89+
90+
fn finish() -> int {
91+
if self.cs == concurrent_error {
92+
-1
93+
} else if self.cs >= concurrent_first_final {
94+
1
95+
} else {
96+
0
97+
}
98+
}
99+
}
100+
101+
fn main() {
102+
let mut buf = vec::to_mut(vec::from_elem(BUFSIZE, 0));
103+
104+
let concurrent = concurrent();
105+
106+
loop {
107+
let len = io::stdin().read(buf, BUFSIZE);
108+
concurrent.execute(buf, len, len != BUFSIZE);
109+
if len != BUFSIZE {
110+
break;
111+
}
112+
}
113+
114+
if concurrent.finish() <= 0 {
115+
io::stderr().write_line("concurrent: error parsing input");
116+
}
117+
}

0 commit comments

Comments
 (0)