|
| 1 | +/* |
| 2 | + * A mini C-like language scanner. |
| 3 | + */ |
| 4 | + |
| 5 | +%%{ |
| 6 | + machine clang; |
| 7 | + |
| 8 | + newline = '\n' @{ curlin += 1; }; |
| 9 | + any_count_line = any | newline; |
| 10 | + |
| 11 | + # Consume a C comment. |
| 12 | + c_comment := any_count_line* :>> '*/' @{ fgoto main; }; |
| 13 | + |
| 14 | + main := |* |
| 15 | + |
| 16 | + # Alphanumeric characters or underscore. |
| 17 | + alnum_u = alnum | '_'; |
| 18 | + |
| 19 | + # Alpha characters or underscore. |
| 20 | + alpha_u = alpha | '_'; |
| 21 | + |
| 22 | + # Symbols. Upon entering clear the buffer. On all transitions |
| 23 | + # buffer a character. Upon leaving dump the symbol. |
| 24 | + ( punct - [_'"] ) { |
| 25 | + io::println(#fmt("symbol(%i): %c", curlin, data[ts] as char)); |
| 26 | + }; |
| 27 | + |
| 28 | + # Identifier. Upon entering clear the buffer. On all transitions |
| 29 | + # buffer a character. Upon leaving, dump the identifier. |
| 30 | + alpha_u alnum_u* { |
| 31 | + io::println(#fmt("ident(%i): %s", curlin, str::from_bytes(data.slice(ts, te)))); |
| 32 | + }; |
| 33 | + |
| 34 | + # Single Quote. |
| 35 | + sliteralChar = [^'\\] | newline | ( '\\' . any_count_line ); |
| 36 | + '\'' . sliteralChar* . '\'' { |
| 37 | + io::println(#fmt("single_lit(%i): %s", curlin, str::from_bytes(data.slice(ts, te)))); |
| 38 | + }; |
| 39 | + |
| 40 | + # Double Quote. |
| 41 | + dliteralChar = [^"\\] | newline | ( '\\' any_count_line ); |
| 42 | + '"' . dliteralChar* . '"' { |
| 43 | + io::println(#fmt("double_lit(%i): %s", curlin, str::from_bytes(data.slice(ts, te)))); |
| 44 | + }; |
| 45 | + |
| 46 | + # Whitespace is standard ws, newlines and control codes. |
| 47 | + any_count_line - 0x21..0x7e; |
| 48 | + |
| 49 | + # Describe both c style comments and c++ style comments. The |
| 50 | + # priority bump on the terminator of the comments brings us |
| 51 | + # out of the extend* which matches everything. |
| 52 | + '//' [^\n]* newline; |
| 53 | + |
| 54 | + '/*' { fgoto c_comment; }; |
| 55 | + |
| 56 | + # Match an integer. We don't bother clearing the buf or filling it. |
| 57 | + # The float machine overlaps with int and it will do it. |
| 58 | + digit+ { |
| 59 | + io::println(#fmt("int(%i): %s", curlin, str::from_bytes(data.slice(ts, te)))); |
| 60 | + }; |
| 61 | + |
| 62 | + # Match a float. Upon entering the machine clear the buf, buffer |
| 63 | + # characters on every trans and dump the float upon leaving. |
| 64 | + digit+ '.' digit+ { |
| 65 | + io::println(#fmt("float(%i): %s", curlin, str::from_bytes(data.slice(ts, te)))); |
| 66 | + }; |
| 67 | + |
| 68 | + # Match a hex. Upon entering the hex part, clear the buf, buffer characters |
| 69 | + # on every trans and dump the hex on leaving transitions. |
| 70 | + '0x' xdigit+ { |
| 71 | + io::println(#fmt("hex(%i): %s", curlin, str::from_bytes(data.slice(ts, te)))); |
| 72 | + }; |
| 73 | + |
| 74 | + *|; |
| 75 | +}%% |
| 76 | + |
| 77 | +%% write data nofinal; |
| 78 | + |
| 79 | +const BUFSIZE: uint = 2048; |
| 80 | +/* Scan stdin in BUFSIZE-byte chunks with the generated clang machine, carrying any unfinished token over to the next read. */ |
| 81 | +fn main() { |
| 82 | + let mut data = vec::to_mut(vec::from_elem(BUFSIZE, 0)); |
| 83 | + |
| 84 | + let mut cs = 0; |
| 85 | + let mut act = 0; |
| 86 | + let mut have = 0; |
| 87 | + let mut curlin = 1; |
| 88 | + let mut ts = 0; |
| 89 | + let mut te = 0; |
| 90 | + let mut done = false; |
| 91 | + |
| 92 | + %% write init; |
| 93 | + |
| 94 | + while !done { |
| 95 | + let mut p = have; |
| 96 | + let space = BUFSIZE - have; |
| 97 | + let mut eof = -1; |
| 98 | + /* data[0..have] holds the preserved token prefix; new input is read in after it. */ |
| 99 | + if space == 0 { |
| 100 | + /* We've used up the entire buffer storing an already-parsed token |
| 101 | + * prefix that must be preserved. */ |
| 102 | + fail ~"OUT OF BUFFER SPACE"; |
| 103 | + } |
| 104 | + /* read() yields a byte count, while pe must be an absolute end index into data, so offset it by have. */ |
| 105 | + let len = io::stdin().read(vec::mut_view(data, have, data.len()), space); |
| 106 | + let pe = have + len; |
| 107 | + |
| 108 | + /* A short read means this is the end of file. */ |
| 109 | + if len < space { |
| 110 | + eof = pe; |
| 111 | + done = true; |
| 112 | + } |
| 113 | + |
| 114 | + %% write exec; |
| 115 | + |
| 116 | + if cs == clang_error { |
| 117 | + fail ~"PARSE ERROR"; |
| 118 | + } |
| 119 | + |
| 120 | + if ts == -1 { |
| 121 | + have = 0; |
| 122 | + } else { |
| 123 | + /* There is a prefix to preserve, shift it over. */ |
| 124 | + have = pe - ts; |
| 125 | + vec::u8::memmove(data, vec::view(data, ts, pe), have); |
| 126 | + te = te - ts; |
| 127 | + ts = 0; |
| 128 | + } |
| 129 | + } |
| 130 | +} |
0 commit comments