-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathGroupToken.cs
172 lines (142 loc) · 6.26 KB
/
GroupToken.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace regexer {
/** The GroupToken represents a sequence of patterns which must appear
 *  in the specified order in the input string; groups induced by round
 *  brackets are also represented by GroupTokens.
 *
 *  When a RegexMatch is found, it will contain a collection of the groups
 *  found during the match. Groups can be named using the following syntax:
 *  (?<group-name>pattern).
 *
 *  For example,
 *   - The pattern ab+c is compiled to a GroupToken containing three
 *     distinct patterns: a, b+ and c.
 *
 *   - The pattern (\w+)(\d+) is compiled to a GroupToken containing the
 *     two patterns \w+ and \d+.
 */
public class GroupToken : Token {
    public List<Token> Content { get; set; }    ///< The content of this group. Order is important.
    public string Name { get; set; }            ///< Optional name if specified in the pattern, or null.

    /** Index of the group in the pattern.
     *
     *  Groups specified in the pattern will have positive indexes starting
     *  from 1 (0 is the whole match) respecting their order of appearance.
     *  Groups added during the processing of the pattern should have a negative
     *  index so that they will not be shown.
     *
     *  Negative indexing can thus be used for "internal" purposes, for example
     *  for marking special kinds of groups etc.
     */
    public int Index { get; set; }

    /** The group captured by the last successful match of this token.
     *
     *  A new RegexGroup is built on every access from the positions and the
     *  input recorded by Matches / CanBacktrack.
     */
    public RegexGroup Match {
        get {
            return new RegexGroup( _cursor_start, _cursor_end, _input, Index, Name );
        }
    }

    // Information about the last match, used to build the RegexGroup.
    private int _cursor_start;  // where the group began; recorded by Matches only
    private int _cursor_end;    // where the group ended; refreshed after every successful (re)match
    private string _input;      // the input given to the last call to Matches

    /** Creates an empty group for the given pattern.
     *
     *  \param pattern The text of the pattern which generated this group. When it
     *      has the form (?<name> the name is extracted and stored in Name.
     *  \param index The index of this group, see Index.
     *  \throws ParsingException If the pattern specifies an empty group name.
     */
    public GroupToken( string pattern, int index )
        : base( TokenType.Group, pattern ) {

        // The naming syntax is fixed ASCII text, hence it is compared ordinally
        // and not with the rules of the current culture.
        if ( pattern.StartsWith( "(?<", StringComparison.Ordinal )
                && pattern.EndsWith( ">", StringComparison.Ordinal ) ) {

            // drop the three leading characters "(?<" and the trailing ">"
            this.Name = pattern.Substring( 3, pattern.Length - 4 );

            if ( this.Name.Length == 0 ) {
                throw new ParsingException( "empty group name not allowed" );
            }
        }

        this.Index = index;
        this.Content = new List<Token>( );
    }

    /** Creates an empty, unnamed group with index -1. */
    public GroupToken( )
        : base( TokenType.Group, "()" ) {

        Content = new List<Token>( );
        Index = -1;
    }

    /** Creates an unnamed group with index -1 wrapping the given tokens.
     *
     *  \param content The tokens of this group. The list is stored as it is,
     *      it is not copied.
     */
    public GroupToken( List<Token> content )
        : base( TokenType.Group, "()" ) {

        Index = -1;
        Content = content;
    }

    /** Checks whether the input, starting at the cursor, matches all the
     *  tokens of this group in sequence.
     *
     *  \param input The input we are trying to match
     *  \param cursor The current position in the input. It is moved after the
     *      end of the match on success and it is not modified otherwise.
     *  \return True if the input matches this group.
     */
    public override bool Matches( string input, ref int cursor ) {
        this._input = input;

        int group_start = cursor;
        if ( !matchesFrom( 0, input, ref cursor ) ) {
            return false;
        }

        // The start of the group is fixed here once and for all: backtracking
        // can only move its end.
        this._cursor_start = group_start;
        return true;
    }

    /** Tries to find a different way to match this group by making one of its
     *  tokens backtrack and matching again the tokens that follow it.
     *
     *  \param input The input we are trying to match
     *  \param cursor The current position in the input. It is moved after the
     *      end of the new match on success and it is not modified otherwise.
     *  \return True if an alternative match was found.
     */
    public override bool CanBacktrack( string input, ref int cursor ) {
        int original_cursor = cursor;
        int token = Content.Count;  // the search begins from the last token of the group

        if ( findBacktrackToken( input, ref cursor, ref token )
                && matchesFrom( token + 1, input, ref cursor ) ) {
            return true;
        }

        // matchesFrom rewinds the cursor only up to the point where it was
        // resumed, so the position the caller gave us is restored here.
        cursor = original_cursor;
        return false;
    }

    /** Checks whether the input, starting at the cursor, matches all the
     *  tokens, starting at the given point, in the correct sequence.
     *
     *  When a token does not match, the previous ones are asked to backtrack
     *  and matching is resumed from the token following the one which did.
     *
     *  On success only the end of the group is recorded: the start is
     *  recorded by Matches, because this method is also used to resume
     *  matching from the middle of the group after backtracking.
     *
     *  \param start The token from where start matching (it will be included)
     *  \param input The input we are trying to match
     *  \param cursor The current position in the input. If the input matches
     *      the cursor will be moved after the end of the match, it won't be
     *      affected otherwise.
     *  \return True if the input matches the tokens. In this case the cursor
     *      will also be moved after the end of the match.
     */
    private bool matchesFrom( int start, string input, ref int cursor ) {
        int resume_point = cursor;

        for ( ; start < Content.Count; start++ ) {
            if ( Content[ start ].Matches( input, ref cursor ) ) {
                continue;
            }

            // On success start becomes the index of the token which backtracked
            // and the loop increment resumes matching from the following one.
            if ( !findBacktrackToken( input, ref cursor, ref start ) ) {
                cursor = resume_point;
                return false;
            }
        }

        this._cursor_end = cursor;
        return true;
    }

    /** Find the first token which can backtrack, starting at the given point and going
     *  backwards. Backtracking will be performed if possible, moving the cursor to
     *  the correct position and saving the token which backtracked.
     *
     *  \param input The input that we are trying to match
     *  \param cursor The current position in the input. It will be moved to the
     *      correct position if backtracking is possible.
     *  \param token Where to start. Note that this token will not be asked for
     *      backtracking; this method starts from the previous one.
     *  \returns True if it was possible to backtrack. In this case, both cursor and
     *      token will be modified to reflect the changes. When false is returned
     *      the cursor is restored, while token is left at -1.
     */
    private bool findBacktrackToken( string input, ref int cursor, ref int token ) {
        int original_cursor = cursor;

        while ( --token >= 0 ) {
            if ( Content[ token ].CanBacktrack( input, ref cursor ) ) {
                return true;
            }
        }

        cursor = original_cursor;
        return false;
    }

    /** Reverses this group: the order of its tokens is inverted in place and
     *  then every token is reversed in turn.
     */
    public override void Reverse( ) {
        this.Content.Reverse( );

        foreach ( Token t in this.Content ) {
            t.Reverse( );
        }
    }

    /** String representation of this group token; the tokens contained in
     *  this group are included, one per line.
     *
     *  \returns The string representation of this token.
     */
    protected override string printContent( ) {
        var sb = new StringBuilder( );

        foreach ( Token t in Content ) {
            sb.AppendLine( t.ToString( ) );
        }

        return sb.ToString( );
    }
}
}