@@ -9,17 +9,40 @@ const FuzzyPartialUrlPatterns_1 = require("../pattern/FuzzyPartialUrlPatterns");
99const BasePatterns_1 = require ( "../pattern/BasePatterns" ) ;
1010const ProtocolPatterns_1 = require ( "../pattern/ProtocolPatterns" ) ;
1111const DomainPatterns_1 = require ( "../pattern/DomainPatterns" ) ;
12+ const valid_1 = __importDefault ( require ( "../valid" ) ) ;
1213exports . UrlNormalizer = {
13- modifiedUrl : null ,
14+ sacrificedUrl : null ,
15+ currentStep : 0 ,
16+ /**
17+ * Initializes the UrlNormalizer with a given URL.
18+ * @param url - The URL to normalize.
19+ */
20+ initializeSacrificedUrl ( url ) {
21+ this . sacrificedUrl = util_1 . default . Text . removeAllSpaces ( valid_1 . default . validateAndTrimString ( url ) ) ;
22+ if ( ! this . sacrificedUrl ) {
23+ throw new Error ( "modifiedUrl cannot be null or empty" ) ;
24+ }
25+ this . currentStep = 1 ;
26+ } ,
27+ /**
28+ * Check if the required previous step is completed.
29+ * @param requiredStep - The step that should have been completed.
30+ */
31+ ensureStepCompleted ( requiredStep ) {
32+ if ( this . currentStep != requiredStep ) {
33+ throw new Error ( `Step ${ requiredStep } must be completed before this step ${ this . currentStep } ` ) ;
34+ }
35+ } ,
1436 extractAndNormalizeProtocolFromSpacesRemovedUrl ( ) {
15- if ( this . modifiedUrl == undefined ) {
16- throw new Error ( "modifiedUrl cannot be null" ) ;
37+ this . ensureStepCompleted ( 1 ) ;
38+ if ( ! this . sacrificedUrl ) {
39+ throw new Error ( "modifiedUrl cannot be null or empty" ) ;
1740 }
1841 let protocol = null ;
1942 let rx = new RegExp ( '^(' + FuzzyPartialUrlPatterns_1 . FuzzyPartialUrlPatterns . getFuzzyProtocolsRxStr + '|' + FuzzyPartialUrlPatterns_1 . FuzzyPartialUrlPatterns . fuzzierProtocol + ')' + FuzzyPartialUrlPatterns_1 . FuzzyPartialUrlPatterns . fuzzierProtocolDomainDelimiter ) ;
2043 let match ;
2144 let isMatched = false ;
22- while ( ( match = rx . exec ( this . modifiedUrl ) ) !== null ) {
45+ while ( ( match = rx . exec ( this . sacrificedUrl ) ) !== null ) {
2346 if ( match && match [ 1 ] ) {
2447 isMatched = true ;
2548 if ( match [ 1 ] === 'localhost' ) {
@@ -37,11 +60,13 @@ exports.UrlNormalizer = {
3760 break ;
3861 }
3962 }
40- this . modifiedUrl = this . modifiedUrl . replace ( rx , '' ) ;
63+ this . sacrificedUrl = this . sacrificedUrl . replace ( rx , '' ) ;
64+ this . currentStep = 2 ;
4165 return protocol ;
4266 } ,
4367 extractAndNormalizeDomainFromProtocolRemovedUrl ( ) {
44- if ( this . modifiedUrl == undefined ) {
68+ this . ensureStepCompleted ( 2 ) ;
69+ if ( this . sacrificedUrl == undefined ) {
4570 throw new Error ( "modifiedUrl cannot be null" ) ;
4671 }
4772 let result = {
@@ -51,7 +76,7 @@ exports.UrlNormalizer = {
5176 let rx1 = new RegExp ( '(' + FuzzyPartialUrlPatterns_1 . FuzzyPartialUrlPatterns . getFuzzyDomainBody + '.*?)(' + FuzzyPartialUrlPatterns_1 . FuzzyPartialUrlPatterns . optionalFuzzyPort +
5277 FuzzyPartialUrlPatterns_1 . FuzzyPartialUrlPatterns . optionalFuzzyUrlParams + ')$' , 'gi' ) ;
5378 let match1 ;
54- while ( ( match1 = rx1 . exec ( this . modifiedUrl ) ) !== null ) {
79+ while ( ( match1 = rx1 . exec ( this . sacrificedUrl ) ) !== null ) {
5580 // remaining full url
5681 let domain_temp = match1 [ 0 ] ;
5782 // domain
@@ -141,46 +166,49 @@ exports.UrlNormalizer = {
141166 else {
142167 result . domain = domain_temp2 ;
143168 }
144- this . modifiedUrl = domain_temp3 ;
169+ this . sacrificedUrl = domain_temp3 ;
145170 }
146171 //console.log("before : " + this.modifiedUrl)
147172 // This sort of characters should NOT be located at the start.
148- this . modifiedUrl = this . modifiedUrl . replace ( new RegExp ( '^(?:' + BasePatterns_1 . BasePatterns . twoBytesNum + '|' + BasePatterns_1 . BasePatterns . langChar + ')+' , 'i' ) , '' ) ;
149- //console.log("after : " + this.modifiedUrl)
173+ this . sacrificedUrl = this . sacrificedUrl . replace ( new RegExp ( '^(?:' + BasePatterns_1 . BasePatterns . twoBytesNum + '|' + BasePatterns_1 . BasePatterns . langChar + ')+' , 'i' ) , '' ) ;
174+ this . currentStep = 3 ;
150175 return result ;
151176 } ,
152177 extractAndNormalizePortFromDomainRemovedUrl ( ) {
178+ this . ensureStepCompleted ( 3 ) ;
153179 let port = null ;
154180 let rx = new RegExp ( '^' + FuzzyPartialUrlPatterns_1 . FuzzyPartialUrlPatterns . mandatoryFuzzyPort , 'gi' ) ;
155181 let match ;
156- if ( this . modifiedUrl == undefined ) {
182+ if ( this . sacrificedUrl == undefined ) {
157183 throw new Error ( "modifiedUrl cannot be null" ) ;
158184 }
159- while ( ( match = rx . exec ( this . modifiedUrl ) ) !== null ) {
185+ while ( ( match = rx . exec ( this . sacrificedUrl ) ) !== null ) {
160186 port = match [ 0 ] . replace ( / ^ \D + / g, '' ) ;
161- if ( this . modifiedUrl != undefined ) {
162- this . modifiedUrl = this . modifiedUrl . replace ( rx , '' ) ;
187+ if ( this . sacrificedUrl != undefined ) {
188+ this . sacrificedUrl = this . sacrificedUrl . replace ( rx , '' ) ;
163189 }
164190 }
191+ this . currentStep = 4 ;
165192 return port ;
166193 } ,
167- finalizeNormalization ( protocol , port , domain ) {
168- if ( this . modifiedUrl == undefined ) {
194+ extractNormalizedUrl ( protocol , port , domain ) {
195+ this . ensureStepCompleted ( 4 ) ;
196+ if ( this . sacrificedUrl == undefined ) {
169197 throw new Error ( "modifiedUrl cannot be null" ) ;
170198 }
171199 /* Now, only the end part of a domain is left */
172200 /* Consecutive param delimiters should be replaced into one */
173- this . modifiedUrl = this . modifiedUrl . replace ( / [ # ] { 2 , } / gi, '#' ) ;
174- this . modifiedUrl = this . modifiedUrl . replace ( / [ / ] { 2 , } / gi, '/' ) ;
175- this . modifiedUrl = this . modifiedUrl . replace ( / ( .* ?) [ ? ] { 2 , } ( [ ^ / ] * ?(?: = | $ ) ) ( .* ) / i, function ( match , $1 , $2 , $3 ) {
201+ this . sacrificedUrl = this . sacrificedUrl . replace ( / [ # ] { 2 , } / gi, '#' ) ;
202+ this . sacrificedUrl = this . sacrificedUrl . replace ( / [ / ] { 2 , } / gi, '/' ) ;
203+ this . sacrificedUrl = this . sacrificedUrl . replace ( / ( .* ?) [ ? ] { 2 , } ( [ ^ / ] * ?(?: = | $ ) ) ( .* ) / i, function ( match , $1 , $2 , $3 ) {
176204 //console.log(modified_url + ' a :' + $1 + '?' + $2 + $3);
177205 return $1 + '?' + $2 + $3 ;
178206 } ) ;
179207 /* 'modified_url' must start with '/,?,#' */
180208 let rx_modified_url = new RegExp ( '(?:\\/|\\?|\\#)' , 'i' ) ;
181209 let match_modified_url ;
182- if ( ( match_modified_url = rx_modified_url . exec ( this . modifiedUrl ) ) !== null ) {
183- this . modifiedUrl = this . modifiedUrl . replace ( new RegExp ( '^.*?(' + util_1 . default . Text . escapeRegex ( match_modified_url [ 0 ] ) + '.*)$' , 'i' ) , function ( match , $1 ) {
210+ if ( ( match_modified_url = rx_modified_url . exec ( this . sacrificedUrl ) ) !== null ) {
211+ this . sacrificedUrl = this . sacrificedUrl . replace ( new RegExp ( '^.*?(' + util_1 . default . Text . escapeRegex ( match_modified_url [ 0 ] ) + '.*)$' , 'i' ) , function ( match , $1 ) {
184212 return $1 ;
185213 } ) ;
186214 }
@@ -202,42 +230,45 @@ exports.UrlNormalizer = {
202230 if ( ! onlyDomain_str ) {
203231 onlyDomain_str = '' ;
204232 }
205- return protocol_str + onlyDomain_str + port_str + this . modifiedUrl ;
233+ this . currentStep = 5 ;
234+ return protocol_str + onlyDomain_str + port_str + this . sacrificedUrl ;
206235 } ,
207236 extractAndNormalizeUriParamsFromPortRemovedUrl ( ) {
208- if ( this . modifiedUrl == undefined ) {
237+ this . ensureStepCompleted ( 5 ) ;
238+ if ( this . sacrificedUrl == undefined ) {
209239 throw new Error ( "modifiedUrl cannot be null" ) ;
210240 }
211241 let result = {
212242 uri : null ,
213243 params : null
214244 } ;
215- if ( ! this . modifiedUrl || this . modifiedUrl . trim ( ) === '' ) {
245+ if ( ! this . sacrificedUrl || this . sacrificedUrl . trim ( ) === '' ) {
216246 result . params = null ;
217247 result . uri = null ;
218248 }
219249 else {
220250 // PARAMS
221251 let rx3 = new RegExp ( '\\?(?:.)*$' , 'gi' ) ;
222252 let match3 ;
223- while ( ( match3 = rx3 . exec ( this . modifiedUrl ) ) !== null ) {
253+ while ( ( match3 = rx3 . exec ( this . sacrificedUrl ) ) !== null ) {
224254 result . params = match3 [ 0 ] ;
225255 }
226- this . modifiedUrl = this . modifiedUrl . replace ( rx3 , '' ) ;
256+ this . sacrificedUrl = this . sacrificedUrl . replace ( rx3 , '' ) ;
227257 if ( result . params === "?" ) {
228258 result . params = null ;
229259 }
230260 // URI
231261 let rx4 = new RegExp ( '[#/](?:.)*$' , 'gi' ) ;
232262 let match4 ;
233- while ( ( match4 = rx4 . exec ( this . modifiedUrl ) ) !== null ) {
263+ while ( ( match4 = rx4 . exec ( this . sacrificedUrl ) ) !== null ) {
234264 result . uri = match4 [ 0 ] ;
235265 }
236- this . modifiedUrl = this . modifiedUrl . replace ( rx4 , '' ) ;
266+ this . sacrificedUrl = this . sacrificedUrl . replace ( rx4 , '' ) ;
237267 if ( result . uri === "/" ) {
238268 result . uri = null ;
239269 }
240270 }
271+ this . currentStep = 6 ;
241272 return result ;
242273 }
243274} ;
0 commit comments