diff --git a/include/c_tokenizer.h b/include/c_tokenizer.h index abe0fdb30d..4444ca2e6a 100644 --- a/include/c_tokenizer.h +++ b/include/c_tokenizer.h @@ -1,5 +1,3 @@ -/* c_tokenizer.h */ -// some code borrowed from http://www.cplusplus.com/faq/sequences/strings/split/ #pragma once #ifndef C_TOKENIZER_H @@ -34,7 +32,6 @@ const char* free_tokenizer( tokenizer_t* tokenizer ); const char* tokenize( tokenizer_t* tokenizer ); char * mysql_query_digest_first_stage(const char* const q, int q_len, char** const fst_cmnt, char* const buf); char * mysql_query_digest_second_stage(const char* const q, int q_len, char** const fst_cmnt, char* const buf); -char * mysql_query_digest_and_first_comment(char *s , int len , char **first_comment, char *buf); char * mysql_query_digest_and_first_comment_2(const char* const q, int q_len, char** const fst_cmnt, char* const buf); char * mysql_query_digest_and_first_comment_one_it(char *s , int len , char **first_comment, char *buf); char * mysql_query_strip_comments(char *s , int len); diff --git a/lib/c_tokenizer.cpp b/lib/c_tokenizer.cpp index c468e0b185..5b35ef0fb3 100644 --- a/lib/c_tokenizer.cpp +++ b/lib/c_tokenizer.cpp @@ -1,6 +1,3 @@ -/* c_tokenizer.c */ -// Borrowed from http://www.cplusplus.com/faq/sequences/strings/split/ - #include #include #include @@ -234,591 +231,6 @@ static inline void replace_with_q_mark( } } -char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comment, char *buf){ - int i = 0; - - char cur_comment[FIRST_COMMENT_MAX_LENGTH]; - cur_comment[0]=0; - int ccl=0; - int cmd=0; - - int len = _len; - if (_len > mysql_thread___query_digests_max_query_length) { - len = mysql_thread___query_digests_max_query_length; - } - char *r = buf; - if (r==NULL) { - r = (char *) malloc(len + SIZECHAR); - } - char *p_r = r; - char *p_r_t = r; - - char prev_char = 0; - char qutr_char = 0; - - char flag = 0; - char fc=0; - int fc_len=0; - - char fns=0; - - bool lowercase=0; - bool replace_null=0; - bool replace_number=0; - - char grouping_digest=0; - char grouping_limit_exceeded=0; - int grouping_count=0; - int grouping_lim = mysql_thread___query_digests_grouping_limit; - - lowercase=mysql_thread___query_digests_lowercase; - replace_null = mysql_thread___query_digests_replace_null; - replace_number = mysql_thread___query_digests_no_digits; - - while(i < len) - { - // Handy for debugging purposes - // ============================ - // printf( - // "state-1: { flag: `%d`, prev_char: `%c`, s: `%s`, p_r: `%s`, r: `%s`}\n", - // flag, prev_char, s, p_r, r - // ); - // ============================ - - // ================================================= - // START - read token char and set flag what's going on. - // ================================================= - if(flag == 0) - { - // store current position - p_r_t = p_r; - - // comment type 1 - start with '/*' - if(prev_char == '/' && *s == '*') - { - ccl=0; - flag = 1; - if (i != (len-1) && *(s+1)=='!') - cmd=1; - } - - // comment type 2 - start with '#' - else if(*s == '#') - { - flag = 2; - } - - // comment type 3 - start with '--' - - // NOTE: Looks like the general rule for parsing comments of this type could simply be: - // - // - `.*--.*` which could be translated into `(*s == '-' && *(s+1) == '-')`. - // - // But this can not hold, since the first '-' could have been consumed previously, for example - // during the parsing of a digit: - // - // - `select 1.1-- final_comment\n` - // - // For this reason 'prev_char' needs to be checked too when searching for the `--` pattern. - else if(i != (len-1) && prev_char == '-' && *s == '-' && ((*(s+1)==' ') || (*(s+1)=='\n') || (*(s+1)=='\r') || (*(s+1)=='\t') )) - { - flag = 3; - } - - // Previous character can be a consumed ' ' instead of '-' as in the previous case, for this - // reason, we need to look ahead for '--'. - // - // NOTE: There is no reason for not checking for the subsequent space char that should follow - // the '-- ', otherwise we would consider valid queries as `SELECT --1` like comments. - else if (i != (len-1) && *s == '-' && (*(s+1)=='-')) { - if (prev_char != '-') { - flag = 3; - } - else if (i==0) { - flag = 3; - } - } - - // string - start with ' - else if(*s == '\'' || *s == '"') - { - flag = 4; - qutr_char = *s; - } - - // may be digit - start with digit - else if(is_token_char(prev_char) && is_digit_char(*s)) - { - flag = 5; - if(len == i+1) - continue; - } - - // not above case - remove duplicated space char - else - { - flag = 0; - if (fns==0 && is_space_char(*s)) { - s++; - i++; - continue; - } - if (fns==0) fns=1; - if(is_space_char(prev_char) && is_space_char(*s)){ - prev_char = ' '; - *p_r = ' '; - s++; - i++; - continue; - } - if (replace_number) { - if (!is_digit_char(prev_char) && is_digit_char(*s)) { - *p_r++ = '?'; - while(*s != '\0' && is_digit_char(*s)) { - s++; - i++; - } - } - } - { - char* p = p_r - 2; - // suppress spaces before arithmetic operators - if (p >= r && is_space_char(prev_char) && is_arithmetic_op(*s)) { - if (*p == '?') { - prev_char = *s; - --p_r; - *p_r++ = *s; - s++; - i++; - continue; - } - } - // suppress spaces before and after commas - if (p >= r && is_space_char(prev_char) && ((*s == ',') || (*p == ','))) { - if (*s == ',') { - --p_r; - // only copy the comma if we are not grouping a query - if (!grouping_limit_exceeded) { - *p_r++ = *s; - } - prev_char = ','; - s++; - i++; - } else { - prev_char = ','; - --p_r; - } - continue; - } - // suppress spaces before closing brackets when grouping or mark is present - if (p >= r && (*p == '.' || *p == '?') && is_space_char(prev_char) && (*s == ')')) { - prev_char = *s; - --p_r; - *p_r++ = *s; - s++; - i++; - continue; - } - } - if (replace_null) { - if (*s == 'n' || *s == 'N') { // we search for NULL , #2171 - if (i && is_token_char(prev_char)) { - if (len>=4) { - if (i=2) fc_len-=2; - char *c=*first_comment+fc_len; - *c=0; - //*first_comment[fc_len]=0; - fc=2; - } - } - } - if( - // comment type 1 - /* .. */ - (flag == 1 && prev_char == '*' && *s == '/') || - - // comment type 2 - # ... \n - (flag == 2 && (*s == '\n' || *s == '\r' || (i == len - 1) )) - || - // comment type 3 - -- ... \n - (flag == 3 && (*s == '\n' || *s == '\r' || (i == len -1) )) - ) - { - p_r = p_r_t; - if (flag == 1 || (i == len -1)) { - p_r -= SIZECHAR; - } - if (cmd) { - cur_comment[ccl]=0; - if (ccl>=2) { - ccl-=2; - cur_comment[ccl]=0; - char el=0; - int fcc=0; - while (el==0 && fcc= r && ( *(_p+2) == '-' || *(_p+2) == '+') ) { - if ( - ( *(_p+1) == ',' ) || ( *(_p+1) == '(' ) || - ( ( *(_p+1) == ' ' ) && ( *_p == ',' || *_p == '(' ) ) - ) { - p_r--; - } - } - - replace_with_q_mark( - grouping_digest, grouping_lim, &grouping_count, &p_r, &grouping_limit_exceeded - ); - - flag = 0; - break; - } - - // need to be ignored case - if(p_r > p_r_t + SIZECHAR) - { - if( - (prev_char == '\\' && *s == '\\') || // to process '\\\\', '\\' - (prev_char == '\\' && *s == qutr_char) || // to process '\'' - (prev_char == qutr_char && *s == qutr_char) // to process '''' - ) - { - prev_char = 'X'; - s++; - i++; - continue; - } - } - - // satisfied closing string - swap string to ? - if(*s == qutr_char && (len == i+1 || *(s + SIZECHAR) != qutr_char)) - { - char *_p = p_r_t; - _p-=3; - p_r = p_r_t; - if ( _p >= r && ( *(_p+2) == '-' || *(_p+2) == '+') ) { - if ( - ( *(_p+1) == ',' ) || ( *(_p+1) == '(' ) || - ( ( *(_p+1) == ' ' ) && ( *_p == ',' || *_p == '(' ) ) - ) { - p_r--; - } - } - - // Remove spaces before each literal found - if ( _p >= r && is_space_char(*(_p + 2)) && !is_normal_char(*(_p + 1))) { - if ( _p >= r && ( *(_p+3) == '\''|| *(_p+3) == '"' )) { - p_r--; - } - } - - replace_with_q_mark( - grouping_digest, grouping_lim, &grouping_count, &p_r, &grouping_limit_exceeded - ); - - prev_char = qutr_char; - qutr_char = 0; - flag = 0; - if(i < len) - s++; - i++; - continue; - } - } - - // -------- - // digit - // -------- - else if(flag == 5) - { - // last single char - if(p_r_t == p_r) - { - char *_p = p_r_t; - _p-=3; - if ( _p >= r && ( *(_p+2) == '-' || *(_p+2) == '+') ) { - if ( - ( *(_p+1) == ',' ) || ( *(_p+1) == '(' ) || - ( ( *(_p+1) == ' ' ) && ( *_p == ',' || *_p == '(' ) ) - ) { - p_r--; - } - } - if ( _p >= r && is_space_char(*(_p + 2))) { - if ( _p >= r && ( *(_p+1) == '-' || *(_p+1) == '+' || *(_p+1) == '*' || *(_p+1) == '/' || *(_p+1) == '%' || *(_p+1) == ',')) { - p_r--; - } - } - *p_r++ = '?'; - i++; - continue; - } - - // is float - if (*s == '.' || *s == 'e' || ((*s == '+' || *s == '-') && prev_char == 'e')) { - prev_char = *s; - i++; - s++; - continue; - } - - // token char or last char - if(is_token_char(*s) || len == i+1) - { - if(is_digit_string(p_r_t, p_r)) - { - char *_p = p_r_t; - _p-=3; - p_r = p_r_t; - // remove symbol and keep parenthesis or comma - if ( _p >= r && ( *(_p+2) == '-' || *(_p+2) == '+') ) { - if ( - ( *(_p+1) == ',' ) || ( *(_p+1) == '(' ) || - ( ( *(_p+1) == ' ' ) && ( *_p == ',' || *_p == '(' ) ) - ) { - p_r--; - } - } - - // Remove spaces before number counting with possible '.' presence - if (_p >= r && *_p == '.' && (*(_p + 1) == ' ' || *(_p + 1) == '.') && (*(_p+2) == '-' || *(_p+2) == '+') ) { - if (*(_p + 1) == ' ') { - p_r--; - } - p_r--; - } - - // Remove spaces after a opening bracket when followed by a number - if (_p >= r && *(_p+1) == '(' && *(_p+2) == ' ') { - p_r--; - } - - // Remove spaces before number - if ( _p >= r && is_space_char(*(_p + 2))) { - // A point can be found prior to a number in case of query grouping - if ( _p >= r && ( *(_p+1) == '-' || *(_p+1) == '+' || *(_p+1) == '*' || *(_p+1) == '/' || *(_p+1) == '%' || *(_p+1) == ',' || *(_p+1) == '.')) { - p_r--; - } - } - - replace_with_q_mark( - grouping_digest, grouping_lim, &grouping_count, &p_r, &grouping_limit_exceeded - ); - - if(len == i+1) - { - if(is_token_char(*s)) - *p_r++ = *s; - i++; - continue; - } - } else { - // collapse any digits found in the string - if (replace_number) { - int str_len = p_r - p_r_t + 1; - int collapsed = 0; - - for (int j = 0; j < str_len; j++) { - char* const c_p_r_t = ((char*)p_r_t + j); - char* const n_p_r_t = ((char*)p_r_t + j + 1); - - if (is_digit_char(*c_p_r_t) && is_digit_char(*n_p_r_t)) { - memmove(c_p_r_t, c_p_r_t + 1, str_len - j); - collapsed += 1; - } - } - - p_r -= collapsed; - - int new_str_len = p_r - p_r_t + 1; - for (int j = 0; j < new_str_len; j++) { - char* const c_p_r_t = ((char*)p_r_t + j); - if (is_digit_char(*c_p_r_t)) { - *c_p_r_t = '?'; - } - } - } - } - - flag = 0; - } - } - } - - // ================================================= - // COPY CHAR - // ================================================= - // convert every space char to ' ' - if (*s == ')') { - if (grouping_digest > 0) { - grouping_digest -= 1; - }; - grouping_count = 0; - grouping_limit_exceeded = 0; - } - - if (lowercase==0) { - *p_r++ = !is_space_char(*s) ? *s : ' '; - } else { - *p_r++ = !is_space_char(*s) ? (tolower(*s)) : ' '; - } - - if (*s == '(') { - grouping_digest += 1; - grouping_count = 0; - grouping_limit_exceeded = 0; - } - - prev_char = *s++; - - i++; - } - - // remove a trailing space - if (p_r>r) { - char *e=p_r; - e--; - if (*e==' ') { - *e=0; - // maybe 2 trailing spaces . It happens with comments - e--; - if (*e==' ') { - *e=0; - } - } - } - - *p_r = 0; - - // process query stats - return r; -} - /** * @brief Struct for holding all the configuration options used for query digests generation. */ diff --git a/lib/c_tokenizer_legacy.cpp b/lib/c_tokenizer_legacy.cpp new file mode 100644 index 0000000000..ae93027afe --- /dev/null +++ b/lib/c_tokenizer_legacy.cpp @@ -0,0 +1,588 @@ +/* + this file is here only for reference. + It includes the old mysql_query_digest_and_first_comment() , outdated since ProxySQL 2.4.0 +*/ +char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comment, char *buf){ + int i = 0; + + char cur_comment[FIRST_COMMENT_MAX_LENGTH]; + cur_comment[0]=0; + int ccl=0; + int cmd=0; + + int len = _len; + if (_len > mysql_thread___query_digests_max_query_length) { + len = mysql_thread___query_digests_max_query_length; + } + char *r = buf; + if (r==NULL) { + r = (char *) malloc(len + SIZECHAR); + } + char *p_r = r; + char *p_r_t = r; + + char prev_char = 0; + char qutr_char = 0; + + char flag = 0; + char fc=0; + int fc_len=0; + + char fns=0; + + bool lowercase=0; + bool replace_null=0; + bool replace_number=0; + + char grouping_digest=0; + char grouping_limit_exceeded=0; + int grouping_count=0; + int grouping_lim = mysql_thread___query_digests_grouping_limit; + + lowercase=mysql_thread___query_digests_lowercase; + replace_null = mysql_thread___query_digests_replace_null; + replace_number = mysql_thread___query_digests_no_digits; + + while(i < len) + { + // Handy for debugging purposes + // ============================ + // printf( + // "state-1: { flag: `%d`, prev_char: `%c`, s: `%s`, p_r: `%s`, r: `%s`}\n", + // flag, prev_char, s, p_r, r + // ); + // ============================ + + // ================================================= + // START - read token char and set flag what's going on. + // ================================================= + if(flag == 0) + { + // store current position + p_r_t = p_r; + + // comment type 1 - start with '/*' + if(prev_char == '/' && *s == '*') + { + ccl=0; + flag = 1; + if (i != (len-1) && *(s+1)=='!') + cmd=1; + } + + // comment type 2 - start with '#' + else if(*s == '#') + { + flag = 2; + } + + // comment type 3 - start with '--' + + // NOTE: Looks like the general rule for parsing comments of this type could simply be: + // + // - `.*--.*` which could be translated into `(*s == '-' && *(s+1) == '-')`. + // + // But this can not hold, since the first '-' could have been consumed previously, for example + // during the parsing of a digit: + // + // - `select 1.1-- final_comment\n` + // + // For this reason 'prev_char' needs to be checked too when searching for the `--` pattern. + else if(i != (len-1) && prev_char == '-' && *s == '-' && ((*(s+1)==' ') || (*(s+1)=='\n') || (*(s+1)=='\r') || (*(s+1)=='\t') )) + { + flag = 3; + } + + // Previous character can be a consumed ' ' instead of '-' as in the previous case, for this + // reason, we need to look ahead for '--'. + // + // NOTE: There is no reason for not checking for the subsequent space char that should follow + // the '-- ', otherwise we would consider valid queries as `SELECT --1` like comments. + else if (i != (len-1) && *s == '-' && (*(s+1)=='-')) { + if (prev_char != '-') { + flag = 3; + } + else if (i==0) { + flag = 3; + } + } + + // string - start with ' + else if(*s == '\'' || *s == '"') + { + flag = 4; + qutr_char = *s; + } + + // may be digit - start with digit + else if(is_token_char(prev_char) && is_digit_char(*s)) + { + flag = 5; + if(len == i+1) + continue; + } + + // not above case - remove duplicated space char + else + { + flag = 0; + if (fns==0 && is_space_char(*s)) { + s++; + i++; + continue; + } + if (fns==0) fns=1; + if(is_space_char(prev_char) && is_space_char(*s)){ + prev_char = ' '; + *p_r = ' '; + s++; + i++; + continue; + } + if (replace_number) { + if (!is_digit_char(prev_char) && is_digit_char(*s)) { + *p_r++ = '?'; + while(*s != '\0' && is_digit_char(*s)) { + s++; + i++; + } + } + } + { + char* p = p_r - 2; + // suppress spaces before arithmetic operators + if (p >= r && is_space_char(prev_char) && is_arithmetic_op(*s)) { + if (*p == '?') { + prev_char = *s; + --p_r; + *p_r++ = *s; + s++; + i++; + continue; + } + } + // suppress spaces before and after commas + if (p >= r && is_space_char(prev_char) && ((*s == ',') || (*p == ','))) { + if (*s == ',') { + --p_r; + // only copy the comma if we are not grouping a query + if (!grouping_limit_exceeded) { + *p_r++ = *s; + } + prev_char = ','; + s++; + i++; + } else { + prev_char = ','; + --p_r; + } + continue; + } + // suppress spaces before closing brackets when grouping or mark is present + if (p >= r && (*p == '.' || *p == '?') && is_space_char(prev_char) && (*s == ')')) { + prev_char = *s; + --p_r; + *p_r++ = *s; + s++; + i++; + continue; + } + } + if (replace_null) { + if (*s == 'n' || *s == 'N') { // we search for NULL , #2171 + if (i && is_token_char(prev_char)) { + if (len>=4) { + if (i=2) fc_len-=2; + char *c=*first_comment+fc_len; + *c=0; + //*first_comment[fc_len]=0; + fc=2; + } + } + } + if( + // comment type 1 - /* .. */ + (flag == 1 && prev_char == '*' && *s == '/') || + + // comment type 2 - # ... \n + (flag == 2 && (*s == '\n' || *s == '\r' || (i == len - 1) )) + || + // comment type 3 - -- ... \n + (flag == 3 && (*s == '\n' || *s == '\r' || (i == len -1) )) + ) + { + p_r = p_r_t; + if (flag == 1 || (i == len -1)) { + p_r -= SIZECHAR; + } + if (cmd) { + cur_comment[ccl]=0; + if (ccl>=2) { + ccl-=2; + cur_comment[ccl]=0; + char el=0; + int fcc=0; + while (el==0 && fcc= r && ( *(_p+2) == '-' || *(_p+2) == '+') ) { + if ( + ( *(_p+1) == ',' ) || ( *(_p+1) == '(' ) || + ( ( *(_p+1) == ' ' ) && ( *_p == ',' || *_p == '(' ) ) + ) { + p_r--; + } + } + + replace_with_q_mark( + grouping_digest, grouping_lim, &grouping_count, &p_r, &grouping_limit_exceeded + ); + + flag = 0; + break; + } + + // need to be ignored case + if(p_r > p_r_t + SIZECHAR) + { + if( + (prev_char == '\\' && *s == '\\') || // to process '\\\\', '\\' + (prev_char == '\\' && *s == qutr_char) || // to process '\'' + (prev_char == qutr_char && *s == qutr_char) // to process '''' + ) + { + prev_char = 'X'; + s++; + i++; + continue; + } + } + + // satisfied closing string - swap string to ? + if(*s == qutr_char && (len == i+1 || *(s + SIZECHAR) != qutr_char)) + { + char *_p = p_r_t; + _p-=3; + p_r = p_r_t; + if ( _p >= r && ( *(_p+2) == '-' || *(_p+2) == '+') ) { + if ( + ( *(_p+1) == ',' ) || ( *(_p+1) == '(' ) || + ( ( *(_p+1) == ' ' ) && ( *_p == ',' || *_p == '(' ) ) + ) { + p_r--; + } + } + + // Remove spaces before each literal found + if ( _p >= r && is_space_char(*(_p + 2)) && !is_normal_char(*(_p + 1))) { + if ( _p >= r && ( *(_p+3) == '\''|| *(_p+3) == '"' )) { + p_r--; + } + } + + replace_with_q_mark( + grouping_digest, grouping_lim, &grouping_count, &p_r, &grouping_limit_exceeded + ); + + prev_char = qutr_char; + qutr_char = 0; + flag = 0; + if(i < len) + s++; + i++; + continue; + } + } + + // -------- + // digit + // -------- + else if(flag == 5) + { + // last single char + if(p_r_t == p_r) + { + char *_p = p_r_t; + _p-=3; + if ( _p >= r && ( *(_p+2) == '-' || *(_p+2) == '+') ) { + if ( + ( *(_p+1) == ',' ) || ( *(_p+1) == '(' ) || + ( ( *(_p+1) == ' ' ) && ( *_p == ',' || *_p == '(' ) ) + ) { + p_r--; + } + } + if ( _p >= r && is_space_char(*(_p + 2))) { + if ( _p >= r && ( *(_p+1) == '-' || *(_p+1) == '+' || *(_p+1) == '*' || *(_p+1) == '/' || *(_p+1) == '%' || *(_p+1) == ',')) { + p_r--; + } + } + *p_r++ = '?'; + i++; + continue; + } + + // is float + if (*s == '.' || *s == 'e' || ((*s == '+' || *s == '-') && prev_char == 'e')) { + prev_char = *s; + i++; + s++; + continue; + } + + // token char or last char + if(is_token_char(*s) || len == i+1) + { + if(is_digit_string(p_r_t, p_r)) + { + char *_p = p_r_t; + _p-=3; + p_r = p_r_t; + // remove symbol and keep parenthesis or comma + if ( _p >= r && ( *(_p+2) == '-' || *(_p+2) == '+') ) { + if ( + ( *(_p+1) == ',' ) || ( *(_p+1) == '(' ) || + ( ( *(_p+1) == ' ' ) && ( *_p == ',' || *_p == '(' ) ) + ) { + p_r--; + } + } + + // Remove spaces before number counting with possible '.' presence + if (_p >= r && *_p == '.' && (*(_p + 1) == ' ' || *(_p + 1) == '.') && (*(_p+2) == '-' || *(_p+2) == '+') ) { + if (*(_p + 1) == ' ') { + p_r--; + } + p_r--; + } + + // Remove spaces after a opening bracket when followed by a number + if (_p >= r && *(_p+1) == '(' && *(_p+2) == ' ') { + p_r--; + } + + // Remove spaces before number + if ( _p >= r && is_space_char(*(_p + 2))) { + // A point can be found prior to a number in case of query grouping + if ( _p >= r && ( *(_p+1) == '-' || *(_p+1) == '+' || *(_p+1) == '*' || *(_p+1) == '/' || *(_p+1) == '%' || *(_p+1) == ',' || *(_p+1) == '.')) { + p_r--; + } + } + + replace_with_q_mark( + grouping_digest, grouping_lim, &grouping_count, &p_r, &grouping_limit_exceeded + ); + + if(len == i+1) + { + if(is_token_char(*s)) + *p_r++ = *s; + i++; + continue; + } + } else { + // collapse any digits found in the string + if (replace_number) { + int str_len = p_r - p_r_t + 1; + int collapsed = 0; + + for (int j = 0; j < str_len; j++) { + char* const c_p_r_t = ((char*)p_r_t + j); + char* const n_p_r_t = ((char*)p_r_t + j + 1); + + if (is_digit_char(*c_p_r_t) && is_digit_char(*n_p_r_t)) { + memmove(c_p_r_t, c_p_r_t + 1, str_len - j); + collapsed += 1; + } + } + + p_r -= collapsed; + + int new_str_len = p_r - p_r_t + 1; + for (int j = 0; j < new_str_len; j++) { + char* const c_p_r_t = ((char*)p_r_t + j); + if (is_digit_char(*c_p_r_t)) { + *c_p_r_t = '?'; + } + } + } + } + + flag = 0; + } + } + } + + // ================================================= + // COPY CHAR + // ================================================= + // convert every space char to ' ' + if (*s == ')') { + if (grouping_digest > 0) { + grouping_digest -= 1; + }; + grouping_count = 0; + grouping_limit_exceeded = 0; + } + + if (lowercase==0) { + *p_r++ = !is_space_char(*s) ? *s : ' '; + } else { + *p_r++ = !is_space_char(*s) ? (tolower(*s)) : ' '; + } + + if (*s == '(') { + grouping_digest += 1; + grouping_count = 0; + grouping_limit_exceeded = 0; + } + + prev_char = *s++; + + i++; + } + + // remove a trailing space + if (p_r>r) { + char *e=p_r; + e--; + if (*e==' ') { + *e=0; + // maybe 2 trailing spaces . It happens with comments + e--; + if (*e==' ') { + *e=0; + } + } + } + + *p_r = 0; + + // process query stats + return r; +}