From fccef7f64edc8025f26c1b68cc1aba811d2bf680 Mon Sep 17 00:00:00 2001 From: Brice Figureau Date: Mon, 28 May 2018 10:16:27 +0200 Subject: [PATCH 1/3] Parse multiple variable (simple) SET commands The current parser for SET in MySQL_Session is not able to parse multiple variables SET commands like: SET sql_mode='TRADITIONAL', NAMES utf8 COLLATE unicode_ci This patch introduces a simple regex based parser for all variation of simple variables. This is not a generic SET parser, though. --- include/set_parser.h | 16 ++ lib/Makefile | 2 +- lib/MySQL_Session.cpp | 302 +++++++++---------------- lib/set_parser.cpp | 65 ++++++ test/set_parser_test/Makefile | 62 +++++ test/set_parser_test/setparsertest.cpp | 139 ++++++++++++ 6 files changed, 390 insertions(+), 196 deletions(-) create mode 100644 include/set_parser.h create mode 100644 lib/set_parser.cpp create mode 100644 test/set_parser_test/Makefile create mode 100644 test/set_parser_test/setparsertest.cpp diff --git a/include/set_parser.h b/include/set_parser.h new file mode 100644 index 0000000000..61a5c8ca8c --- /dev/null +++ b/include/set_parser.h @@ -0,0 +1,16 @@ +#ifndef __CLASS_SET_PARSER_H +#define __CLASS_SET_PARSER_H +#include +#include +#include + +class SetParser { + private: + std::string query; + public: + SetParser(std::string q); + std::map> parse(); +}; + + +#endif /* __CLASS_SET_PARSER_H */ diff --git a/lib/Makefile b/lib/Makefile index 9932c76363..e4eb880791 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -92,7 +92,7 @@ default: libproxysql.a _OBJ = c_tokenizer.o OBJ = $(patsubst %,$(ODIR)/%,$(_OBJ)) -_OBJ_CXX = ProxySQL_GloVars.oo network.oo debug.oo configfile.oo Query_Cache.oo SpookyV2.oo MySQL_Authentication.oo gen_utils.oo sqlite3db.oo mysql_connection.oo MySQL_HostGroups_Manager.oo mysql_data_stream.oo MySQL_Thread.oo MySQL_Session.oo MySQL_Protocol.oo mysql_backend.oo Query_Processor.oo ProxySQL_Admin.oo MySQL_Monitor.oo MySQL_Logger.oo thread.oo MySQL_PreparedStatement.oo ProxySQL_Cluster.oo SQLite3_Server.oo ClickHouse_Authentication.oo ClickHouse_Server.oo ProxySQL_Statistics.oo Chart_bundle_js.oo ProxySQL_HTTP_Server.oo font-awesome.min.css.oo main-bundle.min.css.oo +_OBJ_CXX = ProxySQL_GloVars.oo network.oo debug.oo configfile.oo Query_Cache.oo SpookyV2.oo MySQL_Authentication.oo gen_utils.oo sqlite3db.oo mysql_connection.oo MySQL_HostGroups_Manager.oo mysql_data_stream.oo MySQL_Thread.oo MySQL_Session.oo MySQL_Protocol.oo mysql_backend.oo Query_Processor.oo ProxySQL_Admin.oo MySQL_Monitor.oo MySQL_Logger.oo thread.oo MySQL_PreparedStatement.oo ProxySQL_Cluster.oo SQLite3_Server.oo ClickHouse_Authentication.oo ClickHouse_Server.oo ProxySQL_Statistics.oo Chart_bundle_js.oo ProxySQL_HTTP_Server.oo font-awesome.min.css.oo main-bundle.min.css.oo set_parser.oo OBJ_CXX = $(patsubst %,$(ODIR)/%,$(_OBJ_CXX)) %.ko: %.cpp diff --git a/lib/MySQL_Session.cpp b/lib/MySQL_Session.cpp index 66aac88acf..2a928ae2af 100644 --- a/lib/MySQL_Session.cpp +++ b/lib/MySQL_Session.cpp @@ -3,6 +3,7 @@ #include "re2/re2.h" #include "re2/regexp.h" #include "SpookyV2.h" +#include "set_parser.h" #define SELECT_VERSION_COMMENT "select @@version_comment limit 1" #define SELECT_VERSION_COMMENT_LEN 32 @@ -3909,81 +3910,69 @@ bool MySQL_Session::handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_C } } } - if (match_regexes && match_regexes[1]->match(dig)) { - { - int query_no_space_length = nq.length(); - char *query_no_space=(char *)malloc(query_no_space_length+1); - memcpy(query_no_space,nq.c_str(),query_no_space_length); - query_no_space[query_no_space_length]='\0'; - query_no_space_length=remove_spaces(query_no_space); - nq = string(query_no_space); - free(query_no_space); - } - // set sql_mode - re2::RE2::Options *opt2=new re2::RE2::Options(RE2::Quiet); - opt2->set_case_sensitive(false); - char *pattern=(char *)"^(?: *)SET *(?:|SESSION +|@@|@@session.)SQL_MODE *(?:|:)= *(?:'||\")((\\w|,)*)(?:'||\") *(?:(|;|-- .*|#.*))$"; - re2::RE2 *re=new RE2(pattern, *opt2); - string s; - rc=RE2::PartialMatch(nq, *re, &s); - delete re; - delete opt2; - if (rc) { - //fprintf(stderr,"sql_mode='%s'\n", s.c_str()); - uint32_t sql_mode_int=SpookyHash::Hash32(s.c_str(),s.length(),10); - if (client_myds->myconn->options.sql_mode_int != sql_mode_int) { - //fprintf(stderr,"sql_mode_int='%u'\n", sql_mode_int); - client_myds->myconn->options.sql_mode_int = sql_mode_int; - if (client_myds->myconn->options.sql_mode) { - free(client_myds->myconn->options.sql_mode); - } - client_myds->myconn->options.sql_mode=strdup(s.c_str()); - } - if (command_type == _MYSQL_COM_QUERY) { - client_myds->DSS=STATE_QUERY_SENT_NET; - uint16_t setStatus = (nTrx ? SERVER_STATUS_IN_TRANS : 0 ); - if (autocommit) setStatus= SERVER_STATUS_AUTOCOMMIT; - client_myds->myprot.generate_pkt_OK(true,NULL,NULL,1,0,0,setStatus,0,NULL); - client_myds->DSS=STATE_SLEEP; - status=WAITING_CLIENT_DATA; - l_free(pkt->size,pkt->ptr); - RequestEnd(NULL); - return true; + if (match_regexes && (match_regexes[1]->match(dig) || match_regexes[2]->match(dig))) { + proxy_debug(PROXY_DEBUG_MYSQL_COM, 5, "Parsing SET command %s\n", nq.c_str()); + SetParser parser(nq); + std::map> set = parser.parse(); + for(auto it = std::begin(set); it != std::end(set); ++it) { + std::string var = it->first; + proxy_debug(PROXY_DEBUG_MYSQL_COM, 5, "Processing SET variable %s\n", var.c_str()); + if (it->second.size() < 1 || it->second.size() > 2) { + // error not enough arguments + string nqn = string((char *)CurrentQuery.QueryPointer,CurrentQuery.QueryLength); + proxy_error("Unable to parse query. If correct, report it as a bug: %s\n", nqn.c_str()); + return false; } - } else { - // try case listed in #1279 - // this is not a complete solution. A right solution involves true parsing - re2::RE2::Options *opt2=new re2::RE2::Options(RE2::Quiet); - opt2->set_case_sensitive(false); - char *pattern=(char *)"^(?: *)SET *(?:|SESSION +|@@|@@session.)SQL_MODE *(?:|:)= *(?:'||\")((\\w|,)*)(?:'||\")(?: *, *NAMES *)(?:'||\")((\\w|\\d)*)(?:'||\")(?:| +COLLATE +(?:'||\")((\\w|\\d)*)(?:'||\")) *(?:(|;|-- .*|#.*))$"; - re2::RE2 *re=new RE2(pattern, *opt2); - string s1; - string s2; - string s3; - rc=RE2::FullMatch(nq, *re, &s1, (void *)NULL, &s2, (void *)NULL, &s3); - //proxy_info("s1 = %s\n",s1.c_str()); - //proxy_info("s2 = %s\n",s2.c_str()); - //proxy_info("s3 = %s\n",s3.c_str()); - delete re; - delete opt2; - if (rc) { + auto values = std::begin(it->second); + if (var == "sql_mode") { + std::string value1 = *values; + proxy_debug(PROXY_DEBUG_MYSQL_COM, 5, "Processing SET SQL Mode value %s\n", value1.c_str()); + uint32_t sql_mode_int=SpookyHash::Hash32(value1.c_str(),value1.length(),10); + if (client_myds->myconn->options.sql_mode_int != sql_mode_int) { + //fprintf(stderr,"sql_mode_int='%u'\n", sql_mode_int); + client_myds->myconn->options.sql_mode_int = sql_mode_int; + if (client_myds->myconn->options.sql_mode) { + free(client_myds->myconn->options.sql_mode); + } + proxy_debug(PROXY_DEBUG_MYSQL_COM, 8, "Changing connection SQL Mode to %s\n", value1.c_str()); + client_myds->myconn->options.sql_mode=strdup(value1.c_str()); + } + } else if (var == "time_zone") { + std::string value1 = *values; + proxy_debug(PROXY_DEBUG_MYSQL_COM, 5, "Processing SET Time Zone value %s\n", value1.c_str()); + uint32_t time_zone_int=SpookyHash::Hash32(value1.c_str(),value1.length(),10); + if (client_myds->myconn->options.time_zone_int != time_zone_int) { + //fprintf(stderr,"time_zone_int='%u'\n", time_zone_int); + client_myds->myconn->options.time_zone_int = time_zone_int; + if (client_myds->myconn->options.time_zone) { + free(client_myds->myconn->options.time_zone); + } + proxy_debug(PROXY_DEBUG_MYSQL_COM, 8, "Changing connection Time zone to %s\n", value1.c_str()); + client_myds->myconn->options.time_zone=strdup(value1.c_str()); + } + } else if (var == "names") { + std::string value1 = *values++; + proxy_debug(PROXY_DEBUG_MYSQL_COM, 5, "Processing SET NAMES %s\n", value1.c_str()); const MARIADB_CHARSET_INFO * c; - if (s3.length()) { - c = proxysql_find_charset_collate_names(s2.c_str(), s3.c_str()); + std::string value2; + if (values != std::end(it->second)) { + value2 = *values; + proxy_debug(PROXY_DEBUG_MYSQL_COM, 5, "Processing SET NAMES With COLLATE %s\n", value2.c_str()); + c = proxysql_find_charset_collate_names(value1.c_str(), value2.c_str()); } else { - c = proxysql_find_charset_name(s2.c_str()); + c = proxysql_find_charset_name(value1.c_str()); } if (!c) { char *m = NULL; char *errmsg = NULL; - if (s3.length()) { + if (value2.length()) { m=(char *)"Unknown character set '%s' or collation '%s'"; - errmsg=(char *)malloc(s2.length() + s3.length() + strlen(m)); - sprintf(errmsg,m,s2.c_str(), s3.c_str()); + errmsg=(char *)malloc(value1.length() + value2.length() + strlen(m)); + sprintf(errmsg,m,value1.c_str(), value2.c_str()); } else { m=(char *)"Unknown character set: '%s'"; - errmsg=(char *)malloc(s2.length()+strlen(m)); - sprintf(errmsg,m,s2.c_str()); + errmsg=(char *)malloc(value1.length()+strlen(m)); + sprintf(errmsg,m,value1.c_str()); } client_myds->DSS=STATE_QUERY_SENT_NET; client_myds->myprot.generate_pkt_ERR(true,NULL,NULL,1,1115,(char *)"42000",errmsg); @@ -3992,86 +3981,53 @@ bool MySQL_Session::handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_C free(errmsg); return true; } else { - uint32_t sql_mode_int=SpookyHash::Hash32(s1.c_str(),s1.length(),10); - if (client_myds->myconn->options.sql_mode_int != sql_mode_int) { - client_myds->myconn->options.sql_mode_int = sql_mode_int; - if (client_myds->myconn->options.sql_mode) { - free(client_myds->myconn->options.sql_mode); - } - client_myds->myconn->options.sql_mode=strdup(s1.c_str()); - } + proxy_debug(PROXY_DEBUG_MYSQL_COM, 8, "Changing connection charset to %d\n", c->nr); client_myds->myconn->set_charset(c->nr); - if (command_type == _MYSQL_COM_QUERY) { - client_myds->DSS=STATE_QUERY_SENT_NET; - uint16_t setStatus = (nTrx ? SERVER_STATUS_IN_TRANS : 0 ); - if (autocommit) setStatus= SERVER_STATUS_AUTOCOMMIT; - client_myds->myprot.generate_pkt_OK(true,NULL,NULL,1,0,0,setStatus,0,NULL); - client_myds->DSS=STATE_SLEEP; - status=WAITING_CLIENT_DATA; - l_free(pkt->size,pkt->ptr); - RequestEnd(NULL); - return true; + } + } + } + + // parseSetCommand wasn't able to parse anything... + if (set.size() == 0) { + // try case listed in #1373 + // SET @@SESSION.sql_mode = CONCAT(CONCAT(@@sql_mode, ',STRICT_ALL_TABLES'), ',NO_AUTO_VALUE_ON_ZERO'), @@SESSION.sql_auto_is_null = 0, @@SESSION.wait_timeout = 2147483 + // this is not a complete solution. A right solution involves true parsing + int query_no_space_length = nq.length(); + char *query_no_space=(char *)malloc(query_no_space_length+1); + memcpy(query_no_space,nq.c_str(),query_no_space_length); + query_no_space[query_no_space_length]='\0'; + query_no_space_length=remove_spaces(query_no_space); + + string nq1 = string(query_no_space); + free(query_no_space); + RE2::GlobalReplace(&nq1,(char *)"SESSION.",(char *)""); + RE2::GlobalReplace(&nq1,(char *)"SESSION ",(char *)""); + RE2::GlobalReplace(&nq1,(char *)"session.",(char *)""); + RE2::GlobalReplace(&nq1,(char *)"session ",(char *)""); + //fprintf(stderr,"%s\n",nq1.c_str()); + re2::RE2::Options *opt2=new re2::RE2::Options(RE2::Quiet); + opt2->set_case_sensitive(false); + char *pattern=(char *)"^SET @@SQL_MODE *(?:|:)= *(?:'||\")(.*)(?:'||\") *, *@@sql_auto_is_null *(?:|:)= *(?:(?:\\w|\\d)*) *, @@wait_timeout *(?:|:)= *(?:\\d*)$"; + re2::RE2 *re=new RE2(pattern, *opt2); + string s1; + rc=RE2::FullMatch(nq1, *re, &s1); + delete re; + delete opt2; + if (rc) { + uint32_t sql_mode_int=SpookyHash::Hash32(s1.c_str(),s1.length(),10); + if (client_myds->myconn->options.sql_mode_int != sql_mode_int) { + client_myds->myconn->options.sql_mode_int = sql_mode_int; + if (client_myds->myconn->options.sql_mode) { + free(client_myds->myconn->options.sql_mode); } + client_myds->myconn->options.sql_mode=strdup(s1.c_str()); } } else { - // try case listed in #1373 - // SET @@SESSION.sql_mode = CONCAT(CONCAT(@@sql_mode, ',STRICT_ALL_TABLES'), ',NO_AUTO_VALUE_ON_ZERO'), @@SESSION.sql_auto_is_null = 0, @@SESSION.wait_timeout = 2147483 - // this is not a complete solution. A right solution involves true parsing - int query_no_space_length = nq.length(); - char *query_no_space=(char *)malloc(query_no_space_length+1); - memcpy(query_no_space,nq.c_str(),query_no_space_length); - query_no_space[query_no_space_length]='\0'; - query_no_space_length=remove_spaces(query_no_space); - - string nq1 = string(query_no_space); - free(query_no_space); - RE2::GlobalReplace(&nq1,(char *)"SESSION.",(char *)""); - RE2::GlobalReplace(&nq1,(char *)"SESSION ",(char *)""); - RE2::GlobalReplace(&nq1,(char *)"session.",(char *)""); - RE2::GlobalReplace(&nq1,(char *)"session ",(char *)""); - //fprintf(stderr,"%s\n",nq1.c_str()); - re2::RE2::Options *opt2=new re2::RE2::Options(RE2::Quiet); - opt2->set_case_sensitive(false); - char *pattern=(char *)"^SET @@SQL_MODE *(?:|:)= *(?:'||\")(.*)(?:'||\") *, *@@sql_auto_is_null *(?:|:)= *(?:(?:\\w|\\d)*) *, @@wait_timeout *(?:|:)= *(?:\\d*)$"; - re2::RE2 *re=new RE2(pattern, *opt2); - string s1; - rc=RE2::FullMatch(nq1, *re, &s1); - delete re; - delete opt2; - if (rc) { - uint32_t sql_mode_int=SpookyHash::Hash32(s1.c_str(),s1.length(),10); - if (client_myds->myconn->options.sql_mode_int != sql_mode_int) { - client_myds->myconn->options.sql_mode_int = sql_mode_int; - if (client_myds->myconn->options.sql_mode) { - free(client_myds->myconn->options.sql_mode); - } - client_myds->myconn->options.sql_mode=strdup(s.c_str()); - } - if (command_type == _MYSQL_COM_QUERY) { - client_myds->DSS=STATE_QUERY_SENT_NET; - uint16_t setStatus = (nTrx ? SERVER_STATUS_IN_TRANS : 0 ); - if (autocommit) setStatus= SERVER_STATUS_AUTOCOMMIT; - client_myds->myprot.generate_pkt_OK(true,NULL,NULL,1,0,0,setStatus,0,NULL); - client_myds->DSS=STATE_SLEEP; - status=WAITING_CLIENT_DATA; - l_free(pkt->size,pkt->ptr); - RequestEnd(NULL); - return true; - } - } else { - int kq = 0; - kq = strncmp((const char *)CurrentQuery.QueryPointer, (const char *)"/*!40101 SET SQL_MODE=@OLD_SQL_MODE */" , CurrentQuery.QueryLength); - if (kq == 0) { - client_myds->DSS=STATE_QUERY_SENT_NET; - uint16_t setStatus = (nTrx ? SERVER_STATUS_IN_TRANS : 0 ); - if (autocommit) setStatus= SERVER_STATUS_AUTOCOMMIT; - client_myds->myprot.generate_pkt_OK(true,NULL,NULL,1,0,0,setStatus,0,NULL); - client_myds->DSS=STATE_SLEEP; - status=WAITING_CLIENT_DATA; - l_free(pkt->size,pkt->ptr); - RequestEnd(NULL); - return true; - } else { + int kq = 0; + kq = strncmp((const char *)CurrentQuery.QueryPointer, (const char *)"/*!40101 SET SQL_MODE=@OLD_SQL_MODE */" , CurrentQuery.QueryLength); + if (kq != 0) { + kq = strncmp((const char *)CurrentQuery.QueryPointer, (const char *)"/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */" , CurrentQuery.QueryLength); + if (kq != 0) { string nqn = string((char *)CurrentQuery.QueryPointer,CurrentQuery.QueryLength); proxy_error("Unable to parse query. If correct, report it as a bug: %s\n", nqn.c_str()); return false; @@ -4079,60 +4035,17 @@ bool MySQL_Session::handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_C } } } - } - if (match_regexes && match_regexes[2]->match(dig)) { - // set time_zone - re2::RE2::Options *opt2=new re2::RE2::Options(RE2::Quiet); - opt2->set_case_sensitive(false); - char *pattern=(char *)"^(?: *)SET *(?:|SESSION +|@@|@@session.)TIME_ZONE *(?:|:)= *(?:'||\")((\\w|/|:|\\d|\\+|-)*)(?:'||\") *(?:(|;|-- .*|#.*))$"; - re2::RE2 *re=new RE2(pattern, *opt2); - string s; - rc=RE2::PartialMatch(nq, *re, &s); - delete re; - delete opt2; - if (rc) { - //fprintf(stderr,"time_zone='%s'\n", s.c_str()); -#ifdef DEBUG - proxy_info("Setting TIME_ZONE to %s\n", s.c_str()); -#endif - uint32_t time_zone_int=SpookyHash::Hash32(s.c_str(),s.length(),10); - if (client_myds->myconn->options.time_zone_int != time_zone_int) { - //fprintf(stderr,"time_zone_int='%u'\n", time_zone_int); - client_myds->myconn->options.time_zone_int = time_zone_int; - if (client_myds->myconn->options.time_zone) { - free(client_myds->myconn->options.time_zone); - } - client_myds->myconn->options.time_zone=strdup(s.c_str()); - } - if (command_type == _MYSQL_COM_QUERY) { - client_myds->DSS=STATE_QUERY_SENT_NET; - uint16_t setStatus = (nTrx ? SERVER_STATUS_IN_TRANS : 0 ); - if (autocommit) setStatus += SERVER_STATUS_AUTOCOMMIT; - client_myds->myprot.generate_pkt_OK(true,NULL,NULL,1,0,0,setStatus,0,NULL); - client_myds->DSS=STATE_SLEEP; - status=WAITING_CLIENT_DATA; - l_free(pkt->size,pkt->ptr); - RequestEnd(NULL); - return true; - } - } else { - int kq = 0; - kq = strncmp((const char *)CurrentQuery.QueryPointer, (const char *)"/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */" , CurrentQuery.QueryLength); - if (kq == 0) { - client_myds->DSS=STATE_QUERY_SENT_NET; - uint16_t setStatus = (nTrx ? SERVER_STATUS_IN_TRANS : 0 ); - if (autocommit) setStatus += SERVER_STATUS_AUTOCOMMIT; - client_myds->myprot.generate_pkt_OK(true,NULL,NULL,1,0,0,setStatus,0,NULL); + + if (command_type == _MYSQL_COM_QUERY) { + client_myds->DSS=STATE_QUERY_SENT_NET; + uint16_t setStatus = (nTrx ? SERVER_STATUS_IN_TRANS : 0 ); + if (autocommit) setStatus= SERVER_STATUS_AUTOCOMMIT; + client_myds->myprot.generate_pkt_OK(true,NULL,NULL,1,0,0,setStatus,0,NULL); client_myds->DSS=STATE_SLEEP; - status=WAITING_CLIENT_DATA; - l_free(pkt->size,pkt->ptr); - RequestEnd(NULL); - return true; - } else { - string nqn = string((char *)CurrentQuery.QueryPointer,CurrentQuery.QueryLength); - proxy_error("Unable to parse query. If correct, report it as a bug: %s\n", nqn.c_str()); - return false; - } + status=WAITING_CLIENT_DATA; + l_free(pkt->size,pkt->ptr); + RequestEnd(NULL); + return true; } } } @@ -4247,7 +4160,6 @@ bool MySQL_Session::handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_C return false; } - void MySQL_Session::handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_COM_STATISTICS(PtrSize_t *pkt) { proxy_debug(PROXY_DEBUG_MYSQL_COM, 5, "Got COM_STATISTICS packet\n"); l_free(pkt->size,pkt->ptr); diff --git a/lib/set_parser.cpp b/lib/set_parser.cpp new file mode 100644 index 0000000000..4d5e767551 --- /dev/null +++ b/lib/set_parser.cpp @@ -0,0 +1,65 @@ +#include "set_parser.h" +#include "re2/re2.h" +#include "re2/regexp.h" +#include "gen_utils.h" +#include +#include +#include + +SetParser::SetParser(std::string nq) { + int query_no_space_length = nq.length(); + char *query_no_space=(char *)malloc(query_no_space_length+1); + memcpy(query_no_space,nq.c_str(),query_no_space_length); + query_no_space[query_no_space_length]='\0'; + query_no_space_length=remove_spaces(query_no_space); + query = string(query_no_space); + free(query_no_space); +} + +std::map> SetParser::parse() { + re2::RE2::Replace(&query, "^\\s*SET\\s+", ""); + + re2::RE2::Options *opt2=new re2::RE2::Options(RE2::Quiet); + opt2->set_case_sensitive(false); + opt2->set_longest_match(false); + + std::map> result; + +#define NAMES "(NAMES)" +#define QUOTES "(?:'||\")" +#define NAME_VALUE "((?:\\w|\\d)+)" +#define SESSION "(?:|SESSION +|@@|@@session.)" +#define VAR "(\\w+)" +#define SPACES " *" +#define VAR_VALUE "((?:[\\w/]|,)+)" + + const string pattern="(?:" NAMES SPACES QUOTES NAME_VALUE QUOTES "(?: +COLLATE +" QUOTES NAME_VALUE QUOTES "|)" "|" SESSION VAR SPACES "(?:|:)=" SPACES QUOTES VAR_VALUE QUOTES ") *,? *"; + re2::RE2 re(pattern, *opt2); + string var; + string value1, value2, value3, value4, value5; + re2::StringPiece input(query); + while (re2::RE2::Consume(&input, re, &value1, &value2, &value3, &value4, &value5)) { + std::vector op; + + string key; + if (value1 != "") { + // NAMES + key = value1; + op.push_back(value2); + if (value3 != "") { + op.push_back(value3); + } + } else if (value4 != "") { + // VARIABLE + key = value4; + op.push_back(value5); + } + + std::transform(key.begin(), key.end(), key.begin(), ::tolower); + result[key] = op; + } + return result; +} + + + diff --git a/test/set_parser_test/Makefile b/test/set_parser_test/Makefile new file mode 100644 index 0000000000..acb6cee0ee --- /dev/null +++ b/test/set_parser_test/Makefile @@ -0,0 +1,62 @@ + + + + +DEPS_PATH=../../deps + +RE2_PATH=$(DEPS_PATH)/re2/re2 +RE2_IDIR=$(RE2_PATH) + +MARIADB_PATH=$(DEPS_PATH)/mariadb-client-library/mariadb_client +MARIADB_IDIR=$(MARIADB_PATH)/include +MARIADB_LDIR=$(MARIADB_PATH)/libmariadb + +DAEMONPATH=$(DEPS_PATH)/libdaemon/libdaemon +DAEMONPATH_IDIR=$(DAEMONPATH) +DAEMONPATH_LDIR=$(DAEMONPATH)/libdaemon/.libs + +JEMALLOC_PATH=$(DEPS_PATH)/jemalloc/jemalloc +JEMALLOC_IDIR=$(JEMALLOC_PATH)/include/jemalloc +JEMALLOC_LDIR=$(JEMALLOC_PATH)/lib + +LIBCONFIG_PATH=$(DEPS_PATH)/libconfig/libconfig-1.4.9 +LIBCONFIG_IDIR=-I$(LIBCONFIG_PATH)/lib +LIBCONFIG_LDIR=-L$(LIBCONFIG_PATH)/lib/.libs + +SQLITE3_DIR=$(DEPS_PATH)/sqlite3/sqlite3 + +IDIR=../../include +LDIR=../../lib +IDIRS=-I$(IDIR) -I$(RE2_IDIR) -I$(JEMALLOC_IDIR) -I$(MARIADB_IDIR) $(LIBCONFIG_IDIR) -I$(DAEMONPATH_IDIR) -I$(SQLITE3_DIR) +LDIRS=-L$(LDIR) -L$(JEMALLOC_LDIR) $(LIBCONFIG_LDIR) -L$(RE2_PATH)/obj -L$(MARIADB_LDIR) -L$(DAEMONPATH_LDIR) + +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),Darwin) + IDIRS+= -I/usr/local/opt/openssl/include +endif + +LIBPROXYSQLAR=$(LDIR)/libproxysql.a + +MYCPPFLAGS=-std=c++11 $(IDIRS) $(OPTZ) $(DEBUG) -ggdb +LDFLAGS+= +MYLIBS=-Wl,--export-dynamic -Wl,-Bstatic -lconfig -lproxysql -ldaemon -ljemalloc -lconfig++ -lre2 -levent -lmariadbclient -Wl,-Bdynamic -lpthread -lm -lz -lrt -lcrypto -lssl $(EXTRALINK) + +ifeq ($(UNAME_S),Darwin) + MYLIBS=-lre2 -lpthread -lm -lz -liconv +endif +ifeq ($(UNAME_S),Linux) + MYLIBS+= -ldl +endif +ifeq ($(UNAME_S),FreeBSD) + MYLIBS+= -lexecinfo +endif + +.PHONY: default +default: setparsertest + +setparsertest: setparsertest.cpp $(RE2_PATH)/util/test.cc $(LDIR)/set_parser.cpp $(LIBPROXYSQLAR) + $(CXX) -o $@ $@.cpp $(RE2_PATH)/util/test.cc $(LIBPROXYSQLAR) $(MYCPPFLAGS) $(CPPFLAGS) $(LDIRS) $(LIBS) $(LDFLAGS) $(MYLIBS) + +clean: + rm -f *~ core $(default) + diff --git a/test/set_parser_test/setparsertest.cpp b/test/set_parser_test/setparsertest.cpp new file mode 100644 index 0000000000..0c3a313618 --- /dev/null +++ b/test/set_parser_test/setparsertest.cpp @@ -0,0 +1,139 @@ +#include "re2/re2.h" +#include "re2/regexp.h" +#include "util/test.h" +#include "set_parser.h" +#include +#include +#include +#include + +int remove_spaces(const char *s) { + char *inp = (char *)s, *outp = (char *)s; + bool prev_space = false; + bool fns = false; + while (*inp) { + if (isspace(*inp)) { + if (fns) { + if (!prev_space) { + *outp++ = ' '; + prev_space = true; + } + } + } else { + *outp++ = *inp; + prev_space = false; + if (!fns) fns=true; + } + ++inp; + } + if (outp>s) { + if (prev_space) { + outp--; + } + } + *outp = '\0'; + return strlen(s); +} + +bool iequals(const string& a, const string& b) +{ + unsigned int sz = a.size(); + if (b.size() != sz) + return false; + for (unsigned int i = 0; i < sz; ++i) + if (tolower(a[i]) != tolower(b[i])) + return false; + return true; +} + +void printMap(const char* prefix, const std::map>& dict) +{ + std::cout << prefix << ": "; + for(auto mapIt = begin(dict); mapIt != end(dict); ++mapIt) + { + std::cout << mapIt->first << " : "; + + for(auto c : mapIt->second) + { + std::cout << c << " "; + } + + std::cout << std::endl; + } +} + +struct Expected { + const char* var; + std::vector values; + Expected(const char* var, std::vector values): var(var), values(values){}; +}; + +struct Test { + const char* query; + std::vector results; +}; + +static Test sql_mode[] = { + { "SET @@sql_mode = 'TRADITIONAL'", { Expected("sql_mode", {"TRADITIONAL"}) } }, + { "SET SESSION sql_mode = 'TRADITIONAL'", { Expected("sql_mode", {"TRADITIONAL"}) } }, + { "SET @@session.sql_mode = 'TRADITIONAL'", { Expected("sql_mode", {"TRADITIONAL"}) } }, + { "SET sql_mode = 'TRADITIONAL'", { Expected("sql_mode", {"TRADITIONAL"}) } }, + { "SET SQL_MODE ='TRADITIONAL'", { Expected("sql_mode", {"TRADITIONAL"}) } }, + { "SET SQL_MODE = \"TRADITIONAL\"", { Expected("sql_mode", {"TRADITIONAL"}) } }, +}; + +void TestParse(const Test* tests, int ntests, const string& title) { + for (int i = 0; i < ntests; i++) { + std::map> data; + for(auto it = std::begin(tests[i].results); it != std::end(tests[i].results); ++it) { + data[it->var] = it->values; + } + + SetParser parser(tests[i].query); + std::map> result = parser.parse(); + + // printMap("result", result); + // printMap("expected", data); + + CHECK_EQ(result.size(), data.size()); + CHECK(std::equal(std::begin(result), std::end(result), std::begin(data))); + } +} + + +TEST(TestParse, SET_SQL_MODE) { + TestParse(sql_mode, arraysize(sql_mode), "sql_mode"); +} + +static Test time_zone[] = { + { "SET @@time_zone = 'Europe/Paris'", { Expected("time_zone", {"Europe/Paris"}) } }, +}; + +TEST(TestParse, SET_TIME_ZONE) { + TestParse(time_zone, arraysize(time_zone), "time_zone"); +} + +static Test names[] = { + { "SET NAMES utf8", { Expected("names", {"utf8"}) } }, + { "SET NAMES 'utf8'", { Expected("names", {"utf8"}) } }, + { "SET NAMES \"utf8\"", { Expected("names", {"utf8"}) } }, + { "SET NAMES utf8 COLLATE unicode_ci", { Expected("names", {"utf8", "unicode_ci"}) } }, +}; + +TEST(TestParse, SET_NAMES) { + TestParse(names, arraysize(names), "names"); +} + +static Test multiple[] = { + { "SET time_zone = 'Europe/Paris', sql_mode = 'TRADITIONAL'", { Expected("time_zone", {"Europe/Paris"}), Expected("sql_mode", {"TRADITIONAL"}) } }, + { "SET sql_mode = 'TRADITIONAL', NAMES 'utf8 COLLATE 'unicode_ci'", { Expected("sql_mode", {"TRADITIONAL"}), Expected("names", {"utf8", "unicode_ci"}) } }, +/* not supported by SetParser + { "SET @@SESSION.sql_mode = CONCAT(CONCAT(@@sql_mode, ',STRICT_ALL_TABLES'), ',NO_AUTO_VALUE_ON_ZERO'), @@SESSION.sql_auto_is_null = 0, @@SESSION.wait_timeout = 2147483", + { Expected("sql_mode", {"CONCAT(CONCAT(@@sql_mode, ',STRICT_ALL_TABLES'), ',NO_AUTO_VALUE_ON_ZERO')"}), Expected("sql_auto_is_null", {"0"}), + Expected("wait_timeout", {"2147483"}) } }, +*/ +}; + +TEST(TestParse, MULTIPLE) { + TestParse(multiple, arraysize(multiple), "multiple"); +} From 7249f006cc0a41458e79feff57398f0a1efa37bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Sat, 29 Sep 2018 01:43:31 +1000 Subject: [PATCH 2/3] Further improvement on SET parser #1528 `SET` is not case sensitive Handle `time_zone` with numeric time offset --- lib/set_parser.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/set_parser.cpp b/lib/set_parser.cpp index 4d5e767551..bbfa20d5fc 100644 --- a/lib/set_parser.cpp +++ b/lib/set_parser.cpp @@ -17,12 +17,14 @@ SetParser::SetParser(std::string nq) { } std::map> SetParser::parse() { - re2::RE2::Replace(&query, "^\\s*SET\\s+", ""); re2::RE2::Options *opt2=new re2::RE2::Options(RE2::Quiet); opt2->set_case_sensitive(false); opt2->set_longest_match(false); + re2::RE2 re0("^\\s*SET\\s+", *opt2); + re2::RE2::Replace(&query, re0, ""); + std::map> result; #define NAMES "(NAMES)" @@ -31,7 +33,7 @@ std::map> SetParser::parse() { #define SESSION "(?:|SESSION +|@@|@@session.)" #define VAR "(\\w+)" #define SPACES " *" -#define VAR_VALUE "((?:[\\w/]|,)+)" +#define VAR_VALUE "((?:[\\w/\\d:\\+\\-]|,)+)" const string pattern="(?:" NAMES SPACES QUOTES NAME_VALUE QUOTES "(?: +COLLATE +" QUOTES NAME_VALUE QUOTES "|)" "|" SESSION VAR SPACES "(?:|:)=" SPACES QUOTES VAR_VALUE QUOTES ") *,? *"; re2::RE2 re(pattern, *opt2); From 2e2b436e729111810f8f7e26a652481335e842b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Sat, 29 Sep 2018 02:34:21 +1000 Subject: [PATCH 3/3] Further improvement on SET parser #1528 Support for SET NAMES + other variables --- lib/MySQL_Session.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/lib/MySQL_Session.cpp b/lib/MySQL_Session.cpp index 2a928ae2af..31664940df 100644 --- a/lib/MySQL_Session.cpp +++ b/lib/MySQL_Session.cpp @@ -756,7 +756,11 @@ bool MySQL_Session::handler_special_queries(PtrSize_t *pkt) { pkt->ptr=pkt_2.ptr; } } - if ( (pkt->size < 100) && (pkt->size > 15) && (strncasecmp((char *)"SET NAMES ",(char *)pkt->ptr+5,10)==0) ) { + if ( + (pkt->size < 100) && (pkt->size > 15) && (strncasecmp((char *)"SET NAMES ",(char *)pkt->ptr+5,10)==0) + && + (memchr((const void *)((char *)pkt->ptr+5),',',pkt->size-15)==NULL) // there is no comma + ) { char *unstripped=strndup((char *)pkt->ptr+15,pkt->size-15); char *csname=trim_spaces_and_quotes_in_place(unstripped); bool collation_specified = false; @@ -3910,7 +3914,12 @@ bool MySQL_Session::handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_C } } } - if (match_regexes && (match_regexes[1]->match(dig) || match_regexes[2]->match(dig))) { + if ( + ( + match_regexes && (match_regexes[1]->match(dig) || match_regexes[2]->match(dig)) + ) || + ( strncasecmp(dig,(char *)"SET NAMES", strlen((char *)"SET NAMES")) == 0) + ) { proxy_debug(PROXY_DEBUG_MYSQL_COM, 5, "Parsing SET command %s\n", nq.c_str()); SetParser parser(nq); std::map> set = parser.parse();