From 5f58f703530151bec3df1212dec30513c541b663 Mon Sep 17 00:00:00 2001 From: HuangWei Date: Mon, 31 Jul 2023 18:33:04 +0800 Subject: [PATCH 1/9] test: add msg and refresh for coverage test --- src/cmd/sql_cmd_test.cc | 8 ++++---- src/tablet/sql_cluster_availability_test.cc | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/cmd/sql_cmd_test.cc b/src/cmd/sql_cmd_test.cc index 1c9fd6b1dc6..6303b8c0657 100644 --- a/src/cmd/sql_cmd_test.cc +++ b/src/cmd/sql_cmd_test.cc @@ -2999,7 +2999,7 @@ TEST_P(DBSDKTest, LongWindowsCleanup) { HandleSQL("use test2;"); HandleSQL(create_sql); sr->ExecuteSQL(deploy_sql, &status); - ASSERT_TRUE(status.IsOK()); + ASSERT_TRUE(status.IsOK()) << status.ToString(); std::string msg; std::string result_sql = "select * from __INTERNAL_DB.PRE_AGG_META_INFO;"; auto rs = sr->ExecuteSQL("", result_sql, &status); @@ -3032,10 +3032,10 @@ TEST_P(DBSDKTest, CreateWithoutIndexCol) { "c8 date, index(ts=c7));"; hybridse::sdk::Status status; sr->ExecuteSQL(create_sql, &status); - ASSERT_TRUE(status.IsOK()); + ASSERT_TRUE(status.IsOK()) << status.ToString(); std::string msg; - ASSERT_TRUE(cs->GetNsClient()->DropTable("test2", "trans", msg)); - ASSERT_TRUE(cs->GetNsClient()->DropDatabase("test2", msg)); + ASSERT_TRUE(cs->GetNsClient()->DropTable("test2", "trans", msg)) << msg; + ASSERT_TRUE(cs->GetNsClient()->DropDatabase("test2", msg)) << msg; } TEST_P(DBSDKTest, CreateIfNotExists) { diff --git a/src/tablet/sql_cluster_availability_test.cc b/src/tablet/sql_cluster_availability_test.cc index 267b9144a3d..431ed83fd66 100644 --- a/src/tablet/sql_cluster_availability_test.cc +++ b/src/tablet/sql_cluster_availability_test.cc @@ -235,8 +235,10 @@ TEST_F(SqlClusterTest, RecoverProcedure) { ::openmldb::tablet::TabletImpl* tablet2 = new ::openmldb::tablet::TabletImpl(); StartTablet(&tb_server2, tablet2); sleep(3); + // ensure tablet added in sdk + ASSERT_TRUE(router->RefreshCatalog()); rs = router->CallProcedure(db, sp_name, request_row, &status); - if (!rs) FAIL() << "call procedure failed"; + if (!rs) FAIL() << "call procedure failed, " + status.ToString(); schema = rs->GetSchema(); ASSERT_EQ(schema->GetColumnCnt(), 3); ASSERT_TRUE(rs->Next()); From a0c36585c1dc3bd6be23549403bd901b5aada1f4 Mon Sep 17 00:00:00 2001 From: HuangWei Date: Tue, 1 Aug 2023 11:19:11 +0800 Subject: [PATCH 2/9] debug --- src/client/tablet_client.cc | 11 +++++++++++ src/client/tablet_client.h | 1 + src/cmd/sql_cmd_test.cc | 2 ++ src/nameserver/name_server_impl.cc | 18 +++++++++++++++++- src/nameserver/name_server_impl.h | 2 ++ src/tablet/tablet_impl.cc | 9 +++++++-- 6 files changed, 40 insertions(+), 3 deletions(-) diff --git a/src/client/tablet_client.cc b/src/client/tablet_client.cc index e1d3ef9fa28..2a03c9a6b81 100644 --- a/src/client/tablet_client.cc +++ b/src/client/tablet_client.cc @@ -483,6 +483,17 @@ bool TabletClient::UpdateTTL(uint32_t tid, uint32_t pid, const ::openmldb::type: return false; } +bool TabletClient::Refresh() { + ::openmldb::api::RefreshRequest request; + ::openmldb::api::GeneralResponse response; + bool ret = client_.SendRequest(&::openmldb::api::TabletServer_Stub::Refresh, &request, &response, + FLAGS_request_timeout_ms, FLAGS_request_max_retry); + if (!ret || response.code() != 0) { + return false; + } + return true; +} + bool TabletClient::Refresh(uint32_t tid) { ::openmldb::api::RefreshRequest request; request.set_tid(tid); diff --git a/src/client/tablet_client.h b/src/client/tablet_client.h index 895960b9bdc..dfbec7ed8a2 100644 --- a/src/client/tablet_client.h +++ b/src/client/tablet_client.h @@ -236,6 +236,7 @@ class TabletClient : public Client { bool DropProcedure(const std::string& db_name, const std::string& sp_name); + bool Refresh(); bool Refresh(uint32_t tid); bool SubQuery(const ::openmldb::api::QueryRequest& request, diff --git a/src/cmd/sql_cmd_test.cc b/src/cmd/sql_cmd_test.cc index 6303b8c0657..21a98736631 100644 --- a/src/cmd/sql_cmd_test.cc +++ b/src/cmd/sql_cmd_test.cc @@ -3014,7 +3014,9 @@ TEST_P(DBSDKTest, LongWindowsCleanup) { ASSERT_FALSE(cs->GetNsClient()->DropTable("test2", "trans", msg)); ASSERT_TRUE(cs->GetNsClient()->DropProcedure("test2", "demo1", msg)) << msg; ASSERT_TRUE(cs->GetNsClient()->DropTable("test2", "trans", msg)) << msg; + ASSERT_TRUE(sr->RefreshCatalog()); // avoid cache in sdk // helpful for debug + // TODO if refresh is not good, sleep more HandleSQL("show tables;"); HandleSQL("show deployments;"); ASSERT_TRUE(cs->GetNsClient()->DropDatabase("test2", msg)) << msg; diff --git a/src/nameserver/name_server_impl.cc b/src/nameserver/name_server_impl.cc index 550558b6132..ab44dd7f3eb 100644 --- a/src/nameserver/name_server_impl.cc +++ b/src/nameserver/name_server_impl.cc @@ -8586,7 +8586,7 @@ bool NameServerImpl::AddIndexToTableInfo(const std::string& name, const std::str endpoint_set.insert(meta.endpoint()); } } - // locked on top + // locked on top TODO(hw): for (const auto& tablet : tablets_) { if (!tablet.second->Health()) { continue; @@ -9428,6 +9428,8 @@ void NameServerImpl::DropProcedure(RpcController* controller, const api::DropPro db_sp_info_map_.erase(db_name); } NotifyTableChanged(::openmldb::type::NotifyType::kTable); + // Refresh on tablet to avoid meta inconsistent, notify may be slow TODO refresh works on procedure? + RefreshHealthTabletsUnlockWith([](const std::shared_ptr& tablet_info) { return true; }); } response->set_code(::openmldb::base::ReturnCode::kOk); response->set_msg("ok"); @@ -10511,5 +10513,19 @@ base::Status NameServerImpl::CreateDeployOP(const DeploySQLRequest& request, uin return {}; } +template +inline bool NameServerImpl::RefreshHealthTabletsUnlockWith(T pred) { + bool ret = true; + for (const auto& tablet : tablets_) { + if (!tablet.second->Health()) { + continue; + } + if (pred(tablet.second)) { + ret &= tablet.second->client_->Refresh(); + } + } + return ret; +} + } // namespace nameserver } // namespace openmldb diff --git a/src/nameserver/name_server_impl.h b/src/nameserver/name_server_impl.h index d8fc2245401..d1aa84fba7f 100644 --- a/src/nameserver/name_server_impl.h +++ b/src/nameserver/name_server_impl.h @@ -682,6 +682,8 @@ class NameServerImpl : public NameServer { bool IsExistDataBase(const std::string& db); + template bool RefreshHealthTabletsUnlockWith(T pred); + private: std::mutex mu_; Tablets tablets_; diff --git a/src/tablet/tablet_impl.cc b/src/tablet/tablet_impl.cc index 03335c60a7a..774b2c6f70e 100644 --- a/src/tablet/tablet_impl.cc +++ b/src/tablet/tablet_impl.cc @@ -553,8 +553,13 @@ void TabletImpl::Refresh(RpcController* controller, const ::openmldb::api::Refre ::openmldb::api::GeneralResponse* response, Closure* done) { brpc::ClosureGuard done_guard(done); if (IsClusterMode()) { - if (RefreshSingleTable(request->tid())) { - PDLOG(INFO, "refresh success. tid %u", request->tid()); + if(request->has_tid()) { + if (RefreshSingleTable(request->tid())) { + PDLOG(INFO, "refresh success. tid %u", request->tid()); + } + } else { + LOG(INFO) << "refresh table info by RefreshRequest without tid"; + RefreshTableInfo(); } } } From a92aa004de20aec0d08f780150f919481aa83e06 Mon Sep 17 00:00:00 2001 From: HuangWei Date: Tue, 1 Aug 2023 12:48:59 +0800 Subject: [PATCH 3/9] debug --- src/cmd/sql_cmd_test.cc | 5 ++++- src/tablet/tablet_impl.cc | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/cmd/sql_cmd_test.cc b/src/cmd/sql_cmd_test.cc index 21a98736631..ef6d86e8022 100644 --- a/src/cmd/sql_cmd_test.cc +++ b/src/cmd/sql_cmd_test.cc @@ -2998,6 +2998,10 @@ TEST_P(DBSDKTest, LongWindowsCleanup) { HandleSQL("create database test2;"); HandleSQL("use test2;"); HandleSQL(create_sql); + sleep(5); + // TODO if refresh is not good, sleep more + // sp_cache_->ProcedureExist in tablet get deployment here, but nameserver no deployment + // refresh won't effet sp_cache_ in tablet sr->ExecuteSQL(deploy_sql, &status); ASSERT_TRUE(status.IsOK()) << status.ToString(); std::string msg; @@ -3016,7 +3020,6 @@ TEST_P(DBSDKTest, LongWindowsCleanup) { ASSERT_TRUE(cs->GetNsClient()->DropTable("test2", "trans", msg)) << msg; ASSERT_TRUE(sr->RefreshCatalog()); // avoid cache in sdk // helpful for debug - // TODO if refresh is not good, sleep more HandleSQL("show tables;"); HandleSQL("show deployments;"); ASSERT_TRUE(cs->GetNsClient()->DropDatabase("test2", msg)) << msg; diff --git a/src/tablet/tablet_impl.cc b/src/tablet/tablet_impl.cc index 774b2c6f70e..850cfd97a31 100644 --- a/src/tablet/tablet_impl.cc +++ b/src/tablet/tablet_impl.cc @@ -5224,6 +5224,7 @@ void TabletImpl::CreateProcedure(RpcController* controller, const openmldb::api: const std::string& db_name = sp_info.db_name(); const std::string& sp_name = sp_info.sp_name(); const std::string& sql = sp_info.sql(); + LOG(INFO) << "create procedure rpc in " << endpoint_; // no get size func << " with sp cache " << sp_cache_-> if (sp_cache_->ProcedureExist(db_name, sp_name)) { response->set_code(::openmldb::base::ReturnCode::kProcedureAlreadyExists); response->set_msg("store procedure already exists"); @@ -5288,7 +5289,7 @@ void TabletImpl::CreateProcedure(RpcController* controller, const openmldb::api: response->set_code(::openmldb::base::ReturnCode::kOk); response->set_msg("ok"); - LOG(INFO) << "create procedure success! sp_name: " << sp_name << ", db: " << db_name << ", sql: " << sql; + LOG(INFO) << "create procedure success! sp_name: " << sp_name << ", db: " << db_name << ", sql: " << sql << " on " << endpoint_; } void TabletImpl::DropProcedure(RpcController* controller, const ::openmldb::api::DropProcedureRequest* request, @@ -5316,6 +5317,7 @@ void TabletImpl::DropProcedure(RpcController* controller, const ::openmldb::api: } response->set_code(::openmldb::base::ReturnCode::kOk); response->set_msg("ok"); + LOG(INFO) << "drop succ in " << endpoint_; PDLOG(INFO, "drop procedure success. db_name[%s] sp_name[%s]", db_name.c_str(), sp_name.c_str()); } From 2245adb87164355c35a8dd2d612fbfe63973928e Mon Sep 17 00:00:00 2001 From: HuangWei Date: Tue, 1 Aug 2023 14:32:37 +0800 Subject: [PATCH 4/9] debug --- src/cmd/sql_cmd_test.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cmd/sql_cmd_test.cc b/src/cmd/sql_cmd_test.cc index ef6d86e8022..bb88bc2ccf7 100644 --- a/src/cmd/sql_cmd_test.cc +++ b/src/cmd/sql_cmd_test.cc @@ -2998,7 +2998,9 @@ TEST_P(DBSDKTest, LongWindowsCleanup) { HandleSQL("create database test2;"); HandleSQL("use test2;"); HandleSQL(create_sql); - sleep(5); + // sleep(5); // revert sleep to check error + ASSERT_TRUE(sr->RefreshCatalog()); // avoid cache in sdk + HandleSQL("show deployments;"); // ns deployment metadata, not tablet // TODO if refresh is not good, sleep more // sp_cache_->ProcedureExist in tablet get deployment here, but nameserver no deployment // refresh won't effet sp_cache_ in tablet From acea6a6bf6664921f6301fccd4bb869f32bcc764 Mon Sep 17 00:00:00 2001 From: HuangWei Date: Tue, 1 Aug 2023 17:23:55 +0800 Subject: [PATCH 5/9] debug --- src/cmd/sql_cmd_test.cc | 2 +- src/nameserver/name_server_impl.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cmd/sql_cmd_test.cc b/src/cmd/sql_cmd_test.cc index bb88bc2ccf7..a67a7ca6d1f 100644 --- a/src/cmd/sql_cmd_test.cc +++ b/src/cmd/sql_cmd_test.cc @@ -2998,7 +2998,7 @@ TEST_P(DBSDKTest, LongWindowsCleanup) { HandleSQL("create database test2;"); HandleSQL("use test2;"); HandleSQL(create_sql); - // sleep(5); // revert sleep to check error + // sleep(5); // sleep to avoid tablet metadata // revert sleep to check error ASSERT_TRUE(sr->RefreshCatalog()); // avoid cache in sdk HandleSQL("show deployments;"); // ns deployment metadata, not tablet // TODO if refresh is not good, sleep more diff --git a/src/nameserver/name_server_impl.cc b/src/nameserver/name_server_impl.cc index ab44dd7f3eb..57d830b0a4c 100644 --- a/src/nameserver/name_server_impl.cc +++ b/src/nameserver/name_server_impl.cc @@ -9429,7 +9429,7 @@ void NameServerImpl::DropProcedure(RpcController* controller, const api::DropPro } NotifyTableChanged(::openmldb::type::NotifyType::kTable); // Refresh on tablet to avoid meta inconsistent, notify may be slow TODO refresh works on procedure? - RefreshHealthTabletsUnlockWith([](const std::shared_ptr& tablet_info) { return true; }); + // RefreshHealthTabletsUnlockWith([](const std::shared_ptr& tablet_info) { return true; }); } response->set_code(::openmldb::base::ReturnCode::kOk); response->set_msg("ok"); From d6bb1d3a1d25e1207f16ca926e3f31a664c3cba7 Mon Sep 17 00:00:00 2001 From: Huang Wei Date: Wed, 2 Aug 2023 10:13:30 +0800 Subject: [PATCH 6/9] debug --- src/cmd/sql_cmd_test.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/cmd/sql_cmd_test.cc b/src/cmd/sql_cmd_test.cc index a67a7ca6d1f..e6f52bbaea3 100644 --- a/src/cmd/sql_cmd_test.cc +++ b/src/cmd/sql_cmd_test.cc @@ -3020,10 +3020,11 @@ TEST_P(DBSDKTest, LongWindowsCleanup) { ASSERT_FALSE(cs->GetNsClient()->DropTable("test2", "trans", msg)); ASSERT_TRUE(cs->GetNsClient()->DropProcedure("test2", "demo1", msg)) << msg; ASSERT_TRUE(cs->GetNsClient()->DropTable("test2", "trans", msg)) << msg; - ASSERT_TRUE(sr->RefreshCatalog()); // avoid cache in sdk - // helpful for debug - HandleSQL("show tables;"); - HandleSQL("show deployments;"); + // fast drop and next loop + // ASSERT_TRUE(sr->RefreshCatalog()); // avoid cache in sdk + // // helpful for debug + // HandleSQL("show tables;"); + // HandleSQL("show deployments;"); ASSERT_TRUE(cs->GetNsClient()->DropDatabase("test2", msg)) << msg; } } From c072f505cef08b0e5a1735f7a3b117d22a4c673f Mon Sep 17 00:00:00 2001 From: Huang Wei Date: Wed, 2 Aug 2023 10:14:05 +0800 Subject: [PATCH 7/9] debug --- src/cmd/sql_cmd_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cmd/sql_cmd_test.cc b/src/cmd/sql_cmd_test.cc index e6f52bbaea3..6eaab34ea7f 100644 --- a/src/cmd/sql_cmd_test.cc +++ b/src/cmd/sql_cmd_test.cc @@ -2999,8 +2999,8 @@ TEST_P(DBSDKTest, LongWindowsCleanup) { HandleSQL("use test2;"); HandleSQL(create_sql); // sleep(5); // sleep to avoid tablet metadata // revert sleep to check error - ASSERT_TRUE(sr->RefreshCatalog()); // avoid cache in sdk - HandleSQL("show deployments;"); // ns deployment metadata, not tablet + // ASSERT_TRUE(sr->RefreshCatalog()); // avoid cache in sdk + // HandleSQL("show deployments;"); // ns deployment metadata, not tablet // TODO if refresh is not good, sleep more // sp_cache_->ProcedureExist in tablet get deployment here, but nameserver no deployment // refresh won't effet sp_cache_ in tablet From f9baf0a474a2e20d2c8e315f78bc8c21b190baa5 Mon Sep 17 00:00:00 2001 From: Huang Wei Date: Wed, 2 Aug 2023 17:26:51 +0800 Subject: [PATCH 8/9] add debug info --- src/cmd/sql_cmd_test.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/cmd/sql_cmd_test.cc b/src/cmd/sql_cmd_test.cc index 6eaab34ea7f..cced755f2b7 100644 --- a/src/cmd/sql_cmd_test.cc +++ b/src/cmd/sql_cmd_test.cc @@ -2999,8 +2999,8 @@ TEST_P(DBSDKTest, LongWindowsCleanup) { HandleSQL("use test2;"); HandleSQL(create_sql); // sleep(5); // sleep to avoid tablet metadata // revert sleep to check error - // ASSERT_TRUE(sr->RefreshCatalog()); // avoid cache in sdk - // HandleSQL("show deployments;"); // ns deployment metadata, not tablet + ASSERT_TRUE(sr->RefreshCatalog()); // avoid cache in sdk + HandleSQL("show deployments;"); // ns deployment metadata, not tablet // TODO if refresh is not good, sleep more // sp_cache_->ProcedureExist in tablet get deployment here, but nameserver no deployment // refresh won't effet sp_cache_ in tablet @@ -3020,11 +3020,11 @@ TEST_P(DBSDKTest, LongWindowsCleanup) { ASSERT_FALSE(cs->GetNsClient()->DropTable("test2", "trans", msg)); ASSERT_TRUE(cs->GetNsClient()->DropProcedure("test2", "demo1", msg)) << msg; ASSERT_TRUE(cs->GetNsClient()->DropTable("test2", "trans", msg)) << msg; - // fast drop and next loop - // ASSERT_TRUE(sr->RefreshCatalog()); // avoid cache in sdk - // // helpful for debug - // HandleSQL("show tables;"); - // HandleSQL("show deployments;"); + + ASSERT_TRUE(sr->RefreshCatalog()); // avoid cache in sdk + // helpful for debug + HandleSQL("show tables;"); + HandleSQL("show deployments;"); ASSERT_TRUE(cs->GetNsClient()->DropDatabase("test2", msg)) << msg; } } From 41596951db91584ca9943d81009283ac08bc6247 Mon Sep 17 00:00:00 2001 From: Huang Wei Date: Thu, 24 Aug 2023 19:11:00 +0800 Subject: [PATCH 9/9] debug msg --- src/cmd/sql_cmd_test.cc | 3 ++- src/tablet/tablet_impl.cc | 11 +++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/cmd/sql_cmd_test.cc b/src/cmd/sql_cmd_test.cc index cced755f2b7..3e97587f690 100644 --- a/src/cmd/sql_cmd_test.cc +++ b/src/cmd/sql_cmd_test.cc @@ -3005,7 +3005,8 @@ TEST_P(DBSDKTest, LongWindowsCleanup) { // sp_cache_->ProcedureExist in tablet get deployment here, but nameserver no deployment // refresh won't effet sp_cache_ in tablet sr->ExecuteSQL(deploy_sql, &status); - ASSERT_TRUE(status.IsOK()) << status.ToString(); + // may get error `Fail to transform data_provider op: table test2.trans not exist!` TODO + ASSERT_TRUE(status.IsOK()) << "deploy failed on " << status.ToString(); std::string msg; std::string result_sql = "select * from __INTERNAL_DB.PRE_AGG_META_INFO;"; auto rs = sr->ExecuteSQL("", result_sql, &status); diff --git a/src/tablet/tablet_impl.cc b/src/tablet/tablet_impl.cc index 850cfd97a31..bcdedc393b1 100644 --- a/src/tablet/tablet_impl.cc +++ b/src/tablet/tablet_impl.cc @@ -558,7 +558,7 @@ void TabletImpl::Refresh(RpcController* controller, const ::openmldb::api::Refre PDLOG(INFO, "refresh success. tid %u", request->tid()); } } else { - LOG(INFO) << "refresh table info by RefreshRequest without tid"; + LOG(INFO) << "refresh all by rpc without tid"; RefreshTableInfo(); } } @@ -5224,11 +5224,11 @@ void TabletImpl::CreateProcedure(RpcController* controller, const openmldb::api: const std::string& db_name = sp_info.db_name(); const std::string& sp_name = sp_info.sp_name(); const std::string& sql = sp_info.sql(); - LOG(INFO) << "create procedure rpc in " << endpoint_; // no get size func << " with sp cache " << sp_cache_-> if (sp_cache_->ProcedureExist(db_name, sp_name)) { response->set_code(::openmldb::base::ReturnCode::kProcedureAlreadyExists); response->set_msg("store procedure already exists"); - PDLOG(WARNING, "store procedure[%s] already exists in db[%s]", sp_name.c_str(), db_name.c_str()); + // print endpoint for ut debug + PDLOG(WARNING, "store procedure[%s] already exists in db[%s] on %s", sp_name.c_str(), db_name.c_str(), endpoint_.c_str()); return; } ::hybridse::base::Status status; @@ -5317,8 +5317,7 @@ void TabletImpl::DropProcedure(RpcController* controller, const ::openmldb::api: } response->set_code(::openmldb::base::ReturnCode::kOk); response->set_msg("ok"); - LOG(INFO) << "drop succ in " << endpoint_; - PDLOG(INFO, "drop procedure success. db_name[%s] sp_name[%s]", db_name.c_str(), sp_name.c_str()); + PDLOG(INFO, "drop procedure success. db_name[%s] sp_name[%s] on %s", db_name.c_str(), sp_name.c_str(), endpoint_.c_str()); } void TabletImpl::RunRequestQuery(RpcController* ctrl, const openmldb::api::QueryRequest& request, @@ -5402,7 +5401,7 @@ void TabletImpl::CreateProcedure(const std::shared_ptrInsertSQLProcedureCacheEntry(db_name, sp_name, sp_info, session.GetCompileInfo(), batch_session.GetCompileInfo()); - + // only called by RefreshTableInfo LOG(INFO) << "refresh procedure success! sp_name: " << sp_name << ", db: " << db_name << ", sql: " << sql; }