Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
befd51f
Update the Service Level Board
firebor Jun 4, 2025
ffcc0da
banyandb添加e2e
firebor Sep 27, 2025
9691d34
banyandb添加e2e
firebor Sep 28, 2025
73e4fe5
banyandb添加mal
firebor Sep 28, 2025
97afa99
banyandb添加ui
firebor Sep 28, 2025
311b128
Merge branch 'apache:master' into master
firebor Sep 28, 2025
8191cf4
add changes
firebor Sep 28, 2025
44341ea
Merge remote-tracking branch 'origin/master'
firebor Sep 28, 2025
7514231
Update oap-server/server-starter/src/main/resources/ui-initialized-te…
firebor Sep 29, 2025
4d0c843
e2e file adds to GHA control file
firebor Sep 29, 2025
c5b1c9d
fix: Add the modifications to the end rather than the beginning.
firebor Sep 29, 2025
aee11d0
add menu.yaml and UITemplateInitializer.java
firebor Sep 30, 2025
b07fb9d
Merge branch 'apache:master' into master
firebor Oct 10, 2025
a6254e2
add menu for self_observability_banyandb
firebor Oct 13, 2025
a444dd4
add menu for self_observability_banyandb
firebor Oct 13, 2025
cbaf6bb
Revert "add menu.yaml and UITemplateInitializer.java"
firebor Oct 15, 2025
b62f213
Revert "add menu for self_observability_banyandb"
firebor Oct 15, 2025
f276382
Revert "add menu for self_observability_banyandb"
firebor Oct 15, 2025
5c36f56
Merge branch 'master' into master
wu-sheng Oct 15, 2025
22ca133
fix:recover
firebor Oct 15, 2025
867b2be
Merge remote-tracking branch 'origin/master'
firebor Oct 15, 2025
a750924
Modify the ID of the banyandb-instance
firebor Oct 15, 2025
d00cdf9
Merge branch 'master' into master
wu-sheng Oct 15, 2025
94ddc81
Revise the UI
firebor Oct 15, 2025
cef2c22
Merge branch 'master' into master
wankai123 Oct 20, 2025
407c7a5
Merge remote-tracking branch 'origin/master'
firebor Jun 4, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/skywalking.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,8 @@ jobs:
config: test/e2e-v2/cases/storage/banyandb/e2e.yaml
- name: BanyanDB TLS
config: test/e2e-v2/cases/storage/banyandb/tls/e2e.yaml
- name: BanyanDB monitoring
config: test/e2e-v2/cases/banyandb/e2e.yaml
- name: Storage MySQL
config: test/e2e-v2/cases/storage/mysql/e2e.yaml
- name: Storage PostgreSQL
Expand Down
2 changes: 2 additions & 0 deletions docs/en/changes/changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#### OAP Server

* Implement self-monitoring for BanyanDB via OAP Server.
* BanyanDB: Support `hot/warm/cold` stages configuration.
* Fix query continues profiling policies error when the policy is already in the cache.
* Support `hot/warm/cold` stages TTL query in the status API and graphQL API.
Expand Down Expand Up @@ -113,6 +114,7 @@

#### UI

* Implement self-monitoring for BanyanDB via UI.
* Enhance the trace `List/Tree/Table` graph to support displaying multiple refs of spans and distinguishing different parents.
* Fix: correct the same labels for metrics.
* Refactor: use the Fetch API instead of Axios.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,12 @@ public enum Layer {
/**
* Apache Flink is a framework and distributed processing engine for stateful computations over unbounded and bounded data streams
*/
FLINK(42, true);
FLINK(42, true),

/**
* BanyanDB is a distributed time-series database with built-in self-monitoring for real-time tracking of system health, performance, and resource utilization.
*/
BANYANDB(43, true);

private final int value;
/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ public class UITemplateInitializer {
Layer.KONG.name(),
Layer.SO11Y_GO_AGENT.name(),
Layer.FLINK.name(),
Layer.BANYANDB.name(),
"custom"
};
private final UITemplateManagementService uiTemplateManagementService;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ receiver-otel:
selector: ${SW_OTEL_RECEIVER:default}
default:
enabledHandlers: ${SW_OTEL_RECEIVER_ENABLED_HANDLERS:"otlp-metrics,otlp-logs"}
enabledOtelMetricsRules: ${SW_OTEL_RECEIVER_ENABLED_OTEL_METRICS_RULES:"apisix,nginx/*,k8s/*,istio-controlplane,vm,mysql/*,postgresql/*,oap,aws-eks/*,windows,aws-s3/*,aws-dynamodb/*,aws-gateway/*,redis/*,elasticsearch/*,rabbitmq/*,mongodb/*,kafka/*,pulsar/*,bookkeeper/*,rocketmq/*,clickhouse/*,activemq/*,kong/*,flink/*"}
enabledOtelMetricsRules: ${SW_OTEL_RECEIVER_ENABLED_OTEL_METRICS_RULES:"apisix,nginx/*,k8s/*,istio-controlplane,vm,mysql/*,postgresql/*,oap,aws-eks/*,windows,aws-s3/*,aws-dynamodb/*,aws-gateway/*,redis/*,elasticsearch/*,rabbitmq/*,mongodb/*,kafka/*,pulsar/*,bookkeeper/*,rocketmq/*,clickhouse/*,activemq/*,kong/*,flink/*,banyandb/*"}

receiver-zipkin:
selector: ${SW_RECEIVER_ZIPKIN:-}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The range arguments passed to rate(...) in the expressions below (e.g. 'PT15S')
# are parsed as textual durations. The formats accepted are based on the ISO-8601
# duration format {@code PnDTnHnMn.nS} with days considered to be exactly 24 hours.
# <p>
# Examples:
# <pre>
# "PT20.345S" -- parses as "20.345 seconds"
# "PT15M" -- parses as "15 minutes" (where a minute is 60 seconds)
# "PT10H" -- parses as "10 hours" (where an hour is 3600 seconds)
# "P2D" -- parses as "2 days" (where a day is 24 hours or 86400 seconds)
# "P2DT3H4M" -- parses as "2 days, 3 hours and 4 minutes"
# "P-6H3M" -- parses as "-6 hours and +3 minutes"
# "-P6H3M" -- parses as "-6 hours and -3 minutes"
# "-P-6H+3M" -- parses as "+6 hours and -3 minutes"
# </pre>
# MAL rules for BanyanDB self-monitoring, instance scope.
# Only samples whose job_name tag equals 'banyandb-monitoring' are processed.
filter: "{ tags -> tags.job_name == 'banyandb-monitoring' }"
# Appended to every expression below: prefixes host_name with 'banyandb::', then binds the
# metric to a service keyed by host_name and an instance keyed by service_instance_id,
# both in Layer.BANYANDB.
expSuffix: tag({tags -> tags.host_name = 'banyandb::' + tags.host_name}).service(['host_name'] , Layer.BANYANDB).instance(['host_name'], ['service_instance_id'], Layer.BANYANDB)
metricPrefix: meter_banyandb
metricsRules:
  # ---- Overview ----
  - name: instance_write_rate
    exp: banyandb_measure_total_written.rate('PT15S')+banyandb_stream_tst_total_written.rate('PT15S')
  - name: instance_total_memory
    exp: banyandb_system_memory_state.tagEqual('kind','total')
  - name: instance_disk_usage
    exp: banyandb_system_disk.tagEqual('kind','used').sum(['host_name','service_instance_id'])
  # NOTE(review): named "rate" but no .rate(...) is applied, so this is the summed raw
  # counter per method — confirm this is what the dashboard expects.
  - name: instance_query_rate
    exp: banyandb_liaison_grpc_total_started.sum(['method','host_name','service_instance_id'])
  - name: instance_total_cpu
    exp: banyandb_system_cpu_num
  # Sum of four error counters, each converted from a per-second rate to per-minute (*60).
  - name: instance_write_and_query_errors_rate
    exp: banyandb_liaison_grpc_total_err.tagEqual('method','query').sum(['method','host_name','service_instance_id']).rate('PT15S')*60 + banyandb_liaison_grpc_total_stream_msg_sent_err.sum(['host_name','service_instance_id']).rate('PT15S')*60 + banyandb_liaison_grpc_total_stream_msg_received_err.sum(['host_name','service_instance_id']).rate('PT15S')*60 + banyandb_queue_sub_total_msg_sent_err.sum(['host_name','service_instance_id']).rate('PT15S')*60
  - name: instance_etcd_operation_rate
    exp: banyandb_liaison_grpc_total_registry_started.sum(['host_name','service_instance_id']).rate('PT15S') + banyandb_liaison_grpc_total_started.sum(['host_name','service_instance_id']).rate('PT15S')
  - name: instance_active_instance
    exp: up.sum(['host_name','service_instance_id']).downsampling(MIN)
  # ---- Resource usage (ratios are scaled by 1000) ----
  - name: instance_cpu_usage
    exp: (((process_cpu_seconds_total.sum(['host_name','service_instance_id']).rate('PT15S') / banyandb_system_cpu_num.sum(['host_name','service_instance_id']))).max(['host_name','service_instance_id']))*1000
  - name: instance_rss_memory_usage
    exp: ((process_resident_memory_bytes.sum(['host_name','service_instance_id']).downsampling(MAX) / banyandb_system_memory_state.tagEqual('kind','total').sum(['host_name','service_instance_id'])).max(['host_name','service_instance_id']))*1000
  # Used disk divided by total disk. The denominator must come from banyandb_system_disk,
  # not banyandb_system_memory_state, otherwise the ratio compares disk bytes to RAM bytes.
  # NOTE(review): assumes banyandb_system_disk exposes kind='total' — confirm against BanyanDB.
  - name: instance_disk_usage_all
    exp: ((banyandb_system_disk.tagEqual('kind','used').sum(['host_name','service_instance_id']) / banyandb_system_disk.tagEqual('kind','total').sum(['host_name','service_instance_id'])).max(['host_name','service_instance_id']))*1000
  - name: instance_network_usage_recv
    exp: banyandb_system_net_state.tagEqual('kind','bytes_recv').sum(['host_name','service_instance_id']).rate('PT15S')
  - name: instance_network_usage_sent
    exp: banyandb_system_net_state.tagEqual('kind','bytes_sent').sum(['host_name','service_instance_id']).rate('PT15S')
  # ---- Storage (grouped by 'group') ----
  - name: instance_storage_write_rate
    exp: banyandb_measure_total_written.sum(['group','host_name','service_instance_id']).rate('PT15S')*1000
  - name: instance_query_latency
    exp: (banyandb_liaison_grpc_total_latency.tagEqual('method','query').sum(['group','host_name','service_instance_id']).rate('PT15S') / banyandb_liaison_grpc_total_started.tagEqual('method','query').sum(['group','host_name','service_instance_id']).rate('PT15S'))*1000
  - name: instance_total_data
    exp: banyandb_measure_total_file_elements.sum(['group','host_name','service_instance_id'])
  - name: instance_merge_file_data
    exp: banyandb_measure_total_merge_loop_started.sum(['group','host_name','service_instance_id']).rate('PT15S') * 60 *1000
  - name: instance_merge_file_latency
    exp: (banyandb_measure_total_merge_latency.tagEqual('type','file').sum(['group','host_name','service_instance_id']).rate('PT15S') / banyandb_measure_total_merge_loop_started.sum(['group','host_name','service_instance_id']).rate('PT15S'))*1000
  - name: instance_merge_file_partitions
    exp: (banyandb_measure_total_merged_parts.tagEqual('type','file').sum(['group','host_name','service_instance_id']).rate('PT15S') / banyandb_measure_total_merge_loop_started.sum(['group','host_name','service_instance_id']).rate('PT15S'))*1000
  # ---- Inverted index: measure series ----
  - name: instance_series_write_rate
    exp: (banyandb_measure_inverted_index_total_updates.sum(['group','host_name','service_instance_id']).rate('PT15S'))*1000
  - name: instance_series_term_search_rate
    exp: banyandb_stream_storage_inverted_index_total_term_searchers_started.sum(['group','host_name','service_instance_id']).rate('PT15S')
  - name: instance_total_series
    exp: banyandb_measure_inverted_index_total_doc_count.sum(['group','host_name','service_instance_id'])
  # ---- Inverted index: stream ----
  - name: instance_stream_write_rate
    exp: banyandb_stream_tst_inverted_index_total_updates.sum(['group','host_name','service_instance_id']).rate('PT15S')
  - name: instance_term_search_rate
    exp: banyandb_stream_tst_inverted_index_total_term_searchers_started.sum(['group','host_name','service_instance_id']).rate('PT15S')* 1000
  - name: instance_total_document
    exp: banyandb_stream_tst_inverted_index_total_doc_count.sum(['group','host_name','service_instance_id'])


Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The range arguments passed to rate(...) in the expressions below (e.g. 'PT15S')
# are parsed as textual durations. The formats accepted are based on the ISO-8601
# duration format {@code PnDTnHnMn.nS} with days considered to be exactly 24 hours.
# <p>
# Examples:
# <pre>
# "PT20.345S" -- parses as "20.345 seconds"
# "PT15M" -- parses as "15 minutes" (where a minute is 60 seconds)
# "PT10H" -- parses as "10 hours" (where an hour is 3600 seconds)
# "P2D" -- parses as "2 days" (where a day is 24 hours or 86400 seconds)
# "P2DT3H4M" -- parses as "2 days, 3 hours and 4 minutes"
# "P-6H3M" -- parses as "-6 hours and +3 minutes"
# "-P6H3M" -- parses as "-6 hours and -3 minutes"
# "-P-6H+3M" -- parses as "+6 hours and -3 minutes"
# </pre>
# MAL rules for BanyanDB self-monitoring, service scope.
# Only samples whose job_name tag equals 'banyandb-monitoring' are processed.
filter: "{ tags -> tags.job_name == 'banyandb-monitoring' }"
# Appended to every expression below: prefixes host_name with 'banyandb::', then binds the
# metric to a service keyed by host_name in Layer.BANYANDB.
expSuffix: tag({tags -> tags.host_name = 'banyandb::' + tags.host_name}).service(['host_name'] , Layer.BANYANDB)
metricPrefix: meter_banyandb
metricsRules:
  # ---- Overview ----
  - name: write_rate
    exp: (banyandb_measure_total_written.sum(['host_name','service_instance_id']).rate('PT15S') + banyandb_stream_tst_total_written.sum(['host_name','service_instance_id']).rate('PT15S'))
  - name: total_memory
    exp: banyandb_system_memory_state.tagEqual('kind','total').sum(['host_name'])
  - name: disk_usage
    exp: banyandb_system_disk.tagEqual('kind','used').sum(['host_name','service_instance_id'])
  # NOTE(review): named "rate" but no .rate(...) is applied, so this is the summed raw
  # counter per method — confirm this is what the dashboard expects.
  - name: query_rate
    exp: banyandb_liaison_grpc_total_started.sum(['method','host_name','service_instance_id'])
  # NOTE(review): grouped by 'method' although the instance-scope rule uses the bare gauge;
  # verify banyandb_system_cpu_num actually carries a 'method' label.
  - name: total_cpu
    exp: banyandb_system_cpu_num.sum(['method','host_name','service_instance_id'])
  # Sum of four error counters, each converted from a per-second rate to per-minute (*60).
  - name: write_and_query_errors_rate
    exp: banyandb_liaison_grpc_total_err.tagEqual('method','query').sum(['method','host_name','service_instance_id']).rate('PT15S')*60 + banyandb_liaison_grpc_total_stream_msg_sent_err.sum(['host_name','service_instance_id']).rate('PT15S')*60 + banyandb_liaison_grpc_total_stream_msg_received_err.sum(['host_name','service_instance_id']).rate('PT15S')*60 + banyandb_queue_sub_total_msg_sent_err.sum(['host_name','service_instance_id']).rate('PT15S')*60
  - name: etcd_operation_rate
    exp: banyandb_liaison_grpc_total_registry_started.sum(['host_name','service_instance_id']).rate('PT15S') + banyandb_liaison_grpc_total_started.sum(['host_name','service_instance_id']).rate('PT15S')
  - name: active_instance
    exp: up.sum(['host_name','service_instance_id']).downsampling(MIN)
  # ---- Resource usage (ratios are scaled by 1000) ----
  - name: cpu_usage
    exp: (((process_cpu_seconds_total.sum(['host_name','service_instance_id']).rate('PT15S') / banyandb_system_cpu_num.sum(['host_name','service_instance_id']))).max(['host_name','service_instance_id']))*1000
  - name: rss_memory_usage
    exp: ((process_resident_memory_bytes.sum(['host_name','service_instance_id']).downsampling(MAX) / banyandb_system_memory_state.tagEqual('kind','total').sum(['host_name','service_instance_id'])).max(['host_name','service_instance_id']))*1000
  # Used disk divided by total disk. The denominator must come from banyandb_system_disk,
  # not banyandb_system_memory_state, otherwise the ratio compares disk bytes to RAM bytes.
  # NOTE(review): assumes banyandb_system_disk exposes kind='total' — confirm against BanyanDB.
  - name: disk_usage_all
    exp: ((banyandb_system_disk.tagEqual('kind','used').sum(['host_name','service_instance_id']) / banyandb_system_disk.tagEqual('kind','total').sum(['host_name','service_instance_id'])).max(['host_name','service_instance_id']))*1000
  - name: network_usage_recv
    exp: banyandb_system_net_state.tagEqual('kind','bytes_recv').sum(['host_name','service_instance_id']).rate('PT15S')
  - name: network_usage_sent
    exp: banyandb_system_net_state.tagEqual('kind','bytes_sent').sum(['host_name','service_instance_id']).rate('PT15S')
  # ---- Storage (grouped by 'group') ----
  - name: storage_write_rate
    exp: banyandb_measure_total_written.sum(['group','host_name','service_instance_id']).rate('PT15S')*1000
  - name: query_latency
    exp: (banyandb_liaison_grpc_total_latency.tagEqual('method','query').sum(['group','host_name','service_instance_id']).rate('PT15S') / banyandb_liaison_grpc_total_started.tagEqual('method','query').sum(['group','host_name','service_instance_id']).rate('PT15S'))*1000
  - name: total_data
    exp: banyandb_measure_total_file_elements.sum(['group','host_name','service_instance_id'])
  - name: merge_file_data
    exp: banyandb_measure_total_merge_loop_started.sum(['group','host_name','service_instance_id']).rate('PT15S') * 60 *1000
  - name: merge_file_latency
    exp: (banyandb_measure_total_merge_latency.tagEqual('type','file').sum(['group','host_name','service_instance_id']).rate('PT15S') / banyandb_measure_total_merge_loop_started.sum(['group','host_name','service_instance_id']).rate('PT15S'))*1000
  - name: merge_file_partitions
    exp: (banyandb_measure_total_merged_parts.tagEqual('type','file').sum(['group','host_name','service_instance_id']).rate('PT15S') / banyandb_measure_total_merge_loop_started.sum(['group','host_name','service_instance_id']).rate('PT15S'))*1000
  # ---- Inverted index: measure series ----
  - name: series_write_rate
    exp: (banyandb_measure_inverted_index_total_updates.sum(['group','host_name','service_instance_id']).rate('PT15S'))*1000
  - name: series_term_search_rate
    exp: banyandb_stream_storage_inverted_index_total_term_searchers_started.sum(['group','host_name','service_instance_id']).rate('PT15S')
  - name: total_series
    exp: banyandb_measure_inverted_index_total_doc_count.sum(['group','host_name','service_instance_id'])
  # ---- Inverted index: stream ----
  - name: stream_write_rate
    exp: banyandb_stream_tst_inverted_index_total_updates.sum(['group','host_name','service_instance_id']).rate('PT15S')
  - name: term_search_rate
    exp: banyandb_stream_tst_inverted_index_total_term_searchers_started.sum(['group','host_name','service_instance_id']).rate('PT15S')* 1000
  - name: total_document
    exp: banyandb_stream_tst_inverted_index_total_doc_count.sum(['group','host_name','service_instance_id'])


Loading
Loading