diff --git a/build/docker/analytics/README.md b/build/docker/analytics/README.md new file mode 100644 index 000000000..aa9b69a16 --- /dev/null +++ b/build/docker/analytics/README.md @@ -0,0 +1,117 @@ +# StarRocks Analytics Stack + +These files deploy and set up a StarRocks analytics stack using `docker compose` for development and testing purposes. + +The stack consists of the following components: + +- StarRocks Frontend (FE): Query coordinator and metadata manager +- StarRocks Backend (BE): Data storage and query execution engine +- Kafka: Message broker for event streaming +- Kafka UI: Web interface for Kafka monitoring +- Init Services: Database (`yorkie`), table (`user_events`), and topic (`user-events`) initialization scripts + +## How To Use + +```sh +# Start the analytics stack +docker compose -f build/docker/analytics/docker-compose.yml up -d + +# Open the Kafka UI +open http://localhost:8989 + +# Run StarRocks SQL client +docker exec -it starrocks-fe mysql -P 9030 -h starrocks-fe -u root --prompt="StarRocks > " + +# Shut down the stack +docker compose -f build/docker/analytics/docker-compose.yml down +``` + +The files used are as follows: + +- docker-compose.yml: Defines the StarRocks and Kafka services +- init-user-events-db.sql: Creates the Yorkie database and tables +- init-routine-load.sql: Sets up Kafka routine load jobs + +Key services: + +- StarRocks FE (ports: 8030, 9020, 9030) +- StarRocks BE (port: 8040) +- Kafka (port: 9092) +- Kafka UI (port: 8989) + +Starting the stack will: + +- Start the StarRocks FE/BE nodes +- Create the required Kafka topics +- Initialize the StarRocks database and tables +- Configure the routine load from Kafka to StarRocks + +## Setting Up Kafka in Cluster Mode + +To set up Kafka in cluster mode, refer to the [Bitnami Kafka README.md](https://github.com/bitnami/containers/blob/main/bitnami/kafka/README.md) and
[docker-compose-cluster.yml](https://github.com/bitnami/containers/blob/main/bitnami/kafka/docker-compose-cluster.yml) for detailed instructions on setting up Kafka in cluster mode using Docker Compose. + +## About StarRocks with Kafka Routine Load + +To use StarRocks with Kafka routine load, follow the [StarRocks Routine Load Quick Start Guide](https://docs.starrocks.io/docs/quick_start/routine-load/). This guide provides detailed instructions on setting up routine load jobs to ingest data from Kafka into StarRocks. Ensure that your Kafka and StarRocks instances are properly configured and running before starting the integration process. + +### How To Check Routine Load Status + +To check routine load status or fix a paused routine load, follow these steps: + +1. Connect to StarRocks Frontend (FE) using the following command: + + ```sh + docker exec -it starrocks-fe mysql -P 9030 -h starrocks-fe -u root --prompt="StarRocks > " + ``` + +2. Check the status of the routine load: + + ```sql + StarRocks > SHOW ROUTINE LOAD FROM yorkie; + ``` + + Example output: + + ``` + *************************** 1. row *************************** + Id: 17031 + Name: events + ... + DbName: yorkie + TableName: user_events + State: PAUSED + DataSourceType: KAFKA + ... + DataSourceProperties: {"topic":"user-events","currentKafkaPartitions":"0","brokerList":"kafka:9092"} + CustomProperties: {"group.id":"user_events_group"} + ... + ``` + +3. Resume the paused routine load: + + ```sql + StarRocks > RESUME ROUTINE LOAD FOR yorkie.events; + ``` + +4. Verify that the routine load is running: + + ```sql + StarRocks > SHOW ROUTINE LOAD FROM yorkie; + ``` + + Example output: + + ``` + *************************** 1. row *************************** + Id: 17031 + Name: events + ... + DbName: yorkie + TableName: user_events + State: RUNNING + DataSourceType: KAFKA + ...
+   DataSourceProperties: {"topic":"user-events","currentKafkaPartitions":"0","brokerList":"kafka:9092"}
+   CustomProperties: {"group.id":"user_events_group"}
+   ...
+   ```
diff --git a/build/docker/analytics/docker-compose.yml b/build/docker/analytics/docker-compose.yml
new file mode 100644
index 000000000..0d9113b69
--- /dev/null
+++ b/build/docker/analytics/docker-compose.yml
@@ -0,0 +1,182 @@
+# Development/testing analytics stack: StarRocks (FE + BE), a single-node
+# KRaft Kafka broker, Kafka UI, and two one-shot init containers.
+version: "3"
+services:
+  starrocks-fe:
+    image: starrocks/fe-ubuntu:2.5.4
+    hostname: starrocks-fe
+    container_name: starrocks-fe
+    user: root
+    ports:
+      - 8030:8030
+      - 9020:9020
+      - 9030:9030
+    command: /opt/starrocks/fe/bin/start_fe.sh
+    healthcheck:
+      test: 'mysql -u root -h starrocks-fe -P 9030 -e "show frontends\G" | grep "Alive: true"'
+      interval: 10s
+      timeout: 5s
+      retries: 3
+      start_period: 30s
+    volumes:
+      # - fe.conf:/opt/starrocks/fe/conf/fe.conf
+      - ./starrocks/starrocks-fe/meta:/opt/starrocks/fe/meta
+      - ./starrocks/starrocks-fe/log:/opt/starrocks/fe/log
+    networks:
+      network:
+        ipv4_address: 10.5.0.2
+
+  starrocks-be:
+    image: starrocks/be-ubuntu:2.5.4
+    hostname: starrocks-be
+    container_name: starrocks-be
+    user: root
+    ports:
+      - 8040:8040
+    # Wait for a healthy FE (instead of a fixed sleep) before registering this backend with it.
+    depends_on:
+      starrocks-fe:
+        condition: service_healthy
+    command:
+      - /bin/bash
+      - -c
+      - |
+        mysql --connect-timeout 2 -h starrocks-fe -P 9030 -u root -e "alter system add backend \"starrocks-be:9050\";"
+        /opt/starrocks/be/bin/start_be.sh
+    healthcheck:
+      test: 'mysql -u root -h starrocks-fe -P 9030 -e "show backends\G" | grep "Alive: true"'
+      interval: 10s
+      timeout: 5s
+      retries: 3
+    volumes:
+      # - be.conf:/opt/starrocks/be/conf/be.conf
+      - ./starrocks/starrocks-be/storage:/opt/starrocks/be/storage
+      - ./starrocks/starrocks-be/log:/opt/starrocks/be/log
+    networks:
+      network:
+        ipv4_address: 10.5.0.3
+
+  kafka:
+    image: docker.io/bitnami/kafka:3.9
+    container_name: kafka
+    ports:
+      - "9092:9092"
+    volumes:
+      - "kafka_data:/bitnami"
+    environment:
+      # KRaft settings
+      - KAFKA_CFG_NODE_ID=0
+      - KAFKA_CFG_PROCESS_ROLES=controller,broker
+      - KAFKA_CFG_CONTROLLER_QUORUM_VOTERS=0@kafka:9093
+      # Listeners
+      - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093
+      - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://:9092
+      - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT
+      - KAFKA_CFG_CONTROLLER_LISTENER_NAMES=CONTROLLER
+      - KAFKA_CFG_INTER_BROKER_LISTENER_NAME=PLAINTEXT
+    healthcheck:
+      test: kafka-topics.sh --bootstrap-server kafka:9092 --list
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 30s
+    networks:
+      network:
+        ipv4_address: 10.5.0.5
+
+  init-kafka-topics:
+    image: docker.io/bitnami/kafka:3.9
+    # Start only after the broker's healthcheck passes, so topic creation does not race Kafka startup.
+    depends_on:
+      kafka:
+        condition: service_healthy
+    working_dir: /opt/bitnami/kafka/bin
+    entrypoint: ["/bin/sh", "-c"]
+    command: |
+      "
+      echo 'Waiting for Kafka to be ready...'
+      kafka-topics.sh --bootstrap-server kafka:9092 --list
+
+      echo 'Creating kafka topics'
+      kafka-topics.sh --bootstrap-server kafka:9092 --create --if-not-exists --topic user-events --replication-factor 1 --partitions 1
+
+      echo 'Successfully created the following topics:'
+      kafka-topics.sh --bootstrap-server kafka:9092 --list
+      "
+    networks:
+      network:
+        ipv4_address: 10.5.0.6
+
+  kafka-ui:
+    image: provectuslabs/kafka-ui
+    container_name: kafka-ui
+    ports:
+      - "8989:8080"
+    depends_on:
+      - kafka
+    restart: always
+    environment:
+      - KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS=kafka:9092
+    networks:
+      network:
+        ipv4_address: 10.5.0.7
+
+  init-starrocks-database:
+    image: starrocks/fe-ubuntu:2.5.4
+    depends_on:
+      starrocks-fe:
+        condition: service_healthy
+      starrocks-be:
+        condition: service_healthy
+      kafka:
+        condition: service_healthy
+      init-kafka-topics:
+        condition: service_completed_successfully
+    volumes:
+      - ./init-user-events-db.sql:/init-user-events-db.sql
+      - ./init-routine-load.sql:/init-routine-load.sql
+    entrypoint: ["/bin/sh", "-c"]
+    command: |
+      "
+      echo 'Checking Starrocks status'
+      mysql -u root -h starrocks-fe -P 9030 -e 'show frontends\\G' | grep 'Alive: true' || echo 'Frontend is not ready'
+      mysql -u root -h starrocks-fe -P 9030 -e 'show backends\\G' | grep 'Alive: true' || echo 'Backend is not ready'
+
+
+      echo 'Creating Yorkie database, tables and routine load'
+      mysql -P 9030 -h starrocks-fe -u root < /init-user-events-db.sql
+
+      echo 'Checking Yorkie database'
+      mysql -P 9030 -h starrocks-fe -u root -e 'show databases\\G'
+      mysql -P 9030 -h starrocks-fe -u root -e 'show databases\\G' | grep 'Database: yorkie' || echo 'Yorkie database not found'
+
+      echo 'Checking user_event table'
+      mysql -P 9030 -h starrocks-fe -u root -e 'show tables from yorkie\\G'
+      mysql -P 9030 -h starrocks-fe -u root -e 'show tables from yorkie\\G' | grep 'Tables_in_yorkie: user_events' || echo 'user_events table not found'
+
+
+      sleep 5s
+
+      echo 'Creating routine load'
+      mysql -P 9030 -h starrocks-fe -u root < /init-routine-load.sql
+
+      echo 'Checking event routine load'
+
+      mysql -P 9030 -h starrocks-fe -u root -e 'show routine load from yorkie\\G'
+      mysql -P 9030 -h starrocks-fe -u root -e 'show routine load from yorkie\\G' | grep 'State: RUNNING' || echo 'Routine load is not running'
+      "
+    networks:
+      network:
+        ipv4_address: 10.5.0.4
+
+networks:
+  network:
+    driver: bridge
+    ipam:
+      config:
+        - subnet: 10.5.0.0/16
+          gateway: 10.5.0.1
+
+volumes:
+  kafka_data:
+    driver: local
diff --git a/build/docker/analytics/init-routine-load.sql b/build/docker/analytics/init-routine-load.sql
new file mode 100644
index 000000000..e15b22ccb
--- /dev/null
+++ b/build/docker/analytics/init-routine-load.sql
@@ -0,0 +1,14 @@
+-- Routine load job `events` in database `yorkie`: loads JSON messages from
+-- the Kafka topic `user-events` on broker kafka:9092 into table user_events.
+CREATE ROUTINE LOAD yorkie.events ON user_events
+PROPERTIES
+(
+    "format" = "JSON",
+    "desired_concurrent_number"="1"
+)
+FROM KAFKA
+(
+    "kafka_broker_list" = "kafka:9092",
+    "kafka_topic" = "user-events",
+    "property.group.id" = "user_events_group"
+);
diff --git a/build/docker/analytics/init-user-events-db.sql b/build/docker/analytics/init-user-events-db.sql
new file mode 100644
index 000000000..1e30eb989
--- /dev/null
+++ 
b/build/docker/analytics/init-user-events-db.sql
@@ -0,0 +1,22 @@
+-- Schema bootstrap for the analytics stack: creates the `yorkie` database
+-- and its `user_events` table. Both statements use IF NOT EXISTS so the
+-- script can be re-run against an already-initialized StarRocks instance.
+CREATE DATABASE IF NOT EXISTS yorkie;
+
+USE yorkie;
+
+-- Duplicate-key OLAP table, keyed and hash-distributed on user_id
+-- (10 buckets), with replication_num set to 1.
+CREATE TABLE IF NOT EXISTS user_events (
+    user_id VARCHAR(64),
+    timestamp DATETIME,
+    event_type VARCHAR(32),
+    project_id VARCHAR(64),
+    user_agent VARCHAR(32),
+    metadata STRING
+) ENGINE = OLAP
+DUPLICATE KEY(user_id)
+DISTRIBUTED BY HASH(user_id) BUCKETS 10
+PROPERTIES (
+    "replication_num" = "1"
+);