This repository has been archived by the owner on Feb 12, 2022. It is now read-only.

Commit

Initial code import
afalko committed Feb 2, 2018
0 parents commit 37d6cb6
Showing 12 changed files with 704 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
.idea
*.iml
target
29 changes: 29 additions & 0 deletions LICENSE.txt
BSD 3-Clause License

Copyright (c) 2018, Salesforce
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
57 changes: 57 additions & 0 deletions README.md
# kafka-partition-availability-benchmark

This repository contains a Kafka partition stress test. Its goal is to make it easier to validate changes to Kafka
with respect to how many concurrent replicated partitions it can support.

We want to ensure that our Kafka users have the following guarantees:

1. Any topic or partition is available for consuming or producing at any time
2. Consume and produce latencies stay below certain SLAs
3. Users can reset the consumer offset to the beginning and reconsume at any time

Given that Kafka currently (as of 02/02/18) doesn't limit the number of topics you are allowed to create, this tool
helped us answer, "how many topics and partitions can we place on our multitenant Kafka systems before things start
going downhill?"

## Building

This will create a runnable jar in the target directory called `kafka_partition_availability_benchmark.jar`:

```
mvn package
```

## Configuration
You can see all configurable parameters in `src/main/resources/kafka-partition-availability-benchmark.properties`

The defaults are set to something you can run against a local single-broker installation of Kafka. In most cases, you probably only need to set four things to put stress on a real test environment:
```
cat > ~/.kafka-partition-availability-benchmark.properties << EOF
kafka.replication.factor = 3
num_topics = 4000
num_concurrent_consumers = 4000
kafka.brokers = my_test_kafka:9092
EOF
```
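How the override file interacts with the bundled defaults isn't shown in this excerpt; the usual pattern with `java.util.Properties` is to chain a defaults table under the user's file, so override keys win and everything else falls through. A minimal sketch of that pattern (an assumption about this tool's internals, with a hypothetical `ConfigLoader` class):

```java
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Properties;

public class ConfigLoader {
    // Overlay user-supplied properties on top of the defaults: any key
    // present in the override wins; unset keys fall through to defaults.
    public static Properties load(Properties defaults, Reader override) throws IOException {
        Properties merged = new Properties(defaults);
        merged.load(override);
        return merged;
    }

    public static void main(String[] args) throws IOException {
        Properties defaults = new Properties();
        defaults.setProperty("num_topics", "1");
        defaults.setProperty("consumer_processing_delay_ms", "100");

        // Stands in for ~/.kafka-partition-availability-benchmark.properties
        String overrideText = "num_topics = 4000\nkafka.brokers = my_test_kafka:9092\n";
        Properties cfg = load(defaults, new StringReader(overrideText));

        System.out.println(cfg.getProperty("num_topics"));                  // overridden value
        System.out.println(cfg.getProperty("consumer_processing_delay_ms")); // falls back to default
    }
}
```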

Depending on how powerful your test runner host is, you might be able to bump up the number of topics past `4000`. In
our testing, `4000` was what an i3.2xlarge instance could bear before test results started to get skewed.

To get past `4000` topics, we ran this tool on multiple test runners. We recommend setting the default topic prefix to something unique per test runner by doing something like this:
```
echo "default_topic_prefix = `hostname`" >> ~/.kafka-partition-availability-benchmark.properties
```
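The `TopicName` helper referenced by the source isn't visible in this excerpt; a rough sketch of the naming scheme implied above, assuming the name is simply the per-runner prefix joined to a numeric topic id (the separator and format here are guesses, not the real implementation):

```java
// Hypothetical sketch only: TopicName.java is not shown in this commit
// excerpt, so the exact separator and format are assumptions.
public class TopicName {
    public static String createTopicName(String prefix, int topicId) {
        // A unique prefix per test runner (e.g. the hostname) keeps topics
        // from colliding when multiple runners share one Kafka cluster.
        return prefix + "-" + topicId;
    }

    public static void main(String[] args) {
        System.out.println(createTopicName("myhost", 42));
    }
}
```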

## Running

You can use your favorite configuration management tool, such as Ansible, to make the steps below more elegant. From a
central host you can do something like this:

```
for test_runner in ${test_runners}; do
rsync ~/.kafka-partition-availability-benchmark.properties target/kafka_partition_availability_benchmark.jar ${test_runner}:
ssh ${test_runner} 'echo "default_topic_prefix = `hostname`" >> ~/.kafka-partition-availability-benchmark.properties'
ssh ${test_runner} 'nohup java -jar kafka_partition_availability_benchmark.jar &> kafka_partition_availability_benchmark.log &';
ssh ${test_runner} 'for pr in `pgrep -f kafka_partition_availability_benchmark`; do sudo prlimit -n50000:50000 -p $pr; done';
done
```
110 changes: 110 additions & 0 deletions pom.xml
<!--
~ Copyright (c) 2018, salesforce.com, inc.
~ All rights reserved.
~ Licensed under the BSD 3-Clause license.
~ For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
-->

<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.salesforce</groupId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<artifactId>kafka_partition_availability_benchmark</artifactId>

<dependencies>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>1.0.0</version>
</dependency>
<dependency>
<groupId>io.prometheus</groupId>
<artifactId>simpleclient</artifactId>
<version>0.1.0</version>
</dependency>
<dependency>
<groupId>io.prometheus</groupId>
<artifactId>simpleclient_hotspot</artifactId>
<version>0.1.0</version>
</dependency>
<dependency>
<groupId>io.prometheus</groupId>
<artifactId>simpleclient_vertx</artifactId>
<version>0.1.0</version>
</dependency>
<dependency>
<groupId>io.vertx</groupId>
<artifactId>vertx-core</artifactId>
<version>3.0.0</version>
</dependency>
<dependency>
<groupId>io.vertx</groupId>
<artifactId>vertx-web</artifactId>
<version>3.0.0</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.25</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.25</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>UTF-8</encoding>
<showDeprecation>true</showDeprecation>
<showWarnings>true</showWarnings>
<optimize>true</optimize>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.1.0</version>
<executions>
<execution>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<!--<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>-->
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<manifestEntries>
<Main-Class>com.salesforce.Main</Main-Class>
<X-Compile-Source-JDK>1.8</X-Compile-Source-JDK>
<X-Compile-Target-JDK>1.8</X-Compile-Target-JDK>
</manifestEntries>
</transformer>
</transformers>
<finalName>${project.artifactId}</finalName>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
105 changes: 105 additions & 0 deletions src/main/java/com/salesforce/ConsumeTopic.java
/*
* Copyright (c) 2018, salesforce.com, inc.
* All rights reserved.
* Licensed under the BSD 3-Clause license.
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
*/

package com.salesforce;

import io.prometheus.client.Histogram;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicInteger;

class ConsumeTopic implements Callable<Exception> {
private static final Logger log = LoggerFactory.getLogger(ConsumeTopic.class);

private static final Histogram consumerReceiveTimeSecs = Histogram
.build("consumerReceiveTimeSecs", "Time taken to do consumer.poll")
.register();
private static final Histogram consumerCommitTimeSecs = Histogram
.build("consumerCommitTimeSecs", "Time it takes to commit new offset")
.register();

private final int topicId;
private final String key;
private final int consumerPollInterval;
private final AdminClient kafkaAdminClient;
private final Map<String, Object> kafkaConsumerConfig;
private final short replicationFactor;

/**
* @param topicId Each topic gets a numeric id
* @param key Prefix for topics created by this tool
* @param consumerPollInterval How long to wait between polls for new messages
* @param kafkaAdminClient Admin client used to verify the topic exists before consuming
* @param kafkaConsumerConfig Base consumer configuration; copied per topic so a per-topic group id can be set
* @param replicationFactor Expected replication factor of the topic under test
*/
public ConsumeTopic(int topicId, String key, int consumerPollInterval, AdminClient kafkaAdminClient,
Map<String, Object> kafkaConsumerConfig, short replicationFactor) {
this.topicId = topicId;
this.key = key;
this.consumerPollInterval = consumerPollInterval;
this.kafkaAdminClient = kafkaAdminClient;
this.kafkaConsumerConfig = Collections.unmodifiableMap(kafkaConsumerConfig);
this.replicationFactor = replicationFactor;
}

@Override
public Exception call() {
String topicName = TopicName.createTopicName(key, topicId);
try {
TopicVerifier.checkTopic(kafkaAdminClient, topicName, replicationFactor);

Map<String, Object> consumerConfigForTopic = new HashMap<>(kafkaConsumerConfig);
consumerConfigForTopic.put(ConsumerConfig.GROUP_ID_CONFIG, topicName);
KafkaConsumer<Integer, Integer> consumer = new KafkaConsumer<>(consumerConfigForTopic);
TopicPartition topicPartition = new TopicPartition(topicName, 0);
consumer.assign(Collections.singleton(topicPartition));

AtomicInteger numMessages = new AtomicInteger();
while (true) {
ConsumerRecords<Integer, Integer> messages;
Histogram.Timer consumerReceiveTimer = consumerReceiveTimeSecs.startTimer();
try {
messages = consumer.poll(0);
} finally {
consumerReceiveTimer.observeDuration();
}
if (messages.count() == 0) {
log.debug("Ran out of messages to process for topic {}; starting from beginning", topicName);
consumer.seekToBeginning(Collections.singleton(topicPartition));
numMessages.set(0);
Thread.sleep(consumerPollInterval);
continue;
}
numMessages.addAndGet(messages.count());

consumerCommitTimeSecs.time(consumer::commitSync);

ConsumerRecord<Integer, Integer> lastMessage =
messages.records(topicPartition).get(messages.count() - 1);

log.debug("Last consumed message {}:{}, consumed {} messages, topic: {}",
lastMessage.key(), lastMessage.value(), messages.count(), topicName);
Thread.sleep(consumerPollInterval);
}
} catch (Exception e) {
log.error("Failed consume", e);
return new Exception("Failed consume on topic " + topicName, e);
}

}
}
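The `Main` class that drives these tasks is not included in this excerpt. Note that `ConsumeTopic.call()` reports failure by *returning* an `Exception` rather than throwing it, so a driver would inspect the `Future`'s value instead of catching `ExecutionException`. A sketch of that pattern with a stand-in `Callable` (the real wiring in `Main` may differ):

```java
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class Driver {
    public static void main(String[] args) throws Exception {
        ExecutorService executor = Executors.newFixedThreadPool(4);

        // Stand-in for ConsumeTopic: null means success, a non-null
        // Exception means the consume loop failed (mirrors call() above).
        Callable<Exception> task = () -> {
            try {
                // ... consume loop would run here ...
                return null;
            } catch (Exception e) {
                return e;
            }
        };

        Future<Exception> result = executor.submit(task);
        // Check the returned value, not a thrown exception.
        if (result.get() != null) {
            System.err.println("Task failed: " + result.get());
        }
        executor.shutdown();
    }
}
```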
