first simple implementation
Filipponi, Luca (ELS) committed Sep 29, 2019
0 parents commit 6e9f2f9
Showing 12 changed files with 537 additions and 0 deletions.
32 changes: 32 additions & 0 deletions .gitignore
@@ -0,0 +1,32 @@
# sbt
# (may want to keep parts of 'project')
bin/
project/target
target/
build/

# eclipse
build
.classpath
.project
.settings
.worksheet

# intellij idea
*.log
*.iml
*.ipr
*.iws
.idea

# mac
.DS_Store

# other?
.history
.scala_dependencies
.cache
.cache-main

#general
*.class
27 changes: 27 additions & 0 deletions README.md
@@ -0,0 +1,27 @@
# kafka-message-seeker

This application starts a Kafka consumer that scans a topic looking for a given string.
To run the application, build the jar (or download the pre-built one) and run it with these options:

```
Usage: kafkaMessage seeker [options]
  -b, --brokers <value>     The kafka brokers
  -t, --topic <value>       The topic in which to seek the message
  -s, --search-for <value>  The string that will be searched for in the topic
  -o, --offset <value>      The offset to start with (for every partition!)
```

For example, if you want to find all the messages that contain the string "hello", starting from
offset 1000, on the topic "test_topic" with a broker at localhost:9092, you would run:

`java -jar kafka-message-seeker.jar --brokers localhost:9092 --topic test_topic --offset 1000 --search-for hello`

## Build your jar

You can build your own jar using sbt:
```
sbt clean assembly
```

This will run the tests as well (it should take less than a minute).
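
If you only want to run the test suite without building the fat jar, the standard sbt task should do it (the docker-testkit based tests presumably need a local Docker daemon running):

```
sbt test
```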
17 changes: 17 additions & 0 deletions build.sbt
@@ -0,0 +1,17 @@
name := "kafka-message-seeker"
version := "0.1"
organization := "com.filiponi"
scalaVersion := "2.12.8"

assemblyJarName in assembly := s"${name.value}-${version.value}.jar"

libraryDependencies ++= Seq(
  "org.apache.kafka" % "kafka-clients" % "2.3.0",
  "com.github.scopt" %% "scopt" % "4.0.0-RC2",
  "ch.qos.logback" % "logback-classic" % "1.2.3",
  "org.scalatest" %% "scalatest" % "3.0.8" % Test,
  "com.whisk" %% "docker-testkit-scalatest" % "0.9.8" % Test,
  "com.whisk" %% "docker-testkit-impl-spotify" % "0.9.8" % Test,
  "org.mockito" % "mockito-all" % "1.10.19" % Test
)

1 change: 1 addition & 0 deletions project/build.properties
@@ -0,0 +1 @@
sbt.version = 1.3.2
1 change: 1 addition & 0 deletions project/plugins.sbt
@@ -0,0 +1 @@
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.9")
15 changes: 15 additions & 0 deletions src/main/resources/logback.xml
@@ -0,0 +1,15 @@
<configuration>
    <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
        <layout class="ch.qos.logback.classic.PatternLayout">
            <Pattern>
                %d{HH:mm:ss.SSS} - %msg%n
            </Pattern>
        </layout>
    </appender>
    <root level="error">
        <appender-ref ref="CONSOLE"/>
    </root>
    <logger name="com.filipponi" level="debug" additivity="false">
        <appender-ref ref="CONSOLE"/>
    </logger>
</configuration>
48 changes: 48 additions & 0 deletions src/main/scala/com/filipponi/seeker/CommandLineParser.scala
@@ -0,0 +1,48 @@
package com.filipponi.seeker

import scopt.{OParser, OParserBuilder}

object CommandLineParser {

  /**
    * Simple configuration class for the input args.
    * @param brokers the kafka brokers.
    * @param topic the topic to use.
    * @param stringToSeek the string to seek in the kafka messages.
    * @param offset the offset to start with.
    */
  case class Config(brokers: String,
                    topic: String,
                    stringToSeek: String,
                    offset: Long)

  object Config {
    def empty(): Config = new Config("", "", "", 0L)
  }

  val builder: OParserBuilder[Config] = OParser.builder[Config]

  val kafkaMsgSeekerArgsParser: OParser[Unit, Config] = {
    import builder._
    OParser.sequence(
      programName("kafkaMessage seeker"),
      head("kafkaMessageSeeker", "0.1"),
      opt[String]('b', "brokers")
        .required()
        .action((x, c) => c.copy(brokers = x))
        .text("The kafka brokers"),
      opt[String]('t', "topic")
        .required()
        .action((x, c) => c.copy(topic = x))
        .text("The topic in which to seek the message"),
      opt[String]('s', "search-for")
        .required()
        .action((x, c) => c.copy(stringToSeek = x))
        .text("The string that will be searched for in the topic"),
      opt[Long]('o', "offset")
        .required()
        .action((x, c) => c.copy(offset = x))
        .text("The offset to start with (for every partition!)")
    )
  }
}
89 changes: 89 additions & 0 deletions src/main/scala/com/filipponi/seeker/MsgSeeker.scala
@@ -0,0 +1,89 @@
package com.filipponi.seeker

import java.time.Duration
import java.util.{Collections, Properties}

import com.filipponi.seeker.CommandLineParser.{Config, kafkaMsgSeekerArgsParser}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord, ConsumerRecords, KafkaConsumer}
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, StringDeserializer}
import org.slf4j.{Logger, LoggerFactory}
import scopt.OParser

import scala.collection.JavaConverters._

object MsgSeeker extends App {

  private val logger = LoggerFactory.getLogger(getClass)

  seek(logger, args)

  private[seeker] def seek(logger: Logger, args: Array[String]) = {
    OParser.parse(kafkaMsgSeekerArgsParser, args, Config.empty()) match {
      case Some(config) =>

        logger.info(s"Searching for string: ${config.stringToSeek}, from offset: ${config.offset} on topic: ${config.topic}")

        val consumer = createConsumer(config.brokers)

        consumer.subscribe(Collections.singletonList(config.topic))

        val partitionInfos = consumer.partitionsFor(config.topic).asScala

        // This first poll triggers the partition assignment for this consumer; without it, seek() would fail.
        consumer.poll(Duration.ofSeconds(1))

        partitionInfos.foreach { partitionInfo =>
          consumer.seek(new TopicPartition(partitionInfo.topic(), partitionInfo.partition()), config.offset)
        }

        var moreMessages = true // mutable flag to stop the loop; not elegant, but the simplest way to break out

        var timer = System.currentTimeMillis()

        while (moreMessages) {

          val records: ConsumerRecords[String, Array[Byte]] = consumer.poll(Duration.ofSeconds(1))
          if (records.isEmpty) moreMessages = false
          val iterator = records.iterator()
          while (iterator.hasNext) {
            val record: ConsumerRecord[String, Array[Byte]] = iterator.next()

            val value = new String(record.value())

            if (value.contains(config.stringToSeek)) {
              logger.info(s"I've found a match! \n {Key: ${record.key()} \n Offset: ${record.offset()} \n Partition: ${record.partition()} \n Value: $value}")
            }

            // prints a progress update roughly every 20 seconds
            if (System.currentTimeMillis() - timer > 20000) {
              logger.info(s"{Currently processing record at Offset: ${record.offset()} and partition: ${record.partition()} }")
              timer = System.currentTimeMillis()
            }
          }
        }

        logger.info(s"No more messages!")

        consumer.close()

      case _ =>

    }

  }

  private def createConsumer(brokers: String): KafkaConsumer[String, Array[Byte]] = {
    val props = new Properties()
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[ByteArrayDeserializer])
    props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false")
    props.put(ConsumerConfig.GROUP_ID_CONFIG, s"kafka-message-seeker-${scala.util.Random.nextString(10)}")
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    val consumer = new KafkaConsumer[String, Array[Byte]](props)
    consumer
  }

}
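
A side note on the poll-before-seek workaround in MsgSeeker: because subscribe() relies on group management, the consumer has no partitions assigned until the first poll. An alternative is manual assignment via assign(), which makes seek() safe immediately. The sketch below is not part of this commit and the helper name is hypothetical:

```
// Hypothetical alternative to subscribe() + poll(): manual assignment lets us seek right away.
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.TopicPartition
import scala.collection.JavaConverters._

def assignAndSeek(consumer: KafkaConsumer[String, Array[Byte]], topic: String, offset: Long): Unit = {
  val partitions = consumer.partitionsFor(topic).asScala
    .map(p => new TopicPartition(p.topic(), p.partition()))
  consumer.assign(partitions.asJava)                    // no group rebalance involved
  partitions.foreach(tp => consumer.seek(tp, offset))   // safe: assignment is already set
}
```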
16 changes: 16 additions & 0 deletions src/test/resources/logback-test.xml
@@ -0,0 +1,16 @@
<configuration>
    <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
        <layout class="ch.qos.logback.classic.PatternLayout">
            <Pattern>
                %d{HH:mm:ss.SSS} - %msg%n
            </Pattern>
        </layout>
    </appender>
    <root level="error">
        <appender-ref ref="CONSOLE"/>
    </root>
    <logger name="com.filipponi" level="debug" additivity="false">
        <appender-ref ref="CONSOLE"/>
    </logger>
</configuration>
52 changes: 52 additions & 0 deletions src/test/scala/com/filipponi/seeker/CommandLineParserTest.scala
@@ -0,0 +1,52 @@
package com.filipponi.seeker

import com.filipponi.seeker.CommandLineParser.{Config, kafkaMsgSeekerArgsParser}
import org.scalatest.{FlatSpec, Matchers}
import scopt.OParser

class CommandLineParserTest extends FlatSpec with Matchers {

  "CommandLineParser" should "return None when the options are not correct" in {

    val args = new Array[String](0)

    OParser.parse(kafkaMsgSeekerArgsParser, args, Config.empty()) should be(None)

  }

  "CommandLineParser" should "return a Some(config) when all required options are passed" in {

    val args = new Array[String](8)

    args(0) = "--topic"
    args(1) = "test"
    args(2) = "--search-for"
    args(3) = "string"
    args(4) = "--offset"
    args(5) = "101010"
    args(6) = "--brokers"
    args(7) = "localhost:9092"

    OParser.parse(kafkaMsgSeekerArgsParser, args, Config.empty()) should be(Some(Config("localhost:9092", "test", "string", 101010)))

  }

  "CommandLineParser" should "return a Some(config) regardless of the order of the option pairs" in {

    val args = new Array[String](8)

    args(2) = "--topic"
    args(3) = "test"
    args(0) = "--search-for"
    args(1) = "string"
    args(6) = "--offset"
    args(7) = "101010"
    args(4) = "--brokers"
    args(5) = "localhost:9092"

    OParser.parse(kafkaMsgSeekerArgsParser, args, Config.empty()) should be(Some(Config("localhost:9092", "test", "string", 101010)))

  }

}