Monday, July 2, 2018

Quickstart Druid Kafka Indexing Service

kafka indexing service

The Overlord and MiddleManagers should have the following extensions loaded: druid.extensions.loadList = ["druid-datasketches", "druid-hdfs-storage", "druid-kafka-indexing-service", "mysql-metadata-storage"]

supervisor spec

cat supervisor-spec.json
{
  "type": "kafka",
  "dataSchema": {
    "dataSource": "metrics-kafka",
    "parser": {
      "type": "string",
      "parseSpec": {
        "format": "json",
        "timestampSpec": {
          "column": "timestamp",
          "format": "auto"
        },
        "dimensionsSpec": {
          "dimensions": [],
          "dimensionExclusions": [
            "timestamp",
            "value"
          ]
        }
      }
    },
    "metricsSpec": [
      {
        "name": "count",
        "type": "count"
      },
      {
        "name": "value_sum",
        "fieldName": "value",
        "type": "doubleSum"
      },
      {
        "name": "value_min",
        "fieldName": "value",
        "type": "doubleMin"
      },
      {
        "name": "value_max",
        "fieldName": "value",
        "type": "doubleMax"
      }
    ],
    "granularitySpec": {
      "type": "uniform",
      "segmentGranularity": "HOUR",
      "queryGranularity": "NONE"
    }
  },
  "tuningConfig": {
    "type": "kafka",
    "maxRowsPerSegment": 5000000
  },
  "ioConfig": {
    "topic": "metrics",
    "consumerProperties": {
      "bootstrap.servers": "host2-node2:6667"
    },
    "taskCount": 1,
    "replicas": 1,
    "taskDuration": "PT1H"
  }
}

Submit the supervisor spec to the Overlord

curl -X POST -H 'Content-Type: application/json' -d @supervisor-spec.json http://host2-node2:8090/druid/indexer/v1/supervisor
{"id":"metrics-kafka"}

Get Supervisor IDs

[root@host2-node2 ~]# curl    http://host2-node2:8090/druid/indexer/v1/supervisor
["metrics-kafka"]

Create the Kafka topic

/usr/hdp/current/kafka-broker/bin/kafka-topics.sh --create --zookeeper `hostname`:2181 --replication-factor 1 --partition 1 --topic metrics

Start producing messages to Kafka

python generate-example-metrics -c 1  | /usr/hdp/current/kafka-broker/bin/kafka-console-producer.sh --broker-list `hostname -f`:6667 --topic metrics

Now you can query this real-time data. To check on the datasource, look at the Overlord and Coordinator UIs.