作者:京东科技 刘恩浩

一、背景

基于K8s集群的私有化交付方案中,日志收集采用了ilogtail+logstash+kafka+es方案,其中ilogtail负责日志收集,logstash负责对数据转换,kafka负责对日志传递中的消峰进而减少es的写入压力,es用来保存日志数据。在私有化交付中本方案中涉及的中间件一般需要单独部署,但是在京东内网环境的部署考虑到kafka和es的高可用,则不推荐采用单独部署的方案。

二、新方案实践

1.新方案简介

在京东内网环境部署K8S收集日志, kafka+es的替代方案考虑使用JMQ+JES,由于JMQ的底层是基于kafaka、JES的底层基于ES,所以该替换方案理论上是可行的

2.主要架构

数据流向大致如下
应用日志 -> ilogtail -> JMQ -> logstash -> JES

3.如何使用

核心改造点汇总

  1. ilogtail nameservers配置
    增加解析JMQ域名的nameserver(京东云主机上无法直接解析.local域名)
spec:
    spec:
      dnsPolicy: "None"
      dnsConfig:
        nameservers:
          - x.x.x.x # 可以解析jmq域名的nameserver
  1. ilogtail flushers配置
    调整发送到JMQ到配置
apiVersion: v1
kind: ConfigMap
metadata:
  name: ilogtail-user-cm
  namespace: elastic-system
data:
  app_stdout.yaml: |
    flushers:
      - Type: flusher_stdout
        OnlyStdout: true
      - Type: flusher_kafka_v2
        Brokers:
          - nameserver.jmq.jd.local:80 # jmq元数据地址
        Topic: ai-middle-k8s-log-prod # jmq topic 
        ClientID: ai4middle4log # Kafka的用户ID(识别客户端并设置其唯一性),对应jmq的Group名称,重要‼️ (https://ilogtail.gitbook.io/ilogtail-docs/plugins/input/service-kafka#cai-ji-pei-zhi-v2)   
  1. logstash kafka&es配置
apiVersion: v1
kind: ConfigMap
metadata:
  name: logstash-config
  namespace: elastic-system
  labels:
    elastic-app: logstash
data:
  logstash.conf: |-
    input {
        kafka {
                bootstrap_servers => ["nameserver.jmq.jd.local:80"] #jmq的元数据地址
                group_id => "ai4middle4log" # jmq的Group的名称
                client_id => "ai4middle4log" # jmq的Group的名称,即jmq的省略了kafka中的client_id概念,用Group名称代替
                consumer_threads => 2
                decorate_events => true
                topics => ["ai-middle-k8s-log-prod"] # jmp的topic
                auto_offset_reset => "latest"
                codec => json { charset => "UTF-8" }
        }
    }
    output {
        elasticsearch {
                hosts => ["http://x.x.x.x:40000","http://x.x.x.x:40000","http://x.x.x.x:40000"] # es地址
                index =>  "%{[@metadata][kafka][topic]}-%{+YYYY-MM-dd}" # 索引规则
                user => "XXXXXX" #jes的用户名
                password => "xxxxx" #jes的密码
                ssl => "false"
                ssl_certificate_verification => "false"
        }
    }

ilogtail 的配置如下

# ilogtail-daemonset.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: ilogtail-ds
  namespace: elastic-system
  labels:
    k8s-app: logtail-ds
spec:
  selector:
    matchLabels:
      k8s-app: logtail-ds
  template:
    metadata:
      labels:
        k8s-app: logtail-ds
    spec:
      dnsPolicy: "None"
      dnsConfig:
        nameservers:
          - x.x.x.x # (京东云主机上)可以解析jmq域名的nameserver
      tolerations:
        - operator: Exists                    # deploy on all nodes
      containers:
        - name: logtail
          env:
            - name: ALIYUN_LOG_ENV_TAGS       # add log tags from env
              value: _node_name_|_node_ip_
            - name: _node_name_
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: spec.nodeName
            - name: _node_ip_
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: status.hostIP
            - name: cpu_usage_limit           # iLogtail's self monitor cpu limit
              value: "1"
            - name: mem_usage_limit           # iLogtail's self monitor mem limit
              value: "512"
          image: dockerhub.ai.jd.local/ai-middleware/ilogtail-community-edition/ilogtail:1.3.1
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 1000m
              memory: 1Gi
            requests:
              cpu: 400m
              memory: 384Mi
          volumeMounts:
            - mountPath: /var/run                       # for container runtime socket
              name: run
            - mountPath: /logtail_host                  # for log access on the node
              mountPropagation: HostToContainer
              name: root
              readOnly: true
            - mountPath: /usr/local/ilogtail/checkpoint # for checkpoint between container restart
              name: checkpoint
            - mountPath: /usr/local/ilogtail/user_yaml_config.d # mount config dir
              name: user-config
              readOnly: true
            - mountPath: /usr/local/ilogtail/apsara_log_conf.json
              name: apsara-log-config
              readOnly: true
              subPath: apsara_log_conf.json
      dnsPolicy: ClusterFirst
      hostNetwork: true
      volumes:
        - hostPath:
            path: /var/run
            type: Directory
          name: run
        - hostPath:
            path: /
            type: Directory
          name: root
        - hostPath:
            path: /etc/ilogtail-ilogtail-ds/checkpoint
            type: DirectoryOrCreate
          name: checkpoint
        - configMap:
            defaultMode: 420
            name: ilogtail-user-cm
          name: user-config
        - configMap:
            defaultMode: 420
            name: ilogtail-apsara-log-config-cm
          name: apsara-log-config
# ilogtail-user-configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: ilogtail-user-cm
  namespace: elastic-system
data:
  app_stdout.yaml: |
    enable: true
    inputs:
      - Type: service_docker_stdout
        Stderr: true
        Stdout: true
        K8sNamespaceRegex: ai-train
        ExternalK8sLabelTag:
          platform/resource-name: k8s_label_resource-name
          platform/task-identify: k8s_label_task-identify
          task-id: k8s_label_task-id
          run-id: k8s_label_run-id
          request-id: k8s_label_request-id
    processors:
      - Type: processor_rename
        SourceKeys:
          - k8s_label_resource-name
          - k8s_label_task-identify
          - k8s_label_task-id
          - k8s_label_run-id
          - k8s_label_request-id
          - _namespace_
          - _image_name_
          - _pod_uid_
          - _pod_name_
          - _container_name_
          - _container_ip_
          - __path__
          - _source_
        DestKeys:
          - resource_name
          - task_identify
          - task_id
          - run_id
          - request_id
          - namespace
          - image_name
          - pod_uid
          - pod_name
          - container_name
          - container_ip
          - path
          - source
    flushers:
      - Type: flusher_stdout
        OnlyStdout: true
      - Type: flusher_kafka_v2
        Brokers:
          - nameserver.jmq.jd.local:80 # jmq元数据地址
        Topic: ai-middle-k8s-log-prod # jmq topic 
        ClientID: ai4middle4log # Kafka的用户ID(识别客户端并设置其唯一性),对应jmq的Group名称,重要‼️ (https://ilogtail.gitbook.io/ilogtail-docs/plugins/input/service-kafka#cai-ji-pei-zhi-v2)

  app_file_log.yaml: |
    enable: true
    inputs:
      - Type: file_log
        LogPath: /export/Logs/ai-dt-algorithm-tools
        FilePattern: "*.log"
        ContainerInfo:
          K8sNamespaceRegex: ai-train
          ExternalK8sLabelTag:
            platform/resource-name: k8s_label_resource-name
            platform/task-identify: k8s_label_task-identify
            task-id: k8s_label_task-id
            run-id: k8s_label_run-id
            request-id: k8s_label_request-id

    processors:
      - Type: processor_add_fields
        Fields:
          source: file
      - Type: processor_rename
        SourceKeys:
          - __tag__:k8s_label_resource-name
          - __tag__:k8s_label_task-identify
          - __tag__:k8s_label_task-id
          - __tag__:k8s_label_run-id
          - __tag__:k8s_label_request-id
          - __tag__:_namespace_
          - __tag__:_image_name_
          - __tag__:_pod_uid_
          - __tag__:_pod_name_
          - __tag__:_container_name_
          - __tag__:_container_ip_
          - __tag__:__path__
        DestKeys:
          - resource_name
          - task_identify
          - task_id
          - run_id
          - request_id
          - namespace
          - image_name
          - pod_uid
          - pod_name
          - container_name
          - container_ip
          - path

    flushers:
      - Type: flusher_stdout
        OnlyStdout: true
      - Type: flusher_kafka_v2
        Brokers:
          - nameserver.jmq.jd.local:80
        Topic: ai-middle-k8s-log-prod
        ClientID: ai4middle4log

logstash 的配置如下

# logstash-configmap.yaml
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: logstash-config
  namespace: elastic-system
  labels:
    elastic-app: logstash
data:
  logstash.conf: |-
    input {
        kafka {
                bootstrap_servers => ["nameserver.jmq.jd.local:80"] #jmq的元数据地址
                #group_id => "services"
                group_id => "ai4middle4log" # jmq的Group的名称
                client_id => "ai4middle4log" # jmq的Group的名称,即jmq的省略了kafka中的client_id概念,用Group名称代替
                consumer_threads => 2
                decorate_events => true
                #topics_pattern => ".*"
                topics => ["ai-middle-k8s-log-prod"] # jmp的topic
                auto_offset_reset => "latest"
                codec => json { charset => "UTF-8" }
        }
    }


    filter {
      ruby {
          code => "event.set('index_date', event.get('@timestamp').time.localtime + 8*60*60)"
      }
      ruby {
          code => "event.set('message',event.get('contents'))"
      }
      #ruby {
      #    code => "event.set('@timestamp',event.get('time').time.localtime)"
      #}

      mutate {
          remove_field => ["contents"]
          convert => ["index_date", "string"]
          #convert => ["@timestamp", "string"]
          gsub => ["index_date", "T.*Z",""]
          #gsub => ["@timestamp", "T.*Z",""]
      }
    }


    output {
        elasticsearch {
                #hosts => ["https://ai-middle-cluster-es-http:9200"]
                hosts => ["http://x.x.x.x:40000","http://x.x.x.x:40000","http://x.x.x.x:40000"] # es地址
                index =>  "%{[@metadata][kafka][topic]}-%{+YYYY-MM-dd}" # 索引规则
                user => "XXXXXX" #jes的用户名
                password => "xxxxx" #jes的密码
                ssl => "false"
                ssl_certificate_verification => "false"
                #cacert => "/usr/share/logstash/cert/ca_logstash.cer"
        }
        stdout {
            codec => rubydebug
        }
    }

4.核心价值

在私有化部署的基础上通过简单改造实现了与京东内部中间件的完美融合,使得系统在高可用性上适应性更强、可用范围更广。


京东云开发者
3.3k 声望5.4k 粉丝

京东云开发者(Developer of JD Technology)是京东云旗下为AI、云计算、IoT等相关领域开发者提供技术分享交流的平台。