前言

  • 技术栈

    Elasticsearch 7.17.2
    python 3.8
    httpx  0.22.0
    loguru 0.6.0
  • hao 分词器:https://github.com/tenlee2012/elasticsearch-analysis-hao
  • 有时更新 ES 分词器或远程词典后,不确定每个节点是否都已更新到位,没找到直接的命令来校验,故写了一份 Python 脚本来做校验
  • 原理是利用 index.routing.allocation.include._ip 将索引的分片分配到指定的某个节点上
  • 代码创建了 test_{nodeName} 的索引,测试完后手动删除

    DELETE test_*

代码

# encoding: utf8
# author: qbit
# date: 2022-06-16
# summary:  遍历 ES 数据节点校验分词结果
import pprint
import httpx
from loguru import logger

coordnode = 'http://192.168.2.67:9200'      # address of the ES coordinating node all requests are sent to
esuser = 'elastic'                          # ES cluster user name (HTTP basic auth)
espwd = 'xxxx'              # ES cluster password (HTTP basic auth)
analyzer = "hao_index_mode"                 # name of the analyzer to verify
intext = "燕雀安知鸿鹄之志"                    # sample text sent to the analyzer
outtext = "燕雀;安;知;鸿鹄之志;鸿鹄"           # expected tokens, joined with ';'
 
def GetNodeList():
    r"""Fetch the node list of the ES cluster.

    Calls ``_cat/nodes`` on the coordinating node, requesting the columns
    ``name``, ``ip``, ``master`` and ``node.role`` in JSON format, sorted by
    name. Every returned entry is logged at debug level.

    Returns:
        The decoded JSON body of the response (one entry per node).

    Raises:
        httpx.HTTPStatusError: if the cluster answers with an error status
            (e.g. wrong credentials), instead of silently returning the
            error payload as if it were a node list.
    """
    url = f'{coordnode}/_cat/nodes?v=true&h=name,ip,master,node.role&s=name&format=json'
    r = httpx.get(url, auth=(esuser, espwd))
    # Fail fast: an error body is a dict, and iterating it would hand the
    # caller its keys instead of node entries.
    r.raise_for_status()
    result = r.json()
    for dic in result:
        logger.debug(dic)

    return result

def CheckOneNodeAnalyzer(nodeDict: dict, expected: str):
    r"""Create an index pinned to one node and verify the analyzer output there.

    An index named ``test_{nodeName}`` with one shard and no replicas is
    created with ``routing.allocation.include._ip`` set to the node's IP.
    The module-level sample text is then run through the module-level
    analyzer via that index's ``_analyze`` endpoint. The index is NOT
    deleted afterwards.

    Args:
        nodeDict: node entry providing at least the keys ``name`` and ``ip``.
        expected: expected tokens joined with ``';'``.

    Returns:
        ``[status, nodeName, nodeIP, tokenLine]`` where ``status`` is ``'ok'``
        when the joined tokens equal ``expected`` and ``'no'`` otherwise.
        If the ``_analyze`` request itself fails, ``status`` is ``'no'`` and
        the last element holds the raw error body instead of a token line.
    """
    nodeName = nodeDict['name']
    nodeIP = nodeDict['ip']
    indexName = f"test_{nodeName}"
    logger.info(f"{nodeName}, {nodeIP}, {indexName}")

    createBody = {
        "settings": {
            "index": {
                "number_of_shards": 1,
                "number_of_replicas": 0,
                "routing.allocation.include._ip": nodeIP
            }
        }
    }
    r = httpx.put(f"{coordnode}/{indexName}", auth=(esuser, espwd), json=createBody)  # create the index
    logger.debug(r)
    if r.status_code >= 400:
        # Not fatal: the index may simply be left over from an earlier run,
        # so keep going and let the analyze call decide.
        logger.warning(f"create index {indexName} failed: {r.status_code} {r.text}")

    analyzeBody = {
        "analyzer": analyzer,
        "text": intext
    }
    r = httpx.post(f"{coordnode}/{indexName}/_analyze", auth=(esuser, espwd), json=analyzeBody)  # run the analyzer
    logger.debug(r)
    if r.status_code >= 400:
        # A node that cannot analyze at all is a failed node, not a reason
        # to abort the check of the remaining nodes.
        logger.error(f"analyze on {indexName} failed: {r.status_code} {r.text}")
        return ['no', nodeName, nodeIP, r.text]

    tokenLine = ';'.join([item['token'] for item in r.json()['tokens']])
    logger.info(tokenLine)
    status = 'ok' if tokenLine == expected else 'no'
    return [status, nodeName, nodeIP, tokenLine]

if __name__ == '__main__':
    # Check the analyzer on every data node and sort the per-node results
    # into passed (okList) and failed (noList) buckets, then report both.
    okList, noList = [], []
    nodeList = GetNodeList()
    for nodeInfo in nodeList:
        if 'd' not in nodeInfo['node.role']:   # skip anything that is not a data node
            continue
        verdict = CheckOneNodeAnalyzer(nodeInfo, outtext)
        bucket = okList if verdict[0] == 'ok' else noList
        bucket.append(verdict)
        print('------')
    logger.info(f"okList size: {len(okList)}")
    pprint.pprint(okList)
    logger.info(f"noList size: {len(noList)}")
    pprint.pprint(noList)
qbit snap

qbit
268 声望279 粉丝