前言
- 技术栈:Elasticsearch 7.17.2、Python 3.8、httpx 0.22.0、loguru 0.6.0
- hao 分词器:https://github.com/tenlee2012/elasticsearch-analysis-hao
- 有时更新 ES 分词器或远程词典后,不确定每个节点是否都已更新到位,也没找到可以直接校验的命令,故写了一份 Python 脚本来做校验
- 原理是利用 `index.routing.allocation.include._ip` 将索引的分片分配到指定的某个节点上
- 代码创建了名为 `test_{nodeName}` 的索引,测试完后需手动删除:`DELETE test_*`
代码
# encoding: utf8
# author: qbit
# date: 2022-06-16
# summary: 遍历 ES 数据节点校验分词结果
import pprint
import httpx
from loguru import logger
# Base URL of the ES coordinating node; every request URL is built from it.
coordnode = "http://192.168.2.67:9200"

# Cluster account and password, passed together as the `auth` pair on requests.
esuser = "elastic"
espwd = "xxxx"

# Name of the analyzer being verified.
analyzer = 'hao_index_mode'

# Sample text submitted to the analyzer.
intext = '燕雀安知鸿鹄之志'

# Expected tokens for the sample text, joined with ';'.
outtext = '燕雀;安;知;鸿鹄之志;鸿鹄'
def GetNodeList():
    r"""Fetch the list of nodes in the ES cluster.

    Calls ``_cat/nodes`` on the coordinating node, requesting the columns
    ``name``, ``ip``, ``master`` and ``node.role``, sorted by name, as JSON.
    Every returned entry is written to the debug log.

    Returns:
        The decoded JSON body of the response.

    Raises:
        httpx.HTTPStatusError: if the cluster answers with a 4xx/5xx status
            (for example wrong credentials).
    """
    url = f'{coordnode}/_cat/nodes?v=true&h=name,ip,master,node.role&s=name&format=json'
    r = httpx.get(url, auth=(esuser, espwd))
    # Fail here with a clear HTTP error instead of handing an error body
    # back to the caller as if it were the node list.
    r.raise_for_status()
    result = r.json()
    for dic in result:
        logger.debug(dic)
    return result
def CheckOneNodeAnalyzer(nodeDict: dict, expected: str):
    r"""Create an index pinned to one node and test the analyzer through it.

    An index named ``test_{nodeName}`` is created with one shard, no replicas
    and ``routing.allocation.include._ip`` set to the node's IP; the sample
    text ``intext`` is then analyzed via that index with ``analyzer``.
    The index is not deleted here.

    Args:
        nodeDict: one node entry; its ``name`` and ``ip`` keys are read.
        expected: the expected tokens joined with ``';'``.

    Returns:
        ``[status, nodeName, nodeIP, tokenLine]`` where ``status`` is ``'ok'``
        when the joined tokens equal ``expected`` and ``'no'`` otherwise.
        When the analyze request itself fails, ``status`` is ``'no'`` and
        ``tokenLine`` is an empty string.
    """
    nodeName = nodeDict['name']
    nodeIP = nodeDict['ip']
    indexName = f"test_{nodeName}"
    logger.info(f"{nodeName}, {nodeIP}, {indexName}")

    createBody = {
        "settings": {
            "index": {
                "number_of_shards": 1,
                "number_of_replicas": 0,
                "routing.allocation.include._ip": nodeIP
            }
        }
    }
    r = httpx.put(f"{coordnode}/{indexName}", auth=(esuser, espwd), json=createBody)  # create the index
    logger.debug(r)
    if r.status_code >= 400:
        # Best effort: the index may be left over from an earlier run, so a
        # failed creation is reported but does not stop the check.
        logger.warning("create index {} returned HTTP {}: {}", indexName, r.status_code, r.text)

    analyzeBody = {
        "analyzer": analyzer,
        "text": intext
    }
    r = httpx.post(f"{coordnode}/{indexName}/_analyze", auth=(esuser, espwd), json=analyzeBody)  # run the analyzer
    logger.debug(r)
    if r.status_code >= 400:
        # An error body has no 'tokens' key; report this node as failed
        # instead of aborting the traversal of the remaining nodes.
        logger.error("analyze on {} returned HTTP {}: {}", indexName, r.status_code, r.text)
        return ['no', nodeName, nodeIP, '']

    tokenLine = ';'.join(tok['token'] for tok in r.json()['tokens'])
    logger.info(tokenLine)
    status = 'ok' if tokenLine == expected else 'no'
    return [status, nodeName, nodeIP, tokenLine]
if __name__ == '__main__':
    # Check every data node and sort the outcomes into passed / failed.
    okList = []
    noList = []
    for node in GetNodeList():
        if 'd' not in node['node.role']:
            # Only nodes whose role string contains 'd' are checked.
            continue
        outcome = CheckOneNodeAnalyzer(node, outtext)
        bucket = okList if outcome[0] == 'ok' else noList
        bucket.append(outcome)

    # Summary: counts go to the log, the full result lists to stdout.
    print('------')
    logger.info(f"okList size: {len(okList)}")
    pprint.pprint(okList)
    logger.info(f"noList size: {len(noList)}")
    pprint.pprint(noList)
qbit snap
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用。你还可以使用 @ 来通知其他用户。