前言

  • 技术栈
Python    3.11
pyparsing 3.1.2
loguru    0.7.2

案例

  • 测试代码
# encoding: utf-8
# author: qbit
# date: 2024-04-23
# summary: 将与或非逻辑表达式转换为 ES 表达式

import json
import pyparsing as pp
from loguru import logger

# Sample query: comparisons joined by AND / OR / NOT, with parentheses for grouping.
line = 'owner=x_111 AND doc_type=%x%_222 OR author=x_333 OR organ=x_444 AND (NOT pub_year=x_555)'

# Comparison operators. The matched text is kept in the parse result and is
# what list2dsl later dispatches on.
operator = (
    pp.Literal(r'=x_') |            # exact match
    pp.Literal(r'=%x%_')            # match with a wildcard on both sides
)
field = pp.Word(pp.alphanums + '_')
value = pp.Word(pp.alphanums)
# One comparison, grouped so it shows up as a [field, operator, value] list.
exprGroup: pp.Group = pp.Group(field("field") + operator("operator") + value("value"))

# Keyword matches the exact word only. Word('AND') would treat its argument as
# a *character set* and accept any run of those letters ('DNA', 'AAA', ...),
# producing operator tokens that list2dsl cannot recognise.
logicAND = pp.Keyword('AND')('logic')
logicOR = pp.Keyword('OR')('logic')
logicNOT = pp.Keyword('NOT')('logic')

# infixNotation expects precedence levels from highest to lowest, so the
# conventional boolean order is NOT > AND > OR. With NOT listed last,
# "a AND NOT b" could not be parsed without parentheses and "NOT a AND b"
# was grouped as NOT (a AND b).
exprForward = pp.infixNotation(
    exprGroup("Expr"),
    [
        (logicNOT, 1, pp.opAssoc.RIGHT, ),
        (logicAND, 2, pp.opAssoc.LEFT, ),
        (logicOR, 2, pp.opAssoc.LEFT, ),
    ]
).setResultsName("Result", True)

# parseAll=True makes trailing, unparsed input an error instead of ignoring it.
result: pp.ParseResults = exprForward.parseString(line, parseAll=True)
logger.debug(f"result list: \n{json.dumps(result.as_list(), indent=4)}")

def list2dsl(lst):
    r''' 将 pyparsing 解析出来的列表递归转化为 Elasticsearch DSL '''
    if (len(lst) == 1) and isinstance(lst[0], list):     # 列表中只有一个列表元素
        return list2dsl(lst[0])
    if lst[0] == 'NOT':
        return {
            'bool': {
                'must_not': list2dsl(lst[1])
            }
        }
    
    match lst[1]:
        case 'AND':
            mustList = []
            for item in lst:
                if item != 'AND':
                    mustList.append(list2dsl(item))
            return {
                'bool': {
                    'must': mustList
                }
            }
        case 'OR':
            shouldList = []
            for item in lst:
                if item != 'OR':
                    shouldList.append(list2dsl(item))
            return {
                'bool': {
                    'should': shouldList
                }
            }
        case r'=x_':
            return {
                'query_string': f"{lst[0]}:{lst[2]}"
            }
        case r'=%x%_':
            return {
                'query_string': f"{lst[0]}:*{lst[2]}*"
            }
        case _:
            pass

# Turn the parse tree into a DSL dict, then log it pretty-printed next to the
# original query line.
parsed_tree = result.as_list()
esdsl = json.dumps(list2dsl(parsed_tree), indent=4)
logger.debug(f"es dsl: \n {esdsl}")
logger.debug(f"line: {line}")
  • 测试输出
 {
    "bool": {
        "should": [
            {
                "bool": {
                    "must": [
                        {
                            "query_string": "owner:111"
                        },
                        {
                            "query_string": "doc_type:*222*"
                        }
                    ]
                }
            },
            {
                "query_string": "author:333"
            },
            {
                "bool": {
                    "must": [
                        {
                            "query_string": "organ:444"
                        },
                        {
                            "bool": {
                                "must_not": {
                                    "query_string": "pub_year:555"
                                }
                            }
                        }
                    ]
                }
            }
        ]
    }
}

相关资料

文章

本文出自 qbit snap

qbit
268 声望 279 粉丝