前言

  • 技术栈
python      3.11.8
bidict      0.23.1
pyparsing   3.1.4
sympy       1.13.3  

案例

  • 测试代码
# encoding: utf-8
# author: qbit
# date: 2025-02-05
# summary: 使用 pyparsing 和 sympy 化简与或非逻辑表达式

import re
import pyparsing as pp
import bidict
from sympy import sympify

# Sample rule: redundant parentheses, a duplicated condition and all three logic operators.
line = '(((owner=111 AND owner=111 AND doc_type=222))) OR author=333 OR organ=444 AND ((NOT pub_year>555))'

# Comparison operators allowed inside a single `field <op> value` condition.
operator = (
            pp.Literal(r'=') |
            pp.Literal(r'>')
        )
field = pp.Word(pp.alphanums + '_')
value = pp.Word(pp.alphanums)
# One atomic condition, grouped so it appears as a [field, operator, value] sub-list in the results.
exprGroup: pp.Group = pp.Group(field("field") + operator("operator") + value("value"))

# pp.Keyword matches the exact word. pp.Word('AND') would treat 'AND' as a *character set*
# and therefore also accept tokens such as 'DNA' or 'AA' as the operator.
logicAND = pp.Keyword('AND')('logic')
logicOR = pp.Keyword('OR')('logic')
logicNOT = pp.Keyword('NOT')('logic')

exprForward = pp.infixNotation(
    exprGroup("Expr"),
    [
        # Each tuple is (operator, number of operands, associativity).
        # Precedence comes from the position in this list: earlier levels bind tighter,
        # so the conventional order is NOT > AND > OR. With NOT listed last,
        # 'a=1 AND NOT b=2' could not be parsed without extra parentheses.
        (logicNOT, 1, pp.opAssoc.RIGHT, ),
        (logicAND, 2, pp.opAssoc.LEFT, ),
        (logicOR, 2, pp.opAssoc.LEFT, ),
    ]
).setResultsName("Result", True)

# parseAll=True makes the parse fail unless the whole line is consumed.
result: pp.results.ParseResults = exprForward.parseString(line, parseAll=True)

bldic = bidict.bidict()     # Bidirectional map filled by rule2sym: condition text <-> symbol name ("s1", "s2", ...)
idx= 0                      # Number of symbols allocated so far; rule2sym increments it for each new condition
def rule2sym(lst: list) -> str:
    r"""Recursively convert a parsed rule (nested lists) into a sympy input string.

    Each atomic condition (a flat list of strings such as
    ``['owner', '=', '111']``) is replaced by a symbol name ``s<N>``.
    The mapping condition-text -> symbol is recorded in the module-level
    ``bldic`` and the module-level counter ``idx`` is advanced for every
    *new* condition. A condition that was seen before always gets the
    symbol it was assigned the first time.

    Args:
        lst: Nested list form of the parse result. Supported shapes are
            ``[sub_list]``, ``['NOT', operand]``,
            ``[operand, 'AND', operand, ...]``,
            ``[operand, 'OR', operand, ...]`` and a flat list of strings
            for an atomic condition.

    Returns:
        The expression using ``&``, ``|`` and ``~`` with every compound
        sub-expression wrapped in parentheses.
    """
    global idx

    # A list holding nothing but one nested list is just a wrapper: unwrap it.
    if len(lst) == 1 and isinstance(lst[0], list):
        return rule2sym(lst[0])

    if lst[0] == 'NOT':
        return f"(~{rule2sym(lst[1])})"

    # Binary levels look like [operand, OP, operand, OP, ...]; the operands
    # sit at the even positions.
    for word, joiner in (('AND', ' & '), ('OR', ' | ')):
        if lst[1] == word:
            operands = [rule2sym(item) for item in lst[0::2]]
            return f"({joiner.join(operands)})"

    # Atomic condition: reuse the symbol already assigned to this exact text.
    # Rebuilding the name from the current counter would hand out the most
    # recently created symbol whenever a repeated condition is not adjacent
    # to its first occurrence.
    key = ''.join(lst)
    if key in bldic:
        return bldic[key]

    idx += 1
    val = f"s{idx}"
    bldic[key] = val
    return val

# Driver: convert the parsed rule to a sympy expression, let sympy canonicalize it,
# then translate the result back into the original AND / OR / NOT rule syntax.
print(f"原始表达式: {line}")
symIn = rule2sym(result.as_list())
print(f"sympy 输入表达式: {symIn}")
# sympify parses the string into a sympy boolean expression; in the sample run this
# already drops the duplicated operand (s1 & s1 & s2 -> s1 & s2).
# NOTE(review): for deeper boolean minimization sympy.simplify_logic may be needed - confirm.
symOut = sympify(symIn)
print(f"sympy 简化表达式: {symOut}")
final = str(symOut)
final = re.sub(r'(~s\d+)', r'(\1)', final)      # wrap a negated symbol in parentheses: ~s1 -> (~s1)
final = final.replace('&', 'AND')
final = final.replace('|', 'OR')
final = final.replace('~', 'NOT ')
# Substitute every symbol back in ONE pass, matching whole tokens only.
# A chain of str.replace calls would rewrite the 's1' prefix of 's10', 's11', ...
# and could also re-replace text that an earlier substitution had just inserted.
final = re.sub(r'\bs\d+\b', lambda m: bldic.inv[m.group()], final)
print(f"最终简化表达式: {final}")
  • 测试输出
原始表达式: (((owner=111 AND owner=111 AND doc_type=222))) OR author=333 OR organ=444 AND ((NOT pub_year>555))
sympy 输入表达式: ((s1 & s1 & s2) | s3 | (s4 & (~s5)))
sympy 简化表达式: s3 | (s1 & s2) | (s4 & ~s5)
最终简化表达式: author=333 OR (owner=111 AND doc_type=222) OR (organ=444 AND (NOT pub_year>555))

相关阅读

本文出自 qbit snap

qbit
271 声望279 粉丝