/**
     * Insert one word into the trie stored in $this->dict.
     *
     * The word is split into characters with self::splitStr(). Each character
     * becomes a nested array key, and the node reached by the final character
     * receives an 'end' => true entry marking the path as a complete word.
     *
     * @param string $words The word to insert. When self::splitStr() yields an
     *                      empty array or false, nothing is inserted.
     */
    protected function initTrie($words)
    {
        $wordArr = self::splitStr($words);
        // Without this guard the loop below is skipped and the trie root itself
        // would be flagged as a complete word.
        if (empty($wordArr)) {
            return;
        }

        $curNode = &$this->dict;
        foreach ($wordArr as $char) {
            // Create the child node only if this prefix has not been stored yet.
            if (!isset($curNode[$char])) {
                $curNode[$char] = [];
            }
            $curNode = &$curNode[$char];
        }
        // Flag the full path from the root to this node as a complete word.
        $curNode['end'] = true;
        // Drop the reference so later writes to $curNode cannot touch the trie.
        unset($curNode);
    }
 /**
     * Scan a text for the first word stored in the static trie self::$trie.
     *
     * Every character position is tried as a start. From each start the trie is
     * walked as far as the text allows, remembering the last node that carries
     * an 'end' marker, so the longest stored word beginning at that position is
     * reported even when a longer path only matches partially. The scan stops
     * at the first start position that yields a word.
     *
     * @param string $words Text to scan.
     * @return array ['tag' => false] when no stored word occurs in the text
     *               (or the text is empty / cannot be split), otherwise
     *               ['tag' => true, 'match' => the matched word].
     */
    public function isMatch($words)
    {
        $data = [
            'tag' => false
        ];
        try {
            $chars = TrieImpl::splitStr($words);
            // splitStr() may return false or an empty array: nothing to scan.
            if (empty($chars)) {
                return $data;
            }
            $length = count($chars);
            for ($i = 0; $i < $length; $i++) {
                if (!isset(self::$trie[$chars[$i]])) {
                    continue;
                }
                // Plain reads are enough here; arrays are copy-on-write, so the
                // walk needs no references into the shared trie.
                $node = self::$trie[$chars[$i]];
                // Index of the last character that completed a stored word, or
                // -1 while none has been seen from this start position.
                $matchEnd = isset($node['end']) ? $i : -1;
                for ($j = $i + 1; $j < $length && isset($node[$chars[$j]]); $j++) {
                    $node = $node[$chars[$j]];
                    if (isset($node['end'])) {
                        $matchEnd = $j;
                    }
                }
                if ($matchEnd >= $i) {
                    return [
                        'tag' => true,
                        'match' => implode('', array_slice($chars, $i, $matchEnd - $i + 1)),
                    ];
                }
            }
        } catch (\Throwable $e) {
            // Keep the original best-effort contract (never throw, report "no
            // match"), but log the failure instead of dumping it into the output.
            error_log($e->getMessage());
        }
        return $data;
    }
/**
     * Split a string into its individual characters.
     *
     * Uses an empty pattern with the "u" (UTF-8) modifier so the split happens
     * between characters rather than bytes; empty pieces are discarded.
     *
     * @param string $str The string to split.
     * @return string[]|false The characters in order, or false when preg_split() fails.
     */
    public static function splitStr($str)
    {
        $betweenEveryChar = "//u";
        $unlimitedPieces = -1;

        $chars = preg_split($betweenEveryChar, $str, $unlimitedPieces, PREG_SPLIT_NO_EMPTY);

        return $chars;
    }

时间复杂度可以，但空间复杂度还是蛮高的，是指数级的。

可以把汉字存起来，用引用来解决空间复杂度高的问题。


Devilu
85 声望4 粉丝

just a newbie