Implement wildcard pattern matching with support for '?' and '*'.

'?' Matches any single character.
'*' Matches any sequence of
characters (including the empty sequence).

The matching should cover the entire input string (not partial).

The function prototype should be:
bool isMatch(const char *s,
const char *p)

Some examples:
isMatch("aa","a") → false
isMatch("aa","aa") → true
isMatch("aaa","aa") → false

isMatch("aa", "*") → true
isMatch("aa", "a*") → true

isMatch("ab", "?*") → true
isMatch("aab", "c*a*b") → false

难度: Hard

题目给出一个字符串和一个pattern, 要求我们给出这个字符串是否匹配这个pattern. 其中通配符跟我们平常见到的一样, 是 ? 和 * . ?代表任意单个字符, * 代表一个或多个字符.
这个题跟Leetcode 10 简单正则匹配 比较类似, 可以跟这里面第二个解法一样, 采取类似的动态规划解法, 在pattern里取中间某个确定的字符串序列, 将字符串和pattern分别切分成两段再分别判定是否匹配.
例如, 字符串是 abc, 判定是否匹配到*b?, 我们可以抓住中间的b, 匹配到abc中的b, 将abc切分为["a","c"], 将*b?切分为 ["*","?"], 分别判定"a"和"*", 以及"c"和"?"是否匹配, 如果都匹配, 那么整个最终结果就是匹配的.
当然, 事情有时候不会这么顺利, 比如字符串出现多个b呢? 这时候需要尝试, pattern中的b到底是字符串中的哪一个b.

这里提出一种新的, 无递归的解法. 基本思路为:

  1. 将pattern中所有连续多个*置换为单个*, 比如a**a置换为a*a

  2. 跟Leetcode 10 的解法一样, 将pattern前后都确定的字符去跟输入字符串前后匹配, 比如字符串abc和模式a*c, 掐头去尾变成 b和*, 这期间如果有不同可以直接返回不匹配.

  3. 剩下的是有*的部分, 比如pattern可能是*a*bb*c*. 这样我们采取尽早匹配*之外字符的方式, 像上面那个pattern, 按顺序, 尽早匹配a, bb, 和c, 如果在字符串结束之前都匹配ok, 这样最终结果就是匹配成功的, 否则, 如果某个子串比如bb找不到, 或者还没全部匹配完就已经走到了字符串的末尾, 都算匹配失败.

AC的程序最终可以超越75%的算法, 如下:

public class Solution {
    public boolean isMatch(String s, String p) {
        // replace all redundent ** to *
        if (p.length() > 0) {
            StringBuffer sb = new StringBuffer();
            sb.append(p.charAt(0));
            for (int i = 1; i < p.length(); i++) {
                if (p.charAt(i) != '*' || p.charAt(i - 1) != '*') {
                    sb.append(p.charAt(i));
                }
            }
            p = sb.toString();
        }
        if (p.length() == 1 && p.charAt(0) == '*') {
            return true;
        }

        int slen = s.length();
        int plen = p.length();
        int ps = 0;
        int pp = 0;
        // trim left non-star element of s and p
        // "aabb","?*b" -> "abb","*b"
        while (pp < plen && ps < slen && p.charAt(pp) != '*') {
            if (p.charAt(pp) != '?' && p.charAt(pp) != s.charAt(ps)) {
                return false;
            }
            pp++;
            ps++;
        }
        int trimleft = pp;
        s = s.substring(trimleft);
        p = p.substring(trimleft);

        // if s and p is not empty
        // trim right non-star element of s and p
        // "abb","*b" -> "ab","*"
        if (s.length() > 0 && p.length() > 0) {
            slen = s.length();
            plen = p.length();
            ps = slen - 1;
            pp = plen - 1;
            while (pp >= 0 && ps >= 0 && p.charAt(pp) != '*') {
                if (p.charAt(pp) != '?' && p.charAt(pp) != s.charAt(ps)) {
                    return false;
                }
                pp--;
                ps--;
            }
            int trimright = plen - 1 - pp;
            s = s.substring(0, slen - trimright);
            p = p.substring(0, plen - trimright);
        }
        slen = s.length();
        plen = p.length();
        // length of s or length of p is zero judgement
        if (plen == 0) {
            if (slen > 0) {
                return false;
            } else {
                return true;
            }
        }
        if (slen == 0 && (plen > 1 || plen == 1 && p.charAt(0) != '*')) {
            return false;
        }
        ps = 0;
        pp = 0;
        int ptnl = 1;
        int ptnr = 1;
        int psl = 0;
        int psr = 0;
        // first and last character of p is star
        // p: *aa*bb* -> (aa,bb)
        // locate each of the non-star sub-patterns in s sequentially
        // if all satisfies, return true
        // otherwise false
        while (ptnl < plen && ptnr < plen) {
            // ptnl and ptnr designates left and right index of current
            // sub-pattern
            // find a sub-pattern
            while (p.charAt(ptnr) != '*') {
                ptnr++;
            }
            // find match in s
            for (int i = psl; i <= slen - (ptnr - ptnl); i++) {
                int j = ptnl;
                for (; j < ptnr; j++) {
                    if (s.charAt(i + (j - ptnl)) != p.charAt(j) && p.charAt(j) != '?') {
                        break;
                    }
                }
                if (j == ptnr) {
                    // matches current sub-pattern
                    psl = i;
                    psr = psl + (ptnr - ptnl);
                    break;
                }
            }
            if (psl == psr) {
                // no match for current sub-pattern
                return false;
            }
            // go to next position for next sub-pattern
            psl = psr;
            ptnr++;
            ptnl = ptnr;
        }
        return true;
    }

    public static void main(String[] args) {
        Solution s = new Solution();
        System.out.println(s.isMatch("bb", "?*?"));
        System.out.println(s.isMatch("b", "?*?"));
        System.out.println(s.isMatch("aa", "a"));
        System.out.println(s.isMatch("aa", "aa"));
        System.out.println(s.isMatch("aaa", "aa"));
        System.out.println(s.isMatch("aa", "*"));
        System.out.println(s.isMatch("aa", "a*"));
        System.out.println(s.isMatch("ab", "?*"));
        System.out.println(s.isMatch("aab", "c*a*b"));
        System.out.println(s.isMatch("aabbaab", "a*b"));
        System.out.println(s.isMatch("aabbbaaab", "a*b*b"));
        System.out.println(s.isMatch("aaabababaaabaababbbaaaabbbbbbabbbbabbbabbaabbababab", "*ab***ba**b*b*aaab*b"));
        System.out.println(s.isMatch("aaabaaaabbbbbbaaabbabbbbababbbaaabbabbabb", "*b*bbb*baa*bba*b*bb*b*a*aab*a*"));
        System.out.println(s.isMatch(
                "abbaabbbbababaababababbabbbaaaabbbbaaabbbabaabbbbbabbbbabbabbaaabaaaabbbbbbaaabbabbbbababbbaaabbabbabb",
                "***b**a*a*b***b*a*b*bbb**baa*bba**b**bb***b*a*aab*a**"));

        System.out.println(s.isMatch("bbbbbbbabbaabbabbbbaaabbabbabaaabbababbbabbbabaaabaab", "b*b*ab**ba*b**b***bba"));
        System.out.println(s.isMatch(
                "abbabaaabbabbaababbabbbbbabbbabbbabaaaaababababbbabababaabbababaabbbbbbaaaabababbbaabbbbaabbbbababababbaabbaababaabbbababababbbbaaabbbbbabaaaabbababbbbaababaabbababbbbbababbbabaaaaaaaabbbbbaabaaababaaaabb",
                "**aa*****ba*a*bb**aa*ab****a*aaaaaa***a*aaaa**bbabb*b*b**aaaaaaaaa*a********ba*bbb***a*ba*bb*bb**a*b*bb"));
    }
}

main中包含一部分测试用例.

这里, 我突然发现, 上面第一步, 将pattern中所有连续多个*置换为单个*, 虽然可以让pattern变得更简单, 但其实不是非常必要, 既然后面使用游标, 就可以略过连续多个的*. 比如上面接近最后有一句ptnr++, 实际上就是略过了其后的一个*, int ptnr=1; 就是略过了开头的一个*. 把这些都置换为略过连续的*, 即可. 改进后的程序如下:

public class Solution2 {
    public boolean isMatch(String s, String p) {
        int slen = s.length();
        int plen = p.length();
        int ps = 0;
        int pp = 0;

        // trim left non-star element of s and p
        // "aabb","?*b" -> "abb","*b"
        while (pp < plen && ps < slen && p.charAt(pp) != '*') {
            if (p.charAt(pp) != '?' && p.charAt(pp) != s.charAt(ps)) {
                return false;
            }
            pp++;
            ps++;
        }
        int trimleft = pp;
        s = s.substring(trimleft);
        p = p.substring(trimleft);

        // if s and p is not empty
        // trim right non-star element of s and p
        // "abb","*b" -> "ab","*"
        slen = s.length();
        plen = p.length();
        if (slen > 0 && plen > 0) {
            ps = slen - 1;
            pp = plen - 1;
            while (pp >= 0 && ps >= 0 && p.charAt(pp) != '*') {
                if (p.charAt(pp) != '?' && p.charAt(pp) != s.charAt(ps)) {
                    return false;
                }
                pp--;
                ps--;
            }
            int trimright = plen - 1 - pp;
            s = s.substring(0, slen - trimright);
            p = p.substring(0, plen - trimright);
        }
        slen = s.length();
        plen = p.length();
        // length of s or length of p is zero judgement
        if (plen == 0) {
            if (slen > 0) {
                return false;
            } else {
                return true;
            }
        }
        
        ps = 0;
        pp = 0;
        int ptnl = 0;
        int ptnr = 0;
        int psl = 0;
        int psr = 0;
        // skip preceding *
        while (ptnr < plen && p.charAt(ptnr) == '*') {
            ptnr++;
        }
        ptnl = ptnr;
        // first and last character of p is star
        // p: *aa*bb* -> (aa,bb)
        // locate each of the non-star sub-patterns in s sequentially
        // if all satisfies, return true
        // otherwise false
        while (ptnl < plen && ptnr < plen) {
            // ptnl and ptnr designates left and right index of current
            // sub-pattern
            // find a sub-pattern
            while (ptnr < plen && p.charAt(ptnr) != '*') {
                ptnr++;
            }
            // find match in s
            for (int i = psl; i <= slen - (ptnr - ptnl); i++) {
                int j = ptnl;
                for (; j < ptnr; j++) {
                    if (s.charAt(i + (j - ptnl)) != p.charAt(j) && p.charAt(j) != '?') {
                        break;
                    }
                }
                if (j == ptnr) {
                    // matches current sub-pattern
                    psl = i;
                    psr = psl + (ptnr - ptnl);
                    break;
                }
            }
            if (psl == psr) {
                // no match for current sub-pattern
                return false;
            }
            // go to next position for next sub-pattern
            psl = psr;
            while (ptnr < plen && p.charAt(ptnr) == '*') {
                ptnr++;
            }
            ptnl = ptnr;
        }
        return true;
    }
}

最终提交结果速度又快了很多, 在99.49%的提交之前:
图片描述


EthanSun
45 声望3 粉丝