Swift 5.5 如何在用一个 Regex 批配出对应的行之后、使用另一个 Regex 做行内查找替换?

打比方说我想针对 strContent 这个 String 的内容进行操作。该 String 内容如下

危机 0 ㄨㄟˊ ㄐㄧ
除夕 102 ㄔㄨˊ ㄒㄧˋ
圳沟 0 ㄗㄨㄣˋ ㄍㄡ
细菌 12 ㄒㄧˋ ㄐㄩㄣˋ

Regex A: ^.*危.*\b(ㄨㄟˊ)\b.*$
(因为内容格式的原因,才仅对注音使用 whole-word 标记。)

当且仅当对 Regex A 批配到的结果执行行内替换操作 Regex B:查找 \b(ㄨㄟˊ)\b 替换成 ㄨㄟ。(「危」的陆规审音是「ㄨㄟ」,注音文第一声不用写调号。)
同样,Regex B 将除夕的第四声改成第一声的时候,不应该影响到「细菌」的「细」。

然而我用 StackOverflow 找到的方法好像并不是很好用,至少目前无法完成匹配:

#!/usr/bin/env swift

import Foundation

extension String {
    /* https://stackoverflow.com/a/66189289/4162914 */
    func match(_ pattern: String) -> [String] {
        do {
            let regex = try NSRegularExpression(pattern: pattern, options: NSRegularExpression.Options(rawValue: 0))
            let nsstr = self as NSString
            let all = NSRange(location: 0, length: nsstr.length)
            var matches : [String] = [String]()
            regex.enumerateMatches(in: self, options: [], range: all) {
                (result : NSTextCheckingResult?, _, _) in
                if let r = result {
                    let result = nsstr.substring(with: r.range) as String
                    matches.append(result)
                }
            }
            return matches
        } catch {
            return [String]()
        }
    }
}

func filterTone_Romaji(inputString: String) -> [String] {
    var arrResult = inputString.match("^.*危.*\\b(ㄨㄟˊ)\\b.*$")
    arrResult.append(contentsOf: inputString.match("^.*圳.*\\b(ㄗㄨㄣˋ)\\b.*$"))
    return arrResult
}

let str_Test_File = "危机 0 ㄨㄟˊ ㄐㄧ\n圳沟 0 ㄗㄨㄣˋ ㄍㄡ"


// Trying to find the matched result
var arrConvResultTest : [[String]] = [[]]

arrConvResultTest.append(["@# phrases-test-pragma-header.txt"])
var arrConv_Test_File = filterTone_Romaji(inputString: str_Test_File)

// Print Out the matched result
var varLineData = ""

for lineData in arrConvResultTest {
     varLineData = lineData.joined()
     print(varLineData)
}

我该怎样做才能让匹配生效呢?

P.S.: 论及行内查找替换,可以用这个函数:

extension String {
    /* https://stackoverflow.com/a/40993403/4162914 */
    mutating func regReplace(pattern: String, replaceWith: String = "") {
        do {
            let regex = try NSRegularExpression(pattern: pattern, options: .caseInsensitive)
            let range = NSRange(location: 0, length: count)
            self = regex.stringByReplacingMatches(in: self, options: [], range: range, withTemplate: replaceWith)
        } catch { return }
    }
}
阅读 1.9k
2 个回答

Incorrect results may be the cause of incorrect regular expression writing;

"\b(ㄨㄟˊ)\b" will match "(ㄨㄟˊ)" not "ㄨㄟˊ"

var str_Test_File = "危机 0  ㄨㄟˊ  ㄐㄧ\n圳沟 0 ㄗㄨㄣˋ ㄍㄡ"

var arrResult = str_Test_File.match("^.*危.*\\bㄨㄟˊ\\b.*") //remove () $
print(arrResult)  //print ["危机 0  ㄨㄟˊ  ㄐㄧ"]


str_Test_File.regReplace(pattern: "\\bㄨㄟˊ\\b", replaceWith: "ㄨㄟ")
print(str_Test_File) //print 危机 0  ㄨㄟ  ㄐㄧ 圳沟 0 ㄗㄨㄣˋ ㄍㄡ

可能是正则表达式书写不正确导致的错误;"\b(ㄨㄟˊ)\b" 匹配的是 "(ㄨㄟˊ)" 而不是 "ㄨㄟˊ"

var str_Test_File = "危机 0  ㄨㄟˊ  ㄐㄧ\n圳沟 0 ㄗㄨㄣˋ ㄍㄡ"

var arrResult = str_Test_File.match("^.*危.*\\bㄨㄟˊ\\b.*") //去掉了() $
print(arrResult)  //print ["危机 0  ㄨㄟˊ  ㄐㄧ"]


str_Test_File.regReplace(pattern: "\\bㄨㄟˊ\\b", replaceWith: "ㄨㄟ")
print(str_Test_File) //print 危机 0  ㄨㄟ  ㄐㄧ 圳沟 0 ㄗㄨㄣˋ ㄍㄡ

不用 Regex 来判断当前行即可,应该已经足够了。

#!/usr/bin/env swift

import Foundation

extension String {
    /* https://stackoverflow.com/a/40993403/4162914 */
    mutating func regReplace(pattern: String, replaceWith: String = "") {
        do {
            let regex = try NSRegularExpression(pattern: pattern, options: .caseInsensitive)
            let range = NSRange(location: 0, length: count)
            self = regex.stringByReplacingMatches(in: self, options: [], range: range, withTemplate: replaceWith)
        } catch { return }
    }
    // by OnlyU
    func replaseStrForTW(str: String, regStr: String) -> String{
        
        var newStr = ""
        let regex = try! NSRegularExpression(pattern: "\\" + regStr + "\\b", options: .caseInsensitive)
        newStr = regex.stringByReplacingMatches(in: str, options: [], range: NSRange(0..<str.utf16.count), withTemplate: "***")
        return newStr
    }
}

func getDocumentsDirectory() -> URL {
    let paths = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)
    return paths[0]
}

// MARK: - 定義檔案路徑

let urlCHT = "./ToProcess-CHT.txt"

// MARK: - 檔案載入
var strClusterCHT = ""

do {
    strClusterCHT = try String(contentsOfFile: urlCHT, encoding: .utf8)
}
catch {print("Exception happened when reading raw data.")}

// MARK: - 轉換音韻

var blobProcessed = ""

var arrClusterCHT = strClusterCHT.components(separatedBy: "\n")
for blob in arrClusterCHT {
    var currentBlob = blob
    if currentBlob.contains("績") && currentBlob.contains(" ㄐㄧ ") {currentBlob.regReplace(pattern: #"\bㄐㄧ\b"#, replaceWith: "ㄐㄧˋ")}
    if currentBlob.contains("蹟") && currentBlob.contains(" ㄐㄧ ") {currentBlob.regReplace(pattern: #"\bㄐㄧ\b"#, replaceWith: "ㄐㄧˋ")}
    if currentBlob.contains("跡") && currentBlob.contains(" ㄐㄧ ") {currentBlob.regReplace(pattern: #"\bㄐㄧ\b"#, replaceWith: "ㄐㄧˋ")}
    if currentBlob.contains("嵌") && currentBlob.contains(" ㄑㄧㄢ ") {currentBlob.regReplace(pattern: #"\bㄑㄧㄢ\b"#, replaceWith: "ㄑㄧㄢˋ")}
    if currentBlob.contains("框") && currentBlob.contains(" ㄎㄨㄤ ") {currentBlob.regReplace(pattern: #"\bㄎㄨㄤ\b"#, replaceWith: "ㄎㄨㄤˋ")}
    if currentBlob.contains("期") && currentBlob.contains(" ㄑㄧㄢˊ ") {currentBlob.regReplace(pattern: #"\bㄑㄧㄢˊ\b"#, replaceWith: "ㄑㄧㄢ")}
    if currentBlob.contains("微") && currentBlob.contains(" ㄨㄟˊ ") {currentBlob.regReplace(pattern: #"\bㄨㄟˊ\b"#, replaceWith: "ㄨㄟ")}
    if currentBlob.contains("薇") && currentBlob.contains(" ㄨㄟˊ ") {currentBlob.regReplace(pattern: #"\bㄨㄟˊ\b"#, replaceWith: "ㄨㄟ")}
    if currentBlob.contains("突") && currentBlob.contains(" ㄊㄨˊ ") {currentBlob.regReplace(pattern: #"\bㄊㄨˊ\b"#, replaceWith: "ㄊㄨ")}
    if currentBlob.contains("帆") && currentBlob.contains(" ㄈㄢˊ ") {currentBlob.regReplace(pattern: #"\bㄈㄢˊ\b"#, replaceWith: "ㄈㄢ")}
    if currentBlob.contains("藩") && currentBlob.contains(" ㄈㄢˊ ") {currentBlob.regReplace(pattern: #"\bㄈㄢˊ\b"#, replaceWith: "ㄈㄢ")}
    if currentBlob.contains("擊") && currentBlob.contains(" ㄐㄧˊ ") {currentBlob.regReplace(pattern: #"\bㄐㄧˊ\b"#, replaceWith: "ㄐㄧ")}
    if currentBlob.contains("夾") && currentBlob.contains(" ㄐㄧㄚˊ ") {currentBlob.regReplace(pattern: #"\bㄐㄧㄚˊ\b"#, replaceWith: "ㄐㄧㄚ")}
    if currentBlob.contains("鞠") && currentBlob.contains(" ㄐㄩˊ ") {currentBlob.regReplace(pattern: #"\bㄐㄩˊ\b"#, replaceWith: "ㄐㄩ")}
    if currentBlob.contains("拈") && currentBlob.contains(" ㄋㄧㄢˊ ") {currentBlob.regReplace(pattern: #"\bㄋㄧㄢˊ\b"#, replaceWith: "ㄋㄧㄢ")}
    if currentBlob.contains("夕") && currentBlob.contains(" ㄒㄧˋ ") {currentBlob.regReplace(pattern: #"\bㄒㄧˋ\b"#, replaceWith: "ㄒㄧ")}
    if currentBlob.contains("昔") && currentBlob.contains(" ㄒㄧˊ ") {currentBlob.regReplace(pattern: #"\bㄒㄧˊ\b"#, replaceWith: "ㄒㄧ")}
    if currentBlob.contains("惜") && currentBlob.contains(" ㄒㄧˊ ") {currentBlob.regReplace(pattern: #"\bㄒㄧˊ\b"#, replaceWith: "ㄒㄧ")}
    if currentBlob.contains("熄") && currentBlob.contains(" ㄒㄧˊ ") {currentBlob.regReplace(pattern: #"\bㄒㄧˊ\b"#, replaceWith: "ㄒㄧ")}
    if currentBlob.contains("息") && currentBlob.contains(" ㄒㄧˊ ") {currentBlob.regReplace(pattern: #"\bㄒㄧˊ\b"#, replaceWith: "ㄒㄧ")}
    if currentBlob.contains("危") && currentBlob.contains(" ㄨㄟˊ ") {currentBlob.regReplace(pattern: #"\bㄨㄟˊ\b"#, replaceWith: "ㄨㄟ")}
    if currentBlob.contains("椰") && currentBlob.contains(" ㄧㄝˊ ") {currentBlob.regReplace(pattern: #"\bㄧㄝˊ\b"#, replaceWith: "ㄧㄝ")}
    if currentBlob.contains("叔") && currentBlob.contains(" ㄕㄨˊ ") {currentBlob.regReplace(pattern: #"\bㄕㄨˊ\b"#, replaceWith: "ㄕㄨ")}
    if currentBlob.contains("濤") && currentBlob.contains(" ㄊㄠˊ ") {currentBlob.regReplace(pattern: #"\bㄊㄠˊ\b"#, replaceWith: "ㄊㄠ")}
    if currentBlob.contains("跌") && currentBlob.contains(" ㄉㄧㄝˊ ") {currentBlob.regReplace(pattern: #"\bㄉㄧㄝˊ\b"#, replaceWith: "ㄉㄧㄝ")}
    if currentBlob.contains("尤") && currentBlob.contains(" ㄧㄡˊ ") {currentBlob.regReplace(pattern: #"\bㄧㄡˊ\b"#, replaceWith: "ㄧㄡ")}
    if currentBlob.contains("寂") && currentBlob.contains(" ㄐㄧˊ ") {currentBlob.regReplace(pattern: #"\bㄐㄧˊ\b"#, replaceWith: "ㄐㄧˋ")}
    if currentBlob.contains("寧") && currentBlob.contains(" ㄋㄧㄥˊ ") {currentBlob.regReplace(pattern: #"\bㄋㄧㄥˊ\b"#, replaceWith: "ㄋㄧㄥˋ")}
    if currentBlob.contains("築") && currentBlob.contains(" ㄓㄨㄛˊ ") {currentBlob.regReplace(pattern: #"\bㄓㄨㄛˊ\b"#, replaceWith: "ㄓㄨㄛˋ")}
    if currentBlob.contains("馴") && currentBlob.contains(" ㄒㄩㄣˊ ") {currentBlob.regReplace(pattern: #"\bㄒㄩㄣˊ\b"#, replaceWith: "ㄒㄩㄣˋ")}
    if currentBlob.contains("播") && currentBlob.contains(" ㄅㄛˋ ") {currentBlob.regReplace(pattern: #"\bㄅㄛˋ\b"#, replaceWith: "ㄅㄛ")}
    if currentBlob.contains("究") && currentBlob.contains(" ㄐㄧㄡˋ ") {currentBlob.regReplace(pattern: #"\bㄐㄧㄡˋ\b"#, replaceWith: "ㄐㄧㄡ")}
    if currentBlob.contains("蹬") && currentBlob.contains(" ㄉㄥˋ ") {currentBlob.regReplace(pattern: #"\bㄉㄥˋ\b"#, replaceWith: "ㄉㄥ")}
    if currentBlob.contains("剽") && currentBlob.contains(" ㄆㄧㄠˋ ") {currentBlob.regReplace(pattern: #"\bㄆㄧㄠˋ\b"#, replaceWith: "ㄆㄧㄠ")}
    if currentBlob.contains("菌") && currentBlob.contains(" ㄐㄩㄣˋ ") {currentBlob.regReplace(pattern: #"\bㄐㄩㄣˋ\b"#, replaceWith: "ㄐㄩㄣ")}
    if currentBlob.contains("噸") && currentBlob.contains(" ㄉㄨㄣˋ ") {currentBlob.regReplace(pattern: #"\bㄉㄨㄣˋ\b"#, replaceWith: "ㄉㄨㄣ")}
    if currentBlob.contains("穴") && currentBlob.contains(" ㄒㄩㄝˋ ") {currentBlob.regReplace(pattern: #"\bㄒㄩㄝˋ\b"#, replaceWith: "ㄒㄩㄝˊ")}
    if currentBlob.contains("餾") && currentBlob.contains(" ㄌㄧㄡˋ ") {currentBlob.regReplace(pattern: #"\bㄌㄧㄡˋ\b"#, replaceWith: "ㄌㄧㄡˊ")}
    if currentBlob.contains("識") && currentBlob.contains(" ㄕˋ ") {currentBlob.regReplace(pattern: #"\bㄕˋ\b"#, replaceWith: "ㄕˊ")}
    if currentBlob.contains("企") && currentBlob.contains(" ㄑㄧˋ ") {currentBlob.regReplace(pattern: #"\bㄑㄧˋ\b"#, replaceWith: "ㄑㄧˇ")}
    if currentBlob.contains("辱") && currentBlob.contains(" ㄖㄨˋ ") {currentBlob.regReplace(pattern: #"\bㄖㄨˋ\b"#, replaceWith: "ㄖㄨˇ")}
    if currentBlob.contains("署") && currentBlob.contains(" ㄕㄨˋ ") {currentBlob.regReplace(pattern: #"\bㄕㄨˋ\b"#, replaceWith: "ㄕㄨˇ")}
    if currentBlob.contains("諷") && currentBlob.contains(" ㄈㄥˋ ") {currentBlob.regReplace(pattern: #"\bㄈㄥˋ\b"#, replaceWith: "ㄈㄥˇ")}
    if currentBlob.contains("蹈") && currentBlob.contains(" ㄎㄠˋ ") {currentBlob.regReplace(pattern: #"\bㄎㄠˋ\b"#, replaceWith: "ㄎㄠˇ")}
    if currentBlob.contains("偽") && currentBlob.contains(" ㄨㄟˋ ") {currentBlob.regReplace(pattern: #"\bㄨㄟˋ\b"#, replaceWith: "ㄨㄟˇ")}
    if currentBlob.contains("樸") && currentBlob.contains(" ㄆㄨˊ ") {currentBlob.regReplace(pattern: #"\bㄆㄨˊ\b"#, replaceWith: "ㄆㄨˇ")}
    if currentBlob.contains("儲") && currentBlob.contains(" ㄔㄨˊ ") {currentBlob.regReplace(pattern: #"\bㄔㄨˊ\b"#, replaceWith: "ㄔㄨˇ")}
    if currentBlob.contains("髮") && currentBlob.contains(" ㄈㄚˇ ") {currentBlob.regReplace(pattern: #"\bㄈㄚˇ\b"#, replaceWith: "ㄈㄚˋ")}
    if currentBlob.contains("悄") && currentBlob.contains(" ㄑㄧㄠˇ ") {currentBlob.regReplace(pattern: #"\bㄑㄧㄠˇ\b"#, replaceWith: "ㄑㄧㄠ")}
    if currentBlob.contains("坊") && currentBlob.contains(" ㄈㄤ ") {currentBlob.regReplace(pattern: #"\bㄈㄤ\b"#, replaceWith: "ㄈㄤˊ")}
    if currentBlob.contains("綏") && currentBlob.contains(" ㄙㄨㄟ ") {currentBlob.regReplace(pattern: #"\bㄙㄨㄟ\b"#, replaceWith: "ㄙㄨㄟˊ")}
    if currentBlob.contains("縛") && currentBlob.contains(" ㄈㄨˊ ") {currentBlob.regReplace(pattern: #"\bㄈㄨˊ\b"#, replaceWith: "ㄈㄨˋ")}
    if currentBlob.contains("斂") && currentBlob.contains(" ㄌㄧㄢˋ ") {currentBlob.regReplace(pattern: #"\bㄌㄧㄢˋ\b"#, replaceWith: "ㄌㄧㄢˇ")}
    if currentBlob.contains("矽") && currentBlob.contains(" ㄒㄧˋ ") {currentBlob.regReplace(pattern: #"\bㄒㄧˋ\b"#, replaceWith: "ㄒㄧ")}
    if currentBlob.contains("綜") && currentBlob.contains(" ㄗㄨㄥˋ ") {currentBlob.regReplace(pattern: #"\bㄗㄨㄥˋ\b"#, replaceWith: "ㄗㄨㄥ")}
    if currentBlob.contains("頗") && currentBlob.contains(" ㄆㄛˇ ") {currentBlob.regReplace(pattern: #"\bㄆㄛˇ\b"#, replaceWith: "ㄆㄛ")}
    if currentBlob.contains("擁") && currentBlob.contains(" ㄩㄥˇ ") {currentBlob.regReplace(pattern: #"\bㄩㄥˇ\b"#, replaceWith: "ㄩㄥ")}
    if currentBlob.contains("姣") && currentBlob.contains(" ㄐㄧㄠˇ ") {currentBlob.regReplace(pattern: #"\bㄐㄧㄠˇ\b"#, replaceWith: "ㄐㄧㄠ")}
    if currentBlob.contains("檔") && currentBlob.contains(" ㄉㄤˇ ") {currentBlob.regReplace(pattern: #"\bㄉㄤˇ\b"#, replaceWith: "ㄉㄤˋ")}
    if currentBlob.contains("菽") && currentBlob.contains(" ㄕㄨˊ ") {currentBlob.regReplace(pattern: #"\bㄕㄨˊ\b"#, replaceWith: "ㄕㄨ")}
    if currentBlob.contains("築") && currentBlob.contains(" ㄓㄨㄛˊ ") {currentBlob.regReplace(pattern: #"\bㄓㄨㄛˊ\b"#, replaceWith: "ㄓㄨㄛˋ")}
    if currentBlob.contains("銻") && currentBlob.contains(" ㄊㄧˋ ") {currentBlob.regReplace(pattern: #"\bㄊㄧˋ\b"#, replaceWith: "ㄊㄧ")}
    if currentBlob.contains("掇") && currentBlob.contains(" ㄉㄨㄛˊ ") {currentBlob.regReplace(pattern: #"\bㄉㄨㄛˊ\b"#, replaceWith: "ㄉㄨㄛ")}
    if currentBlob.contains("銨") && currentBlob.contains(" ㄢ ") {currentBlob.regReplace(pattern: #"\bㄢ\b"#, replaceWith: "ㄢˇ")}
    if currentBlob.contains("菌") && currentBlob.contains(" ㄐㄩㄣˋ ") {currentBlob.regReplace(pattern: #"\bㄐㄩㄣˋ\b"#, replaceWith: "ㄐㄩㄣ")}
    print(currentBlob)
}