Python 多进程:如何把处理结果写入 txt 文件?

import multiprocessing as mp
import re
import time


def showtxt(line):
    """Print the worker pid and the regex matches found in ``line``.

    ``line`` is converted with ``str()`` and searched case-insensitively
    with the pattern ``.*car.*|``. The list produced by ``findall`` is
    printed; nothing is returned.
    """
    text = str(line)
    # The trailing ``|`` in the pattern is an empty alternative, so the
    # result list can also contain empty strings.
    matches = re.findall('.*car.*|', text, re.IGNORECASE)
    worker_pid = mp.current_process().pid
    print('pid:%s, filted domain:%s' % (worker_pid, matches))


if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # supported way to measure elapsed time.
    start = time.perf_counter()
    pool = mp.Pool()
    with open("E:/uk/uk.txt", 'r') as f:
        # Each line of the file is handed to showtxt in a worker process.
        # The AsyncResult is discarded, so exceptions raised inside the
        # workers are never reported here.
        pool.map_async(showtxt, f)
    # No more tasks will be submitted; wait for the workers to finish.
    pool.close()
    pool.join()
    elapsed = time.perf_counter() - start
    print(elapsed)
    # Keep the console window open until the user presses Enter.
    input('ok')

上面的代码可以正常运行。请问如何把 showtxt 函数里的 filted_contents 写入 txt 文件?
我尝试直接把 with open 语句写进 showtxt 函数,但是不起作用,代码如下。

def showtxt(line):
    """Filter ``line`` with the regex and try to write the matches to ``/abc.txt``.

    NOTE(review): this is the failing attempt; the comments below point
    out why it does not produce the expected file.
    """
    linksRegex = re.compile('.*car.*|', re.IGNORECASE)
    # findall returns a list of str.
    filted_contents = linksRegex.findall(str(line))
    
    # Mode 'wb' truncates the file each time it is opened, so every call
    # would discard whatever an earlier call (or another worker) wrote.
    with open('/abc.txt','wb') as f:
        # A file opened in binary mode only accepts bytes-like objects;
        # passing a list raises TypeError. If the caller never fetches the
        # result of pool.map_async, that exception goes unreported.
        f.write(filted_contents)
        # Redundant: the with-statement already closes the file.
        f.close()
    
    # Not reached when the write above raises.
    print('pid:%s, filted domain:%s' % (mp.current_process().pid, filted_contents))        

请问要怎么写才能写入txt文件。谢谢。

阅读 6.3k
评论
    3 个回答

    基于 Python 3:改用惰性的 imap,逐个产出结果,避免 map 一次性返回过大的结果 list 而撑爆内存。
    chunksize=100 表示每次把 100 行作为一批任务分发给工作进程,避免频繁地在进程间传递参数。
    @self

    import multiprocessing as mp
    import re
    import time
    
    # Compiled once per process when the module is imported, so workers do
    # not rebuild the pattern for every line.
    # NOTE(review): the trailing ``|`` is an empty alternative, so findall
    # also yields empty strings; kept as-is to preserve the output format.
    _LINKS_REGEX = re.compile('.*car.*|', re.IGNORECASE)


    def txtfilter(line, runing=False):
        """Return the case-insensitive matches of ``.*car.*|`` found in ``line``.

        Args:
            line: The text to search (one line of the input file).
            runing: Unused. Kept only for backward compatibility; the
                original used it as a "regex already compiled" flag, and
                passing ``True`` raised UnboundLocalError.

        Returns:
            The list produced by ``findall`` for ``line``.
        """
        return _LINKS_REGEX.findall(line)
    
    if __name__ == '__main__':
        # time.clock() was removed in Python 3.8; time.perf_counter() is the
        # supported way to measure elapsed time.
        start = time.perf_counter()
        pool = mp.Pool()
        with open("E:/uk/uk.txt") as r,\
             open("E:/uk/uk_fltr.txt", 'w') as w:
            # imap yields results lazily and in input order, so only the
            # parent process writes to the output file.
            for rslt in pool.imap(txtfilter, r, chunksize=100):
                # Each result is a list; its str() form is written as-is.
                w.write(str(rslt))
        # No more tasks will be submitted; wait for the workers to exit.
        pool.close()
        pool.join()
        elapsed = time.perf_counter() - start
        print(elapsed)
        # Keep the console window open until the user presses Enter.
        input('ok')
    
    评论 赞赏