0
import multiprocessing as mp
import re
import time


def showtxt(line):
    """Print the matches of the 'car' pattern found in *line*.

    The argument is converted with ``str()`` and searched
    case-insensitively; the list returned by ``findall`` is printed
    together with the pid of the current process. Returns ``None``.
    """
    text = str(line)
    matches = re.findall('.*car.*|', text, re.IGNORECASE)
    worker_pid = mp.current_process().pid
    print('pid:%s, filted domain:%s' % (worker_pid, matches))


if __name__ == '__main__':
    # Time the whole run while the lines of the input file are handed to a
    # pool of worker processes, each call of showtxt() receiving one line.
    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # replacement for measuring elapsed time.
    start = time.perf_counter()
    pool = mp.Pool()
    with open("E:/uk/uk.txt", 'r') as f:
        # The AsyncResult returned by map_async() is discarded; completion
        # is awaited through close()/join() below.
        pool.map_async(showtxt, f)
    pool.close()
    pool.join()
    elapsed = time.perf_counter() - start
    print(elapsed)
    # Wait for Enter before exiting.
    input('ok')

请问上面的代码(可以运行)中,要把 showtxt 函数里的 filted_contents 写入 txt 文件,应该如何操作?
我直接把 with open 语句写进了 showtxt 函数,但是不起作用,代码如下。

def showtxt(line, out_path='/abc.txt'):
    """Append the 'car' matches found in *line* to a text file and print them.

    Args:
        line: Value to search; it is converted with ``str()`` first.
        out_path: File the non-empty matches are appended to, one per
            line. Defaults to the path the original code used.

    The file is opened in text append mode: binary mode cannot accept the
    list returned by ``findall``, and ``'w'`` would truncate the file on
    every call so only the last write would survive.
    """
    linksRegex = re.compile('.*car.*|', re.IGNORECASE)
    filted_contents = linksRegex.findall(str(line))

    # The pattern's empty alternative makes findall() return '' entries;
    # only real matches are worth writing.
    matches = [m for m in filted_contents if m]
    if matches:
        # NOTE(review): when called from several worker processes the order
        # of appended lines is not deterministic; collecting results in the
        # parent process and writing there avoids that.
        with open(out_path, 'a') as f:
            f.write(''.join(m + '\n' for m in matches))

    print('pid:%s, filted domain:%s' % (mp.current_process().pid, filted_contents))

请问要怎么写才能写入txt文件。谢谢。

dreambig 130
2017-09-07 提问

查看全部 3 个回答

1

已采纳

Python 3 下改用惰性的 imap,避免 map 返回过大的结果 list 而耗尽内存。
chunksize=100 表示每次向工作进程分派 100 行作为一个任务块,避免频繁地传递参数。
@self

import multiprocessing as mp
import re
import time

# Compiled once per process at import time so every call reuses the same
# pattern object.
# NOTE(review): the trailing '|' adds an empty alternative, so findall()
# also yields '' entries; kept unchanged to preserve the existing output.
_LINKS_REGEX = re.compile('.*car.*|', re.IGNORECASE)


def txtfilter(line, runing=False):
    """Return every match of the case-insensitive 'car' pattern in *line*.

    Args:
        line: The string to search.
        runing: Unused; kept only so existing callers that pass it keep
            working. Previously a truthy value skipped the compilation and
            the function failed because the pattern variable was unbound.

    Returns:
        The list produced by ``findall`` (it may contain empty strings).
    """
    return _LINKS_REGEX.findall(line)

if __name__ == '__main__':
    # Filter the input file line by line in a worker pool and write each
    # result to the output file from this (parent) process.
    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # replacement for measuring elapsed time.
    start = time.perf_counter()
    pool = mp.Pool()
    with open("E:/uk/uk.txt") as r,\
         open("E:/uk/uk_fltr.txt", 'w') as w:
        # imap() yields results lazily and in input order.
        for rslt in pool.imap(txtfilter, r, chunksize=100):
            # Each result is written as the repr of the returned list, with
            # no separator added between consecutive results.
            w.write(str(rslt))
    pool.close()
    pool.join()
    elapsed = time.perf_counter() - start
    print(elapsed)
    # Wait for Enter before exiting.
    input('ok')

推广链接