# -*- coding: utf-8 -*-
"""
Created on Wed Mar 28 17:06:34 2018
@author: Administrator
"""
import sys
import thulac
import sys
import os
import io
# Batch word segmentation: read the numbered input files 0.txt .. 18.txt,
# segment each one with THULAC, and write one token per line to a matching
# "thulac<i>.txt" result file.

# Segmenter created with seg_only=True (segmentation only).
thu = thulac.thulac(seg_only=True)

# Loop-invariant locations, hoisted out of the loop.
path2 = "E:/社交网络/weiboAPIsprider/数据集/"  # original dataset directory
result_dir = "E:/社交网络/jiebaTest/原始结果集/"  # segmented results directory

for i in range(0, 19):
    # Read the whole file in one call. The context manager guarantees the
    # handle is closed (the original never closed it and concatenated the
    # content line by line).
    with io.open(path2 + str(i) + '.txt', "r", encoding='UTF-8') as f:
        text = f.read()
    print(text)

    # The keyword argument is lowercase ``text``; ``Text=False`` raises
    # TypeError. With text=False, cut() returns a list of [word, tag]
    # pairs rather than plain strings.
    seg_list = thu.cut(text, text=False)

    # Keep only the word of each pair so that str.join receives strings.
    words = [pair[0] for pair in seg_list]

    # Write the result: one token per line. No explicit close() is needed,
    # the ``with`` block already closes the file.
    with io.open(result_dir + 'thulac' + str(i) + '.txt', 'w',
                 encoding='UTF-8') as file:
        file.write('\n'.join(words))
# TODO: 把最终处理后的文件路径打印出来,并确认该文件在本地存在 (print each final output file path and confirm that the file exists locally).