from nltk.tokenize import StanfordSegmenter
segmenter = StanfordSegmenter(  # NOTE: java_class is not passed in this call; it is the required argument the note after the traceback refers to
path_to_sihan_corpora_dict="E:/NLP/NLP_code/Installation/base/stanford-segmenter-2017-06-09/data", path_to_model="E:/NLP/NLP_code/Installation/base/stanford-segmenter-2017-06-09/data/pku.gz", path_to_dict="E:/NLP/NLP_code/Installation/base/stanford-segmenter-2017-06-09/data/dict-chris6.ser.gz")
res = segmenter.segment(u"北海已成为中国对外开放中升起的一颗明星")  # the traceback below starts from this call and ends in a TypeError inside subprocess.list2cmdline
print(res)
C:\Users\lybroman\AppData\Local\Programs\Python\Python36-32\python.exe D:/programming/leetcode/test.py
D:/programming/leetcode/test.py:3: DeprecationWarning:
The StanfordTokenizer will be deprecated in version 3.2.5.
Please use nltk.parse.corenlp.CoreNLPTokenizer instead.'
path_to_sihan_corpora_dict="E:/NLP/NLP_code/Installation/base/stanford-segmenter-2017-06-09/data", path_to_model="E:/NLP/NLP_code/Installation/base/stanford-segmenter-2017-06-09/data/pku.gz", path_to_dict="E:/NLP/NLP_code/Installation/base/stanford-segmenter-2017-06-09/data/dict-chris6.ser.gz")
Traceback (most recent call last):
File "D:/programming/leetcode/test.py", line 4, in <module>
res = segmenter.segment(u"北海已成为中国对外开放中升起的一颗明星")
File "C:\Users\lybroman\AppData\Local\Programs\Python\Python36-32\lib\site-packages\nltk\tokenize\stanford_segmenter.py", line 182, in segment
return self.segment_sents([tokens])
File "C:\Users\lybroman\AppData\Local\Programs\Python\Python36-32\lib\site-packages\nltk\tokenize\stanford_segmenter.py", line 210, in segment_sents
stdout = self._execute(cmd)
File "C:\Users\lybroman\AppData\Local\Programs\Python\Python36-32\lib\site-packages\nltk\tokenize\stanford_segmenter.py", line 229, in _execute
stdout, _stderr = java(cmd, classpath=self._stanford_jar, stdout=PIPE, stderr=PIPE)
File "C:\Users\lybroman\AppData\Local\Programs\Python\Python36-32\lib\site-packages\nltk\internals.py", line 129, in java
p = subprocess.Popen(cmd, stdin=stdin, stdout=stdout, stderr=stderr)
File "C:\Users\lybroman\AppData\Local\Programs\Python\Python36-32\lib\subprocess.py", line 709, in __init__
restore_signals, start_new_session)
File "C:\Users\lybroman\AppData\Local\Programs\Python\Python36-32\lib\subprocess.py", line 971, in _execute_child
args = list2cmdline(args)
File "C:\Users\lybroman\AppData\Local\Programs\Python\Python36-32\lib\subprocess.py", line 461, in list2cmdline
needquote = (" " in arg) or ("\t" in arg) or not arg
TypeError: argument of type 'NoneType' is not iterable
缺少了一些参数，java_class 是必须的：未传入时，传给 subprocess 的命令行参数中含有 None，因而触发上面的 TypeError。
# Corrected construction: java_class must be given explicitly. When it is
# omitted, a None ends up in the argument list handed to subprocess.Popen and
# list2cmdline raises "TypeError: argument of type 'NoneType' is not iterable".
# The class below is the CRF classifier entry point shipped in the Stanford
# segmenter jar; the jar itself is still located the same way as before
# (presumably via the environment -- path_to_jar is not set here either).
segmenter = StanfordSegmenter(
    java_class="edu.stanford.nlp.ie.crf.CRFClassifier",
    path_to_sihan_corpora_dict="E:/NLP/NLP_code/Installation/base/stanford-segmenter-2017-06-09/data",
    path_to_model="E:/NLP/NLP_code/Installation/base/stanford-segmenter-2017-06-09/data/pku.gz",
    path_to_dict="E:/NLP/NLP_code/Installation/base/stanford-segmenter-2017-06-09/data/dict-chris6.ser.gz",
)