如题:如何设置dask.array的chunksize和shape?
代码及完整报错信息如下:
>>> x = data.values
>>> x
dask.array<values, shape=(nan, 24), dtype=int64, chunksize=(nan, 24)>
>>> y = data['label'].values
>>> y
dask.array<values, shape=(nan,), dtype=int64, chunksize=(nan,)>
>>> type(y)
dask.array.core.Array
>>> from dask_ml.linear_model import LogisticRegression
>>> lgr = LogisticRegression()
>>> lgr_model = lgr.fit(x, y)
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-116-f51a6d13cf8c> in <module>()
----> 1 lgr_model = lgr.fit(x, y)
E:\miniconda\envs\course_py35\lib\site-packages\dask_ml\linear_model\glm.py in fit(self, X, y)
151 self : objectj
152 """
--> 153 X = self._check_array(X)
154
155 solver_kwargs = self._get_solver_kwargs()
E:\miniconda\envs\course_py35\lib\site-packages\dask_ml\linear_model\glm.py in _check_array(self, X)
165 def _check_array(self, X):
166 if self.fit_intercept:
--> 167 X = add_intercept(X)
168
169 return check_array(X)
E:\miniconda\envs\course_py35\lib\site-packages\multipledispatch\dispatcher.py in __call__(self, *args, **kwargs)
208 self._cache[types] = func
209 try:
--> 210 return func(*args, **kwargs)
211
212 except MDNotImplementedError:
E:\miniconda\envs\course_py35\lib\site-packages\dask_glm\utils.py in add_intercept(X)
145 def add_intercept(X):
146 if np.isnan(np.sum(X.shape)):
--> 147 raise NotImplementedError("Can not add intercept to array with "
148 "unknown chunk shape")
149 j, k = X.chunks
NotImplementedError: Can not add intercept to array with unknown chunk shape
根据报错提示,x、y的chunksize和shape是未知的(输出中显示为nan),所以add_intercept无法执行。请问应该如何为它们设定明确的chunksize和shape?