当我使用 for 循环迭代构建多个模型时,出现了如下所示的错误。使用相似数据集的前两个模型都能正常构建,但在构建第三个模型时出现了此错误。抛出错误的代码是我使用 Python 的 statsmodels 包调用 sm.Logit() 的那一行:
y = y_mort.convert_objects(convert_numeric=True)
#Building Logistic model_LSVC
print("Shape of y:", y.shape, " &&Shape of X_selected_lsvc:", X.shape)
print("y values:",y.head())
logit = sm.Logit(y,X,missing='drop')
出现的错误:
Shape of y: (9018,) &&Shape of X_selected_lsvc: (9018, 59)
y values: 0 0
1 1
2 0
3 0
4 0
Name: mort, dtype: int64
ValueError Traceback (most recent call last)
<ipython-input-8-fec746e2ee99> in <module>()
160 print("Shape of y:", y.shape, " &&Shape of X_selected_lsvc:", X.shape)
161 print("y values:",y.head())
--> 162 logit = sm.Logit(y,X,missing='drop')
163 # fit the model
164 est = logit.fit(method='cg')
D:\Anaconda3\lib\site-packages\statsmodels\discrete\discrete_model.py in __init__(self, endog, exog, **kwargs)
399
400 def __init__(self, endog, exog, **kwargs):
--> 401 super(BinaryModel, self).__init__(endog, exog, **kwargs)
402 if (self.__class__.__name__ != 'MNLogit' and
403 not np.all((self.endog >= 0) & (self.endog <= 1))):
D:\Anaconda3\lib\site-packages\statsmodels\discrete\discrete_model.py in __init__(self, endog, exog, **kwargs)
152 """
153 def __init__(self, endog, exog, **kwargs):
--> 154 super(DiscreteModel, self).__init__(endog, exog, **kwargs)
155 self.raise_on_perfect_prediction = True
156
D:\Anaconda3\lib\site-packages\statsmodels\base\model.py in __init__(self, endog, exog, **kwargs)
184
185 def __init__(self, endog, exog=None, **kwargs):
--> 186 super(LikelihoodModel, self).__init__(endog, exog, **kwargs)
187 self.initialize()
188
D:\Anaconda3\lib\site-packages\statsmodels\base\model.py in __init__(self, endog, exog, **kwargs)
58 hasconst = kwargs.pop('hasconst', None)
59 self.data = self._handle_data(endog, exog, missing, hasconst,
---> 60 **kwargs)
61 self.k_constant = self.data.k_constant
62 self.exog = self.data.exog
D:\Anaconda3\lib\site-packages\statsmodels\base\model.py in _handle_data(self, endog, exog, missing, hasconst, **kwargs)
82
83 def _handle_data(self, endog, exog, missing, hasconst, **kwargs):
---> 84 data = handle_data(endog, exog, missing, hasconst, **kwargs)
85 # kwargs arrays could have changed, easier to just attach here
86 for key in kwargs:
D:\Anaconda3\lib\site-packages\statsmodels\base\data.py in handle_data(endog, exog, missing, hasconst, **kwargs)
564 klass = handle_data_class_factory(endog, exog)
565 return klass(endog, exog=exog, missing=missing, hasconst=hasconst,
--> 566 **kwargs)
D:\Anaconda3\lib\site-packages\statsmodels\base\data.py in __init__(self, endog, exog, missing, hasconst, **kwargs)
74 # this has side-effects, attaches k_constant and const_idx
75 self._handle_constant(hasconst)
---> 76 self._check_integrity()
77 self._cache = resettable_cache()
78
D:\Anaconda3\lib\site-packages\statsmodels\base\data.py in _check_integrity(self)
450 (hasattr(endog, 'index') and hasattr(exog, 'index')) and
451 not self.orig_endog.index.equals(self.orig_exog.index)):
--> 452 raise ValueError("The indices for endog and exog are not aligned")
453 super(PandasData, self)._check_integrity()
454
ValueError: The indices for endog and exog are not aligned
y 和 X 的形状分别为 (9018,) 和 (9018, 59),因此因变量与自变量的行数并不存在不匹配。有什么想法吗?
原文由 Sanoj 发布,翻译遵循 CC BY-SA 4.0 许可协议
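尝试在 sm.Logit() 行之前将 y 转换为列表,例如 y = list(y)。从上面的回溯可以看出,只有当 endog 和 exog 都带有 index 属性且两者的索引不相等时才会抛出该错误;形状相同并不代表索引相同。列表没有索引,因此不会触发这项检查。注意:这样做的前提是 y 与 X 的行顺序本来就一一对应。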