I have the following code, which works fine, but it raises the warning

UserWarning: One or more of the test scores are non-finite: [nan nan]
  category=UserWarning

when I revise it into the more concise version shown in the second snippet below. Is the output of the one-hot encoder the culprit of this issue?
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import RidgeClassifier
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import GridSearchCV
train = pd.read_csv("/train.csv")
test = pd.read_csv("/test.csv")
# columns whose names start with "cat" are the categorical features; everything else except the target is numeric
sparse_features = [col for col in train.columns if col.startswith("cat")]
dense_features = [col for col in train.columns if col not in sparse_features + ["target"]]
X = train.drop(["target"], axis=1)
y = train["target"].values
skf = StratifiedKFold(n_splits=5)
clf = RidgeClassifier()
full_pipeline = ColumnTransformer(transformers=[
    ("num", StandardScaler(), dense_features),
    ("cat", OneHotEncoder(), sparse_features)
])
X_prepared = full_pipeline.fit_transform(X)  # fitted on all of X, before any CV split
param_grid = {
    "alpha": [0.1],
    "fit_intercept": [False]
}
gs = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring="roc_auc",
    n_jobs=-1,
    cv=skf
)
gs.fit(X_prepared, y)
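For reference, this is how the output of the fitted one-hot encoder in the first version can be inspected; a minimal sketch of my own, not part of the pipeline itself (named_transformers_ and categories_ are the standard ColumnTransformer / OneHotEncoder attributes):

fitted_ohe = full_pipeline.named_transformers_["cat"]
for col, cats in zip(sparse_features, fitted_ohe.categories_):
    print(col, "has", len(cats), "categories in the full training data")
print(X_prepared.shape)  # scaled dense columns plus all one-hot columns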
The revised version is as follows.
clf2 = RidgeClassifier()
preprocess_pipeline2 = ColumnTransformer([
    ("num", StandardScaler(), dense_features),
    ("cat", OneHotEncoder(), sparse_features)
])
from sklearn.pipeline import Pipeline
final_pipeline = Pipeline(steps=[
    ("p", preprocess_pipeline2),
    ("c", clf2)
])
param_grid2 = {
    "c__alpha": [0.4, 0.1],
    "c__fit_intercept": [False]
}
gs2 = GridSearchCV(
    estimator=final_pipeline,
    param_grid=param_grid2,
    scoring="roc_auc",
    n_jobs=-1,
    cv=skf
)
gs2.fit(X, y)
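With the revised version the fit itself finishes, but the candidate scores come back as NaN, which is where the warning quoted at the top comes from. A minimal sketch of how to see that (my own check, not part of the original code; cv_results_ is the standard GridSearchCV attribute):

import numpy as np
print(gs2.cv_results_["mean_test_score"])   # shows [nan nan], as in the warning
print(np.isnan(gs2.cv_results_["mean_test_score"]).all())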
Can anyone point out which part is wrong?
EDIT: After setting error_score to 'raise', I get more informative feedback about the issue (the traceback is below). It seems to me that I need to fit the one-hot encoder on a merged dataset that combines the training set and the test set; I sketched what I mean at the very bottom, after the traceback. Am I right? But if so, why doesn't the first version complain about the same issue? BTW, would that even be a sensible thing to do?
ValueError
---------------------------------------------------------------------------
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
"""
Traceback (most recent call last):
File "/opt/conda/lib/python3.7/site-packages/joblib/externals/loky/process_executor.py", line 431, in _process_worker
r = call_item()
File "/opt/conda/lib/python3.7/site-packages/joblib/externals/loky/process_executor.py", line 285, in __call__
return self.fn(*self.args, **self.kwargs)
File "/opt/conda/lib/python3.7/site-packages/joblib/_parallel_backends.py", line 595, in __call__
return self.func(*args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/joblib/parallel.py", line 263, in __call__
for func, args, kwargs in self.items]
File "/opt/conda/lib/python3.7/site-packages/joblib/parallel.py", line 263, in <listcomp>
for func, args, kwargs in self.items]
File "/opt/conda/lib/python3.7/site-packages/sklearn/utils/fixes.py", line 222, in __call__
return self.function(*args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/sklearn/model_selection/_validation.py", line 620, in _fit_and_score
test_scores = _score(estimator, X_test, y_test, scorer, error_score)
File "/opt/conda/lib/python3.7/site-packages/sklearn/model_selection/_validation.py", line 674, in _score
scores = scorer(estimator, X_test, y_test)
File "/opt/conda/lib/python3.7/site-packages/sklearn/metrics/_scorer.py", line 200, in __call__
sample_weight=sample_weight)
File "/opt/conda/lib/python3.7/site-packages/sklearn/metrics/_scorer.py", line 334, in _score
y_pred = method_caller(clf, "decision_function", X)
File "/opt/conda/lib/python3.7/site-packages/sklearn/metrics/_scorer.py", line 53, in _cached_call
return getattr(estimator, method)(*args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/sklearn/utils/metaestimators.py", line 120, in <lambda>
out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/sklearn/pipeline.py", line 493, in decision_function
Xt = transform.transform(Xt)
File "/opt/conda/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py", line 565, in transform
Xs = self._fit_transform(X, None, _transform_one, fitted=True)
File "/opt/conda/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py", line 444, in _fit_transform
self._iter(fitted=fitted, replace_strings=True), 1))
File "/opt/conda/lib/python3.7/site-packages/joblib/parallel.py", line 1044, in __call__
while self.dispatch_one_batch(iterator):
File "/opt/conda/lib/python3.7/site-packages/joblib/parallel.py", line 859, in dispatch_one_batch
self._dispatch(tasks)
File "/opt/conda/lib/python3.7/site-packages/joblib/parallel.py", line 777, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/opt/conda/lib/python3.7/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
result = ImmediateResult(func)
File "/opt/conda/lib/python3.7/site-packages/joblib/_parallel_backends.py", line 572, in __init__
self.results = batch()
File "/opt/conda/lib/python3.7/site-packages/joblib/parallel.py", line 263, in __call__
for func, args, kwargs in self.items]
File "/opt/conda/lib/python3.7/site-packages/joblib/parallel.py", line 263, in <listcomp>
for func, args, kwargs in self.items]
File "/opt/conda/lib/python3.7/site-packages/sklearn/utils/fixes.py", line 222, in __call__
return self.function(*args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/sklearn/pipeline.py", line 733, in _transform_one
res = transformer.transform(X)
File "/opt/conda/lib/python3.7/site-packages/sklearn/preprocessing/_encoders.py", line 462, in transform
force_all_finite="allow-nan")
File "/opt/conda/lib/python3.7/site-packages/sklearn/preprocessing/_encoders.py", line 136, in _transform
raise ValueError(msg)
ValueError: Found unknown categories ['MR', 'MW', 'DA'] in column 10 during transform
"""
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
<ipython-input-48-b81f3b7b0724> in <module>
21 cv=skf
22 )
---> 23 gs2.fit(X, y)
/opt/conda/lib/python3.7/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
/opt/conda/lib/python3.7/site-packages/sklearn/model_selection/_search.py in fit(self, X, y, groups, **fit_params)
839 return results
840
--> 841 self._run_search(evaluate_candidates)
842
843 # multimetric is determined here because in the case of a callable
/opt/conda/lib/python3.7/site-packages/sklearn/model_selection/_search.py in _run_search(self, evaluate_candidates)
1286 def _run_search(self, evaluate_candidates):
1287 """Search all candidates in param_grid"""
-> 1288 evaluate_candidates(ParameterGrid(self.param_grid))
1289
1290
/opt/conda/lib/python3.7/site-packages/sklearn/model_selection/_search.py in evaluate_candidates(candidate_params, cv, more_results)
807 (split_idx, (train, test)) in product(
808 enumerate(candidate_params),
--> 809 enumerate(cv.split(X, y, groups))))
810
811 if len(out) < 1:
/opt/conda/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable)
1052
1053 with self._backend.retrieval_context():
-> 1054 self.retrieve()
1055 # Make sure that we get a last message telling us we are done
1056 elapsed_time = time.time() - self._start_time
/opt/conda/lib/python3.7/site-packages/joblib/parallel.py in retrieve(self)
931 try:
932 if getattr(self._backend, supports_timeout , False):
--> 933 self._output.extend(job.get(timeout=self.timeout))
934 else:
935 self._output.extend(job.get())
/opt/conda/lib/python3.7/site-packages/joblib/_parallel_backends.py in wrap_future_result(future, timeout)
540 AsyncResults.get from multiprocessing."""
541 try:
--> 542 return future.result(timeout=timeout)
543 except CfTimeoutError as e:
544 raise TimeoutError from e
/opt/conda/lib/python3.7/concurrent/futures/_base.py in result(self, timeout)
433 raise CancelledError()
434 elif self._state == FINISHED:
--> 435 return self.__get_result()
436 else:
437 raise TimeoutError()
/opt/conda/lib/python3.7/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
ValueError: Found unknown categories ['MR', 'MW', 'DA'] in column 10 during transform
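To make the EDIT above concrete, this is roughly what I mean by fitting the encoder on the merged data: collect every category value seen in either file and pass those lists to the encoder explicitly, so a CV fold can never contain a value the encoder has not seen. This is only a sketch of the idea (preprocess_pipeline3 is a name I made up here; it assumes test has the same cat* columns as train), not code I have actually run:

import numpy as np

# union of the category values per categorical column, taken from both files
all_categories = [
    np.union1d(train[col].unique(), test[col].unique())
    for col in sparse_features
]

preprocess_pipeline3 = ColumnTransformer([
    ("num", StandardScaler(), dense_features),
    # explicit categories -> transform never hits "unknown categories"
    ("cat", OneHotEncoder(categories=all_categories), sparse_features)
])

Alternatively, I understand that OneHotEncoder(handle_unknown="ignore") simply encodes unseen values as all-zero rows at transform time, which would avoid the error without touching the test file; I am not sure which of the two is considered better practice.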