boxcox1p normalization + Pipeline + StackingCVRegressor


The idea is to find the best lmbda parameter for the boxcox1p transform, i.e. the one that makes the skewed feature closest to normally distributed.
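The screenshots that originally illustrated this step are lost. As a sketch of the idea, SciPy's boxcox1p applies the Box-Cox transform to 1 + x, and boxcox_normmax can search for the best lmbda; the lognormal sample data here is purely illustrative, not from the original post:

```python
import numpy as np
from scipy.special import boxcox1p
from scipy.stats import boxcox_normmax, skew

# Synthetic right-skewed feature (e.g. a house-price-like column)
rng = np.random.RandomState(42)
x = rng.lognormal(mean=0.0, sigma=1.0, size=1000)

# boxcox1p(x, l) == boxcox(x + 1, l), so search for lmbda on x + 1
lmbda = boxcox_normmax(x + 1)
x_t = boxcox1p(x, lmbda)

print("best lmbda: %.3f" % lmbda)
print("skew before: %.2f, after: %.2f" % (skew(x), skew(x_t)))
```

The transformed feature should be far less skewed than the raw one, which is what linear models such as Lasso and Ridge benefit from.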

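The Pipeline step from the title also only survived as screenshots. A minimal sketch of what a preprocessing-plus-model pipeline typically looks like (synthetic data via make_regression; the scaler and alpha are illustrative assumptions, not the original post's values):

```python
import numpy as np
from sklearn.datasets import make_regression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import Lasso
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=200, n_features=10, noise=10.0, random_state=42)

# Chain scaling and the estimator so CV refits both on each fold
pipe = make_pipeline(RobustScaler(), Lasso(alpha=1.0))
scores = cross_val_score(pipe, X, y, cv=5)
print("R^2: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std()))
```

Wrapping the scaler in the pipeline matters: fitting the scaler outside cross-validation would leak statistics from the validation folds into training.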

from mlxtend.regressor import StackingCVRegressor
from sklearn.datasets import load_boston  # removed in scikit-learn >= 1.2
from sklearn.svm import SVR
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
import numpy as np

RANDOM_SEED = 42
X, y = load_boston(return_X_y=True)

svr = SVR(kernel='linear')
lasso = Lasso()
rf = RandomForestRegressor(n_estimators=5, random_state=RANDOM_SEED)

# The StackingCVRegressor uses scikit-learn's check_cv internally,
# which doesn't support a random seed. Thus NumPy's random seed needs
# to be specified explicitly for deterministic behavior.
np.random.seed(RANDOM_SEED)
stack = StackingCVRegressor(regressors=(svr, lasso, rf), meta_regressor=lasso)

print('5-fold cross validation scores:\n')
for clf, label in zip([svr, lasso, rf, stack],
                      ['SVM', 'Lasso', 'Random Forest', 'StackingClassifier']):
    scores = cross_val_score(clf, X, y, cv=5)
    print("R^2 score: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))
5-fold cross validation scores:
R^2 score: 0.45 (+/- 0.29) [SVM]
R^2 score: 0.43 (+/- 0.14) [Lasso]
R^2 score: 0.52 (+/- 0.28) [Random Forest]
R^2 score: 0.58 (+/- 0.24) [StackingClassifier]
# The same comparison, scored with (negative) mean squared error instead of R^2:
np.random.seed(RANDOM_SEED)
for clf, label in zip([svr, lasso, rf, stack],
                      ['SVM', 'Lasso', 'Random Forest', 'StackingClassifier']):
    scores = cross_val_score(clf, X, y, cv=5, scoring='neg_mean_squared_error')
    print("Neg. MSE score: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))


from mlxtend.regressor import StackingCVRegressor
from sklearn.datasets import load_boston
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
import numpy as np

RANDOM_SEED = 42
X, y = load_boston(return_X_y=True)

ridge = Ridge()
lasso = Lasso()
rf = RandomForestRegressor(random_state=RANDOM_SEED)

# The StackingCVRegressor uses scikit-learn's check_cv internally,
# which doesn't support a random seed. Thus NumPy's random seed needs
# to be specified explicitly for deterministic behavior.
np.random.seed(RANDOM_SEED)

stack = StackingCVRegressor(regressors=(lasso, ridge),
                            meta_regressor=rf,
                            use_features_in_secondary=True)

grid = GridSearchCV(
    estimator=stack,
    param_grid={
        'lasso__alpha': [x / 5.0 for x in range(1, 10)],
        'ridge__alpha': [x / 20.0 for x in range(1, 10)],
        'meta-randomforestregressor__n_estimators': [10, 100],
    },
    cv=5,
    refit=True
)

grid.fit(X, y)

print("Best: %f using %s" % (grid.best_score_, grid.best_params_))
# Best: 0.673590 using {'lasso__alpha': 0.4, 'meta-randomforestregressor__n_estimators': 10, 'ridge__alpha

cv_keys = ('mean_test_score', 'std_test_score', 'params')
for r, _ in enumerate(grid.cv_results_['mean_test_score']):
    print("%0.3f +/- %0.2f %r"
          % (grid.cv_results_[cv_keys[0]][r],
             grid.cv_results_[cv_keys[1]][r] / 2.0,
             grid.cv_results_[cv_keys[2]][r]))
    if r > 10:
        break
print('...')

print('Best parameters: %s' % grid.best_params_)
print('Accuracy: %.2f' % grid.best_score_)
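For reference, scikit-learn (>= 0.22) now ships its own StackingRegressor covering the same idea. A hedged sketch of the equivalent setup, using synthetic data since load_boston has been removed from recent scikit-learn releases (estimator choices mirror the example above, not a verified reproduction of it):

```python
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import StackingRegressor, RandomForestRegressor
from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=200, n_features=10, noise=10.0, random_state=42)

stack = StackingRegressor(
    estimators=[('lasso', Lasso()), ('ridge', Ridge())],
    final_estimator=RandomForestRegressor(n_estimators=10, random_state=42),
    passthrough=True,  # like mlxtend's use_features_in_secondary=True
    cv=5,
)
scores = cross_val_score(stack, X, y, cv=5)
print("R^2: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std()))
```

Parameter names differ slightly: the meta-estimator's hyperparameters are addressed as final_estimator__n_estimators rather than meta-randomforestregressor__n_estimators.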
