I found another code sample for best-parameter search here:
https://www.kaggle.com/code/arindambaner...d/notebook
I got an error message about the option iid=False; it says "__init__() got an unexpected keyword argument 'iid'".
I would very much appreciate help fixing the problem, thank you!
The error message comes up when running this piece of code:
https://www.kaggle.com/code/arindambaner...d/notebook
I got an error message about the option iid=False; it says "__init__() got an unexpected keyword argument 'iid'".
I would very much appreciate help fixing the problem, thank you!
The error message comes up when running this piece of code:
random_search = RandomizedSearchCV(clf, param_distributions=param_dist,
n_iter=20, cv=5, iid=False)
The entire code is below:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, roc_curve, auc
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV
import matplotlib.pyplot as plt
import seaborn as sns
# pip install PrettyTable
# python -m pip install --upgrade pip
from prettytable import PrettyTable
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
# Load the breast-cancer dataset as a feature matrix and a target vector
X, y = load_breast_cancer(return_X_y=True)
print(X.shape)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)
# Scale the features; the scaler is fitted on the training split only and
# then re-used, unchanged, on the test split
ss = StandardScaler()
X_train_ss = ss.fit(X_train).transform(X_train)
X_test_ss = ss.transform(X_test)
# Baseline model: Random Forest with default hyperparameters (no Randomized Search)
clf = RandomForestClassifier()
y_pred = clf.fit(X_train_ss, y_train).predict(X_test_ss)
# plot_conf_matrix is a function to plot a heatmap of confusion matrix
def plot_conf_matrix(conf_matrix, dtype):
    """Plot a two-class confusion matrix as an annotated heatmap.

    Args:
        conf_matrix: 2x2 array of integer counts (rows are true labels,
            columns are predicted labels), labelled here with classes 0 and 1.
        dtype: Name of the data split, inserted into the plot title
            (for example ``"Test data"``).
    """
    class_names = [0, 1]
    fontsize = 14
    df_conf_matrix = pd.DataFrame(
        conf_matrix, index=class_names, columns=class_names,
    )
    # Start a new figure so the heatmap is not drawn onto an existing one.
    plt.figure()
    # fmt="d" renders the cell annotations as integers.
    heatmap = sns.heatmap(df_conf_matrix, annot=True, fmt="d")
    heatmap.yaxis.set_ticklabels(
        heatmap.yaxis.get_ticklabels(), rotation=45, ha='right', fontsize=fontsize
    )
    heatmap.xaxis.set_ticklabels(
        heatmap.xaxis.get_ticklabels(), rotation=45, ha='right', fontsize=fontsize
    )
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('Confusion Matrix for {0}'.format(dtype))
# Score the baseline model on the held-out test split
test_conf_matrix = confusion_matrix(y_test, y_pred)
acc_rf = accuracy_score(y_test, y_pred)
print(acc_rf)
plot_conf_matrix(test_conf_matrix, "Test data")
# Using Randomized Search to find out the best possible values of the hyperparameters
"""
We are tuning five hyperparameters of the Random Forest classifier here: max_depth, max_features,
min_samples_split, bootstrap, and criterion. Randomized Search samples candidate values from the given
hyperparameter distributions to find the best ones. We use a 5-fold cross-validation scheme (cv = 5).
"""
# Once the training data is fit into the model, the best parameters from the Randomized Search can be extracted from the final result
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV
# Search space: list entries are sampled uniformly, and sp_randint(low, high)
# draws integers from low up to (but not including) high.
param_dist = {
    "max_depth": [3, 5],
    "max_features": sp_randint(1, 11),
    "min_samples_split": sp_randint(2, 11),
    "bootstrap": [True, False],
    "criterion": ["gini", "entropy"],
}
# build a classifier
clf = RandomForestClassifier(n_estimators=50)
# Randomized search: 20 sampled parameter settings, each scored with 5-fold CV.
# The `iid` argument was deprecated in scikit-learn 0.22 and removed in 0.24;
# passing it raises "__init__() got an unexpected keyword argument 'iid'".
# Current versions always behave like the former iid=False (plain mean of the
# per-fold scores), so dropping the argument keeps the intended behaviour.
random_search = RandomizedSearchCV(
    clf, param_distributions=param_dist, n_iter=20, cv=5
)
random_search.fit(X_train_ss, y_train)
print(random_search.best_params_)
