Jul-15-2019, 08:14 AM
I was trying to load a dataset from my local computer using pandas when I run the code I got these problems, please anyone help me :
#!/usr/bin/env python
'''
An example file to show how to use the feature-selection code in ml_lib
'''
import pandas
from tqdm import tqdm
import pandas as pd
import csv
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import svm
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import feature_select
import depmeas
if __name__=='__main__':
NUM_CV = 3
RANDOM_SEED = 123
MAX_ITER = 1000
leuk = pd.read_csv(r'C:/Users/pc/Desktop/dataset/leukemia.csv')
X = leuk['data']
y = leuk['target']
# split the data for testing
(X_train, X_test, y_train, y_test) = train_test_split(X, y, test_size=0.3, random_state=RANDOM_SEED)
# perform feature selection
num_features_to_select = 25
K_MAX = 1000
estimator = depmeas.mi_tau
n_jobs = -1
feature_ranking = feature_select.feature_select(X_train, y_train, num_features_to_select=num_features_to_select, K_MAX=K_MAX, estimator=estimator, n_jobs=n_jobs)
num_selected_features = len(feature_ranking )
# for each feature, compute the accuracy on the test data as we add features
mean_acc = np.empty((num_selected_features,))
var_acc = np.empty((num_selected_features,))
for ii in tqdm(range(num_selected_features), desc='Computing Classifier Performance...'):
classifier = svm.SVC(random_state=RANDOM_SEED,max_iter=MAX_ITER)
X_test_in = X_test[:,feature_ranking [0:ii+1]]
scores = cross_val_score(classifier, X_test_in, y_test, cv=NUM_CV, n_jobs=-1)
mu = scores.mean()
sigma_sq = scores.std()
mean_acc[ii] = mu
var_acc[ii] = sigma_sq
x = np.arange(num_selected_features)+1
y = mean_acc
yLo = mean_acc-var_acc/2.
yHi = mean_acc+var_acc/2.
plt.plot(x,y)
plt.fill_between(x,yLo,yHi,alpha=0.2)
plt.grid(True)
plt.title('Leukemia Dataset Feature Selection\n Total # Features=%d' % (X.shape[1]))
plt.xlabel('# Selected Features')
plt.ylabel('SVC Classifier Accuracy')
plt.show()Error:Traceback (most recent call last):
File "C:\Users\pc\PycharmProjects\MymrmrTest\venv\lib\site-packages\pandas\core\indexes\base.py", line 2657, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1601, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1608, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'data'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/pc/PycharmProjects/MymrmrTest/feature_select_test.py", line 39, in <module>
X = leuk['data']
File "C:\Users\pc\PycharmProjects\MymrmrTest\venv\lib\site-packages\pandas\core\frame.py", line 2927, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\Users\pc\PycharmProjects\MymrmrTest\venv\lib\site-packages\pandas\core\indexes\base.py", line 2659, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1601, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1608, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'data'
Process finished with exit code 1
