import pickle

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc

if __name__ == '__main__':
    # Load the iris data set (no header row). The first 100 rows cover the
    # setosa and versicolor classes; columns 0 and 2 are sepal length and
    # petal length, column 4 is the class name.
    data = pd.read_csv('iris.data', header=None).values
    X = data[0:100, [0, 2]].astype(float)
    y = data[0:100, 4]
    # Turn the two string class labels into 0/1 targets.
    y = label_binarize(y, classes=np.unique(y)).ravel()

    # Hold out 25% of the samples as a test split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

    # Standardize the features using statistics from the training split only.
    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    # Train an L2-regularized logistic regression model.
    lr = LogisticRegression(penalty='l2', solver='liblinear')
    lr.fit(X_train_std, y_train)

    # Persist the trained model with pickle ...
    with open('lr.pkl', 'wb') as fd:
        pickle.dump(lr, fd)

    # ... and load it back for prediction.
    with open('lr.pkl', 'rb') as fd:
        lr2 = pickle.load(fd)

    # Predict labels and class probabilities for the test split.
    y_test_pred = lr2.predict(X_test_std)
    y_test_proba = lr2.predict_proba(X_test_std)

    # Show class probabilities, predicted label and true label side by side.
    print('results =')
    for a in zip(y_test_proba, y_test_pred, y_test):
        print(a)

    # Accuracy and standard evaluation reports on the test split.
    print('acc = {}'.format(lr2.score(X_test_std, y_test)))
    print('confusion_matrix = \n{}'.format(confusion_matrix(y_test, y_test_pred)))
    print('classification_report = \n{}'.format(classification_report(y_test, y_test_pred)))

    # 5-fold cross-validation scores (computed here on the test split).
    print('cross_val_score = \n{}'.format(cross_val_score(lr2, X_test_std, y_test, cv=5)))
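
    # Optional sketch (not in the original script): roc_curve and auc are
    # imported above but never used. With the 0/1 labels produced by
    # label_binarize, an ROC curve and its AUC on the test split could be
    # computed like this.
    fpr, tpr, thresholds = roc_curve(y_test, y_test_proba[:, 1])
    print('roc_auc = {}'.format(auc(fpr, tpr)))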

    # Grid search over the regularization penalty and strength C with
    # 5-fold cross-validation on the training split.
    hyper_paras = dict(penalty=['l1', 'l2'], C=np.logspace(0, 4, 10))
    clf = GridSearchCV(LogisticRegression(solver='liblinear'), hyper_paras, cv=5, verbose=0)
    best_model = clf.fit(X_train_std, y_train)
    print('Best Penalty = {}'.format(best_model.best_estimator_.get_params()['penalty']))
    print('Best C = {}'.format(best_model.best_estimator_.get_params()['C']))
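
    # A possible follow-up (assumption, not part of the original script):
    # score the refit grid-search winner on the held-out test split to
    # compare it with the plain L2 model above.
    print('best model test acc = {}'.format(best_model.best_estimator_.score(X_test_std, y_test)))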