"""Stratified k-fold evaluation of a linear SVM on an svmlight-format file.

For every fold the script fits the classifier on the training split,
predicts the held-out split, and prints the confusion matrix followed by
the classification report for that fold.
"""
from sklearn import metrics
from sklearn.datasets import load_svmlight_file
from sklearn.svm import SVC, LinearSVC

try:
    # Current location of the splitter classes (scikit-learn >= 0.18).
    from sklearn.model_selection import StratifiedKFold
    _HAS_MODEL_SELECTION = True
except ImportError:
    # Legacy module this script was originally written against; it no
    # longer exists in recent scikit-learn releases.
    from sklearn.cross_validation import StratifiedKFold
    _HAS_MODEL_SELECTION = False


def _iter_folds(X, y, n_folds):
    """Return an iterable of ``(train_index, test_index)`` pairs.

    Hides the API difference between ``sklearn.model_selection`` (splitter
    object with a ``split`` method) and the legacy
    ``sklearn.cross_validation`` (iterable built directly from ``y``).
    """
    if _HAS_MODEL_SELECTION:
        return StratifiedKFold(n_splits=n_folds).split(X, y)
    # Same call the original script made, so behaviour is unchanged on the
    # old scikit-learn versions it targeted.
    return StratifiedKFold(y, k=n_folds, indices=True)


def main(path="ntcir.en.vec", n_folds=5):
    """Cross-validate a linear-kernel SVC and print per-fold metrics.

    Args:
        path: svmlight/libsvm-format file holding features and labels.
        n_folds: number of stratified folds.
    """
    X, y = load_svmlight_file(path)

    # LinearSVC() can be swapped in here; the original script kept it as a
    # commented-out alternative to SVC(kernel="linear").
    clf = SVC(kernel="linear")

    for train_index, test_index in _iter_folds(X, y, n_folds):
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]

        # fit() is called again for each fold on that fold's training split.
        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(metrics.confusion_matrix(y_test, y_predict))
        print(metrics.classification_report(y_test, y_predict))


if __name__ == "__main__":
    main()