"""Train and evaluate a logistic-regression model on order data.

Reads ``order-data.csv``, uses the ``name``, ``email`` and ``total`` columns
to predict the ``blacklisted`` flag, and prints the confusion matrix and the
accuracy measured on a 30% hold-out split.
"""

from warnings import simplefilter

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Ignore all future warnings.
simplefilter(action='ignore', category=FutureWarning)

# Explicit column types for the CSV; text-like columns are kept as objects.
dtypes = {
    'order_id': int,
    'store_id': int,
    'name': object,
    'email': object,
    'telephone': object,
    'payment_postcode': object,
    'payment_country_id': int,
    'payment_code': object,
    'total': float,
    'ip': object,
    'forwarded_ip': object,
    'blacklisted': bool,
}

# keep_default_na=False keeps empty cells as empty strings instead of NaN.
orderData = pd.read_csv(
    'order-data.csv',
    low_memory=False,
    keep_default_na=False,
    dtype=dtypes,
)

# Optional data exploration:
# print(orderData.head())
# print(orderData.describe())
# print(orderData.corr())

features = orderData[['name', 'email', 'total']]
target = orderData.blacklisted

# 30% data will go to test data set
feature_train, feature_test, target_train, target_test = train_test_split(
    features, target, test_size=0.3
)

# LogisticRegression needs numeric input, so the text columns are one-hot
# encoded while `total` is passed through unchanged. Values that only occur in
# the test split are encoded as all-zero rows instead of raising an error.
encoder = ColumnTransformer(
    [('text', OneHotEncoder(handle_unknown='ignore'), ['name', 'email'])],
    remainder='passthrough',
)
# Fit the encoder on the training split only so no test data leaks into it.
encoded_train = encoder.fit_transform(feature_train)
encoded_test = encoder.transform(feature_test)

model = LogisticRegression()
# Do not assign the result back to `model.fit`: that would replace the
# estimator's fit method with the fitted estimator itself.
model.fit(encoded_train, target_train)

predictions = model.predict(encoded_test)

print(confusion_matrix(target_test, predictions))
print(accuracy_score(target_test, predictions))