"""Classify breast tumors as malignant or benign with Gaussian Naive Bayes.

The script loads scikit-learn's breast cancer dataset, holds out part of it
for testing, fits a ``GaussianNB`` classifier on the remainder and reports
the classifier's accuracy on the held-out samples using ``accuracy_score``.
"""

from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Fraction of the samples held out for testing; the rest is used for training.
TEST_SIZE = 0.33
# Fixed seed passed to train_test_split so the split is reproducible.
RANDOM_STATE = 42


def describe_dataset(data):
    """Print the dataset description and a first-sample overview.

    Replaces the triple-quoted blocks that previously had to be un-quoted by
    hand in order to inspect the data.

    Args:
        data: The object returned by ``load_breast_cancer()``.
    """
    print(data['DESCR'])
    print(data['target_names'])
    print('Class label = ', data['target'][0])
    print(data['feature_names'])
    print(data['data'][0])


def evaluate_classifier(features, labels, test_size=TEST_SIZE,
                        random_state=RANDOM_STATE):
    """Train a ``GaussianNB`` model and score it on a held-out split.

    Args:
        features: Feature matrix (``data['data']`` of the dataset).
        labels: Class label per sample (``data['target']`` of the dataset).
        test_size: Forwarded to ``train_test_split``; share of samples kept
            for testing.
        random_state: Forwarded to ``train_test_split``; seed of the split.

    Returns:
        A ``(predictions, accuracy)`` tuple: the class labels predicted for
        the test samples and the ``accuracy_score`` of those predictions
        against the true test labels.
    """
    train, test, train_labels, test_labels = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )

    # fit() returns the estimator itself, so the call can be chained.
    classifier = GaussianNB().fit(train, train_labels)

    # predict() yields class labels (not probabilities) for the test samples.
    predictions = classifier.predict(test)
    return predictions, accuracy_score(test_labels, predictions)


def main(verbose=False):
    """Run the full pipeline and print the predictions and the accuracy.

    Args:
        verbose: When true, also print the dataset description and a sample
            of the organized data before training.
    """
    data = load_breast_cancer()
    if verbose:
        describe_dataset(data)

    predictions, accuracy = evaluate_classifier(data['data'], data['target'])

    print(predictions)
    print("\n")
    print(accuracy)


if __name__ == "__main__":
    main()