import pandas as pd

# Load the Play Tennis dataset; the last column is the class label.
df = pd.read_csv("./Data/PlayTennis.csv")

target = str(list(df)[-1])        # name of the target column
print(target)
target_list = set(df[target])     # distinct class values
print(target_list)
# Play Tennis
# {'Yes', 'No'}

# Map every feature column to the set of values it can take.
Attr = {}
for a in list(df)[:-1]:
    Attr[a] = set(df[a])
print(Attr)
# {'Outlook': {'Overcast', 'Rain', 'Sunny'}, 'Temperature': {'Cool', 'Mild', 'Hot'}, 'Humidity': {'High', 'Normal'}, 'Wind': {'Weak', 'Strong'}}

def probAttr(data, attr, val):
    """Return (count, relative frequency) of rows where data[attr] == val."""
    Total = data.shape[0]
    cnt = len(data[data[attr] == val])
    return cnt, cnt / Total

probAttr(df, target, "Yes")
# (9, 0.6428571428571429)

def train(data, Attr, targetVals, target):
    targetProbs = {}   # P(A)  : prior probability of each class
    countTarget = {}
    for targetVal in targetVals:
        countTarget[targetVal], targetProbs[targetVal] = probAttr(data, target, targetVal)

    AttrConcept = {}        # P(X|A) : likelihood of each attribute value given the class
    probability_list = {}   # P(X)   : marginal probability of each attribute value
    for att in Attr:
        probability_list[att] = {}
        AttrConcept[att] = {}
        for val in Attr[att]:
            AttrConcept[att][val] = {}
            cnt, probability_list[att][val] = probAttr(data, att, val)
            for targetVal in targetVals:
                dataTemp = data[data[att] == val]
                AttrConcept[att][val][targetVal] = len(dataTemp[dataTemp[target] == targetVal]) / countTarget[targetVal]

    print("P(A) : ", targetProbs, "\n")
    print("P(X|A) : ", AttrConcept, "\n")
    print("P(X) : ", probability_list, "\n")
    return targetProbs, AttrConcept, probability_list

def test(examples, Attr, target_list, targetProbs, AttrConcept, probability_list):
    misclassification_count = 0
    Total = len(examples)
    for ex in examples:
        # Accumulate P(A) * prod(P(x|A) / P(x)) for every class.
        px = {}
        for a in Attr:
            for x in ex:
                for t in target_list:
                    if x in AttrConcept[a]:   # x is a value of attribute a
                        if t not in px:
                            px[t] = targetProbs[t] * AttrConcept[a][x][t] / probability_list[a][x]
                        else:
                            px[t] = px[t] * AttrConcept[a][x][t] / probability_list[a][x]
        print(px)
        classification = max(px, key=px.get)
        print("Classification :", classification, "Expected :", ex[-1])
        if classification != ex[-1]:
            misclassification_count += 1
    misclassification_rate = misclassification_count * 100 / Total
    accuracy = 100 - misclassification_rate
    print("Misclassification Count = {}".format(misclassification_count))
    print("Misclassification Rate = {}%".format(misclassification_rate))
    print("Accuracy = {}%".format(accuracy))

targetProbs, AttrConcept, probability_list = train(df, Attr, target_list, target)
test(df.values, Attr, target_list, targetProbs, AttrConcept, probability_list)

# OUTPUT :-
# P(A) :  {'Yes': 0.6428571428571429, 'No': 0.35714285714285715}
# P(X|A) :  {'Outlook': {'Overcast': {'Yes': 0.4444444444444444, 'No': 0.0}, 'Rain': {'Yes': 0.3333333333333333, 'No': 0.4}, 'Sunny': {'Yes': 0.2222222222222222, 'No': 0.6}}, 'Temperature': {'Cool': {'Yes': 0.3333333333333333, 'No': 0.2}, 'Mild': {'Yes': 0.4444444444444444, 'No': 0.4}, 'Hot': {'Yes': 0.2222222222222222, 'No': 0.4}}, 'Humidity': {'High': {'Yes': 0.3333333333333333, 'No': 0.8}, 'Normal': {'Yes': 0.6666666666666666, 'No': 0.2}}, 'Wind': {'Weak': {'Yes': 0.6666666666666666, 'No': 0.4}, 'Strong': {'Yes': 0.3333333333333333, 'No': 0.6}}}
# P(X) :  {'Outlook': {'Overcast': 0.2857142857142857, 'Rain': 0.35714285714285715, 'Sunny': 0.35714285714285715}, 'Temperature': {'Cool': 0.2857142857142857, 'Mild': 0.42857142857142855, 'Hot': 0.2857142857142857}, 'Humidity': {'High': 0.5, 'Normal': 0.5}, 'Wind': {'Weak': 0.5714285714285714, 'Strong': 0.42857142857142855}}
#
# {'Yes': 0.2419753086419753, 'No': 0.9408000000000002}
# Classification : No Expected : No
# {'Yes': 0.16131687242798354, 'No': 1.8816000000000002}
# Classification : No Expected : No
# {'Yes': 0.6049382716049383, 'No': 0.0}
# Classification : Yes Expected : Yes
# {'Yes': 0.4839506172839506, 'No': 0.4181333333333335}
# Classification : Yes Expected : Yes
# {'Yes': 1.0888888888888888, 'No': 0.07840000000000004}
# Classification : Yes Expected : Yes
# {'Yes': 0.7259259259259259, 'No': 0.15680000000000005}
# Classification : Yes Expected : No
# {'Yes': 1.2098765432098766, 'No': 0.0}
# Classification : Yes Expected : Yes
# {'Yes': 0.3226337448559671, 'No': 0.6272000000000001}
# Classification : No Expected : No
# {'Yes': 0.7259259259259256, 'No': 0.11760000000000002}
# Classification : Yes Expected : Yes
# {'Yes': 0.9679012345679012, 'No': 0.10453333333333338}
# Classification : Yes Expected : Yes
# {'Yes': 0.43017832647462273, 'No': 0.31360000000000005}
# Classification : Yes Expected : Yes
# {'Yes': 0.5377229080932785, 'No': 0.0}
# Classification : Yes Expected : Yes
# {'Yes': 1.2098765432098766, 'No': 0.0}
# Classification : Yes Expected : Yes
# {'Yes': 0.3226337448559671, 'No': 0.8362666666666669}
# Classification : No Expected : No
#
# Misclassification Count = 1
# Misclassification Rate = 7.142857142857143%
# Accuracy = 92.85714285714286%
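
# A minimal sketch (not part of the original script) showing how the trained
# dictionaries could be reused to score one unseen instance; the helper name
# `classify` and the example instance are assumptions, and zero likelihoods
# (e.g. Overcast with class 'No') are left as-is since no smoothing is applied.
def classify(sample, target_list, targetProbs, AttrConcept, probability_list):
    """Return (predicted class, scores) maximising P(A) * prod(P(x|A) / P(x))."""
    scores = {}
    for t in target_list:
        score = targetProbs[t]
        for att, val in sample.items():
            score *= AttrConcept[att][val][t] / probability_list[att][val]
        scores[t] = score
    return max(scores, key=scores.get), scores

# Example usage with a hypothetical instance:
# label, scores = classify({'Outlook': 'Sunny', 'Temperature': 'Cool',
#                           'Humidity': 'High', 'Wind': 'Strong'},
#                          target_list, targetProbs, AttrConcept, probability_list)
# print(label, scores)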