Last active
January 9, 2024 19:41
-
-
Save Adithya-Rama/fb9f275c3dcecd3337bd45a9a01aea85 to your computer and use it in GitHub Desktop.
This gist contains all the lab programs for AIML, 7th semester (18CS71).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def recAOStar(n):
    """Run AO* below node ``n`` and return the node's revised heuristic.

    Reads the module-level ``allNodes`` graph, rewrites heuristics through
    ``change_heuristic`` and stores the chosen child group of every expanded
    node in ``optimal_child_group``.

    A child group is identified by the concatenation of its node names
    (``'CD'`` for the AND pair ``('C', 'D')``), so node names must be single
    characters.

    Returns:
        The heuristic of ``n``. Leaf nodes return their unchanged heuristic
        (earlier versions returned ``None`` for leaves).
    """
    print("Expanding Node : ", n)
    # Segregation of AND and OR nodes
    successors = allNodes.get(n, {})
    and_nodes = successors.get('AND', [])
    or_nodes = successors.get('OR', [])
    # A leaf has nothing to expand, so its heuristic is already final.
    if not and_nodes and not or_nodes:
        return heuristic(n)

    total_groups = len(and_nodes) + len(or_nodes)
    marked = {}
    solvable = False
    while not solvable:
        # Every child group has been tried: settle on the overall cheapest.
        if len(marked) == total_groups:
            min_cost, min_cost_group = least_cost_group(and_nodes, or_nodes, {})
            change_heuristic(n, min_cost)
            optimal_child_group[n] = min_cost_group
            break

        # Cheapest group among those not tried yet.
        min_cost, min_cost_group = least_cost_group(and_nodes, or_nodes, marked)

        # Expand each member that has a subtree so its heuristic is refreshed.
        # A group key is one character (OR child) or two (AND pair), so
        # slicing to two characters visits exactly its members.
        is_expanded = False
        for child in min_cost_group[:2]:
            if child in allNodes:
                is_expanded = True
                recAOStar(child)

        if is_expanded:
            # Heuristics changed; accept only if this group is still cheapest.
            min_cost_verify, min_cost_group_verify = least_cost_group(and_nodes, or_nodes, {})
            if min_cost_group == min_cost_group_verify:
                solvable = True
                change_heuristic(n, min_cost_verify)
                optimal_child_group[n] = min_cost_group
        else:
            # Nothing was expanded, so the cost computed above still holds.
            solvable = True
            change_heuristic(n, min_cost)
            optimal_child_group[n] = min_cost_group

        # Mark the child group which was just tried
        marked[min_cost_group] = 1
    return heuristic(n)
| # Function to calculate the min cost among all the child nodes | |
def least_cost_group(and_nodes, or_nodes, marked):
    """Return ``[cost, group]`` for the cheapest child group not in ``marked``.

    An AND pair costs the sum of both heuristics plus 2 and is keyed by the
    concatenated node names; an OR node costs its heuristic plus 1. When no
    group is available (or none is cheaper than 999999) the result is
    ``[999999, None]``.
    """
    candidates = {}
    for pair in and_nodes:
        key = pair[0] + pair[1]
        if key in marked:
            continue
        candidates[key] = heuristic(pair[0]) + heuristic(pair[1]) + 2
    for node in or_nodes:
        if node in marked:
            continue
        candidates[node] = heuristic(node) + 1

    # Linear scan keeps the first group on ties, in insertion order.
    best_cost, best_group = 999999, None
    for group, cost in candidates.items():
        if cost < best_cost:
            best_cost, best_group = cost, group
    return [best_cost, best_group]
| # Returns heuristic of a node | |
def heuristic(n):
    """Look up the current heuristic estimate of node ``n`` in ``H_dist``."""
    estimate = H_dist[n]
    return estimate
| # Updates the heuristic of a node | |
def change_heuristic(n, cost):
    """Store ``cost`` as the new heuristic estimate of node ``n`` in ``H_dist``."""
    H_dist[n] = cost
| # Function to print the optimal cost nodes | |
def print_path(node):
    """Print the optimal child group of ``node`` and, recursively, of its members.

    Groups are read from ``optimal_child_group``; consecutive groups are
    joined with ``->`` and no trailing newline is written.
    """
    group = optimal_child_group[node]
    print(group, end="")
    # A group key holds one node name (OR child) or two (AND pair).
    for child in group[:2]:
        if child in optimal_child_group:
            print("->", end="")
            print_path(child)
# Describe the heuristic here: one estimate per node name
H_dist = {
    'A': -1, 'B': 4, 'C': 2, 'D': 3, 'E': 6,
    'F': 8, 'G': 2, 'H': 0, 'I': 0, 'J': 0,
}

# Describe your graph here: each node lists its AND pairs and/or OR children
allNodes = {
    'A': {'AND': [('C', 'D')], 'OR': ['B']},
    'B': {'OR': ['E', 'F']},
    'C': {'OR': ['G'], 'AND': [('H', 'I')]},
    'D': {'OR': ['J']},
}

# Filled in by recAOStar: node -> chosen child group
optimal_child_group = {}

optimal_cost = recAOStar('A')
print('Nodes which gives optimal cost are')
print_path('A')
print('\nOptimal Cost is :: ', optimal_cost)
print(optimal_child_group)
# OUTPUT :-
# Nodes which gives optimal cost are
# CD->HI->J
# Optimal Cost is :: 5
# {'B': 'E', 'C': 'HI', 'D': 'J', 'A': 'CD'}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Weighted directed graph: node -> list of (neighbour, edge cost).
# Nodes without outgoing edges (such as 'J') have no entry.
Graph_nodes = dict(
    A=[('B', 6), ('F', 3)],
    B=[('C', 3), ('D', 2)],
    C=[('D', 1), ('E', 5)],
    D=[('C', 1), ('E', 8)],
    E=[('I', 5), ('J', 5)],
    F=[('G', 1), ('H', 7)],
    G=[('I', 3)],
    H=[('I', 2)],
    I=[('E', 5), ('J', 3)],
)
def heuristic(v):
    """Return the hard-coded heuristic estimate for node ``v``.

    Raises ``KeyError`` for a node that is not in the table.
    """
    names = "ABCDEFGHIJ"
    values = (10, 8, 5, 7, 3, 6, 5, 3, 1, 0)
    estimates = dict(zip(names, values))
    return estimates[v]
def neighbors(v):
    """Return the ``(neighbour, weight)`` list of ``v`` from ``Graph_nodes``, or ``None`` if absent."""
    return Graph_nodes.get(v)
def aStarAlgo(start_node, stop_node):
    """Find a least-cost path from ``start_node`` to ``stop_node`` with A*.

    Edge costs come from ``neighbors`` and estimates from ``heuristic``.
    The path is printed and returned as a list of node names; when the goal
    cannot be reached a message is printed and ``None`` is returned.

    Fixes over the earlier version:
      * the open set starts as ``{start_node}`` (``set(start_node)`` split a
        multi-character name into single characters);
      * a node is treated as new only when it is in neither set (the old
        ``or`` test was always true and overwrote better paths);
      * a re-opened node is removed from the closed set by its own name
        (removing ``n`` raised ``KeyError``);
      * nodes without outgoing edges go through ``neighbors`` instead of
        indexing ``Graph_nodes`` directly.
    """
    open_set = {start_node}
    closed_set = set()
    g = {start_node: 0}                   # cheapest known cost from the start
    parents = {start_node: start_node}    # the start is its own parent

    while open_set:
        # Expand the open node with the smallest f = g + h.
        n = min(open_set, key=lambda v: g[v] + heuristic(v))

        if n == stop_node:
            # Walk the parent links back to the start.
            path = []
            while parents[n] != n:
                path.append(n)
                n = parents[n]
            path.append(start_node)
            path.reverse()
            print("The path is : ", path)
            return path

        for m, weight in neighbors(n) or []:
            tentative = g[n] + weight
            if m not in open_set and m not in closed_set:
                open_set.add(m)
                parents[m] = n
                g[m] = tentative
            elif tentative < g[m]:
                # Found a cheaper route to an already known node.
                g[m] = tentative
                parents[m] = n
                if m in closed_set:
                    closed_set.remove(m)
                    open_set.add(m)

        open_set.remove(n)
        closed_set.add(n)

    print("Path doesnt exist")
    return None


aStarAlgo('A', 'J')
# Output:- ['A', 'F', 'G', 'I', 'J']
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
# Training data: two input features and one target value per sample
X = np.array([[2, 9], [1, 5], [3, 6]], dtype=float)
Y = np.array([[92], [86], [89]], dtype=float)
# Scale every input column by its maximum and divide the targets by 100
X = X / X.max(axis=0)
Y = Y / 100

epochs = 1000
learning_rate = 0.6

# Unit counts per layer (the names say "Layers" but they are used as sizes)
inputLayers, hiddenLayers, outputLayers = 2, 3, 1

# Weights and biases drawn uniformly from [0, 1)
wh = np.random.uniform(size=(inputLayers, hiddenLayers))
bh = np.random.uniform(size=(1, hiddenLayers))
w0 = np.random.uniform(size=(hiddenLayers, outputLayers))
b0 = np.random.uniform(size=(1, outputLayers))
def sigmoid(z):
    """Logistic function ``1 / (1 + e**-z)``, applied element-wise by NumPy."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator
def derivative(x):
    """Sigmoid slope expressed through the sigmoid output ``x``: ``x * (1 - x)``."""
    complement = 1 - x
    return complement * x
# Gradient-descent training of the 2-3-1 sigmoid network.
# The earlier loop left the biases bh and b0 at their random initial values;
# they are now updated with the column sums of the layer deltas.
for i in range(epochs):
    # Forward Propagation
    z_h = np.dot(X, wh) + bh
    sigmoid_h = sigmoid(z_h)
    z_0 = np.dot(sigmoid_h, w0) + b0
    output = sigmoid(z_0)
    # Backward Propagation
    deltaK = (Y - output) * derivative(output)
    # Uses w0 from before this epoch's update, so compute it first.
    deltaH = deltaK.dot(w0.T) * derivative(sigmoid_h)
    w0 = w0 + learning_rate * sigmoid_h.T.dot(deltaK)
    b0 = b0 + learning_rate * deltaK.sum(axis=0, keepdims=True)
    wh = wh + learning_rate * X.T.dot(deltaH)
    bh = bh + learning_rate * deltaH.sum(axis=0, keepdims=True)
print(f"Input:\n {X}")
print(f"Actual Output:\n {Y} ")
print(f"Predicted Output:\n {output}")
| # OUTPUT :- | |
| # Input: | |
| # [[0.66666667 1. ] | |
| # [0.33333333 0.55555556] | |
| # [1. 0.66666667]] | |
| # Actual Output: | |
| # [[0.92] | |
| # [0.86] | |
| # [0.89]] | |
| # Predicted Output: | |
| # [[0.89561426] | |
| # [0.87785989] | |
| # [0.89594741]] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| from pandas import DataFrame | |
| from math import log | |
| from collections import Counter | |
| from pprint import pprint | |
| df_tennis = pd.read_csv('./Data/PlayTennis.csv') | |
def entropy(probs):
    """Return the Shannon entropy, in bits, of the probabilities in ``probs``.

    Zero probabilities contribute nothing (the ``0 * log 0 = 0`` convention)
    instead of raising a math domain error. A negative probability still
    raises ``ValueError`` from ``log``.
    """
    return sum(-prob * log(prob, 2) for prob in probs if prob != 0)
def entropy_of_list(a_list):
    """Return the entropy, in bits, of the value frequencies in ``a_list``."""
    total = float(len(a_list))
    frequencies = Counter(item for item in a_list)
    shares = [count / total for count in frequencies.values()]
    return entropy(shares)
def information_gain(df, split_attribute_name, target_attribute_name):
    """Return the entropy reduction of the target when ``df`` is split on an attribute.

    The result is the target entropy over all rows minus the weighted sum of
    the target entropies inside each group of ``split_attribute_name``.
    """
    total_rows = float(len(df.index))

    def share_of_rows(values):
        # Fraction of all rows that fall into this group.
        return len(values) / total_rows

    per_value = df.groupby(split_attribute_name).agg(
        {target_attribute_name: [entropy_of_list, share_of_rows]}
    )[target_attribute_name]
    per_value.columns = ['Entropy', 'PropObservations']

    entropy_after = sum(per_value['Entropy'] * per_value['PropObservations'])
    entropy_before = entropy_of_list(df[target_attribute_name])
    return entropy_before - entropy_after
def id3(df, target_attribute_name, attribute_names, default_class=None):
    """Build an ID3 decision tree as nested dicts.

    Args:
        df: DataFrame holding the examples.
        target_attribute_name: column with the class label.
        attribute_names: columns still available for splitting.
        default_class: label returned when ``df`` has no rows.

    Returns:
        A class label for a leaf, otherwise ``{attribute: {value: subtree}}``.

    Fixes over the earlier version: the fallback label is the most frequent
    class (``max(cnt.keys())`` picked the alphabetically largest one), and
    when the attributes run out the majority class of the current examples
    is returned rather than the parent's default.
    """
    cnt = Counter(x for x in df[target_attribute_name])
    print(cnt)
    if not cnt:
        # No examples reached this branch.
        return default_class
    if len(cnt) == 1:
        # All examples agree on one class.
        return next(iter(cnt))

    majority_class = cnt.most_common(1)[0][0]
    if not attribute_names:
        return majority_class

    gainz = [information_gain(df, attr, target_attribute_name) for attr in attribute_names]
    best_attr = attribute_names[gainz.index(max(gainz))]
    remaining_attribute_names = [name for name in attribute_names if name != best_attr]

    tree = {best_attr: {}}
    for attr_val, data_subset in df.groupby(best_attr):
        tree[best_attr][attr_val] = id3(
            data_subset, target_attribute_name, remaining_attribute_names, majority_class
        )
    return tree
# Start from every column, then drop the label so only predictors remain
attribute_names = df_tennis.columns.tolist()
print("List of attributes: ", attribute_names)
attribute_names.remove('Play Tennis')
print("Predicting Attributes: ", attribute_names)

tree = id3(df_tennis, 'Play Tennis', attribute_names)
print("\n\nThe Resultant Decistion Tree is: \n")
pprint(tree)
# OUTPUT :-
# The Resultant Decistion Tree is:
# {'Outlook': {'Overcast': 'Yes',
# 'Rain': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}},
# 'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}}}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
# Load the training examples; the last column is the label
df = pd.read_csv("./Data/EnjoySport.csv")
print(df.head())
concepts, target = df.values[:, :-1], df.values[:, -1]
def learn(concepts, target):
    """Run the candidate-elimination procedure and report both boundaries.

    Args:
        concepts: sequence of examples, each a sequence of attribute values.
            The first example seeds the specific hypothesis.
        target: labels aligned with ``concepts``; only ``"yes"`` and ``"no"``
            are acted on.

    Returns:
        ``(specific_h, general_h)`` after all-"?" rows have been dropped from
        ``general_h``. Both are also printed, as before.

    The all-"?" filter now follows the number of attributes; it used to be
    hard-coded to six, so other data sets kept their empty rows.
    """
    specific_h = concepts[0].copy()
    print(specific_h)
    width = len(specific_h)
    general_h = [["?"] * width for _ in range(width)]
    print(general_h)

    for i, h in enumerate(concepts):
        if target[i] == "yes":
            # Generalise every attribute the positive example disagrees on.
            for x in range(width):
                if h[x] != specific_h[x]:
                    specific_h[x] = "?"
                    general_h[x][x] = "?"
        elif target[i] == "no":
            # Keep a constraint only where the negative example differs.
            for x in range(width):
                general_h[x][x] = specific_h[x] if h[x] != specific_h[x] else "?"

    # Rows that constrain nothing carry no information.
    general_h = [row for row in general_h if any(value != "?" for value in row)]
    print(specific_h)
    print(general_h)
    return specific_h, general_h
| learn(concepts, target) | |
| # OUTPUT:- | |
| # ['sunny' 'warm' '?' 'strong' '?' '?'] | |
| # [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| EnjoySport.csv | |
| sky air_temp humidity wind water forecast enjoy_sport | |
| 0 sunny warm normal strong warm same yes | |
| 1 sunny warm high strong warm same yes | |
| 2 rainy cold high strong warm change no | |
| 3 sunny warm high strong cool change yes | |
| PlayTennis.csv | |
| Outlook Temperature Humidity Wind Play Tennis | |
| 0 Sunny Hot High Weak No | |
| 1 Sunny Hot High Strong No | |
| 2 Overcast Hot High Weak Yes | |
| 3 Rain Mild High Weak Yes | |
| 4 Rain Cool Normal Weak Yes | |
| 5 Rain Cool Normal Strong No | |
| 6 Overcast Cool Normal Strong Yes | |
| 7 Sunny Mild High Weak No | |
| 8 Sunny Cool Normal Weak Yes | |
| 9 Rain Mild Normal Weak Yes | |
| 10 Sunny Mild Normal Strong Yes | |
| 11 Overcast Mild High Strong Yes | |
| 12 Overcast Hot Normal Weak Yes | |
| 13 Rain Mild High Strong No |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| import pandas as pd | |
| from sklearn import preprocessing | |
| from sklearn.cluster import KMeans | |
| from sklearn.datasets import load_iris | |
| from sklearn.mixture import GaussianMixture | |
# Load iris into a DataFrame with the class label as the last column
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['target'] = iris.target
print(df.head())
# sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \
# 0 5.1 3.5 1.4 0.2
# 1 4.9 3.0 1.4 0.2
# 2 4.7 3.2 1.3 0.2
# 3 4.6 3.1 1.5 0.2
# 4 5.0 3.6 1.4 0.2
# target
# 0 0
# 1 0
# 2 0
# 3 0
# 4 0

# Features are every column but the last; the label is the 'target' column
Y = df['target']
X = df.iloc[:, :-1]

# Standardise the features, keeping the original column names
scaler = preprocessing.StandardScaler()
X_Scaled_Array = scaler.fit_transform(X)
X_Scaled = pd.DataFrame(X_Scaled_Array, columns=X.columns)
print(X_Scaled.head())
# sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)
# 0 -0.900681 1.019004 -1.340227 -1.315444
# 1 -1.143017 -0.131979 -1.340227 -1.315444
# 2 -1.385353 0.328414 -1.397064 -1.315444
# 3 -1.506521 0.098217 -1.283389 -1.315444
# 4 -1.021849 1.249201 -1.340227 -1.315444
plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'green', 'blue'])
true_labels = Y.to_numpy()


def _align_clusters(cluster_ids):
    """Relabel each cluster with the most frequent true class among its members.

    Cluster numbering is arbitrary and changes between fits, so a fixed
    permutation only matches one particular run. Two clusters may map to the
    same class if they are dominated by the same species.
    """
    aligned = np.empty_like(cluster_ids)
    for cluster in np.unique(cluster_ids):
        members = cluster_ids == cluster
        aligned[members] = np.bincount(true_labels[members]).argmax()
    return aligned.astype(np.int64)


# REAL PLOT
plt.subplot(1, 3, 1)
plt.scatter(X_Scaled['petal length (cm)'], X_Scaled['petal width (cm)'], c=colormap[Y], s=40)
plt.title('Real')

# K-PLOT
plt.subplot(1, 3, 2)
model = KMeans(n_clusters=3, random_state=0)
pred_y = _align_clusters(model.fit_predict(X_Scaled))
plt.scatter(X_Scaled['petal length (cm)'], X_Scaled['petal width (cm)'], c=colormap[pred_y], s=40)
plt.title('KMeans')

# GMM PLOT (drawn on the scaled features, like the other two panels)
gmm = GaussianMixture(n_components=3, max_iter=200)
y_cluster_gmm = _align_clusters(gmm.fit_predict(X_Scaled))
plt.subplot(1, 3, 3)
plt.scatter(X_Scaled['petal length (cm)'], X_Scaled['petal width (cm)'], c=colormap[y_cluster_gmm], s=40)
plt.title('GMM Classification')

# Needed to display the figure when run as a plain script
plt.show()
# OUTPUT :- GRAPHS
|  |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # KNN Algorithm | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.neighbors import KNeighborsClassifier | |
| from sklearn.datasets import load_iris | |
iris = load_iris()
print("Iris Dataset Loaded...")
# Hold out 10% of the samples for testing (the split is random on each run)
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.1)

# One place for k, so the report below always matches the classifier.
# The message used to claim K=1 while the classifier was built with 2.
n_neighbors = 2
classifier = KNeighborsClassifier(n_neighbors=n_neighbors)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

print(f"Results of Classification using KNN with K={n_neighbors} ")
for sample, actual, predicted in zip(x_test, y_test, y_pred):
    print(f"Sample: {sample} Actual-Label: {actual} Predicted-Label: {predicted}")
print(f"Classification Accuracy : {classifier.score(x_test, y_test)}")
| # OUTPUT :- | |
| # Results of Classification using KNN with K=1 | |
| # Sample: [6.3 2.5 4.9 1.5] Actual-Label: 1 Predicted-Label: 2 | |
| # Sample: [4.7 3.2 1.3 0.2] Actual-Label: 0 Predicted-Label: 0 | |
| # Sample: [4.9 2.4 3.3 1. ] Actual-Label: 1 Predicted-Label: 1 | |
| # Sample: [5.1 3.8 1.6 0.2] Actual-Label: 0 Predicted-Label: 0 | |
| # Sample: [6.7 3. 5.2 2.3] Actual-Label: 2 Predicted-Label: 2 | |
| # Sample: [5.5 3.5 1.3 0.2] Actual-Label: 0 Predicted-Label: 0 | |
| # Sample: [6.4 2.7 5.3 1.9] Actual-Label: 2 Predicted-Label: 2 | |
| # Sample: [5.5 2.5 4. 1.3] Actual-Label: 1 Predicted-Label: 1 | |
| # Sample: [5.4 3. 4.5 1.5] Actual-Label: 1 Predicted-Label: 1 | |
| # Sample: [6. 2.2 4. 1. ] Actual-Label: 1 Predicted-Label: 1 | |
| # Sample: [6.8 3.2 5.9 2.3] Actual-Label: 2 Predicted-Label: 2 | |
| # Sample: [7.2 3.6 6.1 2.5] Actual-Label: 2 Predicted-Label: 2 | |
| # Sample: [4.9 2.5 4.5 1.7] Actual-Label: 2 Predicted-Label: 1 | |
| # Sample: [5.7 2.6 3.5 1. ] Actual-Label: 1 Predicted-Label: 1 | |
| # Sample: [6.1 2.8 4.7 1.2] Actual-Label: 1 Predicted-Label: 1 | |
| # Classification Accuracy : 0.8666666666666667 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| import matplotlib.pyplot as plt | |
# 1000 evenly spaced inputs; predictions are evaluated on the same points
X = domain = np.linspace(-3, 3, 1000)
# X**2 is never negative, so no absolute value is needed before the log
Y = np.log(np.square(X) + 0.5)
def local_regression(X0, X, Y, tau):
    """Predict the value at ``X0`` with locally weighted linear regression.

    A line with intercept is fitted to ``(X, Y)``, each sample weighted by a
    Gaussian kernel of bandwidth ``tau`` centred on ``X0``. The returned
    scalar is the fitted line evaluated at ``X0``, not the coefficients.
    """
    query = np.array([1, X0])
    # Design matrix with a leading column of ones for the intercept.
    design = np.column_stack((np.ones(len(X)), X))
    squared_distance = ((design - query) ** 2).sum(axis=1)
    kernel = np.exp(squared_distance / (-2 * (tau ** 2)))
    weighted_t = design.T * kernel
    # pinv keeps the solve defined even when the weighted system is singular.
    return np.linalg.pinv(weighted_t @ design) @ weighted_t @ Y @ query
def draw(tau):
    """Plot the data as black dots and the locally weighted fit for ``tau`` in red."""
    prediction = []
    for x0 in domain:
        prediction.append(local_regression(x0, X, Y, tau))
    plt.plot(X, Y, 'o', color='black')
    plt.plot(domain, prediction, color='red')
    plt.show()


draw(0.1)
# OUTPUT :- Graph
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
df = pd.read_csv("./Data/PlayTennis.csv")
# The last column is the class label
target = str(df.columns[-1])
print(target)
target_list = set(df[target])
print(target_list)
# Play Tennis
# {'Yes', 'No'}

# Distinct values of every predictor column, in column order
Attr = {}
for a in df.columns[:-1]:
    Attr[a] = set(df[a])
print(Attr)
# {'Outlook': {'Overcast', 'Rain', 'Sunny'}, 'Temperature': {'Cool', 'Mild', 'Hot'}, 'Humidity': {'High', 'Normal'}, 'Wind': {'Weak', 'Strong'}}
def probAttr(data, attr, val):
    """Return ``(count, fraction)`` of the rows of ``data`` where column ``attr`` equals ``val``."""
    total_rows = len(data)
    matches = int((data[attr] == val).sum())
    return matches, matches / total_rows
| probAttr(df, target, "Yes") | |
| # (9, 0.6428571428571429) | |
def train(data, Attr, targetVals, target):
    """Estimate the naive Bayes probability tables from ``data``.

    Args:
        data: DataFrame with the training rows.
        Attr: mapping of predictor column -> iterable of its values.
        targetVals: iterable of class labels.
        target: name of the label column.

    Returns:
        ``(targetProbs, AttrConcept, probability_list)`` where
        ``targetProbs[t]`` is P(t), ``AttrConcept[a][v][t]`` is P(a=v | t)
        and ``probability_list[a][v]`` is P(a=v). All three are also printed.

    Raises:
        ZeroDivisionError: if ``data`` is empty or a label in ``targetVals``
            never occurs in ``data``.
    """
    targetProbs = {}  # P(A)
    countTarget = {}
    for targetVal in targetVals:
        countTarget[targetVal], targetProbs[targetVal] = probAttr(data, target, targetVal)

    AttrConcept = {}  # P(X/A)
    probability_list = {}  # P(X)
    for att in Attr:
        probability_list[att] = {}
        AttrConcept[att] = {}
        for val in Attr[att]:
            _, probability_list[att][val] = probAttr(data, att, val)
            # The row filter does not depend on the class, so build it once.
            rows_with_val = data[data[att] == val]
            AttrConcept[att][val] = {
                targetVal: len(rows_with_val[rows_with_val[target] == targetVal]) / countTarget[targetVal]
                for targetVal in targetVals
            }

    print("P(A) : ", targetProbs, "\n")
    # The backslash is escaped: "\A" is an invalid escape sequence. Output is unchanged.
    print("P(X\\A) : ", AttrConcept, "\n")
    print("P(X) : ", probability_list, "\n")
    return targetProbs, AttrConcept, probability_list
def test(examples, Attr, target_list, targetProbs, AttrConcept, probability_list):
    """Classify every row of ``examples`` with the trained tables and report accuracy.

    Args:
        examples: rows whose leading values are the predictors, in the same
            order as the keys of ``Attr``, and whose last value is the label.
        Attr: mapping of predictor column -> its values (key order matters).
        target_list: iterable of class labels.
        targetProbs, AttrConcept, probability_list: tables from ``train``.

    Returns:
        The accuracy in percent (the function used to return nothing).

    Each value is paired with its own column by position. The earlier
    version looked every value of the row up in every attribute's table, so
    a value shared by two columns was multiplied in for both.
    A value missing from its column's table is skipped, as before.
    """
    misclassification_count = 0
    Total = len(examples)
    for ex in examples:
        # Start from the class priors, then multiply in one factor per attribute.
        px = {t: targetProbs[t] for t in target_list}
        for a, x in zip(Attr, ex):
            if x not in AttrConcept[a]:
                continue
            for t in target_list:
                px[t] = px[t] * AttrConcept[a][x][t] / probability_list[a][x]
        print(px)
        classification = max(px, key=px.get)
        print("Classification :", classification, "Expected :", ex[-1])
        if classification != ex[-1]:
            misclassification_count += 1
    misclassification_rate = misclassification_count * 100 / Total
    accuracy = 100 - misclassification_rate
    print("Misclassification Count = {}".format(misclassification_count))
    print("Misclassification Rate = {}%".format(misclassification_rate))
    print("Accuracy = {}%".format(accuracy))
    return accuracy
| targetProbs, AttrConcept, probability_list = train(df, Attr, target_list, target) | |
| test(df.values, Attr, target_list, targetProbs, AttrConcept, probability_list) | |
| # OUTPUT :- | |
| # P(A) : {'Yes': 0.6428571428571429, 'No': 0.35714285714285715} | |
| # P(X\A) : {'Outlook': {'Overcast': {'Yes': 0.4444444444444444, 'No': 0.0}, 'Rain': {'Yes': 0.3333333333333333, 'No': 0.4}, 'Sunny': {'Yes': 0.2222222222222222, 'No': 0.6}}, 'Temperature': {'Cool': {'Yes': 0.3333333333333333, 'No': 0.2}, 'Mild': {'Yes': 0.4444444444444444, 'No': 0.4}, 'Hot': {'Yes': 0.2222222222222222, 'No': 0.4}}, 'Humidity': {'High': {'Yes': 0.3333333333333333, 'No': 0.8}, 'Normal': {'Yes': 0.6666666666666666, 'No': 0.2}}, 'Wind': {'Weak': {'Yes': 0.6666666666666666, 'No': 0.4}, 'Strong': {'Yes': 0.3333333333333333, 'No': 0.6}}} | |
| # P(X) : {'Outlook': {'Overcast': 0.2857142857142857, 'Rain': 0.35714285714285715, 'Sunny': 0.35714285714285715}, 'Temperature': {'Cool': 0.2857142857142857, 'Mild': 0.42857142857142855, 'Hot': 0.2857142857142857}, 'Humidity': {'High': 0.5, 'Normal': 0.5}, 'Wind': {'Weak': 0.5714285714285714, 'Strong': 0.42857142857142855}} | |
| # {'Yes': 0.2419753086419753, 'No': 0.9408000000000002} | |
| # Classification : No Expected : No | |
| # {'Yes': 0.16131687242798354, 'No': 1.8816000000000002} | |
| # Classification : No Expected : No | |
| # {'Yes': 0.6049382716049383, 'No': 0.0} | |
| # Classification : Yes Expected : Yes | |
| # {'Yes': 0.4839506172839506, 'No': 0.4181333333333335} | |
| # Classification : Yes Expected : Yes | |
| # {'Yes': 1.0888888888888888, 'No': 0.07840000000000004} | |
| # Classification : Yes Expected : Yes | |
| # {'Yes': 0.7259259259259259, 'No': 0.15680000000000005} | |
| # Classification : Yes Expected : No | |
| # {'Yes': 1.2098765432098766, 'No': 0.0} | |
| # Classification : Yes Expected : Yes | |
| # {'Yes': 0.3226337448559671, 'No': 0.6272000000000001} | |
| # Classification : No Expected : No | |
| # {'Yes': 0.7259259259259256, 'No': 0.11760000000000002} | |
| # Classification : Yes Expected : Yes | |
| # {'Yes': 0.9679012345679012, 'No': 0.10453333333333338} | |
| # Classification : Yes Expected : Yes | |
| # {'Yes': 0.43017832647462273, 'No': 0.31360000000000005} | |
| # Classification : Yes Expected : Yes | |
| # {'Yes': 0.5377229080932785, 'No': 0.0} | |
| # Classification : Yes Expected : Yes | |
| # {'Yes': 1.2098765432098766, 'No': 0.0} | |
| # Classification : Yes Expected : Yes | |
| # {'Yes': 0.3226337448559671, 'No': 0.8362666666666669} | |
| # Classification : No Expected : No | |
| # Misclassification Count = 1 | |
| # Misclassification Rate = 7.142857142857143% | |
| # Accuracy = 92.85714285714286% |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment