
@Adithya-Rama
Last active January 9, 2024 19:41

Revisions

  1. Adithya-Rama revised this gist Jan 9, 2024. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion KMeansAlgorithm.py
    @@ -65,4 +65,5 @@
    plt.scatter(X_Scaled['petal length (cm)'], X_Scaled['petal width (cm)'], c=colormap[y_cluster_gmm], s=40)
    plt.title('GMM Classification')

    # OUTPUT :- GRAPHS
    ![2024-01-10](https://gist.github.com/assets/69115355/be2ca29c-68ad-48a5-a206-f49c1d9f6e3a)
  2. Adithya-Rama revised this gist Jan 9, 2024. 5 changed files with 273 additions and 0 deletions.
    26 changes: 26 additions & 0 deletions CSVFiles.txt
    @@ -0,0 +1,26 @@
    EnjoySport.csv

    sky air_temp humidity wind water forecast enjoy_sport
    0 sunny warm normal strong warm same yes
    1 sunny warm high strong warm same yes
    2 rainy cold high strong warm change no
    3 sunny warm high strong cool change yes


    PlayTennis.csv

    Outlook Temperature Humidity Wind Play Tennis
    0 Sunny Hot High Weak No
    1 Sunny Hot High Strong No
    2 Overcast Hot High Weak Yes
    3 Rain Mild High Weak Yes
    4 Rain Cool Normal Weak Yes
    5 Rain Cool Normal Strong No
    6 Overcast Cool Normal Strong Yes
    7 Sunny Mild High Weak No
    8 Sunny Cool Normal Weak Yes
    9 Rain Mild Normal Weak Yes
    10 Sunny Mild Normal Strong Yes
    11 Overcast Mild High Strong Yes
    12 Overcast Hot Normal Weak Yes
    13 Rain Mild High Strong No
    68 changes: 68 additions & 0 deletions KMeansAlgorithm.py
    @@ -0,0 +1,68 @@
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from sklearn import preprocessing
    from sklearn.cluster import KMeans
    from sklearn.datasets import load_iris
    from sklearn.mixture import GaussianMixture

    iris = load_iris()
    df = pd.DataFrame(iris['data'], columns=iris['feature_names'])
    df['target'] = iris['target']
    print(df.head())

    # sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \
    # 0 5.1 3.5 1.4 0.2
    # 1 4.9 3.0 1.4 0.2
    # 2 4.7 3.2 1.3 0.2
    # 3 4.6 3.1 1.5 0.2
    # 4 5.0 3.6 1.4 0.2

    # target
    # 0 0
    # 1 0
    # 2 0
    # 3 0
    # 4 0

    X = df.iloc[:, :-1]
    Y = df['target']

    scaler = preprocessing.StandardScaler()
    scaler.fit(X)
    X_Scaled_Array = scaler.transform(X)
    X_Scaled = pd.DataFrame(X_Scaled_Array, columns = X.columns)
    print(X_Scaled.head())

    # sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)
    # 0 -0.900681 1.019004 -1.340227 -1.315444
    # 1 -1.143017 -0.131979 -1.340227 -1.315444
    # 2 -1.385353 0.328414 -1.397064 -1.315444
    # 3 -1.506521 0.098217 -1.283389 -1.315444
    # 4 -1.021849 1.249201 -1.340227 -1.315444

    plt.figure(figsize=(14, 7))
    colormap = np.array(['red', 'green', 'blue'])

    #REAL PLOT
    plt.subplot(1, 3, 1)
    plt.scatter(X_Scaled['petal length (cm)'], X_Scaled['petal width (cm)'], c=colormap[Y], s=40)
    plt.title('Real')

    #K-PLOT
    plt.subplot(1, 3, 2)
    model = KMeans(n_clusters=3, random_state=0)
    pred_y = model.fit_predict(X_Scaled)
    # KMeans numbers its clusters arbitrarily; remap the ids so the colours line up with the true classes
    pred_y = np.choose(pred_y, [1, 0, 2]).astype(np.int64)
    plt.scatter(X_Scaled['petal length (cm)'], X_Scaled['petal width (cm)'],c=colormap[pred_y], s=40)
    plt.title('KMeans')

    #GMM PLOT
    gmm = GaussianMixture(n_components=3, max_iter=200)
    y_cluster_gmm = gmm.fit_predict(X_Scaled)
    # Remap the GMM cluster ids to the true classes; this [2, 0, 1] mapping was found
    # by inspection and can change between runs since no random_state is set
    y_cluster_gmm = np.choose(y_cluster_gmm, [2, 0, 1]).astype(np.int64)
    plt.subplot(1, 3, 3)
    plt.scatter(X_Scaled['petal length (cm)'], X_Scaled['petal width (cm)'], c=colormap[y_cluster_gmm], s=40)
    plt.title('GMM Classification')
    plt.show()

    # OUTPUT :- GRAPHS
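    # A hedged extra, not in the original gist: the adjusted Rand index from
    # sklearn.metrics quantifies cluster/label agreement and is invariant to the
    # arbitrary cluster numbering that np.choose patched up above.
    from sklearn.metrics import adjusted_rand_score
    print("KMeans ARI :", adjusted_rand_score(Y, pred_y))
    print("GMM ARI    :", adjusted_rand_score(Y, y_cluster_gmm))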
    36 changes: 36 additions & 0 deletions KNNAlgorithm.py
    @@ -0,0 +1,36 @@
    # KNN Algorithm
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.datasets import load_iris

    iris = load_iris()
    print("Iris Dataset Loaded...")
    x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.1)

    classifier = KNeighborsClassifier(n_neighbors=2)
    classifier.fit(x_train, y_train)
    y_pred = classifier.predict(x_test)

    print("Results of Classification using KNN with K=1 ")
    for r in range(0, len(x_test)) :
    print(f"Sample: {str(x_test[r])} Actual-Label: {str(y_test[r])} Predicted-Label: {str(y_pred[r])}")
    print(f"Classification Accuracy : {classifier.score(x_test, y_test)}")

    # OUTPUT :-
    # Results of Classification using KNN with K=2
    # Sample: [6.3 2.5 4.9 1.5] Actual-Label: 1 Predicted-Label: 2
    # Sample: [4.7 3.2 1.3 0.2] Actual-Label: 0 Predicted-Label: 0
    # Sample: [4.9 2.4 3.3 1. ] Actual-Label: 1 Predicted-Label: 1
    # Sample: [5.1 3.8 1.6 0.2] Actual-Label: 0 Predicted-Label: 0
    # Sample: [6.7 3. 5.2 2.3] Actual-Label: 2 Predicted-Label: 2
    # Sample: [5.5 3.5 1.3 0.2] Actual-Label: 0 Predicted-Label: 0
    # Sample: [6.4 2.7 5.3 1.9] Actual-Label: 2 Predicted-Label: 2
    # Sample: [5.5 2.5 4. 1.3] Actual-Label: 1 Predicted-Label: 1
    # Sample: [5.4 3. 4.5 1.5] Actual-Label: 1 Predicted-Label: 1
    # Sample: [6. 2.2 4. 1. ] Actual-Label: 1 Predicted-Label: 1
    # Sample: [6.8 3.2 5.9 2.3] Actual-Label: 2 Predicted-Label: 2
    # Sample: [7.2 3.6 6.1 2.5] Actual-Label: 2 Predicted-Label: 2
    # Sample: [4.9 2.5 4.5 1.7] Actual-Label: 2 Predicted-Label: 1
    # Sample: [5.7 2.6 3.5 1. ] Actual-Label: 1 Predicted-Label: 1
    # Sample: [6.1 2.8 4.7 1.2] Actual-Label: 1 Predicted-Label: 1
    # Classification Accuracy : 0.8666666666666667
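    # A hedged extension, not in the original gist: sweep the neighbour count k
    # and report test accuracy for this particular split (results vary run to run
    # because train_test_split is not seeded).
    for k in [1, 3, 5, 7]:
        knn = KNeighborsClassifier(n_neighbors=k).fit(x_train, y_train)
        print(f"k={k}  accuracy={knn.score(x_test, y_test):.3f}")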
    25 changes: 25 additions & 0 deletions LocalRegression.py
    @@ -0,0 +1,25 @@
    import numpy as np
    import matplotlib.pyplot as plt

    X = np.linspace(-3, 3, num=1000)
    domain = X
    Y = np.log(np.abs(X**2) + 0.5)

    def local_regression(X0, X, Y, tau):
        X0 = [1, X0]  # add the bias term so the local model is y = b0 + b1*x
        X = np.asarray([[1, i] for i in X])
        # Gaussian kernel weights centred on X0, folded into X^T
        XW = X.T * np.exp(np.sum((X - X0) ** 2, axis=1) / (-2 * tau ** 2))
        # Normal equations via the pseudoinverse: beta = pinv(X^T W X) @ X^T W Y;
        # the trailing @ X0 evaluates the fitted local line at X0, so this returns the prediction
        return np.linalg.pinv(XW @ X) @ XW @ Y @ X0


    def draw(tau):
        prediction = [local_regression(x0, X, Y, tau) for x0 in domain]
        plt.plot(X, Y, 'o', color='black')
        plt.plot(domain, prediction, color='red')
        plt.show()

    draw(0.1)

    # OUTPUT :- Graph
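    # A hedged extension, not in the original gist: tau is the kernel bandwidth.
    # Small values weight only very near neighbours (a wigglier fit); large values
    # approach ordinary linear regression. Compare a few bandwidths:
    for tau in [0.05, 0.1, 0.5, 1.0]:
        draw(tau)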
    118 changes: 118 additions & 0 deletions NaiveByesAlgorithm.py
    @@ -0,0 +1,118 @@
    import pandas as pd

    df = pd.read_csv("./Data/PlayTennis.csv")

    target = str(list(df)[-1])
    print(target)
    target_list = set(df[target])
    print(target_list)
    # Play Tennis
    # {'Yes', 'No'}

    Attr = {}
    for a in list(df)[:-1]:
        Attr[a] = set(df[a])
    print(Attr)
    # {'Outlook': {'Overcast', 'Rain', 'Sunny'}, 'Temperature': {'Cool', 'Mild', 'Hot'}, 'Humidity': {'High', 'Normal'}, 'Wind': {'Weak', 'Strong'}}

    def probAttr(data, attr, val):
        Total = data.shape[0]
        cnt = len(data[data[attr] == val])
        return cnt, cnt / Total

    probAttr(df, target, "Yes")
    # (9, 0.6428571428571429)

    def train(data, Attr, targetVals, target):
        targetProbs = {}    # P(A)
        countTarget = {}

        for targetVal in targetVals:
            countTarget[targetVal], targetProbs[targetVal] = probAttr(data, target, targetVal)

        AttrConcept = {}        # P(X|A)
        probability_list = {}   # P(X)

        for att in Attr:
            probability_list[att] = {}
            AttrConcept[att] = {}

            for val in Attr[att]:
                AttrConcept[att][val] = {}
                cnt, probability_list[att][val] = probAttr(data, att, val)

                for targetVal in targetVals:
                    dataTemp = data[data[att] == val]
                    AttrConcept[att][val][targetVal] = len(dataTemp[dataTemp[target] == targetVal]) / countTarget[targetVal]

        print("P(A) : ", targetProbs, "\n")
        print("P(X|A) : ", AttrConcept, "\n")
        print("P(X) : ", probability_list, "\n")
        return targetProbs, AttrConcept, probability_list

    def test(examples, Attr, target_list, targetProbs, AttrConcept, probability_list):
        misclassification_count = 0
        Total = len(examples)

        for ex in examples:
            px = {}

            for a in Attr:
                for x in ex:
                    # Only the value in this example that belongs to attribute a matches
                    if x in AttrConcept[a]:
                        for t in target_list:
                            if t not in px:
                                px[t] = targetProbs[t] * AttrConcept[a][x][t] / probability_list[a][x]
                            else:
                                px[t] = px[t] * AttrConcept[a][x][t] / probability_list[a][x]
            print(px)
            classification = max(px, key=px.get)
            print("Classification :", classification, "Expected :", ex[-1])
            if classification != ex[-1]:
                misclassification_count += 1

        misclassification_rate = misclassification_count * 100 / Total
        accuracy = 100 - misclassification_rate
        print("Misclassification Count = {}".format(misclassification_count))
        print("Misclassification Rate = {}%".format(misclassification_rate))
        print("Accuracy = {}%".format(accuracy))

    targetProbs, AttrConcept, probability_list = train(df, Attr, target_list, target)
    test(df.values, Attr, target_list, targetProbs, AttrConcept, probability_list)

    # OUTPUT :-
    # P(A) : {'Yes': 0.6428571428571429, 'No': 0.35714285714285715}

    # P(X|A) : {'Outlook': {'Overcast': {'Yes': 0.4444444444444444, 'No': 0.0}, 'Rain': {'Yes': 0.3333333333333333, 'No': 0.4}, 'Sunny': {'Yes': 0.2222222222222222, 'No': 0.6}}, 'Temperature': {'Cool': {'Yes': 0.3333333333333333, 'No': 0.2}, 'Mild': {'Yes': 0.4444444444444444, 'No': 0.4}, 'Hot': {'Yes': 0.2222222222222222, 'No': 0.4}}, 'Humidity': {'High': {'Yes': 0.3333333333333333, 'No': 0.8}, 'Normal': {'Yes': 0.6666666666666666, 'No': 0.2}}, 'Wind': {'Weak': {'Yes': 0.6666666666666666, 'No': 0.4}, 'Strong': {'Yes': 0.3333333333333333, 'No': 0.6}}}

    # P(X) : {'Outlook': {'Overcast': 0.2857142857142857, 'Rain': 0.35714285714285715, 'Sunny': 0.35714285714285715}, 'Temperature': {'Cool': 0.2857142857142857, 'Mild': 0.42857142857142855, 'Hot': 0.2857142857142857}, 'Humidity': {'High': 0.5, 'Normal': 0.5}, 'Wind': {'Weak': 0.5714285714285714, 'Strong': 0.42857142857142855}}

    # {'Yes': 0.2419753086419753, 'No': 0.9408000000000002}
    # Classification : No Expected : No
    # {'Yes': 0.16131687242798354, 'No': 1.8816000000000002}
    # Classification : No Expected : No
    # {'Yes': 0.6049382716049383, 'No': 0.0}
    # Classification : Yes Expected : Yes
    # {'Yes': 0.4839506172839506, 'No': 0.4181333333333335}
    # Classification : Yes Expected : Yes
    # {'Yes': 1.0888888888888888, 'No': 0.07840000000000004}
    # Classification : Yes Expected : Yes
    # {'Yes': 0.7259259259259259, 'No': 0.15680000000000005}
    # Classification : Yes Expected : No
    # {'Yes': 1.2098765432098766, 'No': 0.0}
    # Classification : Yes Expected : Yes
    # {'Yes': 0.3226337448559671, 'No': 0.6272000000000001}
    # Classification : No Expected : No
    # {'Yes': 0.7259259259259256, 'No': 0.11760000000000002}
    # Classification : Yes Expected : Yes
    # {'Yes': 0.9679012345679012, 'No': 0.10453333333333338}
    # Classification : Yes Expected : Yes
    # {'Yes': 0.43017832647462273, 'No': 0.31360000000000005}
    # Classification : Yes Expected : Yes
    # {'Yes': 0.5377229080932785, 'No': 0.0}
    # Classification : Yes Expected : Yes
    # {'Yes': 1.2098765432098766, 'No': 0.0}
    # Classification : Yes Expected : Yes
    # {'Yes': 0.3226337448559671, 'No': 0.8362666666666669}
    # Classification : No Expected : No
    # Misclassification Count = 1
    # Misclassification Rate = 7.142857142857143%
    # Accuracy = 92.85714285714286%
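    # A hedged cross-check, not in the original gist (assumes scikit-learn is
    # installed): CategoricalNB on the same label-encoded data should land near
    # the hand-rolled classifier's training accuracy. It applies Laplace
    # smoothing (alpha=1.0) by default, so the scores need not match exactly.
    from sklearn.naive_bayes import CategoricalNB
    from sklearn.preprocessing import OrdinalEncoder, LabelEncoder

    X_enc = OrdinalEncoder().fit_transform(df[list(Attr)])
    y_enc = LabelEncoder().fit_transform(df[target])
    print("sklearn CategoricalNB accuracy :", CategoricalNB().fit(X_enc, y_enc).score(X_enc, y_enc))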
  3. Adithya-Rama created this gist Jan 9, 2024.
    141 changes: 141 additions & 0 deletions AOStarAlgorithm.py
    @@ -0,0 +1,141 @@
    def recAOStar(n):
        print("Expanding Node : ", n)
        and_nodes = []
        or_nodes = []
        # Segregation of AND and OR nodes
        if n in allNodes:
            if 'AND' in allNodes[n]:
                and_nodes = allNodes[n]['AND']
            if 'OR' in allNodes[n]:
                or_nodes = allNodes[n]['OR']
        # If leaf node then return
        if len(and_nodes) == 0 and len(or_nodes) == 0:
            return
        solvable = False
        marked = {}

        while not solvable:
            # If all the child nodes are visited and expanded, take the least cost of all the child nodes
            if len(marked) == len(and_nodes) + len(or_nodes):
                min_cost_least, min_cost_group_least = least_cost_group(and_nodes, or_nodes, {})
                solvable = True
                change_heuristic(n, min_cost_least)
                optimal_child_group[n] = min_cost_group_least
                continue
            # Least cost of the unmarked child nodes
            min_cost, min_cost_group = least_cost_group(and_nodes, or_nodes, marked)
            is_expanded = False

            # If the child nodes have subtrees, recursively visit them to recalculate the heuristic of the child node
            if len(min_cost_group) > 1:
                if min_cost_group[0] in allNodes:
                    is_expanded = True
                    recAOStar(min_cost_group[0])
                if min_cost_group[1] in allNodes:
                    is_expanded = True
                    recAOStar(min_cost_group[1])
            else:
                if min_cost_group in allNodes:
                    is_expanded = True
                    recAOStar(min_cost_group)
            # If the child node had a subtree and was expanded, verify that the new heuristic value is still the least among all nodes
            if is_expanded:
                min_cost_verify, min_cost_group_verify = least_cost_group(and_nodes, or_nodes, {})
                if min_cost_group == min_cost_group_verify:
                    solvable = True
                    change_heuristic(n, min_cost_verify)
                    optimal_child_group[n] = min_cost_group
            # If the child node has no subtree there is no change in heuristic, so update the min cost of the current node
            else:
                solvable = True
                change_heuristic(n, min_cost)
                optimal_child_group[n] = min_cost_group
            # Mark the child node which was expanded
            marked[min_cost_group] = 1
        return heuristic(n)



    # Function to calculate the min cost among all the child nodes
    def least_cost_group(and_nodes, or_nodes, marked):
        node_wise_cost = {}
        for node_pair in and_nodes:
            if not node_pair[0] + node_pair[1] in marked:
                # An AND group costs both children's heuristics plus two edge costs
                cost = heuristic(node_pair[0]) + heuristic(node_pair[1]) + 2
                node_wise_cost[node_pair[0] + node_pair[1]] = cost
        for node in or_nodes:
            if node not in marked:
                # An OR child costs its heuristic plus one edge cost
                cost = heuristic(node) + 1
                node_wise_cost[node] = cost
        min_cost = 999999
        min_cost_group = None
        # Calculates the min heuristic
        for costKey in node_wise_cost:
            if node_wise_cost[costKey] < min_cost:
                min_cost = node_wise_cost[costKey]
                min_cost_group = costKey
        return [min_cost, min_cost_group]

    # Returns heuristic of a node
    def heuristic(n):
        return H_dist[n]

    # Updates the heuristic of a node
    def change_heuristic(n, cost):
        H_dist[n] = cost
        return

    # Function to print the optimal cost nodes
    def print_path(node):
        print(optimal_child_group[node], end="")
        node = optimal_child_group[node]
        if len(node) > 1:
            if node[0] in optimal_child_group:
                print("->", end="")
                print_path(node[0])
            if node[1] in optimal_child_group:
                print("->", end="")
                print_path(node[1])
        else:
            if node in optimal_child_group:
                print("->", end="")
                print_path(node)

    #Describe the heuristic here
    H_dist = {
    'A': -1,
    'B': 4,
    'C': 2,
    'D': 3,
    'E': 6,
    'F': 8,
    'G': 2,
    'H': 0,
    'I': 0,
    'J': 0
    }

    #Describe your graph here
    allNodes = {
    'A': {'AND': [('C', 'D')], 'OR': ['B']},
    'B': {'OR': ['E', 'F']},
    'C': {'OR': ['G'], 'AND': [('H', 'I')]},
    'D': {'OR': ['J']}
    }

    optimal_child_group = {}
    optimal_cost = recAOStar('A')

    print('Nodes which give the optimal cost are')
    print_path('A')
    print('\nOptimal Cost is :: ', optimal_cost)
    print(optimal_child_group)

    # OUTPUT :-
    # Nodes which give the optimal cost are
    # CD->HI->J
    # Optimal Cost is :: 5
    # {'B': 'E', 'C': 'HI', 'D': 'J', 'A': 'CD'}
    89 changes: 89 additions & 0 deletions AStarAlgorithm.py
    @@ -0,0 +1,89 @@
    Graph_nodes = {
    'A' : [('B', 6), ('F', 3)],
    'B' : [('C', 3), ('D', 2)],
    'C' : [('D', 1), ('E', 5)],
    'D' : [('C', 1), ('E', 8)],
    'E' : [('I', 5), ('J', 5)],
    'F' : [('G', 1), ('H', 7)],
    'G' : [('I', 3)],
    'H' : [('I', 2)],
    'I' : [('E', 5), ('J', 3)],
    }

    def heuristic(v):
        H_dist = {
            'A': 10,
            'B': 8,
            'C': 5,
            'D': 7,
            'E': 3,
            'F': 6,
            'G': 5,
            'H': 3,
            'I': 1,
            'J': 0,
        }
        return H_dist[v]

    def neighbors(v):
        if v in Graph_nodes:
            return Graph_nodes[v]
        else:
            return None

    def aStarAlgo(start_node, stop_node):

        open_set = {start_node}  # set(start_node) would split a multi-character name into characters
        closed_set = set()
        g = {}        # cost from the start node
        parents = {}  # parent map used to reconstruct the path

        g[start_node] = 0
        parents[start_node] = start_node

        while len(open_set) > 0:
            n = None

            # Pick the open node with the least f(n) = g(n) + h(n)
            for v in open_set:
                if n is None or g[v] + heuristic(v) < g[n] + heuristic(n):
                    n = v

            if n != stop_node and neighbors(n) is not None:
                for (m, weight) in neighbors(n):
                    if m not in open_set and m not in closed_set:
                        open_set.add(m)
                        parents[m] = n
                        g[m] = g[n] + weight
                    else:
                        # Found a cheaper route to m: update it and reopen if needed
                        if g[m] > g[n] + weight:
                            g[m] = g[n] + weight
                            parents[m] = n
                            if m in closed_set:
                                closed_set.remove(m)
                                open_set.add(m)

            if n is None:
                print("Path doesn't exist!!")
                return None
            if n == stop_node:
                path = []

                while parents[n] != n:
                    path.append(n)
                    n = parents[n]

                path.append(start_node)
                path.reverse()
                print("The path is : ", path)
                return path

            open_set.remove(n)
            closed_set.add(n)

        print("Path doesn't exist")
        return None

    aStarAlgo('A', 'J')

    # OUTPUT :- ['A', 'F', 'G', 'I', 'J']
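    # A hedged extra, not in the original gist: path_cost is a hypothetical helper
    # that sums the edge weights along a returned path, to sanity-check the route.
    def path_cost(path):
        return sum(w for a, b in zip(path, path[1:])
                   for n, w in Graph_nodes[a] if n == b)

    print("Cost of the A->J path :", path_cost(['A', 'F', 'G', 'I', 'J']))  # 3+1+3+3 = 10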
    53 changes: 53 additions & 0 deletions BackwardPropagationAlgorithm.py
    @@ -0,0 +1,53 @@
    import numpy as np

    X = np.array(([2, 9], [1, 5], [3, 6]), dtype='float')
    Y = np.array(([92], [86], [89]), dtype = 'float')
    X = X / np.amax(X , axis=0)
    Y = Y / 100

    epochs = 1000
    learning_rate = 0.6
    inputLayers = 2
    hiddenLayers = 3
    outputLayers = 1

    wh = np.random.uniform(size = (inputLayers, hiddenLayers))
    bh = np.random.uniform(size = (1, hiddenLayers))
    w0 = np.random.uniform(size = (hiddenLayers, outputLayers))
    b0 = np.random.uniform(size = (1, outputLayers))

    def sigmoid(z):
        return 1 / (1 + np.exp(-z))

    def derivative(x):
        return x * (1 - x)

    for i in range(epochs):
        # Forward Propagation
        z_h = np.dot(X, wh) + bh
        sigmoid_h = sigmoid(z_h)
        z_0 = np.dot(sigmoid_h, w0) + b0
        output = sigmoid(z_0)
        # Backward Propagation (note: the biases bh and b0 are left unchanged
        # in this simple version; only the weights are updated)
        deltaK = (Y - output) * derivative(output)
        deltaH = deltaK.dot(w0.T) * derivative(sigmoid_h)
        w0 = w0 + learning_rate * sigmoid_h.T.dot(deltaK)
        wh = wh + learning_rate * X.T.dot(deltaH)

    print(f"Input:\n {X}")
    print(f"Actual Output:\n {Y}")
    print(f"Predicted Output:\n {output}")


    # OUTPUT :-
    # Input:
    # [[0.66666667 1. ]
    # [0.33333333 0.55555556]
    # [1. 0.66666667]]
    # Actual Output:
    # [[0.92]
    # [0.86]
    # [0.89]]
    # Predicted Output:
    # [[0.89561426]
    # [0.87785989]
    # [0.89594741]]
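    # A hedged extra, not in the original gist: report the mean squared error of
    # the final predictions (assumes the training loop above has just run).
    mse = np.mean((Y - output) ** 2)
    print(f"Training MSE after {epochs} epochs : {mse:.6f}")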
    64 changes: 64 additions & 0 deletions BasicDecisionTreeAlgorithm.py
    @@ -0,0 +1,64 @@
    import pandas as pd
    from pandas import DataFrame
    from math import log
    from collections import Counter
    from pprint import pprint

    df_tennis = pd.read_csv('./Data/PlayTennis.csv')

    def entropy(probs):
        return sum([-prob * log(prob, 2) for prob in probs])

    def entropy_of_list(a_list):
        cnt = Counter(x for x in a_list)
        num_instances = len(a_list) * 1.0
        probs = [x / num_instances for x in cnt.values()]
        return entropy(probs)

    def information_gain(df, split_attribute_name, target_attribute_name):
        df_split = df.groupby(split_attribute_name)
        nobs = len(df.index) * 1.0
        df_agg_ent = df_split.agg({target_attribute_name: [entropy_of_list, lambda x: len(x) / nobs]})[target_attribute_name]
        df_agg_ent.columns = ['Entropy', 'PropObservations']
        new_entropy = sum(df_agg_ent['Entropy'] * df_agg_ent['PropObservations'])
        old_entropy = entropy_of_list(df[target_attribute_name])
        return old_entropy - new_entropy

    def id3(df, target_attribute_name, attribute_names, default_class=None):
        cnt = Counter(x for x in df[target_attribute_name])
        print(cnt)
        if len(cnt) == 1:
            return next(iter(cnt))
        elif df.empty or (not attribute_names):
            return default_class
        else:
            default_class = max(cnt.keys())
            gainz = [information_gain(df, attr, target_attribute_name) for attr in attribute_names]
            index_of_max = gainz.index(max(gainz))
            best_attr = attribute_names[index_of_max]
            tree = {best_attr: {}}
            remaining_attribute_names = [i for i in attribute_names if i != best_attr]

            for attr_val, data_subset in df.groupby(best_attr):
                subtree = id3(data_subset, target_attribute_name, remaining_attribute_names, default_class)
                tree[best_attr][attr_val] = subtree
            return tree

    attribute_names = list(df_tennis.columns)

    print("List of attributes: ", attribute_names)

    attribute_names.remove('Play Tennis')

    print("Predicting Attributes: ", attribute_names)

    tree = id3(df_tennis, 'Play Tennis', attribute_names)

    print("\n\nThe Resultant Decistion Tree is: \n")
    pprint(tree)

    # OUTPUT :-
    # The Resultant Decision Tree is:
    # {'Outlook': {'Overcast': 'Yes',
    # 'Rain': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}},
    # 'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}}}
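    # A hedged extra, not in the original gist: classify is a hypothetical helper
    # that walks the nested-dict tree to label one instance given as a
    # {attribute: value} mapping, returning default when a value is unseen.
    def classify(instance, tree, default=None):
        if not isinstance(tree, dict):
            return tree
        attr = next(iter(tree))
        value = instance.get(attr)
        if value not in tree[attr]:
            return default
        return classify(instance, tree[attr][value], default)

    print(classify({'Outlook': 'Sunny', 'Humidity': 'Normal'}, tree))  # -> 'Yes'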
    36 changes: 36 additions & 0 deletions CandidateEliminationAlgorithm.py
    @@ -0,0 +1,36 @@
    import pandas as pd

    df = pd.read_csv("./Data/EnjoySport.csv")
    print(df.head())

    concepts = df.values[:,:-1]
    target = df.values[:, -1]

    def learn(concepts, target):
        specific_h = concepts[0].copy()
        print(specific_h)
        general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
        print(general_h)

        for i, h in enumerate(concepts):
            # Positive example: generalise the specific hypothesis where it disagrees
            if target[i] == "yes":
                for x in range(len(specific_h)):
                    if h[x] != specific_h[x]:
                        specific_h[x] = "?"
                        general_h[x][x] = "?"
            # Negative example: specialise the general hypotheses where they disagree
            if target[i] == "no":
                for x in range(len(specific_h)):
                    if h[x] != specific_h[x]:
                        general_h[x][x] = specific_h[x]
                    else:
                        general_h[x][x] = "?"

        # Drop the hypotheses that stayed fully general
        indices = [i for i, val in enumerate(general_h) if val == ["?", "?", "?", "?", "?", "?"]]
        for i in indices:
            general_h.remove(["?", "?", "?", "?", "?", "?"])
        print(specific_h)
        print(general_h)

    learn(concepts, target)

    # OUTPUT:-
    # ['sunny' 'warm' '?' 'strong' '?' '?']
    # [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]