imrel · November 11, 2016 21:47 · May 15, 2011
diff --git a/helloevolve.py b/helloevolve.py
@@ -0,0 +1,190 @@
+"""
+helloevolve.py implements a genetic algorithm that starts with a base
+population of randomly generated strings, iterates over a certain number of
+generations while implementing 'natural selection', and prints out the most fit
+string.
+
+The parameters of the simulation can be changed by modifying one of the many
+global variables. To change the "most fit" string, modify OPTIMAL. POP_SIZE
+controls the size of each generation, and GENERATIONS is the amount of 
+generations that the simulation will loop through before returning the fittest
+string.
+
+This program subject to the terms of the BSD license listed below.
+
+---
+
+Copyright (c) 2011 Colin Drake
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+import random
+
+#
+# Global variables
+# Setup optimal string and GA input variables.
+#
+
+OPTIMAL     = "Hello, World"
+DNA_SIZE    = len(OPTIMAL)
+POP_SIZE    = 20
+GENERATIONS = 5000
+
+#
+# Helper functions
+# These are used as support, but aren't direct GA-specific functions.
+#
+
+def weighted_choice(items):
+  """
+  Chooses a random element from items, where items is a list of tuples in
+  the form (item, weight). weight determines the probability of choosing its
+  respective item. Note: this function is borrowed from ActiveState Recipes.
+  """
+  weight_total = sum((item[1] for item in items))
+  n = random.uniform(0, weight_total)
+  for item, weight in items:
+    if n < weight:
+      return item
+    n = n - weight
+  return item
+
+def random_char():
+  """
+  Return a random character between ASCII 32 and 126 (i.e. spaces, symbols,
+  letters, and digits). All characters returned will be nicely printable.
+  """
+  return chr(int(random.randrange(32, 126, 1)))
+
+def random_population():
+  """
+  Return a list of POP_SIZE individuals, each randomly generated via iterating
+  DNA_SIZE times to generate a string of random characters with random_char().
+  """
+  pop = []
+  for i in xrange(POP_SIZE):
+    dna = ""
+    for c in xrange(DNA_SIZE):
+      dna += random_char()
+    pop.append(dna)
+  return pop
+
+#
+# GA functions
+# These make up the bulk of the actual GA algorithm.
+#
+
+def fitness(dna):
+  """
+  For each gene in the DNA, this function calculates the difference between
+  it and the character in the same position in the OPTIMAL string. These values
+  are summed and then returned.
+  """
+  fitness = 0
+  for c in xrange(DNA_SIZE):
+    fitness += abs(ord(dna[c]) - ord(OPTIMAL[c]))
+  return fitness
+
+def mutate(dna):
+  """
+  For each gene in the DNA, there is a 1/mutation_chance chance that it will be
+  switched out with a random character. This ensures diversity in the
+  population, and ensures that is difficult to get stuck in local minima.
+  """
+  dna_out = ""
+  mutation_chance = 100
+  for c in xrange(DNA_SIZE):
+    if int(random.random()*mutation_chance) == 1:
+      dna_out += random_char()
+    else:
+      dna_out += dna[c]
+  return dna_out
+
+def crossover(dna1, dna2):
+  """
+  Slices both dna1 and dna2 into two parts at a random index within their
+  length and merges them. Both keep their initial sublist up to the crossover
+  index, but their ends are swapped.
+  """
+  pos = int(random.random()*DNA_SIZE)
+  return (dna1[:pos]+dna2[pos:], dna2[:pos]+dna1[pos:])
+
+#
+# Main driver
+# Generate a population and simulate GENERATIONS generations.
+#
+
+if __name__ == "__main__":
+  # Generate initial population. This will create a list of POP_SIZE strings,
+  # each initialized to a sequence of random characters.
+  population = random_population()
+
+  # Simulate all of the generations.
+  for generation in xrange(GENERATIONS):
+    print "Generation %s... Random sample: '%s'" % (generation, population[0])
+    weighted_population = []
+
+    # Add individuals and their respective fitness levels to the weighted
+    # population list. This will be used to pull out individuals via certain
+    # probabilities during the selection phase. Then, reset the population list
+    # so we can repopulate it after selection.
+    for individual in population:
+      fitness_val = fitness(individual)
+
+      # Generate the (individual,fitness) pair, taking in account whether or
+      # not we will accidently divide by zero.
+      if fitness_val == 0:
+        pair = (individual, 1.0)
+      else:
+        pair = (individual, 1.0/fitness_val)
+
+      weighted_population.append(pair)
+
+    population = []
+
+    # Select two random individuals, based on their fitness probabilites, cross
+    # their genes over at a random point, mutate them, and add them back to the
+    # population for the next iteration.
+    for _ in xrange(POP_SIZE/2):
+      # Selection
+      ind1 = weighted_choice(weighted_population)
+      ind2 = weighted_choice(weighted_population)
+
+      # Crossover
+      ind1, ind2 = crossover(ind1, ind2)
+
+      # Mutate and add back into the population.
+      population.append(mutate(ind1))
+      population.append(mutate(ind2))
+
+  # Display the highest-ranked string after all generations have been iterated
+  # over. This will be the closest string to the OPTIMAL string, meaning it
+  # will have the smallest fitness value. Finally, exit the program.
+  fittest_string = population[0]
+  minimum_fitness = fitness(population[0])
+
+  for individual in population:
+    ind_fitness = fitness(individual)
+    if ind_fitness <= minimum_fitness:
+      fittest_string = individual
+      minimum_fitness = ind_fitness
+
+  print "Fittest String: %s" % fittest_string
+  exit(0)
No results found