cernceph · February 7, 2017 09:12 · Feb 7, 2017 · Feb 3, 2017
diff --git a/mpa.py b/mpa.py
@@ -3,20 +3,27 @@
 import random
 from math import sqrt, log, pow
 
-NPGS = 100000  # num PGs
+DEBUG = True
+NPGS = 100000 # num PGs
 R = 3  # num replicas
 
+if NPGS > 3:
+  DEBUG = False
+
+print 'Num PGs:', NPGS
+print 'Num Replicas:', R
+
 # OSDs: (id, weight)
 # TODO: everything below is hard coded for exactly 4 OSDs
-osds = [(0, 3), (1, 3), (2, 3), (3, 1)]
+osds = [(0, 3.0), (1, 3.0), (2, 3.0), (3, 1.0)]
 
 weights = {}  # OSD weights.. we might tweak these round by round
 orig_weights = {}  # OSD weights. Read only.
 total_weight = 0
 for id, w in osds:
-  weights[id] = w
-  orig_weights[id] = w
-  total_weight += w
+  weights[id] = float(w)
+  orig_weights[id] = float(w)
+  total_weight += float(w)
 
 print 'OSDs (id: weight):', weights
 
@@ -28,7 +35,9 @@
   3: NPGS * R * weights[3]/total_weight,
 }
 
-print '\nExpected PGs per OSD:      ', expected
+print '\nExpected:  ', expected
+for r in xrange(R):
+  print '  Replica %d: %s' % (r, [float(v/R) for k,v in expected.iteritems()])
 
 # initialize PG and PG-by-replicanum counters for each OSD
 pgs = {
@@ -37,11 +46,7 @@
   2: 0,
   3: 0,
 }
-pgs_by_r = [
-  dict(pgs),
-  dict(pgs),
-  dict(pgs),
-]
+pgs_by_r = [dict(pgs) for r in xrange(R)]
 
 def weighted_choice(choices):
    total = sum(w for c, w in choices)
@@ -65,8 +70,9 @@ def weighted_choice(choices):
     pgs_by_r[r][pick] += 1
     choices.remove((pick, weights[pick]),)
 
-print 'Observed:                  ', pgs
-print 'Observed for Nth replica:  ', pgs_by_r
+print 'Observed:   ', pgs
+for r in xrange(R):
+  print '  Replica %d: %s' % (r, pgs_by_r[r])
 
 
 print "\nNow trying your new algorithm\n"
@@ -78,23 +84,35 @@ def weighted_choice(choices):
   2: 0,
   3: 0,
 }
-pgs_by_r = [
-  dict(pgs),
-  dict(pgs),
-  dict(pgs),
-]
+pgs_by_r = [dict(pgs) for r in xrange(R)]
 
 # modified CRUSH
 # loop over each PG.
 # After each replica selection, adjust the remaining choices' weights with your magic value.
-for i in xrange(NPGS):
+sum_all_estimates = 0.0
+sum_all_totals = 0.0
+for pg in xrange(NPGS):
+  if DEBUG: print "\nPlacing PG", pg
   choices = list(osds)
+  prob_of_getting_to_next_round = {
+    0: 1.0,
+    1: 1.0,
+    2: 1.0,
+    3: 1.0,
+  }
+  placement = [] # record where this pg is placed for debugging
+
+  # loop over R replicas, choosing a distinct OSD each time
   for r in xrange(R):
+    if DEBUG: print "Placing replica %d.%d" % (pg,r)
+    if DEBUG: print '  OSD choices',choices
     total_weight_start_of_round = float(sum(w for c, w in choices)) # keep track of the original sum of all weights
-    #print choices
     pick = weighted_choice(choices) # choose a OSD for this round
     pgs[pick] += 1 # inc the chosen OSD's PG counter
     pgs_by_r[r][pick] += 1 # inc the chosen OSD's PG-by-R counter
+    placement.append(pick)
+    if DEBUG: print '  Total weight =', total_weight_start_of_round
+    if DEBUG: print '  Picked OSD =', pick
 
     new_choices = [i for i in choices if i[0] != pick] # make a list of new choices, dropping the selected one
     choices = new_choices
@@ -103,8 +121,16 @@ def weighted_choice(choices):
 
     # reweight if we have more than one choice left
     if len(choices) > 1:
+      if DEBUG: print "Tweaking for replica %d.%d" % (pg, r+1)
       new_choices = []
+      if DEBUG: print '  estimated_new_total = %s - (%s / %s)' % (total_weight_start_of_round, total_weight_start_of_round, float(len(choices)+1))
+      #estimated_new_total = total_weight_start_of_round - (total_weight_start_of_round/(float(len(choices)+1)))
+      estimated_new_total = total_weight_next_round
+      if DEBUG: print '  estimated_new_total =', estimated_new_total
+      sum_all_estimates += estimated_new_total
+      sum_all_totals += total_weight_next_round
       for id, w in choices:
+        if DEBUG: print '  tweaking weight of OSD', id, 'with current weight', w
         # put your bright idea for adjusting the OSD weights here:
         #   new_weight = f(w, orig_weights[id], r, R, total_weight, total_weight_start_of_round, total_weight_next_round)
         #
@@ -126,14 +152,52 @@ def weighted_choice(choices):
         #   new_weight = pow(w, sqrt(total_weight_start_of_round / total_weight_next_round))   # utter craziness, almost works
         #
         # None of above are continue working if we set OSD weights to 1,3,3,1 -- this is a hard problem.
-        #
-        new_weight = w / (total_weight_start_of_round - w)
 
-        assert new_weight > 0.0, 'Negative new_weight %s (w=%s, total_weight=%s, total_weight_start_of_round=%s, total_weight_next_round=%s)' % (new_weight, w, total_weight, total_weight_start_of_round, total_weight_next_round)
+        # Loop invariant:
+        # P_thisround * W_current / T_current = W_original / T_original
+        #
+        # new_weight = W_current = ( W_original * T_current ) / ( T_original * P_thisround)
+        # Round 1:
+        #   P_thisround = 1.0
+        #   T_cur = total_weight
+        #   W_original = w
+        #   T_original = total_weight
+        #
+        #   1.0 * W_current / total_weight = w / total_weight
+        #   W_current = w
+        # 
+        # Round 2:
+        #   P_thisround = (1-W_round1/T_round1)
+        #   T_current = Unknown. E(T_current) = T_round1 - (T_round1/N_round1)
+        #
+        # Round 3:
+        #   P_thisround = (1-W_round1/T_round1)*(1-W_round2/T_round2)
+        #   E(T_current) = T_round2 - (T_round2/N_round2)
+        #    
+        if DEBUG: print "    prob_of_getting_to_next_round[%s] = %s * (1-%s/%s)" % (id, prob_of_getting_to_next_round[id], w, total_weight_start_of_round)
+        prob_of_getting_to_next_round[id] *= (1-w/total_weight_start_of_round)
+        if DEBUG: print '    prob_of_getting_to_next_round[%d] = %f' %  (id, prob_of_getting_to_next_round[id])
+        new_weight = orig_weights[id] * estimated_new_total / (total_weight * prob_of_getting_to_next_round[id])
+        if DEBUG: print '    new_weight = %s * %s / (%s * %s)' % (orig_weights[id], estimated_new_total, total_weight, prob_of_getting_to_next_round[id])
+        if DEBUG: print '    new_weight = ', new_weight
+
+        assert new_weight > 0.0, 'Negative new_weight %s (w=%s, total_weight=%s, total_weight_start_of_round=%s, total_weight_next_round=%s,prob_of_getting_to_next_round=%s)' % (new_weight, w, total_weight, total_weight_start_of_round, total_weight_next_round, prob_of_getting_to_next_round)
         new_choices.append((id, new_weight),)
         weights[id] = new_weight
 
-      choices = new_choices
+      # renormalize
+      actual_new_total = float(sum(w for c, w in new_choices))
+      factor = total_weight / actual_new_total
+      choices = [(id, w * factor) for id,w in new_choices]
+
+      if DEBUG: print "    Actual new total %s / Estimated new total %s = %s" % (actual_new_total, estimated_new_total, actual_new_total/estimated_new_total)
+
+  if DEBUG: print 'PG %d placed: %s' % (pg, placement)
+
+#print 'sum of all estimated totals', sum_all_estimates
+#print 'sum of all actual totals', sum_all_totals
+#print sum_all_estimates/sum_all_totals
 
-print 'Observed:                  ', pgs
-print 'Observed for Nth replica:  ', pgs_by_r
+print 'Observed:   ', pgs
+for r in xrange(R):
+  print '  Replica %d: %s' % (r, pgs_by_r[r])
diff --git a/mpa.py b/mpa.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python
+
+import random
+from math import sqrt, log, pow
+
+NPGS = 100000  # num PGs
+R = 3  # num replicas
+
+# OSDs: (id, weight)
+# TODO: everything below is hard coded for exactly 4 OSDs
+osds = [(0, 3), (1, 3), (2, 3), (3, 1)]
+
+weights = {}  # OSD weights.. we might tweak these round by round
+orig_weights = {}  # OSD weights. Read only.
+total_weight = 0
+for id, w in osds:
+  weights[id] = w
+  orig_weights[id] = w
+  total_weight += w
+
+print 'OSDs (id: weight):', weights
+
+# compute the expected # PGs for our 4 OSDs
+expected = {
+  0: NPGS * R * weights[0]/total_weight,
+  1: NPGS * R * weights[1]/total_weight,
+  2: NPGS * R * weights[2]/total_weight,
+  3: NPGS * R * weights[3]/total_weight,
+}
+
+print '\nExpected PGs per OSD:      ', expected
+
+# initialize PG and PG-by-replicanum counters for each OSD
+pgs = {
+  0: 0,
+  1: 0,
+  2: 0,
+  3: 0,
+}
+pgs_by_r = [
+  dict(pgs),
+  dict(pgs),
+  dict(pgs),
+]
+
+def weighted_choice(choices):
+   total = sum(w for c, w in choices)
+   r = random.uniform(0, total)
+   upto = 0
+   for c, w in choices:
+      if upto + w >= r:
+         return c
+      upto += w
+   assert False, "Shouldn't get here"
+
+print "\nSimulating with existing CRUSH\n"
+
+# simple CRUSH
+# loop over all PGs, select one with weighted choice, remove that option for the next choice.
+for i in xrange(NPGS):
+  choices = list(osds)
+  for r in xrange(R):
+    pick = weighted_choice(choices)
+    pgs[pick] += 1
+    pgs_by_r[r][pick] += 1
+    choices.remove((pick, weights[pick]),)
+
+print 'Observed:                  ', pgs
+print 'Observed for Nth replica:  ', pgs_by_r
+
+
+print "\nNow trying your new algorithm\n"
+
+# initialize PG and PG-by-replicanum counters for each OSD
+pgs = {
+  0: 0,
+  1: 0,
+  2: 0,
+  3: 0,
+}
+pgs_by_r = [
+  dict(pgs),
+  dict(pgs),
+  dict(pgs),
+]
+
+# modified CRUSH
+# loop over each PG.
+# After each replica selection, adjust the remaining choices' weights with your magic value.
+for i in xrange(NPGS):
+  choices = list(osds)
+  for r in xrange(R):
+    total_weight_start_of_round = float(sum(w for c, w in choices)) # keep track of the original sum of all weights
+    #print choices
+    pick = weighted_choice(choices) # choose a OSD for this round
+    pgs[pick] += 1 # inc the chosen OSD's PG counter
+    pgs_by_r[r][pick] += 1 # inc the chosen OSD's PG-by-R counter
+
+    new_choices = [i for i in choices if i[0] != pick] # make a list of new choices, dropping the selected one
+    choices = new_choices
+
+    total_weight_next_round = float(sum(w for c, w in choices)) # calculate the sum of remaining OSD weights
+
+    # reweight if we have more than one choice left
+    if len(choices) > 1:
+      new_choices = []
+      for id, w in choices:
+        # put your bright idea for adjusting the OSD weights here:
+        #   new_weight = f(w, orig_weights[id], r, R, total_weight, total_weight_start_of_round, total_weight_next_round)
+        #
+        # thoughts:
+        #   
+        # it seems clear that the weight of the small OSDs should become relatively smaller than the large OSDs for each round.
+        # also f cannot be a constant factor on w -- this doesn't change the distribution at all.
+        #
+        # ideas:
+        #   new_weight = w  # this is the current implementation
+        #   new_weight = orig_weights[id]  # this is also the current implementation
+        #
+        #   new_weight = w * total_weight_start_of_round / total_weight_next_round   # useless because same factor applied to all weights
+        #
+        #   new_weight = w / (total_weight_start_of_round - w)  # Sage's idea. kinda works for 3,3,3,1 example, not really for 1,3,3,1
+        #
+        #   new_weight = w*w # this goes in the right direction but is too aggressive
+        #   new_weight = w*sqrt(w) # getting closer, but still too agressive
+        #   new_weight = pow(w, sqrt(total_weight_start_of_round / total_weight_next_round))   # utter craziness, almost works
+        #
+        # None of above are continue working if we set OSD weights to 1,3,3,1 -- this is a hard problem.
+        #
+        new_weight = w / (total_weight_start_of_round - w)
+
+        assert new_weight > 0.0, 'Negative new_weight %s (w=%s, total_weight=%s, total_weight_start_of_round=%s, total_weight_next_round=%s)' % (new_weight, w, total_weight, total_weight_start_of_round, total_weight_next_round)
+        new_choices.append((id, new_weight),)
+        weights[id] = new_weight
+
+      choices = new_choices
+
+print 'Observed:                  ', pgs
+print 'Observed for Nth replica:  ', pgs_by_r
No results found