Skip to content

Instantly share code, notes, and snippets.

@cernceph
Forked from anonymous/mpa.py
Last active February 7, 2017 09:12
Show Gist options
  • Select an option

  • Save cernceph/59158b9c53f925cc778b44585ffc897c to your computer and use it in GitHub Desktop.

Select an option

Save cernceph/59158b9c53f925cc778b44585ffc897c to your computer and use it in GitHub Desktop.

Revisions

  1. cernceph revised this gist Feb 7, 2017. 1 changed file with 90 additions and 26 deletions.
    116 changes: 90 additions & 26 deletions mpa.py
    Original file line number Diff line number Diff line change
    @@ -3,20 +3,27 @@
    import random
    from math import sqrt, log, pow

    NPGS = 100000 # num PGs
    DEBUG = True
    NPGS = 100000 # num PGs
    R = 3 # num replicas

    if NPGS > 3:
    DEBUG = False

    print 'Num PGs:', NPGS
    print 'Num Replicas:', R

    # OSDs: (id, weight)
    # TODO: everything below is hard coded for exactly 4 OSDs
    osds = [(0, 3), (1, 3), (2, 3), (3, 1)]
    osds = [(0, 3.0), (1, 3.0), (2, 3.0), (3, 1.0)]

    weights = {} # OSD weights.. we might tweak these round by round
    orig_weights = {} # OSD weights. Read only.
    total_weight = 0
    for id, w in osds:
    weights[id] = w
    orig_weights[id] = w
    total_weight += w
    weights[id] = float(w)
    orig_weights[id] = float(w)
    total_weight += float(w)

    print 'OSDs (id: weight):', weights

    @@ -28,7 +35,9 @@
    3: NPGS * R * weights[3]/total_weight,
    }

    print '\nExpected PGs per OSD: ', expected
    print '\nExpected: ', expected
    for r in xrange(R):
    print ' Replica %d: %s' % (r, [float(v/R) for k,v in expected.iteritems()])

    # initialize PG and PG-by-replicanum counters for each OSD
    pgs = {
    @@ -37,11 +46,7 @@
    2: 0,
    3: 0,
    }
    pgs_by_r = [
    dict(pgs),
    dict(pgs),
    dict(pgs),
    ]
    pgs_by_r = [dict(pgs) for r in xrange(R)]

    def weighted_choice(choices):
    total = sum(w for c, w in choices)
    @@ -65,8 +70,9 @@ def weighted_choice(choices):
    pgs_by_r[r][pick] += 1
    choices.remove((pick, weights[pick]),)

    print 'Observed: ', pgs
    print 'Observed for Nth replica: ', pgs_by_r
    print 'Observed: ', pgs
    for r in xrange(R):
    print ' Replica %d: %s' % (r, pgs_by_r[r])


    print "\nNow trying your new algorithm\n"
    @@ -78,23 +84,35 @@ def weighted_choice(choices):
    2: 0,
    3: 0,
    }
    pgs_by_r = [
    dict(pgs),
    dict(pgs),
    dict(pgs),
    ]
    pgs_by_r = [dict(pgs) for r in xrange(R)]

    # modified CRUSH
    # loop over each PG.
    # After each replica selection, adjust the remaining choices' weights with your magic value.
    for i in xrange(NPGS):
    sum_all_estimates = 0.0
    sum_all_totals = 0.0
    for pg in xrange(NPGS):
    if DEBUG: print "\nPlacing PG", pg
    choices = list(osds)
    prob_of_getting_to_next_round = {
    0: 1.0,
    1: 1.0,
    2: 1.0,
    3: 1.0,
    }
    placement = [] # record where this pg is placed for debugging

    # loop over R replicas, choosing a distinct OSD each time
    for r in xrange(R):
    if DEBUG: print "Placing replica %d.%d" % (pg,r)
    if DEBUG: print ' OSD choices',choices
    total_weight_start_of_round = float(sum(w for c, w in choices)) # keep track of the original sum of all weights
    #print choices
    pick = weighted_choice(choices) # choose a OSD for this round
    pgs[pick] += 1 # inc the chosen OSD's PG counter
    pgs_by_r[r][pick] += 1 # inc the chosen OSD's PG-by-R counter
    placement.append(pick)
    if DEBUG: print ' Total weight =', total_weight_start_of_round
    if DEBUG: print ' Picked OSD =', pick

    new_choices = [i for i in choices if i[0] != pick] # make a list of new choices, dropping the selected one
    choices = new_choices
    @@ -103,8 +121,16 @@ def weighted_choice(choices):

    # reweight if we have more than one choice left
    if len(choices) > 1:
    if DEBUG: print "Tweaking for replica %d.%d" % (pg, r+1)
    new_choices = []
    if DEBUG: print ' estimated_new_total = %s - (%s / %s)' % (total_weight_start_of_round, total_weight_start_of_round, float(len(choices)+1))
    #estimated_new_total = total_weight_start_of_round - (total_weight_start_of_round/(float(len(choices)+1)))
    estimated_new_total = total_weight_next_round
    if DEBUG: print ' estimated_new_total =', estimated_new_total
    sum_all_estimates += estimated_new_total
    sum_all_totals += total_weight_next_round
    for id, w in choices:
    if DEBUG: print ' tweaking weight of OSD', id, 'with current weight', w
    # put your bright idea for adjusting the OSD weights here:
    # new_weight = f(w, orig_weights[id], r, R, total_weight, total_weight_start_of_round, total_weight_next_round)
    #
    @@ -126,14 +152,52 @@ def weighted_choice(choices):
    # new_weight = pow(w, sqrt(total_weight_start_of_round / total_weight_next_round)) # utter craziness, almost works
    #
    # None of above are continue working if we set OSD weights to 1,3,3,1 -- this is a hard problem.
    #
    new_weight = w / (total_weight_start_of_round - w)

    assert new_weight > 0.0, 'Negative new_weight %s (w=%s, total_weight=%s, total_weight_start_of_round=%s, total_weight_next_round=%s)' % (new_weight, w, total_weight, total_weight_start_of_round, total_weight_next_round)
    # Loop invariant:
    # P_thisround * W_current / T_current = W_original / T_original
    #
    # new_weight = W_current = ( W_original * T_current ) / ( T_original * P_thisround)
    # Round 1:
    # P_thisround = 1.0
    # T_cur = total_weight
    # W_original = w
    # T_original = total_weight
    #
    # 1.0 * W_current / total_weight = w / total_weight
    # W_current = w
    #
    # Round 2:
    # P_thisround = (1-W_round1/T_round1)
    # T_current = Unknown. E(T_current) = T_round1 - (T_round1/N_round1)
    #
    # Round 3:
    # P_thisround = (1-W_round1/T_round1)*(1-W_round2/T_round2)
    # E(T_current) = T_round2 - (T_round2/N_round2)
    #
    if DEBUG: print " prob_of_getting_to_next_round[%s] = %s * (1-%s/%s)" % (id, prob_of_getting_to_next_round[id], w, total_weight_start_of_round)
    prob_of_getting_to_next_round[id] *= (1-w/total_weight_start_of_round)
    if DEBUG: print ' prob_of_getting_to_next_round[%d] = %f' % (id, prob_of_getting_to_next_round[id])
    new_weight = orig_weights[id] * estimated_new_total / (total_weight * prob_of_getting_to_next_round[id])
    if DEBUG: print ' new_weight = %s * %s / (%s * %s)' % (orig_weights[id], estimated_new_total, total_weight, prob_of_getting_to_next_round[id])
    if DEBUG: print ' new_weight = ', new_weight

    assert new_weight > 0.0, 'Negative new_weight %s (w=%s, total_weight=%s, total_weight_start_of_round=%s, total_weight_next_round=%s,prob_of_getting_to_next_round=%s)' % (new_weight, w, total_weight, total_weight_start_of_round, total_weight_next_round, prob_of_getting_to_next_round)
    new_choices.append((id, new_weight),)
    weights[id] = new_weight

    choices = new_choices
    # renormalize
    actual_new_total = float(sum(w for c, w in new_choices))
    factor = total_weight / actual_new_total
    choices = [(id, w * factor) for id,w in new_choices]

    if DEBUG: print " Actual new total %s / Estimated new total %s = %s" % (actual_new_total, estimated_new_total, actual_new_total/estimated_new_total)

    if DEBUG: print 'PG %d placed: %s' % (pg, placement)

    #print 'sum of all estimated totals', sum_all_estimates
    #print 'sum of all actual totals', sum_all_totals
    #print sum_all_estimates/sum_all_totals

    print 'Observed: ', pgs
    print 'Observed for Nth replica: ', pgs_by_r
    print 'Observed: ', pgs
    for r in xrange(R):
    print ' Replica %d: %s' % (r, pgs_by_r[r])
  2. @invalid-email-address Anonymous created this gist Feb 3, 2017.
    139 changes: 139 additions & 0 deletions mpa.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,139 @@
    #!/usr/bin/env python

    import random
    from math import sqrt, log, pow

    NPGS = 100000 # num PGs
    R = 3 # num replicas

    # OSDs: (id, weight)
    # TODO: everything below is hard coded for exactly 4 OSDs
    osds = [(0, 3), (1, 3), (2, 3), (3, 1)]

    weights = {} # OSD weights.. we might tweak these round by round
    orig_weights = {} # OSD weights. Read only.
    total_weight = 0
    for id, w in osds:
    weights[id] = w
    orig_weights[id] = w
    total_weight += w

    print 'OSDs (id: weight):', weights

    # compute the expected # PGs for our 4 OSDs
    expected = {
    0: NPGS * R * weights[0]/total_weight,
    1: NPGS * R * weights[1]/total_weight,
    2: NPGS * R * weights[2]/total_weight,
    3: NPGS * R * weights[3]/total_weight,
    }

    print '\nExpected PGs per OSD: ', expected

    # initialize PG and PG-by-replicanum counters for each OSD
    pgs = {
    0: 0,
    1: 0,
    2: 0,
    3: 0,
    }
    pgs_by_r = [
    dict(pgs),
    dict(pgs),
    dict(pgs),
    ]

    def weighted_choice(choices):
    total = sum(w for c, w in choices)
    r = random.uniform(0, total)
    upto = 0
    for c, w in choices:
    if upto + w >= r:
    return c
    upto += w
    assert False, "Shouldn't get here"

    print "\nSimulating with existing CRUSH\n"

    # simple CRUSH
    # loop over all PGs, select one with weighted choice, remove that option for the next choice.
    for i in xrange(NPGS):
    choices = list(osds)
    for r in xrange(R):
    pick = weighted_choice(choices)
    pgs[pick] += 1
    pgs_by_r[r][pick] += 1
    choices.remove((pick, weights[pick]),)

    print 'Observed: ', pgs
    print 'Observed for Nth replica: ', pgs_by_r


    print "\nNow trying your new algorithm\n"

    # initialize PG and PG-by-replicanum counters for each OSD
    pgs = {
    0: 0,
    1: 0,
    2: 0,
    3: 0,
    }
    pgs_by_r = [
    dict(pgs),
    dict(pgs),
    dict(pgs),
    ]

    # modified CRUSH
    # loop over each PG.
    # After each replica selection, adjust the remaining choices' weights with your magic value.
    for i in xrange(NPGS):
    choices = list(osds)
    for r in xrange(R):
    total_weight_start_of_round = float(sum(w for c, w in choices)) # keep track of the original sum of all weights
    #print choices
    pick = weighted_choice(choices) # choose a OSD for this round
    pgs[pick] += 1 # inc the chosen OSD's PG counter
    pgs_by_r[r][pick] += 1 # inc the chosen OSD's PG-by-R counter

    new_choices = [i for i in choices if i[0] != pick] # make a list of new choices, dropping the selected one
    choices = new_choices

    total_weight_next_round = float(sum(w for c, w in choices)) # calculate the sum of remaining OSD weights

    # reweight if we have more than one choice left
    if len(choices) > 1:
    new_choices = []
    for id, w in choices:
    # put your bright idea for adjusting the OSD weights here:
    # new_weight = f(w, orig_weights[id], r, R, total_weight, total_weight_start_of_round, total_weight_next_round)
    #
    # thoughts:
    #
    # it seems clear that the weight of the small OSDs should become relatively smaller than the large OSDs for each round.
    # also f cannot be a constant factor on w -- this doesn't change the distribution at all.
    #
    # ideas:
    # new_weight = w # this is the current implementation
    # new_weight = orig_weights[id] # this is also the current implementation
    #
    # new_weight = w * total_weight_start_of_round / total_weight_next_round # useless because same factor applied to all weights
    #
    # new_weight = w / (total_weight_start_of_round - w) # Sage's idea. kinda works for 3,3,3,1 example, not really for 1,3,3,1
    #
    # new_weight = w*w # this goes in the right direction but is too aggressive
    # new_weight = w*sqrt(w) # getting closer, but still too agressive
    # new_weight = pow(w, sqrt(total_weight_start_of_round / total_weight_next_round)) # utter craziness, almost works
    #
    # None of above are continue working if we set OSD weights to 1,3,3,1 -- this is a hard problem.
    #
    new_weight = w / (total_weight_start_of_round - w)

    assert new_weight > 0.0, 'Negative new_weight %s (w=%s, total_weight=%s, total_weight_start_of_round=%s, total_weight_next_round=%s)' % (new_weight, w, total_weight, total_weight_start_of_round, total_weight_next_round)
    new_choices.append((id, new_weight),)
    weights[id] = new_weight

    choices = new_choices

    print 'Observed: ', pgs
    print 'Observed for Nth replica: ', pgs_by_r