@iiLaurens
Last active November 2, 2022 16:17
Revisions

  1. iiLaurens revised this gist Apr 3, 2020. 1 changed file with 20 additions and 7 deletions.
    27 changes: 20 additions & 7 deletions generate.py
    @@ -18,6 +18,7 @@
     STATELIST = {0: (0,0,0)} # Game start state
     STATELIST = {**STATELIST, **{nr+1:state for nr, state in enumerate(product(range(2), range(CARDS.min()*STARTING_CARDS_PLAYER,BLACKJACK + 2), range(CARDS.min()*STARTING_CARDS_DEALER, BLACKJACK+2)))}}
     
    +
     def cartesian(x,y):
         return np.dstack(np.meshgrid(x, y)).reshape(-1, 2).sum(axis=1)
     
    @@ -77,15 +78,27 @@ def blackjack_probability(action, stateid_now, stateid_next):
             # Next state must be a drawable state
             return 0.0
     
    -    if dealer_now >= DEALER_SKIP and dealer_now != dealer_next:
    -        # Dealer always skips once it has a card count higher than set amount
    +    if dealer_now != dealer_next and player_now != player_next:
    +        # Only the player or the dealer can draw a card. Not both simultaneously!
             return 0.0
     
    -
    -    dealer_prob = deal_card_probability(dealer_now, dealer_next, take=1) if dealer_now < DEALER_SKIP else 1
    -    player_prob = deal_card_probability(player_now, player_next, take=1) if not (ACTIONLIST[action] == 'skip' or skipped_now == 1) else 1
    -
    -    return dealer_prob * player_prob
    +    # Now either the dealer or the player draws a card
    +    if ACTIONLIST[action] == 'draw' and skipped_now == 0:
    +        # Player draws a card
    +        prob = deal_card_probability(player_now, player_next, take=1)
    +    else:
    +        # Dealer draws a card
    +        if dealer_now >= DEALER_SKIP:
    +            if dealer_now != dealer_next:
    +                # Dealer always stands once it has a card count higher than set amount
    +                return 0.0
    +            else:
    +                # Dealer stands
    +                return 1.0
    +
    +        prob = deal_card_probability(dealer_now, dealer_next, take=1)
    +
    +    return prob
     
     
     def blackjack_rewards(action, stateid):
  2. iiLaurens revised this gist Apr 3, 2020. 1 changed file with 19 additions and 7 deletions.
    26 changes: 19 additions & 7 deletions generate.py
    @@ -1,4 +1,6 @@
     import numpy as np
    +import pandas as pd
    +
     from itertools import product
     from functools import reduce
     
    @@ -10,10 +12,11 @@
     CARDS = np.array([2,3,4,5,6,7,8,9,10,10,10,10,11])
     BLACKJACK = 21
     DEALER_SKIP = 17
    -NR_STARTING_CARDS = 2
    +STARTING_CARDS_PLAYER = 2
    +STARTING_CARDS_DEALER = 1
     
     STATELIST = {0: (0,0,0)} # Game start state
    -STATELIST = {**STATELIST, **{nr+1:state for nr, state in enumerate(product(range(2), range(CARDS.min()*NR_STARTING_CARDS,BLACKJACK + 2), range(CARDS.min()*NR_STARTING_CARDS, BLACKJACK+2)))}}
    +STATELIST = {**STATELIST, **{nr+1:state for nr, state in enumerate(product(range(2), range(CARDS.min()*STARTING_CARDS_PLAYER,BLACKJACK + 2), range(CARDS.min()*STARTING_CARDS_DEALER, BLACKJACK+2)))}}
     
     def cartesian(x,y):
         return np.dstack(np.meshgrid(x, y)).reshape(-1, 2).sum(axis=1)
    @@ -32,7 +35,7 @@ def is_gameover(skipped, player, dealer):
         return any([
             dealer >= DEALER_SKIP and skipped == 1,
             dealer > BLACKJACK and skipped == 1,
    -        player > BLACKJACK and dealer >= DEALER_SKIP
    +        player > BLACKJACK
         ])
     
     def blackjack_probability(action, stateid_now, stateid_next):
    @@ -49,8 +52,8 @@ def blackjack_probability(action, stateid_now, stateid_next):
                 return 0
             else:
                 # State lower or equal than 1 is a start of a new game
    -            dealer_prob = deal_card_probability(0, dealer_next, take=NR_STARTING_CARDS)
    -            player_prob = deal_card_probability(0, player_next, take=NR_STARTING_CARDS)
    +            dealer_prob = deal_card_probability(0, dealer_next, take=STARTING_CARDS_DEALER)
    +            player_prob = deal_card_probability(0, player_next, take=STARTING_CARDS_PLAYER)
     
                 return dealer_prob * player_prob
     
    @@ -78,6 +81,7 @@ def blackjack_probability(action, stateid_now, stateid_next):
             # Dealer always skips once it has a card count higher than set amount
             return 0.0
     
    +
         dealer_prob = deal_card_probability(dealer_now, dealer_next, take=1) if dealer_now < DEALER_SKIP else 1
         player_prob = deal_card_probability(player_now, player_next, take=1) if not (ACTIONLIST[action] == 'skip' or skipped_now == 1) else 1
     
    @@ -89,7 +93,7 @@ def blackjack_rewards(action, stateid):
     
         if not is_gameover(skipped, player, dealer):
             return 0
    -    elif (player > BLACKJACK or player <= dealer) and dealer <= BLACKJACK:
    +    elif player > BLACKJACK or (player <= dealer and dealer <= BLACKJACK):
             return -1
         elif player == BLACKJACK and dealer < BLACKJACK:
             return 1.5
    @@ -111,12 +115,20 @@ def blackjack_rewards(action, stateid):
     
     
     def print_blackjack_policy(policy):
    -    idx = pd.MultiIndex.from_tuples(list(blackjack.STATELIST.values()), names=['Skipped', 'Player', 'Dealer'])
    +    idx = pd.MultiIndex.from_tuples(list(STATELIST.values()), names=['Skipped', 'Player', 'Dealer'])
         S = pd.Series(['x' if i == 1 else '.' for i in policy], index=idx)
         S = S.loc[S.index.get_level_values('Skipped')==0].reset_index('Skipped', drop=True)
         S = S.loc[S.index.get_level_values('Player')>0]
         S = S.loc[S.index.get_level_values('Dealer')>0]
         return S.unstack(-1)
     
    +def print_blackjack_rewards():
    +    idx = pd.MultiIndex.from_tuples(list(STATELIST.values()), names=['Skipped', 'Player', 'Dealer'])
    +    S = pd.Series(R[:,0], index=idx)
    +    S = S.loc[S.index.get_level_values('Skipped')==1].reset_index('Skipped', drop=True)
    +    S = S.loc[S.index.get_level_values('Player')>0]
    +    S = S.loc[S.index.get_level_values('Dealer')>0]
    +    return S.unstack(-1)
    +
     # Check that we have a valid transition matrix with transition probabilities summing to 1
     assert (T.sum(axis=2).round(10) == 1).all()
  3. iiLaurens revised this gist Apr 3, 2020. 1 changed file with 7 additions and 2 deletions.
    9 changes: 7 additions & 2 deletions generate.py
    @@ -91,7 +91,7 @@ def blackjack_rewards(action, stateid):
             return 0
         elif (player > BLACKJACK or player <= dealer) and dealer <= BLACKJACK:
             return -1
    -    elif player == BLACKJACK:
    +    elif player == BLACKJACK and dealer < BLACKJACK:
             return 1.5
         elif player > dealer or dealer > BLACKJACK:
             return 1
    @@ -111,7 +111,12 @@ def blackjack_rewards(action, stateid):
     
     
     def print_blackjack_policy(policy):
    -    pass
    +    idx = pd.MultiIndex.from_tuples(list(blackjack.STATELIST.values()), names=['Skipped', 'Player', 'Dealer'])
    +    S = pd.Series(['x' if i == 1 else '.' for i in policy], index=idx)
    +    S = S.loc[S.index.get_level_values('Skipped')==0].reset_index('Skipped', drop=True)
    +    S = S.loc[S.index.get_level_values('Player')>0]
    +    S = S.loc[S.index.get_level_values('Dealer')>0]
    +    return S.unstack(-1)
     
     # Check that we have a valid transition matrix with transition probabilities summing to 1
     assert (T.sum(axis=2).round(10) == 1).all()
  4. iiLaurens revised this gist Apr 3, 2020. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions generate.py
    @@ -29,9 +29,9 @@ def deal_card_probability(count_now, count_next, take=1):
     
     
     def is_gameover(skipped, player, dealer):
    -    return skipped == 1 and any([
    -        dealer >= DEALER_SKIP,
    -        dealer > BLACKJACK,
    +    return any([
    +        dealer >= DEALER_SKIP and skipped == 1,
    +        dealer > BLACKJACK and skipped == 1,
             player > BLACKJACK and dealer >= DEALER_SKIP
         ])
     
  5. iiLaurens revised this gist Apr 3, 2020. 1 changed file with 14 additions and 10 deletions.
    24 changes: 14 additions & 10 deletions generate.py
    @@ -29,10 +29,10 @@ def deal_card_probability(count_now, count_next, take=1):
     
     
     def is_gameover(skipped, player, dealer):
    -    return any([
    -        dealer >= DEALER_SKIP and skipped == 1,
    +    return skipped == 1 and any([
    +        dealer >= DEALER_SKIP,
             dealer > BLACKJACK,
    -        player > BLACKJACK
    +        player > BLACKJACK and dealer >= DEALER_SKIP
         ])
     
     def blackjack_probability(action, stateid_now, stateid_next):
    @@ -64,7 +64,7 @@ def blackjack_probability(action, stateid_now, stateid_next):
                 # Also player cards cannot increase once in a skipped state
                 return 0.0
     
    -    if ACTIONLIST[action] == 'skip' or (ACTIONLIST[action] == 'draw' and skipped_now == 1):
    +    if ACTIONLIST[action] == 'skip' or skipped_now == 1:
             # If willingly skipped or in forced skip (attempted draw in already skipped game):
             if skipped_next != 1 or player_now != player_next:
                 # Next state must be a skipped state with same card count for player
    @@ -89,25 +89,29 @@ def blackjack_rewards(action, stateid):
     
         if not is_gameover(skipped, player, dealer):
             return 0
    -    elif player > BLACKJACK or dealer == BLACKJACK or player <= dealer:
    +    elif (player > BLACKJACK or player <= dealer) and dealer <= BLACKJACK:
             return -1
         elif player == BLACKJACK:
             return 1.5
    -    elif player > dealer:
    +    elif player > dealer or dealer > BLACKJACK:
             return 1
         else:
    -        raise Exception('Undefined reward')
    +        raise Exception(f'Undefined reward: {skipped}, {player}, {dealer}')
     
     
     # Define transition matrix
     T = np.zeros((len(ACTIONLIST), len(STATELIST), len(STATELIST)))
     for a, i, j in product(ACTIONLIST.keys(), STATELIST.keys(), STATELIST.keys()):
    -    T[a,i,j] = blackjack_probability(0, i, j)
    +    T[a,i,j] = blackjack_probability(a, i, j)
     
     # Define reward matrix
    -R = np.zeros((len(STATELIST), len(ACTIONLIST))
    +R = np.zeros((len(STATELIST), len(ACTIONLIST)))
     for a, s in product(ACTIONLIST.keys(), STATELIST.keys()):
    -    R[s,a] = blackjack_rewards(a, s)
    +    R[s, a] = blackjack_rewards(a, s)
    +
    +
    +def print_blackjack_policy(policy):
    +    pass
     
     # Check that we have a valid transition matrix with transition probabilities summing to 1
     assert (T.sum(axis=2).round(10) == 1).all()
  6. iiLaurens revised this gist Apr 3, 2020. No changes.
  7. iiLaurens revised this gist Apr 3, 2020. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion generate.py
    @@ -105,7 +105,7 @@ def blackjack_rewards(action, stateid):
         T[a,i,j] = blackjack_probability(0, i, j)
     
     # Define reward matrix
    -R = np.zeros((2, len(STATELIST)))
    +R = np.zeros((len(STATELIST), len(ACTIONLIST))
     for a, s in product(ACTIONLIST.keys(), STATELIST.keys()):
         R[s,a] = blackjack_rewards(a, s)
     
  8. iiLaurens revised this gist Apr 3, 2020. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion generate.py
    @@ -107,7 +107,7 @@ def blackjack_rewards(action, stateid):
     # Define reward matrix
     R = np.zeros((2, len(STATELIST)))
     for a, s in product(ACTIONLIST.keys(), STATELIST.keys()):
    -    R[a,s] = blackjack_rewards(a, s)
    +    R[s,a] = blackjack_rewards(a, s)
     
     # Check that we have a valid transition matrix with transition probabilities summing to 1
     assert (T.sum(axis=2).round(10) == 1).all()
  9. iiLaurens revised this gist Apr 3, 2020. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions generate.py
    @@ -106,8 +106,8 @@ def blackjack_rewards(action, stateid):
     
     # Define reward matrix
     R = np.zeros((2, len(STATELIST)))
    -for a, s in product(range(2), STATELIST.keys()):
    +for a, s in product(ACTIONLIST.keys(), STATELIST.keys()):
         R[a,s] = blackjack_rewards(a, s)
     
     # Check that we have a valid transition matrix with transition probabilities summing to 1
    -assert (T.sum(axis=2).round(10) == 1).all()
    +assert (T.sum(axis=2).round(10) == 1).all()
  10. iiLaurens revised this gist Apr 3, 2020. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions generate.py
    @@ -45,6 +45,7 @@ def blackjack_probability(action, stateid_now, stateid_next):
     
         if stateid_now == 0:
             if skipped_next == 1:
    +            # After start of the game the game cannot be in a skipped state
                 return 0
             else:
                 # State lower or equal than 1 is a start of a new game
  11. iiLaurens revised this gist Apr 3, 2020. 1 changed file with 0 additions and 1 deletion.
    1 change: 0 additions & 1 deletion generate.py
    @@ -36,7 +36,6 @@ def is_gameover(skipped, player, dealer):
         ])
     
     def blackjack_probability(action, stateid_now, stateid_next):
    -    # 0,2,2
         skipped_now, player_now, dealer_now = STATELIST[stateid_now]
         skipped_next, player_next, dealer_next = STATELIST[stateid_next]
     
  12. iiLaurens created this gist Apr 3, 2020.
    113 changes: 113 additions & 0 deletions generate.py
    @@ -0,0 +1,113 @@
    import numpy as np
    from itertools import product
    from functools import reduce
    
    ACTIONLIST = {
        0: 'skip',
        1: 'draw'
    }
    
    CARDS = np.array([2,3,4,5,6,7,8,9,10,10,10,10,11])
    BLACKJACK = 21
    DEALER_SKIP = 17
    NR_STARTING_CARDS = 2
    
    STATELIST = {0: (0,0,0)} # Game start state
    STATELIST = {**STATELIST, **{nr+1:state for nr, state in enumerate(product(range(2), range(CARDS.min()*NR_STARTING_CARDS,BLACKJACK + 2), range(CARDS.min()*NR_STARTING_CARDS, BLACKJACK+2)))}}
    
    def cartesian(x,y):
        return np.dstack(np.meshgrid(x, y)).reshape(-1, 2).sum(axis=1)
    
    
    def deal_card_probability(count_now, count_next, take=1):
        if take > 1:
            cards = reduce(cartesian, [CARDS]*take)
        else:
            cards = CARDS
    
        return (np.minimum(count_now + cards, BLACKJACK + 1) == count_next).sum() / len(cards)
    
    
    def is_gameover(skipped, player, dealer):
        return any([
            dealer >= DEALER_SKIP and skipped == 1,
            dealer > BLACKJACK,
            player > BLACKJACK
        ])
    
    def blackjack_probability(action, stateid_now, stateid_next):
        # 0,2,2
        skipped_now, player_now, dealer_now = STATELIST[stateid_now]
        skipped_next, player_next, dealer_next = STATELIST[stateid_next]
    
        if stateid_now == stateid_next:
            # Game cannot stay in current state
            return 0.0
    
        if stateid_now == 0:
            if skipped_next == 1:
                return 0
            else:
                # State lower or equal than 1 is a start of a new game
                dealer_prob = deal_card_probability(0, dealer_next, take=NR_STARTING_CARDS)
                player_prob = deal_card_probability(0, player_next, take=NR_STARTING_CARDS)
    
                return dealer_prob * player_prob
    
        if is_gameover(skipped_now, player_now, dealer_now):
            # We arrived at end state, now reset game
            return 1.0 if stateid_next == 0 else 0.0
    
        if skipped_now == 1:
            if skipped_next == 0 or player_next != player_now:
                # Once you skip you keep on skipping in blackjack
                # Also player cards cannot increase once in a skipped state
                return 0.0
    
        if ACTIONLIST[action] == 'skip' or (ACTIONLIST[action] == 'draw' and skipped_now == 1):
            # If willingly skipped or in forced skip (attempted draw in already skipped game):
            if skipped_next != 1 or player_now != player_next:
                # Next state must be a skipped state with same card count for player
                return 0.0
    
        if ACTIONLIST[action] == 'draw' and skipped_now == 0 and skipped_next != 0:
            # Next state must be a drawable state
            return 0.0
    
        if dealer_now >= DEALER_SKIP and dealer_now != dealer_next:
            # Dealer always skips once it has a card count higher than set amount
            return 0.0
    
        dealer_prob = deal_card_probability(dealer_now, dealer_next, take=1) if dealer_now < DEALER_SKIP else 1
        player_prob = deal_card_probability(player_now, player_next, take=1) if not (ACTIONLIST[action] == 'skip' or skipped_now == 1) else 1
    
        return dealer_prob * player_prob
    
    
    def blackjack_rewards(action, stateid):
        skipped, player, dealer = STATELIST[stateid]
    
        if not is_gameover(skipped, player, dealer):
            return 0
        elif player > BLACKJACK or dealer == BLACKJACK or player <= dealer:
            return -1
        elif player == BLACKJACK:
            return 1.5
        elif player > dealer:
            return 1
        else:
            raise Exception('Undefined reward')
    
    
    # Define transition matrix
    T = np.zeros((len(ACTIONLIST), len(STATELIST), len(STATELIST)))
    for a, i, j in product(ACTIONLIST.keys(), STATELIST.keys(), STATELIST.keys()):
        T[a,i,j] = blackjack_probability(0, i, j)
    
    # Define reward matrix
    R = np.zeros((2, len(STATELIST)))
    for a, s in product(range(2), STATELIST.keys()):
        R[a,s] = blackjack_rewards(a, s)
    
    # Check that we have a valid transition matrix with transition probabilities summing to 1
    assert (T.sum(axis=2).round(10) == 1).all()
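
The probability helper is easy to sanity-check by hand: CARDS contains 13 values, four of which are tens, so a count of 11 should reach exactly 21 on one draw with probability 4/13. The snippet below is a minimal check, assuming the latest revision of the gist is saved as generate.py on the import path (importing it runs the full matrix construction at module level, which takes a while).

    # Hand-checkable case for deal_card_probability.
    # Assumption: the latest revision of generate.py from this gist is importable.
    from generate import deal_card_probability

    # CARDS holds 13 entries with four tens, so 11 -> 21 in a single draw is 4/13.
    p = deal_card_probability(11, 21, take=1)
    assert abs(p - 4/13) < 1e-12
    print(p)  # approximately 0.3077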
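The end product of the script is the transition tensor T with shape (actions, states, states) and the reward matrix R with shape (states, actions). The gist itself stops after the validity assert and does not show a solving step, so the following is only a sketch of how the matrices might be consumed; the use of pymdptoolbox, the choice of PolicyIteration, and the 0.999 discount are assumptions for illustration, not the author's setup.

    # Sketch: feed the generated matrices to an MDP solver.
    # Assumptions: pymdptoolbox is installed and the latest revision of generate.py is importable;
    # any solver accepting T of shape (A, S, S) and R of shape (S, A) would work the same way.
    from mdptoolbox.mdp import PolicyIteration

    from generate import T, R, print_blackjack_policy  # runs the matrix construction on import

    solver = PolicyIteration(T, R, 0.999)  # discount close to 1; episodes reset through state 0
    solver.run()

    # In the latest revision, print_blackjack_policy marks states where action 1 ('draw')
    # is chosen with 'x' and states where action 0 ('skip') is chosen with '.',
    # laid out as a player-count by dealer-count table.
    print(print_blackjack_policy(solver.policy))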