Last active
November 2, 2022 16:17
-
-
Save iiLaurens/ba9c479e71ee4ceef816ad50b87d9ebd to your computer and use it in GitHub Desktop.
Revisions
-
iiLaurens revised this gist
Apr 3, 2020 . 1 changed file with 20 additions and 7 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -18,6 +18,7 @@ STATELIST = {0: (0,0,0)} # Game start state STATELIST = {**STATELIST, **{nr+1:state for nr, state in enumerate(product(range(2), range(CARDS.min()*STARTING_CARDS_PLAYER,BLACKJACK + 2), range(CARDS.min()*STARTING_CARDS_DEALER, BLACKJACK+2)))}} def cartesian(x,y): return np.dstack(np.meshgrid(x, y)).reshape(-1, 2).sum(axis=1) @@ -77,15 +78,27 @@ def blackjack_probability(action, stateid_now, stateid_next): # Next state must be a drawable state return 0.0 if dealer_now != dealer_next and player_now != player_next: # Only the player or the dealer can draw a card. Not both simultaneously! return 0.0 # Now either the dealer or the player draws a card if ACTIONLIST[action] == 'draw' and skipped_now == 0: # Player draws a card prob = deal_card_probability(player_now, player_next, take=1) else: # Dealer draws a card if dealer_now >= DEALER_SKIP: if dealer_now != dealer_next: # Dealer always stands once it has a card count higher than set amount return 0.0 else: # Dealer stands return 1.0 prob = deal_card_probability(dealer_now, dealer_next, take=1) return prob def blackjack_rewards(action, stateid): -
iiLaurens revised this gist
Apr 3, 2020 . 1 changed file with 19 additions and 7 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,4 +1,6 @@ import numpy as np import pandas as pd from itertools import product from functools import reduce @@ -10,10 +12,11 @@ CARDS = np.array([2,3,4,5,6,7,8,9,10,10,10,10,11]) BLACKJACK = 21 DEALER_SKIP = 17 STARTING_CARDS_PLAYER = 2 STARTING_CARDS_DEALER = 1 STATELIST = {0: (0,0,0)} # Game start state STATELIST = {**STATELIST, **{nr+1:state for nr, state in enumerate(product(range(2), range(CARDS.min()*STARTING_CARDS_PLAYER,BLACKJACK + 2), range(CARDS.min()*STARTING_CARDS_DEALER, BLACKJACK+2)))}} def cartesian(x,y): return np.dstack(np.meshgrid(x, y)).reshape(-1, 2).sum(axis=1) @@ -32,7 +35,7 @@ def is_gameover(skipped, player, dealer): return any([ dealer >= DEALER_SKIP and skipped == 1, dealer > BLACKJACK and skipped == 1, player > BLACKJACK ]) def blackjack_probability(action, stateid_now, stateid_next): @@ -49,8 +52,8 @@ def blackjack_probability(action, stateid_now, stateid_next): return 0 else: # State lower or equal than 1 is a start of a new game dealer_prob = deal_card_probability(0, dealer_next, take=STARTING_CARDS_DEALER) player_prob = deal_card_probability(0, player_next, take=STARTING_CARDS_PLAYER) return dealer_prob * player_prob @@ -78,6 +81,7 @@ def blackjack_probability(action, stateid_now, stateid_next): # Dealer always skips once it has a card count higher than set amount return 0.0 dealer_prob = deal_card_probability(dealer_now, dealer_next, take=1) if dealer_now < DEALER_SKIP else 1 player_prob = deal_card_probability(player_now, player_next, take=1) if not (ACTIONLIST[action] == 'skip' or skipped_now == 1) else 1 @@ -89,7 +93,7 @@ def blackjack_rewards(action, stateid): if not is_gameover(skipped, player, dealer): return 0 
elif player > BLACKJACK or (player <= dealer and dealer <= BLACKJACK): return -1 elif player == BLACKJACK and dealer < BLACKJACK: return 1.5 @@ -111,12 +115,20 @@ def blackjack_rewards(action, stateid): def print_blackjack_policy(policy): idx = pd.MultiIndex.from_tuples(list(STATELIST.values()), names=['Skipped', 'Player', 'Dealer']) S = pd.Series(['x' if i == 1 else '.' for i in policy], index=idx) S = S.loc[S.index.get_level_values('Skipped')==0].reset_index('Skipped', drop=True) S = S.loc[S.index.get_level_values('Player')>0] S = S.loc[S.index.get_level_values('Dealer')>0] return S.unstack(-1) def print_blackjack_rewards(): idx = pd.MultiIndex.from_tuples(list(STATELIST.values()), names=['Skipped', 'Player', 'Dealer']) S = pd.Series(R[:,0], index=idx) S = S.loc[S.index.get_level_values('Skipped')==1].reset_index('Skipped', drop=True) S = S.loc[S.index.get_level_values('Player')>0] S = S.loc[S.index.get_level_values('Dealer')>0] return S.unstack(-1) # Check that we have a valid transition matrix with transition probabilities summing to 1 assert (T.sum(axis=2).round(10) == 1).all() -
iiLaurens revised this gist
Apr 3, 2020 . 1 changed file with 7 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -91,7 +91,7 @@ def blackjack_rewards(action, stateid): return 0 elif (player > BLACKJACK or player <= dealer) and dealer <= BLACKJACK: return -1 elif player == BLACKJACK and dealer < BLACKJACK: return 1.5 elif player > dealer or dealer > BLACKJACK: return 1 @@ -111,7 +111,12 @@ def blackjack_rewards(action, stateid): def print_blackjack_policy(policy): idx = pd.MultiIndex.from_tuples(list(blackjack.STATELIST.values()), names=['Skipped', 'Player', 'Dealer']) S = pd.Series(['x' if i == 1 else '.' for i in policy], index=idx) S = S.loc[S.index.get_level_values('Skipped')==0].reset_index('Skipped', drop=True) S = S.loc[S.index.get_level_values('Player')>0] S = S.loc[S.index.get_level_values('Dealer')>0] return S.unstack(-1) # Check that we have a valid transition matrix with transition probabilities summing to 1 assert (T.sum(axis=2).round(10) == 1).all() -
iiLaurens revised this gist
Apr 3, 2020 . 1 changed file with 3 additions and 3 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -29,9 +29,9 @@ def deal_card_probability(count_now, count_next, take=1): def is_gameover(skipped, player, dealer): return any([ dealer >= DEALER_SKIP and skipped == 1, dealer > BLACKJACK and skipped == 1, player > BLACKJACK and dealer >= DEALER_SKIP ]) -
iiLaurens revised this gist
Apr 3, 2020 . 1 changed file with 14 additions and 10 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -29,10 +29,10 @@ def deal_card_probability(count_now, count_next, take=1): def is_gameover(skipped, player, dealer): return skipped == 1 and any([ dealer >= DEALER_SKIP, dealer > BLACKJACK, player > BLACKJACK and dealer >= DEALER_SKIP ]) def blackjack_probability(action, stateid_now, stateid_next): @@ -64,7 +64,7 @@ def blackjack_probability(action, stateid_now, stateid_next): # Also player cards cannot increase once in a skipped state return 0.0 if ACTIONLIST[action] == 'skip' or skipped_now == 1: # If willingly skipped or in forced skip (attempted draw in already skipped game): if skipped_next != 1 or player_now != player_next: # Next state must be a skipped state with same card count for player @@ -89,25 +89,29 @@ def blackjack_rewards(action, stateid): if not is_gameover(skipped, player, dealer): return 0 elif (player > BLACKJACK or player <= dealer) and dealer <= BLACKJACK: return -1 elif player == BLACKJACK: return 1.5 elif player > dealer or dealer > BLACKJACK: return 1 else: raise Exception(f'Undefined reward: {skipped}, {player}, {dealer}') # Define transition matrix T = np.zeros((len(ACTIONLIST), len(STATELIST), len(STATELIST))) for a, i, j in product(ACTIONLIST.keys(), STATELIST.keys(), STATELIST.keys()): T[a,i,j] = blackjack_probability(a, i, j) # Define reward matrix R = np.zeros((len(STATELIST), len(ACTIONLIST))) for a, s in product(ACTIONLIST.keys(), STATELIST.keys()): R[s, a] = blackjack_rewards(a, s) def print_blackjack_policy(policy): pass # Check that we have a valid transition matrix with transition probabilities summing to 1 assert (T.sum(axis=2).round(10) == 1).all() -
iiLaurens revised this gist
Apr 3, 2020 . No changes.There are no files selected for viewing
-
iiLaurens revised this gist
Apr 3, 2020 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -105,7 +105,7 @@ def blackjack_rewards(action, stateid): T[a,i,j] = blackjack_probability(0, i, j) # Define reward matrix R = np.zeros((len(STATELIST), len(ACTIONLIST)) for a, s in product(ACTIONLIST.keys(), STATELIST.keys()): R[s,a] = blackjack_rewards(a, s) -
iiLaurens revised this gist
Apr 3, 2020 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -107,7 +107,7 @@ def blackjack_rewards(action, stateid): # Define reward matrix R = np.zeros((2, len(STATELIST))) for a, s in product(ACTIONLIST.keys(), STATELIST.keys()): R[s,a] = blackjack_rewards(a, s) # Check that we have a valid transition matrix with transition probabilities summing to 1 assert (T.sum(axis=2).round(10) == 1).all() -
iiLaurens revised this gist
Apr 3, 2020 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -106,8 +106,8 @@ def blackjack_rewards(action, stateid): # Define reward matrix R = np.zeros((2, len(STATELIST))) for a, s in product(ACTIONLIST.keys(), STATELIST.keys()): R[a,s] = blackjack_rewards(a, s) # Check that we have a valid transition matrix with transition probabilities summing to 1 assert (T.sum(axis=2).round(10) == 1).all() -
iiLaurens revised this gist
Apr 3, 2020 . 1 changed file with 1 addition and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -45,6 +45,7 @@ def blackjack_probability(action, stateid_now, stateid_next): if stateid_now == 0: if skipped_next == 1: # After start of the game the game cannot be in a skipped state return 0 else: # State lower or equal than 1 is a start of a new game -
iiLaurens revised this gist
Apr 3, 2020 . 1 changed file with 0 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -36,7 +36,6 @@ def is_gameover(skipped, player, dealer): ]) def blackjack_probability(action, stateid_now, stateid_next): skipped_now, player_now, dealer_now = STATELIST[stateid_now] skipped_next, player_next, dealer_next = STATELIST[stateid_next] -
iiLaurens created this gist
Apr 3, 2020 .There are no files selected for viewing
# Simplified blackjack modelled as a Markov decision process (MDP).
#
# A state is a tuple (skipped, player, dealer):
#   skipped -- 1 once the player has stopped drawing cards, otherwise 0
#   player  -- card count of the player, capped at BLACKJACK + 1 (= bust)
#   dealer  -- card count of the dealer, capped at BLACKJACK + 1 (= bust)
# State id 0 is the artificial "start of a new game" state (0, 0, 0).
#
# Module-level results:
#   T[a, i, j] -- probability of moving from state i to state j under action a
#   R[a, s]    -- reward collected in state s (independent of the action)

import numpy as np
from itertools import product
from functools import lru_cache, reduce

ACTIONLIST = {
    0: 'skip',
    1: 'draw'
}

# Card values of one suit; the four 10s are 10, J, Q, K and the ace counts 11.
CARDS = np.array([2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 11])
BLACKJACK = 21
DEALER_SKIP = 17  # The dealer stops drawing once its count reaches this value
NR_STARTING_CARDS = 2

# Every reachable card count: from the lowest possible starting hand up to
# BLACKJACK + 1, the single value that represents "bust".
_CARD_COUNTS = range(int(CARDS.min()) * NR_STARTING_CARDS, BLACKJACK + 2)

STATELIST = {0: (0, 0, 0)}  # Game start state
STATELIST.update(
    (nr + 1, state)
    for nr, state in enumerate(product(range(2), _CARD_COUNTS, _CARD_COUNTS))
)


def cartesian(x, y):
    """Return the sums of all pairs (one element of x, one element of y)."""
    return np.dstack(np.meshgrid(x, y)).reshape(-1, 2).sum(axis=1)


def deal_card_probability(count_now, count_next, take=1):
    """Return the probability of going from count_now to count_next.

    ``take`` cards are drawn, each uniformly from CARDS (with replacement).
    Any resulting count above BLACKJACK is capped at BLACKJACK + 1 so that
    all bust hands map onto a single count.
    """
    if take > 1:
        # All possible sums of `take` independently drawn cards
        cards = reduce(cartesian, [CARDS] * take)
    else:
        cards = CARDS

    reachable = np.minimum(count_now + cards, BLACKJACK + 1)
    return (reachable == count_next).sum() / len(cards)


# The transition matrix below asks for the same few hundred probabilities
# about a million times, so memoise them. The public function stays uncached.
_cached_deal_probability = lru_cache(maxsize=None)(deal_card_probability)


def is_gameover(skipped, player, dealer):
    """Return True if the state (skipped, player, dealer) ends the game."""
    return any([
        dealer >= DEALER_SKIP and skipped == 1,  # Both sides stopped drawing
        dealer > BLACKJACK,                      # Dealer is bust
        player > BLACKJACK                       # Player is bust
    ])


def blackjack_probability(action, stateid_now, stateid_next):
    """Return P(stateid_next | stateid_now, action).

    ``action`` is a key of ACTIONLIST, both state ids are keys of STATELIST.
    """
    skipped_now, player_now, dealer_now = STATELIST[stateid_now]
    skipped_next, player_next, dealer_next = STATELIST[stateid_next]

    if stateid_now == stateid_next:
        # Game cannot stay in current state
        return 0.0

    if stateid_now == 0:
        # State 0 is the start of a new game: both sides get their first cards
        if skipped_next == 1:
            # Directly after the start the game cannot be in a skipped state
            return 0.0
        dealer_prob = _cached_deal_probability(0, dealer_next, NR_STARTING_CARDS)
        player_prob = _cached_deal_probability(0, player_next, NR_STARTING_CARDS)
        return dealer_prob * player_prob

    if is_gameover(skipped_now, player_now, dealer_now):
        # We arrived at an end state, now reset the game
        return 1.0 if stateid_next == 0 else 0.0

    # The player does not draw when skipping willingly, or when attempting to
    # draw in a game that is already skipped (forced skip).
    player_stands = ACTIONLIST[action] == 'skip' or skipped_now == 1

    if player_stands:
        if skipped_next != 1 or player_now != player_next:
            # Next state must be a skipped state with the same player count
            return 0.0
    elif skipped_next != 0:
        # After a draw the next state must still be a drawable state
        return 0.0

    if dealer_now >= DEALER_SKIP:
        if dealer_now != dealer_next:
            # Dealer always skips once its card count reached DEALER_SKIP
            return 0.0
        dealer_prob = 1.0
    else:
        dealer_prob = _cached_deal_probability(dealer_now, dealer_next, 1)

    if player_stands:
        player_prob = 1.0
    else:
        player_prob = _cached_deal_probability(player_now, player_next, 1)

    return dealer_prob * player_prob


def blackjack_rewards(action, stateid):
    """Return the reward of being in state ``stateid``.

    The reward does not depend on ``action``; the parameter only mirrors the
    (action, state) layout of the reward matrix. Running games yield 0, a
    loss (a tie counts as a loss) yields -1, a win yields 1, and a win with
    exactly BLACKJACK yields 1.5.
    """
    skipped, player, dealer = STATELIST[stateid]

    if not is_gameover(skipped, player, dealer):
        return 0

    if player > BLACKJACK:
        # Player is bust
        return -1

    dealer_bust = dealer > BLACKJACK
    if not dealer_bust and player <= dealer:
        # Dealer is at least as good as the player. A bust dealer must not be
        # compared by count: its capped count BLACKJACK + 1 would always "win".
        return -1

    if player == BLACKJACK:
        return 1.5

    # Only wins remain here: the dealer is bust or has the lower count
    return 1


# Define transition matrix
T = np.zeros((len(ACTIONLIST), len(STATELIST), len(STATELIST)))
for a, i, j in product(ACTIONLIST, STATELIST, STATELIST):
    # Pass the action itself so that every action gets its own transitions
    T[a, i, j] = blackjack_probability(a, i, j)

# Define reward matrix
R = np.zeros((len(ACTIONLIST), len(STATELIST)))
for a, s in product(ACTIONLIST, STATELIST):
    R[a, s] = blackjack_rewards(a, s)

# Check that we have a valid transition matrix with transition probabilities summing to 1
assert (T.sum(axis=2).round(10) == 1).all()