#!/usr/bin/env python
# -*- coding: utf-8 -*-

__author__ = 'Zachary Voase <disturbyte@gmail.com>'
__version__ = '0.1'

"""
faceoff.py - A fun (non-serious) module for playing games.

Example:

    >>> player1 = ExponentialBayesianTFTPlayer()
    >>> player2 = RandomPlayer()
    >>> game = PrisonersDilemma(player1, player2)
    >>> decisions = game.playoff(n=1000, generator=False)
    >>> results = (player1.score, player2.score)
    >>> isinstance(results[0], int), isinstance(results[1], int)
    (True, True)
    
    >>> tft_player1 = TitForTatPlayer()
    >>> tft_player2 = TitForTatPlayer()
    >>> tft_game = PrisonersDilemma(tft_player1, tft_player2)
    >>> tft_decisions = tft_game.playoff(n=1000, generator=False)
    >>> print (tft_player1.score == tft_player2.score)
    True
"""

from collections import deque
from decimal import Decimal
import random


class Game(object):
    
    """An abstract base class representing a game-theoretical game."""
    
    PAYOFF = {}
    
    def __init__(self, player1, player2):
        self.player1 = player1
        self.player2 = player2
        self.history = []
    
    def play(self):
        # If there is no history, get the initial decisions.
        if not self.history:
            d1 = self.player1()
            d2 = self.player2()
        # Otherwise, tell each player what the other did in the previous round.
        else:
            d1 = self.player1(self.history[-1][1])
            d2 = self.player2(self.history[-1][0])
        # Add these decisions to the history.
        self.history.append((d1, d2))
        
        payoff = self.PAYOFF[(d1, d2)]
        self.player1.score += payoff[0]
        self.player2.score += payoff[1]
        
        return (d1, d2)
    
    def playoff(self, n=1, generator=True):
        if generator:
            return (self.play() for i in xrange(n))
        return [self.play() for i in xrange(n)]


class PrisonersDilemma(Game):
    
    """
    The prisoner's dilemma game.
    
    See <http://en.wikipedia.org/wiki/Prisoner%27s_dilemma>.
    """
    
    # True means co-operate, False means defect.
    COOP = True
    DEFECT = False
    
    PAYOFF = {
        (COOP, COOP): (3, 3),
        (COOP, DEFECT): (0, 5),
        (DEFECT, COOP): (5, 0),
        (DEFECT, DEFECT): (1, 1)
    }


class Player(object):
    
    """An abstract base class for players of game-theoretical games."""
    
    def __init__(self):
        self.score = 0
    
    def __call__(self, op_last=None):
        raise NotImplementedError


class CoroutinePlayer(Player):
    
    """
    A Player which wraps around a coroutine; when called, an instance passes
    the opponent’s last move into the coroutine via `send()`, and the value
    yielded by the coroutine should be the player's decision. At the beginning
    of the game, `next()` will be called (so the coroutine can make an initial
    decision).
    """
    
    generator = None
    
    @classmethod
    def wrapper(cls, generator):
        # Subclasses `CoroutinePlayer` to get a new class with the specified generator.
        return type(
            generator.__name__, (cls,),
            {
                '__module__': generator.__module__,
                '__doc__': generator.__doc__,
                # `staticmethod` is required to prevent the generator function
                # from being turned into an instance method.
                'generator': staticmethod(generator)
            })
    
    def __init__(self, coroutine=None, *args, **kwargs):
        super(CoroutinePlayer, self).__init__()
        if coroutine is None:
            coroutine = self.generator(*args, **kwargs)
        self.coroutine = coroutine
    
    def __call__(self, op_last=None):
        if op_last is None:
            return self.coroutine.next()
        return self.coroutine.send(op_last)


@CoroutinePlayer.wrapper
def AlwaysDefectPlayer():
    """A player which always defects."""
    while True:
        yield False


@CoroutinePlayer.wrapper
def AlwaysCoopPlayer():
    """A player which always co-operates."""
    while True:
        yield True


@CoroutinePlayer.wrapper
def GrimPlayer(nice=True):
    """
    A Grim player, also known as Friedman.
    
    A player which will co-operate until the opponent defects, after which
    this player will always defect.
    """
    grim = False
    op_last = yield nice
    while True:
        if not (op_last or (op_last is None)):
            grim = True
        op_last = yield (not grim)


@CoroutinePlayer.wrapper
def TitForTatPlayer(nice=True):
    """A player which will always copy its opponent's last move."""
    op_last = yield nice
    while True:
        op_last = yield op_last


@CoroutinePlayer.wrapper
def BayesianTFTPlayer(nice=True):
    """
    A player whose chances of co-operation depend on its opponent's history.
    
    The Bayesian Tit-for-Tat player (BTFT) decides whether or not to
    co-operate based on its opponent's history of co-operation. If the
    opponent has always co-operated, then the BTFT player is certain to
    co-operate. If the opponent has co-operated 5 times and defected 10 times,
    the BTFT player has a 1 in 3 chance of co-operation, and so on and so
    forth.
    """
    opponent_history = []
    op_last = yield nice
    while True:
        opponent_history.append(op_last)
        op_last = yield random.choice(opponent_history)


class ExponentialBayesianTFTPlayer(Player):
    """
    A Bayesian TFT player with an exponentially-weighted forgiveness.
    
    The Exponential Bayesian Tit-for-Tat player (EBTFT) works in a similar way
    to the Bayesian Tit-for-Tat player (BTFT), only recent behaviour is given
    more 'weight' than behaviour long in the past. The opponent's history is
    weighted with an exponential decay function, so that if an opponent more
    recently decided to start co-operating, that recent behaviour could cause
    the EBTFT player to become more co-operative. In this way, the EBTFT is
    more forgiving than the simple BTFT (where all previous decisions have
    equal weight no matter how long ago they were), but still less forgiving
    than the simple TFT.
    
    The ``ExponentialBayesianTFTPlayer`` class takes four optional arguments
    as initial parameters. These affect the behaviour of the player.
        
        ``nice``
            If a player is 'nice', it will co-operate on the first move (i.e.
            when it has no reason to defect).
        ``a`` and ``b``
            These affect the exponential decay function which is used to weigh
            the opponent's history. The function is essentially:
                
                a ** (-b * n)
            
            Where n is the number of rounds which have elapsed between now and
            the opponent's decision in question. For example, the most recent
            decision will have ``n = 0``, the one before that will have
            ``n = 1``, and so on and so forth.
            
            In this case, ``a`` defaults to the mathematical constant *e*,
            also known as the base of natural logarithms, which is around
            2.71828182845904523536, and ``b`` defaults to 1. You can make the
            EBTFT act like the simple TFT by setting ``b`` to infinity (this
            value can be found as ``ExponentialBayesianTFTPlayer.INFINITY``),
            and you can make it act like the BTFT by setting ``b`` to zero.
            In essence, this means that both the BTFT and TFT players are just
            special cases of an EBTFT player.
        ``threshold``
            The EBTFT player will only consider decisions in the past where
            the weight is at least this value. This exists for performance
            reasons (because iterating over the entire history at every step
            might take a while), and it might also be useful to some people.
            
            By default, the EBTFT will iterate over the entire history of the
            opponent's previous decisions, calculating weights, et cetera.
            However, when the weight goes below a certain value (which it may
            do very quickly with certain parameters), a disproportionate
            amount of time is spent iterating over past decisions which have
            a very small effect on the overall result. The threshold tells the
            EBTFT to stop iterating over the history when the weight goes
            below a certain value.
            
            By default the threshold is ``0.0001``; you can also turn this
            behaviour off entirely by setting it to zero (or indeed any
            negative number).
    """
    
    E = Decimal('2.71828182845904523536')
    INFINITY = Decimal('Infinity')
    ONE = Decimal(1)
    ZERO = Decimal(0)
    
    def __init__(self, a=E, b=ONE, threshold=Decimal('0.0001'), nice=True):
        super(ExponentialBayesianTFTPlayer, self).__init__()
        
        # Ensure that `a` and `b` are `Decimal` instances.
        if isinstance(a, float):
            a = Decimal(str(a))
        if isinstance(b, float):
            b = Decimal(str(b))
        if isinstance(threshold, float):
            threshold = Decimal(str(threshold))
        
        self.a = a
        self.b = b
        self.threshold = threshold
        self.nice = nice
        # The opponent's history.
        self.op_history = deque()
    
    def weight(self, n):
        if self.b == self.INFINITY:
            # If this is not caught, it results in an error when `n` is zero.
            # When `b` is infinity, the most recent decision has a weight of
            # one and all others have a weight of zero; thus, it acts like the
            # simple TFT player.
            return int(n == 0)
        elif self.b == self.ZERO:
            # This is just a shortcut. When `b` is zero, each decision in the
            # past has equal weight, so the EBTFT acts like a normal BTFT.
            return 1
        
        weight = self.a ** (-1 * self.b * n)
        
        # Ensure that we return a `Decimal` instance.
        if isinstance(weight, float):
            return Decimal(str(weight))
        return Decimal(weight)
    
    def calculate_coop_probability(self):
        # The sum of weights where the opponent co-operated.
        coop_weight = Decimal('0.0')
        # The sum of all weights.
        total_weight = Decimal('0.0')
        
        for (i, decision) in enumerate(self.op_history):
            weight = self.weight(i)
            # Observe the threshold value.
            if weight < self.threshold:
                break
            
            total_weight += weight
            
            # If, in that round, the opponent decided to co-operate:
            if decision:
                coop_weight += weight
        
        # Because these are `Decimal` instances they should return a precise
        # value.
        return coop_weight / total_weight
    
    def __call__(self, op_last=None):
        # `op_last` will be `None` in the first round.
        if op_last is None:
            return self.nice
        
        self.op_history.appendleft(op_last)
        coop_probability = self.calculate_coop_probability()
        
        # The following is a way of 'throwing a dice', essentially, where
        # there are two outcomes with different probabilities of occurring.
        # Since `coop_probability` will lie in the closed interval [0, 1], as
        # will the result of `random.random()`, the probability that a random
        # number is less than or equal to the probability of co-operation will
        # be equal to that probability of co-operation (it's difficult to
        # explain in words).
        return (Decimal(str(random.random())) <= coop_probability)


@CoroutinePlayer.wrapper
def RandomPlayer():
    """A player which randomly decides what to do each iteration."""
    while True:
        yield random.choice((True, False))


if __name__ == '__main__':
    import doctest
    doctest.testmod()