import os
import sys
import pickle
import json
import numpy as np
import sys
import matplotlib 
matplotlib.use('Agg')
from matplotlib import pyplot as plt
import os.path as osp

DEBUG = True

def bin_scores(scores, score_bins):
    """
    Quantize scores to bin-edge values

    Each score is replaced by the edge ``score_bins[i]`` where ``i`` is the
    index returned by ``np.digitize``; for increasing bins that is the
    smallest edge strictly greater than the score. Scores at or above the
    last edge are clamped to the last edge instead of indexing past the end
    of ``score_bins``.

    Arguments:
    -----------
        scores: array-like of scores to quantize
        score_bins: array-like (1-D) of monotonic bin edges

    Returns:
    -----------
        np.ndarray
            The bin-edge value assigned to each score
    """
    # allow plain lists/tuples: fancy indexing below needs an ndarray
    score_bins = np.asarray(score_bins)

    # for increasing bins: bins[i-1] <= x < bins[i]
    scores_quantized = np.digitize(scores, score_bins)  # bin indices

    # np.digitize yields len(bins) for x >= bins[-1]; clamp to the last edge
    scores_quantized = np.minimum(scores_quantized, len(score_bins) - 1)
    return score_bins[scores_quantized]  # bin values


def _empirical_cdf(counts):
    """Return the cumulative sum of counts as float64, scaled to end at 1."""
    cdf = np.cumsum(counts).astype(np.float64)
    cdf /= cdf[-1]
    return cdf


def _save_debug_figure(fig_path, message):
    """Save the current pyplot figure to fig_path, report it and close it."""
    out_dir = os.path.dirname(fig_path)
    # a bare filename has an empty dirname, and os.makedirs('') raises
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    plt.savefig(fig_path, bbox_inches='tight')
    print(message % fig_path)
    plt.close()


def match_histograms(source, template, debug=None):
    """
    Histogram of source data matches that of template data

    Every unique source value is mapped to the template value found at the
    same position of the empirical CDF. The lookup uses ``np.interp``, so a
    mapped value may be linearly interpolated between two template values.

    Arguments:
    -----------
        source: np.ndarray (1-D) after bin quantization
        template: np.ndarray (1-D) after bin quantization
        debug: bool or None
            Whether to save the diagnostic plots 'hist_src_target-DEBUG.png'
            and 'hist_map-DEBUG.png' in the current working directory.
            None (default) falls back to the module-level DEBUG flag.

    Returns:
    -----------
        matched: np.ndarray
            The transformed source data
    """
    if debug is None:
        debug = DEBUG

    # get unique values, indices and counts
    s_val, bin_idx, s_counts = np.unique(source, return_inverse=True,
                                         return_counts=True)
    t_val, t_counts = np.unique(template, return_counts=True)

    # calculate empirical CDFs
    s_cdf = _empirical_cdf(s_counts)
    t_cdf = _empirical_cdf(t_counts)

    # mapping: values in template's CDF closest to source's CDF
    interp_t_val = np.interp(s_cdf, t_cdf, t_val)

    # modify source values to match closest template CDF
    # (bin_idx expands the per-unique-value mapping back to every sample)
    source_matched = interp_t_val[bin_idx]

    if debug:
        sm_val, sm_counts = np.unique(source_matched, return_counts=True)
        sm_cdf = _empirical_cdf(sm_counts)

        # CDFs of source, template and mapped source on one figure
        plt.plot(s_val, s_cdf, label='CDF source', alpha=0.8)
        plt.plot(t_val, t_cdf, label='CDF target', alpha=0.8)
        plt.plot(sm_val, sm_cdf, label='CDF source-matched', alpha=0.8)
        plt.title('Score histograms')
        plt.grid()
        plt.legend()
        _save_debug_figure('hist_src_target-DEBUG.png',
                           'Score histogram saved at: %s')

        # scatter of original score vs. mapped score
        plt.plot(source, source_matched, 'bo', label='Map source')
        plt.title('Score mapping')
        plt.xlabel('Source scores')
        plt.ylabel('Mapped scores')
        plt.grid()
        plt.legend()
        _save_debug_figure('hist_map-DEBUG.png',
                           'Score mapping saved at: %s')
    return source_matched
  
  
if __name__ == '__main__':
    # Smoke test: remap uniformly drawn integers onto the histogram of
    # integer-truncated, sign-folded normal samples.
    data_unif = np.random.randint(low=0, high=10, size=1000)
    normal_samples = np.random.normal(loc=5, scale=5, size=5000)
    data_normal = np.abs(normal_samples.astype(np.int64))
    unif_to_normal = match_histograms(source=data_unif,
                                      template=data_normal)