Skip to content

Instantly share code, notes, and snippets.

@Gabriel-p
Created August 18, 2015 14:04
Show Gist options
  • Save Gabriel-p/17e67b705d12f7c05293 to your computer and use it in GitHub Desktop.
Save Gabriel-p/17e67b705d12f7c05293 to your computer and use it in GitHub Desktop.

Revisions

  1. Gabriel-p created this gist Aug 18, 2015.
    260 changes: 260 additions & 0 deletions KDE_mult_overdens.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,260 @@

    import numpy as np
    from scipy.ndimage.filters import maximum_filter
    from scipy.ndimage.filters import gaussian_filter
    from scipy.ndimage.morphology import generate_binary_structure, binary_erosion
    from scipy import stats
    import matplotlib.pyplot as plt
    import matplotlib.gridspec as gridspec
    import matplotlib.offsetbox as offsetbox
    # from mpl_toolkits.axes_grid1 import make_axes_locatable


    def get_coords():
    '''
    Return coordinates from file.
    '''
    # Each sub-list in file_data is a row of the file.
    # file_data = np.loadtxt("TR24_clean.dat")
    file_data = np.loadtxt("CLUSTER.DAT")

    # Extract coordinates and zip them into two lists.
    ra, dec = zip(*file_data)[1], zip(*file_data)[2]

    # ra_rang, dec_rang = max(ra) - min(ra), max(dec) - min(dec)
    # print 'RA range: ', ra_rang
    # print 'DEC range: ', dec_rang

    return ra, dec


    def get_2d_histo(x_data, y_data, N_bins, std_dev):
    '''
    Return 2D histogram for the positional data.
    '''
    xmin, xmax = min(x_data), max(x_data)
    ymin, ymax = min(y_data), max(y_data)

    # Calculate the number of bins used.
    x_rang, y_rang = (xmax - xmin), (ymax - ymin)

    # Bin width to create the 2D histogram.
    bin_width = min(x_rang, y_rang) / N_bins

    # Number of bins in x,y given the bin width.
    binsxy = [int(x_rang / bin_width), int(y_rang / bin_width)]

    # hist_2d is the 2D histogram, *edges store the edges of the bins.
    hist_2d, xedges, yedges = np.histogram2d(
    x_data, y_data, range=[[xmin, xmax], [ymin, ymax]], bins=binsxy)

    hist = gaussian_filter(hist_2d, std_dev, mode='nearest') # 'constant')

    return hist, xedges, yedges


    def detect_peaks(image):
    """
    Takes an image and detect the peaks using the local maximum filter.
    Returns a boolean mask of the peaks (i.e. 1 when
    the pixel's value is the neighborhood maximum, 0 otherwise)
    """

    # define an 8-connected neighborhood
    neighborhood = generate_binary_structure(2, 2)

    # apply the local maximum filter; all pixel of maximal value
    # in their neighborhood are set to 1
    local_max = maximum_filter(image, footprint=neighborhood) == image
    # local_max is a mask that contains the peaks we are
    # looking for, but also the background.
    # In order to isolate the peaks we must remove the background from the
    # mask.

    # we create the mask of the background
    background = (image == 0)

    # a little technicality: we must erode the background in order to
    # successfully subtract it form local_max, otherwise a line will
    # appear along the background border (artifact of the local maximum filter)
    eroded_background = binary_erosion(
    background, structure=neighborhood, border_value=1)

    # we obtain the final mask, containing only peaks,
    # by removing the background from the local_max mask
    detected_peaks = local_max - eroded_background

    return detected_peaks


    def get_peaks_coords(peaks, ra, dec, histo_edges):
    '''
    Transform peak bin coordinates into actual coordinates.
    '''

    peaks_coords = []
    # for each smoothed chart.
    for i, h_p in enumerate(peaks):
    xedges, yedges = histo_edges[i]
    max_dens_coords = []
    # For each x column in the histogram.
    for x_p_indx, x_col in enumerate(h_p):
    # For each y column in the histogram.
    for y_p_indx, y_p in enumerate(x_col):
    if y_p:
    x_cent_bin, y_cent_bin = x_p_indx, y_p_indx
    # x,y coords of the center of the above bin.
    x_cent_pix = np.average(xedges[x_cent_bin:x_cent_bin + 2])
    y_cent_pix = np.average(yedges[y_cent_bin:y_cent_bin + 2])
    max_dens_coords.append([x_cent_pix, y_cent_pix])

    peaks_coords.append(max_dens_coords)

    return peaks_coords


    def get_kde_dens(ra, dec, peaks_coords):
    '''
    '''

    # Obtain Gaussian KDE in coordinates.
    values = np.vstack([ra, dec])
    kernel = stats.gaussian_kde(values)

    max_dens = []
    for p in peaks_coords:
    # Evaluate kernel in peak coordinates.
    positions = np.vstack([zip(*p)[0], zip(*p)[1]])
    k_pos = kernel(positions)
    # Store density values in those positions.
    max_dens.append(k_pos)

    return max_dens


    def get_filter_dens(peaks_coords, max_dens, thresh=0.75):
    '''
    '''

    max_dens_coords, max_dens_filt = [], []
    for p, md in zip(peaks_coords, max_dens):
    max_d = max(md)
    coords_f, dens_f = [], []
    for i, dens in enumerate(md):

    # Only store those that are larger than a certain threshold.
    if dens > (max_d * thresh):
    dens_f.append(dens)
    coords_f.append(p[i])

    max_dens_coords.append(coords_f)
    max_dens_filt.append(dens_f)

    return max_dens_coords, max_dens_filt


    def make_plot(std_dev_lst, clust_histo, ra, dec, peaks_coords, max_dens,
    max_dens_coords, max_dens_filt, thresh):
    '''
    '''

    fig = plt.figure(figsize=(15, 50))
    gs = gridspec.GridSpec(16, 3)

    i, j = 0, 0
    for h, p, m, mc, mf in zip(clust_histo, peaks_coords, max_dens,
    max_dens_coords, max_dens_filt):
    print 'Plotting: ', i, i + 1, i + 2

    ax1 = plt.subplot(gs[i])
    plt.tick_params(axis='both', which='major', labelsize=8)
    plt.imshow(h.transpose(), origin='lower', interpolation='none')
    # ax.imshow(h.transpose(), origin='lower')
    # Text box.
    ob = offsetbox.AnchoredText(r'$\sigma={}$'.format(std_dev_lst[j]),
    loc=2, prop=dict(size=8))
    ob.patch.set(alpha=0.65)
    ax1.add_artist(ob)
    ax1.invert_xaxis()
    j += 1

    # ax2 = plt.subplot(gs[i + 1])
    # cm = plt.cm.gist_earth_r
    # plt.imshow(p.transpose(), origin='lower', cmap=cm)
    # ax2.invert_xaxis()

    siz = 5 + 95 * (np.array(m) / max(m)) ** 2
    ax2 = plt.subplot(gs[i + 1])
    plt.tick_params(axis='both', which='major', labelsize=8)
    plt.xlabel(r'$RA\,(^{\circ})$', fontsize=10)
    plt.ylabel(r'$DEC\,(^{\circ})$', fontsize=10)
    x_c, y_c = zip(*p)[0], zip(*p)[1]
    plt.xlim(min(ra), max(ra))
    plt.ylim(min(dec), max(dec))
    cm = plt.cm.get_cmap('RdYlBu_r')
    plt.scatter(x_c, y_c, c=m, cmap=cm, s=siz, lw=0.25)
    ax2.invert_xaxis()
    ax2.set_aspect('equal')

    # v_min_mp, v_max_mp = min(m), max(m)
    siz = 5 + 95 * (np.array(mf) / max(mf)) ** 6
    ax3 = plt.subplot(gs[i + 2])
    plt.tick_params(axis='both', which='major', labelsize=8)
    plt.xlabel(r'$RA\,(^{\circ})$', fontsize=10)
    plt.ylabel(r'$DEC\,(^{\circ})$', fontsize=10)
    x_c, y_c = zip(*mc)[0], zip(*mc)[1]
    plt.xlim(min(ra), max(ra))
    plt.ylim(min(dec), max(dec))
    cm = plt.cm.get_cmap('RdYlBu_r')
    plt.scatter(x_c, y_c, c=mf, cmap=cm, s=siz, lw=0.25)
    # vmin=v_min_mp, vmax=v_max_mp)
    # Text box.
    ob = offsetbox.AnchoredText('thresh={}'.format(thresh), loc=2,
    prop=dict(size=8))
    ob.patch.set(alpha=0.5)
    ax3.add_artist(ob)
    # Invert axis and set aspect.
    ax3.invert_xaxis()
    ax3.set_aspect('equal')

    # Increase counter.
    i += 3

    # Output png file.
    fig.tight_layout()
    plt.savefig('/home/gabriel/Descargas/peak_detect.png', dpi=300)


    # Get coordinates.
    ra, dec = get_coords()

    # Generate 2D histogram of the coordinates, using several different numbers
    # of bins and standard deviating values for the Gaussian smoothing.
    clust_histo, histo_edges = [], []
    std_dev_lst = [2, 2.25, 2.5, 2.75, 3., 3.25, 3.5, 3.75, 4., 4.25, 4.5,
    4.75, 5., 5.25, 5.5, 5.75]
    for N_bins in [100]:
    for std_dev in std_dev_lst:
    h, xedges, yedges = get_2d_histo(ra, dec, N_bins, std_dev)
    clust_histo.append(h)
    histo_edges.append([xedges, yedges])

    # Detect peaks.
    peaks = []
    for h in clust_histo:
    peaks.append(detect_peaks(h))

    # Transform peaks bin coordinates into actual coordinates.
    peaks_coords = get_peaks_coords(peaks, ra, dec, histo_edges)

    # Obtain KDE values for each peak in each smoothed cluster.
    max_dens = get_kde_dens(ra, dec, peaks_coords)

    thresh = 0.9
    # Filter found max density coordinates below a certain threshold.
    max_dens_coords, max_dens_filt = get_filter_dens(peaks_coords, max_dens,
    thresh)

    # Plot results.
    make_plot(std_dev_lst, clust_histo, ra, dec, peaks_coords, max_dens,
    max_dens_coords, max_dens_filt, thresh)