Skip to content

Instantly share code, notes, and snippets.

@WebAppEngineer
Forked from skuttruf/frac-diff_sk
Created June 30, 2019 15:45
Show Gist options
  • Save WebAppEngineer/504b2d5fa404bae072fb9fc501b3dbb5 to your computer and use it in GitHub Desktop.
Save WebAppEngineer/504b2d5fa404bae072fb9fc501b3dbb5 to your computer and use it in GitHub Desktop.

Revisions

  1. @skuttruf skuttruf revised this gist Feb 28, 2019. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion frac-diff_sk
    Original file line number Diff line number Diff line change
    @@ -69,7 +69,7 @@ def MemoryVsCorr(series, dRange, numberPlots, lag_cutoff, seriesName):
    result.columns = ['order','adf','corr', '5%']
    result['order']=interval
    for counter,order in enumerate(interval):
    seq_traf=seq_transform(series,order,lag_cutoff)
    seq_traf=ts_differencing(series,order,lag_cutoff)
    res=adfuller(seq_traf, maxlag=1, regression='c') #autolag='AIC'
    result.loc[counter,'adf']=res[0]
    result.loc[counter,'5%']=res[4]['5%']
  2. @skuttruf skuttruf created this gist Feb 26, 2019.
    78 changes: 78 additions & 0 deletions frac-diff_sk
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,78 @@
    """
    Python code for fractional differencing of pandas time series
    illustrating the concepts of the article "Preserving Memory in Stationary Time Series"
    by Simon Kuttruf

    While this code is dedicated to the public domain for use without permission, the author disclaims any liability in connection with the use of this code.
    """

    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt

    def getWeights(d, lags):
        """Return the lag weights of the (fractional) differencing operator.

        The coefficients of the series expansion of the differencing operator
        of real order ``d`` are built with the recurrence
        ``w[k] = -w[k-1] * (d - k + 1) / k``, starting from ``w[0] = 1``.

        The result is a NumPy column vector of shape ``(lags, 1)``.  The
        leading weight 1 is always present, so ``lags < 1`` still yields a
        ``(1, 1)`` array.
        """
        coefficients = [1]
        previous = 1
        for k in range(1, lags):
            # Each weight follows from the one before it.
            previous = -previous * (d - k + 1) / k
            coefficients.append(previous)
        return np.array(coefficients).reshape(-1, 1)

    def plotWeights(dRange, lags, numberPlots):
        """Plot the lag weights for several orders of differencing.

        ``numberPlots`` orders are spaced evenly over
        ``[dRange[0], dRange[1]]``.  For each order the first ``lags`` weights
        from ``getWeights`` are drawn as one line, labelled in the legend with
        the order rounded to two decimals.  The figure is displayed with
        ``plt.show()``; nothing is returned.
        """
        orders = np.linspace(dRange[0], dRange[1], numberPlots)

        # One row per lag, one column per order of differencing.
        table = pd.DataFrame(np.zeros((lags, numberPlots)))
        for column, d in enumerate(orders):
            table[column] = getWeights(d, lags)
        table.columns = [round(d, 2) for d in orders]

        table.plot()
        plt.legend(title='Order of differencing')
        plt.title('Lag coefficients for various orders of differencing')
        plt.xlabel('lag coefficients')
        plt.show()

    # Demo: weights of the first 7 lags for 6 orders spaced evenly over [0, 1].
    plotWeights(dRange=[0, 1], lags=7, numberPlots=6)

    def ts_differencing(series, order, lag_cutoff):
        """Return ``series`` after (fractional) differencing of order ``order``.

        The differencing operator is truncated to its first ``lag_cutoff``
        weights.  Missing values (those introduced by shifting as well as any
        already in ``series``) are replaced by 0 before weighting, and the
        first ``lag_cutoff`` entries of the weighted sum are dropped from the
        returned series.
        """
        coefficients = getWeights(order, lag_cutoff)
        weighted_lags = (
            coefficients[lag] * series.shift(lag).fillna(0)
            for lag in range(lag_cutoff)
        )
        # sum() starts from 0 and adds the terms in increasing lag order.
        differenced = sum(weighted_lags)
        return differenced[lag_cutoff:]


    def plotMemoryVsCorr(result, seriesName):
        """Plot ADF statistics and correlation against the order of differencing.

        ``result`` supplies the columns ``order``, ``adf``, ``5%`` and
        ``corr``.  The ``adf`` and ``5%`` curves are drawn on one y-axis and
        ``corr`` on a twin y-axis sharing the same x-axis.  ``seriesName`` is
        inserted into the title.  The figure is displayed with ``plt.show()``;
        nothing is returned.
        """
        adf_color = 'xkcd:deep red'
        corr_color = 'xkcd:cornflower blue'

        _, adf_axis = plt.subplots()
        corr_axis = adf_axis.twinx()
        orders = result.order

        adf_axis.plot(orders, result['adf'], color=adf_color)
        adf_axis.plot(orders, result['5%'], color='xkcd:slate')
        corr_axis.plot(orders, result['corr'], color=corr_color)

        adf_axis.set_xlabel('order of differencing')
        adf_axis.set_ylabel('adf', color=adf_color)
        adf_axis.tick_params(axis='y', labelcolor=adf_color)
        corr_axis.set_ylabel('corr', color=corr_color)
        corr_axis.tick_params(axis='y', labelcolor=corr_color)

        plt.title('ADF test statistics and correlation for %s' % (seriesName))
        plt.show()


    from statsmodels.tsa.stattools import adfuller
    def MemoryVsCorr(series, dRange, numberPlots, lag_cutoff, seriesName):
        """Compare ADF statistics and linear correlation across differencing orders.

        For ``numberPlots`` orders spaced evenly over
        ``[dRange[0], dRange[1]]``, the series is differenced with
        ``ts_differencing`` (truncated to ``lag_cutoff`` weights) and two
        quantities are recorded:

        * the ADF test statistic and its 5% critical value, from
          ``adfuller(..., maxlag=1, regression='c')``;
        * the linear correlation between the differenced series and the
          original series with its first ``lag_cutoff`` entries dropped and
          missing values replaced by 0.

        The table is plotted with ``plotMemoryVsCorr`` (titled with
        ``seriesName``) and returned as a DataFrame with the columns
        ``order``, ``adf``, ``corr`` and ``5%``.
        """
        interval = np.linspace(dRange[0], dRange[1], numberPlots)
        result = pd.DataFrame(
            np.zeros((len(interval), 4)),
            columns=['order', 'adf', 'corr', '5%'],
        )
        result['order'] = interval

        # The undifferenced reference does not depend on the order, so build
        # it once instead of on every iteration.
        reference = series[lag_cutoff:].fillna(0)

        for counter, order in enumerate(interval):
            # ts_differencing is the differencing routine defined in this
            # file; the previously referenced name seq_transform is not
            # defined anywhere and raised NameError.
            seq_traf = ts_differencing(series, order, lag_cutoff)
            res = adfuller(seq_traf, maxlag=1, regression='c')  # autolag='AIC'
            result.loc[counter, 'adf'] = res[0]  # test statistic
            result.loc[counter, '5%'] = res[4]['5%']  # 5% critical value
            result.loc[counter, 'corr'] = np.corrcoef(reference, seq_traf)[0, 1]

        plotMemoryVsCorr(result, seriesName)
        return result