WebAppEngineer · June 30, 2019 15:45 · Feb 28, 2019 · Feb 26, 2019
diff --git a/frac-diff_sk b/frac-diff_sk
@@ -69,7 +69,7 @@ def MemoryVsCorr(series, dRange, numberPlots, lag_cutoff, seriesName):
     result.columns = ['order','adf','corr', '5%']
     result['order']=interval
     for counter,order in enumerate(interval):
-        seq_traf=seq_transform(series,order,lag_cutoff)
+        seq_traf=ts_differencing(series,order,lag_cutoff)
         res=adfuller(seq_traf, maxlag=1, regression='c') #autolag='AIC'
         result.loc[counter,'adf']=res[0]
         result.loc[counter,'5%']=res[4]['5%']

diff --git a/frac-diff_sk b/frac-diff_sk
@@ -0,0 +1,78 @@
+"""
+Python code for fractional differencing of pandas time series
+illustrating the concepts of the article "Preserving Memory in Stationary Time Series" 
+by Simon Kuttruf
+
+While this code is dedicated to the public domain for use without permission, the author disclaims any liability in connection with the use of this code.
+"""
+
+import numpy as np 
+import pandas as pd
+import matplotlib.pyplot as plt
+
+def getWeights(d,lags):
+    # Weights of the series expansion of the differencing operator for a real
+    # order d, truncated to `lags` terms; the first weight is always 1.
+    # Returned as a column vector (one row per weight).
+    coeffs=[1]
+    for lag in range(1,lags):
+        coeffs.append(-coeffs[-1]*(d-lag+1)/lag)
+    return np.array(coeffs).reshape(-1,1)
+
+def plotWeights(dRange, lags, numberPlots):
+    # Plot the first `lags` differencing weights for numberPlots orders evenly
+    # spaced between dRange[0] and dRange[1]; each order is one column of the frame.
+    orders=np.linspace(dRange[0],dRange[1],numberPlots)
+    table=pd.DataFrame(np.zeros((lags, numberPlots)))
+    for col, diff_order in enumerate(orders):
+        table[col]=getWeights(diff_order,lags)
+    table.columns = [round(order,2) for order in orders]  # label columns by order, rounded to 2 decimals
+    table.plot()
+    plt.legend(title='Order of differencing'); plt.xlabel('lag coefficients')
+    plt.title('Lag coefficients for various orders of differencing')
+    plt.show()
+
+plotWeights([0,1],7,6)  # module-level demo call: runs whenever this file is executed; 6 orders between 0 and 1, 7 weights each
+
+def ts_differencing(series, order, lag_cutoff):
+    # Apply (fractional) differencing of real order `order` to `series` using the
+    # first lag_cutoff weights; NaN values, including the gaps introduced by
+    # shifting, count as 0. The first lag_cutoff rows of the sum are dropped.
+    coeffs=getWeights(order, lag_cutoff)
+    lagged_terms=(coeffs[lag]*series.shift(lag).fillna(0)
+                  for lag in range(lag_cutoff))
+    total=sum(lagged_terms)
+    return total[lag_cutoff:]
+
+
+def plotMemoryVsCorr(result, seriesName):
+    # Plot 'adf' and '5%' (left y-axis) and 'corr' (twin right y-axis) against result.order, then show.
+    adf_color, corr_color = 'xkcd:deep red', 'xkcd:cornflower blue'
+    _, adf_axis = plt.subplots(); corr_axis = adf_axis.twinx()
+    adf_axis.plot(result.order, result['adf'], color=adf_color)
+    adf_axis.plot(result.order, result['5%'], color='xkcd:slate')
+    corr_axis.plot(result.order, result['corr'], color=corr_color)
+    adf_axis.set_xlabel('order of differencing')
+    for axis, label, shade in ((adf_axis,'adf',adf_color), (corr_axis,'corr',corr_color)):
+        axis.set_ylabel(label, color=shade); axis.tick_params(axis='y', labelcolor=shade)
+    plt.title('ADF test statistics and correlation for %s' % (seriesName))
+    plt.show()
+
+
+from statsmodels.tsa.stattools import adfuller 
+def MemoryVsCorr(series, dRange, numberPlots, lag_cutoff, seriesName):
+    # Build, plot (via plotMemoryVsCorr) and return a data frame with one row per order of differencing:
+    # columns 'adf' and '5%' hold adfuller outputs, 'corr' the linear correlation with the input series.
+    # NOTE(review): seq_transform (called in the loop) is not defined in this hunk; the follow-up hunk for this function swaps the call to ts_differencing.
+    interval=np.linspace(dRange[0], dRange[1],numberPlots)  # numberPlots evenly spaced orders from dRange[0] to dRange[1]
+    result=pd.DataFrame(np.zeros((len(interval),4)))  # zero-filled, one row per order; columns named on the next line
+    result.columns = ['order','adf','corr', '5%']
+    result['order']=interval
+    for counter,order in enumerate(interval):
+        seq_traf=seq_transform(series,order,lag_cutoff)
+        res=adfuller(seq_traf, maxlag=1, regression='c') #autolag='AIC'
+        result.loc[counter,'adf']=res[0]
+        result.loc[counter,'5%']=res[4]['5%']
+        result.loc[counter,'corr']= np.corrcoef(series[lag_cutoff:].fillna(0),seq_traf)[0,1]  # input series (first lag_cutoff rows dropped, NaN -> 0) vs. transformed series
+    plotMemoryVsCorr(result, seriesName)    
+    return result