FHLMC_Credit_Analysis.py
@domisoxz · Last active July 6, 2023

    import pandas as pd
    import requests
    import io
    import os
    import zipfile
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    import seaborn as sns
    import numpy as np

    import matplotlib as mpl

    pd.options.display.max_rows = 40

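#The two %-lines below are Jupyter magics; this gist is meant to run in a notebook.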
    %matplotlib inline
    %config InlineBackend.figure_format = 'retina'
    plt.style.use('seaborn-colorblind')
    plt.rcParams["patch.force_edgecolor"] = True

    # You may want to adjust the figure size if the figures appear too big or too small on your screen.
    mpl.rcParams['figure.figsize'] = (10.0, 6.0)

#Download paths
Inputs = "enter your input_path here"
Outputs = "enter your output_path here"
Download = "https://freddiemac.embs.com/FLoan/Data/"

#Assign column names and extract a shorter list of columns from the original files
orig_columns = ["Credit_Score","First_Payment_Date","First_Time_Home_Buyer","Maturity_Date","MSA","Insurance_pct","Units","Occupancy","Combined_LTV","DTI","UPB","LTV","Interest","Channel","PPM","Product_Type","State","Property_Type","ZIP","ID","Loan_Purpose","Term","Borrowers","Seller","Servicer","Super_Confirming","Program"]
orig_columns_extract = ["ID","Credit_Score","MSA","Combined_LTV","LTV","DTI","State","ZIP"]
perf_columns = ["ID","Date","C_UPB","Del_Status","Loan_Age","Remain_Age","Repurchase","Modification","Zero_UPB","Zero_UPB_Date","C_IR","C_deferred_UPB","DDLPI","MI_Rec","Net_Sales","Non_MI_Rec","Expenses","Legal_Costs","Maintain_Cost","Taxes","Other_Expenses","Actual_Loss","Modification_Cost","Step_Modification","Deferred_PP","ELTV","Zero_UPB_Removal","Del_Accrued_Interest","Del_Disaster","Borrower_Assistance"]
perf_columns_extract = ["ID","Date","C_UPB","Del_Status","Loan_Age","Remain_Age","Zero_UPB","Zero_UPB_Date","Net_Sales","Actual_Loss","ELTV","Del_Disaster"]

    #Specify login credentials
login = {
    'username': 'your login email',
    'password': 'your password'
}
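#Optional alternative (not in the original gist): read the credentials from
#environment variables instead of hard-coding them; FMAC_USER / FMAC_PASS are
#hypothetical variable names.
#login = {
#    'username': os.environ.get('FMAC_USER'),
#    'password': os.environ.get('FMAC_PASS')
#}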
#Define the vintage data to download (for this article, we will look into vintages from Q1 2014 to Q2 2019)
Period = ["Q12014","Q22014","Q32014","Q42014","Q12015","Q22015","Q32015","Q42015","Q12016","Q22016","Q32016","Q42016","Q12017","Q22017","Q32017","Q42017","Q12018","Q22018","Q32018","Q42018","Q12019","Q22019"]

def Vintage_data(perf_file, orig_file, export_file):
    #The raw files are pipe-delimited with no header row
    Perf = pd.read_csv(os.path.join(Inputs, perf_file), sep='|', header=None)
    Orig = pd.read_csv(os.path.join(Inputs, orig_file), sep='|', header=None)
    Orig.columns = orig_columns
    Perf.columns = perf_columns
    Orig = Orig[orig_columns_extract]
    Perf = Perf[perf_columns_extract]
    print("Origination File Unique ID #:%s" %(Orig['ID'].nunique()))
    print("Performance File Unique ID #:%s" %(Perf['ID'].nunique()))
    #Left merge: every monthly performance record picks up the loan's static origination attributes
    Vintage = pd.merge(Perf, Orig, how='left', on=['ID'])
    Vintage.to_csv(os.path.join(Outputs, export_file))

with requests.Session() as s:
    #Log in once per session (assumption: the credentials defined above are posted with this request)
    p = s.post('https://freddiemac.embs.com/FLoan/Data/download2.php', data=login)
    for Qtr in Period:
        #File Names
        zip_file = "historical_data1_" + Qtr + ".zip"
        perf_file = "historical_data1_time_" + Qtr + ".txt"
        orig_file = "historical_data1_" + Qtr + ".txt"
        export_file = Qtr + ".csv"
        r = s.get(os.path.join(Download, zip_file))
        z = zipfile.ZipFile(io.BytesIO(r.content))
        z.extractall(Inputs)
        Vintage_data(perf_file, orig_file, export_file)
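
#Optional sanity check (not part of the original gist): read one exported
#vintage file back and confirm the left merge kept every performance record.
#"Q12014.csv" is just the first quarter produced by the loop above.
check = pd.read_csv(os.path.join(Outputs, "Q12014.csv"), index_col=0,
                    dtype={'Del_Status': object}, low_memory=False)
print(check.shape)                          #rows = monthly performance records
print(check['ID'].nunique())                #distinct loans in the cohort
print(check['Credit_Score'].isnull().sum()) #IDs with no match in the origination file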

#Collect the first observed record of every loan across all cohorts
data_all = pd.DataFrame()
for cohort in Period:
    cohort_data = cohort + ".csv"
    cohort = pd.read_csv(os.path.join(Outputs, cohort_data), index_col=0, dtype={'Del_Status': object}, low_memory=False)
    cohort['Date'] = pd.to_datetime(cohort['Date'], format='%Y%m')
    #Back out the origination date from the performance date and loan age
    cohort['Orig_Date'] = cohort['Date'] - cohort['Loan_Age'].values.astype("timedelta64[M]") - pd.DateOffset(months=1)
    cohort['Orig_Date'] = pd.to_datetime(cohort.Orig_Date) + pd.offsets.MonthBegin(0)
    cohort = cohort.sort_values(by=['ID', 'Date'])
    #One row per loan: its first performance record
    orig_qtr = cohort.groupby("ID").first().reset_index()
    orig_qtr['Orig_Date'] = orig_qtr['Orig_Date'].dt.date
    data_all = pd.concat([data_all, orig_qtr])

    #Originations count over time
    Orig_count = data_all.groupby("Orig_Date")['ID'].count().reset_index()
    Orig_count['ID'] = Orig_count['ID']/1000
    Orig_count = Orig_count[:-1]

    #Originations UPB over time
    Orig_UPB = data_all.groupby("Orig_Date")['C_UPB'].sum().reset_index()
    Orig_UPB['C_UPB'] = Orig_UPB['C_UPB']/1000000000
    Orig_UPB = Orig_UPB[:-1]
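#Note: the [:-1] slices above appear to drop the final origination month,
#which is only partially observed in the data.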

    Orig_count['Orig_Date'] = pd.to_datetime(Orig_count['Orig_Date'],format='%Y-%m-%d')

    x = Orig_count['Orig_Date'].tolist()
    y1 = Orig_count['ID']
    y2 = Orig_UPB['C_UPB']

    fig = plt.figure()
    fig.suptitle("Freddie Mac Mortgage Origination: Q1 2014 to Q2 2019 \n left - by count; right - by volume", fontsize=15)
    ax1 = fig.add_subplot(111)
    ax1.plot(x, y1)
    ax1.set_ylabel('Count in Thousands')

    ax2 = ax1.twinx()
    ax2.plot(x, y2, 'r-')
ax2.set_ylabel('Volume in Billions', color='r')
for tl in ax2.get_yticklabels():
    tl.set_color('r')

    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    ax1.xaxis.set_major_locator(mdates.MonthLocator((1,4,7,10)))

    plt.setp(ax1.get_xticklabels(), rotation=70, horizontalalignment='right')
    plt.show()

    data_all['Orig_Year'] = pd.DatetimeIndex(data_all["Orig_Date"]).year

# Distribution of original loan-to-value (a value of 999 marks missing data - 155 accounts)
data_all.LTV.plot(kind='hist',bins=np.linspace(data_all.LTV.min(), 110, 100))

# Distribution of credit score at origination (a value of 9999 marks missing data - 650 accounts)
data_all.Credit_Score.plot(kind='hist',bins=np.linspace(data_all.Credit_Score.min(), 850,100))

# Distribution of DTI (a value of 999 marks missing data - 328 accounts)
data_all.DTI.plot(kind='hist',bins=np.linspace(data_all.DTI.min(),60,100))
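
#A minimal sketch (not in the original gist): exclude the missing-data
#sentinels noted above so the histograms are not stretched by them.
#The same filter applies to the Credit_Score and DTI plots.
clean = data_all[(data_all.LTV < 999) & (data_all.Credit_Score < 9999) & (data_all.DTI < 999)]
clean.LTV.plot(kind='hist', bins=np.linspace(clean.LTV.min(), 110, 100))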

#Default Flag
def del_analysis(vintage):

    #Create default flag due to 180+ day delinquency (any status beyond '5')
    vintage.loc[(~vintage["Del_Status"].isin(('0','1','2','3','4','5'))), "default_flag1"] = 1

    #Create default flag due to zero balances. Not using actual loss here because in some cases
    #there will be "loss" due to a reperforming sale.
    #For now, zero balance codes of 02, 03, and 09 count as default - TBD
    vintage.loc[(vintage["Zero_UPB"].isin(('2.','9.','3.'))), "default_flag2"] = 1
    vintage.loc[(vintage["default_flag1"]==1) | (vintage["default_flag2"]==1), "default_flag"] = 1

    #To create the first default date, sort by ID and Date, then group by ID and take the first date
    default = vintage[vintage["default_flag"]==1][["ID","Date","default_flag"]]
    default = default.sort_values(by=['ID', 'Date'])
    default = default.rename(columns={'Date':'Default_Date'})
    default = default.groupby("ID").first().reset_index()

    #Merge first default date and finalize the default flag:
    #0 = never defaulted or pre-default, 1 = first default month, 2 = post-default
    vintage_tag = pd.merge(vintage, default[["ID","Default_Date"]], how='left', on=['ID'])
    conditions = [
        (vintage_tag['Default_Date'].isnull()),
        (vintage_tag['Date']<vintage_tag['Default_Date']),
        (vintage_tag['Date']==vintage_tag['Default_Date']),
        (vintage_tag['Date']>vintage_tag['Default_Date'])
    ]
    values = [0, 0, 1, 2]
    vintage_tag['default_flag'] = np.select(conditions, values)

    #Create PD dataset (all default_flag = 0 plus first-time default_flag = 1)
    PD_non_default = vintage_tag[vintage_tag['default_flag']==0]
    PD_default = vintage_tag[vintage_tag['default_flag']==1]
    df_PD = pd.concat([PD_non_default, PD_default])

    conditions_1 = [
        (df_PD['default_flag']==1),
        (df_PD['Del_Status']=='1'),
        (df_PD['Del_Status']=='2'),
        (df_PD['Del_Status']=='3'),
        (df_PD['Del_Status']=='4'),
        (df_PD['Del_Status']=='5'),
        (df_PD['Del_Status']=='0')
    ]
    values_1 = ["Default","DPD_30", "DPD_60", "DPD_90", "DPD_120", "DPD_150","DPD_0"]
    df_PD['DPD_Bucket'] = np.select(conditions_1, values_1)

    TM_PD = df_PD[["Date","ID","DPD_Bucket"]]
    TM_count = Transition_Analysis(TM_PD)

    #Delinquency / default status count over time (used for the plots below)
    Del_count = df_PD.groupby(["Date","DPD_Bucket"])["ID"].count().reset_index(name='Count')
    Del_count = Del_count.pivot(index='Date', columns='DPD_Bucket', values='Count').reset_index()

    return Del_count, TM_count
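
#Toy illustration (hypothetical data, not in the original gist) of the
#default_flag coding above: 0 before default, 1 in the first default month, 2 afterwards.
toy = pd.DataFrame({'Date': pd.to_datetime(['2015-01-01','2015-02-01','2015-03-01']),
                    'Default_Date': pd.to_datetime(['2015-02-01']*3)})
print(np.select(
    [toy['Default_Date'].isnull(),
     toy['Date'] < toy['Default_Date'],
     toy['Date'] == toy['Default_Date'],
     toy['Date'] > toy['Default_Date']],
    [0, 0, 1, 2]))  #-> [0 1 2]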

#Run the delinquency / default analysis for every cohort
#(note: the Transition_Analysis function defined further below must be loaded first)
Del_count_all = pd.DataFrame()
TM_count_all = pd.DataFrame()
for cohort in Period:
    print(cohort)
    cohort_data = cohort + ".csv"
    cohort = pd.read_csv(os.path.join(Outputs, cohort_data), index_col=0, dtype={'Del_Status': object}, low_memory=False)
    cohort['Date'] = pd.to_datetime(cohort['Date'], format='%Y%m')
    Del_count, TM_count = del_analysis(cohort)
    Del_count_all = pd.concat([Del_count_all, Del_count])
    TM_count_all = pd.concat([TM_count_all, TM_count])

    #Plot Delinquencies and Defaults over time
Del_count_all = Del_count_all.groupby("Date")[["Default","DPD_30", "DPD_60", "DPD_90", "DPD_120", "DPD_150","DPD_0"]].sum().reset_index()
Del_count_all['total'] = Del_count_all["DPD_0"] + Del_count_all["DPD_30"] + Del_count_all["DPD_60"] + Del_count_all["DPD_90"] + Del_count_all["DPD_120"] + Del_count_all["DPD_150"] + Del_count_all["Default"]

    fig, axs = plt.subplots(2, 4)

    fig.set_size_inches(20,8)
    x = Del_count_all['Date']
    y1 = Del_count_all["DPD_0"] / Del_count_all['total']
    y2 = Del_count_all["DPD_30"] / Del_count_all['total']
    y3 = Del_count_all["DPD_60"] / Del_count_all['total']
    y4 = Del_count_all["DPD_90"] / Del_count_all['total']
    y5 = Del_count_all["DPD_120"] / Del_count_all['total']
    y6 = Del_count_all["DPD_150"] / Del_count_all['total']
    y7 = Del_count_all["Default"] / Del_count_all['total']

    axs[0, 0].plot(x, y1)
    axs[0, 0].set_title('Current')
    axs[0, 1].plot(x, y2, 'tab:orange')
    axs[0, 1].set_title('30 Day Delinquent')
    axs[0, 2].plot(x, y3, 'tab:green')
    axs[0, 2].set_title('60 Day Delinquent')
    axs[0, 3].plot(x, y4, 'tab:red')
    axs[0, 3].set_title('90 Day Delinquent')
    axs[1, 0].plot(x, y5)
    axs[1, 0].set_title('120 Day Delinquent')
    axs[1, 1].plot(x, y6, 'tab:orange')
    axs[1, 1].set_title('150 Day Delinquent')
    axs[1, 2].plot(x, y7, 'tab:green')
    axs[1, 2].set_title('Default')

    fig.suptitle("Freddie Mac Delinquency Status: \n Q1 2014 onwards", fontsize=15)

    fig.delaxes(axs[1][3])

#Transition Analysis
#From df_PD (the PD datasets by vintage), create a file with Date, account, previous status,
#and current status by merging each month onto the prior month
def Transition_Analysis(TM_PD):
    TM_Current = TM_PD.rename(columns={"DPD_Bucket": "Current"})

    #Create a date-forward column used for the merge
    TM_Previous = TM_PD.rename(columns={"DPD_Bucket": "Previous"})
    TM_Previous["Date_forward"] = TM_Previous["Date"] + pd.DateOffset(months=1)

    #Merge - left join previous states onto the following month's records
    TM_merged = pd.merge(TM_Previous[["Date_forward","ID","Previous"]], TM_Current, left_on=["Date_forward","ID"], right_on=["Date","ID"], how='left')
    TM_merged = TM_merged.dropna()
    TM_merged["from_to_freq"] = TM_merged["Previous"] + "_to_" + TM_merged["Current"]

    #Create from-to frequency counts
    TM_count = TM_merged.groupby(["Date","from_to_freq"])["ID"].count().reset_index(name='Count')
    TM_count = TM_count.pivot(index='Date', columns='from_to_freq', values='Count').reset_index()
    TM_count = TM_count.fillna(0)

    return TM_count
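
#Optional extension (a sketch, not in the original gist): turn one month's
#from-to counts into a row-normalized transition matrix.
def transition_matrix(tm_count, date, states=("DPD_0","DPD_30","DPD_60","DPD_90","DPD_120","DPD_150","Default")):
    row = tm_count.loc[tm_count["Date"] == date].squeeze()
    mat = pd.DataFrame(0.0, index=list(states), columns=list(states))
    for frm in states:
        for to in states:
            col = frm + "_to_" + to
            if col in row.index:
                mat.loc[frm, to] = row[col]
    totals = mat.sum(axis=1).replace(0, np.nan)
    return mat.div(totals, axis=0).fillna(0.0)  #each row sums to 1 (or 0 if unobserved)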

    TM_count_all = TM_count_all.groupby("Date").sum().reset_index()
    Del_count_all["Date_forward"] = Del_count_all["Date"] + pd.DateOffset(months=1)
    Del_count_all = Del_count_all.drop(columns=['Date'])

    TM_all = pd.merge(TM_count_all, Del_count_all, left_on=["Date"], right_on=["Date_forward"], how='left')

    #Current-to-30DPD vs PD
    x = TM_all['Date'].tolist()
    y1 = TM_all["DPD_0_to_DPD_30"]/TM_all["DPD_0"]
    y2 = Del_count_all["Default"][1:] / Del_count_all['total'][1:]

    fig = plt.figure()
    fig.suptitle("From Current to 30day Delinquency vs. Default", fontsize=15)
    ax1 = fig.add_subplot(111)
    ax1.plot(x, y1)
    ax1.set_ylabel('Transition Probability')

    ax2 = ax1.twinx()
    ax2.plot(x, y2, 'r-')
    ax2.set_ylabel('Default Probability', color='r')
for tl in ax2.get_yticklabels():
    tl.set_color('r')

    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    ax1.xaxis.set_major_locator(mdates.MonthLocator((1,4,7,10)))

    plt.setp(ax1.get_xticklabels(), rotation=70, horizontalalignment='right')
    plt.show()