Created
April 19, 2021 12:46
-
-
Save baarkerlounger/7c8b380fbf52288dcfc068ad1ed996c8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Evaluate TF-IDF column-name matching: each MHCLG (CORE) column name is
# matched against a list of differently-worded input column names, and the
# share of rows whose best match equals the hand-aligned "correct" input
# column is printed as the accuracy.

import pandas as pd
import numpy
from fuzzywuzzy import process, fuzz
# NOTE(review): the original script called `tm.matcher(...)` without ever
# importing `tm`, which raises NameError. The call shape and the 'Lookup 1'
# result column match the `tfidf_matcher` package -- confirm this is the
# intended dependency.
import tfidf_matcher as tm

# Canonical MHCLG column names (the strings being looked up).
mhclg_column_list = [
    "Tenancy start date", "Type of letting", "Who is the landlord",
    "Registration no", "LA CORE code", "Management group", "Scheme code",
    "Tenant code", "Starter/Introductory tenancy", "Type of tenancy",
    "Tenancy Duration",
    "Age of Person 1", "Age of Person 2", "Age of Person 3",
    "Age of Person 4", "Age of Person 5", "Age of Person 6",
    "Age of Person 7", "Age of Person 8",
    "Gender of Person 1", "Gender of Person 2", "Gender of Person 3",
    "Gender of Person 4", "Gender of Person 5", "Gender of Person 6",
    "Gender of Person 7", "Gender of Person 8",
    "Person 2 relationship to Person 1", "Person 3 relationship to Person 1",
    "Person 4 relationship to Person 1", "Person 5 relationship to Person 1",
    "Person 6 relationship to Person 1", "Person 7 relationship to Person 1",
    "Person 8 relationship to Person 1",
    "Economic Status of Person 1", "Economic Status of Person 2",
    "Economic Status of Person 3", "Economic Status of Person 4",
    "Economic Status of Person 5", "Economic Status of Person 6",
    "Economic Status of Person 7", "Economic Status of Person 8",
    "Ethnic group of person 1 as defined by applicant",
    "Nationality of person 1",
    "Household member has ever served in the UK Armed Forces",
    "Household member has been seriously injured or ill in the UK Armed Forces",
    "Does the household contain a pregnant person",
    "Which benefits does the tenant receive",
    "How much income comes from these benefits",
    "Tenant's net income", "Income refused",
    "Main reason the household left their last settled home",
    "Accessibility requirements", "Housing situation",
    "LA in which household lived prior to this letting",
    "Postcode of previous accommodation",
    "How long has the household lived in the LA",
    "How long has the household been on the waiting list",
    "Homeless status prior to this letting",
    "Reason for Housing Priority",
    "Was the letting made under CBL", "Was the letting made under CHR",
    "Was the letting made under CAP",
    "Source of referral for this letting",
    # This literal was split across two lines in the original, which is a
    # syntax error; it is rejoined here.
    "Rent and other charges period",
    "Basic rent", "Service charge", "Personal Service Charge",
    "Support charge", "Care home charge",
    "Exempt from accommodation charges",
    "After benefits, what is the outstanding rent",
    "Void or newbuild/renewal date", "Major repairs completion date",
    "Supported scheme", "Number of offers since last tenancy",
    "Property Reference", "UPRN", "Number of bedrooms", "Type of unit",
    "Type of building", "Wheelchair accessible",
    "For relets, previous basis for rent", "Reason for vacancy",
    "ONS LA code", "Postcode of property",
]

# Input column names, listed in the same order as their MHCLG counterparts
# so that position i is the expected match for mhclg_column_list[i]. The
# list is shorter than mhclg_column_list, and the second entry is a single
# space (kept exactly as in the original).
input_columns_list = [
    "Start date", " ", "Landlord Name", "Reg no", "Local Authority Code",
    "Mgmnt group", "Our Scheme code column", "Tenant Cde",
    "Is this a starter or introductory tenancy?", "Tenancy type",
    "Duration of tenancy",
    "Person 1 age", "Person 2 age", "Person 3 age", "Person 4 age",
    "Person 5 age", "Person 6 age", "Person 7 age", "Person 8 age",
    "Person 1 gender", "Person 2 gender", "Person 3 gender",
    "Person 4 gender", "Person 5 gender", "Person 6 gender",
    "Person 7 gender", "Person 8 gender",
    "Person 2 relationship to Person 1", "Person 3 relationship to Person 1",
    "Person 4 relationship to Person 1", "Person 5 relationship to Person 1",
    "Person 6 relationship to Person 1", "Person 7 relationship to Person 1",
    "Person 8 relationship to Person 1",
    "Person 1 Economic status", "Person 2 Economic status",
    "Person 3 Economic status", "Person 4 Economic status",
    "Person 5 Economic status", "Person 6 Economic status",
    "Person 7 Economic status", "Person 8 Economic status",
    "Person 1 ethnic group", "Person 1 nationality",
    "Armed forces status", "Armed forces injury status", "Pregnancy status",
    "Tenant benefits", "Benefit income", "Net income", "Income refused",
    "Reason for move", "Accessibility", "Prior Housing status",
]

# The original also built a side-by-side DataFrame of the two lists and two
# empty lists (`match`, `similarity`); all three were dead code (the frame
# was overwritten immediately, the lists never read) and have been removed.

# Positional arguments 1 and 2 are passed through unchanged from the
# original call; presumably k_matches=1 and ngram_length=2 -- confirm
# against the tfidf_matcher documentation.
df = tm.matcher(mhclg_column_list, input_columns_list, 1, 2)

# Assigning a Series aligns on the index, so rows beyond the end of
# input_columns_list get NaN here and therefore compare as "not matched".
# NOTE(review): this assumes tm.matcher returns one row per MHCLG name with
# a default 0..n-1 index -- confirm.
df['correct_match'] = pd.Series(input_columns_list)

# The comparison already yields a boolean column; wrapping it in
# numpy.where(..., True, False) added nothing.
df['result'] = df['Lookup 1'] == df['correct_match']

# The mean of a boolean column is the fraction of True values. Unlike the
# original groupby + result_count[True], this does not raise KeyError when
# no row matched (it yields 0.0 instead).
accuracy = df['result'].mean()

print('Matched Data: ')
print(df)
print('\n')
print('\n')
print(f'TFIDF Accuracy: {accuracy}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment