import pandas as pd df=pd.read_csv('pennebakerctq01_definitions.csv') dfgiven=pd.read_excel('cnb_ndar_codebook_v5.xlsx') _aliases=df['Aliases'].unique() __aliases=[] for e in _aliases: if pd.isna(e): continue if ',' in e: for sub_e in e.split(','): __aliases.append(sub_e) __aliases.append(e) df1=df.copy() df2=df.copy() df1.set_index('ElementName',inplace=True) df2.set_index('Aliases',inplace=True) for i,row in dfgiven.iterrows(): # .strip() is necessary because some of # Kosha's ElementName cells appear to have trailing w/ space var=row['ElementName'].strip() try: # ideal case: exists in ElementName df1.loc[var] except: # print('does not exist in ElementName:', var) # check if exists in single element Aliases cell try: df2.loc[var] except: # print('does not exist in single-element Aliases cell:', var) pass # final check: does it exist in multiple-element Aliases cell? # this step will take O(n) time for each var if var not in __aliases: print('does not exist anywhere:', var)