import csv
import os
import sys

###
### Author Jason Theobald, MD/MBA Student 2014
### This script reads all SPARCS files with the .DAT extension in the same folder as the script.
### Use this to retrieve patients by ICD-9 code and generate a .csv file.
### To run: python extractor.py on Mac OSX or Windows PC (written for Python 2.7;
### the code avoids Python-2-only syntax so it also runs under Python 3).
###

### Header of the .csv output file: the field names that form the first row,
### in the same order as the values written for each matching record.
HEADER = ["Type", "Age", "Zip", "County", "FacilityID", "FacilityName",
          "Principal ICD", "ICD2", "ICD3", "ICD4", "ICD5", "ICD6", "ICD7",
          "ICD8", "ICD9", "ICD10", "ICD11", "ICD12", "ICD13", "ICD14", "ICD15"]

### Refer to the SPARCS code list to tell the script where to look for each item
### of interest. Python uses 0-based indexing, so subtract 1 from the start
### number of every SPARCS position. The end number stays the same.
DEMOGRAPHIC_FIELDS = (
    (101, 104),  # age
    (164, 169),  # zip
    (173, 175),  # county
    (199, 205),  # facility ID
    (206, 276),  # facility name
)

### The principal ICD code starts at 1642; each of the 15 ICD fields is
### 7 characters wide and begins 24 characters after the previous one.
ICD_FIRST_START = 1642
ICD_STRIDE = 24
ICD_WIDTH = 7
ICD_COUNT = 15
ICD_STARTS = tuple(ICD_FIRST_START + ICD_STRIDE * k for k in range(ICD_COUNT))

### Enter your collection of ICD-9 codes of interest. In this case, we are
### looking for hips and knees.
HIP_CODES = frozenset(["8151", "8152", "0070", "0071", "0072", "0073",
                       "8153", "0074", "0075", "0076", "0077"])
KNEE_CODES = frozenset(["8154", "8155", "0080", "0081", "0082", "0083", "0084"])

# Python 2 spells the non-evaluating prompt ``raw_input``; Python 3 calls it ``input``.
try:
    read_line = raw_input
except NameError:
    read_line = input


def parse_record(line):
    """Slice one fixed-width record into its fields of interest.

    Returns a ``(demographics, codes)`` pair of lists of stripped strings:
    ``demographics`` is [age, zip, county, facility ID, facility name] and
    ``codes`` is the 15 ICD fields, principal code first. Fields that lie
    beyond the end of a short line come back as empty strings.
    """
    demographics = [line[start:end].strip() for start, end in DEMOGRAPHIC_FIELDS]
    codes = [line[start:start + ICD_WIDTH].strip() for start in ICD_STARTS]
    return demographics, codes


def classify(codes):
    """Return the labels ("Hip" and/or "Knee") whose code lists match ``codes``.

    "Hip" always precedes "Knee"; a record matching both gets both labels.
    """
    labels = []
    if any(code in HIP_CODES for code in codes):
        labels.append("Hip")
    if any(code in KNEE_CODES for code in codes):
        labels.append("Knee")
    return labels


def rows_for_line(line):
    """Return the CSV rows (zero, one or two) to write for one input record."""
    demographics, codes = parse_record(line)
    return [[label] + demographics + codes for label in classify(codes)]


def open_csv_output(path):
    """Open ``path`` for csv.writer in the mode the running Python expects."""
    if sys.version_info[0] < 3:
        # Python 2's csv module wants a binary file to control line endings.
        return open(path, 'wb')
    return open(path, 'w', newline='')


def extract_file(in_path, out_path):
    """Write the header plus every hip/knee record of ``in_path`` to ``out_path``.

    Prints "Hip" or "Knee" for each row written. Both files are closed when
    the function returns, even if an error occurs part-way through.
    """
    with open(in_path) as source, open_csv_output(out_path) as sink:
        writer = csv.writer(sink, delimiter=',')
        writer.writerow(HEADER)
        for line in source:
            for row in rows_for_line(line):
                print(row[0])
                writer.writerow(row)


def main():
    """List the .dat files in the current directory, then extract each one."""
    ### Shows what files are being read. Will open all .dat files in current directory.
    files = [name for name in os.listdir(os.curdir)
             if name.lower().endswith(".dat")]
    print("")
    print("Files to extract from...")
    for name in files:
        print(name)

    ### Wait for keyboard prompt to show user the files that are being read.
    read_line("Press enter to continue")

    for name in files:
        # The output name is built from the two characters just before ".dat".
        # NOTE: inputs that share those two characters overwrite each other's output.
        extract_file(name, 'JOINTS' + str(name[-6:-4]) + '.csv')


if __name__ == "__main__":
    main()