Created
March 31, 2020 04:35
-
-
Save NWPlayer123/b066e3e5b9bcc399c7c42d051a4a1506 to your computer and use it in GitHub Desktop.
Revisions
-
NWPlayer123 created this gist
Mar 31, 2020. There are no files selected for viewing
"""Scrape item IDs, prices and names into a comma-separated text dump.

The script reads the MSBT string tables found under ``msg_path`` and the
BCSV tables found under ``bcsv_path``, pairs every ``ItemParam.bcsv`` row
with an item name, and writes one line per item to ``output_name``.

Rows whose ID has no direct name entry are resolved in a second pass by
matching their file name against file-name prefixes of rows that did have
a name; rows that still cannot be matched are labelled ``UNUSED``.

NOTE(review): the script mixes byte strings and text (``b"..."`` literals,
``.encode()`` on looked-up names, ``str`` formatting written to a file
opened in ``"wb"`` mode).  That pattern presumes Python 2 ``str`` semantics
-- confirm the target interpreter before running it under Python 3.
"""
from bcsv_reader import BCSV
from msbt_reader import MSBT
from binascii import hexlify
from os import listdir
import sys
import string
import codecs

# Python 2 default-encoding hack, intentionally left disabled:
#   reload(sys)
#   sys.setdefaultencoding('utf8')

msg_path = "../message1.1"
bcsv_path = "."
output_name = "scrape_data_test.txt"


def getindices(s):
    """Return the positions of every uppercase character in ``s``."""
    return [i for i, c in enumerate(s) if c.isupper()]


def _flatten_sorted_labels(labels):
    """Flatten grouped MSBT labels into one list ordered by text index.

    ``labels`` is a sequence of groups, each group a sequence of label
    entries whose element ``[1]`` is the index into the text table.
    """
    flat = [label for group in labels for label in group]
    flat.sort(key=lambda label: label[1])
    return flat


def _clean_field(raw):
    """Strip trailing NUL padding from a UTF-8 encoded BCSV string field.

    The decode/encode round trip doubles as a UTF-8 validity check.

    Raises:
        UnicodeDecodeError: if ``raw`` is not valid UTF-8.
    """
    return raw.decode("UTF-8").rstrip(u"\0").encode("UTF-8")


# Grab item ids and names.
item_strings = []
item_id2str = {}
item_str2id = {}
for filename in listdir(msg_path + "/String_USen/Item/"):
    if not filename.endswith(".msbt"):
        continue
    labels, text = MSBT().read_msbt("%s/String_USen/Item/%s" % (msg_path, filename))
    for entry in _flatten_sorted_labels(labels):
        if entry[0].endswith(b"_pl"):
            continue  # filter out plural forms
        itemid = int(entry[0].split(b"_")[1])  # Rug_07322 -> 7322
        itemname = text[entry[1]][0]
        item_strings.append([itemid, itemname])
        item_id2str[itemid] = itemname
        item_str2id[itemname] = itemid

# Grab clothing group ids and names.
cloth_strings = []
cloth_id2str = {}
cloth_str2id = {}
for filename in listdir(msg_path + "/String_USen/Outfit/GroupName/"):
    if not filename.endswith(".msbt"):
        continue
    labels, text = MSBT().read_msbt("%s/String_USen/Outfit/GroupName/%s" % (msg_path, filename))
    for entry in _flatten_sorted_labels(labels):
        itemid = int(entry[0])
        itemname = text[entry[1]][0]
        cloth_strings.append([itemid, itemname])
        cloth_id2str[itemid] = itemname
        cloth_str2id[itemname] = itemid

# Convert cloth_itemid -> item_itemid.
cloth_data = BCSV().read_bcsv(bcsv_path + "/ItemClothGroup.bcsv")
cloth_lookup = {}
cloth_lookup2 = {}
cloth_data2 = []
for entry in cloth_data:
    clothid = entry[0x54706054]  # string id
    itemid = entry[0x65503F9F]  # item id
    itemname = _clean_field(entry[0x13AB5198])  # TODO: assert at end?
    itemdesc = _clean_field(entry[0x036E8EBE])
    cloth_lookup[clothid] = itemid
    cloth_lookup2[itemname] = itemid
    cloth_data2.append([clothid, itemid, itemname, itemdesc])

# Add clothes to item_strings so they join the lookup built in the next
# step; clothing groups without a name entry are skipped.
for entry in cloth_data2:
    try:
        # Store the string names that match, we'll extrapolate later.
        item_strings.append([entry[1], cloth_id2str[entry[0]]])
    except KeyError:
        pass

# Create lookup table for everything (later entries win on duplicate ids).
item_lookup = {}
for entry in item_strings:
    item_lookup[entry[0]] = entry[1]

# Parse table for IDs and filenames.
item_data = BCSV().read_bcsv(bcsv_path + "/ItemParam.bcsv")
item_table = []
do_later = []
dupe_lookup = {}
skipme = False  # never read in this script; kept so the module-level name still exists
with open(output_name, "wb") as o:
    o.write(b"Item ID (hex), Buy Price, Sell Price, Item Name, item_type, file_name, file_desc\r\n")

    # First pass: rows whose id has a name; everything else is deferred.
    for entry in item_data:
        item_id = entry[0x54706054]
        buy_price = entry[0x718B024D]
        sell_price = int(buy_price / 4.0)
        file_name = _clean_field(entry[0x3FEBC642])
        item_type = _clean_field(entry[0xFC275E86])
        raw_desc = entry[0xB8CC232C]
        try:
            file_desc = _clean_field(raw_desc)
        except UnicodeDecodeError:
            # Per the original author's note, BCSV truncates strings at
            # 0x40, which can leave an invalid UTF-8 tail; keep what decodes
            # and flag the value.
            file_desc = b"FILE_DESC_TRUNCATED " + raw_desc.decode("UTF-8", "ignore").encode("UTF-8")

        try:
            # Check if we have the name for that id, else resolve it later.
            item_name = item_lookup[item_id]
        except KeyError:
            do_later.append([item_id, buy_price, sell_price, item_type, file_name, file_desc])
            continue

        item_table.append([item_id, buy_price, sell_price, item_name, item_type, file_name, file_desc])
        # Remember the name under the file name minus trailing digits, so
        # numbered variants (0, 1, 2, ...) of the same thing can find it.
        dupe_lookup[file_name.rstrip(string.digits)] = item_name
        # Also remember it under everything before the last capitalised
        # word (e.g. TwotoneMonotone + variants), provided that prefix is
        # longer than 5 characters.
        indices = getindices(file_name)
        if len(indices) > 1 and indices[-1] > 5:
            dupe_lookup[file_name[:indices[-1]]] = item_name

    # Second pass: borrow a name from a sibling row with a matching prefix.
    for entry in do_later:
        file_name = entry[4]
        try:
            item_name = dupe_lookup[file_name.rstrip(string.digits)]
        except KeyError:
            try:
                indices = getindices(file_name)
                # IndexError: no uppercase character; KeyError: unknown prefix.
                item_name = dupe_lookup[file_name[:indices[-1]]]
            except (KeyError, IndexError):
                item_name = b"UNUSED"
        # NOTE(review): the collapsed source does not show whether this
        # print sat inside the fallback branch or at loop level; loop level
        # is assumed here -- confirm against the original file.
        print("%s %s" % (entry[4], item_name.encode("UTF-8")))
        item_table.append([entry[0], entry[1], entry[2], item_name, entry[3], entry[4], entry[5]])

    item_table.sort(key=lambda x: x[0])  # sort by item_id
    for entry in item_table:
        o.write("%04X, %d, %d, %s, %s, %s, %s\r\n" % (entry[0], entry[1], entry[2], entry[3].encode("UTF-8"), entry[4], entry[5], entry[6]))