-
-
Save rebeccabilbro/2c7bb4d1acfbcdcf9156e7b9b7577cba to your computer and use it in GitHub Desktop.
| # kimchi.py | |
| # For converting Python 2 pickles to Python 3 | |
| import os | |
| import dill | |
| import pickle | |
| import argparse | |
def convert(old_pkl):
    """
    Convert a Python 2 pickle to Python 3.

    The input file is unpickled with ``encoding="latin1"`` and dumped again
    with the running interpreter's default pickle protocol. The new file is
    named ``<input name without extension>_p3.pkl``; because the directory
    part of ``old_pkl`` is dropped, it is written to the current working
    directory, not next to the input.

    Parameters
    ----------
    old_pkl : str
        Path to the Python 2 pickle file.

    Returns
    -------
    str
        Filename of the newly written Python 3 pickle (relative to the
        current working directory).

    Notes
    -----
    Unpickling can execute arbitrary code. Only convert files that come
    from a source you trust.

    As a side effect, an ``"ObjectType"`` entry is added to dill's reverse
    type map and stays there for the rest of the process.
    """
    # Make a name for the new pickle: strip the directory and the extension.
    stem = os.path.splitext(os.path.basename(old_pkl))[0]
    new_pkl = stem + "_p3.pkl"

    # Make the Python 2 type name "ObjectType" resolve to Python 3's `object`.
    # NOTE(review): presumably this matters for pickles whose stream refers to
    # dill's type loader; confirm against the dill version in use, since
    # `_reverse_typemap` is a private attribute.
    dill._dill._reverse_typemap["ObjectType"] = object

    # Open the pickle using latin1 encoding so Python 2 byte strings decode.
    # SECURITY: pickle.load runs code embedded in the file; trusted input only.
    with open(old_pkl, "rb") as f:
        loaded = pickle.load(f, encoding="latin1")

    # Re-save as a Python 3 pickle.
    with open(new_pkl, "wb") as outfile:
        pickle.dump(loaded, outfile)

    # Report where the result went so callers do not have to rebuild the name.
    return new_pkl
if __name__ == "__main__":
    # Command-line entry point: takes one positional argument, the path of
    # the Python 2 pickle, and hands it to convert().
    cli = argparse.ArgumentParser(description="Convert a Python 2 pickle to Python 3")
    cli.add_argument("infile", help="Python 2 pickle filename")
    options = cli.parse_args()
    convert(options.infile)
Hiya - this is meant for unpickling scikit-learn Estimators. For pandas you could try out this suggestion
Thanks! This is great, worked for me. But there's one thing I don't understand. The script sets
dill._dill._reverse_typemap["ObjectType"] = object
But it doesn't use dill, it uses pickle.load and pickle.dump. So how does dill do anything here?
Hi @nbecker; this gist is associated with a longer blog post, which may answer your question!
In my case I have to convert a file saved with joblib (a .jl file). How can I adapt the script for joblib?
I have tried to map b'ObjectType' but it seems it is not enough:
import os
import joblib
import dill
dill._dill._reverse_typemap["ObjectType"] = object
DATA_PATH = '/root'
tfidf_vectorizer, _ = joblib.load(os.path.join(DATA_PATH,'nmf_topic_model/tfidf_mpd.jl'))
nmf_model, _ = joblib.load(os.path.join(DATA_PATH,'nmf_topic_model/nmf_mpd.jl'))
I have tried to patch the joblib file .../joblib/numpy_pickle.py so that the class NumpyUnpickler has an overridden load method like
def load(self):
eggs = pickle.load(self.file_handle, encoding='latin1')
return eggs
and I have put
import dill
dill._dill._reverse_typemap["ObjectType"] = object
Another attempt was to add the encoding directly in the NumpyUnpickler init:
def __init__(self, filename, file_handle, mmap_mode=None):
Unpickler.__init__(self, self.file_handle, encoding="latin1")
loaded = pickle.load(f, encoding="latin1")
ModuleNotFoundError: No module named 'pandas.indexes'