Code to create a lookup dictionary for fast loading of LAS well log files. The store walks a directory tree once, maps each well's API number to its .las file paths, and pickles itself so later runs can load the index instead of rescanning the disk.
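For context, the lookup the class builds is a plain defaultdict keyed on API number, so a well's logs can be located without re-walking the directory tree. A hypothetical example of its shape (the API numbers and paths below are invented for illustration):

    las_dict = {
        '42-123-45678': ['/data/wells/a/42-123-45678_gr.las',
                         '/data/wells/a/42-123-45678_res.las'],
        '42-123-99999': ['/data/wells/b/42-123-99999.las'],
    }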
import os
import pickle
from collections import defaultdict

import pandas as pd

# InputSource, LasLoader, and Column_Cleaner come from the surrounding
# project and are not defined in this gist.


class LasDocumentStore(InputSource):
    las_dict = defaultdict(list)
    __directory_path = ''
    column_cleaner = Column_Cleaner()

    def __init__(self, *, directory_path):
        self.__directory_path = directory_path
        self.path = self.__directory_path + '/las_document_store.pkl'
        self.name = 'WellLogs'
        self.las_dict = defaultdict(list)
        self.__get_file_paths()
        self.__save()

    @classmethod
    def get_document_store(cls, directory_path):
        # Reload a previously pickled store if one exists; otherwise build
        # (and pickle) a new one by scanning the directory tree.
        path = directory_path + '/las_document_store.pkl'
        if os.path.isfile(path):
            objects = []
            with open(path, "rb") as openfile:
                while True:
                    try:
                        objects.append(pickle.load(openfile))
                    except EOFError:
                        break
            obj = objects[0]
        else:
            obj = cls(directory_path=directory_path)
        return obj

    def return_data_dictionary(self):
        # self.filter (an API number, or a list of them) is expected to be
        # set via the InputSource base class, which is not shown here.
        output = defaultdict(list)
        if isinstance(self.filter, list):
            for a_key in self.filter:
                output[self.name].append(self._load_log(self.las_dict[a_key]))
        else:
            output[self.name].append(self._load_log(self.las_dict[self.filter]))
        return output

    def _load_log(self, filepaths):
        # Load every LAS file for a well and merge them into a single
        # DataFrame indexed on depth.
        output = None
        if len(filepaths) > 0:
            for a_file in filepaths:
                data_temp = LasLoader.get_dataframe_for_filepath(a_file)
                data_temp['DEPTH'] = data_temp.index
                try:
                    # pd.Float64Index exists in the pandas versions this
                    # 2019 gist targets (it was removed in pandas 2.0).
                    if not isinstance(data_temp.index, pd.Float64Index):
                        data_temp.set_index(keys=['DEPTH'],
                                            drop=False,
                                            inplace=True)
                except KeyError:
                    # Skip files whose depth column cannot be indexed.
                    continue
                data_temp.columns = self.column_cleaner.get_clean_columns(
                    source_name=self.name,
                    column_names=data_temp.columns)
                if output is None:
                    output = data_temp.copy()
                else:
                    output = self._merge(output, data_temp)
        return output

    @staticmethod
    def _merge(df1, df2):
        # Outer-join the columns df2 adds, then fill gaps in the columns
        # the two frames share. Assumes column labels are unique after
        # cleaning, so df[col] yields a Series rather than a DataFrame.
        new_cols = df2.columns.difference(df1.columns)
        shared_cols = df2.columns.intersection(df1.columns)
        output = pd.merge(df1, df2[new_cols],
                          left_index=True,
                          right_index=True,
                          how='outer')
        for col in shared_cols:
            col1 = df1[col].dropna()
            col2 = df2[col].dropna()
            new_vals = col2.index.difference(col1.index)
            col1 = pd.concat([col1, col2.loc[new_vals]])
            try:
                output[col] = col1
            except Exception:
                return df1
        return output

    def __save(self):
        with open(self.path, "wb") as f:
            pickle.dump(self, f)

    def __get_file_paths(self):
        for dir_path, _, files in os.walk(self.__directory_path):
            self.__add_las_to_dictionary_in_directory(dir_path, files)

    def __add_las_to_dictionary_in_directory(self, dir_path, files):
        for name in files:
            if name.lower().endswith('.las'):
                file = os.path.join(dir_path, name)
                api = self.__get_api_for_file(file)
                self.las_dict[api].append(file)

    @staticmethod
    def __get_api_for_file(file):
        return LasLoader.get_api_for_filepath(file)