@nathangeology
Created March 12, 2019 00:48

Code to create a lookup dictionary for fast loading of LAS well log files.
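
The lookup itself is a defaultdict(list) keyed by well API number, so multiple LAS files for the same well accumulate under one key. A minimal sketch of that pattern, with hypothetical paths and API number:

from collections import defaultdict

las_dict = defaultdict(list)
las_dict['42-123-45678'].append('/data/wells/run1/log.las')
las_dict['42-123-45678'].append('/data/wells/rerun/log.las')
# The well's key now lists every file found for it:
# ['/data/wells/run1/log.las', '/data/wells/rerun/log.las']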

import os
import pickle
from collections import defaultdict

import pandas as pd


# InputSource, Column_Cleaner, and LasLoader are project-local helpers that are
# not included in this gist.
class LasDocumentStore(InputSource):
    las_dict = defaultdict(list)
    __directory_path = ''
    column_cleaner = Column_Cleaner()

    def __init__(self, *, directory_path):
        self.__directory_path = directory_path
        self.path = self.__directory_path + '/las_document_store.pkl'
        self.name = 'WellLogs'
        self.las_dict = defaultdict(list)
        self.__get_file_paths()  # index every .las file under the directory
        self.__save()  # pickle the populated store so later runs skip the walk

    @classmethod
    def get_document_store(cls, directory_path):
        path = directory_path + '/las_document_store.pkl'
        if os.path.isfile(path):
            objects = []
            with open(path, "rb") as openfile:
                while True:
                    try:
                        objects.append(pickle.load(openfile))
                    except EOFError:
                        break
            obj = objects[0]
        else:
            obj = cls(directory_path=directory_path)
        return obj

    def return_data_dictionary(self):
        output = defaultdict(list)
        if isinstance(self.filter, list):
            for a_key in self.filter:
                output[self.name].append(self._load_log(self.las_dict[a_key]))
        else:
            output[self.name].append(self._load_log(self.las_dict[self.filter]))
        return output

    def _load_log(self, filepath):
        """Load each LAS file in the list and merge them into one depth-indexed frame."""
        output = None
        if len(filepath) > 0:
            for a_file in filepath:
                data_temp = LasLoader.get_dataframe_for_filepath(a_file)
                data_temp['DEPTH'] = data_temp.index
                try:
                    # Re-index on DEPTH unless the frame already carries a float
                    # index (pd.Float64Index is the older pandas API; it was
                    # removed in pandas 2.0)
                    if not isinstance(data_temp.index, pd.Float64Index):
                        data_temp.set_index(keys=['DEPTH'],
                                            drop=False,
                                            inplace=True)
                except KeyError:
                    continue
                data_temp.columns = self.column_cleaner.get_clean_columns(
                    source_name=self.name,
                    column_names=data_temp.columns)
                if output is None:
                    output = data_temp.copy()
                else:
                    output = self._merge(output, data_temp)
        return output

    @staticmethod
    def _merge(df1, df2):
        new_cols = df2.columns.difference(df1.columns)
        shared_cols = df2.columns.intersection(df1.columns)
        output = pd.merge(df1, df2[new_cols],
                          left_index=True,
                          right_index=True,
                          how='outer')
        for col in shared_cols:
            col1 = df1[col]
            col2 = df2[col]
            if isinstance(col1, pd.DataFrame):
                print('stop')  # debug marker: duplicate column labels in df1
            if isinstance(col2, pd.DataFrame):
                print('stop')  # debug marker: duplicate column labels in df2
            col1 = col1.dropna()
            col2 = col2.dropna()
            # Keep df1's values on shared depths; only adopt depths df1 is missing
            new_vals = col2.index.difference(col1.index)
            col1 = pd.concat([col1, col2.loc[new_vals]])
            try:
                output[col] = col1
            except Exception:
                # If the assignment fails, fall back to the original frame
                return df1
        return output
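
    # Back-fill example for _merge: if df1 has curve GR = {1000.0: 85.2} and df2
    # has GR = {1000.0: 90.1, 1000.5: 87.4}, the merged frame keeps df1's 85.2 at
    # depth 1000.0 and only adopts df2's value at the new depth 1000.5.
    # (Toy numbers, for illustration only.)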

    def __save(self):
        with open(self.path, "wb") as f:
            pickle.dump(self, f)

    def __get_file_paths(self):
        for dir_path, _, files in os.walk(self.__directory_path):
            self.__add_las_to_dictionary_in_directory(dir_path, files)

    def __add_las_to_dictionary_in_directory(self, dir_path, files):
        for name in files:
            if name.lower().endswith('.las'):
                file = os.path.join(dir_path, name)
                api = self.__get_api_for_file(file)
                self.las_dict[api].append(file)

    @staticmethod
    def __get_api_for_file(file):
        return LasLoader.get_api_for_filepath(file)
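
A minimal usage sketch, assuming the helper classes above are importable and that filter is set by the InputSource base class or by the caller; the directory and API number below are hypothetical:

store = LasDocumentStore.get_document_store('/data/wells')  # builds or unpickles the index
store.filter = '42-123-45678'            # a single API number, or a list of them
logs = store.return_data_dictionary()    # {'WellLogs': [merged DataFrame]}

On the first call this walks the directory tree and pickles the index; later calls load the pickle instead of re-scanning, which is where the fast loading comes from.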