@nathangeology
Created March 12, 2019 00:48

Code to create a lookup dictionary for fast loading of LAS well log files.
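
The lookup itself is a defaultdict(list) keyed by well API number, so multiple LAS files for the same well accumulate under one key. A minimal sketch of that pattern, with hypothetical paths and API number:

from collections import defaultdict

las_dict = defaultdict(list)
las_dict['42-123-45678'].append('/data/wells/run1/log.las')
las_dict['42-123-45678'].append('/data/wells/rerun/log.las')
# The well's key now lists every file found for it:
# ['/data/wells/run1/log.las', '/data/wells/rerun/log.las']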

import os
import pickle
from collections import defaultdict

import pandas as pd


# InputSource, Column_Cleaner, and LasLoader are project-local helpers that are
# not included in this gist.
class LasDocumentStore(InputSource):
    las_dict = defaultdict(list)
    __directory_path = ''
    column_cleaner = Column_Cleaner()

    def __init__(self, *, directory_path):
        self.__directory_path = directory_path
        self.path = self.__directory_path + '/las_document_store.pkl'
        self.name = 'WellLogs'
        self.las_dict = defaultdict(list)
        self.__get_file_paths()  # index every .las file under the directory
        self.__save()  # pickle the populated store so later runs skip the walk

    @classmethod
    def get_document_store(cls, directory_path):
        path = directory_path + '/las_document_store.pkl'
        if os.path.isfile(path):
            objects = []
            with open(path, "rb") as openfile:
                while True:
                    try:
                        objects.append(pickle.load(openfile))
                    except EOFError:
                        break
            obj = objects[0]
        else:
            obj = cls(directory_path=directory_path)
        return obj

    def return_data_dictionary(self):
        output = defaultdict(list)
        if isinstance(self.filter, list):
            for a_key in self.filter:
                output[self.name].append(self._load_log(self.las_dict[a_key]))
        else:
            output[self.name].append(self._load_log(self.las_dict[self.filter]))
        return output

    def _load_log(self, filepath):
        """Load each LAS file in the list and merge them into one depth-indexed frame."""
        output = None
        if len(filepath) > 0:
            for a_file in filepath:
                data_temp = LasLoader.get_dataframe_for_filepath(a_file)
                data_temp['DEPTH'] = data_temp.index
                try:
                    # Re-index on DEPTH unless the frame already carries a float
                    # index (pd.Float64Index is the older pandas API; it was
                    # removed in pandas 2.0)
                    if not isinstance(data_temp.index, pd.Float64Index):
                        data_temp.set_index(keys=['DEPTH'],
                                            drop=False,
                                            inplace=True)
                except KeyError:
                    continue
                data_temp.columns = self.column_cleaner.get_clean_columns(
                    source_name=self.name,
                    column_names=data_temp.columns)
                if output is None:
                    output = data_temp.copy()
                else:
                    output = self._merge(output, data_temp)
        return output

    @staticmethod
    def _merge(df1, df2):
        new_cols = df2.columns.difference(df1.columns)
        shared_cols = df2.columns.intersection(df1.columns)
        output = pd.merge(df1, df2[new_cols],
                          left_index=True,
                          right_index=True,
                          how='outer')
        for col in shared_cols:
            col1 = df1[col]
            col2 = df2[col]
            if isinstance(col1, pd.DataFrame):
                print('stop')  # debug marker: duplicate column labels in df1
            if isinstance(col2, pd.DataFrame):
                print('stop')  # debug marker: duplicate column labels in df2
            col1 = col1.dropna()
            col2 = col2.dropna()
            # Keep df1's values on shared depths; only adopt depths df1 is missing
            new_vals = col2.index.difference(col1.index)
            col1 = pd.concat([col1, col2.loc[new_vals]])
            try:
                output[col] = col1
            except Exception:
                # If the assignment fails, fall back to the original frame
                return df1
        return output
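
    # Back-fill example for _merge: if df1 has curve GR = {1000.0: 85.2} and df2
    # has GR = {1000.0: 90.1, 1000.5: 87.4}, the merged frame keeps df1's 85.2 at
    # depth 1000.0 and only adopts df2's value at the new depth 1000.5.
    # (Toy numbers, for illustration only.)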

    def __save(self):
        with open(self.path, "wb") as f:
            pickle.dump(self, f)

    def __get_file_paths(self):
        for dir_path, _, files in os.walk(self.__directory_path):
            self.__add_las_to_dictionary_in_directory(dir_path, files)

    def __add_las_to_dictionary_in_directory(self, dir_path, files):
        for name in files:
            if name.lower().endswith('.las'):
                file = os.path.join(dir_path, name)
                api = self.__get_api_for_file(file)
                self.las_dict[api].append(file)

    @staticmethod
    def __get_api_for_file(file):
        return LasLoader.get_api_for_filepath(file)
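
A minimal usage sketch, assuming the helper classes above are importable and that filter is set by the InputSource base class or by the caller; the directory and API number below are hypothetical:

store = LasDocumentStore.get_document_store('/data/wells')  # builds or unpickles the index
store.filter = '42-123-45678'            # a single API number, or a list of them
logs = store.return_data_dictionary()    # {'WellLogs': [merged DataFrame]}

On the first call this walks the directory tree and pickles the index; later calls load the pickle instead of re-scanning, which is where the fast loading comes from.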