def info(self, show=True):
    """Print a concise summary of a pyspark.sql.DataFrame.

    For every top-level column the summary lists its name, the number of
    non-null values and its Spark data type, in schema order.

    Args:
        show (bool): When True (the default) the summary is printed with
            ``DataFrame.show()`` and nothing is returned. When False the
            summary is returned instead of being printed.

    Returns:
        pyspark.sql.DataFrame or None: A DataFrame with the columns
        ``Column`` (string), ``Non-Null Count`` (long) and ``Dtype``
        (string) when ``show`` is False; ``None`` when ``show`` is True.
    """
    # Use the session that owns this DataFrame rather than a module-level
    # ``spark`` global, which may be undefined or point at another session.
    # ``DataFrame.sparkSession`` is missing on older PySpark releases, hence
    # the fallback through ``sql_ctx``.
    session = getattr(self, "sparkSession", None) or self.sql_ctx.sparkSession

    # List of (column name, dtype string) pairs in schema order.
    column_types = self.dtypes

    summary_rows = []
    if column_types:
        # ``count(col)`` counts only non-null values and yields 0 (not NULL)
        # on an empty DataFrame, so no separate ``self.count()`` pass is
        # needed. Names are backtick-quoted so that top-level columns whose
        # names contain dots or other special characters resolve correctly.
        count_exprs = [
            f.count(f.col("`{}`".format(name.replace("`", "``"))))
            for name, _ in column_types
        ]
        # A global aggregation always produces exactly one row.
        counts = self.select(count_exprs).first()
        summary_rows = [
            (name, int(counts[position]), dtype)
            for position, (name, dtype) in enumerate(column_types)
        ]

    # An explicit schema keeps the column types stable and lets the summary
    # be built even when the source DataFrame has no columns.
    result = session.createDataFrame(
        summary_rows,
        schema="`Column` string, `Non-Null Count` bigint, `Dtype` string",
    )

    if show:
        # Print every column's row; the default ``show()`` stops at 20 rows
        # and truncates long dtype strings.
        return result.show(n=len(summary_rows), truncate=False)
    return result


# Expose the summary as ``DataFrame.info()`` on every PySpark DataFrame.
pyspark.sql.DataFrame.info = info