Skip to content

Instantly share code, notes, and snippets.

# before
# Count the non-null resale_price entries for each date and expose them
# under the column name 'resale_count'.
sales_over_time = (
    df.groupby('date', as_index=False)
    .agg({'resale_price': 'count'})
    .rename(columns={'resale_price': 'resale_count'})
)

# Plot the per-date counts as a line chart and label the axes.
fig, ax = plt.subplots(figsize=(10, 5))
ax = sns.lineplot(data=sales_over_time, x='date', y='resale_count')
ax.set(
    xlabel='Month',
    ylabel='Sales Transaction',
    title='Number of Sales Over Time - Central Area',  # hard-coded value
)
plt.show()
# after
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
def valueCounts(self, subset, normalize = False, sort = True, ascending = False, show = True):
'''
Count of unique rows in a DataFrame
Args:
subset(list): column to be used when counting.
normalize(bool): default False. Return proportion instead of frequencies.
def rename(self, columns):
'''
Rename column headers
Args:
columns(dict):
A dictionary where keys are current column names and values are new column names
{'old_column_name1':'new_column_name1', 'old_column_name2':'new_column_name2'}
def info(self, show = True):
'''
Print concise summary of a pyspark.sql.DataFrame
This method prints information about a DataFrame
including the index dtype and columns, non-null values
Args:
show(bool): default True. show result
def shape(self):
    '''
    Find the number of rows and columns in a DataFrame

    Unlike the pandas ``shape`` attribute, this is a regular function, so
    once attached to a DataFrame class it is called as ``df.shape()``.

    Args:
        self: object exposing ``count()`` and ``schema.names``.

    Returns:
        tuple: ``(rows, columns)`` where rows is the value returned by
        ``self.count()`` and columns is ``len(self.schema.names)``.
    '''
    # NOTE(review): on a pyspark DataFrame, count() presumably evaluates the
    # data, so this may be expensive on large inputs - confirm before using
    # it in a hot path.
    return self.count(), len(self.schema.names)
# Attach shape to pyspark.sql.DataFrame so it can be called as a method on DataFrame objects.
pyspark.sql.DataFrame.shape = shape
def duplicated(self, subset = None, orderby = None, ascending = False, keep = 'first'):
'''
Returns pyspark.sql.DataFrame with duplicate indicator column. True = duplicate(s)
Args:
subset(list):
default None. list of column for identifying duplicates.
Default uses all the columns.
@edwintyh
edwintyh / GoogleMapDownloader.py
Created May 2, 2019 12:59 — forked from eskriett/GoogleMapDownloader.py
A python script to download high resolution Google map images given a longitude, latitude and zoom level.
#!/usr/bin/python
# GoogleMapDownloader.py
# Created by Hayden Eskriett [http://eskriett.com]
#
# A script which when given a longitude, latitude and zoom level downloads a
# high resolution google map
# Find the associated blog post at: http://blog.eskriett.com/2013/07/19/downloading-google-maps/
import urllib
import Image