Skip to content

Instantly share code, notes, and snippets.

@rgindallas
Forked from akamor/attempt2.py
Created March 13, 2021 07:03
Show Gist options
  • Save rgindallas/ee8aa94eff6ae4161e1ffdee0368a1f5 to your computer and use it in GitHub Desktop.
Save rgindallas/ee8aa94eff6ae4161e1ffdee0368a1f5 to your computer and use it in GitHub Desktop.
import random, csv
from datetime import timedelta, datetime
from faker import Faker
from faker.providers import person
from faker.providers import internet
from faker.providers import ssn
from faker.providers import address
from faker.providers import job
from faker.providers import date_time
# Shared Faker instance used by the generator functions in this script.
fake = Faker()

# Register every imported provider module on the shared instance, keeping the
# same registration order as before (person first, date_time last).
for _provider in (person, internet, ssn, address, job, date_time):
    fake.add_provider(_provider)
def first_name_and_gender():
    """Pick a gender at random and a first name produced for that gender.

    Returns:
        dict: ``{'gender': 'M' or 'F', 'first_name': <str from Faker>}``.
    """
    # randint(0, 1) yields 0 or 1; 0 selects male, 1 selects female.
    if random.randint(0, 1) == 0:
        gender = 'M'
        first_name = fake.first_name_male()
    else:
        gender = 'F'
        first_name = fake.first_name_female()
    return {'gender': gender, 'first_name': first_name}
def birth_and_start_date():
    """Generate a start date and a birth date that precedes it.

    The start date comes from ``fake.date_between`` with the bounds "-20y"
    and "now". The birth date is the start date minus a whole number of
    365-day years, drawn from 18 through 40 inclusive.

    Returns:
        dict: keys 'birth_date' and 'start_date', both formatted MM/DD/YYYY.
    """
    date_format = '%m/%d/%Y'
    start = fake.date_between(start_date="-20y", end_date="now")
    # Leap days are ignored: a "year" here is exactly 365 days.
    age_at_start = timedelta(days=365 * random.randint(18, 40))
    birth = start - age_at_start
    return {
        'birth_date': birth.strftime(date_format),
        'start_date': start.strftime(date_format),
    }
def birth_and_start_date_on_windows():
    """Generate a birth date and a start date using only the standard library.

    Counterpart of ``birth_and_start_date`` that does not call Faker. The
    birth date is drawn uniformly (to the second) from roughly the 40 years
    following 1960-01-01. The start date is drawn uniformly between the
    18th birthday (using 365-day years, as ``birth_and_start_date`` does)
    and the current time.

    Previously the start date was offset from the birth date itself rather
    than from the earliest allowed start, and the "earliest start" used a
    random 0-18 year offset, so the start could fall anywhere from the birth
    date onward. Both are fixed here: the start is never less than 18
    (365-day) years after birth.

    Returns:
        dict: keys 'birth_date' and 'start_date', both formatted MM/DD/YYYY.
    """
    date_format = '%m/%d/%Y'

    # Roughly a 40-year window of possible birth instants, in seconds.
    birth = datetime(1960, 1, 1) + timedelta(seconds=random.randint(0, 1261600000))

    # Nobody starts before turning 18.
    earliest_start = birth + timedelta(days=365 * 18)

    # Seconds available between the earliest start and now. Clamped at zero
    # so randint never receives an empty range if the earliest start is not
    # yet in the past.
    window_seconds = max(int((datetime.now() - earliest_start).total_seconds()), 0)

    start = earliest_start + timedelta(seconds=random.randint(0, window_seconds))
    return {
        'birth_date': birth.strftime(date_format),
        'start_date': start.strftime(date_format),
    }
def title_office_org():
    """Pick a random office, then an org in that office, then a title in that org.

    The three choices are hierarchical: the org is drawn only from the orgs
    allowed for the chosen office, and the title only from the titles
    allowed for the chosen org.

    Returns:
        dict: keys 'office', 'title' and 'org' (in that order).
    """
    office_names = ['New York', 'Austin', 'Seattle', 'Chicago']

    # Which orgs exist in each office.
    orgs_by_office = {
        'New York': ['Sales'],
        'Austin': ['Devops', 'Platform', 'Product', 'Internal Tools'],
        'Chicago': ['Devops'],
        'Seattle': ['Internal Tools', 'Product'],
    }

    # Which titles exist in each org.
    titles_by_org = {
        'Devops': ['Engineer', 'Senior Engineer', 'Manager'],
        'Sales': ['Associate'],
        'Platform': ['Engineer'],
        'Product': ['Manager', 'VP'],
        'Internal Tools': ['Engineer', 'Senior Engineer', 'VP', 'Manager'],
    }

    chosen_office = random.choice(office_names)
    chosen_org = random.choice(orgs_by_office[chosen_office])
    chosen_title = random.choice(titles_by_org[chosen_org])
    return dict(office=chosen_office, title=chosen_title, org=chosen_org)
def salary_and_bonus():
    """Generate a salary and a bonus derived from it.

    The salary is a random integer from 90000 through 120000, rounded to the
    nearest 1000. The bonus is 15%-20% of that salary, rounded to the
    nearest 500.

    Returns:
        dict: keys 'salary' and 'bonus' (in that order).
    """
    raw_salary = random.randint(90000, 120000)
    salary = 1000 * round(raw_salary / 1000)

    ratio = random.uniform(0.15, 0.2)
    bonus = 500 * round(salary * ratio / 500)

    return {'salary': salary, 'bonus': bonus}
def title_office_org_salary_bonus():
    """Generate a position plus a salary and bonus appropriate to its title.

    Starts from the dict returned by ``title_office_org`` and adds a salary
    drawn from the range configured for the chosen title (rounded to the
    nearest 1000) and a bonus of 15%-20% of that salary (rounded to the
    nearest 500).

    Returns:
        dict: the ``title_office_org`` result extended with 'salary' and
        'bonus' keys, added in that order.
    """
    # Salary bounds per title as (low, high), expressed in thousands.
    salary_bounds_k = {
        'Engineer': [90, 120],
        'Senior Engineer': [110, 140],
        'Manager': [130, 150],
        'Associate': [60, 80],
        'VP': [150, 250],
    }

    record = title_office_org()
    low_k, high_k = salary_bounds_k[record['title']]

    raw_salary = random.randint(1000 * low_k, 1000 * high_k)
    salary = 1000 * round(raw_salary / 1000)

    ratio = random.uniform(0.15, 0.2)
    bonus = 500 * round(salary * ratio / 500)

    record['salary'] = salary
    record['bonus'] = bonus
    return record
# Ordered registry of generators. Each entry is a zero-argument callable that
# returns a dict holding one or more column values; the insertion order here
# determines the column order of every printed row.
d = {
    'first_name_and_gender': first_name_and_gender,
    'last_name': lambda: {'last_name': fake.last_name()},
    'personal_email': lambda: {'email': fake.email()},
    'ssn': lambda: {'ssn': fake.ssn()},
    'birth_and_start_date': birth_and_start_date,
    'title_office_org_salary_bonus': title_office_org_salary_bonus,
    'accrued_holidays': lambda: {'accrued_holiday': random.randint(0, 20)},
}

numRows = 100000
for _ in range(numRows):
    # Call each generator once, in registry order, and flatten the values of
    # the returned dicts into a single row list.
    row = [value for generate in d.values() for value in generate().values()]
    print(row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment