Created
April 29, 2022 14:54
-
-
Save thoroc/19a8401f3d4724a0255764d6e5317025 to your computer and use it in GitHub Desktop.
Revisions
-
thoroc created this gist
Apr 29, 2022. There are no files selected for viewing
# Based on the following:
# https://www.datainsightonline.com/post/how-to-generate-fake-dataset-with-python-faker-library
# https://deparkes.co.uk/2020/12/28/python-fake-data-with-faker/
"""Generate a small fake orders dataset with Faker and preview it via loguru.

Running the script logs a few sample values from each generator, builds a
pandas DataFrame of fake orders, and logs its first ten rows.
"""
from faker import Faker
from faker.providers import BaseProvider
from loguru import logger
import pandas as pd

# Number of sample values logged for each demo generator.
SAMPLE_COUNT = 5
# Number of rows in the generated dataset.
ROW_COUNT = 100

fake = Faker("en_GB")

# Demo: log a few values from the built-in generators used for the dataset.
for _ in range(SAMPLE_COUNT):
    order_id = fake.bothify(text="ord-###")
    logger.info(f"order: {order_id}")

for _ in range(SAMPLE_COUNT):
    order_date = fake.date_between(start_date="-2y", end_date="today")
    logger.info(f"time: {order_date}")

for _ in range(SAMPLE_COUNT):
    customer_name = fake.name()
    logger.info(f"name: {customer_name}")


class MyProvider(BaseProvider):
    """Custom Faker provider exposing item categories, foods and fruits."""

    # The original assigned ``__provider__`` three times in a row
    # ("item_category", "food", "fruit"); only the last assignment took
    # effect, so that effective value is kept and the dead stores are gone.
    __provider__ = "fruit"

    item_categories = ["food", "fruit"]
    foods = ["rice", "yam", "beans", "spaghetti"]
    fruits = ["orange", "mango", "banana", "apple"]

    def item_category(self):
        """Return a random entry from ``item_categories``."""
        return self.random_element(self.item_categories)

    def food(self):
        """Return a random entry from ``foods``."""
        return self.random_element(self.foods)

    def fruit(self):
        """Return a random entry from ``fruits``."""
        return self.random_element(self.fruits)


fake.add_provider(MyProvider)

for _ in range(SAMPLE_COUNT):
    category = fake.item_category()
    logger.info(f"category: {category}")


def link_variables():
    """Return a category together with an item name that matches it.

    Returns:
        A dict with keys ``"Item_Category"`` and ``"Item_Name"``; the name is
        drawn from the fruits when the category is ``"fruit"`` and from the
        foods otherwise.
    """
    item_cat = fake.item_category()
    item = fake.fruit() if item_cat == "fruit" else fake.food()
    return {"Item_Category": item_cat, "Item_Name": item}


for _ in range(SAMPLE_COUNT):
    linked = link_variables()
    logger.info(f"variables: {linked}")

# Build the dataset: one dict per order row, order fields first and the
# linked category/item fields after them (same generation order as before).
thelist = []
for _ in range(ROW_COUNT):
    dataset = {
        "Order_ID": fake.bothify(text="ord-###"),
        "Order_Date": fake.date_between(start_date="-2y", end_date="today"),
        "Customer_Name": fake.name(),
    }
    dataset.update(link_variables())
    thelist.append(dataset)

dataset_frame = pd.DataFrame(thelist)
preview = dataset_frame.head(10)
logger.info(f"\n{preview}")