# Based on the following:
# https://www.datainsightonline.com/post/how-to-generate-fake-dataset-with-python-faker-library
# https://deparkes.co.uk/2020/12/28/python-fake-data-with-faker/
"""Generate a small fake order dataset with Faker and preview it via loguru.

The script logs a few sample values for each field, registers a custom
provider for item categories and names, and then builds a pandas DataFrame
of fake orders whose item name is consistent with its item category.
"""
from faker import Faker
from faker.providers import BaseProvider
from loguru import logger
import pandas as pd

fake = Faker("en_GB")

# Number of sample values logged for each preview section.
SAMPLE_COUNT = 5
# Number of rows generated for the final dataset.
ROW_COUNT = 100


def _order_id():
    """Return a fake order id built from the ``ord-###`` pattern."""
    return fake.bothify(text="ord-###")


def _order_date():
    """Return a fake date between two years ago and today."""
    return fake.date_between(start_date="-2y", end_date="today")


for _ in range(SAMPLE_COUNT):
    logger.info(f"order: {_order_id()}")

for _ in range(SAMPLE_COUNT):
    logger.info(f"time: {_order_date()}")

for _ in range(SAMPLE_COUNT):
    logger.info(f"name: {fake.name()}")


class MyProvider(BaseProvider):
    """Faker provider exposing ``item_category``, ``food`` and ``fruit``."""

    # The original assigned ``__provider__`` three times in a row; only the
    # last assignment took effect, so the effective value is kept and the two
    # dead assignments are dropped.
    __provider__ = "fruit"

    # Pools the generator methods below draw from.
    item_categories = ["food", "fruit"]
    foods = ["rice", "yam", "beans", "spaghetti"]
    fruits = ["orange", "mango", "banana", "apple"]

    def item_category(self):
        """Return a random entry of ``item_categories``."""
        return self.random_element(self.item_categories)

    def food(self):
        """Return a random entry of ``foods``."""
        return self.random_element(self.foods)

    def fruit(self):
        """Return a random entry of ``fruits``."""
        return self.random_element(self.fruits)


fake.add_provider(MyProvider)

for _ in range(SAMPLE_COUNT):
    logger.info(f"category: {fake.item_category()}")


def link_variables():
    """Return a category and an item name that belongs to that category.

    Returns:
        A dict with keys ``"Item_Category"`` and ``"Item_Name"``; the name is
        drawn from the fruits when the category is ``"fruit"`` and from the
        foods otherwise.
    """
    item_cat = fake.item_category()
    item = fake.fruit() if item_cat == "fruit" else fake.food()
    return {"Item_Category": item_cat, "Item_Name": item}


for _ in range(SAMPLE_COUNT):
    logger.info(f"variables: {link_variables()}")

# Build the rows; the dict display evaluates left to right, so the Faker calls
# happen in the same order as before (id, date, name, then the linked pair).
thelist = [
    {
        "Order_ID": _order_id(),
        "Order_Date": _order_date(),
        "Customer_Name": fake.name(),
        **link_variables(),
    }
    for _ in range(ROW_COUNT)
]

dataset_frame = pd.DataFrame(thelist)
logger.info(f"\n{dataset_frame.head(10)}")