Skip to content

Instantly share code, notes, and snippets.

@thoroc
Created April 29, 2022 14:54
Show Gist options
  • Save thoroc/19a8401f3d4724a0255764d6e5317025 to your computer and use it in GitHub Desktop.
Save thoroc/19a8401f3d4724a0255764d6e5317025 to your computer and use it in GitHub Desktop.

Revisions

  1. thoroc created this gist Apr 29, 2022.
    71 changes: 71 additions & 0 deletions provider.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,71 @@
    # Based on the following:
    # https://www.datainsightonline.com/post/how-to-generate-fake-dataset-with-python-faker-library
    # https://deparkes.co.uk/2020/12/28/python-fake-data-with-faker/

    from faker.providers import BaseProvider
    from faker import Faker
    import pandas as pd
    from loguru import logger

    # Shared Faker instance, configured for the British English locale.
    fake = Faker("en_GB")

    # Each entry pairs a log template with a zero-argument generator, so the
    # preview loop below is written once instead of once per kind of value.
    samplers = (
        ("order: %s", lambda: fake.bothify(text="ord-###")),
        ("time: %s", lambda: fake.date_between(start_date="-2y", end_date="today")),
        ("name: %s", lambda: fake.name()),
    )

    # Log five samples of each kind, one kind at a time (orders, then dates,
    # then names).
    for template, draw in samplers:
        for _ in range(5):
            logger.info(template % draw())


    class MyProvider(BaseProvider):
        """Custom Faker provider for shop item categories and item names.

        Once registered on a ``Faker`` instance it exposes three generators:
        ``item_category()``, ``food()`` and ``fruit()``. Each one returns a
        single entry drawn via ``random_element`` from the matching class-level
        list below.
        """

        # A class attribute can hold only one value, so the provider declares a
        # single identifier here rather than one per generator method.
        __provider__ = "fruit"

        # Value pools the generator methods draw from.
        item_categories = ["food", "fruit"]
        foods = ["rice", "yam", "beans", "spaghetti"]
        fruits = ["orange", "mango", "banana", "apple"]

        def item_category(self) -> str:
            """Return one entry of ``item_categories``."""
            return self.random_element(self.item_categories)

        def food(self) -> str:
            """Return one entry of ``foods``."""
            return self.random_element(self.foods)

        def fruit(self) -> str:
            """Return one entry of ``fruits``."""
            return self.random_element(self.fruits)


    # Make the MyProvider generators (item_category, food, fruit) callable
    # directly on the shared ``fake`` instance.
    fake.add_provider(MyProvider)


    # Log five sample categories to confirm the provider is wired up.
    for _ in range(5):
        category = fake.item_category()
        logger.info("category: %s" % category)


    def link_variables():
        """Return a category together with an item name that belongs to it.

        The category is drawn first; the item name then comes from the fruit
        generator when the category is "fruit" and from the food generator
        otherwise, so the two fields of the result always agree.

        Returns:
            dict: ``{"Item_Category": <category>, "Item_Name": <item>}``.
        """
        category = fake.item_category()
        record = {"Item_Category": category}
        if category == "fruit":
            record["Item_Name"] = fake.fruit()
        else:
            record["Item_Name"] = fake.food()
        return record


    # Log five sample category/item pairs.
    for _ in range(5):
        logger.info("variables: %s" % link_variables())

    # Build 100 order rows. Each row is a fresh dict, so the linked
    # category/item fields are merged straight into it; no intermediate copy
    # or key-by-key loop is needed. Dict displays evaluate left to right, so
    # the order fields are generated before link_variables() is called.
    thelist = [
        {
            "Order_ID": fake.bothify(text="ord-###"),
            "Order_Date": fake.date_between(start_date="-2y", end_date="today"),
            "Customer_Name": fake.name(),
            **link_variables(),
        }
        for _ in range(100)
    ]

    # One DataFrame row per generated order; log the first ten as a preview.
    dataset_frame = pd.DataFrame(thelist)
    logger.info("\n%s" % dataset_frame.head(10))