Skip to content

Instantly share code, notes, and snippets.

@PeterAttardo
Last active October 22, 2024 14:38
Show Gist options
  • Select an option

  • Save PeterAttardo/bc9cee647e1a8c9f5874b80a0cfd0231 to your computer and use it in GitHub Desktop.

Select an option

Save PeterAttardo/bc9cee647e1a8c9f5874b80a0cfd0231 to your computer and use it in GitHub Desktop.

Revisions

  1. PeterAttardo revised this gist Apr 11, 2017. 1 changed file with 42 additions and 3 deletions.
    45 changes: 42 additions & 3 deletions dim_calendar.hql
    Original file line number Diff line number Diff line change
    @@ -1,14 +1,53 @@

    set hivevar:start_date=0000-01-01;
    set hivevar:days=1000000;
    set hivevar:table_name=[INSERT YOUR TABLE NAME HERE];

    -- If you are running a version of HIVE prior to 1.2, comment out all uses of date_format() and uncomment the lines below for equivalent functionality

    CREATE TABLE IF NOT EXISTS ${table_name} AS
    WITH dates AS (
    SELECT date_add("${start_date}", a.pos) as date
    FROM (SELECT posexplode(split(repeat(",", ${days}), ","))) a
    )
    ),
    dates_expanded AS (
    SELECT
    date,
    year(date) as year,
    month(date) as month,
    day(date) as day,
    date_format(date, 'u') as day_of_week
    -- from_unixtime(unix_timestamp(date, "yyyy-MM-dd"), "u") as day_of_week
    FROM dates
    )
    SELECT
    date
    FROM dates
    date,
    year,
    cast(month(date)/4 + 1 AS BIGINT) as quarter,
    month,
    date_format(date, 'W') as week_of_month,
    -- from_unixtime(unix_timestamp(date, "yyyy-MM-dd"), "W") as week_of_month,
    date_format(date, 'w') as week_of_year,
    -- from_unixtime(unix_timestamp(date, "yyyy-MM-dd"), "w") as week_of_year,
    day,
    day_of_week,
    date_format(date, 'EEE') as day_of_week_s,
    -- from_unixtime(unix_timestamp(date, "yyyy-MM-dd"), "EEE") as day_of_week_s,
    date_format(date, 'D') as day_of_year,
    -- from_unixtime(unix_timestamp(date, "yyyy-MM-dd"), "D") as day_of_year,
    datediff(date, "1970-01-01") as day_of_epoch,
    if(day_of_week BETWEEN 6 AND 7, true, false) as weekend,
    if(
    ((month = 1 AND day = 1 AND day_of_week between 1 AND 5) OR (day_of_week = 1 AND month = 1 AND day BETWEEN 1 AND 3)) -- New Year's Day
    OR (month = 1 AND day_of_week = 1 AND day BETWEEN 15 AND 21) -- MLK Jr
    OR (month = 2 AND day_of_week = 1 AND day BETWEEN 15 AND 21) -- President's Day
    OR (month = 5 AND day_of_week = 1 AND day BETWEEN 25 AND 31) -- Memorial Day
    OR ((month = 7 AND day = 4 AND day_of_week between 1 AND 5) OR (day_of_week = 1 AND month = 7 AND day BETWEEN 4 AND 6)) -- Independence Day
    OR (month = 9 AND day_of_week = 1 AND day BETWEEN 1 AND 7) -- Labor Day
    OR ((month = 11 AND day = 11 AND day_of_week between 1 AND 5) OR (day_of_week = 1 AND month = 11 AND day BETWEEN 11 AND 13)) -- Veteran's Day
    OR (month = 11 AND day_of_week = 4 AND day BETWEEN 22 AND 28) -- Thanksgiving
    OR ((month = 12 AND day = 25 AND day_of_week between 1 AND 5) OR (day_of_week = 1 AND month = 12 AND day BETWEEN 25 AND 27)) -- Christmas
    ,true, false) as us_holiday
    FROM dates_expanded
    SORT BY date
    ;
  2. PeterAttardo created this gist Dec 20, 2016.
    14 changes: 14 additions & 0 deletions dim_calendar.hql
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,14 @@
    set hivevar:start_date=0000-01-01;
    set hivevar:days=1000000;
    set hivevar:table_name=[INSERT YOUR TABLE NAME HERE];

    CREATE TABLE IF NOT EXISTS ${table_name} AS
    WITH dates AS (
    SELECT date_add("${start_date}", a.pos) as date
    FROM (SELECT posexplode(split(repeat(",", ${days}), ","))) a
    )
    SELECT
    date
    FROM dates
    SORT BY date
    ;