Created
September 26, 2024 21:11
-
-
Save TheRockStarDBA/49b8829cafbdcb79ea2cb587aba4f579 to your computer and use it in GitHub Desktop.
Revisions
-
TheRockStarDBA created this gist
Sep 26, 2024 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,64 @@ -- ref: https://github.com/jimtodd92/airflow_metadata_mining/blob/main/airflow_metatdata_mining.sql -- To get Critical user activities like paused and dagrun_clear in past 24 hours. SELECT event ,DATE (dttm) ,count(*) FROM PUBLIC.log WHERE DATE (dttm) BETWEEN ( SELECT max(DATE (dttm)) - 1 FROM PUBLIC.log ) AND ( SELECT max(DATE (dttm)) FROM PUBLIC.log ) AND event IN ( 'paused' ,'dagrun_clear' ) GROUP BY event ,DATE (dttm) ORDER BY DATE (dttm) DESC; -- To get 10 long ran tasks in a day. SELECT dag_id ,task_id ,execution_date ,round(cast(duration / (60 * 60) AS NUMERIC), 2) AS duration_in_hour FROM PUBLIC.task_instance WHERE DATE (execution_date) = '2022-01-03' AND STATE IN ('success') AND duration IS NOT NULL GROUP BY task_id ,dag_id ,execution_date ORDER BY duration DESC limit 10; -- To understand how occupied the airflow scheduler is, and decide a maintenance window. SELECT x.start_time_window ,count(*) FROM ( SELECT start_date ,CASE WHEN extract(hour FROM start_date) BETWEEN 0 AND 5 THEN '0-5' WHEN extract(hour FROM start_date) BETWEEN 6 AND 11 THEN '6-11' WHEN extract(hour FROM start_date) BETWEEN 12 AND 17 THEN '12-17' WHEN extract(hour FROM start_date) BETWEEN 18 AND 23 THEN '18-23' END AS start_time_window FROM PUBLIC.task_instance WHERE execution_date BETWEEN '2022-01-01' AND '2022-01-02' AND start_date IS NOT NULL ) AS x GROUP BY x.start_time_window ORDER BY cast(split_part(x.start_time_window, '-', 1) AS INT)