diff --git a/README.md b/README.md
index b109120..0485261 100644
--- a/README.md
+++ b/README.md
@@ -26,3 +26,53 @@ DROP TABLE raw.performance_review_sub_scoring;
 DROP TABLE raw.performance_review_total_scoring;
 DROP TABLE raw.positions;
 DROP TABLE raw.workers;
+
+
+
+Data quality checks:
+
+-- Total worker count
+SELECT count(*)
+from clean.worker;
+
+-- Active workers
+SELECT count(*)
+from clean.worker
+where employment_exit_date is null;
+
+-- Number of job change / position change records per worker (0 = worker has no job_change rows)
+WITH history_counts AS (
+    SELECT w.id, count(jc.worker_id) as history_count
+    from clean.worker w
+    left join clean.job_change jc on w.id = jc.worker_id
+    group by w.id
+)
+SELECT history_count, count(*)
+from history_counts
+group by history_count
+order by history_count;
+
+-- Years at the company (completed whole years; measured to the exit date, or to today when it is null)
+WITH yac AS (
+    SELECT
+        w.id,
+        EXTRACT('YEAR' FROM AGE(COALESCE(employment_exit_date, CURRENT_DATE), employment_start)) AS years_at_company
+    FROM clean.worker w
+)
+SELECT
+    yac.years_at_company,
+    COUNT(*)
+FROM yac
+GROUP BY yac.years_at_company
+ORDER BY yac.years_at_company;
+
+-- Worker IDs with < 0 years at company or > 60 years
+WITH yac AS (
+    SELECT
+        w.id, w.worker_hris_id, w.employment_start, w.employment_exit_date,
+        EXTRACT('YEAR' FROM AGE(COALESCE(employment_exit_date, CURRENT_DATE), employment_start)) AS years_at_company
+    FROM clean.worker w
+)
+SELECT *
+from yac
+where years_at_company < 0 or years_at_company > 60;
\ No newline at end of file
diff --git a/transform/models/tap_spreadsheets_anywhere/department.sql b/transform/models/tap_spreadsheets_anywhere/department.sql
index 366bd2c..82888f8 100644
--- a/transform/models/tap_spreadsheets_anywhere/department.sql
+++ b/transform/models/tap_spreadsheets_anywhere/department.sql
@@ -13,6 +13,7 @@ latest_departments as (
         from {{ source('tap_spreadsheets_anywhere', 'departments') }}
     ) t
     where rn = 1
+      and id not in ('CAD', 'CSAD') -- NOTE(review): hard-coded exclusions, reason not visible here; also drops rows whose id is NULL - confirm intended
 ),
 department_tree as (
     -- Anchor: top-level department (parent_id is set to Sarens Group in the Excel)