meltano run tap-spreadsheets-anywhere target-postgres or meltano run tap-spreadsheets-anywhere target-postgres --full-refresh
meltano invoke dbt-postgres:run
meltano --environment=sarens-demo run tap-spreadsheets-anywhere target-postgres dbt-postgres:run
Refresh / Overwrite doesn't seem to work, so manual fix here: TRUNCATE TABLE raw.absenteisme; TRUNCATE TABLE raw.departments; TRUNCATE TABLE raw.performance_review_steps; TRUNCATE TABLE raw.performance_review_sub_scoring; TRUNCATE TABLE raw.performance_review_total_scoring; TRUNCATE TABLE raw.positions; TRUNCATE TABLE raw.workers;
DROP TABLE raw.absenteisme; DROP TABLE raw.departments; DROP TABLE raw.performance_review_steps; DROP TABLE raw.performance_review_sub_scoring; DROP TABLE raw.performance_review_total_scoring; DROP TABLE raw.positions; DROP TABLE raw.workers;
Data quality checks:
-- Total worker count SELECT count(*) from clean.worker;
-- Active workers SELECT count(*) from clean.worker where employment_exit_date is null;
-- Number of job change / position change records WITH history_counts AS ( SELECT w.id, count() as history_count from clean.worker w left join clean.job_change jc on w.id = jc.worker_id group by w.id ) SELECT history_count, count() from history_counts group by history_count order by history_count;
-- Years at the company WITH yac AS ( SELECT w.id, EXTRACT('YEAR' FROM AGE(COALESCE(employment_exit_date, CURRENT_DATE), employment_start)) AS years_at_company FROM clean.worker w ) SELECT yac.years_at_company, COUNT(*) FROM yac GROUP BY yac.years_at_company ORDER BY yac.years_at_company
-- Worker id's with < 0 years at company or > 60 years WITH yac AS ( SELECT w.id, w.worker_hris_id, w.employment_start, w.employment_exit_date, EXTRACT('YEAR' FROM AGE(COALESCE(employment_exit_date, CURRENT_DATE), employment_start)) AS years_at_company FROM clean.worker w ) SELECT * from yac where years_at_company < 0 or years_at_company > 60;
-- Performance review: number of steps loaded select c.name, s.name, count(*) from performance_review_step s inner join performance_review r on r.id = s.review_id inner join performance_cycle c on c.id = r.cycle_id group by c.name, s.name, s.sequence_number order by c.name, s.sequence_number;
-- Steps not linked to a review select count(*) from performance_review_step where review_id is null;