From e72bb37106d0d18c6c36f972d6277b1009a58002 Mon Sep 17 00:00:00 2001 From: Jeroen Date: Thu, 19 Jun 2025 11:25:00 +0200 Subject: [PATCH] added absenteism loading --- meltano.yml | 9 ++ .../tap_spreadsheets_anywhere/source.yml | 3 +- .../time_registration.sql | 117 ++++++++++++++++++ 3 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 transform/models/tap_spreadsheets_anywhere/time_registration.sql diff --git a/meltano.yml b/meltano.yml index 185629e..182a75d 100644 --- a/meltano.yml +++ b/meltano.yml @@ -66,6 +66,15 @@ environments: skip_initial: 8 sample_rate: 1 max_sampling_read: 25000 + - path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/ + name: absenteisme + pattern: "Absenteisme.*" + start_date: '2000-01-01T00:00:00Z' + key_properties: [] + format: excel + worksheet_name: Tabel + sample_rate: 1 + max_sampling_read: 25000 select: - departments.* - '!departments._*' diff --git a/transform/models/tap_spreadsheets_anywhere/source.yml b/transform/models/tap_spreadsheets_anywhere/source.yml index be89f7c..9c7e6d9 100644 --- a/transform/models/tap_spreadsheets_anywhere/source.yml +++ b/transform/models/tap_spreadsheets_anywhere/source.yml @@ -9,4 +9,5 @@ sources: - name: workers - name: performance_review_steps - name: performance_review_total_scoring - - name: performance_review_sub_scoring \ No newline at end of file + - name: performance_review_sub_scoring + - name: absenteisme \ No newline at end of file diff --git a/transform/models/tap_spreadsheets_anywhere/time_registration.sql b/transform/models/tap_spreadsheets_anywhere/time_registration.sql new file mode 100644 index 0000000..afe5ecf --- /dev/null +++ b/transform/models/tap_spreadsheets_anywhere/time_registration.sql @@ -0,0 +1,117 @@ +{{ + config( + materialized='table' + ) +}} + + +-- This model explodes the absenteism overview into a daily time registration per Belgian workday +-- covering all records in `raw.absenteisme` (starting June 2024) and maps them to workers. + +with abs as ( + select + ax_nummer::text as ax_nummer, + to_date(begindatum_historische_rubrieken, 'YYYY-MM-DD')::date as period_start, + to_date(einddatum_historische_rubrieken, 'YYYY-MM-DD')::date as period_end, + coalesce("0120_gewaarborgd_maandloon_ziekte", 0)::int as paid_sick_days, + coalesce("0500_gelijkgestelde_dag_ziekte", 0)::int as unpaid_sick_days + from {{ source('tap_spreadsheets_anywhere', 'absenteisme') }} + where begindatum_historische_rubrieken is not null +), + +calendar as ( + -- generate every calendar day of each record's date range + select + a.ax_nummer, + a.period_start, + a.period_end, + a.paid_sick_days, + a.unpaid_sick_days, + generate_series( + a.period_start, + a.period_end, + interval '1 day' + )::date as date + from abs a +), + +holidays as ( + -- Hard‑coded Belgian public holidays for 2024‑2025 + select date '2024-01-01' as holiday_date union all + select date '2024-04-01' union all + select date '2024-05-01' union all + select date '2024-05-09' union all + select date '2024-05-20' union all + select date '2024-07-21' union all + select date '2024-08-15' union all + select date '2024-11-01' union all + select date '2024-11-11' union all + select date '2024-12-25' union all + select date '2025-01-01' union all + select date '2025-04-21' union all + select date '2025-05-01' union all + select date '2025-05-29' union all + select date '2025-06-09' union all + select date '2025-07-21' union all + select date '2025-08-15' union all + select date '2025-11-01' union all + select date '2025-11-11' union all + select date '2025-12-25' +), + +workdays as ( + -- Filter to Belgian working days (Mon‑Fri excluding public holidays) + select + c.ax_nummer, + c.period_start, + c.date, + c.paid_sick_days, + c.unpaid_sick_days + from calendar c + left join holidays h on c.date = h.holiday_date + where extract(dow from c.date) not in (0, 6) -- 0 = Sunday, 6 = Saturday + and h.holiday_date is null +), + +indexed as ( + -- Give each workday an index within its record for sick‑day allocation + select + w.*, + row_number() over (partition by ax_nummer, period_start order by date) as rn + from workdays w +), + +typed as ( + -- Translate the indices into the requested day type + select + w.date, + 8 as hours, + 100 as percentage_workday, + case + when rn <= paid_sick_days then 'Paid sick day' + when rn <= paid_sick_days + unpaid_sick_days then 'Unpaid sick day' + else 'Work' + end as type, + w.ax_nummer + from indexed w +), + +with_workers as ( + -- Map to the worker table, stripping leading 0s from the HRIS id + select + t.*, + wk.id as worker_id + from typed t + join {{ ref('worker') }} wk + on regexp_replace(wk.worker_hris_id, '^0+', '') = t.ax_nummer +) + +select + row_number() over (order by worker_id, date) as id, + date, + hours, + percentage_workday, + type, + worker_id +from with_workers +order by worker_id, date