From 3b3a9dd7c1c088f3bcc434c94e1c56a736bab6d9 Mon Sep 17 00:00:00 2001 From: Jeroen Date: Thu, 12 Jun 2025 16:32:45 +0200 Subject: [PATCH] Adding loading of positions and workers, and updated transform for departments to only consider last loaded departments --- meltano.yml | 25 ++++++++++++++++--- .../manageviews_departments.sql | 18 ++++++++++--- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/meltano.yml b/meltano.yml index 194134c..2ccdc9d 100644 --- a/meltano.yml +++ b/meltano.yml @@ -13,10 +13,29 @@ environments: name: departments pattern: Applicable Organizations.xlsx start_date: '2000-01-01T00:00:00Z' - key_properties: - - id + key_properties: [] format: excel worksheet_name: AO + - path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/ + name: positions + pattern: "HR002.*" + start_date: '2000-01-01T00:00:00Z' + key_properties: [] + format: excel + worksheet_name: HR002 - Positions Report (INTER + skip_initial: 8 + sample_rate: 1 + max_sampling_read: 1000 + - path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/ + name: workers + pattern: "HR006.*" + start_date: '2000-01-01T00:00:00Z' + key_properties: [] + format: excel + worksheet_name: HR006 - All Users Report (Activ + skip_initial: 8 + sample_rate: 1 + max_sampling_read: 25000 select: - departments.* - '!departments._*' @@ -25,7 +44,7 @@ environments: config: database: lakehouse_sarens host: localhost - load_method: upsert + load_method: append-only user: lakehouse_sarens default_target_schema: public utilities: diff --git a/transform/models/tap_spreadsheets_everywhere/manageviews_departments.sql b/transform/models/tap_spreadsheets_everywhere/manageviews_departments.sql index 49bc4a2..ed81b28 100644 --- a/transform/models/tap_spreadsheets_everywhere/manageviews_departments.sql +++ b/transform/models/tap_spreadsheets_everywhere/manageviews_departments.sql @@ -4,15 +4,25 @@ ) }} -with recursive department_tree as ( - -- Anchor: top-level departments (no parent_id) +with recursive +latest_departments as ( + select * + from ( + select *, + row_number() over (partition by id order by _sdc_received_at desc) as rn + from {{ source('tap_spreadsheets_everywhere', 'departments') }} + ) t + where rn = 1 +), +department_tree as ( + -- Anchor: top-level department (parent_id is set to Sarens Group in the Excel) select id as department_hris_id, applicable_organization as name, null::text as cost_center, null::text as manager_id, id::text as path -- start path with own ID - from {{ source('tap_spreadsheets_everywhere', 'departments') }} + from latest_departments where parent_id = 'Sarens Group' union all @@ -24,7 +34,7 @@ with recursive department_tree as ( null::text as cost_center, null::text as manager_id, dt.path || '.' || d.id::text as path - from {{ source('tap_spreadsheets_everywhere', 'departments') }} d + from latest_departments d join department_tree dt on d.parent_id = dt.department_hris_id )