Adding loading of positions and workers, and updated transform for

departments to only consider last loaded departments
This commit is contained in:
2025-06-12 16:32:45 +02:00
parent 33b22855a6
commit 3b3a9dd7c1
2 changed files with 36 additions and 7 deletions

View File

@@ -13,10 +13,29 @@ environments:
name: departments
pattern: Applicable Organizations.xlsx
start_date: '2000-01-01T00:00:00Z'
key_properties:
- id
key_properties: []
format: excel
worksheet_name: AO
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
name: positions
pattern: "HR002.*"
start_date: '2000-01-01T00:00:00Z'
key_properties: []
format: excel
worksheet_name: HR002 - Positions Report (INTER
skip_initial: 8
sample_rate: 1
max_sampling_read: 1000
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
name: workers
pattern: "HR006.*"
start_date: '2000-01-01T00:00:00Z'
key_properties: []
format: excel
worksheet_name: HR006 - All Users Report (Activ
skip_initial: 8
sample_rate: 1
max_sampling_read: 25000
select:
- departments.*
- '!departments._*'
@@ -25,7 +44,7 @@ environments:
config:
database: lakehouse_sarens
host: localhost
load_method: upsert
load_method: append-only
user: lakehouse_sarens
default_target_schema: public
utilities:

View File

@@ -4,15 +4,25 @@
)
}}
with recursive department_tree as (
-- Anchor: top-level departments (no parent_id)
with recursive
latest_departments as (
select *
from (
select *,
row_number() over (partition by id order by _sdc_received_at desc) as rn
from {{ source('tap_spreadsheets_everywhere', 'departments') }}
) t
where rn = 1
),
department_tree as (
-- Anchor: top-level department (parent_id is set to Sarens Group in the Excel)
select
id as department_hris_id,
applicable_organization as name,
null::text as cost_center,
null::text as manager_id,
id::text as path -- start path with own ID
from {{ source('tap_spreadsheets_everywhere', 'departments') }}
from latest_departments
where parent_id = 'Sarens Group'
union all
@@ -24,7 +34,7 @@ with recursive department_tree as (
null::text as cost_center,
null::text as manager_id,
dt.path || '.' || d.id::text as path
from {{ source('tap_spreadsheets_everywhere', 'departments') }} d
from latest_departments d
join department_tree dt
on d.parent_id = dt.department_hris_id
)