Compare commits
24 Commits
2f2716a62b
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| b3ad962b41 | |||
| eaaf51f699 | |||
| 461a7eb9aa | |||
| 9a7059dcdf | |||
| 735250c4c7 | |||
| aa19a53d51 | |||
| 44d9f2c21a | |||
| f9834cac10 | |||
| a222a43727 | |||
| 8df02c97e8 | |||
| bc228215ec | |||
| d7e2b69dd0 | |||
| 2b8f26e9fe | |||
| 3486b4f509 | |||
| 2da0aff227 | |||
| 14fb081542 | |||
| aef1d13279 | |||
| 506dda094d | |||
| b256e70e46 | |||
| 65262b2c99 | |||
| 37c0c3826f | |||
| 23e707b7fe | |||
| cc4aadc708 | |||
| 04c0499cce |
85
README.md
85
README.md
@@ -1,6 +1,91 @@
|
||||
meltano run tap-spreadsheets-anywhere target-postgres
|
||||
or
|
||||
meltano run tap-spreadsheets-anywhere target-postgres --full-refresh
|
||||
|
||||
meltano invoke dbt-postgres:run
|
||||
|
||||
|
||||
|
||||
meltano --environment=sarens-demo run tap-spreadsheets-anywhere target-postgres dbt-postgres:run
|
||||
|
||||
|
||||
Refresh / overwrite doesn't seem to work, so as a manual workaround truncate (or drop) the raw tables:
|
||||
-- Manual fix: empty each table in the raw schema (rows removed, tables kept).
TRUNCATE TABLE raw.absenteisme;
TRUNCATE TABLE raw.departments;
TRUNCATE TABLE raw.performance_review_steps;
TRUNCATE TABLE raw.performance_review_sub_scoring;
TRUNCATE TABLE raw.performance_review_total_scoring;
TRUNCATE TABLE raw.positions;
TRUNCATE TABLE raw.workers;
|
||||
|
||||
|
||||
-- Manual fix: drop the raw tables outright.
-- NOTE(review): presumably the next load recreates them — confirm.
-- IF EXISTS keeps the script re-runnable: a table that is already gone is
-- skipped with a notice instead of raising an error.
DROP TABLE IF EXISTS raw.absenteisme;
DROP TABLE IF EXISTS raw.departments;
DROP TABLE IF EXISTS raw.performance_review_steps;
DROP TABLE IF EXISTS raw.performance_review_sub_scoring;
DROP TABLE IF EXISTS raw.performance_review_total_scoring;
DROP TABLE IF EXISTS raw.positions;
DROP TABLE IF EXISTS raw.workers;
|
||||
|
||||
|
||||
|
||||
Data quality checks:
|
||||
|
||||
-- Total worker count: one row holding the number of rows in clean.worker.
SELECT COUNT(*)
FROM clean.worker;
|
||||
|
||||
-- Active workers: workers that have no exit date recorded.
SELECT COUNT(*)
FROM clean.worker
WHERE employment_exit_date IS NULL;
|
||||
|
||||
-- Number of job change / position change records
-- Shows how many workers have 0, 1, 2, ... job_change rows.
-- COUNT(jc.worker_id) counts only matched job_change rows, so a worker with no
-- job_change record falls into the history_count = 0 bucket. COUNT(*) would
-- report such a worker as 1, because the LEFT JOIN still yields one row.
WITH history_counts AS (
    SELECT
        w.id,
        COUNT(jc.worker_id) AS history_count
    FROM clean.worker AS w
    LEFT JOIN clean.job_change AS jc
        ON w.id = jc.worker_id
    GROUP BY w.id
)
SELECT
    history_count,
    COUNT(*)
FROM history_counts
GROUP BY history_count
ORDER BY history_count;
|
||||
|
||||
-- Years at the company
-- Distribution of workers by the whole-year component of their tenure.
-- Tenure runs from employment_start to employment_exit_date, or to today when
-- no exit date is set.
WITH yac AS (
    SELECT
        w.id,
        EXTRACT('YEAR' FROM AGE(COALESCE(employment_exit_date, CURRENT_DATE), employment_start)) AS years_at_company
    FROM clean.worker w
)
SELECT
    yac.years_at_company,
    COUNT(*)
FROM yac
GROUP BY yac.years_at_company
-- Terminated with ';' so the statement can be pasted together with the queries after it.
ORDER BY yac.years_at_company;
|
||||
|
||||
-- Worker IDs with < 0 years at company or > 60 years
-- Lists the workers whose computed tenure lies outside the 0..60 year range,
-- together with the dates the tenure was derived from.
WITH yac AS (
    SELECT
        w.id,
        w.worker_hris_id,
        w.employment_start,
        w.employment_exit_date,
        EXTRACT('YEAR' FROM AGE(COALESCE(employment_exit_date, CURRENT_DATE), employment_start)) AS years_at_company
    FROM clean.worker w
)
SELECT *
FROM yac
WHERE years_at_company NOT BETWEEN 0 AND 60;
|
||||
|
||||
|
||||
-- Performance review: number of steps loaded
-- One row per (cycle name, step name, step sequence number) with its step count,
-- listed per cycle in step sequence order.
SELECT
    c.name,
    s.name,
    COUNT(*)
FROM performance_review_step AS s
INNER JOIN performance_review AS r
    ON r.id = s.review_id
INNER JOIN performance_cycle AS c
    ON c.id = r.cycle_id
GROUP BY
    c.name,
    s.name,
    s.sequence_number
ORDER BY
    c.name,
    s.sequence_number;
|
||||
|
||||
-- Steps not linked to a review: count of steps whose review_id is missing.
SELECT COUNT(*)
FROM performance_review_step
WHERE review_id IS NULL;
|
||||
@@ -1,5 +1,5 @@
|
||||
x-meltano-image: &meltano-image
|
||||
image: docker.hrlakehouse.com/lakehouse/sarens-integration:latest
|
||||
image: gitea.jvtech.be/lakehouse/sarens-integration:latest
|
||||
|
||||
services:
|
||||
meltano:
|
||||
@@ -10,6 +10,9 @@ services:
|
||||
- db_network
|
||||
volumes:
|
||||
- /home/sarens-data:/sarens-data
|
||||
environment:
|
||||
- TARGET_POSTGRES_PASSWORD=${TARGET_POSTGRES_PASSWORD}
|
||||
- DBT_POSTGRES_PASSWORD=${DBT_POSTGRES_PASSWORD}
|
||||
|
||||
# # Uncomment if you are using the Airflow orchestrator, delete otherwise
|
||||
# airflow-scheduler:
|
||||
|
||||
20
meltano.yml
20
meltano.yml
@@ -9,14 +9,14 @@ environments:
|
||||
- name: tap-spreadsheets-anywhere
|
||||
config:
|
||||
tables:
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: departments
|
||||
pattern: Applicable Organizations.xlsx
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
key_properties: []
|
||||
format: excel
|
||||
worksheet_name: AO
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: positions
|
||||
pattern: "HR002.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
@@ -26,7 +26,7 @@ environments:
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 1000
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: workers
|
||||
pattern: "HR006.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
@@ -36,7 +36,7 @@ environments:
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: performance_review_steps
|
||||
pattern: "PER001.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
@@ -46,7 +46,7 @@ environments:
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: performance_review_total_scoring
|
||||
pattern: "PER002.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
@@ -56,7 +56,7 @@ environments:
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: performance_review_sub_scoring
|
||||
pattern: "PER003.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
@@ -66,7 +66,7 @@ environments:
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: absenteisme
|
||||
pattern: "Absenteisme.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
@@ -80,7 +80,7 @@ environments:
|
||||
config:
|
||||
database: lakehouse_sarens
|
||||
host: localhost
|
||||
load_method: append-only
|
||||
load_method: overwrite
|
||||
user: postgres
|
||||
default_target_schema: raw
|
||||
utilities:
|
||||
@@ -167,9 +167,9 @@ environments:
|
||||
loaders:
|
||||
- name: target-postgres
|
||||
config:
|
||||
database: lakehouse_sarens
|
||||
database: lakehouse-sarens
|
||||
host: lakehouse-sarens-db-1
|
||||
load_method: append-only
|
||||
load_method: overwrite
|
||||
user: lakehouse-sarens
|
||||
default_target_schema: raw
|
||||
utilities:
|
||||
|
||||
@@ -13,6 +13,7 @@ latest_departments as (
|
||||
from {{ source('tap_spreadsheets_anywhere', 'departments') }}
|
||||
) t
|
||||
where rn = 1
|
||||
and id not in ('CAD', 'CSAD')
|
||||
),
|
||||
department_tree as (
|
||||
-- Anchor: top-level department (parent_id is set to Sarens Group in the Excel)
|
||||
|
||||
@@ -17,11 +17,18 @@ latest_departments AS (
|
||||
FROM {{ ref('department') }}
|
||||
),
|
||||
|
||||
positions_deduped AS (
|
||||
SELECT DISTINCT ON (position_id) *
|
||||
-- Note: position_id is not unique, hence the full deduplication logic was removed.
|
||||
-- However, some positions share the same start date while both are flagged as primary position, hence only an arbitrary one is selected for now (temporary workaround).
|
||||
deduplicated_positions AS (
|
||||
SELECT DISTINCT ON (assigned_employee_id, assigned_employee_effective_date, assigned_unit_effective_date)
|
||||
*
|
||||
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
|
||||
WHERE _sdc_deleted_at IS NULL
|
||||
ORDER BY position_id, _sdc_received_at DESC
|
||||
ORDER BY assigned_employee_id,
|
||||
assigned_employee_effective_date DESC,
|
||||
assigned_unit_effective_date DESC,
|
||||
CASE WHEN primary_position = 'Yes' THEN 1 ELSE 0 END DESC,
|
||||
_sdc_received_at DESC
|
||||
),
|
||||
|
||||
transformed_worker AS (
|
||||
@@ -38,41 +45,45 @@ position_details AS (
|
||||
w.contracting_company AS new_contracting_company,
|
||||
d.path::ltree AS new_department_path,
|
||||
d.manager_id::BIGINT AS new_manager_id,
|
||||
p.assigned_employee_effective_date::DATE AS new_job_effective_date,
|
||||
p.fte_utilized_by_employee_in_this_position * 100 AS new_fte_percentage,
|
||||
-- new job effective date:
|
||||
-- When both assigned_employee_effective_date and assigned_unit_effective_date are available, use the latest one
|
||||
-- When only one of those is available, we pick that one (greatest ignores null values)
|
||||
-- If none are available, we pick original_hire_date (this is the case if there is no position record)
|
||||
COALESCE(GREATEST(p.assigned_employee_effective_date, p.assigned_unit_effective_date), w.original_hire_date)::DATE AS new_job_effective_date,
|
||||
COALESCE(p.fte_utilized_by_employee_in_this_position * 100, 100) AS new_fte_percentage, -- Default to 100% if not specified
|
||||
tw.id as worker_id
|
||||
FROM positions_deduped p
|
||||
LEFT JOIN latest_workers w
|
||||
ON p.assigned_employee_id = w.user_id
|
||||
FROM transformed_worker tw
|
||||
LEFT JOIN deduplicated_positions p
|
||||
ON p.assigned_employee_id = tw.worker_hris_id
|
||||
LEFT JOIN latest_departments d
|
||||
ON p.assigned_unit_id = d.department_hris_id
|
||||
LEFT JOIN transformed_worker tw ON tw.worker_hris_id = w.user_id
|
||||
LEFT JOIN latest_workers w ON tw.worker_hris_id = w.user_id
|
||||
),
|
||||
|
||||
job_changes_with_history AS (
|
||||
SELECT
|
||||
*,
|
||||
LAG(position_title) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_function_title,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_function_level, -- Not available
|
||||
LAG(w.user_type) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_type,
|
||||
LAG(new_fte_percentage) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_fte_percentage,
|
||||
LAG(w.location) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_work_location,
|
||||
LAG(NULL::VARCHAR) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_work_location_type,
|
||||
LAG(p.assigned_unit) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_team,
|
||||
LAG(w.depot_cost_center) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_cost_center,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_monthly_gross_fixed,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_yearly_gross_fixed,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_yearly_gross_variable,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_yearly_fully_loaded_cost,
|
||||
LAG(NULL::VARCHAR(3)) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_currency,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_hay_grade,
|
||||
LAG(d.path::ltree) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_department_path,
|
||||
LAG(d.manager_id::BIGINT) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_manager_id,
|
||||
LAG(w.collar_type) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_collar_type,
|
||||
LAG(w.user_contract_type) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_contract_type,
|
||||
LAG(w.contracting_company) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_contracting_company,
|
||||
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY new_job_effective_date DESC) = 1 AS latest,
|
||||
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY new_job_effective_date) = 1 AS is_first
|
||||
LAG(position_title) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_function_title,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_function_level, -- Not available
|
||||
LAG(w.user_type) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_type,
|
||||
LAG(new_fte_percentage) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_fte_percentage,
|
||||
LAG(w.location) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_work_location,
|
||||
LAG(NULL::VARCHAR) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_work_location_type,
|
||||
LAG(p.assigned_unit) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_team,
|
||||
LAG(w.depot_cost_center) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_cost_center,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_monthly_gross_fixed,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_yearly_gross_fixed,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_yearly_gross_variable,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_yearly_fully_loaded_cost,
|
||||
LAG(NULL::VARCHAR(3)) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_currency,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_hay_grade,
|
||||
LAG(d.path::ltree) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_department_path,
|
||||
LAG(d.manager_id::BIGINT) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_manager_id,
|
||||
LAG(w.collar_type) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_collar_type,
|
||||
LAG(w.user_contract_type) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_contract_type,
|
||||
LAG(w.contracting_company) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_contracting_company,
|
||||
ROW_NUMBER() OVER (PARTITION BY worker_id ORDER BY new_job_effective_date DESC) = 1 AS latest,
|
||||
ROW_NUMBER() OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) = 1 AS is_first
|
||||
FROM position_details p
|
||||
LEFT JOIN latest_workers w ON p.assigned_employee_id = w.user_id
|
||||
LEFT JOIN latest_departments d ON p.assigned_unit_id = d.department_hris_id
|
||||
|
||||
@@ -5,16 +5,58 @@
|
||||
}}
|
||||
|
||||
with distinct_cycles as (
|
||||
select distinct task_name, task_status
|
||||
select
|
||||
min(step_submission_date) as cycle_start_date,
|
||||
max(step_submission_date) as cycle_end_date,
|
||||
task_name,
|
||||
case
|
||||
when count(case when task_status != 'Completed' then 1 end) = 0
|
||||
then 'Closed'
|
||||
else 'Open'
|
||||
end as task_status
|
||||
from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }}
|
||||
--where is_not_removed_from_task = 1
|
||||
group by task_name
|
||||
),
|
||||
|
||||
base_records as (
|
||||
select
|
||||
dense_rank() over (order by task_name)::bigint as id,
|
||||
cycle_start_date::date as start_date,
|
||||
cycle_end_date::date as end_date,
|
||||
task_name as name,
|
||||
'Closed' as status, -- overwritten logic for Sarens
|
||||
'annual' as type
|
||||
from distinct_cycles
|
||||
),
|
||||
|
||||
-- Generate additional records for Performance Review 2024
|
||||
additional_records as (
|
||||
select
|
||||
(select max(id) from base_records) + 1 as id,
|
||||
start_date,
|
||||
end_date,
|
||||
'Performance Review 2024 - Generic' as name,
|
||||
status,
|
||||
type
|
||||
from base_records
|
||||
where name = 'Performance Review 2024'
|
||||
|
||||
union all
|
||||
|
||||
select
|
||||
(select max(id) from base_records) + 2 as id,
|
||||
start_date,
|
||||
end_date,
|
||||
'Performance Review 2024 - n-1 managers' as name,
|
||||
status,
|
||||
type
|
||||
from base_records
|
||||
where name = 'Performance Review 2024'
|
||||
)
|
||||
|
||||
select
|
||||
dense_rank() over (order by task_name)::bigint as id,
|
||||
null::date as start,
|
||||
null::date as "end",
|
||||
task_name as name,
|
||||
task_status as status,
|
||||
task_name as type
|
||||
from distinct_cycles
|
||||
-- Combine original records with additional records
|
||||
select * from base_records
|
||||
where name != 'Performance Review 2024'
|
||||
union all
|
||||
select * from additional_records
|
||||
@@ -11,6 +11,28 @@ with step_agg as (
|
||||
group by 1,2
|
||||
),
|
||||
|
||||
-- When task_name = "Performance Review 2024", we need to update it to either "Performance Review 2024 - Generic" or "Performance Review 2024 - n-1 managers"
|
||||
-- This is determined based on whether for the user_id, there exists a step which is not in ('Employee Self Review', 'Manager Review', 'Performance Conversation & Manager Sign-off', or 'Employee Sign-off')
|
||||
remapped_steps as (
|
||||
select
|
||||
user_id,
|
||||
case
|
||||
when task_name != 'Performance Review 2024'
|
||||
then task_name
|
||||
when exists (
|
||||
select 1
|
||||
from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }} s
|
||||
where s.user_id = step_agg.user_id
|
||||
and s.task_name = 'Performance Review 2024'
|
||||
and s.step_title not in ('Employee Self Review', 'Manager Review', 'Performance Conversation & Manager Sign-off', 'Employee Sign-Off')
|
||||
) then 'Performance Review 2024 - n-1 managers'
|
||||
else 'Performance Review 2024 - Generic'
|
||||
end as task_name,
|
||||
completed_at,
|
||||
status
|
||||
from step_agg
|
||||
),
|
||||
|
||||
total_scoring as (
|
||||
select
|
||||
user_id::text as user_id,
|
||||
@@ -39,7 +61,7 @@ combined as (
|
||||
ts.overall_rating,
|
||||
w.worker_id,
|
||||
c.cycle_id
|
||||
from step_agg s
|
||||
from remapped_steps s
|
||||
left join total_scoring ts using (user_id, task_name)
|
||||
left join worker_map w on w.worker_hris_id = s.user_id
|
||||
left join cycle_map c on c.name = s.task_name
|
||||
@@ -49,7 +71,7 @@ select
|
||||
row_number() over (order by user_id, task_name)::bigint as id,
|
||||
null::date as start,
|
||||
status,
|
||||
nullif(regexp_replace(overall_rating, '\D','','g'), '')::numeric as overall_rating_value,
|
||||
nullif(regexp_replace(overall_rating, '^(\d+).*', '\1'), '')::numeric as overall_rating_value,
|
||||
overall_rating as overall_rating_text,
|
||||
cycle_id,
|
||||
worker_id,
|
||||
|
||||
@@ -17,20 +17,82 @@ steps as (
|
||||
user_id::text as user_id,
|
||||
task_name,
|
||||
step_title as name,
|
||||
case
|
||||
when step_title in ('Auto-évaluation des employés', 'Employee Self Review', 'Sarens Projects Review',
|
||||
'Approbation des employés',
|
||||
'Self Review') then 1
|
||||
when step_title in ('Évaluation du manager', 'Functional Manager Review', 'Discussion et approbation du manager',
|
||||
'Manager Review', 'Strategy Review', 'Finance Review', 'Sales Review', 'Fleet Review', 'LCM Review', 'Operations Review') then 2
|
||||
when step_title in ('HR Review', 'SHEQ Review', 'Performance Conversation & Manager Sign-off') then 3
|
||||
when step_title = 'Employee Sign-Off' then 4
|
||||
else null -- fallback for any unexpected values
|
||||
end as sequence_number,
|
||||
step_status as status,
|
||||
step_submission_date::date as completed_at
|
||||
from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }}
|
||||
where is_not_removed_from_task = 1
|
||||
),
|
||||
|
||||
mandatory_steps as (
|
||||
select 'Employee Self Review' as name, 1 as sequence_number
|
||||
union all select 'Manager Review', 2
|
||||
union all select 'Performance Conversation & Manager Sign-off', 3
|
||||
union all select 'Employee Sign-Off', 4
|
||||
),
|
||||
|
||||
-- Get all reviews that match our criteria
|
||||
filtered_reviews as (
|
||||
select distinct
|
||||
r.review_id,
|
||||
r.user_id,
|
||||
r.task_name
|
||||
from review_base r
|
||||
where r.task_name like 'Performance Review 2024%'
|
||||
),
|
||||
|
||||
-- Generate all expected steps for these reviews
|
||||
expected_steps as (
|
||||
select
|
||||
r.review_id,
|
||||
r.user_id,
|
||||
r.task_name,
|
||||
m.name,
|
||||
m.sequence_number
|
||||
from filtered_reviews r
|
||||
cross join mandatory_steps m
|
||||
),
|
||||
|
||||
-- Find which expected steps are missing from the source data
|
||||
missing_steps as (
|
||||
select
|
||||
e.user_id,
|
||||
e.task_name,
|
||||
e.name,
|
||||
e.sequence_number,
|
||||
'Not started' as status,
|
||||
null::date as completed_at
|
||||
from expected_steps e
|
||||
left join steps s
|
||||
on e.user_id = s.user_id
|
||||
and e.task_name like s.task_name || '%' -- Also map for remapped cycle generic/n-1
|
||||
and e.name = s.name
|
||||
where s.user_id is null
|
||||
)
|
||||
|
||||
-- Combine existing steps with missing steps
|
||||
select
|
||||
row_number() over (order by s.user_id, s.task_name, s.name)::bigint as id,
|
||||
r.review_id,
|
||||
s.name,
|
||||
s.sequence_number,
|
||||
s.status,
|
||||
s.completed_at,
|
||||
null::date as due
|
||||
from steps s
|
||||
from (
|
||||
select * from steps
|
||||
union all
|
||||
select * from missing_steps
|
||||
) s
|
||||
left join review_base r
|
||||
on r.user_id = s.user_id
|
||||
and r.task_name = s.task_name
|
||||
and r.task_name like s.task_name || '%' -- Also map for remapped cycle generic/n-1
|
||||
@@ -16,7 +16,7 @@ total_scores as (
|
||||
select
|
||||
r.review_id,
|
||||
t.section_name as dimension,
|
||||
nullif(regexp_replace(t.final_rating, '\D','','g'), '')::numeric as score_value,
|
||||
nullif(regexp_replace(t.final_rating, '^(\d+).*|.*', '\1'), '')::numeric as score_value,
|
||||
nullif(trim(t.final_rating), '') as score_text,
|
||||
null::text as comment
|
||||
from {{ source('tap_spreadsheets_anywhere', 'performance_review_total_scoring') }} t
|
||||
@@ -29,7 +29,7 @@ sub_scores as (
|
||||
select
|
||||
r.review_id,
|
||||
s.competency_name as dimension,
|
||||
nullif(regexp_replace(s.competency_rating_score, '\D','','g'), '')::numeric as score_value,
|
||||
nullif(regexp_replace(s.competency_rating_score, '^(\d+).*|.*', '\1'), '')::numeric as score_value,
|
||||
nullif(trim(s.competency_rating_score), '') as score_text,
|
||||
null::text as comment
|
||||
from {{ source('tap_spreadsheets_anywhere', 'performance_review_sub_scoring') }} s
|
||||
|
||||
@@ -9,6 +9,9 @@ WITH latest_workers AS (
|
||||
SELECT DISTINCT ON (user_id) *
|
||||
FROM {{ source('tap_spreadsheets_anywhere', 'workers') }}
|
||||
WHERE _sdc_deleted_at IS NULL
|
||||
AND user_id IS NOT NULL and user_id != '' -- Skipping empty user_ids
|
||||
-- Skipping users with non-numeric user_id, as they are not valid
|
||||
AND user_id ~ '^[0-9]+$'
|
||||
ORDER BY user_id, _sdc_received_at DESC
|
||||
),
|
||||
|
||||
@@ -19,14 +22,24 @@ latest_positions AS (
|
||||
SELECT DISTINCT ON (assigned_employee_id) *
|
||||
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
|
||||
WHERE _sdc_deleted_at IS NULL
|
||||
AND primary_position = 'Yes'
|
||||
ORDER BY assigned_employee_id, _sdc_received_at DESC, assigned_employee_effective_date
|
||||
-- AND primary_position = 'Yes' -- Removed this filter, as there are employees with only non-primary positions
|
||||
ORDER BY assigned_employee_id, _sdc_received_at DESC, assigned_employee_effective_date DESC, assigned_unit_effective_date DESC, CASE WHEN primary_position = 'Yes' THEN 1 ELSE 0 END DESC
|
||||
),
|
||||
|
||||
-- Get the first position for each worker, to set the hire date
|
||||
-- This is to fix the "original hire date" column of the All users report containing dates before the first position
|
||||
first_positions AS (
|
||||
SELECT DISTINCT ON (assigned_employee_id) *
|
||||
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
|
||||
WHERE _sdc_deleted_at IS NULL
|
||||
ORDER BY assigned_employee_id, _sdc_received_at DESC, GREATEST(assigned_employee_effective_date, assigned_unit_effective_date) ASC, CASE WHEN primary_position = 'Yes' THEN 1 ELSE 0 END DESC
|
||||
),
|
||||
|
||||
joined_data AS (
|
||||
SELECT
|
||||
w.user_id,
|
||||
w.birth_date::DATE AS date_of_birth,
|
||||
-- if birth_date is 01/01/1901, we consider it NULL
|
||||
NULLIF(w.birth_date::DATE, '1901-01-01') AS date_of_birth,
|
||||
w.gender,
|
||||
w.nationality,
|
||||
NULL::VARCHAR AS first_name, -- Not available
|
||||
@@ -41,7 +54,7 @@ joined_data AS (
|
||||
NULL::VARCHAR AS address_country,
|
||||
NULL::VARCHAR AS phone_number,
|
||||
NULL::VARCHAR AS driver_license,
|
||||
w.original_hire_date::DATE AS employment_start,
|
||||
COALESCE(GREATEST(fp.assigned_employee_effective_date, fp.assigned_unit_effective_date), GREATEST(w.original_hire_date, w.last_hire_date))::DATE AS employment_start,
|
||||
w.user_type AS employment_type,
|
||||
w.user_contract_type AS employment_contract_type,
|
||||
w.contracting_company AS employment_contracting_company,
|
||||
@@ -60,10 +73,10 @@ joined_data AS (
|
||||
NULL::INTEGER AS employment_criticality,
|
||||
NULL::VARCHAR AS employment_probation_status,
|
||||
NULL::DATE AS employment_probation_end_date,
|
||||
d.path::ltree AS employment_department_path,
|
||||
COALESCE(d.path::ltree, d2.path::ltree) AS employment_department_path,
|
||||
NULL::VARCHAR(254) AS email,
|
||||
NULL::DATE AS employment_earliest_retirement_date,
|
||||
ROUND(p.fte_utilized_by_employee_in_this_position * 100)::INTEGER AS employment_fte_percentage,
|
||||
COALESCE(ROUND(p.fte_utilized_by_employee_in_this_position * 100)::INTEGER, 100) AS employment_fte_percentage,
|
||||
NULL::INTEGER AS salary_hay_grade,
|
||||
NULL::VARCHAR(3) AS salary_currency,
|
||||
NULL::INTEGER AS salary_yearly_gross_fixed,
|
||||
@@ -76,8 +89,12 @@ joined_data AS (
|
||||
FROM latest_workers w
|
||||
LEFT JOIN latest_positions p
|
||||
ON w.user_id = p.assigned_employee_id
|
||||
LEFT JOIN {{ ref('department') }} d
|
||||
LEFT JOIN first_positions fp
|
||||
ON w.user_id = fp.assigned_employee_id
|
||||
LEFT JOIN {{ ref('department') }} d -- Source = Department from Positions report, only relevant for active workers
|
||||
ON p.assigned_unit_id = d.department_hris_id
|
||||
LEFT JOIN {{ ref('department') }} d2 -- Source = Business Unit from All Users report, only relevant for inactive workers
|
||||
ON w.business_unit = d2.name AND nlevel(d2.path) = 3
|
||||
)
|
||||
|
||||
SELECT
|
||||
@@ -100,6 +117,9 @@ SELECT
|
||||
driver_license,
|
||||
employment_start,
|
||||
employment_type,
|
||||
employment_contract_type,
|
||||
employment_contracting_company,
|
||||
employment_collar_type,
|
||||
employment_function_level,
|
||||
employment_function_title,
|
||||
employment_team,
|
||||
@@ -107,6 +127,7 @@ SELECT
|
||||
employment_cost_center,
|
||||
employment_exit_type,
|
||||
employment_exit_date,
|
||||
employment_exit_reason,
|
||||
performance_rating,
|
||||
performance_date,
|
||||
employment_manager_id,
|
||||
|
||||
@@ -16,7 +16,7 @@ meltano:
|
||||
search_path: "{{ env_var('DBT_POSTGRES_SEARCH_PATH', '') }}" # optional, override the default postgres search_path
|
||||
role: "{{ env_var('DBT_POSTGRES_ROLE', '') }}" # optional, set the role dbt assumes when executing queries
|
||||
# sslmode: "{{ env_var('DBT_POSTGRES_SSLMODE', '').split() }}" # optional, set the sslmode used to connect to the database
|
||||
staging:
|
||||
sarens-demo:
|
||||
type: postgres
|
||||
host: "{{ env_var('DBT_POSTGRES_HOST') }}"
|
||||
user: "{{ env_var('DBT_POSTGRES_USER') }}"
|
||||
|
||||
Reference in New Issue
Block a user