diff --git a/README.md b/README.md index e69de29..7251b6a 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,3 @@ +meltano run tap-spreadsheets-anywhere target-postgres + +meltano invoke dbt-postgres:run \ No newline at end of file diff --git a/meltano.yml b/meltano.yml index 2ccdc9d..185629e 100644 --- a/meltano.yml +++ b/meltano.yml @@ -36,6 +36,36 @@ environments: skip_initial: 8 sample_rate: 1 max_sampling_read: 25000 + - path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/ + name: performance_review_steps + pattern: "PER001.*" + start_date: '2000-01-01T00:00:00Z' + key_properties: [] + format: excel + worksheet_name: PER001 - Performance Review (2) + skip_initial: 8 + sample_rate: 1 + max_sampling_read: 25000 + - path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/ + name: performance_review_total_scoring + pattern: "PER002.*" + start_date: '2000-01-01T00:00:00Z' + key_properties: [] + format: excel + worksheet_name: PER002 - Performance Review (2) + skip_initial: 8 + sample_rate: 1 + max_sampling_read: 25000 + - path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/ + name: performance_review_sub_scoring + pattern: "PER003.*" + start_date: '2000-01-01T00:00:00Z' + key_properties: [] + format: excel + worksheet_name: PER003 - Performance Review (2) + skip_initial: 8 + sample_rate: 1 + max_sampling_read: 25000 select: - departments.* - '!departments._*' @@ -45,16 +75,16 @@ environments: database: lakehouse_sarens host: localhost load_method: append-only - user: lakehouse_sarens - default_target_schema: public + user: postgres + default_target_schema: raw utilities: - name: dbt-postgres config: host: localhost dbname: lakehouse_sarens - user: lakehouse_sarens + user: postgres port: 5432 - schema: public + schema: clean - name: staging - name: prod plugins: diff --git a/transform/models/tap_spreadsheets_everywhere/manageviews_departments.sql b/transform/models/tap_spreadsheets_anywhere/department.sql similarity index 78% rename from transform/models/tap_spreadsheets_everywhere/manageviews_departments.sql rename to transform/models/tap_spreadsheets_anywhere/department.sql index ed81b28..366bd2c 100644 --- a/transform/models/tap_spreadsheets_everywhere/manageviews_departments.sql +++ b/transform/models/tap_spreadsheets_anywhere/department.sql @@ -10,7 +10,7 @@ latest_departments as ( from ( select *, row_number() over (partition by id order by _sdc_received_at desc) as rn - from {{ source('tap_spreadsheets_everywhere', 'departments') }} + from {{ source('tap_spreadsheets_anywhere', 'departments') }} ) t where rn = 1 ), @@ -19,9 +19,9 @@ department_tree as ( select id as department_hris_id, applicable_organization as name, - null::text as cost_center, null::text as manager_id, - id::text as path -- start path with own ID + id::ltree as path, -- start path with own ID + NULL::VARCHAR AS org_view_model from latest_departments where parent_id = 'Sarens Group' @@ -31,9 +31,9 @@ department_tree as ( select d.id as department_hris_id, d.applicable_organization as name, - null::text as cost_center, null::text as manager_id, - dt.path || '.' || d.id::text as path + dt.path || d.id::text::ltree as path, + NULL::VARCHAR AS org_view_model from latest_departments d join department_tree dt on d.parent_id = dt.department_hris_id diff --git a/transform/models/tap_spreadsheets_anywhere/job_change.sql b/transform/models/tap_spreadsheets_anywhere/job_change.sql new file mode 100644 index 0000000..14dcc04 --- /dev/null +++ b/transform/models/tap_spreadsheets_anywhere/job_change.sql @@ -0,0 +1,128 @@ +{{ + config( + materialized='table' + ) +}} + + +WITH latest_workers AS ( + SELECT DISTINCT ON (user_id) * + FROM {{ source('tap_spreadsheets_anywhere', 'workers') }} + WHERE _sdc_deleted_at IS NULL + ORDER BY user_id, _sdc_received_at DESC +), + +latest_departments AS ( + SELECT * + FROM {{ ref('department') }} +), + +positions_deduped AS ( + SELECT DISTINCT ON (position_id) * + FROM {{ source('tap_spreadsheets_anywhere', 'positions') }} + WHERE _sdc_deleted_at IS NULL + ORDER BY position_id, _sdc_received_at DESC +), + +transformed_worker AS ( + SELECT worker_hris_id, id + FROM {{ ref('worker') }} +), + +position_details AS ( + SELECT + p.*, + w.user_id as worker_hris_id, + w.collar_type AS new_collar_type, + w.user_contract_type AS new_contract_type, + w.contracting_company AS new_contracting_company, + d.path::ltree AS new_department_path, + d.manager_id::BIGINT AS new_manager_id, + p.assigned_employee_effective_date::DATE AS new_job_effective_date, + p.fte_utilized_by_employee_in_this_position * 100 AS new_fte_percentage, + tw.id as worker_id + FROM positions_deduped p + LEFT JOIN latest_workers w + ON p.assigned_employee_id = w.user_id + LEFT JOIN latest_departments d + ON p.assigned_unit_id = d.department_hris_id + LEFT JOIN transformed_worker tw ON tw.worker_hris_id = w.user_id +), + +job_changes_with_history AS ( + SELECT + *, + LAG(position_title) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_function_title, + LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_function_level, -- Not available + LAG(w.user_type) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_type, + LAG(new_fte_percentage) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_fte_percentage, + LAG(w.location) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_work_location, + LAG(NULL::VARCHAR) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_work_location_type, + LAG(p.assigned_unit) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_team, + LAG(w.depot_cost_center) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_cost_center, + LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_monthly_gross_fixed, + LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_yearly_gross_fixed, + LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_yearly_gross_variable, + LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_yearly_fully_loaded_cost, + LAG(NULL::VARCHAR(3)) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_currency, + LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_hay_grade, + LAG(d.path::ltree) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_department_path, + LAG(d.manager_id::BIGINT) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_manager_id, + LAG(w.collar_type) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_collar_type, + LAG(w.user_contract_type) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_contract_type, + LAG(w.contracting_company) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_contracting_company, + ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY new_job_effective_date DESC) = 1 AS latest, + ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY new_job_effective_date) = 1 AS is_first + FROM position_details p + LEFT JOIN latest_workers w ON p.assigned_employee_id = w.user_id + LEFT JOIN latest_departments d ON p.assigned_unit_id = d.department_hris_id +) + +SELECT + ROW_NUMBER() OVER () AS id, + NULL::VARCHAR AS first_name, + NULL::VARCHAR AS last_name, + position_title AS new_function_title, + NULL::INTEGER AS new_function_level, -- Not available + user_type AS new_type, + ROUND(new_fte_percentage)::INTEGER AS new_fte_percentage, + location AS new_work_location, + NULL::VARCHAR AS new_work_location_type, + assigned_unit AS new_team, + depot_cost_center AS new_cost_center, + NULL::INTEGER AS new_salary_monthly_gross_fixed, + NULL::INTEGER AS new_salary_yearly_gross_fixed, + NULL::INTEGER AS new_salary_yearly_gross_variable, + NULL::INTEGER AS new_salary_yearly_fully_loaded_cost, + NULL::VARCHAR(3) AS new_salary_currency, + NULL::INTEGER AS new_salary_hay_grade, + previous_function_title, + previous_function_level, + previous_type, + previous_fte_percentage, + previous_work_location, + previous_work_location_type, + previous_team, + previous_cost_center, + previous_salary_monthly_gross_fixed, + previous_salary_yearly_gross_fixed, + previous_salary_yearly_gross_variable, + previous_salary_yearly_fully_loaded_cost, + previous_salary_currency, + previous_salary_hay_grade, + COALESCE(NULL, false) AS promotion, -- Optionally update later + latest, + new_job_effective_date, + new_department_path, + new_manager_id, + previous_department_path, + previous_manager_id, + worker_id, + is_first AS new_hire, + new_collar_type, + new_contract_type, + new_contracting_company, + previous_collar_type, + previous_contract_type, + previous_contracting_company +FROM job_changes_with_history \ No newline at end of file diff --git a/transform/models/tap_spreadsheets_anywhere/performance_cycle.sql b/transform/models/tap_spreadsheets_anywhere/performance_cycle.sql new file mode 100644 index 0000000..0ff85e6 --- /dev/null +++ b/transform/models/tap_spreadsheets_anywhere/performance_cycle.sql @@ -0,0 +1,20 @@ +{{ + config( + materialized='table' + ) +}} + +with distinct_cycles as ( + select distinct task_name, task_status + from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }} + --where is_not_removed_from_task = 1 +) + +select + dense_rank() over (order by task_name)::bigint as id, + null::date as start, + null::date as "end", + task_name as name, + task_status as status, + task_name as type +from distinct_cycles diff --git a/transform/models/tap_spreadsheets_anywhere/performance_review.sql b/transform/models/tap_spreadsheets_anywhere/performance_review.sql new file mode 100644 index 0000000..7d15946 --- /dev/null +++ b/transform/models/tap_spreadsheets_anywhere/performance_review.sql @@ -0,0 +1,58 @@ +{{ config(materialized='table') }} + +with step_agg as ( + select + user_id::text as user_id, + task_name, + max(step_submission_date::date) filter (where task_status = 'Completed') as completed_at, + max(task_status) as status + from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }} + where is_not_removed_from_task = 1 + group by 1,2 +), + +total_scoring as ( + select + user_id::text as user_id, + task_name, + max(final_rating) as overall_rating + from {{ source('tap_spreadsheets_anywhere', 'performance_review_total_scoring') }} + group by 1,2 +), + +worker_map as ( + select id as worker_id, worker_hris_id::text + from {{ ref('worker') }} +), + +cycle_map as ( + select id as cycle_id, name + from {{ ref('performance_cycle') }} +), + +combined as ( + select + s.user_id, + s.task_name, + s.status, + s.completed_at, + ts.overall_rating, + w.worker_id, + c.cycle_id + from step_agg s + left join total_scoring ts using (user_id, task_name) + left join worker_map w on w.worker_hris_id = s.user_id + left join cycle_map c on c.name = s.task_name +) + +select + row_number() over (order by user_id, task_name)::bigint as id, + null::date as start, + status, + overall_rating, + cycle_id, + worker_id, + null::bigint as reviewer_id, + completed_at as completed, + null::date as due +from combined \ No newline at end of file diff --git a/transform/models/tap_spreadsheets_anywhere/performance_review_step.sql b/transform/models/tap_spreadsheets_anywhere/performance_review_step.sql new file mode 100644 index 0000000..bc41d63 --- /dev/null +++ b/transform/models/tap_spreadsheets_anywhere/performance_review_step.sql @@ -0,0 +1,36 @@ +{{ config(materialized='table') }} + +with review_base as ( + select + r.id as review_id, + w.worker_hris_id::text as user_id, + c.name as task_name + from {{ ref('performance_review') }} r + join {{ ref('worker') }} w + on w.id = r.worker_id + join {{ ref('performance_cycle') }} c + on c.id = r.cycle_id +), + +steps as ( + select + user_id::text as user_id, + task_name, + step_title as name, + step_status as status, + step_submission_date::date as completed_at + from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }} + where is_not_removed_from_task = 1 +) + +select + row_number() over (order by s.user_id, s.task_name, s.name)::bigint as id, + r.review_id, + s.name, + s.status, + s.completed_at, + null::date as due +from steps s +left join review_base r + on r.user_id = s.user_id + and r.task_name = s.task_name \ No newline at end of file diff --git a/transform/models/tap_spreadsheets_anywhere/scoring.sql b/transform/models/tap_spreadsheets_anywhere/scoring.sql new file mode 100644 index 0000000..346112b --- /dev/null +++ b/transform/models/tap_spreadsheets_anywhere/scoring.sql @@ -0,0 +1,49 @@ +{{ config(materialized='table') }} + +with review_base as ( + select + r.id as review_id, + w.worker_hris_id::text as user_id, + c.name as task_name + from {{ ref('performance_review') }} r + join {{ ref('worker') }} w + on w.id = r.worker_id + join {{ ref('performance_cycle') }} c + on c.id = r.cycle_id +), + +total_scores as ( + select + r.review_id, + t.section_name as dimension, + nullif(trim(t.final_rating), '') as score, + null::text as comment + from {{ source('tap_spreadsheets_anywhere', 'performance_review_total_scoring') }} t + join review_base r + on r.user_id = t.user_id::text + and r.task_name = t.task_name +), + +sub_scores as ( + select + r.review_id, + s.competency_name as dimension, + nullif(trim(s.competency_rating_score), '') as score, + null::text as comment + from {{ source('tap_spreadsheets_anywhere', 'performance_review_sub_scoring') }} s + join review_base r + on r.user_id = s.user_id::text + and r.task_name = s.task_name +) + +select + row_number() over (order by review_id, dimension)::bigint as id, + dimension, + score, -- TODO: change to integer (score_value) + text (score_text) + comment, + review_id +from ( + select * from total_scores + union all + select * from sub_scores +) \ No newline at end of file diff --git a/transform/models/tap_spreadsheets_anywhere/source.yml b/transform/models/tap_spreadsheets_anywhere/source.yml new file mode 100644 index 0000000..be89f7c --- /dev/null +++ b/transform/models/tap_spreadsheets_anywhere/source.yml @@ -0,0 +1,12 @@ +config-version: 2 +version: 2 +sources: + - name: tap_spreadsheets_anywhere + schema: raw + tables: + - name: departments + - name: positions + - name: workers + - name: performance_review_steps + - name: performance_review_total_scoring + - name: performance_review_sub_scoring \ No newline at end of file diff --git a/transform/models/tap_spreadsheets_anywhere/worker.sql b/transform/models/tap_spreadsheets_anywhere/worker.sql new file mode 100644 index 0000000..648ba48 --- /dev/null +++ b/transform/models/tap_spreadsheets_anywhere/worker.sql @@ -0,0 +1,129 @@ +{{ + config( + materialized='table' + ) +}} + +-- Filter "users" based on latest ELT load +WITH latest_workers AS ( + SELECT DISTINCT ON (user_id) * + FROM {{ source('tap_spreadsheets_anywhere', 'workers') }} + WHERE _sdc_deleted_at IS NULL + ORDER BY user_id, _sdc_received_at DESC +), + +-- Filter "positions" based on latest ELT load +-- + only primary positions (need additional view for multi-positions) +-- + only for latest position of a user +latest_positions AS ( + SELECT DISTINCT ON (assigned_employee_id) * + FROM {{ source('tap_spreadsheets_anywhere', 'positions') }} + WHERE _sdc_deleted_at IS NULL + AND primary_position = 'Yes' + ORDER BY assigned_employee_id, _sdc_received_at DESC, assigned_employee_effective_date +), + +joined_data AS ( + SELECT + w.user_id, + w.birth_date::DATE AS date_of_birth, + w.gender, + w.nationality, + NULL::VARCHAR AS first_name, -- Not available + NULL::VARCHAR AS last_name, -- Not available + NULL::VARCHAR AS ethnicity, + NULL::VARCHAR AS degree_level, + NULL::VARCHAR AS degree_name, + NULL::VARCHAR AS address_line_1, + NULL::VARCHAR AS address_line_2, + NULL::VARCHAR AS address_zip, + NULL::VARCHAR AS address_city, + NULL::VARCHAR AS address_country, + NULL::VARCHAR AS phone_number, + NULL::VARCHAR AS driver_license, + w.original_hire_date::DATE AS employment_start, + w.user_type AS employment_type, + w.user_contract_type AS employment_contract_type, + w.contracting_company AS employment_contracting_company, + w.collar_type AS employment_collar_type, + NULL::INTEGER AS employment_function_level, + p.position_title AS employment_function_title, + p.assigned_unit AS employment_team, + NULL::INTEGER AS employment_notice_period, + depot_cost_center AS employment_cost_center, + w.termination_type AS employment_exit_type, + w.termination_date::DATE AS employment_exit_date, + w.termination_reason AS employment_exit_reason, + NULL::INTEGER AS performance_rating, + NULL::DATE AS performance_date, + d.manager_id::BIGINT AS employment_manager_id, + NULL::INTEGER AS employment_criticality, + NULL::VARCHAR AS employment_probation_status, + NULL::DATE AS employment_probation_end_date, + d.path::ltree AS employment_department_path, + NULL::VARCHAR(254) AS email, + NULL::DATE AS employment_earliest_retirement_date, + ROUND(p.fte_utilized_by_employee_in_this_position * 100)::INTEGER AS employment_fte_percentage, + NULL::INTEGER AS salary_hay_grade, + NULL::VARCHAR(3) AS salary_currency, + NULL::INTEGER AS salary_yearly_gross_fixed, + NULL::INTEGER AS salary_yearly_gross_variable, + w.location AS employment_work_location, + NULL::VARCHAR AS employment_work_location_type, + NULL::INTEGER AS salary_monthly_gross_fixed, + NULL::INTEGER AS salary_yearly_fully_loaded_cost, + 'hris' AS source + FROM latest_workers w + LEFT JOIN latest_positions p + ON w.user_id = p.assigned_employee_id + LEFT JOIN {{ ref('department') }} d + ON p.assigned_unit_id = d.department_hris_id +) + +SELECT + row_number() OVER () AS id, + user_id::VARCHAR AS worker_hris_id, + first_name, + last_name, + date_of_birth, + gender, + nationality, + ethnicity, + degree_level, + degree_name, + address_line_1, + address_line_2, + address_zip, + address_city, + address_country, + phone_number, + driver_license, + employment_start, + employment_type, + employment_function_level, + employment_function_title, + employment_team, + employment_notice_period, + employment_cost_center, + employment_exit_type, + employment_exit_date, + performance_rating, + performance_date, + employment_manager_id, + employment_criticality, + employment_probation_status, + source, + employment_probation_end_date, + employment_department_path, + email, + employment_earliest_retirement_date, + employment_fte_percentage, + salary_hay_grade, + salary_currency, + salary_yearly_gross_fixed, + salary_yearly_gross_variable, + employment_work_location, + employment_work_location_type, + salary_monthly_gross_fixed, + salary_yearly_fully_loaded_cost +FROM joined_data \ No newline at end of file diff --git a/transform/models/tap_spreadsheets_everywhere/source.yml b/transform/models/tap_spreadsheets_everywhere/source.yml deleted file mode 100644 index 0992c2b..0000000 --- a/transform/models/tap_spreadsheets_everywhere/source.yml +++ /dev/null @@ -1,7 +0,0 @@ -config-version: 2 -version: 2 -sources: - - name: tap_spreadsheets_everywhere - schema: public - tables: - - name: departments \ No newline at end of file