Compare commits
30 Commits
48d0762c15
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| b3ad962b41 | |||
| eaaf51f699 | |||
| 461a7eb9aa | |||
| 9a7059dcdf | |||
| 735250c4c7 | |||
| aa19a53d51 | |||
| 44d9f2c21a | |||
| f9834cac10 | |||
| a222a43727 | |||
| 8df02c97e8 | |||
| bc228215ec | |||
| d7e2b69dd0 | |||
| 2b8f26e9fe | |||
| 3486b4f509 | |||
| 2da0aff227 | |||
| 14fb081542 | |||
| aef1d13279 | |||
| 506dda094d | |||
| b256e70e46 | |||
| 65262b2c99 | |||
| 37c0c3826f | |||
| 23e707b7fe | |||
| cc4aadc708 | |||
| 04c0499cce | |||
| 2f2716a62b | |||
| a64382ae5a | |||
| 6542fb07ec | |||
| 5a8b2f2a69 | |||
| e72bb37106 | |||
| 88b52d85e7 |
15
.dockerignore
Normal file
15
.dockerignore
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
/.git
|
||||||
|
|
||||||
|
# .gitignore
|
||||||
|
/venv
|
||||||
|
/.meltano
|
||||||
|
/.env
|
||||||
|
/ui.cfg
|
||||||
|
/output
|
||||||
|
|
||||||
|
# transform/.gitignore
|
||||||
|
/transform/target/
|
||||||
|
/transform/dbt_modules/
|
||||||
|
/transform/logs/
|
||||||
|
|
||||||
|
# custom
|
||||||
18
Dockerfile
Normal file
18
Dockerfile
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# registry.gitlab.com/meltano/meltano:latest is also available in GitLab Registry
|
||||||
|
ARG MELTANO_IMAGE=meltano/meltano:latest
|
||||||
|
FROM $MELTANO_IMAGE
|
||||||
|
|
||||||
|
WORKDIR /project
|
||||||
|
|
||||||
|
# Install any additional requirements
|
||||||
|
COPY ./requirements.txt .
|
||||||
|
RUN pip install -r requirements.txt
|
||||||
|
|
||||||
|
# Copy over Meltano project directory
|
||||||
|
COPY . .
|
||||||
|
RUN meltano install
|
||||||
|
|
||||||
|
# Don't allow changes to containerized project files
|
||||||
|
ENV MELTANO_PROJECT_READONLY=1
|
||||||
|
|
||||||
|
ENTRYPOINT ["meltano"]
|
||||||
67
README (files-docker-compose).md
Normal file
67
README (files-docker-compose).md
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
# Meltano & Docker Compose
|
||||||
|
|
||||||
|
*[This file](https://gitlab.com/meltano/files-docker-compose/-/blob/master/bundle/README.md) has been added to your project for convenience and reference only. Feel free to delete it.*
|
||||||
|
|
||||||
|
## Getting started
|
||||||
|
|
||||||
|
1. Start the services in the background:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
### Helpful commands
|
||||||
|
|
||||||
|
- `docker compose run meltano {subcommand}`: Run a [`meltano` CLI command](https://meltano.com/docs/command-line-interface.html) inside your container.
|
||||||
|
- `docker compose logs`: See all logs.
|
||||||
|
- `docker compose logs {service}`: See logs for a particular service, e.g. `meltano`.
|
||||||
|
|
||||||
|
## Optional services
|
||||||
|
|
||||||
|
If these services are not relevant to you, feel free to delete their commented sections.
|
||||||
|
|
||||||
|
### Airflow
|
||||||
|
|
||||||
|
If you are using the [Airflow orchestrator](https://meltano.com/docs/orchestration.html) and would like to run it using Docker Compose, follow these steps:
|
||||||
|
|
||||||
|
1. Uncomment the `airflow-webserver` and `airflow-scheduler` services.
|
||||||
|
1. Start the new services:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
1. Open the Airflow web interface at <http://localhost:8080>.
|
||||||
|
|
||||||
|
## Production usage
|
||||||
|
|
||||||
|
A `docker-compose.prod.yml` file is included that represents a [production-grade](https://meltano.com/docs/production.html) setup of a [containerized Meltano project](https://meltano.com/docs/containerization.html).
|
||||||
|
|
||||||
|
If this is not relevant to you, feel free to delete it.
|
||||||
|
|
||||||
|
### Dependencies
|
||||||
|
|
||||||
|
The production configuration depends on a `Dockerfile` being present in your project.
|
||||||
|
|
||||||
|
If you haven't already, add the appropriate `Dockerfile` and `.dockerignore` files to your project by adding the [`docker` file bundle](https://gitlab.com/meltano/files-docker):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
meltano add files docker
|
||||||
|
```
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
Please ensure you do the following before deploying to production:
|
||||||
|
|
||||||
|
1. If you are using the [Airflow orchestrator](#airflow) and would like to run it using Docker Compose, uncomment the Airflow services, network, and volume, and add `psycopg2` to `airflow`'s `pip_url` in `meltano.yml` as described in the ["Deployment in Production" guide](https://meltano.com/docs/production.html#airflow-orchestrator). If not, feel free to delete the commented sections.
|
||||||
|
1. Change the database password for `meltano-system-db` (and `airflow-metadata-db`): look for `# CHANGE ME`.
|
||||||
|
1. Update the database connection URIs under `x-meltano-env` (and `x-airflow-env`) to reflect the changed passwords.
|
||||||
|
1. Add any environment variables from `.env` and your local environment that are needed for production under `x-meltano-env`.
|
||||||
|
1. Change the image name and tag under `x-meltano-image` to something that makes sense for your project.
|
||||||
|
1. Start the services in the background:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose -f docker-compose.prod.yml up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
If you've made changes to your project and need to rebuild your project-specific image, run `docker compose -f docker-compose.prod.yml up -d --build`.
|
||||||
90
README.md
90
README.md
@@ -1,3 +1,91 @@
|
|||||||
meltano run tap-spreadsheets-anywhere target-postgres
|
meltano run tap-spreadsheets-anywhere target-postgres
|
||||||
|
or
|
||||||
|
meltano run tap-spreadsheets-anywhere target-postgres --full-refresh
|
||||||
|
|
||||||
meltano invoke dbt-postgres:run
|
meltano invoke dbt-postgres:run
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
meltano --environment=sarens-demo run tap-spreadsheets-anywhere target-postgres dbt-postgres:run
|
||||||
|
|
||||||
|
|
||||||
|
Refresh / overwrite doesn't seem to work, so as a manual fix, truncate (or drop) the raw tables:
|
||||||
|
TRUNCATE TABLE raw.absenteisme;
|
||||||
|
TRUNCATE TABLE raw.departments;
|
||||||
|
TRUNCATE TABLE raw.performance_review_steps;
|
||||||
|
TRUNCATE TABLE raw.performance_review_sub_scoring;
|
||||||
|
TRUNCATE TABLE raw.performance_review_total_scoring;
|
||||||
|
TRUNCATE TABLE raw.positions;
|
||||||
|
TRUNCATE TABLE raw.workers;
|
||||||
|
|
||||||
|
|
||||||
|
DROP TABLE raw.absenteisme;
|
||||||
|
DROP TABLE raw.departments;
|
||||||
|
DROP TABLE raw.performance_review_steps;
|
||||||
|
DROP TABLE raw.performance_review_sub_scoring;
|
||||||
|
DROP TABLE raw.performance_review_total_scoring;
|
||||||
|
DROP TABLE raw.positions;
|
||||||
|
DROP TABLE raw.workers;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Data quality checks:
|
||||||
|
|
||||||
|
-- Total worker count
|
||||||
|
SELECT count(*)
|
||||||
|
from clean.worker;
|
||||||
|
|
||||||
|
-- Active workers
|
||||||
|
SELECT count(*)
|
||||||
|
from clean.worker
|
||||||
|
where employment_exit_date is null;
|
||||||
|
|
||||||
|
-- Number of job change / position change records
|
||||||
|
WITH history_counts AS (
|
||||||
|
SELECT w.id, count(*) as history_count
|
||||||
|
from clean.worker w
|
||||||
|
left join clean.job_change jc on w.id = jc.worker_id
|
||||||
|
group by w.id
|
||||||
|
)
|
||||||
|
SELECT history_count, count(*)
|
||||||
|
from history_counts
|
||||||
|
group by history_count
|
||||||
|
order by history_count;
|
||||||
|
|
||||||
|
-- Years at the company
|
||||||
|
WITH yac AS (
|
||||||
|
SELECT
|
||||||
|
w.id,
|
||||||
|
EXTRACT('YEAR' FROM AGE(COALESCE(employment_exit_date, CURRENT_DATE), employment_start)) AS years_at_company
|
||||||
|
FROM clean.worker w
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
yac.years_at_company,
|
||||||
|
COUNT(*)
|
||||||
|
FROM yac
|
||||||
|
GROUP BY yac.years_at_company
|
||||||
|
ORDER BY yac.years_at_company
|
||||||
|
|
||||||
|
-- Worker IDs with < 0 or > 60 years at the company
|
||||||
|
WITH yac AS (
|
||||||
|
SELECT
|
||||||
|
w.id, w.worker_hris_id, w.employment_start, w.employment_exit_date,
|
||||||
|
EXTRACT('YEAR' FROM AGE(COALESCE(employment_exit_date, CURRENT_DATE), employment_start)) AS years_at_company
|
||||||
|
FROM clean.worker w
|
||||||
|
)
|
||||||
|
SELECT *
|
||||||
|
from yac
|
||||||
|
where years_at_company < 0 or years_at_company > 60;
|
||||||
|
|
||||||
|
|
||||||
|
-- Performance review: number of steps loaded
|
||||||
|
select c.name, s.name, count(*)
|
||||||
|
from performance_review_step s
|
||||||
|
inner join performance_review r on r.id = s.review_id
|
||||||
|
inner join performance_cycle c on c.id = r.cycle_id
|
||||||
|
group by c.name, s.name, s.sequence_number
|
||||||
|
order by c.name, s.sequence_number;
|
||||||
|
|
||||||
|
-- Steps not linked to a review
|
||||||
|
select count(*) from performance_review_step
|
||||||
|
where review_id is null;
|
||||||
111
docker-compose.prod.yml
Normal file
111
docker-compose.prod.yml
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
x-meltano-image: &meltano-image
|
||||||
|
image: meltano-demo-project:dev # Change me to a name and tag that makes sense for your project
|
||||||
|
build: .
|
||||||
|
|
||||||
|
x-meltano-env: &meltano-env
|
||||||
|
MELTANO_DATABASE_URI: postgresql://postgres:postgres@meltano-system-db/meltano
|
||||||
|
# Add any additional Meltano configuration environment variables here
|
||||||
|
|
||||||
|
# # Uncomment if you are using the Airflow orchestrator, delete otherwise
|
||||||
|
# x-airflow-env: &airflow-env
|
||||||
|
# AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgres://postgres:postgres@airflow-metadata-db/airflow
|
||||||
|
# AIRFLOW__CORE__EXECUTOR: LocalExecutor
|
||||||
|
|
||||||
|
services:
|
||||||
|
meltano:
|
||||||
|
<<: *meltano-image
|
||||||
|
command: dragon
|
||||||
|
environment:
|
||||||
|
<<: *meltano-env
|
||||||
|
# # Uncomment if you are using the Airflow orchestrator, delete otherwise
|
||||||
|
# <<: *airflow-env
|
||||||
|
volumes:
|
||||||
|
- meltano_elt_logs_data:/project/.meltano/logs/elt
|
||||||
|
expose:
|
||||||
|
- 5000
|
||||||
|
ports:
|
||||||
|
- 5000:5000
|
||||||
|
depends_on:
|
||||||
|
- meltano-system-db
|
||||||
|
networks:
|
||||||
|
- meltano
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
meltano-system-db:
|
||||||
|
image: postgres
|
||||||
|
environment:
|
||||||
|
POSTGRES_PASSWORD: postgres # CHANGE ME
|
||||||
|
POSTGRES_DB: meltano
|
||||||
|
PGDATA: /var/lib/postgresql/data/pgdata
|
||||||
|
volumes:
|
||||||
|
- meltano_postgresql_data:/var/lib/postgresql/data
|
||||||
|
expose:
|
||||||
|
- 5432
|
||||||
|
networks:
|
||||||
|
- meltano
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
# # Uncomment if you are using the Airflow orchestrator, delete otherwise
|
||||||
|
# airflow-scheduler:
|
||||||
|
# <<: *meltano-image
|
||||||
|
# command: invoke airflow scheduler
|
||||||
|
# environment:
|
||||||
|
# <<: *meltano-env
|
||||||
|
# <<: *airflow-env
|
||||||
|
# volumes:
|
||||||
|
# - meltano_elt_logs_data:/project/.meltano/logs/elt
|
||||||
|
# expose:
|
||||||
|
# - 8793
|
||||||
|
# depends_on:
|
||||||
|
# - meltano-system-db
|
||||||
|
# - airflow-metadata-db
|
||||||
|
# networks:
|
||||||
|
# - meltano
|
||||||
|
# - airflow
|
||||||
|
# restart: unless-stopped
|
||||||
|
#
|
||||||
|
# airflow-webserver:
|
||||||
|
# <<: *meltano-image
|
||||||
|
# command: invoke airflow webserver
|
||||||
|
# environment:
|
||||||
|
# <<: *meltano-env
|
||||||
|
# <<: *airflow-env
|
||||||
|
# expose:
|
||||||
|
# - 8080
|
||||||
|
# ports:
|
||||||
|
# - 8080:8080
|
||||||
|
# depends_on:
|
||||||
|
# - meltano-system-db
|
||||||
|
# - airflow-metadata-db
|
||||||
|
# networks:
|
||||||
|
# - meltano
|
||||||
|
# - airflow
|
||||||
|
# restart: unless-stopped
|
||||||
|
#
|
||||||
|
# airflow-metadata-db:
|
||||||
|
# image: postgres
|
||||||
|
# environment:
|
||||||
|
# POSTGRES_PASSWORD: postgres # CHANGE ME
|
||||||
|
# POSTGRES_DB: airflow
|
||||||
|
# PGDATA: /var/lib/postgresql/data/pgdata
|
||||||
|
# volumes:
|
||||||
|
# - airflow_postgresql_data:/var/lib/postgresql/data
|
||||||
|
# expose:
|
||||||
|
# - 5432
|
||||||
|
# networks:
|
||||||
|
# - airflow
|
||||||
|
# restart: unless-stopped
|
||||||
|
|
||||||
|
networks:
|
||||||
|
meltano:
|
||||||
|
# # Uncomment if you are using the Airflow orchestrator, delete otherwise
|
||||||
|
# airflow:
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
meltano_postgresql_data:
|
||||||
|
driver: local
|
||||||
|
meltano_elt_logs_data:
|
||||||
|
driver: local
|
||||||
|
# # Uncomment if you are using the Airflow orchestrator, delete otherwise
|
||||||
|
# airflow_postgresql_data:
|
||||||
|
# driver: local
|
||||||
38
docker-compose.yml
Normal file
38
docker-compose.yml
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
x-meltano-image: &meltano-image
|
||||||
|
image: gitea.jvtech.be/lakehouse/sarens-integration:latest
|
||||||
|
|
||||||
|
services:
|
||||||
|
meltano:
|
||||||
|
<<: *meltano-image
|
||||||
|
command: "--environment=sarens-demo run tap-spreadsheets-anywhere target-postgres dbt-postgres:run"
|
||||||
|
restart: no # unless-stopped
|
||||||
|
networks:
|
||||||
|
- db_network
|
||||||
|
volumes:
|
||||||
|
- /home/sarens-data:/sarens-data
|
||||||
|
environment:
|
||||||
|
- TARGET_POSTGRES_PASSWORD=${TARGET_POSTGRES_PASSWORD}
|
||||||
|
- DBT_POSTGRES_PASSWORD=${DBT_POSTGRES_PASSWORD}
|
||||||
|
|
||||||
|
# # Uncomment if you are using the Airflow orchestrator, delete otherwise
|
||||||
|
# airflow-scheduler:
|
||||||
|
# <<: *meltano-image
|
||||||
|
# command: invoke airflow scheduler
|
||||||
|
# expose:
|
||||||
|
# - 8793
|
||||||
|
# restart: unless-stopped
|
||||||
|
#
|
||||||
|
# airflow-webserver:
|
||||||
|
# <<: *meltano-image
|
||||||
|
# command: invoke airflow webserver
|
||||||
|
# expose:
|
||||||
|
# - 8080
|
||||||
|
# ports:
|
||||||
|
# - 8080:8080
|
||||||
|
# restart: unless-stopped
|
||||||
|
|
||||||
|
|
||||||
|
networks:
|
||||||
|
db_network:
|
||||||
|
external: true
|
||||||
|
name: db_network
|
||||||
116
meltano.yml
116
meltano.yml
@@ -9,14 +9,14 @@ environments:
|
|||||||
- name: tap-spreadsheets-anywhere
|
- name: tap-spreadsheets-anywhere
|
||||||
config:
|
config:
|
||||||
tables:
|
tables:
|
||||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||||
name: departments
|
name: departments
|
||||||
pattern: Applicable Organizations.xlsx
|
pattern: Applicable Organizations.xlsx
|
||||||
start_date: '2000-01-01T00:00:00Z'
|
start_date: '2000-01-01T00:00:00Z'
|
||||||
key_properties: []
|
key_properties: []
|
||||||
format: excel
|
format: excel
|
||||||
worksheet_name: AO
|
worksheet_name: AO
|
||||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||||
name: positions
|
name: positions
|
||||||
pattern: "HR002.*"
|
pattern: "HR002.*"
|
||||||
start_date: '2000-01-01T00:00:00Z'
|
start_date: '2000-01-01T00:00:00Z'
|
||||||
@@ -26,7 +26,7 @@ environments:
|
|||||||
skip_initial: 8
|
skip_initial: 8
|
||||||
sample_rate: 1
|
sample_rate: 1
|
||||||
max_sampling_read: 1000
|
max_sampling_read: 1000
|
||||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||||
name: workers
|
name: workers
|
||||||
pattern: "HR006.*"
|
pattern: "HR006.*"
|
||||||
start_date: '2000-01-01T00:00:00Z'
|
start_date: '2000-01-01T00:00:00Z'
|
||||||
@@ -36,7 +36,7 @@ environments:
|
|||||||
skip_initial: 8
|
skip_initial: 8
|
||||||
sample_rate: 1
|
sample_rate: 1
|
||||||
max_sampling_read: 25000
|
max_sampling_read: 25000
|
||||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||||
name: performance_review_steps
|
name: performance_review_steps
|
||||||
pattern: "PER001.*"
|
pattern: "PER001.*"
|
||||||
start_date: '2000-01-01T00:00:00Z'
|
start_date: '2000-01-01T00:00:00Z'
|
||||||
@@ -46,7 +46,7 @@ environments:
|
|||||||
skip_initial: 8
|
skip_initial: 8
|
||||||
sample_rate: 1
|
sample_rate: 1
|
||||||
max_sampling_read: 25000
|
max_sampling_read: 25000
|
||||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||||
name: performance_review_total_scoring
|
name: performance_review_total_scoring
|
||||||
pattern: "PER002.*"
|
pattern: "PER002.*"
|
||||||
start_date: '2000-01-01T00:00:00Z'
|
start_date: '2000-01-01T00:00:00Z'
|
||||||
@@ -56,7 +56,7 @@ environments:
|
|||||||
skip_initial: 8
|
skip_initial: 8
|
||||||
sample_rate: 1
|
sample_rate: 1
|
||||||
max_sampling_read: 25000
|
max_sampling_read: 25000
|
||||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||||
name: performance_review_sub_scoring
|
name: performance_review_sub_scoring
|
||||||
pattern: "PER003.*"
|
pattern: "PER003.*"
|
||||||
start_date: '2000-01-01T00:00:00Z'
|
start_date: '2000-01-01T00:00:00Z'
|
||||||
@@ -66,15 +66,21 @@ environments:
|
|||||||
skip_initial: 8
|
skip_initial: 8
|
||||||
sample_rate: 1
|
sample_rate: 1
|
||||||
max_sampling_read: 25000
|
max_sampling_read: 25000
|
||||||
select:
|
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||||
- departments.*
|
name: absenteisme
|
||||||
- '!departments._*'
|
pattern: "Absenteisme.*"
|
||||||
|
start_date: '2000-01-01T00:00:00Z'
|
||||||
|
key_properties: []
|
||||||
|
format: excel
|
||||||
|
worksheet_name: Tabel
|
||||||
|
sample_rate: 1
|
||||||
|
max_sampling_read: 25000
|
||||||
loaders:
|
loaders:
|
||||||
- name: target-postgres
|
- name: target-postgres
|
||||||
config:
|
config:
|
||||||
database: lakehouse_sarens
|
database: lakehouse_sarens
|
||||||
host: localhost
|
host: localhost
|
||||||
load_method: append-only
|
load_method: overwrite
|
||||||
user: postgres
|
user: postgres
|
||||||
default_target_schema: raw
|
default_target_schema: raw
|
||||||
utilities:
|
utilities:
|
||||||
@@ -85,7 +91,95 @@ environments:
|
|||||||
user: postgres
|
user: postgres
|
||||||
port: 5432
|
port: 5432
|
||||||
schema: clean
|
schema: clean
|
||||||
- name: staging
|
- name: sarens-demo
|
||||||
|
config:
|
||||||
|
plugins:
|
||||||
|
extractors:
|
||||||
|
- name: tap-spreadsheets-anywhere
|
||||||
|
config:
|
||||||
|
tables:
|
||||||
|
- path: file:///sarens-data/
|
||||||
|
name: departments
|
||||||
|
pattern: Applicable Organizations.xlsx
|
||||||
|
start_date: '2000-01-01T00:00:00Z'
|
||||||
|
key_properties: []
|
||||||
|
format: excel
|
||||||
|
worksheet_name: AO
|
||||||
|
- path: file:///sarens-data/
|
||||||
|
name: positions
|
||||||
|
pattern: "HR002.*"
|
||||||
|
start_date: '2000-01-01T00:00:00Z'
|
||||||
|
key_properties: []
|
||||||
|
format: excel
|
||||||
|
worksheet_name: HR002 - Positions Report (INTER
|
||||||
|
skip_initial: 8
|
||||||
|
sample_rate: 1
|
||||||
|
max_sampling_read: 1000
|
||||||
|
- path: file:///sarens-data/
|
||||||
|
name: workers
|
||||||
|
pattern: "HR006.*"
|
||||||
|
start_date: '2000-01-01T00:00:00Z'
|
||||||
|
key_properties: []
|
||||||
|
format: excel
|
||||||
|
worksheet_name: HR006 - All Users Report (Activ
|
||||||
|
skip_initial: 8
|
||||||
|
sample_rate: 1
|
||||||
|
max_sampling_read: 25000
|
||||||
|
- path: file:///sarens-data/
|
||||||
|
name: performance_review_steps
|
||||||
|
pattern: "PER001.*"
|
||||||
|
start_date: '2000-01-01T00:00:00Z'
|
||||||
|
key_properties: []
|
||||||
|
format: excel
|
||||||
|
worksheet_name: PER001 - Performance Review (2)
|
||||||
|
skip_initial: 8
|
||||||
|
sample_rate: 1
|
||||||
|
max_sampling_read: 25000
|
||||||
|
- path: file:///sarens-data/
|
||||||
|
name: performance_review_total_scoring
|
||||||
|
pattern: "PER002.*"
|
||||||
|
start_date: '2000-01-01T00:00:00Z'
|
||||||
|
key_properties: []
|
||||||
|
format: excel
|
||||||
|
worksheet_name: PER002 - Performance Review (2)
|
||||||
|
skip_initial: 8
|
||||||
|
sample_rate: 1
|
||||||
|
max_sampling_read: 25000
|
||||||
|
- path: file:///sarens-data/
|
||||||
|
name: performance_review_sub_scoring
|
||||||
|
pattern: "PER003.*"
|
||||||
|
start_date: '2000-01-01T00:00:00Z'
|
||||||
|
key_properties: []
|
||||||
|
format: excel
|
||||||
|
worksheet_name: PER003 - Performance Review (2)
|
||||||
|
skip_initial: 8
|
||||||
|
sample_rate: 1
|
||||||
|
max_sampling_read: 25000
|
||||||
|
- path: file:///sarens-data/
|
||||||
|
name: absenteisme
|
||||||
|
pattern: "Absenteisme.*"
|
||||||
|
start_date: '2000-01-01T00:00:00Z'
|
||||||
|
key_properties: []
|
||||||
|
format: excel
|
||||||
|
worksheet_name: Tabel
|
||||||
|
sample_rate: 1
|
||||||
|
max_sampling_read: 25000
|
||||||
|
loaders:
|
||||||
|
- name: target-postgres
|
||||||
|
config:
|
||||||
|
database: lakehouse-sarens
|
||||||
|
host: lakehouse-sarens-db-1
|
||||||
|
load_method: overwrite
|
||||||
|
user: lakehouse-sarens
|
||||||
|
default_target_schema: raw
|
||||||
|
utilities:
|
||||||
|
- name: dbt-postgres
|
||||||
|
config:
|
||||||
|
host: lakehouse-sarens-db-1
|
||||||
|
dbname: lakehouse-sarens
|
||||||
|
user: lakehouse-sarens
|
||||||
|
port: 5432
|
||||||
|
schema: clean
|
||||||
- name: prod
|
- name: prod
|
||||||
plugins:
|
plugins:
|
||||||
extractors:
|
extractors:
|
||||||
|
|||||||
11
plugins/files/files-docker--meltano.lock
Normal file
11
plugins/files/files-docker--meltano.lock
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
{
|
||||||
|
"plugin_type": "files",
|
||||||
|
"name": "files-docker",
|
||||||
|
"namespace": "files_docker",
|
||||||
|
"variant": "meltano",
|
||||||
|
"label": "Docker",
|
||||||
|
"docs": "https://hub.meltano.com/files/files-docker--meltano",
|
||||||
|
"repo": "https://github.com/meltano/files-docker",
|
||||||
|
"pip_url": "git+https://github.com/meltano/files-docker.git",
|
||||||
|
"logo_url": "https://hub.meltano.com/assets/logos/files/docker.png"
|
||||||
|
}
|
||||||
11
plugins/files/files-docker-compose--meltano.lock
Normal file
11
plugins/files/files-docker-compose--meltano.lock
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
{
|
||||||
|
"plugin_type": "files",
|
||||||
|
"name": "files-docker-compose",
|
||||||
|
"namespace": "files_docker_compose",
|
||||||
|
"variant": "meltano",
|
||||||
|
"label": "Docker Compose",
|
||||||
|
"docs": "https://hub.meltano.com/files/files-docker-compose--meltano",
|
||||||
|
"repo": "https://github.com/meltano/files-docker-compose",
|
||||||
|
"pip_url": "git+https://github.com/meltano/files-docker-compose.git",
|
||||||
|
"logo_url": "https://hub.meltano.com/assets/logos/files/docker-compose.png"
|
||||||
|
}
|
||||||
@@ -13,6 +13,7 @@ latest_departments as (
|
|||||||
from {{ source('tap_spreadsheets_anywhere', 'departments') }}
|
from {{ source('tap_spreadsheets_anywhere', 'departments') }}
|
||||||
) t
|
) t
|
||||||
where rn = 1
|
where rn = 1
|
||||||
|
and id not in ('CAD', 'CSAD')
|
||||||
),
|
),
|
||||||
department_tree as (
|
department_tree as (
|
||||||
-- Anchor: top-level department (parent_id is set to Sarens Group in the Excel)
|
-- Anchor: top-level department (parent_id is set to Sarens Group in the Excel)
|
||||||
|
|||||||
@@ -17,11 +17,18 @@ latest_departments AS (
|
|||||||
FROM {{ ref('department') }}
|
FROM {{ ref('department') }}
|
||||||
),
|
),
|
||||||
|
|
||||||
positions_deduped AS (
|
-- Note: the position ID is not unique, hence the full deduplication logic was removed.
|
||||||
SELECT DISTINCT ON (position_id) *
|
-- However, we had positions with the same start date that were both flagged as the primary position, hence we only select an arbitrary one for now (temporary workaround).
|
||||||
|
deduplicated_positions AS (
|
||||||
|
SELECT DISTINCT ON (assigned_employee_id, assigned_employee_effective_date, assigned_unit_effective_date)
|
||||||
|
*
|
||||||
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
|
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
|
||||||
WHERE _sdc_deleted_at IS NULL
|
WHERE _sdc_deleted_at IS NULL
|
||||||
ORDER BY position_id, _sdc_received_at DESC
|
ORDER BY assigned_employee_id,
|
||||||
|
assigned_employee_effective_date DESC,
|
||||||
|
assigned_unit_effective_date DESC,
|
||||||
|
CASE WHEN primary_position = 'Yes' THEN 1 ELSE 0 END DESC,
|
||||||
|
_sdc_received_at DESC
|
||||||
),
|
),
|
||||||
|
|
||||||
transformed_worker AS (
|
transformed_worker AS (
|
||||||
@@ -38,41 +45,45 @@ position_details AS (
|
|||||||
w.contracting_company AS new_contracting_company,
|
w.contracting_company AS new_contracting_company,
|
||||||
d.path::ltree AS new_department_path,
|
d.path::ltree AS new_department_path,
|
||||||
d.manager_id::BIGINT AS new_manager_id,
|
d.manager_id::BIGINT AS new_manager_id,
|
||||||
p.assigned_employee_effective_date::DATE AS new_job_effective_date,
|
-- new job effective date:
|
||||||
p.fte_utilized_by_employee_in_this_position * 100 AS new_fte_percentage,
|
-- When both assigned_employee_effective_date and assigned_unit_effective_date are available, use the latest one
|
||||||
|
-- When only one of those is available, we pick that one (greatest ignores null values)
|
||||||
|
-- If neither is available, we pick original_hire_date (this is the case when there is no position record)
|
||||||
|
COALESCE(GREATEST(p.assigned_employee_effective_date, p.assigned_unit_effective_date), w.original_hire_date)::DATE AS new_job_effective_date,
|
||||||
|
COALESCE(p.fte_utilized_by_employee_in_this_position * 100, 100) AS new_fte_percentage, -- Default to 100% if not specified
|
||||||
tw.id as worker_id
|
tw.id as worker_id
|
||||||
FROM positions_deduped p
|
FROM transformed_worker tw
|
||||||
LEFT JOIN latest_workers w
|
LEFT JOIN deduplicated_positions p
|
||||||
ON p.assigned_employee_id = w.user_id
|
ON p.assigned_employee_id = tw.worker_hris_id
|
||||||
LEFT JOIN latest_departments d
|
LEFT JOIN latest_departments d
|
||||||
ON p.assigned_unit_id = d.department_hris_id
|
ON p.assigned_unit_id = d.department_hris_id
|
||||||
LEFT JOIN transformed_worker tw ON tw.worker_hris_id = w.user_id
|
LEFT JOIN latest_workers w ON tw.worker_hris_id = w.user_id
|
||||||
),
|
),
|
||||||
|
|
||||||
job_changes_with_history AS (
|
job_changes_with_history AS (
|
||||||
SELECT
|
SELECT
|
||||||
*,
|
*,
|
||||||
LAG(position_title) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_function_title,
|
LAG(position_title) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_function_title,
|
||||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_function_level, -- Not available
|
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_function_level, -- Not available
|
||||||
LAG(w.user_type) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_type,
|
LAG(w.user_type) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_type,
|
||||||
LAG(new_fte_percentage) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_fte_percentage,
|
LAG(new_fte_percentage) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_fte_percentage,
|
||||||
LAG(w.location) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_work_location,
|
LAG(w.location) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_work_location,
|
||||||
LAG(NULL::VARCHAR) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_work_location_type,
|
LAG(NULL::VARCHAR) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_work_location_type,
|
||||||
LAG(p.assigned_unit) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_team,
|
LAG(p.assigned_unit) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_team,
|
||||||
LAG(w.depot_cost_center) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_cost_center,
|
LAG(w.depot_cost_center) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_cost_center,
|
||||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_monthly_gross_fixed,
|
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_monthly_gross_fixed,
|
||||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_yearly_gross_fixed,
|
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_yearly_gross_fixed,
|
||||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_yearly_gross_variable,
|
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_yearly_gross_variable,
|
||||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_yearly_fully_loaded_cost,
|
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_yearly_fully_loaded_cost,
|
||||||
LAG(NULL::VARCHAR(3)) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_currency,
|
LAG(NULL::VARCHAR(3)) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_currency,
|
||||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_hay_grade,
|
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_hay_grade,
|
||||||
LAG(d.path::ltree) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_department_path,
|
LAG(d.path::ltree) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_department_path,
|
||||||
LAG(d.manager_id::BIGINT) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_manager_id,
|
LAG(d.manager_id::BIGINT) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_manager_id,
|
||||||
LAG(w.collar_type) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_collar_type,
|
LAG(w.collar_type) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_collar_type,
|
||||||
LAG(w.user_contract_type) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_contract_type,
|
LAG(w.user_contract_type) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_contract_type,
|
||||||
LAG(w.contracting_company) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_contracting_company,
|
LAG(w.contracting_company) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_contracting_company,
|
||||||
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY new_job_effective_date DESC) = 1 AS latest,
|
ROW_NUMBER() OVER (PARTITION BY worker_id ORDER BY new_job_effective_date DESC) = 1 AS latest,
|
||||||
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY new_job_effective_date) = 1 AS is_first
|
ROW_NUMBER() OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) = 1 AS is_first
|
||||||
FROM position_details p
|
FROM position_details p
|
||||||
LEFT JOIN latest_workers w ON p.assigned_employee_id = w.user_id
|
LEFT JOIN latest_workers w ON p.assigned_employee_id = w.user_id
|
||||||
LEFT JOIN latest_departments d ON p.assigned_unit_id = d.department_hris_id
|
LEFT JOIN latest_departments d ON p.assigned_unit_id = d.department_hris_id
|
||||||
|
|||||||
@@ -5,16 +5,58 @@
|
|||||||
}}
|
}}
|
||||||
|
|
||||||
with distinct_cycles as (
|
with distinct_cycles as (
|
||||||
select distinct task_name, task_status
|
select
|
||||||
|
min(step_submission_date) as cycle_start_date,
|
||||||
|
max(step_submission_date) as cycle_end_date,
|
||||||
|
task_name,
|
||||||
|
case
|
||||||
|
when count(case when task_status != 'Completed' then 1 end) = 0
|
||||||
|
then 'Closed'
|
||||||
|
else 'Open'
|
||||||
|
end as task_status
|
||||||
from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }}
|
from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }}
|
||||||
--where is_not_removed_from_task = 1
|
--where is_not_removed_from_task = 1
|
||||||
|
group by task_name
|
||||||
|
),
|
||||||
|
|
||||||
|
-- One output row per review cycle found in distinct_cycles.
base_records as (
    select
        -- surrogate key: dense rank of the cycle name
        cast(dense_rank() over (order by task_name) as bigint) as id,
        cast(cycle_start_date as date) as start_date,
        cast(cycle_end_date as date) as end_date,
        task_name as name,
        -- Hard-coded for Sarens: every cycle is reported as 'Closed';
        -- the task_status column of distinct_cycles is not used here.
        'Closed' as status,
        'annual' as type
    from distinct_cycles
),
|
||||||
|
|
||||||
|
-- Generate additional records for Performance Review 2024
|
||||||
|
additional_records as (
|
||||||
|
select
|
||||||
|
(select max(id) from base_records) + 1 as id,
|
||||||
|
start_date,
|
||||||
|
end_date,
|
||||||
|
'Performance Review 2024 - Generic' as name,
|
||||||
|
status,
|
||||||
|
type
|
||||||
|
from base_records
|
||||||
|
where name = 'Performance Review 2024'
|
||||||
|
|
||||||
|
union all
|
||||||
|
|
||||||
|
select
|
||||||
|
(select max(id) from base_records) + 2 as id,
|
||||||
|
start_date,
|
||||||
|
end_date,
|
||||||
|
'Performance Review 2024 - n-1 managers' as name,
|
||||||
|
status,
|
||||||
|
type
|
||||||
|
from base_records
|
||||||
|
where name = 'Performance Review 2024'
|
||||||
)
|
)
|
||||||
|
|
||||||
select
|
-- Combine original records with additional records
|
||||||
dense_rank() over (order by task_name)::bigint as id,
|
select * from base_records
|
||||||
null::date as start,
|
where name != 'Performance Review 2024'
|
||||||
null::date as "end",
|
union all
|
||||||
task_name as name,
|
select * from additional_records
|
||||||
task_status as status,
|
|
||||||
task_name as type
|
|
||||||
from distinct_cycles
|
|
||||||
@@ -11,6 +11,28 @@ with step_agg as (
|
|||||||
group by 1,2
|
group by 1,2
|
||||||
),
|
),
|
||||||
|
|
||||||
|
-- Splits the 'Performance Review 2024' cycle into two named cycles, per user:
--   * 'Performance Review 2024 - n-1 managers' when the user has at least one
--     'Performance Review 2024' step whose title is NOT one of the four standard steps
--     ('Employee Self Review', 'Manager Review',
--      'Performance Conversation & Manager Sign-off', 'Employee Sign-Off');
--   * 'Performance Review 2024 - Generic' otherwise.
-- Every other task_name passes through unchanged.
remapped_steps as (
    select
        user_id,
        case
            -- NULL-safe comparison: with a plain `!=` a NULL task_name evaluates to NULL,
            -- falls through to the branches below and gets relabelled as a 2024 cycle.
            -- IS DISTINCT FROM keeps a NULL task_name as NULL.
            when task_name is distinct from 'Performance Review 2024'
                then task_name
            when exists (
                select 1
                from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }} s
                where s.user_id = step_agg.user_id
                  and s.task_name = 'Performance Review 2024'
                  and s.step_title not in (
                      'Employee Self Review',
                      'Manager Review',
                      'Performance Conversation & Manager Sign-off',
                      'Employee Sign-Off'
                  )
            )
                then 'Performance Review 2024 - n-1 managers'
            else 'Performance Review 2024 - Generic'
        end as task_name,
        completed_at,
        status
    from step_agg
),
|
||||||
|
|
||||||
total_scoring as (
|
total_scoring as (
|
||||||
select
|
select
|
||||||
user_id::text as user_id,
|
user_id::text as user_id,
|
||||||
@@ -39,7 +61,7 @@ combined as (
|
|||||||
ts.overall_rating,
|
ts.overall_rating,
|
||||||
w.worker_id,
|
w.worker_id,
|
||||||
c.cycle_id
|
c.cycle_id
|
||||||
from step_agg s
|
from remapped_steps s
|
||||||
left join total_scoring ts using (user_id, task_name)
|
left join total_scoring ts using (user_id, task_name)
|
||||||
left join worker_map w on w.worker_hris_id = s.user_id
|
left join worker_map w on w.worker_hris_id = s.user_id
|
||||||
left join cycle_map c on c.name = s.task_name
|
left join cycle_map c on c.name = s.task_name
|
||||||
@@ -49,7 +71,8 @@ select
|
|||||||
row_number() over (order by user_id, task_name)::bigint as id,
|
row_number() over (order by user_id, task_name)::bigint as id,
|
||||||
null::date as start,
|
null::date as start,
|
||||||
status,
|
status,
|
||||||
overall_rating,
|
nullif(regexp_replace(overall_rating, '^(\d+).*', '\1'), '')::numeric as overall_rating_value,
|
||||||
|
overall_rating as overall_rating_text,
|
||||||
cycle_id,
|
cycle_id,
|
||||||
worker_id,
|
worker_id,
|
||||||
null::bigint as reviewer_id,
|
null::bigint as reviewer_id,
|
||||||
|
|||||||
@@ -17,20 +17,82 @@ steps as (
|
|||||||
user_id::text as user_id,
|
user_id::text as user_id,
|
||||||
task_name,
|
task_name,
|
||||||
step_title as name,
|
step_title as name,
|
||||||
|
case
|
||||||
|
when step_title in ('Auto-évaluation des employés', 'Employee Self Review', 'Sarens Projects Review',
|
||||||
|
'Approbation des employés',
|
||||||
|
'Self Review') then 1
|
||||||
|
when step_title in ('Évaluation du manager', 'Functional Manager Review', 'Discussion et approbation du manager',
|
||||||
|
'Manager Review', 'Strategy Review', 'Finance Review', 'Sales Review', 'Fleet Review', 'LCM Review', 'Operations Review') then 2
|
||||||
|
when step_title in ('HR Review', 'SHEQ Review', 'Performance Conversation & Manager Sign-off') then 3
|
||||||
|
when step_title = 'Employee Sign-Off' then 4
|
||||||
|
else null -- fallback for any unexpected values
|
||||||
|
end as sequence_number,
|
||||||
step_status as status,
|
step_status as status,
|
||||||
step_submission_date::date as completed_at
|
step_submission_date::date as completed_at
|
||||||
from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }}
|
from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }}
|
||||||
where is_not_removed_from_task = 1
|
where is_not_removed_from_task = 1
|
||||||
|
),
|
||||||
|
|
||||||
|
-- The four steps every review is expected to contain, with their display order.
mandatory_steps as (
    select
        name,
        sequence_number
    from (
        values
            ('Employee Self Review', 1),
            ('Manager Review', 2),
            ('Performance Conversation & Manager Sign-off', 3),
            ('Employee Sign-Off', 4)
    ) as required_step (name, sequence_number)
),
|
||||||
|
|
||||||
|
-- Distinct (review, user, cycle) combinations whose cycle name starts with
-- 'Performance Review 2024' (prefix match, so suffixed cycle names are included).
filtered_reviews as (
    select
        review_id,
        user_id,
        task_name
    from review_base
    where task_name like 'Performance Review 2024%'
    group by
        review_id,
        user_id,
        task_name
),
|
||||||
|
|
||||||
|
-- Cartesian product: every filtered review paired with every mandatory step,
-- i.e. the full set of steps each of those reviews is expected to have.
expected_steps as (
    select
        fr.review_id,
        fr.user_id,
        fr.task_name,
        ms.name,
        ms.sequence_number
    from filtered_reviews fr,
         mandatory_steps ms
),
|
||||||
|
|
||||||
|
-- Find which expected steps are missing from the source data
|
||||||
|
missing_steps as (
|
||||||
|
select
|
||||||
|
e.user_id,
|
||||||
|
e.task_name,
|
||||||
|
e.name,
|
||||||
|
e.sequence_number,
|
||||||
|
'Not started' as status,
|
||||||
|
null::date as completed_at
|
||||||
|
from expected_steps e
|
||||||
|
left join steps s
|
||||||
|
on e.user_id = s.user_id
|
||||||
|
and e.task_name like s.task_name || '%' -- Also map for remapped cycle generic/n-1
|
||||||
|
and e.name = s.name
|
||||||
|
where s.user_id is null
|
||||||
)
|
)
|
||||||
|
|
||||||
|
-- Combine existing steps with missing steps
|
||||||
select
|
select
|
||||||
row_number() over (order by s.user_id, s.task_name, s.name)::bigint as id,
|
row_number() over (order by s.user_id, s.task_name, s.name)::bigint as id,
|
||||||
r.review_id,
|
r.review_id,
|
||||||
s.name,
|
s.name,
|
||||||
|
s.sequence_number,
|
||||||
s.status,
|
s.status,
|
||||||
s.completed_at,
|
s.completed_at,
|
||||||
null::date as due
|
null::date as due
|
||||||
from steps s
|
from (
|
||||||
|
select * from steps
|
||||||
|
union all
|
||||||
|
select * from missing_steps
|
||||||
|
) s
|
||||||
left join review_base r
|
left join review_base r
|
||||||
on r.user_id = s.user_id
|
on r.user_id = s.user_id
|
||||||
and r.task_name = s.task_name
|
and r.task_name like s.task_name || '%' -- Also map for remapped cycle generic/n-1
|
||||||
@@ -16,7 +16,8 @@ total_scores as (
|
|||||||
select
|
select
|
||||||
r.review_id,
|
r.review_id,
|
||||||
t.section_name as dimension,
|
t.section_name as dimension,
|
||||||
nullif(trim(t.final_rating), '') as score,
|
nullif(regexp_replace(t.final_rating, '^(\d+).*|.*', '\1'), '')::numeric as score_value,
|
||||||
|
nullif(trim(t.final_rating), '') as score_text,
|
||||||
null::text as comment
|
null::text as comment
|
||||||
from {{ source('tap_spreadsheets_anywhere', 'performance_review_total_scoring') }} t
|
from {{ source('tap_spreadsheets_anywhere', 'performance_review_total_scoring') }} t
|
||||||
join review_base r
|
join review_base r
|
||||||
@@ -28,7 +29,8 @@ sub_scores as (
|
|||||||
select
|
select
|
||||||
r.review_id,
|
r.review_id,
|
||||||
s.competency_name as dimension,
|
s.competency_name as dimension,
|
||||||
nullif(trim(s.competency_rating_score), '') as score,
|
nullif(regexp_replace(s.competency_rating_score, '^(\d+).*|.*', '\1'), '')::numeric as score_value,
|
||||||
|
nullif(trim(s.competency_rating_score), '') as score_text,
|
||||||
null::text as comment
|
null::text as comment
|
||||||
from {{ source('tap_spreadsheets_anywhere', 'performance_review_sub_scoring') }} s
|
from {{ source('tap_spreadsheets_anywhere', 'performance_review_sub_scoring') }} s
|
||||||
join review_base r
|
join review_base r
|
||||||
@@ -39,7 +41,8 @@ sub_scores as (
|
|||||||
select
|
select
|
||||||
row_number() over (order by review_id, dimension)::bigint as id,
|
row_number() over (order by review_id, dimension)::bigint as id,
|
||||||
dimension,
|
dimension,
|
||||||
score, -- TODO: change to integer (score_value) + text (score_text)
|
score_value,
|
||||||
|
score_text,
|
||||||
comment,
|
comment,
|
||||||
review_id
|
review_id
|
||||||
from (
|
from (
|
||||||
|
|||||||
@@ -9,4 +9,5 @@ sources:
|
|||||||
- name: workers
|
- name: workers
|
||||||
- name: performance_review_steps
|
- name: performance_review_steps
|
||||||
- name: performance_review_total_scoring
|
- name: performance_review_total_scoring
|
||||||
- name: performance_review_sub_scoring
|
- name: performance_review_sub_scoring
|
||||||
|
- name: absenteisme
|
||||||
117
transform/models/tap_spreadsheets_anywhere/time_registration.sql
Normal file
117
transform/models/tap_spreadsheets_anywhere/time_registration.sql
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
{{ config(materialized='table') }}

-- Expands each absenteeism record from the `absenteisme` source into one row per
-- Belgian working day of the record's date range, labels each day as paid sick,
-- unpaid sick or regular work, and maps the record to a worker.
-- (Per the original author's note, the source data starts in June 2024.)

with absence_periods as (
    -- One row per source record: typed period boundaries and sick-day counters.
    select
        ax_nummer::text as ax_nummer,
        to_date(begindatum_historische_rubrieken, 'YYYY-MM-DD') as period_start,
        to_date(einddatum_historische_rubrieken, 'YYYY-MM-DD') as period_end,
        coalesce("0120_gewaarborgd_maandloon_ziekte", 0)::int as paid_sick_days,
        coalesce("0500_gelijkgestelde_dag_ziekte", 0)::int as unpaid_sick_days
    from {{ source('tap_spreadsheets_anywhere', 'absenteisme') }}
    where begindatum_historische_rubrieken is not null
),

period_days as (
    -- Every calendar day between period_start and period_end (inclusive) of each record.
    select
        p.ax_nummer,
        p.period_start,
        p.paid_sick_days,
        p.unpaid_sick_days,
        series.day::date as date
    from absence_periods p
    cross join lateral generate_series(
        p.period_start,
        p.period_end,
        interval '1 day'
    ) as series (day)
),

belgian_holidays as (
    -- Hard-coded Belgian public holidays for 2024-2025.
    select holiday_date
    from (
        values
            (date '2024-01-01'),
            (date '2024-04-01'),
            (date '2024-05-01'),
            (date '2024-05-09'),
            (date '2024-05-20'),
            (date '2024-07-21'),
            (date '2024-08-15'),
            (date '2024-11-01'),
            (date '2024-11-11'),
            (date '2024-12-25'),
            (date '2025-01-01'),
            (date '2025-04-21'),
            (date '2025-05-01'),
            (date '2025-05-29'),
            (date '2025-06-09'),
            (date '2025-07-21'),
            (date '2025-08-15'),
            (date '2025-11-01'),
            (date '2025-11-11'),
            (date '2025-12-25')
    ) as listed (holiday_date)
),

workdays as (
    -- Keep Monday-Friday only (dow 1..5; 0 = Sunday, 6 = Saturday) and drop public holidays.
    select
        d.ax_nummer,
        d.period_start,
        d.date,
        d.paid_sick_days,
        d.unpaid_sick_days
    from period_days d
    where extract(dow from d.date) between 1 and 5
      and not exists (
          select 1
          from belgian_holidays h
          where h.holiday_date = d.date
      )
),

classified_days as (
    -- Number the workdays within each (ax_nummer, period_start) record, then allocate:
    -- the first paid_sick_days workdays are paid sick days, the next unpaid_sick_days
    -- workdays are unpaid sick days, and all remaining workdays are regular work.
    select
        numbered.date,
        8 as hours,
        100 as percentage_workday,
        case
            when numbered.rn <= numbered.paid_sick_days
                then 'Paid sick day'
            when numbered.rn <= numbered.paid_sick_days + numbered.unpaid_sick_days
                then 'Unpaid sick day'
            else 'Work'
        end as type,
        numbered.ax_nummer
    from (
        select
            w.*,
            row_number() over (
                partition by w.ax_nummer, w.period_start
                order by w.date
            ) as rn
        from workdays w
    ) numbered
)

-- Attach the worker id; leading zeros are stripped from worker_hris_id before
-- comparing it with ax_nummer. Records without a matching worker are dropped (inner join).
select
    row_number() over (order by wk.id, c.date) as id,
    c.date,
    c.hours,
    c.percentage_workday,
    c.type,
    wk.id as worker_id
from classified_days c
join {{ ref('worker') }} wk
    on regexp_replace(wk.worker_hris_id, '^0+', '') = c.ax_nummer
order by worker_id, date
|
||||||
@@ -9,6 +9,9 @@ WITH latest_workers AS (
|
|||||||
SELECT DISTINCT ON (user_id) *
|
SELECT DISTINCT ON (user_id) *
|
||||||
FROM {{ source('tap_spreadsheets_anywhere', 'workers') }}
|
FROM {{ source('tap_spreadsheets_anywhere', 'workers') }}
|
||||||
WHERE _sdc_deleted_at IS NULL
|
WHERE _sdc_deleted_at IS NULL
|
||||||
|
AND user_id IS NOT NULL and user_id != '' -- Skipping empty user_ids
|
||||||
|
-- Skipping users with non-numeric user_id, as they are not valid
|
||||||
|
AND user_id ~ '^[0-9]+$'
|
||||||
ORDER BY user_id, _sdc_received_at DESC
|
ORDER BY user_id, _sdc_received_at DESC
|
||||||
),
|
),
|
||||||
|
|
||||||
@@ -19,14 +22,24 @@ latest_positions AS (
|
|||||||
SELECT DISTINCT ON (assigned_employee_id) *
|
SELECT DISTINCT ON (assigned_employee_id) *
|
||||||
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
|
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
|
||||||
WHERE _sdc_deleted_at IS NULL
|
WHERE _sdc_deleted_at IS NULL
|
||||||
AND primary_position = 'Yes'
|
-- AND primary_position = 'Yes' -- Removed this filter, as there are employees with only non-primary positions
|
||||||
ORDER BY assigned_employee_id, _sdc_received_at DESC, assigned_employee_effective_date
|
ORDER BY assigned_employee_id, _sdc_received_at DESC, assigned_employee_effective_date DESC, assigned_unit_effective_date DESC, CASE WHEN primary_position = 'Yes' THEN 1 ELSE 0 END DESC
|
||||||
|
),
|
||||||
|
|
||||||
|
-- Get the first position for each worker, to set the hire date
|
||||||
|
-- This is to fix the "original hire date" column of the All users report containing dates before the first position
|
||||||
|
first_positions AS (
|
||||||
|
SELECT DISTINCT ON (assigned_employee_id) *
|
||||||
|
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
|
||||||
|
WHERE _sdc_deleted_at IS NULL
|
||||||
|
ORDER BY assigned_employee_id, _sdc_received_at DESC, GREATEST(assigned_employee_effective_date, assigned_unit_effective_date) ASC, CASE WHEN primary_position = 'Yes' THEN 1 ELSE 0 END DESC
|
||||||
),
|
),
|
||||||
|
|
||||||
joined_data AS (
|
joined_data AS (
|
||||||
SELECT
|
SELECT
|
||||||
w.user_id,
|
w.user_id,
|
||||||
w.birth_date::DATE AS date_of_birth,
|
-- if birth_date is 01/01/1901, we consider it NULL
|
||||||
|
NULLIF(w.birth_date::DATE, '1901-01-01') AS date_of_birth,
|
||||||
w.gender,
|
w.gender,
|
||||||
w.nationality,
|
w.nationality,
|
||||||
NULL::VARCHAR AS first_name, -- Not available
|
NULL::VARCHAR AS first_name, -- Not available
|
||||||
@@ -41,7 +54,7 @@ joined_data AS (
|
|||||||
NULL::VARCHAR AS address_country,
|
NULL::VARCHAR AS address_country,
|
||||||
NULL::VARCHAR AS phone_number,
|
NULL::VARCHAR AS phone_number,
|
||||||
NULL::VARCHAR AS driver_license,
|
NULL::VARCHAR AS driver_license,
|
||||||
w.original_hire_date::DATE AS employment_start,
|
COALESCE(GREATEST(fp.assigned_employee_effective_date, fp.assigned_unit_effective_date), GREATEST(w.original_hire_date, w.last_hire_date))::DATE AS employment_start,
|
||||||
w.user_type AS employment_type,
|
w.user_type AS employment_type,
|
||||||
w.user_contract_type AS employment_contract_type,
|
w.user_contract_type AS employment_contract_type,
|
||||||
w.contracting_company AS employment_contracting_company,
|
w.contracting_company AS employment_contracting_company,
|
||||||
@@ -60,10 +73,10 @@ joined_data AS (
|
|||||||
NULL::INTEGER AS employment_criticality,
|
NULL::INTEGER AS employment_criticality,
|
||||||
NULL::VARCHAR AS employment_probation_status,
|
NULL::VARCHAR AS employment_probation_status,
|
||||||
NULL::DATE AS employment_probation_end_date,
|
NULL::DATE AS employment_probation_end_date,
|
||||||
d.path::ltree AS employment_department_path,
|
COALESCE(d.path::ltree, d2.path::ltree) AS employment_department_path,
|
||||||
NULL::VARCHAR(254) AS email,
|
NULL::VARCHAR(254) AS email,
|
||||||
NULL::DATE AS employment_earliest_retirement_date,
|
NULL::DATE AS employment_earliest_retirement_date,
|
||||||
ROUND(p.fte_utilized_by_employee_in_this_position * 100)::INTEGER AS employment_fte_percentage,
|
COALESCE(ROUND(p.fte_utilized_by_employee_in_this_position * 100)::INTEGER, 100) AS employment_fte_percentage,
|
||||||
NULL::INTEGER AS salary_hay_grade,
|
NULL::INTEGER AS salary_hay_grade,
|
||||||
NULL::VARCHAR(3) AS salary_currency,
|
NULL::VARCHAR(3) AS salary_currency,
|
||||||
NULL::INTEGER AS salary_yearly_gross_fixed,
|
NULL::INTEGER AS salary_yearly_gross_fixed,
|
||||||
@@ -76,8 +89,12 @@ joined_data AS (
|
|||||||
FROM latest_workers w
|
FROM latest_workers w
|
||||||
LEFT JOIN latest_positions p
|
LEFT JOIN latest_positions p
|
||||||
ON w.user_id = p.assigned_employee_id
|
ON w.user_id = p.assigned_employee_id
|
||||||
LEFT JOIN {{ ref('department') }} d
|
LEFT JOIN first_positions fp
|
||||||
|
ON w.user_id = fp.assigned_employee_id
|
||||||
|
LEFT JOIN {{ ref('department') }} d -- Source = Department from Positions report, only relevant for active workers
|
||||||
ON p.assigned_unit_id = d.department_hris_id
|
ON p.assigned_unit_id = d.department_hris_id
|
||||||
|
LEFT JOIN {{ ref('department') }} d2 -- Source = Business Unit from All Users report, only relevant for inactive workers
|
||||||
|
ON w.business_unit = d2.name AND nlevel(d2.path) = 3
|
||||||
)
|
)
|
||||||
|
|
||||||
SELECT
|
SELECT
|
||||||
@@ -100,6 +117,9 @@ SELECT
|
|||||||
driver_license,
|
driver_license,
|
||||||
employment_start,
|
employment_start,
|
||||||
employment_type,
|
employment_type,
|
||||||
|
employment_contract_type,
|
||||||
|
employment_contracting_company,
|
||||||
|
employment_collar_type,
|
||||||
employment_function_level,
|
employment_function_level,
|
||||||
employment_function_title,
|
employment_function_title,
|
||||||
employment_team,
|
employment_team,
|
||||||
@@ -107,6 +127,7 @@ SELECT
|
|||||||
employment_cost_center,
|
employment_cost_center,
|
||||||
employment_exit_type,
|
employment_exit_type,
|
||||||
employment_exit_date,
|
employment_exit_date,
|
||||||
|
employment_exit_reason,
|
||||||
performance_rating,
|
performance_rating,
|
||||||
performance_date,
|
performance_date,
|
||||||
employment_manager_id,
|
employment_manager_id,
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ meltano:
|
|||||||
search_path: "{{ env_var('DBT_POSTGRES_SEARCH_PATH', '') }}" # optional, override the default postgres search_path
|
search_path: "{{ env_var('DBT_POSTGRES_SEARCH_PATH', '') }}" # optional, override the default postgres search_path
|
||||||
role: "{{ env_var('DBT_POSTGRES_ROLE', '') }}" # optional, set the role dbt assumes when executing queries
|
role: "{{ env_var('DBT_POSTGRES_ROLE', '') }}" # optional, set the role dbt assumes when executing queries
|
||||||
# sslmode: "{{ env_var('DBT_POSTGRES_SSLMODE', '').split() }}" # optional, set the sslmode used to connect to the database
|
# sslmode: "{{ env_var('DBT_POSTGRES_SSLMODE', '').split() }}" # optional, set the sslmode used to connect to the database
|
||||||
staging:
|
sarens-demo:
|
||||||
type: postgres
|
type: postgres
|
||||||
host: "{{ env_var('DBT_POSTGRES_HOST') }}"
|
host: "{{ env_var('DBT_POSTGRES_HOST') }}"
|
||||||
user: "{{ env_var('DBT_POSTGRES_USER') }}"
|
user: "{{ env_var('DBT_POSTGRES_USER') }}"
|
||||||
|
|||||||
Reference in New Issue
Block a user