Compare commits
30 Commits
48d0762c15
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| b3ad962b41 | |||
| eaaf51f699 | |||
| 461a7eb9aa | |||
| 9a7059dcdf | |||
| 735250c4c7 | |||
| aa19a53d51 | |||
| 44d9f2c21a | |||
| f9834cac10 | |||
| a222a43727 | |||
| 8df02c97e8 | |||
| bc228215ec | |||
| d7e2b69dd0 | |||
| 2b8f26e9fe | |||
| 3486b4f509 | |||
| 2da0aff227 | |||
| 14fb081542 | |||
| aef1d13279 | |||
| 506dda094d | |||
| b256e70e46 | |||
| 65262b2c99 | |||
| 37c0c3826f | |||
| 23e707b7fe | |||
| cc4aadc708 | |||
| 04c0499cce | |||
| 2f2716a62b | |||
| a64382ae5a | |||
| 6542fb07ec | |||
| 5a8b2f2a69 | |||
| e72bb37106 | |||
| 88b52d85e7 |
15
.dockerignore
Normal file
15
.dockerignore
Normal file
@@ -0,0 +1,15 @@
|
||||
/.git
|
||||
|
||||
# .gitignore
|
||||
/venv
|
||||
/.meltano
|
||||
/.env
|
||||
/ui.cfg
|
||||
/output
|
||||
|
||||
# transform/.gitignore
|
||||
/transform/target/
|
||||
/transform/dbt_modules/
|
||||
/transform/logs/
|
||||
|
||||
# custom
|
||||
18
Dockerfile
Normal file
18
Dockerfile
Normal file
@@ -0,0 +1,18 @@
|
||||
# Base image for the project. Override at build time with
#   --build-arg MELTANO_IMAGE=<image>
# registry.gitlab.com/meltano/meltano:latest is also available in GitLab Registry
ARG MELTANO_IMAGE=meltano/meltano:latest
FROM ${MELTANO_IMAGE}

# All following instructions (and the container itself) run from the project root
WORKDIR /project

# Install any additional Python requirements first, before the project files are copied
COPY requirements.txt ./
RUN pip install --requirement requirements.txt

# Copy over the Meltano project directory (filtered by .dockerignore) and install its plugins
COPY . .
RUN meltano install

# Don't allow changes to containerized project files
ENV MELTANO_PROJECT_READONLY=1

# Arguments passed to `docker run` / compose `command:` become `meltano` CLI arguments
ENTRYPOINT ["meltano"]
|
||||
67
README (files-docker-compose).md
Normal file
67
README (files-docker-compose).md
Normal file
@@ -0,0 +1,67 @@
|
||||
# Meltano & Docker Compose
|
||||
|
||||
*[This file](https://gitlab.com/meltano/files-docker-compose/-/blob/master/bundle/README.md) has been added to your project for convenience and reference only. Feel free to delete it.*
|
||||
|
||||
## Getting started
|
||||
|
||||
1. Start the services in the background:
|
||||
|
||||
```bash
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Helpful commands
|
||||
|
||||
- `docker compose run meltano {subcommand}`: Run a [`meltano` CLI command](https://meltano.com/docs/command-line-interface.html) inside your container.
|
||||
- `docker compose logs`: See all logs.
|
||||
- `docker compose logs {service}`: See logs for a particular service, e.g. `meltano`.
|
||||
|
||||
## Optional services
|
||||
|
||||
If these services are not relevant to you, feel free to delete their commented sections.
|
||||
|
||||
### Airflow
|
||||
|
||||
If you are using the [Airflow orchestrator](https://meltano.com/docs/orchestration.html) and would like to run it using Docker Compose, follow these steps:
|
||||
|
||||
1. Uncomment the `airflow-webserver` and `airflow-scheduler` services.
|
||||
1. Start the new services:
|
||||
|
||||
```bash
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
1. Open the Airflow web interface at <http://localhost:8080>.
|
||||
|
||||
## Production usage
|
||||
|
||||
A `docker-compose.prod.yml` file is included that represents a [production-grade](https://meltano.com/docs/production.html) setup of a [containerized Meltano project](https://meltano.com/docs/containerization.html).
|
||||
|
||||
If this is not relevant to you, feel free to delete it.
|
||||
|
||||
### Dependencies
|
||||
|
||||
The production configuration depends on a `Dockerfile` being present in your project.
|
||||
|
||||
If you haven't already, add the appropriate `Dockerfile` and `.dockerignore` files to your project by adding the [`docker` file bundle](https://gitlab.com/meltano/files-docker):
|
||||
|
||||
```bash
|
||||
meltano add files docker
|
||||
```
|
||||
|
||||
### Usage
|
||||
|
||||
Please ensure you do the following before deploying to production:
|
||||
|
||||
1. If you are using the [Airflow orchestrator](#airflow) and would like to run it using Docker Compose, uncomment the Airflow services, network, and volume, and add `psycopg2` to `airflow`'s `pip_url` in `meltano.yml` as described in the ["Deployment in Production" guide](https://meltano.com/docs/production.html#airflow-orchestrator). If not, feel free to delete the commented sections.
|
||||
1. Change the database password for `meltano-system-db` (and `airflow-metadata-db`): look for `# CHANGE ME`.
|
||||
1. Update the database connection URIs under `x-meltano-env` (and `x-airflow-env`) to reflect the changed passwords.
|
||||
1. Add any environment variables from `.env` and your local environment that are needed for production under `x-meltano-env`.
|
||||
1. Change the image name and tag under `x-meltano-image` to something that makes sense for your project.
|
||||
1. Start the services in the background:
|
||||
|
||||
```bash
|
||||
docker compose -f docker-compose.prod.yml up -d
|
||||
```
|
||||
|
||||
If you've made changes to your project and need to rebuild your project-specific image, run `docker compose -f docker-compose.prod.yml up -d --build`.
|
||||
88
README.md
88
README.md
@@ -1,3 +1,91 @@
|
||||
meltano run tap-spreadsheets-anywhere target-postgres
|
||||
or
|
||||
meltano run tap-spreadsheets-anywhere target-postgres --full-refresh
|
||||
|
||||
meltano invoke dbt-postgres:run
|
||||
|
||||
|
||||
|
||||
meltano --environment=sarens-demo run tap-spreadsheets-anywhere target-postgres dbt-postgres:run
|
||||
|
||||
|
||||
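The `--full-refresh` flag / `overwrite` load method doesn't seem to work, so as a manual workaround reset the raw tables yourself before re-running the pipeline: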
|
||||
-- Manual reset of the raw landing tables.
-- NOTE(review): the TRUNCATE list and the DROP list are presumably alternatives
-- (empty the tables vs. remove them entirely) -- confirm which one is intended.

-- Empty the raw tables but keep their definitions.
TRUNCATE TABLE raw.absenteisme;
TRUNCATE TABLE raw.departments;
TRUNCATE TABLE raw.performance_review_steps;
TRUNCATE TABLE raw.performance_review_sub_scoring;
TRUNCATE TABLE raw.performance_review_total_scoring;
TRUNCATE TABLE raw.positions;
TRUNCATE TABLE raw.workers;

-- Remove the raw tables entirely.
-- IF EXISTS keeps the script re-runnable when some tables are already gone.
DROP TABLE IF EXISTS raw.absenteisme;
DROP TABLE IF EXISTS raw.departments;
DROP TABLE IF EXISTS raw.performance_review_steps;
DROP TABLE IF EXISTS raw.performance_review_sub_scoring;
DROP TABLE IF EXISTS raw.performance_review_total_scoring;
DROP TABLE IF EXISTS raw.positions;
DROP TABLE IF EXISTS raw.workers;
|
||||
|
||||
|
||||
|
||||
Data quality checks:
|
||||
|
||||
-- Total worker count: every row in clean.worker
SELECT COUNT(*)
FROM clean.worker;

-- Active workers: rows without an employment exit date
SELECT COUNT(*)
FROM clean.worker AS w
WHERE w.employment_exit_date IS NULL;
|
||||
|
||||
-- Number of job change / position change records
-- Distribution: how many workers have 0, 1, 2, ... rows in clean.job_change.
-- COUNT(jc.worker_id) counts only matched job_change rows. COUNT(*) would
-- report 1 for a worker without any job_change row, because the LEFT JOIN
-- still emits one (all-NULL) row for that worker.
WITH history_counts AS (
    SELECT
        w.id,
        COUNT(jc.worker_id) AS history_count
    FROM clean.worker AS w
    LEFT JOIN clean.job_change AS jc
        ON w.id = jc.worker_id
    GROUP BY w.id
)
SELECT
    history_count,
    COUNT(*)
FROM history_counts
GROUP BY history_count
ORDER BY history_count;
|
||||
|
||||
-- Years at the company
-- Distribution of whole years between employment_start and the exit date;
-- workers without an exit date are measured up to CURRENT_DATE.
WITH yac AS (
    SELECT
        w.id,
        EXTRACT('YEAR' FROM AGE(COALESCE(w.employment_exit_date, CURRENT_DATE), w.employment_start)) AS years_at_company
    FROM clean.worker AS w
)
SELECT
    yac.years_at_company,
    COUNT(*)
FROM yac
GROUP BY yac.years_at_company
-- Terminate the statement so the checks can be run together as one script.
ORDER BY yac.years_at_company;
|
||||
|
||||
-- Workers with an implausible tenure: fewer than 0 or more than 60 years at the company
WITH tenure AS (
    SELECT
        w.id,
        w.worker_hris_id,
        w.employment_start,
        w.employment_exit_date,
        EXTRACT('YEAR' FROM AGE(COALESCE(w.employment_exit_date, CURRENT_DATE), w.employment_start)) AS years_at_company
    FROM clean.worker AS w
)
SELECT t.*
FROM tenure AS t
WHERE t.years_at_company < 0
   OR t.years_at_company > 60;
|
||||
|
||||
|
||||
-- Performance review: number of steps loaded, per cycle and step name
-- (ordered by cycle name, then by the step's sequence number).
-- NOTE(review): tables are qualified with `clean.` to match the worker checks
-- in this README; this assumes the performance_* models are built into the
-- same schema -- confirm.
SELECT
    c.name AS cycle_name,
    s.name AS step_name,
    COUNT(*) AS step_count
FROM clean.performance_review_step AS s
INNER JOIN clean.performance_review AS r
    ON r.id = s.review_id
INNER JOIN clean.performance_cycle AS c
    ON c.id = r.cycle_id
GROUP BY c.name, s.name, s.sequence_number
ORDER BY c.name, s.sequence_number;

-- Steps not linked to a review
SELECT COUNT(*)
FROM clean.performance_review_step
WHERE review_id IS NULL;
|
||||
111
docker-compose.prod.yml
Normal file
111
docker-compose.prod.yml
Normal file
@@ -0,0 +1,111 @@
|
||||
# Production Compose setup for the containerized Meltano project.
# Extension fields (x-*) hold settings shared by several services via YAML anchors.

x-meltano-image: &meltano-image
  image: meltano-demo-project:dev # Change me to a name and tag that makes sense for your project
  build: .

x-meltano-env: &meltano-env
  MELTANO_DATABASE_URI: postgresql://postgres:postgres@meltano-system-db/meltano
  # Add any additional Meltano configuration environment variables here

# # Uncomment if you are using the Airflow orchestrator, delete otherwise
# # (the URI scheme must be `postgresql://`; SQLAlchemy 1.4+ rejects `postgres://`)
# x-airflow-env: &airflow-env
#   AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql://postgres:postgres@airflow-metadata-db/airflow
#   AIRFLOW__CORE__EXECUTOR: LocalExecutor

services:
  meltano:
    <<: *meltano-image
    # NOTE(review): `dragon` looks like a placeholder command. Presumably it should
    # be replaced with the real production invocation (e.g. the `run ...` command
    # used in docker-compose.yml) -- confirm before deploying.
    command: dragon
    environment:
      <<: *meltano-env
      # # If you are using the Airflow orchestrator, replace the merge key above
      # # with the list form below (a mapping may only contain one `<<` key);
      # # delete otherwise
      # <<: [*meltano-env, *airflow-env]
    volumes:
      - meltano_elt_logs_data:/project/.meltano/logs/elt
    expose:
      - 5000
    ports:
      - 5000:5000
    depends_on:
      - meltano-system-db
    networks:
      - meltano
    restart: unless-stopped

  meltano-system-db:
    image: postgres
    environment:
      POSTGRES_PASSWORD: postgres # CHANGE ME
      POSTGRES_DB: meltano
      PGDATA: /var/lib/postgresql/data/pgdata
    volumes:
      - meltano_postgresql_data:/var/lib/postgresql/data
    expose:
      - 5432
    networks:
      - meltano
    restart: unless-stopped

  # # Uncomment if you are using the Airflow orchestrator, delete otherwise
  # airflow-scheduler:
  #   <<: *meltano-image
  #   command: invoke airflow scheduler
  #   environment:
  #     <<: [*meltano-env, *airflow-env]
  #   volumes:
  #     - meltano_elt_logs_data:/project/.meltano/logs/elt
  #   expose:
  #     - 8793
  #   depends_on:
  #     - meltano-system-db
  #     - airflow-metadata-db
  #   networks:
  #     - meltano
  #     - airflow
  #   restart: unless-stopped
  #
  # airflow-webserver:
  #   <<: *meltano-image
  #   command: invoke airflow webserver
  #   environment:
  #     <<: [*meltano-env, *airflow-env]
  #   expose:
  #     - 8080
  #   ports:
  #     - 8080:8080
  #   depends_on:
  #     - meltano-system-db
  #     - airflow-metadata-db
  #   networks:
  #     - meltano
  #     - airflow
  #   restart: unless-stopped
  #
  # airflow-metadata-db:
  #   image: postgres
  #   environment:
  #     POSTGRES_PASSWORD: postgres # CHANGE ME
  #     POSTGRES_DB: airflow
  #     PGDATA: /var/lib/postgresql/data/pgdata
  #   volumes:
  #     - airflow_postgresql_data:/var/lib/postgresql/data
  #   expose:
  #     - 5432
  #   networks:
  #     - airflow
  #   restart: unless-stopped

networks:
  meltano:
  # # Uncomment if you are using the Airflow orchestrator, delete otherwise
  # airflow:

volumes:
  meltano_postgresql_data:
    driver: local
  meltano_elt_logs_data:
    driver: local
  # # Uncomment if you are using the Airflow orchestrator, delete otherwise
  # airflow_postgresql_data:
  #   driver: local
|
||||
38
docker-compose.yml
Normal file
38
docker-compose.yml
Normal file
@@ -0,0 +1,38 @@
|
||||
# Compose setup that runs the sarens-demo pipeline once against an external database network.

# Shared image definition, merged into each service via the YAML anchor
x-meltano-image: &meltano-image
  image: gitea.jvtech.be/lakehouse/sarens-integration:latest

services:
  meltano:
    <<: *meltano-image
    # Arguments for the image's `meltano` entrypoint: extract, load, then run dbt
    command: "--environment=sarens-demo run tap-spreadsheets-anywhere target-postgres dbt-postgres:run"
    # Must be quoted: an unquoted `no` is a boolean in YAML 1.1, not the string "no"
    restart: "no" # unless-stopped
    networks:
      - db_network
    volumes:
      # Host directory with the source spreadsheets, mounted at /sarens-data
      - /home/sarens-data:/sarens-data
    environment:
      # Passwords are taken from the environment / .env file of the Compose invocation
      - TARGET_POSTGRES_PASSWORD=${TARGET_POSTGRES_PASSWORD}
      - DBT_POSTGRES_PASSWORD=${DBT_POSTGRES_PASSWORD}

  # # Uncomment if you are using the Airflow orchestrator, delete otherwise
  # airflow-scheduler:
  #   <<: *meltano-image
  #   command: invoke airflow scheduler
  #   expose:
  #     - 8793
  #   restart: unless-stopped
  #
  # airflow-webserver:
  #   <<: *meltano-image
  #   command: invoke airflow webserver
  #   expose:
  #     - 8080
  #   ports:
  #     - 8080:8080
  #   restart: unless-stopped


networks:
  # Pre-existing network (not created by this file)
  db_network:
    external: true
    name: db_network
|
||||
116
meltano.yml
116
meltano.yml
@@ -9,14 +9,14 @@ environments:
|
||||
- name: tap-spreadsheets-anywhere
|
||||
config:
|
||||
tables:
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: departments
|
||||
pattern: Applicable Organizations.xlsx
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
key_properties: []
|
||||
format: excel
|
||||
worksheet_name: AO
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: positions
|
||||
pattern: "HR002.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
@@ -26,7 +26,7 @@ environments:
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 1000
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: workers
|
||||
pattern: "HR006.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
@@ -36,7 +36,7 @@ environments:
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: performance_review_steps
|
||||
pattern: "PER001.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
@@ -46,7 +46,7 @@ environments:
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: performance_review_total_scoring
|
||||
pattern: "PER002.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
@@ -56,7 +56,7 @@ environments:
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: performance_review_sub_scoring
|
||||
pattern: "PER003.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
@@ -66,15 +66,21 @@ environments:
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
select:
|
||||
- departments.*
|
||||
- '!departments._*'
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: absenteisme
|
||||
pattern: "Absenteisme.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
key_properties: []
|
||||
format: excel
|
||||
worksheet_name: Tabel
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
loaders:
|
||||
- name: target-postgres
|
||||
config:
|
||||
database: lakehouse_sarens
|
||||
host: localhost
|
||||
load_method: append-only
|
||||
load_method: overwrite
|
||||
user: postgres
|
||||
default_target_schema: raw
|
||||
utilities:
|
||||
@@ -85,7 +91,95 @@ environments:
|
||||
user: postgres
|
||||
port: 5432
|
||||
schema: clean
|
||||
- name: staging
|
||||
- name: sarens-demo
|
||||
config:
|
||||
plugins:
|
||||
extractors:
|
||||
- name: tap-spreadsheets-anywhere
|
||||
config:
|
||||
tables:
|
||||
- path: file:///sarens-data/
|
||||
name: departments
|
||||
pattern: Applicable Organizations.xlsx
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
key_properties: []
|
||||
format: excel
|
||||
worksheet_name: AO
|
||||
- path: file:///sarens-data/
|
||||
name: positions
|
||||
pattern: "HR002.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
key_properties: []
|
||||
format: excel
|
||||
worksheet_name: HR002 - Positions Report (INTER
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 1000
|
||||
- path: file:///sarens-data/
|
||||
name: workers
|
||||
pattern: "HR006.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
key_properties: []
|
||||
format: excel
|
||||
worksheet_name: HR006 - All Users Report (Activ
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
- path: file:///sarens-data/
|
||||
name: performance_review_steps
|
||||
pattern: "PER001.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
key_properties: []
|
||||
format: excel
|
||||
worksheet_name: PER001 - Performance Review (2)
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
- path: file:///sarens-data/
|
||||
name: performance_review_total_scoring
|
||||
pattern: "PER002.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
key_properties: []
|
||||
format: excel
|
||||
worksheet_name: PER002 - Performance Review (2)
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
- path: file:///sarens-data/
|
||||
name: performance_review_sub_scoring
|
||||
pattern: "PER003.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
key_properties: []
|
||||
format: excel
|
||||
worksheet_name: PER003 - Performance Review (2)
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
- path: file:///sarens-data/
|
||||
name: absenteisme
|
||||
pattern: "Absenteisme.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
key_properties: []
|
||||
format: excel
|
||||
worksheet_name: Tabel
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
loaders:
|
||||
- name: target-postgres
|
||||
config:
|
||||
database: lakehouse-sarens
|
||||
host: lakehouse-sarens-db-1
|
||||
load_method: overwrite
|
||||
user: lakehouse-sarens
|
||||
default_target_schema: raw
|
||||
utilities:
|
||||
- name: dbt-postgres
|
||||
config:
|
||||
host: lakehouse-sarens-db-1
|
||||
dbname: lakehouse-sarens
|
||||
user: lakehouse-sarens
|
||||
port: 5432
|
||||
schema: clean
|
||||
- name: prod
|
||||
plugins:
|
||||
extractors:
|
||||
|
||||
11
plugins/files/files-docker--meltano.lock
Normal file
11
plugins/files/files-docker--meltano.lock
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"plugin_type": "files",
|
||||
"name": "files-docker",
|
||||
"namespace": "files_docker",
|
||||
"variant": "meltano",
|
||||
"label": "Docker",
|
||||
"docs": "https://hub.meltano.com/files/files-docker--meltano",
|
||||
"repo": "https://github.com/meltano/files-docker",
|
||||
"pip_url": "git+https://github.com/meltano/files-docker.git",
|
||||
"logo_url": "https://hub.meltano.com/assets/logos/files/docker.png"
|
||||
}
|
||||
11
plugins/files/files-docker-compose--meltano.lock
Normal file
11
plugins/files/files-docker-compose--meltano.lock
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"plugin_type": "files",
|
||||
"name": "files-docker-compose",
|
||||
"namespace": "files_docker_compose",
|
||||
"variant": "meltano",
|
||||
"label": "Docker Compose",
|
||||
"docs": "https://hub.meltano.com/files/files-docker-compose--meltano",
|
||||
"repo": "https://github.com/meltano/files-docker-compose",
|
||||
"pip_url": "git+https://github.com/meltano/files-docker-compose.git",
|
||||
"logo_url": "https://hub.meltano.com/assets/logos/files/docker-compose.png"
|
||||
}
|
||||
@@ -13,6 +13,7 @@ latest_departments as (
|
||||
from {{ source('tap_spreadsheets_anywhere', 'departments') }}
|
||||
) t
|
||||
where rn = 1
|
||||
and id not in ('CAD', 'CSAD')
|
||||
),
|
||||
department_tree as (
|
||||
-- Anchor: top-level department (parent_id is set to Sarens Group in the Excel)
|
||||
|
||||
@@ -17,11 +17,18 @@ latest_departments AS (
|
||||
FROM {{ ref('department') }}
|
||||
),
|
||||
|
||||
positions_deduped AS (
|
||||
SELECT DISTINCT ON (position_id) *
|
||||
-- note: Positions ID is not unique, hence removed the full deduplication logic
|
||||
-- however, we had positions with same start date while both having primary position set to true, hence only selecting a random one for now (temp workaround)
|
||||
deduplicated_positions AS (
|
||||
SELECT DISTINCT ON (assigned_employee_id, assigned_employee_effective_date, assigned_unit_effective_date)
|
||||
*
|
||||
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
|
||||
WHERE _sdc_deleted_at IS NULL
|
||||
ORDER BY position_id, _sdc_received_at DESC
|
||||
ORDER BY assigned_employee_id,
|
||||
assigned_employee_effective_date DESC,
|
||||
assigned_unit_effective_date DESC,
|
||||
CASE WHEN primary_position = 'Yes' THEN 1 ELSE 0 END DESC,
|
||||
_sdc_received_at DESC
|
||||
),
|
||||
|
||||
transformed_worker AS (
|
||||
@@ -38,41 +45,45 @@ position_details AS (
|
||||
w.contracting_company AS new_contracting_company,
|
||||
d.path::ltree AS new_department_path,
|
||||
d.manager_id::BIGINT AS new_manager_id,
|
||||
p.assigned_employee_effective_date::DATE AS new_job_effective_date,
|
||||
p.fte_utilized_by_employee_in_this_position * 100 AS new_fte_percentage,
|
||||
-- new job effective date:
|
||||
-- When both assigned_employee_effective_date and assigned_unit_effective_date are available, use the latest one
|
||||
-- When only one of those is available, we pick that one (greatest ignores null values)
|
||||
-- If none are available, we pick original_hire_date (this is the case if there is no position record)
|
||||
COALESCE(GREATEST(p.assigned_employee_effective_date, p.assigned_unit_effective_date), w.original_hire_date)::DATE AS new_job_effective_date,
|
||||
COALESCE(p.fte_utilized_by_employee_in_this_position * 100, 100) AS new_fte_percentage, -- Default to 100% if not specified
|
||||
tw.id as worker_id
|
||||
FROM positions_deduped p
|
||||
LEFT JOIN latest_workers w
|
||||
ON p.assigned_employee_id = w.user_id
|
||||
FROM transformed_worker tw
|
||||
LEFT JOIN deduplicated_positions p
|
||||
ON p.assigned_employee_id = tw.worker_hris_id
|
||||
LEFT JOIN latest_departments d
|
||||
ON p.assigned_unit_id = d.department_hris_id
|
||||
LEFT JOIN transformed_worker tw ON tw.worker_hris_id = w.user_id
|
||||
LEFT JOIN latest_workers w ON tw.worker_hris_id = w.user_id
|
||||
),
|
||||
|
||||
job_changes_with_history AS (
|
||||
SELECT
|
||||
*,
|
||||
LAG(position_title) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_function_title,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_function_level, -- Not available
|
||||
LAG(w.user_type) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_type,
|
||||
LAG(new_fte_percentage) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_fte_percentage,
|
||||
LAG(w.location) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_work_location,
|
||||
LAG(NULL::VARCHAR) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_work_location_type,
|
||||
LAG(p.assigned_unit) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_team,
|
||||
LAG(w.depot_cost_center) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_cost_center,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_monthly_gross_fixed,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_yearly_gross_fixed,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_yearly_gross_variable,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_yearly_fully_loaded_cost,
|
||||
LAG(NULL::VARCHAR(3)) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_currency,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_salary_hay_grade,
|
||||
LAG(d.path::ltree) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_department_path,
|
||||
LAG(d.manager_id::BIGINT) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_manager_id,
|
||||
LAG(w.collar_type) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_collar_type,
|
||||
LAG(w.user_contract_type) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_contract_type,
|
||||
LAG(w.contracting_company) OVER (PARTITION BY user_id ORDER BY new_job_effective_date) AS previous_contracting_company,
|
||||
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY new_job_effective_date DESC) = 1 AS latest,
|
||||
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY new_job_effective_date) = 1 AS is_first
|
||||
LAG(position_title) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_function_title,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_function_level, -- Not available
|
||||
LAG(w.user_type) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_type,
|
||||
LAG(new_fte_percentage) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_fte_percentage,
|
||||
LAG(w.location) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_work_location,
|
||||
LAG(NULL::VARCHAR) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_work_location_type,
|
||||
LAG(p.assigned_unit) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_team,
|
||||
LAG(w.depot_cost_center) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_cost_center,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_monthly_gross_fixed,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_yearly_gross_fixed,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_yearly_gross_variable,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_yearly_fully_loaded_cost,
|
||||
LAG(NULL::VARCHAR(3)) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_currency,
|
||||
LAG(NULL::INTEGER) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_salary_hay_grade,
|
||||
LAG(d.path::ltree) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_department_path,
|
||||
LAG(d.manager_id::BIGINT) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_manager_id,
|
||||
LAG(w.collar_type) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_collar_type,
|
||||
LAG(w.user_contract_type) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_contract_type,
|
||||
LAG(w.contracting_company) OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) AS previous_contracting_company,
|
||||
ROW_NUMBER() OVER (PARTITION BY worker_id ORDER BY new_job_effective_date DESC) = 1 AS latest,
|
||||
ROW_NUMBER() OVER (PARTITION BY worker_id ORDER BY new_job_effective_date) = 1 AS is_first
|
||||
FROM position_details p
|
||||
LEFT JOIN latest_workers w ON p.assigned_employee_id = w.user_id
|
||||
LEFT JOIN latest_departments d ON p.assigned_unit_id = d.department_hris_id
|
||||
|
||||
@@ -5,16 +5,58 @@
|
||||
}}
|
||||
|
||||
with distinct_cycles as (
|
||||
select distinct task_name, task_status
|
||||
select
|
||||
min(step_submission_date) as cycle_start_date,
|
||||
max(step_submission_date) as cycle_end_date,
|
||||
task_name,
|
||||
case
|
||||
when count(case when task_status != 'Completed' then 1 end) = 0
|
||||
then 'Closed'
|
||||
else 'Open'
|
||||
end as task_status
|
||||
from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }}
|
||||
--where is_not_removed_from_task = 1
|
||||
)
|
||||
group by task_name
|
||||
),
|
||||
|
||||
base_records as (
|
||||
select
|
||||
dense_rank() over (order by task_name)::bigint as id,
|
||||
null::date as start,
|
||||
null::date as "end",
|
||||
cycle_start_date::date as start_date,
|
||||
cycle_end_date::date as end_date,
|
||||
task_name as name,
|
||||
task_status as status,
|
||||
task_name as type
|
||||
'Closed' as status, -- overwritten logic for Sarens
|
||||
'annual' as type
|
||||
from distinct_cycles
|
||||
),
|
||||
|
||||
-- Generate additional records for Performance Review 2024
|
||||
additional_records as (
|
||||
select
|
||||
(select max(id) from base_records) + 1 as id,
|
||||
start_date,
|
||||
end_date,
|
||||
'Performance Review 2024 - Generic' as name,
|
||||
status,
|
||||
type
|
||||
from base_records
|
||||
where name = 'Performance Review 2024'
|
||||
|
||||
union all
|
||||
|
||||
select
|
||||
(select max(id) from base_records) + 2 as id,
|
||||
start_date,
|
||||
end_date,
|
||||
'Performance Review 2024 - n-1 managers' as name,
|
||||
status,
|
||||
type
|
||||
from base_records
|
||||
where name = 'Performance Review 2024'
|
||||
)
|
||||
|
||||
-- Combine original records with additional records
|
||||
select * from base_records
|
||||
where name != 'Performance Review 2024'
|
||||
union all
|
||||
select * from additional_records
|
||||
@@ -11,6 +11,28 @@ with step_agg as (
|
||||
group by 1,2
|
||||
),
|
||||
|
||||
-- When task_name = "Performance Review 2024", we need to update it to either "Performance Review 2024 - Generic" or "Performance Review 2024 - n-1 managers"
|
||||
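-- This is determined per user_id: if the user has any "Performance Review 2024" step whose title is not one of 'Employee Self Review', 'Manager Review', 'Performance Conversation & Manager Sign-off' or 'Employee Sign-Off', the task becomes "Performance Review 2024 - n-1 managers"; otherwise it becomes "Performance Review 2024 - Generic"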
|
||||
remapped_steps as (
|
||||
select
|
||||
user_id,
|
||||
case
|
||||
when task_name != 'Performance Review 2024'
|
||||
then task_name
|
||||
when exists (
|
||||
select 1
|
||||
from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }} s
|
||||
where s.user_id = step_agg.user_id
|
||||
and s.task_name = 'Performance Review 2024'
|
||||
and s.step_title not in ('Employee Self Review', 'Manager Review', 'Performance Conversation & Manager Sign-off', 'Employee Sign-Off')
|
||||
) then 'Performance Review 2024 - n-1 managers'
|
||||
else 'Performance Review 2024 - Generic'
|
||||
end as task_name,
|
||||
completed_at,
|
||||
status
|
||||
from step_agg
|
||||
),
|
||||
|
||||
total_scoring as (
|
||||
select
|
||||
user_id::text as user_id,
|
||||
@@ -39,7 +61,7 @@ combined as (
|
||||
ts.overall_rating,
|
||||
w.worker_id,
|
||||
c.cycle_id
|
||||
from step_agg s
|
||||
from remapped_steps s
|
||||
left join total_scoring ts using (user_id, task_name)
|
||||
left join worker_map w on w.worker_hris_id = s.user_id
|
||||
left join cycle_map c on c.name = s.task_name
|
||||
@@ -49,7 +71,8 @@ select
|
||||
row_number() over (order by user_id, task_name)::bigint as id,
|
||||
null::date as start,
|
||||
status,
|
||||
overall_rating,
|
||||
-- Leading integer of the rating text; substring() yields NULL when the text does not start with digits, whereas regexp_replace() returns the text unchanged and the numeric cast then fails
substring(overall_rating from '^(\d+)')::numeric as overall_rating_value,
|
||||
overall_rating as overall_rating_text,
|
||||
cycle_id,
|
||||
worker_id,
|
||||
null::bigint as reviewer_id,
|
||||
|
||||
@@ -17,20 +17,82 @@ steps as (
|
||||
user_id::text as user_id,
|
||||
task_name,
|
||||
step_title as name,
|
||||
case
|
||||
when step_title in ('Auto-évaluation des employés', 'Employee Self Review', 'Sarens Projects Review',
|
||||
'Approbation des employés',
|
||||
'Self Review') then 1
|
||||
when step_title in ('Évaluation du manager', 'Functional Manager Review', 'Discussion et approbation du manager',
|
||||
'Manager Review', 'Strategy Review', 'Finance Review', 'Sales Review', 'Fleet Review', 'LCM Review', 'Operations Review') then 2
|
||||
when step_title in ('HR Review', 'SHEQ Review', 'Performance Conversation & Manager Sign-off') then 3
|
||||
when step_title = 'Employee Sign-Off' then 4
|
||||
else null -- fallback for any unexpected values
|
||||
end as sequence_number,
|
||||
step_status as status,
|
||||
step_submission_date::date as completed_at
|
||||
from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }}
|
||||
where is_not_removed_from_task = 1
|
||||
),
|
||||
|
||||
mandatory_steps as (
    -- Fixed list of review steps and their position in the review flow;
    -- cross-joined against the reviews further down to build the expected steps.
    select
        m.name,
        m.sequence_number
    from (
        values
            ('Employee Self Review', 1),
            ('Manager Review', 2),
            ('Performance Conversation & Manager Sign-off', 3),
            ('Employee Sign-Off', 4)
    ) as m (name, sequence_number)
),
|
||||
|
||||
-- Reviews whose task name starts with 'Performance Review 2024',
-- de-duplicated to one row per (review_id, user_id, task_name)
filtered_reviews as (
    select
        rb.review_id,
        rb.user_id,
        rb.task_name
    from review_base rb
    where rb.task_name like 'Performance Review 2024%'
    group by
        rb.review_id,
        rb.user_id,
        rb.task_name
),
|
||||
|
||||
-- Pair every filtered review with every mandatory step, giving the full set of
-- steps each review is expected to contain
expected_steps as (
    select
        fr.review_id,
        fr.user_id,
        fr.task_name,
        ms.name,
        ms.sequence_number
    from mandatory_steps ms
    cross join filtered_reviews fr
),
|
||||
|
||||
-- Expected steps with no counterpart in the source data; they are emitted as
-- placeholder rows with status 'Not started' and no completion date.
-- Column order matches `steps` because both are combined with `select *` below.
missing_steps as (
    select
        es.user_id,
        es.task_name,
        es.name,
        es.sequence_number,
        'Not started' as status,
        null::date as completed_at
    from expected_steps es
    where not exists (
        select 1
        from steps st
        where st.user_id = es.user_id
            -- Prefix match: also pairs the remapped cycle names (generic / n-1)
            -- with the original task name in the source
            and es.task_name like st.task_name || '%'
            and st.name = es.name
    )
)
|
||||
|
||||
-- Combine existing steps with missing steps
|
||||
select
|
||||
row_number() over (order by s.user_id, s.task_name, s.name)::bigint as id,
|
||||
r.review_id,
|
||||
s.name,
|
||||
s.sequence_number,
|
||||
s.status,
|
||||
s.completed_at,
|
||||
null::date as due
|
||||
from steps s
|
||||
from (
|
||||
select * from steps
|
||||
union all
|
||||
select * from missing_steps
|
||||
) s
|
||||
left join review_base r
|
||||
on r.user_id = s.user_id
|
||||
and r.task_name = s.task_name
|
||||
and r.task_name like s.task_name || '%' -- Also map for remapped cycle generic/n-1
|
||||
@@ -16,7 +16,8 @@ total_scores as (
|
||||
select
|
||||
r.review_id,
|
||||
t.section_name as dimension,
|
||||
nullif(trim(t.final_rating), '') as score,
|
||||
nullif(regexp_replace(t.final_rating, '^(\d+).*|.*', '\1'), '')::numeric as score_value,
|
||||
nullif(trim(t.final_rating), '') as score_text,
|
||||
null::text as comment
|
||||
from {{ source('tap_spreadsheets_anywhere', 'performance_review_total_scoring') }} t
|
||||
join review_base r
|
||||
@@ -28,7 +29,8 @@ sub_scores as (
|
||||
select
|
||||
r.review_id,
|
||||
s.competency_name as dimension,
|
||||
nullif(trim(s.competency_rating_score), '') as score,
|
||||
nullif(regexp_replace(s.competency_rating_score, '^(\d+).*|.*', '\1'), '')::numeric as score_value,
|
||||
nullif(trim(s.competency_rating_score), '') as score_text,
|
||||
null::text as comment
|
||||
from {{ source('tap_spreadsheets_anywhere', 'performance_review_sub_scoring') }} s
|
||||
join review_base r
|
||||
@@ -39,7 +41,8 @@ sub_scores as (
|
||||
select
|
||||
row_number() over (order by review_id, dimension)::bigint as id,
|
||||
dimension,
|
||||
score, -- TODO: change to integer (score_value) + text (score_text)
|
||||
score_value,
|
||||
score_text,
|
||||
comment,
|
||||
review_id
|
||||
from (
|
||||
|
||||
@@ -10,3 +10,4 @@ sources:
|
||||
- name: performance_review_steps
|
||||
- name: performance_review_total_scoring
|
||||
- name: performance_review_sub_scoring
|
||||
- name: absenteisme
|
||||
117
transform/models/tap_spreadsheets_anywhere/time_registration.sql
Normal file
117
transform/models/tap_spreadsheets_anywhere/time_registration.sql
Normal file
@@ -0,0 +1,117 @@
|
||||
{{
  config(
    materialized='table'
  )
}}

-- Daily time registration derived from the absenteeism overview.
--
-- Each record in `raw.absenteisme` (starting June 2024) holds one worker, one date
-- range and a count of paid / unpaid sick days. This model explodes every record
-- into one row per Belgian workday (Mon-Fri, excluding public holidays) and maps
-- the rows to workers.
--
-- Sick days are assigned to the first workdays of each record's range: paid sick
-- days first, then unpaid sick days; every remaining workday is typed 'Work'.
-- NOTE(review): the source only carries day counts, so the sick dates produced
-- here are an allocation, not the actual dates -- confirm this rule is acceptable.

with abs as (
  -- One row per absenteeism record with typed dates and sick-day counts
  select
    ax_nummer::text as ax_nummer,
    to_date(begindatum_historische_rubrieken, 'YYYY-MM-DD') as period_start,
    to_date(einddatum_historische_rubrieken, 'YYYY-MM-DD') as period_end,
    coalesce("0120_gewaarborgd_maandloon_ziekte", 0)::int as paid_sick_days,
    coalesce("0500_gelijkgestelde_dag_ziekte", 0)::int as unpaid_sick_days
  from {{ source('tap_spreadsheets_anywhere', 'absenteisme') }}
  where begindatum_historische_rubrieken is not null
),

calendar as (
  -- Generate every calendar day of each record's date range.
  -- A record without an end date produces no rows (generate_series is strict).
  select
    a.ax_nummer,
    a.period_start,
    a.period_end,
    a.paid_sick_days,
    a.unpaid_sick_days,
    -- Explicit timestamp casts: with bare dates the call resolves to the
    -- timestamptz variant, which makes the day stepping depend on the session
    -- time zone.
    generate_series(
      a.period_start::timestamp,
      a.period_end::timestamp,
      interval '1 day'
    )::date as date
  from abs a
),

holidays as (
  -- Hard-coded Belgian public holidays for 2024-2026.
  -- Extend this list before records for later years arrive; dates that are not
  -- listed are treated as normal workdays.
  select h.holiday_date
  from (
    values
      (date '2024-01-01'),
      (date '2024-04-01'),
      (date '2024-05-01'),
      (date '2024-05-09'),
      (date '2024-05-20'),
      (date '2024-07-21'),
      (date '2024-08-15'),
      (date '2024-11-01'),
      (date '2024-11-11'),
      (date '2024-12-25'),
      (date '2025-01-01'),
      (date '2025-04-21'),
      (date '2025-05-01'),
      (date '2025-05-29'),
      (date '2025-06-09'),
      (date '2025-07-21'),
      (date '2025-08-15'),
      (date '2025-11-01'),
      (date '2025-11-11'),
      (date '2025-12-25'),
      (date '2026-01-01'),
      (date '2026-04-06'),
      (date '2026-05-01'),
      (date '2026-05-14'),
      (date '2026-05-25'),
      (date '2026-07-21'),
      (date '2026-08-15'),
      (date '2026-11-01'),
      (date '2026-11-11'),
      (date '2026-12-25')
  ) as h (holiday_date)
),

workdays as (
  -- Filter to Belgian working days (Mon-Fri excluding public holidays)
  select
    c.ax_nummer,
    c.period_start,
    c.date,
    c.paid_sick_days,
    c.unpaid_sick_days
  from calendar c
  left join holidays h on c.date = h.holiday_date
  where extract(dow from c.date) not in (0, 6) -- 0 = Sunday, 6 = Saturday
    and h.holiday_date is null
),

indexed as (
  -- Give each workday an index within its record for sick-day allocation
  select
    w.ax_nummer,
    w.period_start,
    w.date,
    w.paid_sick_days,
    w.unpaid_sick_days,
    row_number() over (partition by w.ax_nummer, w.period_start order by w.date) as rn
  from workdays w
),

typed as (
  -- Translate the indices into the requested day type
  select
    w.date,
    8 as hours,                 -- every workday is registered as a fixed 8 hours
    100 as percentage_workday,  -- and as a full workday
    case
      when w.rn <= w.paid_sick_days then 'Paid sick day'
      when w.rn <= w.paid_sick_days + w.unpaid_sick_days then 'Unpaid sick day'
      else 'Work'
    end as type,
    w.ax_nummer
  from indexed w
),

with_workers as (
  -- Map to the worker table, stripping leading 0s from the HRIS id.
  -- Inner join: rows whose ax_nummer matches no worker are dropped.
  -- NOTE(review): only the worker side is stripped; ax_nummer is presumably
  -- stored without leading zeros -- TODO confirm against the source file.
  select
    t.date,
    t.hours,
    t.percentage_workday,
    t.type,
    t.ax_nummer,
    wk.id as worker_id
  from typed t
  inner join {{ ref('worker') }} wk
    on regexp_replace(wk.worker_hris_id, '^0+', '') = t.ax_nummer
)

select
  -- `type` is a tie-breaker so ids stay stable when overlapping records yield
  -- more than one row for the same worker and date
  row_number() over (order by worker_id, date, type)::bigint as id,
  date,
  hours,
  percentage_workday,
  type,
  worker_id
from with_workers
order by worker_id, date, type
|
||||
@@ -9,6 +9,9 @@ WITH latest_workers AS (
|
||||
SELECT DISTINCT ON (user_id) *
|
||||
FROM {{ source('tap_spreadsheets_anywhere', 'workers') }}
|
||||
WHERE _sdc_deleted_at IS NULL
|
||||
AND user_id IS NOT NULL and user_id != '' -- Skipping empty user_ids
|
||||
-- Skipping users with non-numeric user_id, as they are not valid
|
||||
AND user_id ~ '^[0-9]+$'
|
||||
ORDER BY user_id, _sdc_received_at DESC
|
||||
),
|
||||
|
||||
@@ -19,14 +22,24 @@ latest_positions AS (
|
||||
SELECT DISTINCT ON (assigned_employee_id) *
|
||||
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
|
||||
WHERE _sdc_deleted_at IS NULL
|
||||
AND primary_position = 'Yes'
|
||||
ORDER BY assigned_employee_id, _sdc_received_at DESC, assigned_employee_effective_date
|
||||
-- AND primary_position = 'Yes' -- Removed this filter, as there are employees with only non-primary positions
|
||||
ORDER BY assigned_employee_id, _sdc_received_at DESC, assigned_employee_effective_date DESC, assigned_unit_effective_date DESC, CASE WHEN primary_position = 'Yes' THEN 1 ELSE 0 END DESC
|
||||
),
|
||||
|
||||
-- First position per worker, used to derive the hire date.
-- Works around the "original hire date" column of the All users report, which can
-- contain dates that lie before the worker's first position.
-- Per employee, the row kept is: most recently received record first, then the
-- earliest effective date (the later of the employee / unit effective dates),
-- with a primary position preferred on ties.
first_positions AS (
    SELECT DISTINCT ON (pos.assigned_employee_id) pos.*
    FROM {{ source('tap_spreadsheets_anywhere', 'positions') }} AS pos
    WHERE pos._sdc_deleted_at IS NULL
    ORDER BY
        pos.assigned_employee_id,
        pos._sdc_received_at DESC,
        GREATEST(pos.assigned_employee_effective_date, pos.assigned_unit_effective_date) ASC,
        CASE WHEN pos.primary_position = 'Yes' THEN 0 ELSE 1 END ASC
),
|
||||
|
||||
joined_data AS (
|
||||
SELECT
|
||||
w.user_id,
|
||||
w.birth_date::DATE AS date_of_birth,
|
||||
-- if birth_date is 01/01/1901, we consider it NULL
|
||||
NULLIF(w.birth_date::DATE, '1901-01-01') AS date_of_birth,
|
||||
w.gender,
|
||||
w.nationality,
|
||||
NULL::VARCHAR AS first_name, -- Not available
|
||||
@@ -41,7 +54,7 @@ joined_data AS (
|
||||
NULL::VARCHAR AS address_country,
|
||||
NULL::VARCHAR AS phone_number,
|
||||
NULL::VARCHAR AS driver_license,
|
||||
w.original_hire_date::DATE AS employment_start,
|
||||
COALESCE(GREATEST(fp.assigned_employee_effective_date, fp.assigned_unit_effective_date), GREATEST(w.original_hire_date, w.last_hire_date))::DATE AS employment_start,
|
||||
w.user_type AS employment_type,
|
||||
w.user_contract_type AS employment_contract_type,
|
||||
w.contracting_company AS employment_contracting_company,
|
||||
@@ -60,10 +73,10 @@ joined_data AS (
|
||||
NULL::INTEGER AS employment_criticality,
|
||||
NULL::VARCHAR AS employment_probation_status,
|
||||
NULL::DATE AS employment_probation_end_date,
|
||||
d.path::ltree AS employment_department_path,
|
||||
COALESCE(d.path::ltree, d2.path::ltree) AS employment_department_path,
|
||||
NULL::VARCHAR(254) AS email,
|
||||
NULL::DATE AS employment_earliest_retirement_date,
|
||||
ROUND(p.fte_utilized_by_employee_in_this_position * 100)::INTEGER AS employment_fte_percentage,
|
||||
COALESCE(ROUND(p.fte_utilized_by_employee_in_this_position * 100)::INTEGER, 100) AS employment_fte_percentage,
|
||||
NULL::INTEGER AS salary_hay_grade,
|
||||
NULL::VARCHAR(3) AS salary_currency,
|
||||
NULL::INTEGER AS salary_yearly_gross_fixed,
|
||||
@@ -76,8 +89,12 @@ joined_data AS (
|
||||
FROM latest_workers w
|
||||
LEFT JOIN latest_positions p
|
||||
ON w.user_id = p.assigned_employee_id
|
||||
LEFT JOIN {{ ref('department') }} d
|
||||
LEFT JOIN first_positions fp
|
||||
ON w.user_id = fp.assigned_employee_id
|
||||
LEFT JOIN {{ ref('department') }} d -- Source = Department from Positions report, only relevant for active workers
|
||||
ON p.assigned_unit_id = d.department_hris_id
|
||||
LEFT JOIN {{ ref('department') }} d2 -- Source = Business Unit from All Users report, only relevant for inactive workers
|
||||
ON w.business_unit = d2.name AND nlevel(d2.path) = 3
|
||||
)
|
||||
|
||||
SELECT
|
||||
@@ -100,6 +117,9 @@ SELECT
|
||||
driver_license,
|
||||
employment_start,
|
||||
employment_type,
|
||||
employment_contract_type,
|
||||
employment_contracting_company,
|
||||
employment_collar_type,
|
||||
employment_function_level,
|
||||
employment_function_title,
|
||||
employment_team,
|
||||
@@ -107,6 +127,7 @@ SELECT
|
||||
employment_cost_center,
|
||||
employment_exit_type,
|
||||
employment_exit_date,
|
||||
employment_exit_reason,
|
||||
performance_rating,
|
||||
performance_date,
|
||||
employment_manager_id,
|
||||
|
||||
@@ -16,7 +16,7 @@ meltano:
|
||||
search_path: "{{ env_var('DBT_POSTGRES_SEARCH_PATH', '') }}" # optional, override the default postgres search_path
|
||||
role: "{{ env_var('DBT_POSTGRES_ROLE', '') }}" # optional, set the role dbt assumes when executing queries
|
||||
# sslmode: "{{ env_var('DBT_POSTGRES_SSLMODE', '').split() }}" # optional, set the sslmode used to connect to the database
|
||||
staging:
|
||||
sarens-demo:
|
||||
type: postgres
|
||||
host: "{{ env_var('DBT_POSTGRES_HOST') }}"
|
||||
user: "{{ env_var('DBT_POSTGRES_USER') }}"
|
||||
|
||||
Reference in New Issue
Block a user