# HG changeset patch # User Thomas Junk # Date 1625466706 -7200 # Node ID dcc692a333c087be84d7a3b65f934d4a792e1116 # Parent bda102c56cfcbf30051aa1de075fa26c40b9eeca# Parent 23a30ba08243df1667fbc6a7358ee977e03b97ec merge diff -r bda102c56cfc -r dcc692a333c0 report-templates/data-quality-report.xlsx Binary file report-templates/data-quality-report.xlsx has changed diff -r bda102c56cfc -r dcc692a333c0 report-templates/data-quality-report.yaml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/report-templates/data-quality-report.yaml Mon Jul 05 08:31:46 2021 +0200 @@ -0,0 +1,119 @@ +actions: +# One sheet per CC +- type: select + statement: > + SELECT cc FROM + (VALUES ('AT'), ('SK'), ('HU'), ('HR'), ('RS'), ('BG'), ('RO') ) + AS t (cc); + actions: + - type: sheet + source: CCTmpl + destination: "{{ cc }}" + vars: [last_row, column_number, offset] + actions: + # Header: + - type: copy + location: [A1,A5] + # BN names + - type: select + statement: > + SELECT DISTINCT objnam AS bnnam + FROM waterway.dqr_bottleneck_stats + WHERE cc = {{ cc }} ORDER BY objnam; + actions: + - type: assign + name: last_row + expr: (row_number ?? 0) + 6 + - type: copy + location: [A6] + destination: A{{ last_row }} + - type: copy + location: [A7] + destination: A{{ (last_row ?? 6) + 1 }} + # Gen Months + - type: select + statement: > + SELECT to_char(d, 'Month YYYY') AS month, d::date + FROM generate_series( '2019-10-01'::date, + now() - interval '1 day', + '1 month'::interval ) d; + actions: + - type: assign + name: column_number + expr: column2num("B") + (row_number ?? 0) * 2 + - type: copy + location: [B4,C5] + destination: '{{ coord2cell(column_number, 4) }}' + # BN SR-Count + - type: select + statement: > + SELECT objnam, srcnt, fwacnt + FROM waterway.dqr_bottleneck_stats + WHERE cc = {{ cc }} AND month = {{ d }} + ORDER BY objnam; + actions: + - type: assign + name: last_row + expr: (row_number ?? 0) + 6 + - type: copy + location: [B6,C6] + destination: '{{ coord2cell(column_number, last_row) }}' + - type: copy + location: [B7,C7] + destination: '{{ coord2cell(column_number, (last_row ?? 0) + 1) }}' + - type: assign + name: offset + expr: (last_row ?? 0) + 3 + #-------------------------------------------------------------------------- + # GAUGES + # Header: + - type: copy + location: [A9] + destination: A{{ offset }} + # Gauges names + - type: select + statement: > + SELECT DISTINCT objname AS gnam FROM waterway.dqr_gauge_stats + WHERE cc = {{ cc }} ORDER BY objname; + actions: + - type: assign + name: last_row + expr: (row_number ?? 0) + offset + 1 + - type: copy + location: [A10] + destination: A{{ last_row }} + - type: copy + location: [A11] + destination: A{{ (last_row ?? 10) + 1 }} + # Gen Months + - type: select + statement: > + SELECT to_char(d, 'Month YYYY') AS month, d::date + FROM generate_series( '2019-10-01'::date, + now() - interval '1 day', + '1 month'::interval ) d; + actions: + - type: assign + name: column_number + expr: column2num("B") + (row_number ?? 0) + - type: copy + location: [B9] + destination: '{{ coord2cell(column_number, offset) }}' + # BN SR-Count + - type: select + statement: > + SELECT objname, daynodata + FROM waterway.dqr_gauge_stats + WHERE cc = {{ cc }} AND month = {{ d }} + ORDER BY objname; + actions: + - type: assign + name: last_row + expr: (row_number ?? 0) + offset + 1 + - type: copy + location: [B10] + destination: '{{ coord2cell(column_number, last_row) }}' + - type: copy + location: [B11] + destination: '{{ coord2cell(column_number, (last_row ?? 10) + 1) }}' + diff -r bda102c56cfc -r dcc692a333c0 schema/gemma.sql --- a/schema/gemma.sql Mon Jul 05 08:31:22 2021 +0200 +++ b/schema/gemma.sql Mon Jul 05 08:31:46 2021 +0200 @@ -807,8 +807,6 @@ coverage varchar REFERENCES coverage_types, depth_reference varchar NOT NULL, -- REFERENCES depth_references, zpg_exception bool NOT NULL DEFAULT FALSE, -- Found no LDC in ZPG case. - octree_checksum varchar, - octree_index bytea, mesh_checksum varchar, mesh_index bytea, staging_done boolean NOT NULL DEFAULT false diff -r bda102c56cfc -r dcc692a333c0 schema/install-db.sh --- a/schema/install-db.sh Mon Jul 05 08:31:22 2021 +0200 +++ b/schema/install-db.sh Mon Jul 05 08:31:46 2021 +0200 @@ -126,6 +126,7 @@ -f "$BASEDIR/roles.sql" \ -f "$BASEDIR/isrs.sql" \ -f "$BASEDIR/gemma.sql" \ + -f "$BASEDIR/reports.sql" \ -f "$BASEDIR/geo_functions.sql" \ -f "$BASEDIR/search_functions.sql" \ -f "$BASEDIR/geonames.sql" \ diff -r bda102c56cfc -r dcc692a333c0 schema/reports.sql --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/schema/reports.sql Mon Jul 05 08:31:46 2021 +0200 @@ -0,0 +1,89 @@ +-- This is Free Software under GNU Affero General Public License v >= 3.0 +-- without warranty, see README.md and license for details. + +-- SPDX-License-Identifier: AGPL-3.0-or-later +-- License-Filename: LICENSES/AGPL-3.0.txt + +-- Copyright (C) 2021 by via donau +-- – Österreichische Wasserstraßen-Gesellschaft mbH +-- Software engineering by Intevation GmbH + +-- Author(s): +-- * Sascha Wilde + + +-- Materialized Views with statistical data for data quality reports +CREATE MATERIALIZED VIEW waterway.dqr_gauge_stats AS +WITH d AS ( SELECT ym::date, d::date + FROM generate_series( '2019-10-01'::date, + now() - interval '1 day', + '1 month'::interval ) ym, + generate_series( ym, + ( ym + interval '1 month' + - interval '1 day'), + '1 day'::interval ) d ), + g AS ( SELECT d.ym, d.d AS day, (location).country_code AS cc, + objname, array_agg(distinct(location)) AS locations + FROM waterway.gauges,d + GROUP BY objname,d.d,(location).country_code,d.ym ), + measure AS ( + SELECT g.ym, g.cc, g.objname, g.day, + CASE WHEN count(measure_date) = 0 + THEN 1 ELSE 0 END AS missing + FROM g + LEFT OUTER JOIN waterway.gauge_measurements gm + ON ARRAY[location] <@ g.locations + AND g.day <= measure_date + AND measure_date < (g.day + interval '1 day') + GROUP BY g.objname,g.day,g.ym,g.cc ) + SELECT cc, ym AS month, objname, sum(missing) AS daynodata + FROM measure + GROUP BY cc, ym, objname; + +CREATE MATERIALIZED VIEW waterway.dqr_bottleneck_stats AS +WITH d AS ( SELECT ym::date + FROM generate_series( '2019-10-01'::date, + now() - interval '1 day', + '1 month'::interval ) ym), + bn AS ( SELECT DISTINCT objnam, responsible_country AS cc + FROM waterway.bottlenecks ), + bid AS (SELECT objnam, array_agg(distinct(bottleneck_id)) AS ids + FROM waterway.bottlenecks GROUP BY objnam) + SELECT bn.cc, d.ym AS month, bid.objnam, + COALESCE(count(distinct(sr.date_info)),0) AS srcnt, + COALESCE(count(distinct(efa.measure_date)),0) AS fwacnt + FROM bn, bid + CROSS JOIN d + LEFT OUTER JOIN waterway.sounding_results sr + ON ARRAY[sr.bottleneck_id] <@ bid.ids + AND d.ym <= sr.date_info + AND sr.date_info < (d.ym + interval '1 month') + LEFT OUTER JOIN waterway.fairway_availability fa + ON ARRAY[fa.bottleneck_id] <@ bid.ids + LEFT OUTER JOIN waterway.effective_fairway_availability efa + ON fairway_availability_id = fa.id + AND d.ym <= efa.measure_date + AND efa.measure_date < (d.ym + interval '1 month') + AND efa.measure_type = 'Measured' + WHERE bid.objnam = bn.objnam + GROUP BY bn.cc, d.ym, bid.objnam; + +-- We need a wrapper procedure with owner rights for +-- the refresh, as (from the PGSQL manual): "REFRESH MATERIALIZED VIEW +-- completely replaces the contents of a materialized view. To execute +-- this command you must be the owner of the materialized view."" + +CREATE OR REPLACE PROCEDURE sys_admin.update_dqr_stats() +LANGUAGE plpgsql AS $$ +BEGIN + EXECUTE 'REFRESH MATERIALIZED VIEW waterway.dqr_bottleneck_stats'; + EXECUTE 'REFRESH MATERIALIZED VIEW waterway.dqr_gauge_stats'; +END; +$$ SECURITY DEFINER; + +GRANT EXECUTE ON PROCEDURE sys_admin.update_dqr_stats() TO sys_admin; + +-- Config update statement +INSERT INTO sys_admin.stats_updates + VALUES ('Data quality report', + 'CALL sys_admin.update_dqr_stats();'); diff -r bda102c56cfc -r dcc692a333c0 schema/updates/1452/01.report_views.sql --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/schema/updates/1452/01.report_views.sql Mon Jul 05 08:31:46 2021 +0200 @@ -0,0 +1,58 @@ +-- Materialized Views with statistical data for data quality reports +CREATE MATERIALIZED VIEW waterway.dqr_gauge_stats AS +WITH d AS ( SELECT ym::date, d::date + FROM generate_series( '2019-10-01'::date, + now() - interval '1 day', + '1 month'::interval ) ym, + generate_series( ym, + ( ym + interval '1 month' + - interval '1 day'), + '1 day'::interval ) d ), + g AS ( SELECT d.ym, d.d AS day, (location).country_code AS cc, + objname, array_agg(distinct(location)) AS locations + FROM waterway.gauges,d + GROUP BY objname,d.d,(location).country_code,d.ym ), + measure AS ( + SELECT g.ym, g.cc, g.objname, g.day, + CASE WHEN count(measure_date) = 0 + THEN 1 ELSE 0 END AS missing + FROM g + LEFT OUTER JOIN waterway.gauge_measurements gm + ON ARRAY[location] <@ g.locations + AND g.day <= measure_date + AND measure_date < (g.day + interval '1 day') + GROUP BY g.objname,g.day,g.ym,g.cc ) + SELECT cc, ym AS month, objname, sum(missing) AS daynodata + FROM measure + GROUP BY cc, ym, objname; + +CREATE MATERIALIZED VIEW waterway.dqr_bottleneck_stats AS +WITH d AS ( SELECT ym::date + FROM generate_series( '2019-10-01'::date, + now() - interval '1 day', + '1 month'::interval ) ym), + bn AS ( SELECT DISTINCT objnam, responsible_country AS cc + FROM waterway.bottlenecks ), + bid AS (SELECT objnam, array_agg(distinct(bottleneck_id)) AS ids + FROM waterway.bottlenecks GROUP BY objnam) + SELECT bn.cc, d.ym AS month, bid.objnam, + COALESCE(count(distinct(sr.date_info)),0) AS srcnt, + COALESCE(count(distinct(efa.measure_date)),0) AS fwacnt + FROM bn, bid + CROSS JOIN d + LEFT OUTER JOIN waterway.sounding_results sr + ON ARRAY[sr.bottleneck_id] <@ bid.ids + AND d.ym <= sr.date_info + AND sr.date_info < (d.ym + interval '1 month') + LEFT OUTER JOIN waterway.fairway_availability fa + ON ARRAY[fa.bottleneck_id] <@ bid.ids + LEFT OUTER JOIN waterway.effective_fairway_availability efa + ON fairway_availability_id = fa.id + AND d.ym <= efa.measure_date + AND efa.measure_date < (d.ym + interval '1 month') + AND efa.measure_type = 'Measured' + WHERE bid.objnam = bn.objnam + GROUP BY bn.cc, d.ym, bid.objnam; + +-- Refresh access rights! +GRANT SELECT on ALL tables in schema waterway TO waterway_user; diff -r bda102c56cfc -r dcc692a333c0 schema/updates/1453/01.update_dqr_stats.sql --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/schema/updates/1453/01.update_dqr_stats.sql Mon Jul 05 08:31:46 2021 +0200 @@ -0,0 +1,19 @@ +-- We need a wrapper procedure with owner rights for +-- the refresh, as (from the PGSQL manual): "REFRESH MATERIALIZED VIEW +-- completely replaces the contents of a materialized view. To execute +-- this command you must be the owner of the materialized view."" + +CREATE OR REPLACE PROCEDURE sys_admin.update_dqr_stats() +LANGUAGE plpgsql AS $$ +BEGIN + EXECUTE 'REFRESH MATERIALIZED VIEW waterway.dqr_bottleneck_stats'; + EXECUTE 'REFRESH MATERIALIZED VIEW waterway.dqr_gauge_stats'; +END; +$$ SECURITY DEFINER; + +GRANT EXECUTE ON PROCEDURE sys_admin.update_dqr_stats() TO sys_admin; + +-- Config update statement +INSERT INTO sys_admin.stats_updates + VALUES ('Data quality report', + 'CALL sys_admin.update_dqr_stats();'); diff -r bda102c56cfc -r dcc692a333c0 schema/version.sql --- a/schema/version.sql Mon Jul 05 08:31:22 2021 +0200 +++ b/schema/version.sql Mon Jul 05 08:31:46 2021 +0200 @@ -1,1 +1,1 @@ -INSERT INTO gemma_schema_version(version) VALUES (1451); +INSERT INTO gemma_schema_version(version) VALUES (1453);