changeset 5398:dcc692a333c0 extented-report

merge
author Thomas Junk <thomas.junk@intevation.de>
date Mon, 05 Jul 2021 08:31:46 +0200
parents bda102c56cfc (current diff) 23a30ba08243 (diff)
children 47c2ca05e8ec f5063fa7f666
files
diffstat 8 files changed, 287 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
Binary file report-templates/data-quality-report.xlsx has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/report-templates/data-quality-report.yaml	Mon Jul 05 08:31:46 2021 +0200
@@ -0,0 +1,119 @@
+actions:
+# One sheet per CC
+- type: select
+  statement: >
+    SELECT cc FROM
+      (VALUES ('AT'), ('SK'), ('HU'), ('HR'), ('RS'), ('BG'), ('RO') )
+      AS t (cc);
+  actions:
+  - type: sheet
+    source: CCTmpl
+    destination: "{{ cc }}"
+    vars: [last_row, column_number, offset]
+    actions:
+    # Header:
+    - type: copy
+      location: [A1,A5]
+    # BN names
+    - type: select
+      statement: >
+        SELECT DISTINCT objnam AS bnnam
+          FROM waterway.dqr_bottleneck_stats
+          WHERE cc = {{ cc }} ORDER BY objnam;
+      actions:
+      - type: assign
+        name: last_row
+        expr: (row_number ?? 0) + 6
+      - type: copy
+        location: [A6]
+        destination: A{{ last_row }}
+    - type: copy
+      location: [A7]
+      destination: A{{ (last_row ?? 6) + 1 }}
+    # Gen Months
+    - type: select
+      statement: >
+        SELECT to_char(d, 'Month YYYY') AS month, d::date
+          FROM generate_series( '2019-10-01'::date,
+                                now() - interval '1 day',
+                                '1 month'::interval ) d;
+      actions:
+      - type: assign
+        name: column_number
+        expr: column2num("B") + (row_number ?? 0) * 2
+      - type: copy
+        location: [B4,C5]
+        destination: '{{ coord2cell(column_number, 4) }}'
+      # BN SR-Count
+      - type: select
+        statement: >
+          SELECT objnam, srcnt, fwacnt
+            FROM waterway.dqr_bottleneck_stats
+            WHERE cc = {{ cc }} AND month = {{ d }}
+            ORDER BY objnam;
+        actions:
+        - type: assign
+          name: last_row
+          expr: (row_number ?? 0) + 6
+        - type: copy
+          location: [B6,C6]
+          destination: '{{ coord2cell(column_number, last_row) }}'
+      - type: copy
+        location: [B7,C7]
+        destination: '{{ coord2cell(column_number, (last_row ?? 0) + 1) }}'
+    - type: assign
+      name: offset
+      expr: (last_row ?? 0) + 3
+    #--------------------------------------------------------------------------
+    # GAUGES
+    # Header:
+    - type: copy
+      location: [A9]
+      destination: A{{ offset }}
+    # Gauges names
+    - type: select
+      statement: >
+        SELECT DISTINCT objname AS gnam FROM waterway.dqr_gauge_stats
+          WHERE cc = {{ cc }} ORDER BY objname;
+      actions:
+      - type: assign
+        name: last_row
+        expr: (row_number ?? 0) + offset + 1
+      - type: copy
+        location: [A10]
+        destination: A{{ last_row }}
+    - type: copy
+      location: [A11]
+      destination: A{{ (last_row ?? 10) + 1 }}
+    # Gen Months
+    - type: select
+      statement: >
+        SELECT to_char(d, 'Month YYYY') AS month, d::date
+          FROM generate_series( '2019-10-01'::date,
+                                now() - interval '1 day',
+                                '1 month'::interval ) d;
+      actions:
+      - type: assign
+        name: column_number
+        expr: column2num("B") + (row_number ?? 0)
+      - type: copy
+        location: [B9]
+        destination: '{{ coord2cell(column_number, offset) }}'
+      # BN SR-Count
+      - type: select
+        statement: >
+          SELECT objname, daynodata
+            FROM waterway.dqr_gauge_stats
+            WHERE cc = {{ cc }} AND month = {{ d }}
+            ORDER BY objname;
+        actions:
+        - type: assign
+          name: last_row
+          expr: (row_number ?? 0) + offset + 1
+        - type: copy
+          location: [B10]
+          destination: '{{ coord2cell(column_number, last_row) }}'
+      - type: copy
+        location: [B11]
+        destination: '{{ coord2cell(column_number, (last_row ?? 10) + 1) }}'
+
--- a/schema/gemma.sql	Mon Jul 05 08:31:22 2021 +0200
+++ b/schema/gemma.sql	Mon Jul 05 08:31:46 2021 +0200
@@ -807,8 +807,6 @@
         coverage varchar REFERENCES coverage_types,
         depth_reference varchar NOT NULL, -- REFERENCES depth_references,
         zpg_exception bool NOT NULL DEFAULT FALSE, -- Found no LDC in ZPG case.
-        octree_checksum varchar,
-        octree_index bytea,
         mesh_checksum varchar,
         mesh_index bytea,
         staging_done boolean NOT NULL DEFAULT false
--- a/schema/install-db.sh	Mon Jul 05 08:31:22 2021 +0200
+++ b/schema/install-db.sh	Mon Jul 05 08:31:46 2021 +0200
@@ -126,6 +126,7 @@
        -f "$BASEDIR/roles.sql" \
        -f "$BASEDIR/isrs.sql" \
        -f "$BASEDIR/gemma.sql" \
+       -f "$BASEDIR/reports.sql" \
        -f "$BASEDIR/geo_functions.sql" \
        -f "$BASEDIR/search_functions.sql" \
        -f "$BASEDIR/geonames.sql" \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/schema/reports.sql	Mon Jul 05 08:31:46 2021 +0200
@@ -0,0 +1,89 @@
+-- This is Free Software under GNU Affero General Public License v >= 3.0
+-- without warranty, see README.md and license for details.
+
+-- SPDX-License-Identifier: AGPL-3.0-or-later
+-- License-Filename: LICENSES/AGPL-3.0.txt
+
+-- Copyright (C) 2021 by via donau
+--   – Österreichische Wasserstraßen-Gesellschaft mbH
+-- Software engineering by Intevation GmbH
+
+-- Author(s):
+--  * Sascha Wilde <sascha.wilde@intevation.de>
+
+
+-- Materialized Views with statistical data for data quality reports
+CREATE MATERIALIZED VIEW waterway.dqr_gauge_stats AS
+WITH d AS ( SELECT ym::date, d::date
+              FROM generate_series( '2019-10-01'::date,
+                                    now() - interval '1 day',
+                                    '1 month'::interval ) ym,
+                   generate_series( ym,
+                                    ( ym + interval '1 month'
+                                         - interval '1 day'),
+                                     '1 day'::interval ) d ),
+     g AS ( SELECT d.ym, d.d AS day, (location).country_code AS cc,
+                   objname, array_agg(distinct(location)) AS locations
+            FROM waterway.gauges,d
+            GROUP BY objname,d.d,(location).country_code,d.ym ),
+     measure AS (
+       SELECT g.ym, g.cc, g.objname, g.day,
+              CASE WHEN count(measure_date) = 0
+              THEN 1 ELSE 0 END AS missing
+         FROM g
+         LEFT OUTER JOIN waterway.gauge_measurements gm
+           ON ARRAY[location] <@ g.locations
+             AND g.day <= measure_date
+             AND measure_date < (g.day + interval '1 day')
+         GROUP BY g.objname,g.day,g.ym,g.cc )
+  SELECT cc, ym AS month, objname, sum(missing) AS daynodata
+    FROM measure
+    GROUP BY cc, ym, objname;
+
+CREATE MATERIALIZED VIEW waterway.dqr_bottleneck_stats AS
+WITH d AS ( SELECT ym::date
+              FROM generate_series( '2019-10-01'::date,
+                                    now() - interval '1 day',
+                                    '1 month'::interval ) ym),
+     bn AS ( SELECT DISTINCT objnam, responsible_country AS cc
+               FROM waterway.bottlenecks ),
+     bid AS (SELECT objnam, array_agg(distinct(bottleneck_id)) AS ids
+               FROM waterway.bottlenecks GROUP BY objnam)
+  SELECT bn.cc, d.ym AS month, bid.objnam,
+         COALESCE(count(distinct(sr.date_info)),0) AS srcnt,
+         COALESCE(count(distinct(efa.measure_date)),0) AS fwacnt
+    FROM bn, bid
+    CROSS JOIN d
+    LEFT OUTER JOIN waterway.sounding_results sr
+      ON ARRAY[sr.bottleneck_id] <@ bid.ids
+         AND d.ym <= sr.date_info
+         AND sr.date_info < (d.ym + interval '1 month')
+    LEFT OUTER JOIN waterway.fairway_availability fa
+      ON ARRAY[fa.bottleneck_id] <@ bid.ids
+    LEFT OUTER JOIN waterway.effective_fairway_availability efa
+      ON fairway_availability_id = fa.id
+         AND d.ym <= efa.measure_date
+         AND efa.measure_date < (d.ym + interval '1 month')
+         AND efa.measure_type = 'Measured'
+    WHERE bid.objnam = bn.objnam
+    GROUP BY bn.cc, d.ym, bid.objnam;
+
+-- We need a wrapper procedure with owner rights for
+-- the refresh, as (from the PGSQL manual): "REFRESH MATERIALIZED VIEW
+-- completely replaces the contents of a materialized view. To execute
+-- this command you must be the owner of the materialized view.""
+
+CREATE OR REPLACE PROCEDURE sys_admin.update_dqr_stats()
+LANGUAGE plpgsql AS $$
+BEGIN
+    EXECUTE 'REFRESH MATERIALIZED VIEW waterway.dqr_bottleneck_stats';
+    EXECUTE 'REFRESH MATERIALIZED VIEW waterway.dqr_gauge_stats';
+END;
+$$ SECURITY DEFINER;
+
+GRANT EXECUTE ON PROCEDURE sys_admin.update_dqr_stats() TO sys_admin;
+
+-- Config update statement
+INSERT INTO sys_admin.stats_updates
+    VALUES ('Data quality report',
+            'CALL sys_admin.update_dqr_stats();');
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/schema/updates/1452/01.report_views.sql	Mon Jul 05 08:31:46 2021 +0200
@@ -0,0 +1,58 @@
+-- Materialized Views with statistical data for data quality reports
+CREATE MATERIALIZED VIEW waterway.dqr_gauge_stats AS
+WITH d AS ( SELECT ym::date, d::date
+              FROM generate_series( '2019-10-01'::date,
+                                    now() - interval '1 day',
+                                    '1 month'::interval ) ym,
+                   generate_series( ym,
+                                    ( ym + interval '1 month'
+                                         - interval '1 day'),
+                                     '1 day'::interval ) d ),
+     g AS ( SELECT d.ym, d.d AS day, (location).country_code AS cc,
+                   objname, array_agg(distinct(location)) AS locations
+            FROM waterway.gauges,d
+            GROUP BY objname,d.d,(location).country_code,d.ym ),
+     measure AS (
+       SELECT g.ym, g.cc, g.objname, g.day,
+              CASE WHEN count(measure_date) = 0
+              THEN 1 ELSE 0 END AS missing
+         FROM g
+         LEFT OUTER JOIN waterway.gauge_measurements gm
+           ON ARRAY[location] <@ g.locations
+             AND g.day <= measure_date
+             AND measure_date < (g.day + interval '1 day')
+         GROUP BY g.objname,g.day,g.ym,g.cc )
+  SELECT cc, ym AS month, objname, sum(missing) AS daynodata
+    FROM measure
+    GROUP BY cc, ym, objname;
+
+CREATE MATERIALIZED VIEW waterway.dqr_bottleneck_stats AS
+WITH d AS ( SELECT ym::date
+              FROM generate_series( '2019-10-01'::date,
+                                    now() - interval '1 day',
+                                    '1 month'::interval ) ym),
+     bn AS ( SELECT DISTINCT objnam, responsible_country AS cc
+               FROM waterway.bottlenecks ),
+     bid AS (SELECT objnam, array_agg(distinct(bottleneck_id)) AS ids
+               FROM waterway.bottlenecks GROUP BY objnam)
+  SELECT bn.cc, d.ym AS month, bid.objnam,
+         COALESCE(count(distinct(sr.date_info)),0) AS srcnt,
+         COALESCE(count(distinct(efa.measure_date)),0) AS fwacnt
+    FROM bn, bid
+    CROSS JOIN d
+    LEFT OUTER JOIN waterway.sounding_results sr
+      ON ARRAY[sr.bottleneck_id] <@ bid.ids
+         AND d.ym <= sr.date_info
+         AND sr.date_info < (d.ym + interval '1 month')
+    LEFT OUTER JOIN waterway.fairway_availability fa
+      ON ARRAY[fa.bottleneck_id] <@ bid.ids
+    LEFT OUTER JOIN waterway.effective_fairway_availability efa
+      ON fairway_availability_id = fa.id
+         AND d.ym <= efa.measure_date
+         AND efa.measure_date < (d.ym + interval '1 month')
+         AND efa.measure_type = 'Measured'
+    WHERE bid.objnam = bn.objnam
+    GROUP BY bn.cc, d.ym, bid.objnam;
+
+-- Refresh access rights!
+GRANT SELECT on ALL tables in schema waterway TO waterway_user;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/schema/updates/1453/01.update_dqr_stats.sql	Mon Jul 05 08:31:46 2021 +0200
@@ -0,0 +1,19 @@
+-- We need a wrapper procedure with owner rights for
+-- the refresh, as (from the PGSQL manual): "REFRESH MATERIALIZED VIEW
+-- completely replaces the contents of a materialized view. To execute
+-- this command you must be the owner of the materialized view.""
+
+CREATE OR REPLACE PROCEDURE sys_admin.update_dqr_stats()
+LANGUAGE plpgsql AS $$
+BEGIN
+    EXECUTE 'REFRESH MATERIALIZED VIEW waterway.dqr_bottleneck_stats';
+    EXECUTE 'REFRESH MATERIALIZED VIEW waterway.dqr_gauge_stats';
+END;
+$$ SECURITY DEFINER;
+
+GRANT EXECUTE ON PROCEDURE sys_admin.update_dqr_stats() TO sys_admin;
+
+-- Config update statement
+INSERT INTO sys_admin.stats_updates
+    VALUES ('Data quality report',
+            'CALL sys_admin.update_dqr_stats();');
--- a/schema/version.sql	Mon Jul 05 08:31:22 2021 +0200
+++ b/schema/version.sql	Mon Jul 05 08:31:46 2021 +0200
@@ -1,1 +1,1 @@
-INSERT INTO gemma_schema_version(version) VALUES (1451);
+INSERT INTO gemma_schema_version(version) VALUES (1453);