changeset 5390:90fc90148ef8 extented-report

Added materialized views based data quality report
author Sascha Wilde <wilde@intevation.de>
date Fri, 02 Jul 2021 18:56:59 +0200
parents 661e8a2deed9
children 1dd63a2405bc
files report-templates/data-quality-report.xlsx report-templates/data-quality-report.yaml schema/install-db.sh schema/reports.sql schema/updates/1452/01.report_views.sql schema/version.sql
diffstat 6 files changed, 264 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
Binary file report-templates/data-quality-report.xlsx has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/report-templates/data-quality-report.yaml	Fri Jul 02 18:56:59 2021 +0200
@@ -0,0 +1,119 @@
+actions:
+# One sheet per CC
+- type: select
+  statement: >
+    SELECT cc FROM
+      (VALUES ('AT'), ('SK'), ('HU'), ('HR'), ('RS'), ('BG'), ('RO') )
+      AS t (cc);
+  actions:
+  - type: sheet
+    source: CCTmpl
+    destination: "{{ cc }}"
+    vars: [last_row, column_number, offset]
+    actions:
+    # Header:
+    - type: copy
+      location: [A1,A5]
+    # BN names
+    - type: select
+      statement: >
+        SELECT DISTINCT objnam AS bnnam
+          FROM waterway.dqr_bottleneck_stats
+          WHERE cc = {{ cc }} ORDER BY objnam;
+      actions:
+      - type: assign
+        name: last_row
+        expr: (row_number ?? 0) + 6
+      - type: copy
+        location: [A6]
+        destination: A{{ last_row }}
+    - type: copy
+      location: [A7]
+      destination: A{{ last_row + 1 }}
+    # Gen Months
+    - type: select
+      statement: >
+        SELECT to_char(d, 'Month YYYY') AS month, d::date
+          FROM generate_series( '2019-10-01'::date,
+                                now() - interval '1 day',
+                                '1 month'::interval ) d;
+      actions:
+      - type: assign
+        name: column_number
+        expr: column2num("B") + (row_number ?? 0) * 2
+      - type: copy
+        location: [B4,C5]
+        destination: '{{ coord2cell(column_number, 4) }}'
+      # BN SR-Count
+      - type: select
+        statement: >
+          SELECT objnam, srcnt, fwacnt
+            FROM waterway.dqr_bottleneck_stats
+            WHERE cc = {{ cc }} AND month = {{ d }}
+            ORDER BY objnam;
+        actions:
+        - type: assign
+          name: last_row 
+          expr: (row_number ?? 0) + 6
+        - type: copy
+          location: [B6,C6]
+          destination: '{{ coord2cell(column_number, last_row) }}'
+      - type: copy
+        location: [B7,C7]
+        destination: '{{ coord2cell(column_number, last_row + 1) }}'
+    - type: assign
+      name: offset
+      expr: last_row + 3
+    #--------------------------------------------------------------------------
+    # GAUGES
+    # Header:
+    - type: copy
+      location: [A9]
+      destination: A{{ offset }}
+    # Gauges names
+    - type: select
+      statement: >
+        SELECT DISTINCT objname AS gnam FROM waterway.dqr_gauge_stats
+          WHERE cc = {{ cc }} ORDER BY objname;
+      actions:
+      - type: assign
+        name: last_row
+        expr: (row_number ?? 0) + offset + 1
+      - type: copy
+        location: [A10]
+        destination: A{{ last_row }}
+    - type: copy
+      location: [A11]
+      destination: A{{ last_row + 1 }}
+    # Gen Months
+    - type: select
+      statement: >
+        SELECT to_char(d, 'Month YYYY') AS month, d::date
+          FROM generate_series( '2019-10-01'::date,
+                                now() - interval '1 day',
+                                '1 month'::interval ) d;
+      actions:
+      - type: assign
+        name: column_number
+        expr: column2num("B") + (row_number ?? 0)
+      - type: copy
+        location: [B9]
+        destination: '{{ coord2cell(column_number, offset) }}'
+      # BN SR-Count
+      - type: select
+        statement: >
+          SELECT objname, daynodata
+            FROM waterway.dqr_gauge_stats
+            WHERE cc = {{ cc }} AND month = {{ d }}
+            ORDER BY objname;
+        actions:
+        - type: assign
+          name: last_row 
+          expr: (row_number ?? 0) + offset + 1
+        - type: copy
+          location: [B10]
+          destination: '{{ coord2cell(column_number, last_row) }}'
+      - type: copy
+        location: [B11]
+        destination: '{{ coord2cell(column_number, last_row + 1) }}'
+
--- a/schema/install-db.sh	Fri Jul 02 14:16:02 2021 +0200
+++ b/schema/install-db.sh	Fri Jul 02 18:56:59 2021 +0200
@@ -126,6 +126,7 @@
        -f "$BASEDIR/roles.sql" \
        -f "$BASEDIR/isrs.sql" \
        -f "$BASEDIR/gemma.sql" \
+       -f "$BASEDIR/reports.sql" \
        -f "$BASEDIR/geo_functions.sql" \
        -f "$BASEDIR/search_functions.sql" \
        -f "$BASEDIR/geonames.sql" \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/schema/reports.sql	Fri Jul 02 18:56:59 2021 +0200
@@ -0,0 +1,85 @@
+-- This is Free Software under GNU Affero General Public License v >= 3.0
+-- without warranty, see README.md and license for details.
+
+-- SPDX-License-Identifier: AGPL-3.0-or-later
+-- License-Filename: LICENSES/AGPL-3.0.txt
+
+-- Copyright (C) 2021 by via donau
+--   – Österreichische Wasserstraßen-Gesellschaft mbH
+-- Software engineering by Intevation GmbH
+
+-- Author(s):
+--  * Sascha Wilde <sascha.wilde@intevation.de>
+
+
+-- Materialized Views with statistical data for data quality reports
+CREATE MATERIALIZED VIEW waterway.dqr_gauge_stats AS
+WITH d AS ( SELECT ym::date, d::date
+              FROM generate_series( '2019-10-01'::date,
+                                    now() - interval '1 day',
+                                    '1 month'::interval ) ym,
+                   generate_series( ym,
+                                    ( ym + interval '1 month'
+                                         - interval '1 day'),
+                                     '1 day'::interval ) d ),
+     g AS ( SELECT d.ym, d.d AS day, (location).country_code AS cc,
+                   objname, array_agg(distinct(location)) AS locations
+            FROM waterway.gauges,d
+            GROUP BY objname,d.d,(location).country_code,d.ym ),
+     measure AS (
+       SELECT g.ym, g.cc, g.objname, g.day,
+              CASE WHEN count(measure_date) = 0
+              THEN 1 ELSE 0 END AS missing
+         FROM g
+         LEFT OUTER JOIN waterway.gauge_measurements gm
+           ON ARRAY[location] <@ g.locations
+             AND g.day <= measure_date
+             AND measure_date < (g.day + interval '1 day')
+         GROUP BY g.objname,g.day,g.ym,g.cc )
+  SELECT cc, ym AS month, objname, sum(missing) AS daynodata
+    FROM measure
+    GROUP BY cc, ym, objname;
+
+CREATE MATERIALIZED VIEW waterway.dqr_bottleneck_stats AS
+WITH d AS ( SELECT ym::date
+              FROM generate_series( '2019-10-01'::date,
+                                    now() - interval '1 day',
+                                    '1 month'::interval ) ym),
+     bn AS ( SELECT DISTINCT objnam, responsible_country AS cc
+               FROM waterway.bottlenecks ),
+     bid AS (SELECT objnam, array_agg(distinct(bottleneck_id)) AS ids
+               FROM waterway.bottlenecks GROUP BY objnam)
+  SELECT bn.cc, d.ym AS month, bid.objnam,
+         COALESCE(count(distinct(sr.date_info)),0) AS srcnt,
+         COALESCE(count(distinct(efa.measure_date)),0) AS fwacnt
+    FROM bn, bid
+    CROSS JOIN d
+    LEFT OUTER JOIN waterway.sounding_results sr
+      ON ARRAY[sr.bottleneck_id] <@ bid.ids
+         AND d.ym <= sr.date_info
+         AND sr.date_info < (d.ym + interval '1 month')
+    LEFT OUTER JOIN waterway.fairway_availability fa
+      ON ARRAY[fa.bottleneck_id] <@ bid.ids
+    LEFT OUTER JOIN waterway.effective_fairway_availability efa
+      ON fairway_availability_id = fa.id
+         AND d.ym <= efa.measure_date
+         AND efa.measure_date < (d.ym + interval '1 month')
+         AND efa.measure_type = 'Measured'
+    WHERE bid.objnam = bn.objnam
+    GROUP BY bn.cc, d.ym, bid.objnam;
+
+
+-- Config update statement
+
+-- TODO
+--
+--  INSERT INTO sys_admin.stats_updates
+--    VALUES ('Data quality report',
+--            'REFRESH MATERIALIZED VIEW waterway.dqr_bottleneck_stats;
+--             REFRESH MATERIALIZED VIEW waterway.dqr_gauge_stats;');
+--
+-- THIS DOES _NOT_ WORK!
+-- To make this work we need a wrapper function with owner rights for
+-- the refresh, as (from the PGSQL manual): "REFRESH MATERIALIZED VIEW
+-- completely replaces the contents of a materialized view. To execute
+-- this command you must be the owner of the materialized view.""
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/schema/updates/1452/01.report_views.sql	Fri Jul 02 18:56:59 2021 +0200
@@ -0,0 +1,58 @@
+-- Materialized Views with statistical data for data quality reports
+CREATE MATERIALIZED VIEW waterway.dqr_gauge_stats AS
+WITH d AS ( SELECT ym::date, d::date
+              FROM generate_series( '2019-10-01'::date,
+                                    now() - interval '1 day',
+                                    '1 month'::interval ) ym,
+                   generate_series( ym,
+                                    ( ym + interval '1 month'
+                                         - interval '1 day'),
+                                     '1 day'::interval ) d ),
+     g AS ( SELECT d.ym, d.d AS day, (location).country_code AS cc,
+                   objname, array_agg(distinct(location)) AS locations
+            FROM waterway.gauges,d
+            GROUP BY objname,d.d,(location).country_code,d.ym ),
+     measure AS (
+       SELECT g.ym, g.cc, g.objname, g.day,
+              CASE WHEN count(measure_date) = 0
+              THEN 1 ELSE 0 END AS missing
+         FROM g
+         LEFT OUTER JOIN waterway.gauge_measurements gm
+           ON ARRAY[location] <@ g.locations
+             AND g.day <= measure_date
+             AND measure_date < (g.day + interval '1 day')
+         GROUP BY g.objname,g.day,g.ym,g.cc )
+  SELECT cc, ym AS month, objname, sum(missing) AS daynodata
+    FROM measure
+    GROUP BY cc, ym, objname;
+
+CREATE MATERIALIZED VIEW waterway.dqr_bottleneck_stats AS
+WITH d AS ( SELECT ym::date
+              FROM generate_series( '2019-10-01'::date,
+                                    now() - interval '1 day',
+                                    '1 month'::interval ) ym),
+     bn AS ( SELECT DISTINCT objnam, responsible_country AS cc
+               FROM waterway.bottlenecks ),
+     bid AS (SELECT objnam, array_agg(distinct(bottleneck_id)) AS ids
+               FROM waterway.bottlenecks GROUP BY objnam)
+  SELECT bn.cc, d.ym AS month, bid.objnam,
+         COALESCE(count(distinct(sr.date_info)),0) AS srcnt,
+         COALESCE(count(distinct(efa.measure_date)),0) AS fwacnt
+    FROM bn, bid
+    CROSS JOIN d
+    LEFT OUTER JOIN waterway.sounding_results sr
+      ON ARRAY[sr.bottleneck_id] <@ bid.ids
+         AND d.ym <= sr.date_info
+         AND sr.date_info < (d.ym + interval '1 month')
+    LEFT OUTER JOIN waterway.fairway_availability fa
+      ON ARRAY[fa.bottleneck_id] <@ bid.ids
+    LEFT OUTER JOIN waterway.effective_fairway_availability efa
+      ON fairway_availability_id = fa.id
+         AND d.ym <= efa.measure_date
+         AND efa.measure_date < (d.ym + interval '1 month')
+         AND efa.measure_type = 'Measured'
+    WHERE bid.objnam = bn.objnam
+    GROUP BY bn.cc, d.ym, bid.objnam;
+
+-- Refresh access rights!
+GRANT SELECT on ALL tables in schema waterway TO waterway_user;
--- a/schema/version.sql	Fri Jul 02 14:16:02 2021 +0200
+++ b/schema/version.sql	Fri Jul 02 18:56:59 2021 +0200
@@ -1,1 +1,1 @@
-INSERT INTO gemma_schema_version(version) VALUES (1451);
+INSERT INTO gemma_schema_version(version) VALUES (1452);