diff schema/reports.sql @ 5390:90fc90148ef8 extented-report

Added materialized views based data quality report
author Sascha Wilde <wilde@intevation.de>
date Fri, 02 Jul 2021 18:56:59 +0200
parents
children 1dd63a2405bc
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/schema/reports.sql	Fri Jul 02 18:56:59 2021 +0200
@@ -0,0 +1,85 @@
+-- This is Free Software under GNU Affero General Public License v >= 3.0
+-- without warranty, see README.md and license for details.
+
+-- SPDX-License-Identifier: AGPL-3.0-or-later
+-- License-Filename: LICENSES/AGPL-3.0.txt
+
+-- Copyright (C) 2021 by via donau
+--   – Österreichische Wasserstraßen-Gesellschaft mbH
+-- Software engineering by Intevation GmbH
+
+-- Author(s):
+--  * Sascha Wilde <sascha.wilde@intevation.de>
+
+
+-- Materialized Views with statistical data for data quality reports
+CREATE MATERIALIZED VIEW waterway.dqr_gauge_stats AS
+WITH d AS ( SELECT ym::date, d::date
+              FROM generate_series( '2019-10-01'::date,
+                                    now() - interval '1 day',
+                                    '1 month'::interval ) ym,
+                   generate_series( ym,
+                                    ( ym + interval '1 month'
+                                         - interval '1 day'),
+                                     '1 day'::interval ) d ),
+     g AS ( SELECT d.ym, d.d AS day, (location).country_code AS cc,
+                   objname, array_agg(distinct(location)) AS locations
+            FROM waterway.gauges,d
+            GROUP BY objname,d.d,(location).country_code,d.ym ),
+     measure AS (
+       SELECT g.ym, g.cc, g.objname, g.day,
+              CASE WHEN count(measure_date) = 0
+              THEN 1 ELSE 0 END AS missing
+         FROM g
+         LEFT OUTER JOIN waterway.gauge_measurements gm
+           ON ARRAY[location] <@ g.locations
+             AND g.day <= measure_date
+             AND measure_date < (g.day + interval '1 day')
+         GROUP BY g.objname,g.day,g.ym,g.cc )
+  SELECT cc, ym AS month, objname, sum(missing) AS daynodata
+    FROM measure
+    GROUP BY cc, ym, objname;
+
+CREATE MATERIALIZED VIEW waterway.dqr_bottleneck_stats AS
+WITH d AS ( SELECT ym::date
+              FROM generate_series( '2019-10-01'::date,
+                                    now() - interval '1 day',
+                                    '1 month'::interval ) ym),
+     bn AS ( SELECT DISTINCT objnam, responsible_country AS cc
+               FROM waterway.bottlenecks ),
+     bid AS (SELECT objnam, array_agg(distinct(bottleneck_id)) AS ids
+               FROM waterway.bottlenecks GROUP BY objnam)
+  SELECT bn.cc, d.ym AS month, bid.objnam,
+         COALESCE(count(distinct(sr.date_info)),0) AS srcnt,
+         COALESCE(count(distinct(efa.measure_date)),0) AS fwacnt
+    FROM bn, bid
+    CROSS JOIN d
+    LEFT OUTER JOIN waterway.sounding_results sr
+      ON ARRAY[sr.bottleneck_id] <@ bid.ids
+         AND d.ym <= sr.date_info
+         AND sr.date_info < (d.ym + interval '1 month')
+    LEFT OUTER JOIN waterway.fairway_availability fa
+      ON ARRAY[fa.bottleneck_id] <@ bid.ids
+    LEFT OUTER JOIN waterway.effective_fairway_availability efa
+      ON fairway_availability_id = fa.id
+         AND d.ym <= efa.measure_date
+         AND efa.measure_date < (d.ym + interval '1 month')
+         AND efa.measure_type = 'Measured'
+    WHERE bid.objnam = bn.objnam
+    GROUP BY bn.cc, d.ym, bid.objnam;
+
+
+-- Config update statement
+
+-- TODO
+--
+--  INSERT INTO sys_admin.stats_updates
+--    VALUES ('Data quality report',
+--            'REFRESH MATERIALIZED VIEW waterway.dqr_bottleneck_stats;
+--             REFRESH MATERIALIZED VIEW waterway.dqr_gauge_stats;');
+--
+-- THIS DOES _NOT_ WORK!
+-- To make this work we need a wrapper function with owner rights for
+-- the refresh, as (from the PGSQL manual): "REFRESH MATERIALIZED VIEW
+-- completely replaces the contents of a materialized view. To execute
+-- this command you must be the owner of the materialized view.""