annotate schema/reports.sql @ 5390:90fc90148ef8 extented-report

Added materialized views based data quality report
author Sascha Wilde <wilde@intevation.de>
date Fri, 02 Jul 2021 18:56:59 +0200
parents
children 1dd63a2405bc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5390
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
1 -- This is Free Software under GNU Affero General Public License v >= 3.0
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
2 -- without warranty, see README.md and license for details.
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
3
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
4 -- SPDX-License-Identifier: AGPL-3.0-or-later
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
5 -- License-Filename: LICENSES/AGPL-3.0.txt
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
6
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
7 -- Copyright (C) 2021 by via donau
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
8 -- – Österreichische Wasserstraßen-Gesellschaft mbH
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
9 -- Software engineering by Intevation GmbH
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
10
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
11 -- Author(s):
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
12 -- * Sascha Wilde <sascha.wilde@intevation.de>
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
13
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
14
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
15 -- Materialized Views with statistical data for data quality reports
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
16 CREATE MATERIALIZED VIEW waterway.dqr_gauge_stats AS
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
17 WITH d AS ( SELECT ym::date, d::date
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
18 FROM generate_series( '2019-10-01'::date,
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
19 now() - interval '1 day',
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
20 '1 month'::interval ) ym,
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
21 generate_series( ym,
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
22 ( ym + interval '1 month'
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
23 - interval '1 day'),
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
24 '1 day'::interval ) d ),
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
25 g AS ( SELECT d.ym, d.d AS day, (location).country_code AS cc,
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
26 objname, array_agg(distinct(location)) AS locations
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
27 FROM waterway.gauges,d
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
28 GROUP BY objname,d.d,(location).country_code,d.ym ),
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
29 measure AS (
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
30 SELECT g.ym, g.cc, g.objname, g.day,
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
31 CASE WHEN count(measure_date) = 0
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
32 THEN 1 ELSE 0 END AS missing
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
33 FROM g
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
34 LEFT OUTER JOIN waterway.gauge_measurements gm
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
35 ON ARRAY[location] <@ g.locations
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
36 AND g.day <= measure_date
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
37 AND measure_date < (g.day + interval '1 day')
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
38 GROUP BY g.objname,g.day,g.ym,g.cc )
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
39 SELECT cc, ym AS month, objname, sum(missing) AS daynodata
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
40 FROM measure
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
41 GROUP BY cc, ym, objname;
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
42
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
43 CREATE MATERIALIZED VIEW waterway.dqr_bottleneck_stats AS
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
44 WITH d AS ( SELECT ym::date
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
45 FROM generate_series( '2019-10-01'::date,
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
46 now() - interval '1 day',
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
47 '1 month'::interval ) ym),
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
48 bn AS ( SELECT DISTINCT objnam, responsible_country AS cc
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
49 FROM waterway.bottlenecks ),
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
50 bid AS (SELECT objnam, array_agg(distinct(bottleneck_id)) AS ids
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
51 FROM waterway.bottlenecks GROUP BY objnam)
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
52 SELECT bn.cc, d.ym AS month, bid.objnam,
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
53 COALESCE(count(distinct(sr.date_info)),0) AS srcnt,
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
54 COALESCE(count(distinct(efa.measure_date)),0) AS fwacnt
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
55 FROM bn, bid
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
56 CROSS JOIN d
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
57 LEFT OUTER JOIN waterway.sounding_results sr
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
58 ON ARRAY[sr.bottleneck_id] <@ bid.ids
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
59 AND d.ym <= sr.date_info
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
60 AND sr.date_info < (d.ym + interval '1 month')
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
61 LEFT OUTER JOIN waterway.fairway_availability fa
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
62 ON ARRAY[fa.bottleneck_id] <@ bid.ids
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
63 LEFT OUTER JOIN waterway.effective_fairway_availability efa
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
64 ON fairway_availability_id = fa.id
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
65 AND d.ym <= efa.measure_date
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
66 AND efa.measure_date < (d.ym + interval '1 month')
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
67 AND efa.measure_type = 'Measured'
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
68 WHERE bid.objnam = bn.objnam
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
69 GROUP BY bn.cc, d.ym, bid.objnam;
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
70
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
71
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
72 -- Config update statement
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
73
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
74 -- TODO
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
75 --
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
76 -- INSERT INTO sys_admin.stats_updates
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
77 -- VALUES ('Data quality report',
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
78 -- 'REFRESH MATERIALIZED VIEW waterway.dqr_bottleneck_stats;
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
79 -- REFRESH MATERIALIZED VIEW waterway.dqr_gauge_stats;');
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
80 --
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
81 -- THIS DOES _NOT_ WORK!
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
82 -- To make this work we need a wrapper function with owner rights for
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
83 -- the refresh, as (from the PGSQL manual): "REFRESH MATERIALIZED VIEW
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
84 -- completely replaces the contents of a materialized view. To execute
90fc90148ef8 Added materialized views based data quality report
Sascha Wilde <wilde@intevation.de>
parents:
diff changeset
85 -- this command you must be the owner of the materialized view.""