changeset 5651:cb789e814ecb

Merged "DQR adjustments"
author Sascha Wilde <wilde@sha-bang.de>
date Fri, 28 Jul 2023 13:25:12 +0200
parents 77b6d1002e73 (current diff) cb3c4ba0f052 (diff)
children 9b3779f649c2
files
diffstat 5 files changed, 173 insertions(+), 131 deletions(-) [+]
line wrap: on
line diff
Binary file report-templates/data-quality-report.xlsx has changed
--- a/report-templates/data-quality-report.yaml	Mon Jul 17 12:12:41 2023 +0200
+++ b/report-templates/data-quality-report.yaml	Fri Jul 28 13:25:12 2023 +0200
@@ -1,123 +1,122 @@
 actions:
-# One sheet per CC
-- type: select
-  statement: >
-    SELECT cc FROM
-      (VALUES ('AT'), ('SK'), ('HU'), ('HR'), ('RS'), ('BG'), ('RO') )
-      AS t (cc);
-  actions:
-  - type: sheet
-    source: CCTmpl
-    destination: "{{ cc }}"
-    vars: [last_row, column_number, offset]
+  # One sheet per CC
+  - type: select
+    statement: >
+      SELECT cc FROM
+        (VALUES ('AT'), ('SK'), ('HU'), ('HR'), ('RS'), ('BG'), ('RO') )
+        AS t (cc);
     actions:
-    # Header:
-    - type: copy
-      location: [A1,A5]
-    # BN names
-    - type: select
-      statement: >
-        SELECT DISTINCT objnam AS bnnam, hm
-          FROM waterway.dqr_bottleneck_stats
-          WHERE cc = {{ cc }} ORDER BY hm, objnam;
-      actions:
-      - type: assign
-        name: last_row
-        expr: (row_number ?? 0) + 6
-      - type: copy
-        location: [A6]
-        destination: A{{ last_row }}
-    - type: copy
-      location: [A7]
-      destination: A{{ (last_row ?? 6) + 1 }}
-    # Gen Months
-    - type: select
-      statement: >
-        SELECT to_char(d, 'Month YYYY') AS month, d::date
-          FROM generate_series( date_trunc('month',
-                                           now() - interval '1 day'
-                                          )::date,
-                                '2019-10-01'::date,
-                                - '1 month'::interval ) d;
-      actions:
-      - type: assign
-        name: column_number
-        expr: column2num("B") + (row_number ?? 0) * 2
-      - type: copy
-        location: [B4,C5]
-        destination: '{{ coord2cell(column_number, 4) }}'
-      # BN SR-Count
-      - type: select
-        statement: >
-          SELECT objnam, srcnt, fwacnt
-            FROM waterway.dqr_bottleneck_stats
-            WHERE cc = {{ cc }} AND month = {{ d }}
-            ORDER BY hm, objnam;
+      - type: sheet
+        source: CCTmpl
+        destination: "{{ cc }}"
+        vars: [last_row, column_number, offset]
         actions:
-        - type: assign
-          name: last_row
-          expr: (row_number ?? 0) + 6
-        - type: copy
-          location: [B6,C6]
-          destination: '{{ coord2cell(column_number, last_row) }}'
-      - type: copy
-        location: [B7,C7]
-        destination: '{{ coord2cell(column_number, (last_row ?? 0) + 1) }}'
-    - type: assign
-      name: offset
-      expr: (last_row ?? 0) + 3
-    #--------------------------------------------------------------------------
-    # GAUGES
-    # Header:
-    - type: copy
-      location: [A9]
-      destination: A{{ offset }}
-    # Gauges names
-    - type: select
-      statement: >
-        SELECT DISTINCT objname AS gnam, hm FROM waterway.dqr_gauge_stats
-          WHERE cc = {{ cc }} ORDER BY hm, objname;
-      actions:
-      - type: assign
-        name: last_row
-        expr: (row_number ?? 0) + offset + 1
-      - type: copy
-        location: [A10]
-        destination: A{{ last_row }}
-    - type: copy
-      location: [A11]
-      destination: A{{ (last_row ?? 10) + 1 }}
-    # Gen Months
-    - type: select
-      statement: >
-        SELECT to_char(d, 'Month YYYY') AS month, d::date
-          FROM generate_series( date_trunc('month',
-                                           now() - interval '1 day'
-                                          )::date,
-                                '2019-10-01'::date,
-                                - '1 month'::interval ) d;
-      actions:
-      - type: assign
-        name: column_number
-        expr: column2num("B") + (row_number ?? 0)
-      - type: copy
-        location: [B9]
-        destination: '{{ coord2cell(column_number, offset) }}'
-      # BN SR-Count
-      - type: select
-        statement: >
-          SELECT objname, daynodata
-            FROM waterway.dqr_gauge_stats
-            WHERE cc = {{ cc }} AND month = {{ d }}
-            ORDER BY hm, objname;
-        actions:
-        - type: assign
-          name: last_row
-          expr: (row_number ?? 0) + offset + 1
-        - type: copy
-          location: [B10]
-          destination: '{{ coord2cell(column_number, last_row) }}'
-      - type: copy
-        location: [B11]
-        destination: '{{ coord2cell(column_number, (last_row ?? 10) + 1) }}'
-
+          # Header:
+          - type: copy
+            location: [A1, A5]
+          # BN names
+          - type: select
+            statement: >
+              SELECT DISTINCT bottleneck_id, concat(objnam, ' (', bottleneck_id, ')') AS bnnam, hm
+                FROM waterway.dqr_bottleneck_stats
+                WHERE cc = {{ cc }} ORDER BY hm, bottleneck_id;
+            actions:
+              - type: assign
+                name: last_row
+                expr: (row_number ?? 0) + 6
+              - type: copy
+                location: [A6]
+                destination: A{{ last_row }}
+          - type: copy
+            location: [A7]
+            destination: A{{ (last_row ?? 6) + 1 }}
+          # Gen Months
+          - type: select
+            statement: >
+              SELECT to_char(d, 'Month YYYY') AS month, d::date
+                FROM generate_series( date_trunc('month',
+                                                 now() - interval '1 day'
+                                                )::date,
+                                      '2019-10-01'::date,
+                                      - '1 month'::interval ) d;
+            actions:
+              - type: assign
+                name: column_number
+                expr: column2num("B") + (row_number ?? 0) * 2
+              - type: copy
+                location: [B4, C5]
+                destination: "{{ coord2cell(column_number, 4) }}"
+              # BN SR-Count
+              - type: select
+                statement: >
+                  SELECT concat(bottleneck_id, ' (', objnam, ')') AS objnam, srcnt, fwacnt
+                    FROM waterway.dqr_bottleneck_stats
+                    WHERE cc = {{ cc }} AND month = {{ d }}
+                    ORDER BY hm, bottleneck_id;
+                actions:
+                  - type: assign
+                    name: last_row
+                    expr: (row_number ?? 0) + 6
+                  - type: copy
+                    location: [B6, C6]
+                    destination: "{{ coord2cell(column_number, last_row) }}"
+              - type: copy
+                location: [B7, C7]
+                destination: "{{ coord2cell(column_number, (last_row ?? 0) + 1) }}"
+          - type: assign
+            name: offset
+            expr: (last_row ?? 0) + 3
+          #--------------------------------------------------------------------------
+          # GAUGES
+          # Header:
+          - type: copy
+            location: [A9]
+            destination: A{{ offset }}
+          # Gauges names
+          - type: select
+            statement: >
+              SELECT DISTINCT objname AS gnam, hm FROM waterway.dqr_gauge_stats
+                WHERE cc = {{ cc }} ORDER BY hm, objname;
+            actions:
+              - type: assign
+                name: last_row
+                expr: (row_number ?? 0) + offset + 1
+              - type: copy
+                location: [A10]
+                destination: A{{ last_row }}
+          - type: copy
+            location: [A11]
+            destination: A{{ (last_row ?? 10) + 1 }}
+          # Gen Months
+          - type: select
+            statement: >
+              SELECT to_char(d, 'Month YYYY') AS month, d::date
+                FROM generate_series( date_trunc('month',
+                                                 now() - interval '1 day'
+                                                )::date,
+                                      '2019-10-01'::date,
+                                      - '1 month'::interval ) d;
+            actions:
+              - type: assign
+                name: column_number
+                expr: column2num("B") + (row_number ?? 0)
+              - type: copy
+                location: [B9]
+                destination: "{{ coord2cell(column_number, offset) }}"
+              # BN SR-Count
+              - type: select
+                statement: >
+                  SELECT objname, daynodata
+                    FROM waterway.dqr_gauge_stats
+                    WHERE cc = {{ cc }} AND month = {{ d }}
+                    ORDER BY hm, objname;
+                actions:
+                  - type: assign
+                    name: last_row
+                    expr: (row_number ?? 0) + offset + 1
+                  - type: copy
+                    location: [B10]
+                    destination: "{{ coord2cell(column_number, last_row) }}"
+              - type: copy
+                location: [B11]
+                destination: "{{ coord2cell(column_number, (last_row ?? 10) + 1) }}"
--- a/schema/reports.sql	Mon Jul 17 12:12:41 2023 +0200
+++ b/schema/reports.sql	Fri Jul 28 13:25:12 2023 +0200
@@ -46,29 +46,34 @@
               FROM generate_series( '2019-10-01'::date,
                                     now() - interval '1 day',
                                     '1 month'::interval ) ym),
-     bn AS ( SELECT DISTINCT objnam, responsible_country AS cc,
+     bn AS ( SELECT DISTINCT bottleneck_id ,max(objnam) as objnam, responsible_country AS cc,
                     min ((lower(stretch)).hectometre) AS hm
-               FROM waterway.bottlenecks GROUP BY objnam,cc),
-     bid AS (SELECT objnam, array_agg(distinct(bottleneck_id)) AS ids
-               FROM waterway.bottlenecks GROUP BY objnam)
-  SELECT bn.cc, bn.hm, d.ym AS month, bid.objnam,
+               FROM waterway.bottlenecks
+               GROUP BY bottleneck_id,cc
+               ),
+      bnn AS ( SELECT DISTINCT ON (bottleneck_id) bottleneck_id, objnam, validity
+               FROM waterway.bottlenecks b
+               ORDER BY bottleneck_id,
+               validity DESC )
+  SELECT bn.cc, bn.hm, d.ym AS month, bnn.objnam, bn.bottleneck_id,
          COALESCE(count(distinct(sr.date_info)),0) AS srcnt,
          COALESCE(count(distinct(efa.measure_date)),0) AS fwacnt
-    FROM bn, bid
+    FROM bn
     CROSS JOIN d
+    JOIN bnn
+      ON bnn.bottleneck_id = bn.bottleneck_id
     LEFT OUTER JOIN waterway.sounding_results sr
-      ON ARRAY[sr.bottleneck_id] <@ bid.ids
+      ON sr.bottleneck_id = bn.bottleneck_id
          AND d.ym <= sr.date_info
          AND sr.date_info < (d.ym + interval '1 month')
     LEFT OUTER JOIN waterway.fairway_availability fa
-      ON ARRAY[fa.bottleneck_id] <@ bid.ids
+      ON fa.bottleneck_id = bn.bottleneck_id
     LEFT OUTER JOIN waterway.effective_fairway_availability efa
       ON fairway_availability_id = fa.id
          AND d.ym <= efa.measure_date
          AND efa.measure_date < (d.ym + interval '1 month')
          AND efa.measure_type = 'Measured'
-    WHERE bid.objnam = bn.objnam
-    GROUP BY bn.cc, bn.hm, d.ym, bid.objnam;
+    GROUP BY bn.cc, bn.hm, d.ym, bn.bottleneck_id, bnn.objnam;
 
 -- We need a wrapper procedure with owner rights for
 -- the refresh, as (from the PGSQL manual): "REFRESH MATERIALIZED VIEW
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/schema/updates/1470/01.update_report.sql	Fri Jul 28 13:25:12 2023 +0200
@@ -0,0 +1,38 @@
+DROP MATERIALIZED VIEW waterway.dqr_bottleneck_stats;
+
+CREATE MATERIALIZED VIEW waterway.dqr_bottleneck_stats AS
+WITH d AS ( SELECT ym::date
+              FROM generate_series( '2019-10-01'::date,
+                                    now() - interval '1 day',
+                                    '1 month'::interval ) ym),
+     bn AS ( SELECT DISTINCT bottleneck_id ,max(objnam) as objnam, responsible_country AS cc,
+                    min ((lower(stretch)).hectometre) AS hm
+               FROM waterway.bottlenecks
+               GROUP BY bottleneck_id,cc
+               ),
+      bnn AS ( SELECT DISTINCT ON (bottleneck_id) bottleneck_id, objnam, validity
+               FROM waterway.bottlenecks b
+               ORDER BY bottleneck_id,
+               validity DESC )
+  SELECT bn.cc, bn.hm, d.ym AS month, bnn.objnam, bn.bottleneck_id,
+         COALESCE(count(distinct(sr.date_info)),0) AS srcnt,
+         COALESCE(count(distinct(efa.measure_date)),0) AS fwacnt
+    FROM bn
+    CROSS JOIN d
+    JOIN bnn
+      ON bnn.bottleneck_id = bn.bottleneck_id
+    LEFT OUTER JOIN waterway.sounding_results sr
+      ON sr.bottleneck_id = bn.bottleneck_id
+         AND d.ym <= sr.date_info
+         AND sr.date_info < (d.ym + interval '1 month')
+    LEFT OUTER JOIN waterway.fairway_availability fa
+      ON fa.bottleneck_id = bn.bottleneck_id
+    LEFT OUTER JOIN waterway.effective_fairway_availability efa
+      ON fairway_availability_id = fa.id
+         AND d.ym <= efa.measure_date
+         AND efa.measure_date < (d.ym + interval '1 month')
+         AND efa.measure_type = 'Measured'
+    GROUP BY bn.cc, bn.hm, d.ym, bn.bottleneck_id, bnn.objnam;
+
+-- Refresh access rights!
+GRANT SELECT on ALL tables in schema waterway TO waterway_user;
\ No newline at end of file
--- a/schema/version.sql	Mon Jul 17 12:12:41 2023 +0200
+++ b/schema/version.sql	Fri Jul 28 13:25:12 2023 +0200
@@ -1,1 +1,1 @@
-INSERT INTO gemma_schema_version(version) VALUES (1469);
+INSERT INTO gemma_schema_version(version) VALUES (1470);