changeset 4449:ce884af9f42f

Fix forecast accuracy values and improve performance Forecast accuracy should be based only on the most recent predictions. Generally consider measurements and predictions older than 14 days as outdated and irrelevant. This also implies a performance gain, since it saves a full scan of gauge_measurements and the extra index for this purpose. Adding an extra subquery level for calculation of the forecast accuracy per row instead of the function call as a FROM item saves a (nested loop) join, which is another performance gain.
author Tom Gottfried <tom@intevation.de>
date Fri, 20 Sep 2019 13:22:45 +0200
parents ec207cee13bd
children 41dd97f8c905
files schema/gemma.sql schema/geoserver_views.sql schema/updates/1202/01.fix_data_availability.sql schema/version.sql
diffstat 4 files changed, 26 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/schema/gemma.sql	Fri Sep 20 12:49:21 2019 +0200
+++ b/schema/gemma.sql	Fri Sep 20 13:22:45 2019 +0200
@@ -492,9 +492,6 @@
     CREATE CONSTRAINT TRIGGER waterway_gauge_measurements_reference_gauge
         AFTER INSERT OR UPDATE OF location ON gauge_measurements
         FOR EACH ROW EXECUTE FUNCTION check_valid_gauge_ts('location','measure_date')
-    -- For fast retrieval of newest measurement per location:
-    CREATE INDEX gauge_measurements_location_measure_date_desc
-        ON waterway.gauge_measurements (location, measure_date DESC)
 
     CREATE TABLE gauge_predictions (
         location isrs NOT NULL,
--- a/schema/geoserver_views.sql	Fri Sep 20 12:49:21 2019 +0200
+++ b/schema/geoserver_views.sql	Fri Sep 20 13:22:45 2019 +0200
@@ -15,7 +15,7 @@
         r.rwls AS reference_water_levels,
         wl.measure_date AS gm_measuredate,
         wl.water_level AS gm_waterlevel,
-        wl_14d.n AS gm_n_14d,
+        wl.n AS gm_n_14d,
         fca.forecast_accuracy_3d,
         fca.forecast_accuracy_1d
     FROM waterway.gauges g
@@ -28,29 +28,32 @@
         LEFT JOIN (SELECT DISTINCT ON (location)
                     location,
                     measure_date,
-                    water_level
-                FROM waterway.gauge_measurements
-                ORDER BY location, measure_date DESC) AS wl
-            USING (location)
-        LEFT JOIN (SELECT location, count(water_level) AS n
+                    water_level,
+                    count(*) OVER (PARTITION BY location) AS n
                 FROM waterway.gauge_measurements
                 -- consider all measurements within 14 days plus a tolerance
                 WHERE measure_date
                     >= current_timestamp - '14 days 00:15'::interval
-                GROUP BY location) AS wl_14d
+                ORDER BY location, measure_date DESC) AS wl
             USING (location)
-        LEFT JOIN (SELECT location,
-                    max(acc) FILTER (WHERE
-                        measure_date <= current_timestamp + '1 day'::interval)
-                        AS forecast_accuracy_1d,
-                    max(acc) AS forecast_accuracy_3d
-                FROM waterway.gauge_predictions,
-                    GREATEST(water_level - lower(conf_interval),
-                        upper(conf_interval) - water_level) AS acc (acc)
-                WHERE measure_date
-                    BETWEEN current_timestamp
-                        AND current_timestamp + '3 days'::interval
-                GROUP BY location) AS fca
+        LEFT JOIN (SELECT DISTINCT ON (location)
+                    location,
+                    max(acc) FILTER (WHERE measure_date
+                            <= current_timestamp + '1 day'::interval)
+                        OVER loc_date_issue AS forecast_accuracy_1d,
+                    max(acc) OVER loc_date_issue AS forecast_accuracy_3d
+                FROM (SELECT location, date_issue, measure_date,
+                        GREATEST(water_level - lower(conf_interval),
+                            upper(conf_interval) - water_level) AS acc
+                    FROM waterway.gauge_predictions
+                    -- consider predictions made within last 14 days ...
+                    WHERE date_issue
+                        >= current_timestamp - '14 days 00:15'::interval
+                        -- ... for the next three days from now
+                        AND measure_date BETWEEN current_timestamp
+                            AND current_timestamp + '3 days'::interval) AS acc
+                WINDOW loc_date_issue AS (PARTITION BY location, date_issue)
+                ORDER BY location, date_issue DESC) AS fca
             USING (location);
 
 CREATE OR REPLACE VIEW waterway.gauges_geoserver AS
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/schema/updates/1202/01.fix_data_availability.sql	Fri Sep 20 13:22:45 2019 +0200
@@ -0,0 +1,3 @@
+\ir ../../geoserver_views.sql;
+
+DROP INDEX waterway.gauge_measurements_location_measure_date_desc
--- a/schema/version.sql	Fri Sep 20 12:49:21 2019 +0200
+++ b/schema/version.sql	Fri Sep 20 13:22:45 2019 +0200
@@ -1,1 +1,1 @@
-INSERT INTO gemma_schema_version(version) VALUES (1201);
+INSERT INTO gemma_schema_version(version) VALUES (1202);