Mercurial > gemma
changeset 4449:ce884af9f42f
Fix forecast accuracy values and improve performance
Forecast accuracy should be based only on the most recent predictions.
Generally consider measurements and predictions older than 14 days as
outdated and irrelevant. This also implies a performance gain, since it
saves a full scan of gauge_measurements and the extra index for this
purpose.
Adding an extra subquery level for calculation of the forecast accuracy per
row instead of the function call as a FROM item saves a (nested loop)
join, which is another performance gain.
author | Tom Gottfried <tom@intevation.de> |
---|---|
date | Fri, 20 Sep 2019 13:22:45 +0200 |
parents | ec207cee13bd |
children | 41dd97f8c905 |
files | schema/gemma.sql schema/geoserver_views.sql schema/updates/1202/01.fix_data_availability.sql schema/version.sql |
diffstat | 4 files changed, 26 insertions(+), 23 deletions(-) [+] |
line wrap: on
line diff
--- a/schema/gemma.sql Fri Sep 20 12:49:21 2019 +0200 +++ b/schema/gemma.sql Fri Sep 20 13:22:45 2019 +0200 @@ -492,9 +492,6 @@ CREATE CONSTRAINT TRIGGER waterway_gauge_measurements_reference_gauge AFTER INSERT OR UPDATE OF location ON gauge_measurements FOR EACH ROW EXECUTE FUNCTION check_valid_gauge_ts('location','measure_date') - -- For fast retrieval of newest measurement per location: - CREATE INDEX gauge_measurements_location_measure_date_desc - ON waterway.gauge_measurements (location, measure_date DESC) CREATE TABLE gauge_predictions ( location isrs NOT NULL,
--- a/schema/geoserver_views.sql Fri Sep 20 12:49:21 2019 +0200 +++ b/schema/geoserver_views.sql Fri Sep 20 13:22:45 2019 +0200 @@ -15,7 +15,7 @@ r.rwls AS reference_water_levels, wl.measure_date AS gm_measuredate, wl.water_level AS gm_waterlevel, - wl_14d.n AS gm_n_14d, + wl.n AS gm_n_14d, fca.forecast_accuracy_3d, fca.forecast_accuracy_1d FROM waterway.gauges g @@ -28,29 +28,32 @@ LEFT JOIN (SELECT DISTINCT ON (location) location, measure_date, - water_level - FROM waterway.gauge_measurements - ORDER BY location, measure_date DESC) AS wl - USING (location) - LEFT JOIN (SELECT location, count(water_level) AS n + water_level, + count(*) OVER (PARTITION BY location) AS n FROM waterway.gauge_measurements -- consider all measurements within 14 days plus a tolerance WHERE measure_date >= current_timestamp - '14 days 00:15'::interval - GROUP BY location) AS wl_14d + ORDER BY location, measure_date DESC) AS wl USING (location) - LEFT JOIN (SELECT location, - max(acc) FILTER (WHERE - measure_date <= current_timestamp + '1 day'::interval) - AS forecast_accuracy_1d, - max(acc) AS forecast_accuracy_3d - FROM waterway.gauge_predictions, - GREATEST(water_level - lower(conf_interval), - upper(conf_interval) - water_level) AS acc (acc) - WHERE measure_date - BETWEEN current_timestamp - AND current_timestamp + '3 days'::interval - GROUP BY location) AS fca + LEFT JOIN (SELECT DISTINCT ON (location) + location, + max(acc) FILTER (WHERE measure_date + <= current_timestamp + '1 day'::interval) + OVER loc_date_issue AS forecast_accuracy_1d, + max(acc) OVER loc_date_issue AS forecast_accuracy_3d + FROM (SELECT location, date_issue, measure_date, + GREATEST(water_level - lower(conf_interval), + upper(conf_interval) - water_level) AS acc + FROM waterway.gauge_predictions + -- consider predictions made within last 14 days ... + WHERE date_issue + >= current_timestamp - '14 days 00:15'::interval + -- ... for the next three days from now + AND measure_date BETWEEN current_timestamp + AND current_timestamp + '3 days'::interval) AS acc + WINDOW loc_date_issue AS (PARTITION BY location, date_issue) + ORDER BY location, date_issue DESC) AS fca USING (location); CREATE OR REPLACE VIEW waterway.gauges_geoserver AS