changeset 5520:05db984d3db1

Improve performance of bottleneck area calculation Avoid buffer calculations by replacing them with simple distance comparisons and calculate the boundary of the result geometry only once per iteration. In some edge cases with very large numbers of iterations, this reduced the runtime of a bottleneck import by a factor of more than twenty.
author Tom Gottfried <tom@intevation.de>
date Thu, 21 Oct 2021 19:50:39 +0200
parents e8e1a24d2f4c
children 3cfbc5769e8b
files schema/isrs_functions.sql schema/updates/1467/01.improve_ISRSrange_axis.sql schema/version.sql
diffstat 3 files changed, 111 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/schema/isrs_functions.sql	Thu Oct 21 14:36:02 2021 +0200
+++ b/schema/isrs_functions.sql	Thu Oct 21 19:50:39 2021 +0200
@@ -90,8 +90,7 @@
             -- to find the contiguous axis on which given distance marks lie
             (SELECT ARRAY[id] AS ids, wtwaxs
                 FROM axis, points
-                WHERE ST_Intersects(
-                    ST_Buffer(axis.wtwaxs, 0.0001), points.geom)
+                WHERE ST_DWithin(axis.wtwaxs, points.geom, 0.0001)
                 FETCH FIRST ROW ONLY)
             UNION
             -- Connect endpoint of next linestring with closest
@@ -103,29 +102,25 @@
                         SELECT refgeom
                         UNION
                         -- Fill eventual gap
-                        SELECT ST_ShortestLine(
-                                ST_Boundary(refgeom), bdr)
+                        SELECT ST_ShortestLine(refbdr, bdr)
                         UNION
                         -- Linestring to be added
                         SELECT geom)))
                 FROM axis_snapped AS axis_snapped (refids, refgeom),
                     axis AS axis (id, geom, bdr),
-                    (SELECT ST_Collect(points.geom) AS pts
-                        FROM points) AS points
+                    ST_Boundary(refgeom) AS refbdr (refbdr)
                 WHERE id <> ALL(refids)
-                    AND ST_DWithin(
-                        ST_Boundary(refgeom), bdr, tolerance)
-                    AND NOT ST_Covers(ST_Buffer(refgeom, 0.0001), points.pts)
-                ORDER BY ST_Boundary(refgeom) <-> bdr
+                    AND ST_DWithin(refbdr, bdr, tolerance)
+                    -- Stop if refgeom goes through both distance marks
+                    AND NOT 0.0001 >= ALL(SELECT refgeom <-> geom FROM points)
+                ORDER BY refbdr <-> bdr
                 FETCH FIRST ROW ONLY)),
         axis_segment AS (
             -- Fetch end result from snapping
             SELECT wtwaxs AS line
-                FROM axis_snapped,
-                    (SELECT ST_Collect(points.geom) AS pts
-                        FROM points) AS points
+                FROM axis_snapped
                 -- Return end result only if both distance marks were connected
-                WHERE ST_Covers(ST_Buffer(wtwaxs, 0.0001), points.pts))
+                WHERE 0.0001 >= ALL(SELECT wtwaxs <-> geom FROM points))
         -- Use linear referencing to clip axis between distance marks.
         -- Simplification is used to work-around the problem, that
         -- ST_LineSubstring might generate very small line segments at an
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/schema/updates/1467/01.improve_ISRSrange_axis.sql	Thu Oct 21 19:50:39 2021 +0200
@@ -0,0 +1,101 @@
+CREATE OR REPLACE FUNCTION ISRSrange_axis(
+    stretch isrsrange,
+    tolerance float
+    -- in m, up to which linestrings will be connected at their boundary
+) RETURNS geometry
+AS $$
+DECLARE z int;
+DECLARE result_geom geometry;
+BEGIN
+    -- Find best matchting UTM zone
+    z = best_utm(stretch);
+
+    CREATE TEMP TABLE axis AS
+        SELECT row_number() OVER () AS id,
+                geom AS wtwaxs,
+                ST_Boundary(geom) AS bdr
+            FROM waterway.waterway_axis,
+                ST_Dump(ST_Transform(wtwaxs::geometry, z))
+            WHERE validity @> current_timestamp;
+    CREATE INDEX axs_bdr ON axis USING GiST (bdr);
+    ANALYZE axis;
+
+    WITH RECURSIVE
+        -- In order to guarantee the following ST_Covers to work,
+        -- snap distance mark coordinates to axis
+        points0 AS (
+            SELECT ST_ClosestPoint(
+                    wtwaxs,
+                    ST_Transform(geom, z)) AS geom
+                FROM ST_Dump(ISRSrange_points(stretch)), (
+                    SELECT ST_Collect(wtwaxs) AS wtwaxs
+                        FROM axis) AS ax),
+        -- Ensure two distinct points on axis have been found
+        points AS (
+            SELECT geom
+                FROM points0
+                WHERE 2 = (SELECT count(DISTINCT geom) FROM points0)),
+        axis_snapped AS (
+            -- Iteratively connect non-contiguous axis chunks
+            -- to find the contiguous axis on which given distance marks lie
+            (SELECT ARRAY[id] AS ids, wtwaxs
+                FROM axis, points
+                WHERE ST_DWithin(axis.wtwaxs, points.geom, 0.0001)
+                FETCH FIRST ROW ONLY)
+            UNION
+            -- Connect endpoint of next linestring with closest
+            -- endpoint of merged linestring until a contiguous
+            -- linestring connecting both distance marks is build up
+            (SELECT refids || id,
+                    ST_LineMerge(ST_Collect(ARRAY(
+                        -- Linestring build up so far
+                        SELECT refgeom
+                        UNION
+                        -- Fill eventual gap
+                        SELECT ST_ShortestLine(refbdr, bdr)
+                        UNION
+                        -- Linestring to be added
+                        SELECT geom)))
+                FROM axis_snapped AS axis_snapped (refids, refgeom),
+                    axis AS axis (id, geom, bdr),
+                    ST_Boundary(refgeom) AS refbdr (refbdr)
+                WHERE id <> ALL(refids)
+                    AND ST_DWithin(refbdr, bdr, tolerance)
+                    -- Stop if refgeom goes through both distance marks
+                    AND NOT 0.0001 >= ALL(SELECT refgeom <-> geom FROM points)
+                ORDER BY refbdr <-> bdr
+                FETCH FIRST ROW ONLY)),
+        axis_segment AS (
+            -- Fetch end result from snapping
+            SELECT wtwaxs AS line
+                FROM axis_snapped
+                -- Return end result only if both distance marks were connected
+                WHERE 0.0001 >= ALL(SELECT wtwaxs <-> geom FROM points))
+        -- Use linear referencing to clip axis between distance marks.
+        -- Simplification is used to work-around the problem, that
+        -- ST_LineSubstring might generate very small line segments at an
+        -- end of the resulting linestring, that significantly differ from
+        -- the direction of the input linestring due to finite precision
+        -- of the calculation. The generated small segment of the
+        -- resulting line would lead e.g. to unexpected results in an area
+        -- generated by ISRSrange_area().
+        SELECT ST_SimplifyPreserveTopology(ST_LineSubstring(
+                    axis_segment.line, min(fractions.f), max(fractions.f)),
+                0.0001) AS line
+        INTO STRICT result_geom
+        FROM axis_segment, LATERAL (
+            SELECT ST_LineLocatePoint(axis_segment.line, points.geom) AS f
+                FROM points) AS fractions
+        GROUP BY axis_segment.line;
+
+    -- Drop temporary table to avoid side effects on PostgreSQL's MVCC,
+    -- because otherwise subsequent invocations of the function will not see
+    -- changes on the underlying waterway.waterway_axis that might have
+    -- occured.
+    DROP TABLE axis;
+
+    RETURN result_geom;
+END;
+    $$
+    LANGUAGE plpgsql
+    PARALLEL RESTRICTED;
--- a/schema/version.sql	Thu Oct 21 14:36:02 2021 +0200
+++ b/schema/version.sql	Thu Oct 21 19:50:39 2021 +0200
@@ -1,1 +1,1 @@
-INSERT INTO gemma_schema_version(version) VALUES (1466);
+INSERT INTO gemma_schema_version(version) VALUES (1467);