view schema/gemma.sql @ 1133:dd4071019676

Delete contour lines with their sounding result Contour lines are no independent data sets and thus can safely be deleted with sounding results. In passing, name the primary key as such.
author Tom Gottfried <tom@intevation.de>
date Wed, 07 Nov 2018 18:13:02 +0100
parents 2e6b47cdb2ca
children 930fdd8b474f
line wrap: on
line source

BEGIN;

--
-- Infrastructure
--
CREATE EXTENSION postgis;

-- TODO: will there ever be UPDATEs or can we drop that function due to
-- historicisation?
CREATE FUNCTION update_date_info() RETURNS trigger
    LANGUAGE plpgsql
    AS $$
    BEGIN
        NEW.date_info = CURRENT_TIMESTAMP;
        RETURN NEW;
    END;
$$;

--
-- GEMMA data
--

-- Namespace not to be accessed directly by any user
CREATE SCHEMA internal
    -- Profile data are only accessible via the view users.list_users.
    CREATE TABLE user_profiles (
        username varchar PRIMARY KEY CHECK(octet_length(username) <= 63),
        -- keep username length compatible with role identifier
        map_extent box2d NOT NULL,
        email_address varchar NOT NULL
    )
    -- Columns referencing user-visible schemas added below.
;


-- Namespace to be accessed by sys_admin only
CREATE SCHEMA sys_admin
    CREATE TABLE system_config (
        config_key varchar PRIMARY KEY,
        config_val varchar
    )

    CREATE TABLE password_reset_requests (
        hash varchar(32) PRIMARY KEY,
        issued timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
        username varchar NOT NULL
            REFERENCES internal.user_profiles(username)
                ON DELETE CASCADE ON UPDATE CASCADE
    )

    -- Tables with geo data to be published with GeoServer.
    CREATE TABLE external_services (
        local_name varchar PRIMARY KEY,
        remote_url varchar NOT NULL,
        is_wfs     boolean NOT NULL DEFAULT TRUE
    )

    CREATE TABLE published_services (
        name regclass PRIMARY KEY,
        style bytea,
        as_wms boolean NOT NULL DEFAULT TRUE,
        as_wfs boolean NOT NULL DEFAULT TRUE
    )
;

--
-- Look-up tables with data that are static in a running system
--
CREATE TABLE countries (
    country_code char(2) PRIMARY KEY -- ISO 3166 country code
    -- A smallint PK would require even less disk space i.e. on the FK side.
    -- This might be an issue in case cache space becomes a limiting
    -- factor when there are many FKs pointing here.
);

CREATE TABLE language_codes (
    language_code varchar PRIMARY KEY
);

CREATE TABLE catccls (
    catccl smallint PRIMARY KEY
    -- TODO: Do we need name and/or definition from IENC feature catalogue?
    -- (see page 328 of edition 2.3)
);
INSERT INTO catccls VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11);

CREATE TABLE dirimps (
    dirimp smallint PRIMARY KEY
    -- TODO: Do we need name and/or definition from IENC feature catalogue?
    -- (see page 381 of edition 2.3)
);
INSERT INTO dirimps VALUES (1), (2), (3), (4);
-- dirimp_5 left out because it cannot be used for waterway area

CREATE TABLE depth_references (
    depth_reference varchar(4) PRIMARY KEY
    -- See col. AB and AI RIS-Index Encoding Guide
    -- XXX: We need a way to distinguish between geodetic (eg. col. AP
    -- RIS-Index) and other references (e.g. col. AB and AI):
    -- _ multi-column FK with a boolean column (geodetic/non-geodetic;
    --   i.e. absolut/not absolut) and DEFAULTs and CHECKs at the FK side.
    -- _ Do not mixup things with different meanings in one table at all
    --   (which would mean a model differing a bit from RIS-Index ideas)
);

CREATE TABLE reference_water_levels (
    reference_water_level varchar(20) PRIMARY KEY
);

CREATE TABLE catdis (
    catdis smallint PRIMARY KEY
    -- TODO: Do we need name and/or definition from IENC feature catalogue?
    -- (see page 171 of edition 2.3)
);
INSERT INTO catdis VALUES (1), (2), (3), (4);

CREATE TABLE position_codes (
    position_code char(2) PRIMARY KEY
    -- Use smallint because of fairway availability provided on daily basis?
    -- Data come from 2015_06_23_RIS_Index_template_v2p0.xlsx,
    -- sheet "Position_code" or RIS-Index encoding guide?
    -- XXX: DRC 2.1.7 and 2.2.5 _seem_ to reference the same thing here.
    -- Clarify!
    -- TODO: Do we need an attribute "meaning" or so?
);

CREATE TABLE levels_of_service (
    level_of_service smallint PRIMARY KEY
);
INSERT INTO levels_of_service VALUES (1), (2), (3);

CREATE TABLE riverbed_materials (
    material varchar PRIMARY KEY
    -- XXX: Should this table contain choices from DRC 2.2.3 or
    -- from IENC Encoding Guide M.4.3, attribute NATSUR?
);

CREATE TABLE survey_types (
    survey_type varchar PRIMARY KEY
);

CREATE TABLE coverage_types (
    coverage_type varchar PRIMARY KEY
);

CREATE TABLE limiting_factors (
    limiting_factor varchar PRIMARY KEY
);

CREATE TABLE measure_types (
    measure_type varchar PRIMARY KEY
);


-- Namespace for user management related data
CREATE SCHEMA users
    CREATE TABLE responsibility_areas (
        country char(2) PRIMARY KEY REFERENCES countries,
        area geography(MULTIPOLYGON, 4326)
    )

    CREATE TABLE templates (
        template_name varchar PRIMARY KEY,
        template_data bytea NOT NULL,
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP
    )
    CREATE TRIGGER templates_date_info BEFORE UPDATE ON templates
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE user_templates (
        username varchar NOT NULL
            REFERENCES internal.user_profiles
                ON DELETE CASCADE ON UPDATE CASCADE,
        template_name varchar NOT NULL REFERENCES templates ON DELETE CASCADE,
        PRIMARY KEY (username, template_name)
    )
;
ALTER TABLE internal.user_profiles ADD
    country char(2) NOT NULL REFERENCES users.responsibility_areas;


-- Namespace for system wide configuration
CREATE SCHEMA systemconf
    CREATE TABLE feature_colours (
        feature_name varchar,
        style_attr varchar,
        r int NOT NULL CHECK (r >= 0 AND r < 256),
        g int NOT NULL CHECK (g >= 0 AND g < 256),
        b int NOT NULL CHECK (b >= 0 AND b < 256),
        a numeric NOT NULL CHECK (a >= 0 AND a <= 1),
        PRIMARY KEY (feature_name, style_attr)
    )
;

-- Namespace for waterway data that can change in a running system
CREATE SCHEMA waterway

    -- Eventually obsolete.
    -- See https://roundup-intern.intevation.de/gemma/issue5
    -- CREATE TABLE rwdrs (
    --     tretch isrsrange PRIMARY KEY,
    --     -- https://www.postgresql.org/docs/10/static/sql-createindex.html:
    --     -- Only B-tree supports UNIQUE indexes, but we need the GIST index
    --     -- below anyhow.
    --     -- Is it a good idea to build B-tree indexes on relatively large
    --     -- composites of string values or should we use inter PKs?
    --     -- => In case the index is used and cache space becomes a limiting
    --     -- factor, this might be an issue.
    --     rwdr double precision NOT NULL,
    --     EXCLUDE USING GIST (stretch WITH &&)
    --)

    CREATE TABLE waterway_area (
        id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
        area geography(POLYGON, 4326) NOT NULL,
        catccl smallint REFERENCES catccls,
        dirimp smallint REFERENCES dirimps
    )
    CREATE UNIQUE INDEX ON waterway_area ((ST_GeoHash(area, 23)))

    CREATE TABLE gauges (
        location isrs PRIMARY KEY CHECK(
            (location).orc SIMILAR TO 'G[[:digit:]]{4}'
            AND CAST(substring((location).orc from 2 for 4) AS int) < 2048),
        objname varchar NOT NULL,
        is_left boolean, -- XXX: Or reference position_codes?
        geom geography(POINT, 4326) NOT NULL,
        applicability isrsrange,
        validity tstzrange,-- XXX: Should ranges be NOT NULL? In DRC, only copy
        -- pasted text from a more general specification is given
        -- (a gauge is not a berth!)
        -- TODO: Ranges need a joint exclusion constaint to prevent overlaps?
        zero_point double precision NOT NULL,
        geodref varchar(4) REFERENCES depth_references,
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL
    )
    CREATE TRIGGER gauges_date_info BEFORE UPDATE ON gauges
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE gauges_reference_water_levels (
        gauge_id isrs NOT NULL REFERENCES gauges,
        reference_water_level varchar(20)
            NOT NULL REFERENCES reference_water_levels,
        PRIMARY KEY (gauge_id, reference_water_level),
        value int NOT NULL
    )

    CREATE TABLE gauge_measurements (
        fk_gauge_id isrs NOT NULL REFERENCES gauges,
        measure_date timestamp with time zone NOT NULL,
        PRIMARY KEY (fk_gauge_id, measure_date),
        -- XXX: Is country_code really relevant for GEMMA or just NtS?
        -- country_code char(2) NOT NULL REFERENCES countries,
        -- TODO: add relations to stuff provided as enumerations
        sender varchar NOT NULL, -- "from" attribute from DRC
        language_code varchar NOT NULL REFERENCES language_codes,
        date_issue timestamp with time zone NOT NULL,
        -- reference_code varchar(4) NOT NULL REFERENCES depth_references,
        -- XXX: Always ZPG?
        water_level double precision NOT NULL,
        predicted boolean NOT NULL,
        is_waterlevel boolean NOT NULL,
        -- XXX: "measure_code" if really only W or Q
        -- XXX: Do we need "unit" attribute or can we normalise on import?
        value_min double precision, -- XXX: NOT NULL if predicted?
        value_max double precision, -- XXX: NOT NULL if predicted?
        --- TODO: Add a double range type for checking?
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL, -- "originator"
        staging_done boolean NOT NULL DEFAULT false
    )
    CREATE TRIGGER gauge_measurements_date_info
        BEFORE UPDATE ON gauge_measurements
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE waterway_axis (
        id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
        wtwaxs geography(LINESTRING, 4326) NOT NULL,
        -- TODO: Do we need to check data set quality (DRC 2.1.6)?
        objnam varchar NOT NULL,
        nobjnam varchar
    )
    CREATE UNIQUE INDEX ON waterway_axis ((ST_GeoHash(wtwaxs, 23)))

    -- This table allows linkage between 1D ISRS location codes and 2D space
    -- e.g. for cutting bottleneck area out of waterway area based on virtual
    -- distance marks along waterway axis (see SUC7).
    CREATE TABLE distance_marks_virtual (
        location_code isrs PRIMARY KEY,
        geom geography(POINT, 4326) NOT NULL,
        related_enc varchar(12) NOT NULL
    )

    CREATE TABLE distance_marks (
        country char(2) NOT NULL REFERENCES countries,
        hectom int NOT NULL,
        geom geography(POINT, 4326) NOT NULL,
        -- include location in primary key, because we have no fairway code:
        PRIMARY KEY (country, hectom, geom),
        catdis smallint NOT NULL REFERENCES catdis,
        position_code char(2) NOT NULL REFERENCES position_codes,
        related_enc varchar(12) NOT NULL
    )

    -- A table to help geoserver serve the distance marks as WFS 1.1.0.
    -- At least geoserver-2.13.2 does not serve type geography correctly
    -- and does not serve the location_code as isrs type
    CREATE VIEW waterway.distance_marks_geoserver AS
        SELECT location_code::VARCHAR,
               geom::Geometry(POINT, 4326),
               related_enc,
               (location_code).hectometre
            FROM waterway.distance_marks_virtual

    CREATE TABLE sections_stretches (
        id varchar PRIMARY KEY,
        is_section boolean NOT NULL, -- maps 'function' from interface
        stretch isrsrange,
        objnam varchar NOT NULL,
        nobjnam varchar,
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL,
        staging_done boolean NOT NULL DEFAULT false
    )
    CREATE TRIGGER sections_stretches_date_info
        BEFORE UPDATE ON sections_stretches
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE waterway_profiles (
        location isrs NOT NULL,
        validity tstzrange,
        EXCLUDE USING GIST (validity WITH &&),
        PRIMARY KEY (location, validity),
        lnwl smallint,
        mwl smallint,
        hnwl smallint,
        fe30 smallint,
        fe100 smallint,
        -- XXX: further normalise using reference_water_levels?
        CHECK(COALESCE(lnwl, mwl, hnwl, fe30, fe100) IS NULL
            OR validity IS NOT NULL),
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL,
        staging_done boolean NOT NULL DEFAULT false
    )
    CREATE TRIGGER waterway_profiles_date_info
        BEFORE UPDATE ON waterway_profiles
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE fairway_dimensions (
        id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
        area geography(POLYGON, 4326) NOT NULL,
        level_of_service smallint NOT NULL REFERENCES levels_of_service,
        min_width smallint NOT NULL,
        max_width smallint NOT NULL,
        min_depth smallint NOT NULL,
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL,
        staging_done boolean NOT NULL DEFAULT false
    )
    CREATE UNIQUE INDEX ON fairway_dimensions ((ST_GeoHash(area, 23)))
    CREATE TRIGGER fairway_dimensions_date_info
        BEFORE UPDATE ON fairway_dimensions
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    --
    -- Bottlenecks
    --
    -- XXX: Nullability differs between DRC (attributes marked "O") and WSDL
    -- (minOccurs=0; nillable seems to be set arbitrarily as even bottleneck_id and
    -- fk_g_fid (both mandatory, i.e. marked "M" in DRC) have nillable="true" in WSDL)
    CREATE TABLE bottlenecks (
        bottleneck_id varchar PRIMARY KEY,
        fk_g_fid isrs NOT NULL REFERENCES gauges,
        -- XXX: DRC references "ch. 3.1.1", which does not exist in document.
        objnam varchar,
        nobjnm varchar,
        stretch isrsrange NOT NULL,
        area geography(POLYGON, 4326) NOT NULL,
        rb char(2) NOT NULL REFERENCES countries, -- from rb_lb in interface
        lb char(2) NOT NULL REFERENCES countries, -- from rb_lb in interface
        responsible_country char(2) NOT NULL REFERENCES countries,
        revisiting_time smallint NOT NULL,
        limiting varchar NOT NULL REFERENCES limiting_factors,
        -- surtyp varchar NOT NULL REFERENCES survey_types,
        -- XXX: Also an attribut of sounding result?
        -- coverage varchar REFERENCES coverage_types,
        -- XXX: Also an attribut of sounding result?
        -- CHECK allowed combinations of surtyp and coverage or
        -- different model approach?
        -- depth_reference char(3) NOT NULL REFERENCES depth_references,
        -- XXX: Also an attribut of sounding result?
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL,
        -- additional_data xml -- Currently not relevant for GEMMA
        staging_done boolean NOT NULL DEFAULT false
    )
    CREATE TRIGGER bottleneck_date_info BEFORE UPDATE ON bottlenecks
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE bottlenecks_riverbed_materials (
        bottleneck_id varchar REFERENCES bottlenecks,
        riverbed varchar REFERENCES riverbed_materials,
        -- XXX: should be 'natsur' according to IENC Encoding Guide M.4.3
        PRIMARY KEY (bottleneck_id, riverbed)
    )

    CREATE TABLE sounding_results (
        id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
        bottleneck_id varchar NOT NULL REFERENCES bottlenecks,
        date_info date NOT NULL,
        UNIQUE (bottleneck_id, date_info),
        area geography(POLYGON, 4326) NOT NULL,
        surtyp varchar REFERENCES survey_types,
        coverage varchar REFERENCES coverage_types,
        depth_reference char(3) NOT NULL REFERENCES depth_references,
        point_cloud geography(MULTIPOINTZ, 4326) NOT NULL,
        octree_checksum varchar,
        octree_index bytea,
        staging_done boolean NOT NULL DEFAULT false
    )

    CREATE TABLE sounding_results_contour_lines (
        sounding_result_id int NOT NULL REFERENCES sounding_results
            ON DELETE CASCADE,
        height numeric NOT NULL,
        lines geography(multilinestring, 4326) NOT NULL,
        PRIMARY KEY (sounding_result_id, height)
    )
    -- A view to help geoserver serve contour lines.
    -- At least geoserver-2.13.2 does not serve type geography correctly
    CREATE VIEW waterway.sounding_results_contour_lines_geoserver AS
        SELECT bottleneck_id,
                date_info,
                height,
                CAST(lines AS geometry(multilinestring, 4326)) AS lines
            FROM waterway.sounding_results_contour_lines cl
                JOIN waterway.sounding_results sr
                    ON sr.id = cl.sounding_result_id

    --
    -- Fairway availability
    --
    CREATE TABLE fairway_availability (
        id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
        position_code char(2) REFERENCES position_codes,
        bottleneck_id varchar NOT NULL REFERENCES bottlenecks,
        surdat date NOT NULL,
        UNIQUE (bottleneck_id, surdat),
        -- additional_data xml -- Currently not relevant for GEMMA
        critical boolean,
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL
    )
    CREATE TRIGGER fairway_availability_date_info
        BEFORE UPDATE ON fairway_availability
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE fa_reference_values (
        fairway_availability_id int NOT NULL REFERENCES fairway_availability,
        level_of_service smallint NOT NULL REFERENCES levels_of_service,
        PRIMARY KEY (fairway_availability_id, level_of_service),
        fairway_depth smallint,
        fairway_width smallint,
        fairway_radius int,
        CHECK(COALESCE(fairway_depth, fairway_width, fairway_radius)
            IS NOT NULL),
        shallowest_spot geography(POINT, 4326)
    )

    CREATE TABLE bottleneck_pdfs (
        fairway_availability_id int NOT NULL REFERENCES fairway_availability,
        profile_pdf_filename varchar NOT NULL, -- redundant to last part of URL
        profile_pdf_url varchar NOT NULL, -- prohibit insecure URLs somehow?
        PRIMARY KEY (fairway_availability_id, profile_pdf_url),
        pdf_generation_date timestamp with time zone NOT NULL,
        source_organization varchar NOT NULL
    )

    CREATE TABLE effective_fairway_availability (
        fairway_availability_id int NOT NULL REFERENCES fairway_availability,
        measure_date timestamp with time zone NOT NULL,
        level_of_service smallint NOT NULL REFERENCES levels_of_service,
        PRIMARY KEY (fairway_availability_id, measure_date, level_of_service),
        available_depth_value smallint,
        available_width_value smallint,
        water_level_value smallint,
        CHECK(COALESCE(available_depth_value, available_width_value,
            water_level_value) IS NOT NULL),
        measure_type varchar NOT NULL REFERENCES measure_types,
        source_organization varchar NOT NULL,
        forecast_generation_time timestamp with time zone,
        CHECK(measure_type <> 'forecasted'
            OR forecast_generation_time IS NOT NULL),
        value_lifetime timestamp with time zone,
        CHECK(measure_type = 'minimum guaranteed'
            OR value_lifetime IS NOT NULL)
    )

    CREATE VIEW waterway.bottleneck_overview AS
    SELECT
      objnam AS name,
      ST_Centroid(area)::Geometry AS point,
      (lower(stretch)).hectometre AS from,
      (upper(stretch)).hectometre AS to,
      sr.current
    FROM waterway.bottlenecks bn LEFT JOIN (
      SELECT bottleneck_id, max(date_info) AS current FROM
      waterway.sounding_results
      GROUP BY bottleneck_id) sr ON sr.bottleneck_id = bn.bottleneck_id
    ORDER BY objnam
;

--
-- Import queue and respective logging
--
CREATE TYPE waterway.import_state AS ENUM ('queued', 'running', 'successful', 'failed');

CREATE TABLE waterway.imports (
    id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
    state waterway.import_state NOT NULL DEFAULT 'queued',
    enqueued timestamp NOT NULL DEFAULT now(),
    kind  varchar NOT NULL,
    username varchar NOT NULL
        REFERENCES internal.user_profiles(username)
            ON DELETE CASCADE ON UPDATE CASCADE,
    data TEXT
);

CREATE INDEX enqueued_idx ON waterway.imports(enqueued, state);

CREATE TYPE waterway.log_type AS ENUM ('info', 'warn', 'error');

CREATE TABLE waterway.import_logs (
    import_id int NOT NULL REFERENCES waterway.imports(id),
    time timestamp NOT NULL DEFAULT now(),
    kind waterway.log_type NOT NULL DEFAULT 'info',
    msg TEXT NOT NULL
);

COMMIT;