view schema/gemma.sql @ 927:48f70782400d

Add categories of CEMT class and directions of impact
author Tom Gottfried <tom@intevation.de>
date Mon, 08 Oct 2018 13:04:30 +0200
parents 271561dce2e6
children 688e1530f66a 71445b091415
line wrap: on
line source

BEGIN;

--
-- Infrastructure
--
CREATE EXTENSION postgis;

-- Needed for 3D processing e.g. for cross section profiles
-- FIXME disabled for now, because not available on Ubuntu/Debian
--CREATE EXTENSION postgis_sfcgal;

-- TODO: will there ever be UPDATEs or can we drop that function due to
-- historicisation?
CREATE FUNCTION update_date_info() RETURNS trigger
    LANGUAGE plpgsql
    AS $$
    BEGIN
        NEW.date_info = CURRENT_TIMESTAMP;
        RETURN NEW;
    END;
$$;

--
-- GEMMA data
--

-- Namespace not to be accessed directly by any user
CREATE SCHEMA internal
    -- Profile data are only accessible via the view users.list_users.
    CREATE TABLE user_profiles (
        username varchar PRIMARY KEY CHECK(octet_length(username) <= 63),
        -- keep username length compatible with role identifier
        map_extent box2d NOT NULL,
        email_address varchar NOT NULL
    )
    -- Columns referencing user-visible schemas added below.
;


-- Namespace to be accessed by sys_admin only
CREATE SCHEMA sys_admin
    CREATE TABLE system_config (
        config_key varchar PRIMARY KEY,
        config_val varchar
    )

    CREATE TABLE password_reset_requests (
        hash varchar(32) PRIMARY KEY,
        issued timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
        username varchar NOT NULL
            REFERENCES internal.user_profiles(username)
                ON DELETE CASCADE ON UPDATE CASCADE
    )

    -- Tables with geo data to be published with GeoServer.
    CREATE TABLE external_services (
        local_name varchar PRIMARY KEY,
        remote_url varchar NOT NULL,
        is_wfs     boolean NOT NULL DEFAULT TRUE
    )

    CREATE TABLE published_services (
        name regclass PRIMARY KEY,
        style bytea,
        as_wms boolean NOT NULL DEFAULT TRUE,
        as_wfs boolean NOT NULL DEFAULT TRUE
    )
;

--
-- Look-up tables with data that are static in a running system
--
CREATE TABLE countries (
    country_code char(2) PRIMARY KEY -- ISO 3166 country code
    -- A smallint PK would require even less disk space i.e. on the FK side.
    -- This might be an issue in case cache space becomes a limiting
    -- factor when there are many FKs pointing here.
);

CREATE TABLE language_codes (
    language_code varchar PRIMARY KEY
);

CREATE TABLE catccls (
    catccl smallint PRIMARY KEY
    -- TODO: Do we need name and/or definition from IENC feature catalogue?
    -- (see page 328 of edition 2.3)
);
INSERT INTO catccls VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11);

CREATE TABLE dirimps (
    dirimp smallint PRIMARY KEY
    -- TODO: Do we need name and/or definition from IENC feature catalogue?
    -- (see page 381 of edition 2.3)
);
INSERT INTO dirimps VALUES (1), (2), (3), (4);
-- dirimp_5 left out because it cannot be used for waterway area

CREATE TABLE depth_references (
    depth_reference varchar(4) PRIMARY KEY
    -- See col. AB and AI RIS-Index Encoding Guide
    -- XXX: We need a way to distinguish between geodetic (eg. col. AP
    -- RIS-Index) and other references (e.g. col. AB and AI):
    -- _ multi-column FK with a boolean column (geodetic/non-geodetic;
    --   i.e. absolut/not absolut) and DEFAULTs and CHECKs at the FK side.
    -- _ Do not mixup things with different meanings in one table at all
    --   (which would mean a model differing a bit from RIS-Index ideas)
);

CREATE TABLE reference_water_levels (
    reference_water_level varchar(20) PRIMARY KEY
);

CREATE TABLE distance_mark_functions (
    -- XXX: Redundant information to object code in isrs code of dist. mark
    distance_mark_function varchar(8) PRIMARY KEY
);

CREATE TABLE position_codes (
    position_code char(2) PRIMARY KEY
    -- Use smallint because of fairway availability provided on daily basis?
    -- Data come from 2015_06_23_RIS_Index_template_v2p0.xlsx,
    -- sheet "Position_code" or RIS-Index encoding guide?
    -- XXX: DRC 2.1.7 and 2.2.5 _seem_ to reference the same thing here.
    -- Clarify!
    -- TODO: Do we need an attribute "meaning" or so?
);

CREATE TABLE levels_of_service (
    level_of_service smallint PRIMARY KEY
);
INSERT INTO levels_of_service VALUES (1), (2), (3);

CREATE TABLE riverbed_materials (
    material varchar PRIMARY KEY
    -- XXX: Should this table contain choices from DRC 2.2.3 or
    -- from IENC Encoding Guide M.4.3, attribute NATSUR?
);

CREATE TABLE survey_types (
    survey_type varchar PRIMARY KEY
);

CREATE TABLE coverage_types (
    coverage_type varchar PRIMARY KEY
);

CREATE TABLE limiting_factors (
    limiting_factor varchar PRIMARY KEY
);

CREATE TABLE measure_types (
    measure_type varchar PRIMARY KEY
);


-- Namespace for user management related data
CREATE SCHEMA users
    CREATE TABLE responsibility_areas (
        country char(2) PRIMARY KEY REFERENCES countries,
        area geography(MULTIPOLYGON, 4326)
    )

    CREATE TABLE templates (
        template_name varchar PRIMARY KEY,
        template_data bytea NOT NULL,
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP
    )
    CREATE TRIGGER templates_date_info BEFORE UPDATE ON templates
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE user_templates (
        username varchar NOT NULL
            REFERENCES internal.user_profiles
                ON DELETE CASCADE ON UPDATE CASCADE,
        template_name varchar NOT NULL REFERENCES templates ON DELETE CASCADE,
        PRIMARY KEY (username, template_name)
    )
;
ALTER TABLE internal.user_profiles ADD
    country char(2) NOT NULL REFERENCES users.responsibility_areas;


-- Namespace for system wide configuration
CREATE SCHEMA systemconf
    CREATE TABLE feature_colours (
        feature_name varchar,
        style_attr varchar,
        r int NOT NULL CHECK (r >= 0 AND r < 256),
        g int NOT NULL CHECK (g >= 0 AND g < 256),
        b int NOT NULL CHECK (b >= 0 AND b < 256),
        a numeric NOT NULL CHECK (a >= 0 AND a <= 1),
        PRIMARY KEY (feature_name, style_attr)
    )
;

-- Namespace for waterway data that can change in a running system
CREATE SCHEMA waterway

    -- Eventually obsolete.
    -- See https://roundup-intern.intevation.de/gemma/issue5
    -- CREATE TABLE rwdrs (
    --     tretch isrsrange PRIMARY KEY,
    --     -- https://www.postgresql.org/docs/10/static/sql-createindex.html:
    --     -- Only B-tree supports UNIQUE indexes, but we need the GIST index
    --     -- below anyhow.
    --     -- Is it a good idea to build B-tree indexes on relatively large
    --     -- composites of string values or should we use inter PKs?
    --     -- => In case the index is used and cache space becomes a limiting
    --     -- factor, this might be an issue.
    --     rwdr double precision NOT NULL,
    --     EXCLUDE USING GIST (stretch WITH &&)
    --)

    CREATE TABLE waterway_area (
        id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
        area geography(POLYGON, 4326) NOT NULL,
        catccl smallint REFERENCES catccls,
        dirimp smallint REFERENCES dirimps
    )
    CREATE UNIQUE INDEX ON waterway_area ((ST_GeoHash(area, 23)))

    CREATE TABLE gauges (
        location isrs PRIMARY KEY CHECK(
            (location).orc SIMILAR TO 'G[[:digit:]]{4}'
            AND CAST(substring((location).orc from 2 for 4) AS int) < 2048),
        objname varchar NOT NULL,
        is_left boolean, -- XXX: Or reference position_codes?
        geom geography(POINT, 4326) NOT NULL,
        applicability isrsrange,
        validity tstzrange,-- XXX: Should ranges be NOT NULL? In DRC, only copy
        -- pasted text from a more general specification is given
        -- (a gauge is not a berth!)
        -- TODO: Ranges need a joint exclusion constaint to prevent overlaps?
        zero_point double precision NOT NULL,
        geodref varchar(4) REFERENCES depth_references,
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL
    )
    CREATE TRIGGER gauges_date_info BEFORE UPDATE ON gauges
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE gauges_reference_water_levels (
        gauge_id isrs NOT NULL REFERENCES gauges,
        reference_water_level varchar(20)
            NOT NULL REFERENCES reference_water_levels,
        PRIMARY KEY (gauge_id, reference_water_level),
        value int NOT NULL
    )

    CREATE TABLE gauge_measurements (
        fk_gauge_id isrs NOT NULL REFERENCES gauges,
        measure_date timestamp with time zone NOT NULL,
        PRIMARY KEY (fk_gauge_id, measure_date),
        -- XXX: Is country_code really relevant for GEMMA or just NtS?
        -- country_code char(2) NOT NULL REFERENCES countries,
        -- TODO: add relations to stuff provided as enumerations
        sender varchar NOT NULL, -- "from" attribute from DRC
        language_code varchar NOT NULL REFERENCES language_codes,
        date_issue timestamp with time zone NOT NULL,
        -- reference_code varchar(4) NOT NULL REFERENCES depth_references,
        -- XXX: Always ZPG?
        water_level double precision NOT NULL,
        predicted boolean NOT NULL,
        is_waterlevel boolean NOT NULL,
        -- XXX: "measure_code" if really only W or Q
        -- XXX: Do we need "unit" attribute or can we normalise on import?
        value_min double precision, -- XXX: NOT NULL if predicted?
        value_max double precision, -- XXX: NOT NULL if predicted?
        --- TODO: Add a double range type for checking?
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL, -- "originator"
        staging_done boolean NOT NULL DEFAULT false
    )
    CREATE TRIGGER gauge_measurements_date_info
        BEFORE UPDATE ON gauge_measurements
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE waterway_axis (
        id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
        wtwaxs geography(LINESTRING, 4326) NOT NULL,
        -- TODO: Do we need to check data set quality (DRC 2.1.6)?
        objnam varchar NOT NULL,
        nobjnam varchar
    )
    CREATE UNIQUE INDEX ON waterway_axis ((ST_GeoHash(wtwaxs, 23)))

    -- This table allows linkage between 1D ISRS location codes and 2D space
    -- e.g. for cutting bottleneck area out of waterway area based on virtual
    -- distance marks along waterway axis (see SUC7).
    CREATE TABLE distance_marks (
        location_code isrs PRIMARY KEY,
        geom geography(POINT, 4326) NOT NULL,
        distance_mark_function varchar(8)
            NOT NULL REFERENCES distance_mark_functions,
        -- XXX: attribute "function" in DRC 2.1.7 as well as CATDIS seem
        -- to encode the same thing as the object code in ISRS location code.
        position_code char(2) NOT NULL REFERENCES position_codes
    )

    -- A table to help geoserver serve the distance marks as WFS 1.1.0.
    -- At least geoserver-2.13.2 does not serve type geography correctly
    -- and does not serve the location_code as isrs type
    CREATE VIEW waterway.distance_marks_geoserver AS
        SELECT location_code::VARCHAR,
               geom::Geometry(POINT, 4326),
               distance_mark_function,
               position_code,
               (location_code).hectometre
            FROM waterway.distance_marks

    CREATE TABLE sections_stretches (
        id varchar PRIMARY KEY,
        is_section boolean NOT NULL, -- maps 'function' from interface
        stretch isrsrange,
        objnam varchar NOT NULL,
        nobjnam varchar,
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL,
        staging_done boolean NOT NULL DEFAULT false
    )
    CREATE TRIGGER sections_stretches_date_info
        BEFORE UPDATE ON sections_stretches
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE waterway_profiles (
        location isrs NOT NULL,
        validity tstzrange,
        EXCLUDE USING GIST (validity WITH &&),
        PRIMARY KEY (location, validity),
        lnwl smallint,
        mwl smallint,
        hnwl smallint,
        fe30 smallint,
        fe100 smallint,
        -- XXX: further normalise using reference_water_levels?
        CHECK(COALESCE(lnwl, mwl, hnwl, fe30, fe100) IS NULL
            OR validity IS NOT NULL),
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL,
        staging_done boolean NOT NULL DEFAULT false
    )
    CREATE TRIGGER waterway_profiles_date_info
        BEFORE UPDATE ON waterway_profiles
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE fairway_dimensions (
        id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
        area geography(POLYGON, 4326) NOT NULL,
        level_of_service smallint NOT NULL REFERENCES levels_of_service,
        min_width smallint NOT NULL,
        max_width smallint NOT NULL,
        min_depth smallint NOT NULL,
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL,
        staging_done boolean NOT NULL DEFAULT false
    )
    CREATE UNIQUE INDEX ON fairway_dimensions ((ST_GeoHash(area, 23)))
    CREATE TRIGGER fairway_dimensions_date_info
        BEFORE UPDATE ON fairway_dimensions
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    --
    -- Bottlenecks
    --
    -- XXX: Nullability differs between DRC (attributes marked "O") and WSDL
    -- (minOccurs=0; nillable seems to be set arbitrarily as even bottleneck_id and
    -- fk_g_fid (both mandatory, i.e. marked "M" in DRC) have nillable="true" in WSDL)
    CREATE TABLE bottlenecks (
        bottleneck_id varchar PRIMARY KEY,
        fk_g_fid isrs NOT NULL REFERENCES gauges,
        -- XXX: DRC references "ch. 3.1.1", which does not exist in document.
        objnam varchar,
        nobjnm varchar,
        stretch isrsrange NOT NULL,
        area geography(POLYGON, 4326) NOT NULL,
        rb char(2) NOT NULL REFERENCES countries, -- from rb_lb in interface
        lb char(2) NOT NULL REFERENCES countries, -- from rb_lb in interface
        responsible_country char(2) NOT NULL REFERENCES countries,
        revisiting_time smallint NOT NULL,
        limiting varchar NOT NULL REFERENCES limiting_factors,
        -- surtyp varchar NOT NULL REFERENCES survey_types,
        -- XXX: Also an attribut of sounding result?
        -- coverage varchar REFERENCES coverage_types,
        -- XXX: Also an attribut of sounding result?
        -- CHECK allowed combinations of surtyp and coverage or
        -- different model approach?
        -- depth_reference char(3) NOT NULL REFERENCES depth_references,
        -- XXX: Also an attribut of sounding result?
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL,
        -- additional_data xml -- Currently not relevant for GEMMA
        staging_done boolean NOT NULL DEFAULT false
    )
    CREATE TRIGGER bottleneck_date_info BEFORE UPDATE ON bottlenecks
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE bottlenecks_riverbed_materials (
        bottleneck_id varchar REFERENCES bottlenecks,
        riverbed varchar REFERENCES riverbed_materials,
        -- XXX: should be 'natsur' according to IENC Encoding Guide M.4.3
        PRIMARY KEY (bottleneck_id, riverbed)
    )

    CREATE TABLE sounding_results (
        id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
        bottleneck_id varchar NOT NULL REFERENCES bottlenecks,
        date_info date NOT NULL,
        UNIQUE (bottleneck_id, date_info),
        area geography(POLYGON, 4326) NOT NULL,
        surtyp varchar REFERENCES survey_types,
        coverage varchar REFERENCES coverage_types,
        depth_reference char(3) NOT NULL REFERENCES depth_references,
        point_cloud geography(MULTIPOINTZ, 4326) NOT NULL,
        staging_done boolean NOT NULL DEFAULT false
    )

    CREATE TABLE octrees (
        sounding_result_id int NOT NULL UNIQUE REFERENCES sounding_results,
        checksum varchar NOT NULL,
        octree_index bytea NOT NULL
    )

    CREATE TABLE sounding_results_contour_lines (
        sounding_result_id int NOT NULL REFERENCES sounding_results,
        height numeric NOT NULL,
        lines geography(multilinestring, 4326) NOT NULL,
        UNIQUE (sounding_result_id, height)
    )
    -- A view to help geoserver serve contour lines.
    -- At least geoserver-2.13.2 does not serve type geography correctly
    CREATE VIEW waterway.sounding_results_contour_lines_geoserver AS
        SELECT bottleneck_id,
                date_info,
                height,
                CAST(lines AS geometry(multilinestring, 4326)) AS lines
            FROM waterway.sounding_results_contour_lines cl
                JOIN waterway.sounding_results sr
                    ON sr.id = cl.sounding_result_id

    --
    -- Fairway availability
    --
    CREATE TABLE fairway_availability (
        id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
        position_code char(2) REFERENCES position_codes,
        bottleneck_id varchar NOT NULL REFERENCES bottlenecks,
        surdat date NOT NULL,
        UNIQUE (bottleneck_id, surdat),
        -- additional_data xml -- Currently not relevant for GEMMA
        critical boolean,
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL
    )
    CREATE TRIGGER fairway_availability_date_info
        BEFORE UPDATE ON fairway_availability
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE fa_reference_values (
        fairway_availability_id int NOT NULL REFERENCES fairway_availability,
        level_of_service smallint NOT NULL REFERENCES levels_of_service,
        PRIMARY KEY (fairway_availability_id, level_of_service),
        fairway_depth smallint,
        fairway_width smallint,
        fairway_radius int,
        CHECK(COALESCE(fairway_depth, fairway_width, fairway_radius)
            IS NOT NULL),
        shallowest_spot geography(POINT, 4326)
    )

    CREATE TABLE bottleneck_pdfs (
        fairway_availability_id int NOT NULL REFERENCES fairway_availability,
        profile_pdf_filename varchar NOT NULL, -- redundant to last part of URL
        profile_pdf_url varchar NOT NULL, -- prohibit insecure URLs somehow?
        PRIMARY KEY (fairway_availability_id, profile_pdf_url),
        pdf_generation_date timestamp with time zone NOT NULL,
        source_organization varchar NOT NULL
    )

    CREATE TABLE effective_fairway_availability (
        fairway_availability_id int NOT NULL REFERENCES fairway_availability,
        measure_date timestamp with time zone NOT NULL,
        level_of_service smallint NOT NULL REFERENCES levels_of_service,
        PRIMARY KEY (fairway_availability_id, measure_date, level_of_service),
        available_depth_value smallint,
        available_width_value smallint,
        water_level_value smallint,
        CHECK(COALESCE(available_depth_value, available_width_value,
            water_level_value) IS NOT NULL),
        measure_type varchar NOT NULL REFERENCES measure_types,
        source_organization varchar NOT NULL,
        forecast_generation_time timestamp with time zone,
        CHECK(measure_type <> 'forecasted'
            OR forecast_generation_time IS NOT NULL),
        value_lifetime timestamp with time zone,
        CHECK(measure_type = 'minimum guaranteed'
            OR value_lifetime IS NOT NULL)
    )
;

COMMIT;