view schema/gemma.sql @ 567:f3452ce5c056

Make table writable for data import Lookup tables will probably be filled with data during import and thus should reside in the waterway schema, which is accordingly authorised. Touch only the one table relevant for bottlenecks here, as other data might be handled differently. Lookup tables referenced in out-commented columns not touched, as it is not yet clarified whether these columns will be removed completely. countries not touched neither, because it will be filled from different source and demo-data should have everything necessary to start importing bottlenecks.
author Tom Gottfried <tom@intevation.de>
date Tue, 04 Sep 2018 21:41:45 +0200
parents 3af7ca761f6a
children ad07846b09d1
line wrap: on
line source

BEGIN;

--
-- Infrastructure
--
CREATE EXTENSION postgis;

-- TODO: will there ever be UPDATEs or can we drop that function due to
-- historicisation?
CREATE FUNCTION update_date_info() RETURNS trigger
    LANGUAGE plpgsql
    AS $$
    BEGIN
        NEW.date_info = CURRENT_TIMESTAMP;
        RETURN NEW;
    END;
$$;

-- Composite type: UN/LOCODE, fairway section, object reference, hectometre.
-- See RIS-Index Encoding Guide
CREATE TYPE isrs AS (
    country_code char(2), -- ISO 3166 country code
    -- could be validated against countries table.
    locode char(3), -- without the country code:
    -- http://www.unece.org/cefact/locode/welcome.html
    fairway_section char(5),
    object_reference char(5),
    hectometre int -- should be constrained to five digits
);

CREATE TYPE isrsrange AS RANGE (
    subtype = isrs
);

--
-- GEMMA data
--

-- Namespace not to be accessed directly by any user
CREATE SCHEMA internal
    -- Profile data are only accessible via the view users.list_users.
    CREATE TABLE user_profiles (
        username varchar PRIMARY KEY CHECK(octet_length(username) <= 63),
        -- keep username length compatible with role identifier
        map_extent box2d NOT NULL,
        email_address varchar NOT NULL
    )
    -- Columns referencing user-visible schemas added below.
;


-- Namespace to be accessed by sys_admin only
CREATE SCHEMA sys_admin
    CREATE TABLE system_config (
        config_key varchar PRIMARY KEY,
        config_val varchar
    )

    CREATE TABLE password_reset_requests (
        hash varchar(32) PRIMARY KEY,
        issued timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
        username varchar NOT NULL
            REFERENCES internal.user_profiles(username)
                ON DELETE CASCADE ON UPDATE CASCADE
    )

    CREATE TABLE external_services (
        local_name varchar PRIMARY KEY,
        remote_url varchar NOT NULL,
        is_wfs     boolean NOT NULL DEFAULT TRUE
    )

    CREATE TABLE published_services (
        name varchar PRIMARY KEY,
        style bytea,
        as_wms boolean NOT NULL DEFAULT TRUE,
        as_wfs boolean NOT NULL DEFAULT TRUE
    )
;

-- Tables with geo data to be published with GeoServer.
INSERT INTO sys_admin.published_services (name) VALUES ('fairway_dimensions');

--
-- Look-up tables with data that are static in a running system
--
CREATE TABLE countries (
    country_code char(2) PRIMARY KEY -- ISO 3166 country code
    -- A smallint PK would require even less disk space i.e. on the FK side.
    -- This might be an issue in case cache space becomes a limiting
    -- factor when there are many FKs pointing here.
);

CREATE TABLE language_codes (
    language_code varchar PRIMARY KEY
);

CREATE TABLE catccls (
    catccl smallint PRIMARY KEY
    -- TODO: Do we need name and/or definition from IENC feature catalogue?
);

CREATE TABLE dirimps (
    dirimp smallint PRIMARY KEY
    -- TODO: Do we need name and/or definition from IENC feature catalogue?
);

CREATE TABLE depth_references (
    depth_reference varchar(4) PRIMARY KEY
    -- See col. AB and AI RIS-Index Encoding Guide
    -- XXX: We need a way to distinguish between geodetic (eg. col. AP
    -- RIS-Index) and other references (e.g. col. AB and AI):
    -- _ multi-column FK with a boolean column (geodetic/non-geodetic;
    --   i.e. absolut/not absolut) and DEFAULTs and CHECKs at the FK side.
    -- _ Do not mixup things with different meanings in one table at all
    --   (which would mean a model differing a bit from RIS-Index ideas)
);

CREATE TABLE reference_water_levels (
    reference_water_level varchar(20) PRIMARY KEY
);

CREATE TABLE distance_mark_functions (
    -- XXX: Redundant information to object code in isrs code of dist. mark
    distance_mark_function varchar(8) PRIMARY KEY
);

CREATE TABLE position_codes (
    position_code char(2) PRIMARY KEY
    -- Use smallint because of fairway availability provided on daily basis?
    -- Data come from 2015_06_23_RIS_Index_template_v2p0.xlsx,
    -- sheet "Position_code" or RIS-Index encoding guide?
    -- XXX: DRC 2.1.7 and 2.2.5 _seem_ to reference the same thing here.
    -- Clarify!
    -- TODO: Do we need an attribute "meaning" or so?
);

CREATE TABLE levels_of_service (
    level_of_service smallint PRIMARY KEY
);
INSERT INTO levels_of_service VALUES (1), (2), (3);

CREATE TABLE riverbed_materials (
    material varchar PRIMARY KEY
    -- XXX: Should this table contain choices from DRC 2.2.3 or
    -- from IENC Encoding Guide M.4.3, attribute NATSUR?
);

CREATE TABLE survey_types (
    survey_type varchar PRIMARY KEY
);

CREATE TABLE coverage_types (
    coverage_type varchar PRIMARY KEY
);

CREATE TABLE measure_types (
    measure_type varchar PRIMARY KEY
);


-- Namespace for user management related data
CREATE SCHEMA users
    CREATE TABLE responsibility_areas (
        country char(2) PRIMARY KEY REFERENCES countries,
        area geography(MULTIPOLYGON, 4326)
    )

    CREATE TABLE templates (
        template_name varchar PRIMARY KEY,
        template_data bytea NOT NULL,
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP
    )
    CREATE TRIGGER templates_date_info BEFORE UPDATE ON templates
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE user_templates (
        username varchar NOT NULL
            REFERENCES internal.user_profiles
                ON DELETE CASCADE ON UPDATE CASCADE,
        template_name varchar NOT NULL REFERENCES templates ON DELETE CASCADE,
        PRIMARY KEY (username, template_name)
    )
;
ALTER TABLE internal.user_profiles ADD
    country char(2) NOT NULL REFERENCES users.responsibility_areas;


-- Namespace for waterway data that can change in a running system
CREATE SCHEMA waterway

    -- Eventually obsolete.
    -- See https://roundup-intern.intevation.de/gemma/issue5
    -- CREATE TABLE rwdrs (
    --     tretch isrsrange PRIMARY KEY,
    --     -- https://www.postgresql.org/docs/10/static/sql-createindex.html:
    --     -- Only B-tree supports UNIQUE indexes, but we need the GIST index
    --     -- below anyhow.
    --     -- Is it a good idea to build B-tree indexes on relatively large
    --     -- composites of string values or should we use inter PKs?
    --     -- => In case the index is used and cache space becomes a limiting
    --     -- factor, this might be an issue.
    --     rwdr double precision NOT NULL,
    --     EXCLUDE USING GIST (stretch WITH &&)
    --)

    CREATE TABLE waterway_area (
        id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
        area geography(POLYGON, 4326) NOT NULL,
        catccl smallint REFERENCES catccls,
        dirimp smallint REFERENCES dirimps
    )
    CREATE UNIQUE INDEX ON waterway_area ((ST_GeoHash(area, 23)))

    CREATE TABLE gauges (
        location isrs PRIMARY KEY,
        function_code varchar(10) NOT NULL, -- XXX: What is this really for?
        objname varchar NOT NULL,
        is_left boolean, -- XXX: Or reference position_codes?
        geom geography(POINT, 4326) NOT NULL,
        applicability isrsrange,
        validity tstzrange,-- XXX: Should ranges be NOT NULL? In DRC, only copy
        -- pasted text from a more general specification is given
        -- (a gauge is not a berth!)
        -- TODO: Ranges need a joint exclusion constaint to prevent overlaps?
        zero_point double precision NOT NULL,
        geodref varchar(4) REFERENCES depth_references,
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL
    )
    CREATE TRIGGER gauges_date_info BEFORE UPDATE ON gauges
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE gauges_reference_water_levels (
        gauge_id isrs NOT NULL REFERENCES gauges,
        reference_water_level varchar(20)
            NOT NULL REFERENCES reference_water_levels,
        PRIMARY KEY (gauge_id, reference_water_level),
        value int NOT NULL
    )

    CREATE TABLE gauge_measurements (
        fk_gauge_id isrs NOT NULL REFERENCES gauges,
        measure_date timestamp with time zone NOT NULL,
        PRIMARY KEY (fk_gauge_id, measure_date),
        -- XXX: Is country_code really relevant for GEMMA or just NtS?
        -- country_code char(2) NOT NULL REFERENCES countries,
        -- TODO: add relations to stuff provided as enumerations
        sender varchar NOT NULL, -- "from" attribute from DRC
        language_code varchar NOT NULL REFERENCES language_codes,
        date_issue timestamp with time zone NOT NULL,
        -- reference_code varchar(4) NOT NULL REFERENCES depth_references,
        -- XXX: Always ZPG?
        water_level double precision NOT NULL,
        predicted boolean NOT NULL,
        is_waterlevel boolean NOT NULL,
        -- XXX: "measure_code" if really only W or Q
        -- XXX: Do we need "unit" attribute or can we normalise on import?
        value_min double precision, -- XXX: NOT NULL if predicted?
        value_max double precision, -- XXX: NOT NULL if predicted?
        --- TODO: Add a double range type for checking?
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL, -- "originator"
        staging_done boolean NOT NULL DEFAULT false
    )
    CREATE TRIGGER gauge_measurements_date_info
        BEFORE UPDATE ON gauge_measurements
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE waterway_axis (
        id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
        wtwaxs geography(LINESTRING, 4326) NOT NULL,
        -- TODO: Do we need to check data set quality (DRC 2.1.6)?
        objnam varchar NOT NULL,
        nobjnam varchar
    )
    CREATE UNIQUE INDEX ON waterway_axis ((ST_GeoHash(wtwaxs, 23)))

    -- This table allows linkage between 1D ISRS location codes and 2D space
    -- e.g. for cutting bottleneck area out of waterway area based on virtual
    -- distance marks along waterway axis (see SUC7).
    CREATE TABLE distance_marks (
        location_code isrs PRIMARY KEY,
        geom geography(POINT, 4326) NOT NULL,
        distance_mark_function varchar(8)
            NOT NULL REFERENCES distance_mark_functions,
        -- XXX: attribute "function" in DRC 2.1.7 as well as CATDIS seem
        -- to encode the same thing as the object code in ISRS location code.
        position_code char(2) NOT NULL REFERENCES position_codes
    )

    CREATE TABLE sections_stretches (
        id varchar PRIMARY KEY,
        is_section boolean NOT NULL, -- maps 'function' from interface
        stretch isrsrange,
        objnam varchar NOT NULL,
        nobjnam varchar,
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL,
        staging_done boolean NOT NULL DEFAULT false
    )
    CREATE TRIGGER sections_stretches_date_info
        BEFORE UPDATE ON sections_stretches
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE waterway_profiles (
        location isrs NOT NULL,
        validity tstzrange,
        EXCLUDE USING GIST (validity WITH &&),
        PRIMARY KEY (location, validity),
        lnwl smallint,
        mwl smallint,
        hnwl smallint,
        fe30 smallint,
        fe100 smallint,
        -- XXX: further normalise using reference_water_levels?
        CHECK(COALESCE(lnwl, mwl, hnwl, fe30, fe100) IS NULL
            OR validity IS NOT NULL),
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL,
        staging_done boolean NOT NULL DEFAULT false
    )
    CREATE TRIGGER waterway_profiles_date_info
        BEFORE UPDATE ON waterway_profiles
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE fairway_dimensions (
        id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
        area geography(POLYGON, 4326) NOT NULL,
        level_of_service smallint NOT NULL REFERENCES levels_of_service,
        min_width smallint NOT NULL,
        max_width smallint NOT NULL,
        min_depth smallint NOT NULL,
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL,
        staging_done boolean NOT NULL DEFAULT false
    )
    CREATE UNIQUE INDEX ON fairway_dimensions ((ST_GeoHash(area, 23)))
    CREATE TRIGGER fairway_dimensions_date_info
        BEFORE UPDATE ON fairway_dimensions
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    --
    -- Bottlenecks
    --
    CREATE TABLE limiting_factors (
        limiting_factor varchar PRIMARY KEY
    )

    -- XXX: Nullability differs between DRC (attributes marked "O") and WSDL
    -- (minOccurs=0; nillable seems to be set arbitrarily as even bottleneck_id and
    -- fk_g_fid (both mandatory, i.e. marked "M" in DRC) have nillable="true" in WSDL)
    CREATE TABLE bottlenecks (
        bottleneck_id varchar PRIMARY KEY,
        fk_g_fid isrs NOT NULL REFERENCES gauges,
        -- XXX: DRC references "ch. 3.1.1", which does not exist in document.
        objnam varchar,
        nobjnm varchar,
        stretch isrsrange NOT NULL,
        area geography(POLYGON, 4326) NOT NULL,
        rb char(2) NOT NULL REFERENCES countries, -- from rb_lb in interface
        lb char(2) NOT NULL REFERENCES countries, -- from rb_lb in interface
        responsible_country char(2) NOT NULL REFERENCES countries,
        revisiting_time smallint NOT NULL,
        limiting varchar NOT NULL REFERENCES limiting_factors,
        -- surtyp varchar NOT NULL REFERENCES survey_types,
        -- XXX: Also an attribut of sounding result?
        -- coverage varchar REFERENCES coverage_types,
        -- XXX: Also an attribut of sounding result?
        -- CHECK allowed combinations of surtyp and coverage or
        -- different model approach?
        -- depth_reference char(3) NOT NULL REFERENCES depth_references,
        -- XXX: Also an attribut of sounding result?
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL,
        -- additional_data xml -- Currently not relevant for GEMMA
        staging_done boolean NOT NULL DEFAULT false
    )
    CREATE TRIGGER bottleneck_date_info BEFORE UPDATE ON bottlenecks
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE bottlenecks_riverbed_materials (
        bottleneck_id varchar REFERENCES bottlenecks,
        riverbed varchar REFERENCES riverbed_materials,
        -- XXX: should be 'natsur' according to IENC Encoding Guide M.4.3
        PRIMARY KEY (bottleneck_id, riverbed)
    )

    CREATE TABLE sounding_results (
        bottleneck_id varchar NOT NULL REFERENCES bottlenecks,
        date_info date NOT NULL,
        PRIMARY KEY (bottleneck_id, date_info),
        area geography(POLYGON, 4326) NOT NULL,
        surtyp varchar NOT NULL REFERENCES survey_types,
        coverage varchar REFERENCES coverage_types,
        depth_reference char(3) NOT NULL REFERENCES depth_references,
        sounding_data raster NOT NULL,
        staging_done boolean NOT NULL DEFAULT false
    )

    --
    -- Fairway availability
    --
    CREATE TABLE fairway_availability (
        id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
        position_code char(2) REFERENCES position_codes,
        bottleneck_id varchar NOT NULL REFERENCES bottlenecks,
        surdat date NOT NULL,
        UNIQUE (bottleneck_id, surdat),
        -- additional_data xml -- Currently not relevant for GEMMA
        critical boolean,
        date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
        source_organization varchar NOT NULL
    )
    CREATE TRIGGER fairway_availability_date_info
        BEFORE UPDATE ON fairway_availability
        FOR EACH ROW EXECUTE PROCEDURE update_date_info()

    CREATE TABLE fa_reference_values (
        fairway_availability_id int NOT NULL REFERENCES fairway_availability,
        level_of_service smallint NOT NULL REFERENCES levels_of_service,
        PRIMARY KEY (fairway_availability_id, level_of_service),
        fairway_depth smallint,
        fairway_width smallint,
        fairway_radius int,
        CHECK(COALESCE(fairway_depth, fairway_width, fairway_radius)
            IS NOT NULL),
        shallowest_spot geography(POINT, 4326)
    )

    CREATE TABLE bottleneck_pdfs (
        fairway_availability_id int NOT NULL REFERENCES fairway_availability,
        profile_pdf_filename varchar NOT NULL, -- redundant to last part of URL
        profile_pdf_url varchar NOT NULL, -- prohibit insecure URLs somehow?
        PRIMARY KEY (fairway_availability_id, profile_pdf_url),
        pdf_generation_date timestamp with time zone NOT NULL,
        source_organization varchar NOT NULL
    )

    CREATE TABLE effective_fairway_availability (
        fairway_availability_id int NOT NULL REFERENCES fairway_availability,
        measure_date timestamp with time zone NOT NULL,
        level_of_service smallint NOT NULL REFERENCES levels_of_service,
        PRIMARY KEY (fairway_availability_id, measure_date, level_of_service),
        available_depth_value smallint,
        available_width_value smallint,
        water_level_value smallint,
        CHECK(COALESCE(available_depth_value, available_width_value,
            water_level_value) IS NOT NULL),
        measure_type varchar NOT NULL REFERENCES measure_types,
        source_organization varchar NOT NULL,
        forecast_generation_time timestamp with time zone,
        CHECK(measure_type <> 'forecasted'
            OR forecast_generation_time IS NOT NULL),
        value_lifetime timestamp with time zone,
        CHECK(measure_type = 'minimum guaranteed'
            OR value_lifetime IS NOT NULL)
    )
;

COMMIT;