diff schema/gemma.sql @ 115:d349db18bece

s/wamos/gemma/g on database schema.
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Tue, 26 Jun 2018 16:20:04 +0200
parents schema/wamos.sql@25b28fd0e256
children 731fb359b29c
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/schema/gemma.sql	Tue Jun 26 16:20:04 2018 +0200
@@ -0,0 +1,409 @@
+BEGIN;
+
+--
+-- Infrastructure
+--
+CREATE EXTENSION postgis;
+
+-- TODO: will there ever be UPDATEs or can we drop that function due to
+-- historicisation?
+CREATE FUNCTION update_date_info() RETURNS trigger
+    LANGUAGE plpgsql
+    AS $$
+    BEGIN
+        NEW.date_info = CURRENT_TIMESTAMP;
+        RETURN NEW;
+    END;
+$$;
+
+-- Composite type: UN/LOCODE, fairway section, object reference, hectometre.
+-- See RIS-Index Encoding Guide
+CREATE TYPE isrs AS (
+       country_code char(2), -- ISO 3166 country code
+       -- could be validated against countries table.
+       locode char(3), -- without the country code:
+       -- http://www.unece.org/cefact/locode/welcome.html
+       fairway_section char(5),
+       object_reference char(5),
+       hectometre int -- should be constrained to five digits
+);
+
+CREATE TYPE isrsrange AS RANGE (
+       subtype = isrs
+       );
+
+--
+-- GEMMA data
+--
+CREATE SCHEMA gemma;
+CREATE SCHEMA gemma_waterway;
+CREATE SCHEMA gemma_fairway;
+SET search_path TO public, gemma, gemma_waterway, gemma_fairway;
+-- TODO: consolidate schemas. The current distribution of tables is mainly
+-- for diagram generation. A privilege based distribution might ease
+-- privilege management.
+
+--
+-- Auxiliary tables
+--
+CREATE TABLE gemma.system_config (
+       config_key varchar PRIMARY KEY,
+       config_val varchar
+       );
+
+CREATE TABLE gemma.countries (
+       country_code char(2) PRIMARY KEY -- ISO 3166 country code
+       -- A smallint PK would require even less disk space i.e. on the FK side.
+       -- This might be an issue in case cache space becomes a limiting
+       -- factor when there are many FKs pointing here.
+       );
+
+CREATE TABLE gemma.responsibility_areas (
+       country char(2) PRIMARY KEY REFERENCES countries,
+       area geometry(POLYGON, 4326) --XXX: Should be geography (elsewhere too)
+       );
+
+CREATE TABLE gemma.language_codes (
+       language_code varchar PRIMARY KEY
+       );
+
+CREATE TABLE gemma.user_profiles (
+       username varchar PRIMARY KEY, -- TODO: check it's in pg_roles by trigger
+       country char(2) NOT NULL REFERENCES countries,
+       language_code varchar REFERENCES language_codes,
+       map_extent box2d,
+       email_adress varchar NOT NULL UNIQUE
+       );
+
+CREATE TABLE gemma.templates (
+       id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+       dummy_attrib varchar,
+       "..." varchar
+       -- TODO: template attributes tbd.
+       );
+
+CREATE TABLE gemma.user_templates (
+       username varchar NOT NULL REFERENCES user_profiles,
+       template_id int NOT NULL REFERENCES templates,
+       PRIMARY KEY (username, template_id)
+       );
+
+--
+-- General river information
+--
+
+-- Eventually obsolete.
+-- See https://roundup-intern.intevation.de/gemma/issue5
+-- CREATE TABLE rwdrs (
+--        stretch isrsrange PRIMARY KEY,
+--        -- https://www.postgresql.org/docs/10/static/sql-createindex.html:
+--        -- Only B-tree supports UNIQUE indexes, but we need the GIST index
+--        -- below anyhow.
+--        -- Is it a good idea to build B-tree indexes on relatively large
+--        -- composites of string values or should we use inter PKs?
+--        -- => In case the index is used and cache space becomes a limiting
+--        -- factor, this might be an issue.
+--        rwdr double precision NOT NULL,
+--        EXCLUDE USING GIST (stretch WITH &&)
+--        );
+
+CREATE TABLE gemma_waterway.catccls (
+       catccl smallint PRIMARY KEY
+       -- TODO: Do we need name and/or definition from IENC feature catalogue?
+       );
+
+CREATE TABLE gemma_waterway.dirimps (
+       dirimp smallint PRIMARY KEY
+       -- TODO: Do we need name and/or definition from IENC feature catalogue?
+       );
+
+CREATE TABLE gemma_waterway.waterway_area (
+       area geometry(POLYGON, 4326) PRIMARY KEY,
+       catccl smallint REFERENCES catccls,
+       dirimp smallint REFERENCES dirimps
+       );
+
+CREATE TABLE gemma_fairway.depth_references (
+       depth_reference varchar(4) PRIMARY KEY
+       -- See col. AB and AI RIS-Index Encoding Guide
+       -- XXX: We need a way to distinguish between geodetic (eg. col. AP
+       -- RIS-Index) and other references (e.g. col. AB and AI):
+       -- _ multi-column FK with a boolean column (geodetic/non-geodetic;
+       --   i.e. absolut/not absolut) and DEFAULTs and CHECKs at the FK side.
+       -- _ Do not mixup things with different meanings in one table at all
+       --   (which would mean a model differing a bit from RIS-Index ideas)
+       );
+
+CREATE TABLE gemma_fairway.reference_water_levels (
+       reference_water_level varchar(20) PRIMARY KEY
+       );
+
+CREATE TABLE gemma_fairway.gauges (
+       location isrs PRIMARY KEY,
+       function_code varchar(10) NOT NULL, -- XXX: What is this really for?
+       objname varchar NOT NULL,
+       is_left boolean, -- XXX: Or reference position_codes?
+       geom geometry(POINT, 4326) NOT NULL,
+       applicability isrsrange,
+       validity tstzrange, -- XXX: Should ranges be NOT NULL? In DRC, only copy
+       -- pasted text from a more general specification is given
+       -- (a gauge is not a berth!)
+       -- TODO: Ranges need a joint exclusion constaint to prevent overlaps?
+       zero_point double precision NOT NULL,
+       geodref varchar(4) REFERENCES depth_references,
+       date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
+       source_organization varchar NOT NULL
+       );
+CREATE TRIGGER gauges_date_info BEFORE UPDATE ON gauges
+       FOR EACH ROW EXECUTE PROCEDURE update_date_info();
+
+CREATE TABLE gemma_fairway.gauges_reference_water_levels (
+       gauge_id isrs NOT NULL REFERENCES gauges,
+       reference_water_level varchar(20)
+           NOT NULL REFERENCES reference_water_levels,
+       PRIMARY KEY (gauge_id, reference_water_level),
+       value int NOT NULL
+       );
+
+CREATE TABLE gemma_fairway.gauge_measurements (
+       fk_gauge_id isrs NOT NULL REFERENCES gauges,
+       measure_date timestamp with time zone NOT NULL,
+       PRIMARY KEY (fk_gauge_id, measure_date),
+       -- XXX: Is country_code really relevant for GEMMA or just NtS?
+       -- country_code char(2) NOT NULL REFERENCES countries,
+       -- TODO: add relations to stuff provided as enumerations
+       sender varchar NOT NULL, -- "from" attribute from DRC
+       language_code varchar NOT NULL REFERENCES language_codes,
+       date_issue timestamp with time zone NOT NULL,
+       -- reference_code varchar(4) NOT NULL REFERENCES depth_references,
+       -- XXX: Always ZPG?
+       water_level double precision NOT NULL,
+       predicted boolean NOT NULL,
+       is_waterlevel boolean NOT NULL,
+       -- XXX: "measure_code" if really only W or Q
+       -- XXX: Do we need "unit" attribute or can we normalise on import?
+       value_min double precision, -- XXX: NOT NULL if predicted?
+       value_max double precision, -- XXX: NOT NULL if predicted?
+       --- TODO: Add a double range type for checking?
+       date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
+       source_organization varchar NOT NULL -- "originator"
+       );
+CREATE TRIGGER gauge_measurements_date_info BEFORE UPDATE ON gauge_measurements
+       FOR EACH ROW EXECUTE PROCEDURE update_date_info();
+
+CREATE TABLE gemma_waterway.waterway_axis (
+       wtwaxs geometry(LINESTRING, 4326) PRIMARY KEY,
+       -- TODO: Do we need to check data set quality as described in DRC 2.1.6?
+       objnam varchar NOT NULL,
+       nobjnam varchar
+       );
+
+CREATE TABLE gemma_waterway.distance_mark_functions (
+       -- XXX: Redundant information to object code in isrs code of dist. mark
+       distance_mark_function varchar(8) PRIMARY KEY
+       );
+
+CREATE TABLE gemma_waterway.position_codes (
+       position_code char(2) PRIMARY KEY
+       -- Use smallint because of fairway availability provided on daily basis?
+       -- Data come from 2015_06_23_RIS_Index_template_v2p0.xlsx,
+       -- sheet "Position_code" or RIS-Index encoding guide?
+       -- XXX: DRC 2.1.7 and 2.2.5 _seem_ to reference the same thing here.
+       -- Clarify!
+       -- TODO: Do we need an attribute "meaning" or so?
+       );
+
+-- This table allows linkage between the 1D ISRS location codes and 2D space
+-- e.g. for cutting bottleneck area out of waterway area based on virtual
+-- distance marks along waterway axis (see SUC7).
+CREATE TABLE gemma_waterway.distance_marks (
+       location_code isrs PRIMARY KEY,
+       geom geometry(POINT, 4326) NOT NULL,
+       distance_mark_function varchar(8)
+           NOT NULL REFERENCES distance_mark_functions,
+       -- XXX: attribute "function" in DRC 2.1.7 as well as CATDIS seem
+       -- to encode the same thing as the object code in ISRS location code.
+       position_code char(2) NOT NULL REFERENCES position_codes
+       );
+
+CREATE TABLE gemma_waterway.sections_stretches (
+       id varchar PRIMARY KEY,
+       is_section boolean NOT NULL, -- maps 'function' from interface
+       stretch isrsrange,
+       objnam varchar NOT NULL,
+       nobjnam varchar,
+       date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
+       source_organization varchar NOT NULL
+       );
+CREATE TRIGGER sections_stretches_date_info BEFORE UPDATE ON sections_stretches
+       FOR EACH ROW EXECUTE PROCEDURE update_date_info();
+
+CREATE TABLE gemma_waterway.waterway_profiles (
+       location isrs NOT NULL,
+       validity tstzrange,
+       EXCLUDE USING GIST (validity WITH &&),
+       PRIMARY KEY (location, validity),
+       lnwl smallint,
+       mwl smallint,
+       hnwl smallint,
+       fe30 smallint,
+       fe100 smallint,
+       -- XXX: further normalise using reference_water_levels?
+       CHECK(COALESCE(lnwl, mwl, hnwl, fe30, fe100) IS NULL
+             OR validity IS NOT NULL),
+       date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
+       source_organization varchar NOT NULL
+       );
+CREATE TRIGGER waterway_profiles_date_info BEFORE UPDATE ON waterway_profiles
+       FOR EACH ROW EXECUTE PROCEDURE update_date_info();
+
+CREATE TABLE gemma_waterway.levels_of_service (
+       level_of_service smallint PRIMARY KEY
+       );
+
+CREATE TABLE gemma_waterway.fairway_dimensions (
+       area geometry(POLYGON, 4326) PRIMARY KEY,
+       level_of_service smallint NOT NULL REFERENCES levels_of_service,
+       min_width smallint NOT NULL,
+       max_width smallint NOT NULL,
+       min_depth smallint NOT NULL,
+       date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
+       source_organization varchar NOT NULL
+       );
+CREATE TRIGGER fairway_dimensions_date_info BEFORE UPDATE ON fairway_dimensions
+       FOR EACH ROW EXECUTE PROCEDURE update_date_info();
+
+--
+-- Bottlenecks
+--
+CREATE TABLE gemma_fairway.riverbed_materials (
+       material varchar PRIMARY KEY
+       -- XXX: Should this table contain choices from DRC 2.2.3 or
+       -- from IENC Encoding Guide M.4.3, attribute NATSUR?
+       );
+
+CREATE TABLE gemma_fairway.survey_types (
+       survey_type varchar PRIMARY KEY
+       );
+
+CREATE TABLE gemma_fairway.coverage_types (
+       coverage_type varchar PRIMARY KEY
+       );
+
+CREATE TABLE gemma_fairway.limiting_factors (
+       limiting_factor varchar PRIMARY KEY
+       );
+
+-- XXX: Nullability differs between DRC (attributes marked "O") and WSDL
+-- (minOccurs=0; nillable seems to be set arbitrarily as even bottleneck_id and
+-- fk_g_fid (both mandatory, i.e. marked "M" in DRC) have nillable="true" in WSDL)
+CREATE TABLE gemma_fairway.bottlenecks (
+       bottleneck_id varchar PRIMARY KEY,
+       fk_g_fid isrs NOT NULL REFERENCES gauges,
+       -- XXX: DRC references "ch. 3.1.1", which does not exist in document.
+       objnam varchar,
+       nobjnm varchar,
+       stretch isrsrange NOT NULL,
+       area geometry(POLYGON, 4326) NOT NULL,
+       rb char(2) NOT NULL REFERENCES countries, -- from rb_lb in interface
+       lb char(2) NOT NULL REFERENCES countries, -- from rb_lb in interface
+       responsible_country char(2) NOT NULL REFERENCES countries,
+       revisiting_time smallint NOT NULL,
+       limiting varchar NOT NULL REFERENCES limiting_factors,
+       -- surtyp varchar NOT NULL REFERENCES survey_types,
+       -- XXX: Also an attribut of sounding result?
+       -- coverage varchar REFERENCES coverage_types,
+       -- XXX: Also an attribut of sounding result?
+       -- CHECK allowed combinations of surtyp and coverage or
+       -- different model approach?
+       -- depth_reference char(3) NOT NULL REFERENCES depth_references,
+       -- XXX: Also an attribut of sounding result?
+       date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
+       source_organization varchar NOT NULL,
+       -- additional_data xml -- Currently not relevant for GEMMA
+       staging_done boolean NOT NULL DEFAULT false
+       );
+CREATE TRIGGER bottleneck_date_info BEFORE UPDATE ON bottlenecks
+       FOR EACH ROW EXECUTE PROCEDURE update_date_info();
+
+CREATE TABLE gemma_fairway.bottlenecks_riverbed_materials (
+       bottleneck_id varchar REFERENCES bottlenecks,
+       riverbed varchar REFERENCES riverbed_materials,
+       -- XXX: should be 'natsur' according to IENC Encoding Guide M.4.3
+       PRIMARY KEY (bottleneck_id, riverbed)
+       );
+
+CREATE TABLE gemma_fairway.sounding_results (
+       bottleneck_id varchar NOT NULL REFERENCES bottlenecks,
+       date_info date NOT NULL,
+       PRIMARY KEY (bottleneck_id, date_info),
+       area geometry(POLYGON, 4326) NOT NULL,
+       surtyp varchar NOT NULL REFERENCES survey_types,
+       coverage varchar REFERENCES coverage_types,
+       depth_reference char(3) NOT NULL REFERENCES depth_references,
+       sounding_data raster NOT NULL,
+       staging_done boolean NOT NULL DEFAULT false
+       );
+
+--
+-- Fairway availability
+--
+CREATE TABLE gemma_fairway.measure_types (
+       measure_type varchar PRIMARY KEY
+       );
+
+CREATE TABLE gemma_fairway.fairway_availability (
+       id int PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+       position_code char(2) REFERENCES position_codes,
+       bottleneck_id varchar NOT NULL REFERENCES bottlenecks,
+       surdat date NOT NULL,
+       UNIQUE (bottleneck_id, surdat),
+       -- additional_data xml -- Currently not relevant for GEMMA
+       critical boolean,
+       date_info timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
+       source_organization varchar NOT NULL
+       );
+CREATE TRIGGER fairway_availability_date_info
+       BEFORE UPDATE ON fairway_availability
+       FOR EACH ROW EXECUTE PROCEDURE update_date_info();
+
+CREATE TABLE gemma_fairway.fa_reference_values (
+       fairway_availability_id int NOT NULL REFERENCES fairway_availability,
+       level_of_service smallint NOT NULL REFERENCES levels_of_service,
+       PRIMARY KEY (fairway_availability_id, level_of_service),
+       fairway_depth smallint,
+       fairway_width smallint,
+       fairway_radius int,
+       CHECK(COALESCE(fairway_depth, fairway_width, fairway_radius) IS NOT NULL),
+       shallowest_spot geometry(POINT, 4326)
+       );
+
+CREATE TABLE gemma_fairway.bottleneck_pdfs (
+       fairway_availability_id int NOT NULL REFERENCES fairway_availability,
+       profile_pdf_filename varchar NOT NULL, -- redundant to last part of URL
+       profile_pdf_url varchar NOT NULL, -- prohibit insecure URLs somehow?
+       PRIMARY KEY (fairway_availability_id, profile_pdf_url),
+       pdf_generation_date timestamp with time zone NOT NULL,
+       source_organization varchar NOT NULL
+       );
+
+CREATE TABLE gemma_fairway.effective_fairway_availability (
+       fairway_availability_id int NOT NULL REFERENCES fairway_availability,
+       measure_date timestamp with time zone NOT NULL,
+       level_of_service smallint NOT NULL REFERENCES levels_of_service,
+       PRIMARY KEY (fairway_availability_id, measure_date, level_of_service),
+       available_depth_value smallint,
+       available_width_value smallint,
+       water_level_value smallint,
+       CHECK(COALESCE(available_depth_value, available_width_value,
+           water_level_value) IS NOT NULL),
+       measure_type varchar NOT NULL REFERENCES measure_types,
+       source_organization varchar NOT NULL,
+       forecast_generation_time timestamp with time zone,
+       CHECK(measure_type <> 'forecasted'
+             OR forecast_generation_time IS NOT NULL),
+       value_lifetime timestamp with time zone,
+       CHECK(measure_type = 'minimum guaranteed' OR value_lifetime IS NOT NULL)
+       );
+
+COMMIT;