Mercurial > gemma
changeset 1109:74a75a5ce770
Added geonames data and extended search for villages/cities.
The free data from geonames https://download.geonames.org/ is imported
into the gemma data base (for the relevant states) and used to search
for cities and villages.
This data might be replaced by data from the RIS-index later on..?
author | Sascha Wilde <wilde@intevation.de> |
---|---|
date | Mon, 05 Nov 2018 13:07:16 +0100 |
parents | 5adee8b0e29f |
children | ed8fbbc48440 |
files | .hgignore pkg/controllers/search.go schema/geonames-import/import-geonames.sh schema/geonames-import/import-geonames.sql schema/geonames.sql schema/install-db.sh schema/search.sql |
diffstat | 7 files changed, 309 insertions(+), 10 deletions(-) [+] |
line wrap: on
line diff
--- a/.hgignore Fri Nov 02 17:21:33 2018 +0100 +++ b/.hgignore Mon Nov 05 13:07:16 2018 +0100 @@ -105,3 +105,6 @@ # End of https://www.gitignore.io/api/go,vim,node,emacs,visualstudiocode TAGS + +# Import data +schema/geonames-import/data/*
--- a/pkg/controllers/search.go Fri Nov 02 17:21:33 2018 +0100 +++ b/pkg/controllers/search.go Mon Nov 05 13:07:16 2018 +0100 @@ -30,12 +30,7 @@ FROM waterway.distance_marks_virtual WHERE (location_code).hectometre = $1) r` - searchBottleneckSQL = `SELECT COALESCE(json_agg(r),'[]') -FROM (SELECT objnam AS name, - ST_AsGeoJSON(ST_Centroid(area))::json AS geom, - 'bottleneck' AS type - FROM waterway.bottlenecks - WHERE objnam ILIKE $1) r` + searchMostSQL = `SELECT search_most($1)::text` listBottlenecksSQL = ` SELECT COALESCE(json_agg(r),'[]') @@ -95,8 +90,8 @@ // Hande search for bottlencks: err = db.QueryRowContext( req.Context(), - searchBottleneckSQL, - "%"+s.SearchString+"%", + searchMostSQL, + s.SearchString, ).Scan(&result) }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/schema/geonames-import/import-geonames.sh Mon Nov 05 13:07:16 2018 +0100 @@ -0,0 +1,147 @@ +#!/bin/bash + +# This is Free Software under GNU Affero General Public License v >= 3.0 +# without warranty, see README.md and license for details. + +# SPDX-License-Identifier: AGPL-3.0-or-later +# License-Filename: LICENSES/AGPL-3.0.txt + +# Copyright (C) 2018 by via donau +# – Österreichische Wasserstraßen-Gesellschaft mbH +# Software engineering by Intevation GmbH + +# Author(s): +# * Sascha Wilde <wilde@intevation.de> + + +ME=$(basename $0) +MYHOME=$(realpath $(dirname $0)) +CCs="AT BG HR HU RO SK" + +# Defaults: +datadir=$(realpath "$MYHOME/data") +db=gemma +port=5432 +drop=0 + +usage() +{ + cat <<EOF +$ME [OPTION]... + +Import geonames data (used for extended search features) into gemma database. + +If data was downloaded previously, it will be reused. To force update +of data, first delete the existing data directory or use a fresh one. + +Options: + -d, --db=NAME create the database NAME. Default: "gemma" + -p, --port=PORT connect do the postgresql cluster at PORT. + Default is the postgresql standard port 5432 + -D, --datadir=PATH download data to PATH. Default: "$datadir" + --drop drop geonames data + --help display this help and exit +EOF +} + +fatal() +{ + echo >&2 "$1" + exit 23 +} + +dl() { + [ -d "$datadir" ] || mkdir "$datadir" + cd "$datadir" + for cc in $CCs ; do + if [ -f "${cc}.zip" ] ; then + echo "Keeping existing ${cc}.zip ..." + else + echo "Fetching ${cc}.zip ..." + curl -O "https://download.geonames.org/export/dump/${cc}.zip" + fi + done +} + +unpack() +{ + cd "$datadir" || fatal "$datadir not missing!" + for cc in $CCs ; do + if [ -f "${cc}.zip" ] ; then + if [ -f "${cc}.txt" ] ; then + echo "Reusing existing ${cc}.txt" + else + echo "Extracting ${cc}.zip ..." + unzip "${cc}.zip" "${cc}.txt" + fi + else + fatal "Missing archive: ${cc}.zip" + fi + done +} + +drop_data() +{ + echo "Dropping geonames data..." + psql -q -p "$port" -d "$db" -c "TRUNCATE TABLE waterway.geonames;" +} + +import_data() +{ + for cc in $CCs ; do + fqfn="${datadir}/${cc}.txt" + echo "Importing $fqfn into database..." + psql -qtv ON_ERROR_STOP= -p "$port" -d "$db" \ + -v filename="$fqfn" \ + -f "$MYHOME/import-geonames.sql" + done +} + +# Parse options: + +OPTS=`getopt \ + -l help,db:,port:,datadir:,drop \ + -o D:d:p: -n "$ME" -- "$@"` +[ $? -eq 0 ] || { usage ; exit 1 ; } + +eval set -- "$OPTS" + +while true ; do + case "$1" in + --db|-d) + db="$2" + shift 2 + ;; + --port|-p) + port="$2" + shift 2 + ;; + --datadir|-D) + datadir="$2" + shift 2 + ;; + --drop) + drop=1 + shift 1 + ;; + --help) + { usage ; exit 0 ; } + ;; + --) + shift + break + ;; + esac +done + + +# Main ------------------------------------------------------------ + +if [ $drop -eq 0 ] ; then + dl + unpack + drop_data + import_data +else + drop_data +fi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/schema/geonames-import/import-geonames.sql Mon Nov 05 13:07:16 2018 +0100 @@ -0,0 +1,57 @@ +-- This is Free Software under GNU Affero General Public License v >= 3.0 +-- without warranty, see README.md and license for details. + +-- SPDX-License-Identifier: AGPL-3.0-or-later +-- License-Filename: LICENSES/AGPL-3.0.txt + +-- Copyright (C) 2018 by via donau +-- – Österreichische Wasserstraßen-Gesellschaft mbH +-- Software engineering by Intevation GmbH + +-- Author(s): +-- * Sascha Wilde <wilde@intevation.de> + + +CREATE TEMP TABLE geonames_import_tmp ( + geonameid int PRIMARY KEY, + name varchar(200) NOT NULL, + asciiname varchar(200), + alternatenames varchar(10000), + lat float NOT NULL, + lon float NOT NULL, + feature_class char(1), + feature_code varchar(10), + country_code char(2), + cc2 varchar(200), + admin1_code varchar(20), + admin2_code varchar(80), + admin3_code varchar(20), + admin4_code varchar(20), + population text, + elevation text, + dem text, + timezone varchar(40), + modification_date timestamp +); + +COPY geonames_import_tmp FROM :'filename' WITH NULL ''; + +INSERT INTO waterway.geonames + SELECT geonameid, + name, + asciiname, + alternatenames, + ST_MakePoint(lon,lat), + feature_class, + feature_code, + country_code, + cc2, + admin1_code, + admin2_code, + admin3_code, + admin4_code, + population::int, + elevation::int, + dem::int, + timezone, + modification_date FROM geonames_import_tmp;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/schema/geonames.sql Mon Nov 05 13:07:16 2018 +0100 @@ -0,0 +1,38 @@ +-- This is Free Software under GNU Affero General Public License v >= 3.0 +-- without warranty, see README.md and license for details. + +-- SPDX-License-Identifier: AGPL-3.0-or-later +-- License-Filename: LICENSES/AGPL-3.0.txt + +-- Copyright (C) 2018 by via donau +-- – Österreichische Wasserstraßen-Gesellschaft mbH +-- Software engineering by Intevation GmbH + +-- Author(s): +-- * Sascha Wilde <wilde@intevation.de> + + +-- Tables for data from geonames database +-- https://download.geonames.org/ +-- used for searching cities and villages... + +CREATE TABLE waterway.geonames ( + geonameid int PRIMARY KEY, + name varchar(200) NOT NULL, + asciiname varchar(200), + alternatenames varchar(10000), + location geography(POINT, 4326) NOT NULL, + feature_class char(1), + feature_code varchar(10), + country_code char(2), + cc2 varchar(200), + admin1_code varchar(20), + admin2_code varchar(80), + admin3_code varchar(20), + admin4_code varchar(20), + population int, + elevation int, + dem int, + timezone varchar(40), + modification_date timestamp +);
--- a/schema/install-db.sh Fri Nov 02 17:21:33 2018 +0100 +++ b/schema/install-db.sh Mon Nov 05 13:07:16 2018 +0100 @@ -15,6 +15,7 @@ -p, --port=PORT connect do the postgresql cluster at PORT. Default is the postgresql standard port 5432 -D, --demo also install demo accounts and data + -G, --geonames also import geonames data --adminpw set the password to use for the "sysadmin" account. Default is a random password. --metapw set the password to use for the "meta_login" account. @@ -43,6 +44,7 @@ db=gemma port=5432 demo=0 +geonames=0 drop=0 adminpw=`genpw 15` metapw=`genpw 15` @@ -50,8 +52,8 @@ # Parse options: OPTS=`getopt \ - -l help,demo,db:,port:,drop,adminpw:,metapw: \ - -o Dd:p: -n "$ME" -- "$@"` + -l help,demo,geonames,db:,port:,drop,adminpw:,metapw: \ + -o DGd:p: -n "$ME" -- "$@"` [ $? -eq 0 ] || { usage ; exit 1 ; } eval set -- "$OPTS" @@ -78,6 +80,10 @@ demo=1 shift 1 ;; + --geonames|-G) + geonames=1 + shift 1 + ;; --drop) drop=1 shift 1 @@ -103,10 +109,13 @@ -c "SET client_min_messages TO WARNING;" \ -f "$BASEDIR/isrs.sql" \ -f "$BASEDIR/gemma.sql" \ + -f "$BASEDIR/search.sql" \ + -f "$BASEDIR/geonames.sql" \ -f "$BASEDIR/manage_users.sql" \ -f "$BASEDIR/auth.sql" \ -f "$BASEDIR/default_sysconfig.sql" + # setup initial login roles with given passwords: psql -qt -P pager=off -p "$port" -d "$db" \ -v adminpw="$adminpw" -v metapw="$metapw" \ @@ -118,7 +127,10 @@ -f "$BASEDIR/demo-data/users.sql" -d "$db" psql -q -p "$port" -f "$BASEDIR/demo-data/roles.sql" \ -d "$db" + fi + if [[ $geonames -eq 1 ]] ; then + "$BASEDIR/geonames-import/import-geonames.sh" -p "$port" -d "$db" fi else
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/schema/search.sql Mon Nov 05 13:07:16 2018 +0100 @@ -0,0 +1,47 @@ +CREATE FUNCTION search_bottlenecks(search_string text) RETURNS jsonb + LANGUAGE plpgsql + AS $$ +DECLARE + _result jsonb; +BEGIN + SELECT COALESCE(json_agg(r),'[]') + INTO _result + FROM (SELECT objnam AS name, + ST_AsGeoJSON(ST_Centroid(area))::json AS geom, + 'bottleneck' AS type + FROM waterway.bottlenecks + WHERE objnam ILIKE '%' || search_string || '%' + ORDER BY name) r; + RETURN _result; +END; +$$; + +CREATE FUNCTION search_cities(search_string text) RETURNS jsonb + LANGUAGE plpgsql + AS $$ +DECLARE + _result jsonb; +BEGIN + SELECT COALESCE(json_agg(r),'[]') + INTO _result + FROM (SELECT name || ' (' || country_code || ')' AS name, + ST_AsGeoJSON(location)::json AS geom, + 'city' AS type + FROM waterway.geonames + WHERE feature_code IN ('PPLA', 'PPLA1', 'PPLA2', 'PPLA3', 'PPLC') + AND (name ILIKE '%' || search_string || '%' + OR alternatenames ~* ('(^|,)' || search_string || '($|,)')) + ORDER BY array_position(ARRAY['PPLC', 'PPLA', 'PPLA1', 'PPLA2', 'PPLA3'], + feature_code::text), + name) r; + RETURN _result; +END; +$$; + +CREATE FUNCTION search_most(search_string text) RETURNS jsonb + LANGUAGE plpgsql + AS $$ +BEGIN + RETURN search_bottlenecks(search_string) || search_cities(search_string); +END; +$$;