comparison schema/geonames-import/import-geonames.sh @ 1109:74a75a5ce770

Added geonames data and extended search for villages/cities. The free data from geonames (https://download.geonames.org/) is imported into the gemma database (for the relevant states) and used to search for cities and villages. This data might be replaced by data from the RIS index later on.
author Sascha Wilde <wilde@intevation.de>
date Mon, 05 Nov 2018 13:07:16 +0100
parents
children ca7821e1f720
comparison
equal deleted inserted replaced
1108:5adee8b0e29f 1109:74a75a5ce770
1 #!/bin/bash
2
3 # This is Free Software under GNU Affero General Public License v >= 3.0
4 # without warranty, see README.md and license for details.
5
6 # SPDX-License-Identifier: AGPL-3.0-or-later
7 # License-Filename: LICENSES/AGPL-3.0.txt
8
9 # Copyright (C) 2018 by via donau
10 # – Österreichische Wasserstraßen-Gesellschaft mbH
11 # Software engineering by Intevation GmbH
12
13 # Author(s):
14 # * Sascha Wilde <wilde@intevation.de>
15
16
# ME: script name used in messages.  MYHOME: absolute directory of this
# script; import-geonames.sql is looked up there.
# All expansions are quoted so a script path containing whitespace works.
ME=$(basename -- "$0")
MYHOME=$(realpath "$(dirname -- "$0")")
# Country codes of the geonames dumps that get downloaded and imported.
CCs="AT BG HR HU RO SK"

# Defaults:
datadir=$(realpath "$MYHOME/data")
db=gemma
port=5432
drop=0
26
# Print the command line help to stdout.
# Reads the globals ME (script name) and datadir (shown as the current
# default of --datadir).
usage()
{
  cat <<EOF
$ME [OPTION]...

Import geonames data (used for extended search features) into gemma database.

If data was downloaded previously, it will be reused. To force update
of data, first delete the existing data directory or use a fresh one.

Options:
  -d, --db=NAME        import into the database NAME. Default: "gemma"
  -p, --port=PORT      connect to the postgresql cluster at PORT.
                       Default is the postgresql standard port 5432
  -D, --datadir=PATH   download data to PATH. Default: "$datadir"
      --drop           drop geonames data
      --help           display this help and exit
EOF
}
46
# Print an error message to stderr and abort the script.
#   $1 - message text
# Exits with status 23.
fatal()
{
  # printf rather than echo: echo would treat a message such as "-n" or
  # "-e" as an option and print nothing.
  printf '%s\n' "$1" >&2
  exit 23
}
52
# Download the geonames dump "<CC>.zip" for every country code in CCs into
# the directory datadir, creating the directory when necessary.
# Archives that already exist in datadir are kept and not fetched again.
# Globals read: datadir, CCs.
# Aborts via fatal() when the directory cannot be created or a download
# fails.  The working directory of the caller is left unchanged.
dl() {
  local cc zip

  mkdir -p -- "$datadir" || fatal "Cannot create data directory: $datadir"

  for cc in $CCs ; do
    zip="${datadir}/${cc}.zip"
    if [ -f "$zip" ] ; then
      echo "Keeping existing ${cc}.zip ..."
    else
      echo "Fetching ${cc}.zip ..."
      # -f makes curl fail on HTTP errors instead of saving the error page.
      # The download goes to a temporary name first so an interrupted or
      # failed transfer is never mistaken for a complete archive later on.
      if ! curl -f -o "${zip}.part" \
           "https://download.geonames.org/export/dump/${cc}.zip" ; then
        rm -f -- "${zip}.part"
        fatal "Download failed: ${cc}.zip"
      fi
      mv -- "${zip}.part" "$zip" || fatal "Cannot store ${cc}.zip in $datadir"
    fi
  done
}
65
# Extract "<CC>.txt" from "<CC>.zip" inside datadir for every country code
# in CCs.  An already extracted "<CC>.txt" is reused.
# Globals read: datadir, CCs.
# Aborts via fatal() when datadir or an archive is missing, or when
# extraction fails.  The working directory of the caller is left unchanged.
unpack()
{
  local cc zip txt

  [ -d "$datadir" ] || fatal "$datadir is missing!"

  for cc in $CCs ; do
    zip="${datadir}/${cc}.zip"
    txt="${datadir}/${cc}.txt"

    [ -f "$zip" ] || fatal "Missing archive: ${cc}.zip"

    if [ -f "$txt" ] ; then
      echo "Reusing existing ${cc}.txt"
    else
      echo "Extracting ${cc}.zip ..."
      if ! unzip "$zip" "${cc}.txt" -d "$datadir" ; then
        # Do not leave a partial file behind that a later run would reuse.
        rm -f -- "$txt"
        fatal "Extracting ${cc}.zip failed"
      fi
    fi
  done
}
82
# Empty the table waterway.geonames in database $db on port $port.
# Returns the exit status of psql.
drop_data()
{
  local sql="TRUNCATE TABLE waterway.geonames;"

  echo "Dropping geonames data..."
  psql --quiet --port="$port" --dbname="$db" --command="$sql"
}
88
# Run import-geonames.sql (from MYHOME) once per country code in CCs,
# passing the path of the extracted "<CC>.txt" as psql variable "filename".
# Globals read: CCs, datadir, db, port, MYHOME.
# Aborts via fatal() on the first failing import instead of silently
# continuing with the next country.
import_data()
{
  local cc fqfn

  for cc in $CCs ; do
    fqfn="${datadir}/${cc}.txt"
    echo "Importing $fqfn into database..."
    # ON_ERROR_STOP is spelled out as "on" so psql stops at the first SQL
    # error and reports it through its exit status.
    psql -qt -v ON_ERROR_STOP=on -p "$port" -d "$db" \
         -v filename="$fqfn" \
         -f "$MYHOME/import-geonames.sql" \
      || fatal "Import of $fqfn failed"
  done
}
99
# Parse options:

# getopt normalises the command line; when it rejects the arguments the
# help text is shown and the script exits with status 1.
OPTS=$(getopt \
  -l help,db:,port:,datadir:,drop \
  -o D:d:p: -n "$ME" -- "$@") || { usage ; exit 1 ; }

# Replace the positional parameters with getopt's quoted, reordered output.
eval set -- "$OPTS"

while true ; do
  case "$1" in
    --db|-d)
      db="$2"
      shift 2
      ;;
    --port|-p)
      port="$2"
      shift 2
      ;;
    --datadir|-D)
      datadir="$2"
      shift 2
      ;;
    --drop)
      drop=1
      shift 1
      ;;
    --help)
      usage
      exit 0
      ;;
    --)
      shift
      break
      ;;
    *)
      # Should be unreachable while the option lists above match the arms
      # of this case; without it an unhandled word would loop forever.
      echo >&2 "$ME: unhandled option: $1"
      exit 1
      ;;
  esac
done
136
137
# Main ------------------------------------------------------------

# Default mode: download and unpack the dumps, empty the table, then import.
# With --drop only the existing data is removed.
if [ "$drop" -eq 0 ] ; then
  dl
  unpack
  # Do not import on top of stale rows when emptying the table failed.
  drop_data || fatal "Dropping geonames data failed"
  import_data
else
  drop_data
fi