summaryrefslogtreecommitdiff
path: root/sources
diff options
context:
space:
mode:
authorJochen Topf <jochen@topf.org>2010-11-04 20:31:01 +0100
committerJochen Topf <jochen@topf.org>2010-11-04 20:31:01 +0100
commit0bf8b3ea8c9524a4a8666e5dcb5276c9729d45f6 (patch)
treefe7514fc74432857e905ddcc2b6de9290ad6ea3e /sources
parent94748c28692398b953a385a3316162a3b141f5d8 (diff)
downloadtaginfo-0bf8b3ea8c9524a4a8666e5dcb5276c9729d45f6.tar
taginfo-0bf8b3ea8c9524a4a8666e5dcb5276c9729d45f6.tar.gz
New report: characters in keys
Diffstat (limited to 'sources')
-rw-r--r--sources/db/post.sql7
-rw-r--r--sources/db/pre.sql1
-rwxr-xr-xsources/db/update.sh3
-rwxr-xr-xsources/db/update_characters.pl66
4 files changed, 77 insertions, 0 deletions
diff --git a/sources/db/post.sql b/sources/db/post.sql
index 2316657..8f0b2fb 100644
--- a/sources/db/post.sql
+++ b/sources/db/post.sql
@@ -38,6 +38,13 @@ INSERT INTO stats (key, value) SELECT 'num_keypairs_on_nodes', count(*) FROM
INSERT INTO stats (key, value) SELECT 'num_keypairs_on_ways', count(*) FROM keypairs WHERE count_ways > 0;
INSERT INTO stats (key, value) SELECT 'num_keypairs_on_relations', count(*) FROM keypairs WHERE count_relations > 0;
+INSERT INTO stats (key, value) SELECT 'characters_in_keys_plain', count(*) FROM keys WHERE characters='plain';
+INSERT INTO stats (key, value) SELECT 'characters_in_keys_colon', count(*) FROM keys WHERE characters='colon';
+INSERT INTO stats (key, value) SELECT 'characters_in_keys_letters', count(*) FROM keys WHERE characters='letters';
+INSERT INTO stats (key, value) SELECT 'characters_in_keys_space', count(*) FROM keys WHERE characters='space';
+INSERT INTO stats (key, value) SELECT 'characters_in_keys_problem', count(*) FROM keys WHERE characters='problem';
+INSERT INTO stats (key, value) SELECT 'characters_in_keys_rest', count(*) FROM keys WHERE characters='rest';
+
UPDATE keys SET prevalent_values=(
SELECT group_concat(value, '|') FROM (
SELECT value FROM tags t
diff --git a/sources/db/pre.sql b/sources/db/pre.sql
index 0163089..998b20e 100644
--- a/sources/db/pre.sql
+++ b/sources/db/pre.sql
@@ -47,6 +47,7 @@ CREATE TABLE keys (
in_josm INTEGER DEFAULT 0,
in_potlatch INTEGER DEFAULT 0,
in_merkaartor INTEGER DEFAULT 0,
+ characters VARCHAR,
prevalent_values TEXT
);
diff --git a/sources/db/update.sh b/sources/db/update.sh
index 3a48e74..4782219 100755
--- a/sources/db/update.sh
+++ b/sources/db/update.sh
@@ -30,6 +30,9 @@ cd $DIR
bzcat $DIR/planet.osm.bz2 | $HERE/osmium_tagstats -
cd $HERE
+echo "Running update_characters..."
+./update_characters.pl
+
echo "Running post.sql..."
perl -pe "s|__DIR__|$DIR|" post.sql | sqlite3 $DATABASE
diff --git a/sources/db/update_characters.pl b/sources/db/update_characters.pl
new file mode 100755
index 0000000..2161260
--- /dev/null
+++ b/sources/db/update_characters.pl
@@ -0,0 +1,66 @@
+#!/usr/bin/perl
+#------------------------------------------------------------------------------
+#
+# Taginfo source: DB
+#
+# update_characters.pl
+#
+#------------------------------------------------------------------------------
+#
+# Copyright (C) 2010 Jochen Topf <jochen@remote.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#------------------------------------------------------------------------------
+
+use DBI;
+
+my $dir = $ARGV[0] || '.';
+
+my $dbh = DBI->connect("dbi:SQLite:dbname=$dir/taginfo-db.db", '', '');
+$dbh->{unicode} = 1;
+#$dbh->{sqlite_unicode} = 1;
+
+my @regexes = (
+ ['plain', qr{^[a-z]([a-z_]*[a-z])?$}],
+ ['colon', qr{^[a-z][a-z_:]*[a-z]$}],
+ ['letters', qr{^[\p{L}\p{M}]([\p{L}\p{M}\p{N}_:]*[\p{L}\p{M}\p{N}])?$}],
+ ['space', qr{[\s\p{Z}]}],
+ ['problem', qr{[=+/&<>;\@'"?%#\\,\p{C}]}]
+);
+
+my %keys;
+my $results = $dbh->selectcol_arrayref('SELECT key FROM keys');
+
+ROW: foreach my $key (@$results) {
+ $keys{$key} = 'rest';
+ foreach my $r (@regexes) {
+ if ($key =~ $r->[1]) {
+ $keys{$key} = $r->[0];
+ next ROW;
+ }
+ }
+}
+
+$dbh->do('BEGIN TRANSACTION');
+
+foreach my $key (keys %keys) {
+ $dbh->do('UPDATE keys SET characters=? WHERE key=?', undef, $keys{$key}, $key);
+}
+
+$dbh->do('COMMIT');
+
+
+#-- THE END -------------------------------------------------------------------