From 0bf8b3ea8c9524a4a8666e5dcb5276c9729d45f6 Mon Sep 17 00:00:00 2001 From: Jochen Topf Date: Thu, 4 Nov 2010 20:31:01 +0100 Subject: New report: characters in keys --- sources/db/post.sql | 7 +++++ sources/db/pre.sql | 1 + sources/db/update.sh | 3 ++ sources/db/update_characters.pl | 66 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+) create mode 100755 sources/db/update_characters.pl (limited to 'sources/db') diff --git a/sources/db/post.sql b/sources/db/post.sql index 2316657..8f0b2fb 100644 --- a/sources/db/post.sql +++ b/sources/db/post.sql @@ -38,6 +38,13 @@ INSERT INTO stats (key, value) SELECT 'num_keypairs_on_nodes', count(*) FROM INSERT INTO stats (key, value) SELECT 'num_keypairs_on_ways', count(*) FROM keypairs WHERE count_ways > 0; INSERT INTO stats (key, value) SELECT 'num_keypairs_on_relations', count(*) FROM keypairs WHERE count_relations > 0; +INSERT INTO stats (key, value) SELECT 'characters_in_keys_plain', count(*) FROM keys WHERE characters='plain'; +INSERT INTO stats (key, value) SELECT 'characters_in_keys_colon', count(*) FROM keys WHERE characters='colon'; +INSERT INTO stats (key, value) SELECT 'characters_in_keys_letters', count(*) FROM keys WHERE characters='letters'; +INSERT INTO stats (key, value) SELECT 'characters_in_keys_space', count(*) FROM keys WHERE characters='space'; +INSERT INTO stats (key, value) SELECT 'characters_in_keys_problem', count(*) FROM keys WHERE characters='problem'; +INSERT INTO stats (key, value) SELECT 'characters_in_keys_rest', count(*) FROM keys WHERE characters='rest'; + UPDATE keys SET prevalent_values=( SELECT group_concat(value, '|') FROM ( SELECT value FROM tags t diff --git a/sources/db/pre.sql b/sources/db/pre.sql index 0163089..998b20e 100644 --- a/sources/db/pre.sql +++ b/sources/db/pre.sql @@ -47,6 +47,7 @@ CREATE TABLE keys ( in_josm INTEGER DEFAULT 0, in_potlatch INTEGER DEFAULT 0, in_merkaartor INTEGER DEFAULT 0, + characters VARCHAR, prevalent_values TEXT ); diff --git a/sources/db/update.sh b/sources/db/update.sh index 3a48e74..4782219 100755 --- a/sources/db/update.sh +++ b/sources/db/update.sh @@ -30,6 +30,9 @@ cd $DIR bzcat $DIR/planet.osm.bz2 | $HERE/osmium_tagstats - cd $HERE +echo "Running update_characters..." +./update_characters.pl + echo "Running post.sql..." perl -pe "s|__DIR__|$DIR|" post.sql | sqlite3 $DATABASE diff --git a/sources/db/update_characters.pl b/sources/db/update_characters.pl new file mode 100755 index 0000000..2161260 --- /dev/null +++ b/sources/db/update_characters.pl @@ -0,0 +1,66 @@ +#!/usr/bin/perl +#------------------------------------------------------------------------------ +# +# Taginfo source: DB +# +# update_characters.pl +# +#------------------------------------------------------------------------------ +# +# Copyright (C) 2010 Jochen Topf +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +#------------------------------------------------------------------------------ + +use DBI; + +my $dir = $ARGV[0] || '.'; + +my $dbh = DBI->connect("dbi:SQLite:dbname=$dir/taginfo-db.db", '', ''); +$dbh->{unicode} = 1; +#$dbh->{sqlite_unicode} = 1; + +my @regexes = ( + ['plain', qr{^[a-z]([a-z_]*[a-z])?$}], + ['colon', qr{^[a-z][a-z_:]*[a-z]$}], + ['letters', qr{^[\p{L}\p{M}]([\p{L}\p{M}\p{N}_:]*[\p{L}\p{M}\p{N}])?$}], + ['space', qr{[\s\p{Z}]}], + ['problem', qr{[=+/&<>;\@'"?%#\\,\p{C}]}] +); + +my %keys; +my $results = $dbh->selectcol_arrayref('SELECT key FROM keys'); + +ROW: foreach my $key (@$results) { + $keys{$key} = 'rest'; + foreach my $r (@regexes) { + if ($key =~ $r->[1]) { + $keys{$key} = $r->[0]; + next ROW; + } + } +} + +$dbh->do('BEGIN TRANSACTION'); + +foreach my $key (keys %keys) { + $dbh->do('UPDATE keys SET characters=? WHERE key=?', undef, $keys{$key}, $key); +} + +$dbh->do('COMMIT'); + + +#-- THE END ------------------------------------------------------------------- -- cgit v1.2.3