summaryrefslogtreecommitdiff
path: root/sources
diff options
context:
space:
mode:
Diffstat (limited to 'sources')
-rwxr-xr-xsources/db/osmium_tagstatsbin0 -> 135169 bytes
-rw-r--r--sources/db/post.sql55
-rw-r--r--sources/db/pre.sql76
-rwxr-xr-xsources/db/update.sh37
-rwxr-xr-xsources/josm/import_josm.rb97
-rw-r--r--sources/josm/post.sql14
-rw-r--r--sources/josm/pre.sql60
-rwxr-xr-xsources/josm/update.sh38
-rw-r--r--sources/master/languages.sql50
-rw-r--r--sources/master/master.sql90
-rwxr-xr-xsources/master/update.sh27
-rwxr-xr-xsources/update_all.sh47
-rw-r--r--sources/wiki/README22
-rwxr-xr-xsources/wiki/get_page_list.rb123
-rwxr-xr-xsources/wiki/get_wiki_data.rb311
-rw-r--r--sources/wiki/lib/mediawikiapi.rb65
-rw-r--r--sources/wiki/post.sql28
-rw-r--r--sources/wiki/pre.sql80
-rwxr-xr-xsources/wiki/update.sh40
19 files changed, 1260 insertions, 0 deletions
diff --git a/sources/db/osmium_tagstats b/sources/db/osmium_tagstats
new file mode 100755
index 0000000..618ee6d
--- /dev/null
+++ b/sources/db/osmium_tagstats
Binary files differ
diff --git a/sources/db/post.sql b/sources/db/post.sql
new file mode 100644
index 0000000..2316657
--- /dev/null
+++ b/sources/db/post.sql
@@ -0,0 +1,55 @@
+--
+-- Taginfo source: Database
+--
+-- post.sql
+--
+
+.bail ON
+
+PRAGMA journal_mode = OFF;
+PRAGMA synchronous = OFF;
+PRAGMA count_changes = OFF;
+PRAGMA temp_store = MEMORY;
+PRAGMA cache_size = 5000000;
+
+ATTACH DATABASE '__DIR__/count.db' AS count;
+INSERT INTO stats SELECT * FROM count.stats;
+DETACH DATABASE count;
+
+CREATE UNIQUE INDEX keys_key_idx ON keys (key);
+CREATE INDEX tags_key_idx ON tags (key);
+-- CREATE UNIQUE INDEX tags_key_value_idx ON tags (key, value);
+CREATE INDEX keypairs_key1_idx ON keypairs (key1);
+CREATE INDEX keypairs_key2_idx ON keypairs (key2);
+CREATE UNIQUE INDEX key_distributions_key_idx ON key_distributions (key);
+
+INSERT INTO stats (key, value) SELECT 'num_keys', count(*) FROM keys;
+INSERT INTO stats (key, value) SELECT 'num_keys_on_nodes', count(*) FROM keys WHERE count_nodes > 0;
+INSERT INTO stats (key, value) SELECT 'num_keys_on_ways', count(*) FROM keys WHERE count_ways > 0;
+INSERT INTO stats (key, value) SELECT 'num_keys_on_relations', count(*) FROM keys WHERE count_relations > 0;
+
+INSERT INTO stats (key, value) SELECT 'num_tags', count(*) FROM tags;
+INSERT INTO stats (key, value) SELECT 'num_tags_on_nodes', count(*) FROM tags WHERE count_nodes > 0;
+INSERT INTO stats (key, value) SELECT 'num_tags_on_ways', count(*) FROM tags WHERE count_ways > 0;
+INSERT INTO stats (key, value) SELECT 'num_tags_on_relations', count(*) FROM tags WHERE count_relations > 0;
+
+INSERT INTO stats (key, value) SELECT 'num_keypairs', count(*) FROM keypairs;
+INSERT INTO stats (key, value) SELECT 'num_keypairs_on_nodes', count(*) FROM keypairs WHERE count_nodes > 0;
+INSERT INTO stats (key, value) SELECT 'num_keypairs_on_ways', count(*) FROM keypairs WHERE count_ways > 0;
+INSERT INTO stats (key, value) SELECT 'num_keypairs_on_relations', count(*) FROM keypairs WHERE count_relations > 0;
+
+UPDATE keys SET prevalent_values=(
+ SELECT group_concat(value, '|') FROM (
+ SELECT value FROM tags t
+ WHERE t.key = keys.key
+ AND t.count_all > keys.count_all / 100
+ ORDER BY t.count_all DESC
+ LIMIT 10
+
+ )
+ ) WHERE values_all < 1000;
+
+ANALYZE;
+
+UPDATE meta SET update_end=datetime('now');
+
diff --git a/sources/db/pre.sql b/sources/db/pre.sql
new file mode 100644
index 0000000..1bb00ea
--- /dev/null
+++ b/sources/db/pre.sql
@@ -0,0 +1,76 @@
+--
+-- Taginfo source: Database
+--
+-- pre.sql
+--
+
+.bail ON
+
+DROP TABLE IF EXISTS meta;
+
+CREATE TABLE meta (
+ source_id TEXT,
+ source_name TEXT,
+ update_start TEXT,
+ update_end TEXT,
+ data_until TEXT
+);
+
+INSERT INTO meta (source_id, source_name, update_start) SELECT 'db', 'Database', datetime('now');
+
+DROP TABLE IF EXISTS stats;
+
+CREATE TABLE stats (
+ key TEXT,
+ value INT64
+);
+
+DROP TABLE IF EXISTS keys;
+
+CREATE TABLE keys (
+ key VARCHAR,
+ count_all INTEGER,
+ count_nodes INTEGER,
+ count_ways INTEGER,
+ count_relations INTEGER,
+ values_all INTEGER,
+ values_nodes INTEGER,
+ values_ways INTEGER,
+ values_relations INTEGER,
+ users_all INTEGER,
+ users_nodes INTEGER,
+ users_ways INTEGER,
+ users_relations INTEGER,
+ grids INTEGER,
+ prevalent_values TEXT
+);
+
+DROP TABLE IF EXISTS key_distributions;
+
+CREATE TABLE key_distributions (
+ key VARCHAR,
+ png BLOB
+);
+
+DROP TABLE IF EXISTS tags;
+
+CREATE TABLE tags (
+ key VARCHAR,
+ value VARCHAR,
+ count_all INTEGER,
+ count_nodes INTEGER,
+ count_ways INTEGER,
+ count_relations INTEGER
+);
+
+DROP TABLE IF EXISTS keypairs;
+
+CREATE TABLE keypairs (
+ key1 VARCHAR,
+ key2 VARCHAR,
+ count_all INTEGER,
+ count_nodes INTEGER,
+ count_ways INTEGER,
+ count_relations INTEGER
+);
+
diff --git a/sources/db/update.sh b/sources/db/update.sh
new file mode 100755
index 0000000..3a48e74
--- /dev/null
+++ b/sources/db/update.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+#
+# Taginfo source: DB
+#
+# update.sh DIR
+#
+
+set -e
+
+DIR=$1
+
+if [ "x" = "x$DIR" ]; then
+ echo "Usage: update.sh DIR"
+ exit 1
+fi
+
+echo -n "Start db: "; date
+
+DATABASE=$DIR/taginfo-db.db
+
+rm -f $DATABASE
+rm -f $DIR/count.db
+
+echo "Running pre.sql..."
+sqlite3 $DATABASE <pre.sql
+
+echo "Running count..."
+HERE=`pwd`
+cd $DIR
+bzcat $DIR/planet.osm.bz2 | $HERE/osmium_tagstats -
+cd $HERE
+
+echo "Running post.sql..."
+perl -pe "s|__DIR__|$DIR|" post.sql | sqlite3 $DATABASE
+
+echo -n "Done db: "; date
+
diff --git a/sources/josm/import_josm.rb b/sources/josm/import_josm.rb
new file mode 100755
index 0000000..9f42a40
--- /dev/null
+++ b/sources/josm/import_josm.rb
@@ -0,0 +1,97 @@
+#!/usr/bin/ruby
+#------------------------------------------------------------------------------
+#
+# Taginfo source: JOSM
+#
+# import_josm.rb
+#
+#------------------------------------------------------------------------------
+#
+# Copyright (C) 2010 Jochen Topf <jochen@remote.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#------------------------------------------------------------------------------
+
+require 'pp'
+require 'sqlite3'
+require 'rexml/document'
+
+class Rule
+
+ attr_accessor :k, :v, :b
+ attr_accessor :scale_min, :scale_max
+ attr_accessor :icon_source
+ attr_accessor :line_width, :line_realwidth
+
+ attr_reader :rule
+
+ def initialize(rule)
+ @rule = rule
+ end
+
+ def insert(db)
+ db.execute(
+ 'INSERT INTO josm_style_rules (k, v, b, scale_min, scale_max, icon_source, line_width, line_realwidth, rule) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
+ k,
+ v,
+ b,
+ scale_min,
+ scale_max,
+ icon_source,
+ line_width,
+ line_realwidth,
+ rule
+ )
+ end
+
+end
+
+dir = ARGV[0] || '.'
+
+db = SQLite3::Database.new(dir + '/taginfo-josm.db')
+
+db.execute('BEGIN TRANSACTION');
+
+file = File.new(dir + '/elemstyles.xml')
+doc = REXML::Document.new(file)
+
+doc.elements.each('/rules/rule') do |rule_element|
+ rule = Rule.new(rule_element.to_s)
+ rule_element.elements.each do |element|
+ case element.name
+ when 'condition'
+ rule.k = element.attributes['k']
+ rule.v = element.attributes['v']
+ rule.b = element.attributes['b']
+ when 'scale_min'
+ rule.scale_min = element.text
+ when 'scale_max'
+ rule.scale_max = element.text
+ when 'icon'
+ rule.icon_source = element.attributes['src']
+ when 'line'
+ rule.line_width = element.attributes['width']
+ rule.line_realwidth = element.attributes['realwidth']
+ end
+ end
+# pp "rule #{rule.k} #{rule.v}"
+ rule.insert(db)
+end
+
+db.execute('COMMIT');
+
+
+#-- THE END -------------------------------------------------------------------
diff --git a/sources/josm/post.sql b/sources/josm/post.sql
new file mode 100644
index 0000000..d02f8c8
--- /dev/null
+++ b/sources/josm/post.sql
@@ -0,0 +1,14 @@
+--
+-- Taginfo source: JOSM
+--
+-- post.sql
+--
+
+.bail ON
+
+CREATE INDEX josm_style_rules_idx ON josm_style_rules (style, k, v);
+
+ANALYZE;
+
+UPDATE meta SET update_end=datetime('now');
+
diff --git a/sources/josm/pre.sql b/sources/josm/pre.sql
new file mode 100644
index 0000000..42a4213
--- /dev/null
+++ b/sources/josm/pre.sql
@@ -0,0 +1,60 @@
+--
+-- Taginfo source: JOSM
+--
+-- pre.sql
+--
+
+.bail ON
+
+DROP TABLE IF EXISTS meta;
+
+CREATE TABLE meta (
+ source_id TEXT,
+ source_name TEXT,
+ update_start TEXT,
+ update_end TEXT,
+ data_until TEXT
+);
+
+INSERT INTO meta (source_id, source_name, update_start, data_until) SELECT 'josm', 'JOSM', datetime('now'), datetime('now');
+
+DROP TABLE IF EXISTS stats;
+
+CREATE TABLE stats (
+ key TEXT,
+ value INT64
+);
+
+--
+-- josm_styles
+--
+-- Contains list of JOSM styles.
+--
+
+DROP TABLE IF EXISTS josm_styles;
+
+CREATE TABLE josm_styles (
+ style VARCHAR
+);
+
+--
+-- josm_style_rules
+--
+-- Contains data about JOSM style rules.
+--
+
+DROP TABLE IF EXISTS josm_style_rules;
+
+CREATE TABLE josm_style_rules (
+ style VARCHAR,
+ k VARCHAR,
+ v VARCHAR,
+ b VARCHAR,
+ scale_min INTEGER,
+ scale_max INTEGER,
+ icon_source VARCHAR,
+ line_width INTEGER,
+ line_realwidth INTEGER,
+ rule VARCHAR
+);
+
diff --git a/sources/josm/update.sh b/sources/josm/update.sh
new file mode 100755
index 0000000..6eca722
--- /dev/null
+++ b/sources/josm/update.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+#
+# Taginfo source: JOSM
+#
+# update.sh DIR
+#
+
+set -e
+
+DIR=$1
+
+if [ "x" = "x$DIR" ]; then
+ echo "Usage: update.sh DIR"
+ exit 1
+fi
+
+echo -n "Start josm: "; date
+
+DATABASE=$DIR/taginfo-josm.db
+ELEMSTYLES=$DIR/elemstyles.xml
+
+rm -f $DATABASE
+rm -f $ELEMSTYLES
+
+echo "Getting styles..."
+wget -O $ELEMSTYLES http://josm.openstreetmap.de/svn/trunk/styles/standard/elemstyles.xml
+
+echo "Running pre.sql..."
+sqlite3 $DATABASE <pre.sql
+
+echo "Running import..."
+./import_josm.rb $DIR
+
+echo "Running post.sql..."
+sqlite3 $DATABASE <post.sql
+
+echo -n "Done josm: "; date
+
diff --git a/sources/master/languages.sql b/sources/master/languages.sql
new file mode 100644
index 0000000..a04db33
--- /dev/null
+++ b/sources/master/languages.sql
@@ -0,0 +1,50 @@
+--
+-- Taginfo
+--
+-- languages.sql
+--
+
+.bail ON
+
+DROP TABLE IF EXISTS languages;
+
+CREATE TABLE languages (
+ code VARCHAR,
+ english_name VARCHAR,
+ native_name VARCHAR
+);
+
+INSERT INTO languages VALUES ('ar', 'Arabic', 'العربية');
+INSERT INTO languages VALUES ('bg', 'Bulgarian', 'български език');
+INSERT INTO languages VALUES ('cz', 'Czech', 'česky');
+INSERT INTO languages VALUES ('da', 'Danish', 'dansk');
+INSERT INTO languages VALUES ('de', 'German', 'Deutsch');
+INSERT INTO languages VALUES ('en', 'English', 'English');
+INSERT INTO languages VALUES ('es', 'Spanish', 'español');
+INSERT INTO languages VALUES ('et', 'Estonian', 'eesti');
+INSERT INTO languages VALUES ('fi', 'Finish', 'suomi');
+INSERT INTO languages VALUES ('fr', 'French', 'français');
+INSERT INTO languages VALUES ('he', 'Hebrew', 'עברית');
+INSERT INTO languages VALUES ('hr', 'Croatian', 'hrvatski');
+INSERT INTO languages VALUES ('hu', 'Hungarian', 'Magyar');
+INSERT INTO languages VALUES ('it', 'Italian', 'Italiano');
+INSERT INTO languages VALUES ('ja', 'Japanese', '日本語');
+INSERT INTO languages VALUES ('nl', 'Dutch', 'Nederlands');
+INSERT INTO languages VALUES ('no', 'Norwegian', 'Norsk');
+INSERT INTO languages VALUES ('pl', 'Polish', 'polski');
+INSERT INTO languages VALUES ('pt', 'Portuguese', 'Português');
+INSERT INTO languages VALUES ('pt-br', 'Brazilian Portuguese', 'Português do Brasil');
+INSERT INTO languages VALUES ('ro', 'Romanian', 'română');
+INSERT INTO languages VALUES ('ro-md', 'Moldovan', 'română (Moldova)');
+INSERT INTO languages VALUES ('ru', 'Russian', 'русский язык');
+-- INSERT INTO languages VALUES ('sh', '', ''); -- not in ISO 639-1
+INSERT INTO languages VALUES ('sq', 'Albanian', 'Shqip');
+INSERT INTO languages VALUES ('sv', 'Swedish', 'svenska');
+INSERT INTO languages VALUES ('tr', 'Turkish', 'Türkçe');
+INSERT INTO languages VALUES ('uk', 'Ukrainian', 'українська');
+INSERT INTO languages VALUES ('zh', 'Chinese', '中文');
+INSERT INTO languages VALUES ('zh-hans', 'Chinese', '中文'); -- hans?
+-- INSERT INTO languages VALUES ('', '', '');
+
+ANALYZE;
+
diff --git a/sources/master/master.sql b/sources/master/master.sql
new file mode 100644
index 0000000..c446ae7
--- /dev/null
+++ b/sources/master/master.sql
@@ -0,0 +1,90 @@
+--
+-- Taginfo
+--
+-- master.sql
+--
+
+.bail ON
+
+ATTACH DATABASE '__DIR__/db/taginfo-db.db' AS db;
+ATTACH DATABASE '__DIR__/wiki/taginfo-wiki.db' AS wiki;
+ATTACH DATABASE '__DIR__/josm/taginfo-josm.db' AS josm;
+
+DROP TABLE IF EXISTS master_meta;
+
+CREATE TABLE master_meta (
+ source_id TEXT,
+ source_name TEXT,
+ update_start TEXT,
+ update_end TEXT,
+ data_until TEXT
+);
+
+INSERT INTO master_meta SELECT * FROM db.meta
+ UNION SELECT * FROM josm.meta
+ UNION SELECT * FROM wiki.meta;
+
+DROP TABLE IF EXISTS master_stats;
+
+CREATE TABLE master_stats (
+ key TEXT,
+ value INT64
+);
+
+INSERT INTO master_stats SELECT * FROM db.stats
+ UNION SELECT * FROM josm.stats
+ UNION SELECT * FROM wiki.stats;
+
+DROP TABLE IF EXISTS popular_keys;
+
+CREATE TABLE popular_keys (
+ key VARCHAR,
+ count INTEGER,
+ users INTEGER,
+ wikipages INTEGER DEFAULT 0,
+ in_wiki INTEGER DEFAULT 0,
+ in_josm INTEGER DEFAULT 0,
+ scale_count REAL,
+ scale_users REAL,
+ scale_wiki REAL,
+ scale_josm REAL,
+ scale_name REAL,
+ scale1 REAL,
+ scale2 REAL
+);
+
+INSERT INTO popular_keys (key, count, users)
+ SELECT key, count_all, users_all FROM db.keys WHERE count_all > 1000 GROUP BY key;
+
+-- count number of wikipages for each key
+UPDATE popular_keys SET wikipages = (SELECT count(*) FROM wiki.wikipages w WHERE w.key=popular_keys.key);
+
+UPDATE popular_keys SET in_wiki=1 WHERE key IN (SELECT key FROM wiki.wikipages);
+UPDATE popular_keys SET in_josm=1 WHERE key IN (SELECT k FROM josm.josm_style_rules);
+
+DROP TABLE IF EXISTS popular_metadata;
+
+CREATE TABLE popular_metadata (
+ keys INTEGER,
+ count_min INTEGER,
+ count_max INTEGER,
+ count_delta INTEGER,
+ users_min INTEGER,
+ users_max INTEGER,
+ users_delta INTEGER
+);
+
+INSERT INTO popular_metadata (keys, count_min, count_max, count_delta, users_min, users_max, users_delta)
+ SELECT count(*), min(count), max(count), max(count) - min(count), min(users), max(users), max(users) - min(users) FROM popular_keys;
+
+UPDATE popular_keys SET scale_count = CAST (count - (SELECT count_min FROM popular_metadata) AS REAL) / (SELECT count_delta FROM popular_metadata);
+UPDATE popular_keys SET scale_users = CAST (users - (SELECT users_min FROM popular_metadata) AS REAL) / (SELECT users_delta FROM popular_metadata);
+UPDATE popular_keys SET scale_wiki = CAST (wikipages AS REAL) / (SELECT max(wikipages) FROM popular_keys);
+UPDATE popular_keys SET scale_josm = in_josm;
+UPDATE popular_keys SET scale_name = 1;
+UPDATE popular_keys SET scale_name = 0 WHERE key LIKE '%:%';
+
+UPDATE popular_keys SET scale1 = 10 * scale_count + 8 * scale_users + 2 * scale_wiki + 1 * scale_josm + 2 * scale_name;
+
+ANALYZE;
+
diff --git a/sources/master/update.sh b/sources/master/update.sh
new file mode 100755
index 0000000..9b7040e
--- /dev/null
+++ b/sources/master/update.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# Taginfo Master DB
+#
+# update.sh DIR
+#
+
+set -e
+
+DIR=$1
+
+if [ "x" = "x$DIR" ]; then
+ echo "Usage: update.sh DIR"
+ exit 1
+fi
+
+echo -n "Start master: "; date
+
+DATABASE=$DIR/taginfo-master.db
+
+rm -f $DATABASE
+
+perl -pe "s|__DIR__|$DIR|" master.sql | sqlite3 $DATABASE
+sqlite3 $DATABASE <languages.sql
+
+echo -n "Done master: "; date
+
diff --git a/sources/update_all.sh b/sources/update_all.sh
new file mode 100755
index 0000000..4011a2d
--- /dev/null
+++ b/sources/update_all.sh
@@ -0,0 +1,47 @@
+#!/bin/sh
+#
+# Taginfo
+#
+# update_all.sh DIR
+#
+
+SOURCES="db josm wiki"
+
+set -e
+
+DIR=$1
+
+if [ "x" = "x$DIR" ]; then
+ echo "Usage: update.sh DIR"
+ exit 1
+fi
+
+exec >$DIR/update_all.log 2>&1
+
+echo -n "Start: "; date
+
+mkdir -p $DIR/download
+
+for source in $SOURCES; do
+ echo "====================================="
+ echo "Running $source/update.sh..."
+ mkdir -p $DIR/$source
+ cd $source
+ ./update.sh $DIR/$source
+ cd ..
+ echo "Running bzip2..."
+ bzip2 -9 -c $DIR/$source/taginfo-$source.db >$DIR/download/taginfo-$source.db.bz2
+ echo "Done."
+done
+
+echo "====================================="
+echo "Running master/update.sh..."
+cd master
+./update.sh $DIR
+cd ..
+echo "Running bzip2..."
+bzip2 -9 -c $DIR/taginfo-master.db >$DIR/download/taginfo-master.db.bz2
+echo "Done."
+
+echo -n "Done: "; date
+
diff --git a/sources/wiki/README b/sources/wiki/README
new file mode 100644
index 0000000..6c3a621
--- /dev/null
+++ b/sources/wiki/README
@@ -0,0 +1,22 @@
+
+Taginfo Sources: Wiki
+=====================
+
+PREREQUISITES
+-------------
+
+On Debian/Ubuntu you need the following packages:
+ libjson-ruby sqlite3 packages libsqlite3-ruby1.8
+
+
+RUNNING
+-------
+
+Run ./update.sh DIR
+
+where DIR is the name of a directory where the data will be stored.
+
+The file 'allpages.list' is currently not used. But because we get the
+information from the wiki anyway, we just store it. Maybe in the future
+it is of some use.
+
diff --git a/sources/wiki/get_page_list.rb b/sources/wiki/get_page_list.rb
new file mode 100755
index 0000000..649efbe
--- /dev/null
+++ b/sources/wiki/get_page_list.rb
@@ -0,0 +1,123 @@
+#!/usr/bin/ruby
+#------------------------------------------------------------------------------
+#
+# get_page_list.rb [DIR]
+#
+#------------------------------------------------------------------------------
+#
+# Gets the list of all wiki pages from the OSM wiki.
+#
+# Two files will be written: 'allpages.list' contains all pages in the wiki,
+# 'tagpages.list' contains all pages about tags from the wiki. Both have the
+# format:
+#
+# <type> TAB <namespace> TAB <title>
+#
+# The <type> is either 'page' or 'redirect', depending on whether this is a
+# proper wiki page or a redirect to another wiki page, respectively.
+#
+# The <namespaces> gives the namespace this page is in. This is empty for the
+# main namespace.
+#
+# <title> is the full title of the wiki page including leading namespaces etc.
+#
+# The files will be created in DIR or in the current directory, if no directory
+# was given on the command line.
+#
+#------------------------------------------------------------------------------
+#
+# Copyright (C) 2010 Jochen Topf <jochen@remote.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#------------------------------------------------------------------------------
+
+require 'pp'
+
+require 'net/http'
+require 'uri'
+require 'json'
+
+require 'lib/mediawikiapi.rb'
+
+#------------------------------------------------------------------------------
+
+def get_namespaces(api)
+ data = api.query(:meta => 'siteinfo', :siprop => 'namespaces')
+ namespaces = {}
+ data['query']['namespaces'].values.each do |ns|
+ if ns['canonical'] =~ /^[A-Z]{2}$/
+ namespaces[ns['canonical']] = ns['id']
+ end
+ end
+ namespaces
+end
+
+def get_page_list(api, namespaceid, options)
+ apfrom = ''
+ loop do
+ data = api.query(:list => 'allpages', :aplimit => 'max', :apfrom => apfrom, :apnamespace => namespaceid, :apfilterredir => options[:redirect] ? 'redirects' : 'nonredirects')
+# pp data
+ data['query']['allpages'].each do |h|
+ yield h['title'].gsub(/\s/, '_')
+ end
+ if data['query-continue']
+ apfrom = data['query-continue']['allpages']['apfrom'].gsub(/\s/, '_')
+# puts "apfrom=#{apfrom}"
+ else
+ return
+ end
+ end
+end
+
+#------------------------------------------------------------------------------
+
+dir = ARGV[0] || '.'
+
+api = MediaWikiAPI::API.new('wiki.openstreetmap.org')
+api.add_header('User-agent', 'taginfo/0.1 (jochen@remote.org)')
+
+namespaces = get_namespaces(api)
+
+# add main namespace
+namespaces[''] = 0
+
+allpages = File.open(dir + '/allpages.list', 'w')
+tagpages = File.open(dir + '/tagpages.list', 'w')
+
+namespaces.keys.sort.each do |namespace|
+ id = namespaces[namespace]
+
+ get_page_list(api, id, :redirect => false) do |page|
+ line = ['page', namespace, page].join("\t")
+ allpages.puts line
+ if page =~ /^([^:]+:)?(Key|Tag):(.+)$/
+ tagpages.puts line
+ end
+ end
+
+ get_page_list(api, id, :redirect => true) do |page|
+ line = ['redirect', namespace, page].join("\t")
+ allpages.puts line
+ if page =~ /^([^:]+:)?(Key|Tag):(.+)$/
+ tagpages.puts line
+ end
+ end
+end
+
+tagpages.close
+allpages.close
+
+#-- THE END -------------------------------------------------------------------
diff --git a/sources/wiki/get_wiki_data.rb b/sources/wiki/get_wiki_data.rb
new file mode 100755
index 0000000..742e520
--- /dev/null
+++ b/sources/wiki/get_wiki_data.rb
@@ -0,0 +1,311 @@
+#!/usr/bin/ruby
+#------------------------------------------------------------------------------
+#
+# get_wiki_data.rb [DIR]
+#
+#------------------------------------------------------------------------------
+#
+# Reads all the wiki pages from 'tagpages.list' and gets their content from
+# the OSM wiki. The pages are parsed and the information stored in the
+# sqlite database 'taginfo-wiki.db' which must have been initialized before.
+#
+# All files are in DIR or the current directory if no directory was given on
+# the command line.
+#
+# This script writes copious debugging information to STDOUT. You might want
+# to redirect that to a file.
+#
+#------------------------------------------------------------------------------
+#
+# Copyright (C) 2010 Jochen Topf <jochen@remote.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#------------------------------------------------------------------------------
+
+require 'pp'
+
+require 'json'
+require 'net/http'
+require 'uri'
+require 'sqlite3'
+
+require 'lib/mediawikiapi.rb'
+
+#------------------------------------------------------------------------------
+
+class WikiPage
+
+ @@pages = {}
+
+ attr_accessor :content
+ attr_accessor :description, :image, :group, :onNode, :onWay, :onArea, :onRelation, :has_templ
+ attr_reader :type, :namespace, :title, :tag, :key, :value, :lang, :ttype, :tags_implies, :tags_combination, :tags_linked, :parsed
+
+ def initialize(type, namespace, title)
+ @type = type # 'page' or 'redirect'
+ @namespace = namespace # 'XX' (mediawiki namespace or '')
+ @title = title # wiki page title
+
+ @tag = title.gsub(/^([^:]+:)?(Key|Tag):/, '') # complete tag (key=value)
+ @key = @tag.sub(/=.*/, '') # key
+ if @tag =~ /=/
+ @value = @tag.sub(/.*?=/, '') # value (if any)
+ end
+ if title =~ /^(.*):(Key|Tag):/
+ @lang = $1.downcase # IETF language tag
+ @ttype = $2.downcase # 'tag' or 'key'
+ else
+ @lang = 'en'
+ end
+
+ @has_templ = false
+
+ @tags_implies = []
+ @tags_combination = []
+ @tags_linked = []
+
+ @group = ''
+ @onNode = false
+ @onWay = false
+ @onArea = false
+ @onRelation = false
+
+ @parsed = nil
+
+ @@pages[@title] = self
+ end
+
+ def self.pages
+ @@pages.values.sort{ |a,b| a.title <=> b.title }
+ end
+
+ def self.find(name)
+ @@pages[name]
+ end
+
+ # Has this wiki page a name that we can understand and process?
+ def valid?
+ return false if @lang !~ /^[a-z]{2}(-[a-z0-9-]+)?$/
+ return false if @ttype == 'key' && ! @value.nil?
+ return false if @ttype == 'tag' && @value.nil?
+ return false if @key =~ %r{/}
+ return false if @value =~ %r{/}
+ return true
+ end
+
+ # Return parameters for API call to read this page.
+ def params
+ { :title => title, :action => 'raw' }
+ end
+
+ def add_tag_link(tag)
+ @tags_linked << tag
+ end
+
+ def insert(db)
+ db.execute(
+ "INSERT INTO wikipages (lang, tag, key, value, title, tgroup, type, has_templ, parsed, description, image, on_node, on_way, on_area, on_relation, tags_implies, tags_combination, tags_linked) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+ lang,
+ tag,
+ key,
+ value,
+ title,
+ group,
+ type,
+ has_templ,
+ parsed ? 1 : 0,
+ description,
+ image,
+ onNode ? 1 : 0,
+ onWay ? 1 : 0,
+ onArea ? 1 : 0,
+ onRelation ? 1 : 0,
+ tags_implies. sort.uniq.join(','),
+ tags_combination.sort.uniq.join(','),
+ tags_linked. sort.uniq.join(',')
+ )
+ end
+
+ # Parse content of the wiki page. This will find the templates
+ # and their parameters.
+ def parse_content
+ @parsed = true
+ text = @content
+
+ # dummy template as base context
+ context = [ Template.new ]
+
+ loop do
+ # split text into ('before', 'token', 'after')
+ m = /^(.*?)(\{\{|\}\}|[|=])(.*)$/m.match(text)
+
+ # we are done if there are no more tokens
+ if m.nil?
+ return
+ end
+
+ # do the right thing depending on next token
+ case m[2]
+ when '{{' # start of template
+ context.last.add_parameter(m[1].strip)
+ context << Template.new()
+ when '}}' # end of template
+ context.last.add_parameter(m[1].strip)
+ c = context.pop
+ yield c
+ context.last.add_parameter(c)
+ when '|' # template parameter
+ context.last.add_parameter(m[1].strip)
+ context.last.parname(nil)
+ when '=' # named template parameter
+ parameter_name = (m[1].strip == ':') ? 'subkey' : m[1].strip
+ context.last.parname(parameter_name)
+ end
+
+ # 'after' is our next 'text'
+ text = m[3]
+ end
+ rescue
+ puts "Parsing of page #{title} failed"
+ @parsed = false
+ end
+
+end
+
+#------------------------------------------------------------------------------
+
+class Template
+
+ attr_reader :name, :parameters, :named_parameters
+
+ def initialize()
+ @name = nil
+ @parname = nil
+ @parameters = []
+ @named_parameters = {}
+ end
+
+ def parname(name)
+ @parname = name
+ end
+
+ def add_parameter(value)
+ if value != ''
+ if @parname.nil? # positional parameter
+ # first parameter is really the name of this template
+ if @name.nil?
+ @name = value
+ else
+ @parameters << value
+ end
+ else # named parameter
+ @named_parameters[@parname] ||= []
+ @named_parameters[@parname] << value
+ end
+ end
+ end
+
+end
+
+#------------------------------------------------------------------------------
+
+dir = ARGV[0] || '.'
+
+api = MediaWikiAPI::API.new('wiki.openstreetmap.org', 80, '/w/index.php?')
+api.add_header('User-agent', 'taginfo/0.1 (jochen@remote.org)')
+
+db = SQLite3::Database.new(dir + '/taginfo-wiki.db')
+
+db.execute('BEGIN TRANSACTION');
+
+File.open(dir + '/tagpages.list') do |wikipages|
+ wikipages.each do |line|
+ line.chomp!
+ t = line.split("\t")
+ page = WikiPage.new(t[0], t[1], t[2])
+ puts "page: (#{page.title}) (#{page.type}) (#{page.namespace}) (#{page.tag})"
+
+ if page.valid?
+ res = api.get(page.params)
+ page.content = res.body
+
+ page.parse_content do |template|
+ puts "Template: #{template.name} [#{template.parameters.join(',')}] #{template.named_parameters.inspect}"
+ if template.name == 'Key' || template.name == 'Tag'
+ tag = template.parameters[0]
+ if template.parameters[1]
+ tag += '=' + template.parameters[1]
+ end
+ page.add_tag_link(tag)
+ end
+ if template.name =~ /(Key|Value)Description$/
+ page.has_templ = true
+ end
+ if template.named_parameters['description']
+ desc = []
+ template.named_parameters['description'].each do |i|
+ if i.class == Template
+ desc << ' ' << i.parameters.join('=') << ' '
+ else
+ desc << i
+ end
+ page.description = desc.join('').strip
+ end
+ end
+ if template.named_parameters['image']
+ page.image = template.named_parameters['image'][0]
+ end
+ if template.named_parameters['group']
+ page.group = template.named_parameters['group'][0]
+ end
+ if template.named_parameters['onNode'] == ['yes']
+ page.onNode = true
+ end
+ if template.named_parameters['onWay'] == ['yes']
+ page.onWay = true
+ end
+ if template.named_parameters['onArea'] == ['yes']
+ page.onArea = true
+ end
+ if template.named_parameters['onRelation'] == ['yes']
+ page.onRelation = true
+ end
+ if template.named_parameters['implies']
+ template.named_parameters['implies'].each do |i|
+ if i.class == Template
+ page.tags_implies << i.parameters.join('=')
+ end
+ end
+ end
+ if template.named_parameters['combination']
+ template.named_parameters['combination'].each do |i|
+ if i.class == Template
+ page.tags_combination << i.parameters.join('=')
+ end
+ end
+ end
+ end
+ page.insert(db)
+ else
+ puts "invalid page: #{page.title}"
+ end
+ end
+end
+
+db.execute('COMMIT');
+
+
+#-- THE END -------------------------------------------------------------------
diff --git a/sources/wiki/lib/mediawikiapi.rb b/sources/wiki/lib/mediawikiapi.rb
new file mode 100644
index 0000000..7df131c
--- /dev/null
+++ b/sources/wiki/lib/mediawikiapi.rb
@@ -0,0 +1,65 @@
+#------------------------------------------------------------------------------
+#
+# MediaWikiAPI
+#
+#------------------------------------------------------------------------------
+#
+# Simple helper class to access the Mediawiki API.
+#
+#------------------------------------------------------------------------------
+#
+# Copyright (C) 2010 Jochen Topf <jochen@remote.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#------------------------------------------------------------------------------
+
+module MediaWikiAPI
+
+ class API
+
+ def initialize(host, port=80, path='/w/api.php?')
+ @host = host
+ @port = port
+ @path = path
+ @headers = {}
+ end
+
+ def add_header(name, value)
+ @headers[name] = value
+ end
+
+ def build_path(params)
+ @path + params.to_a.map{ |el| el.join('=') }.join('&')
+ end
+
+ def get(params)
+ path = build_path(params)
+ http = Net::HTTP.start(@host, @port)
+ http.get(path, @headers)
+ end
+
+ def query(params)
+ params[:action] = 'query'
+ params[:format] = 'json'
+ result = get(params)
+ JSON.parse(result.body)
+ end
+
+ end
+
+end
+
+#-- THE END -------------------------------------------------------------------
diff --git a/sources/wiki/post.sql b/sources/wiki/post.sql
new file mode 100644
index 0000000..28e5673
--- /dev/null
+++ b/sources/wiki/post.sql
@@ -0,0 +1,28 @@
+--
+-- Taginfo source: Wiki
+--
+-- post.sql
+--
+
+.bail ON
+
+UPDATE wikipages SET status='r' WHERE type='redirect';
+UPDATE wikipages SET status='p' WHERE type='page' AND has_templ='false';
+UPDATE wikipages SET status='t' WHERE type='page' AND has_templ='true' AND parsed=1;
+UPDATE wikipages SET status='e' WHERE type='page' AND has_templ='true' AND parsed=0;
+
+CREATE INDEX wikipages_key_value_idx ON wikipages(key, value);
+
+INSERT INTO wikipages_keys (key, langs) SELECT key, group_concat(lang || ' ' || status) FROM wikipages WHERE value IS NULL GROUP BY key;
+INSERT INTO wikipages_tags (key, value, langs) SELECT key, value, group_concat(lang || ' ' || status) FROM wikipages WHERE value IS NOT NULL GROUP BY key, value;
+
+INSERT INTO wiki_languages (language, count_pages) SELECT lang, count(*) FROM wikipages GROUP BY lang;
+
+INSERT INTO stats (key, value) SELECT 'wikipages_keys', count(*) FROM wikipages_keys;
+INSERT INTO stats (key, value) SELECT 'wikipages_tags', count(*) FROM wikipages_tags;
+INSERT INTO stats (key, value) SELECT 'wikipages_languages', count(*) FROM wiki_languages;
+
+ANALYZE;
+
+UPDATE meta SET update_end=datetime('now');
+
diff --git a/sources/wiki/pre.sql b/sources/wiki/pre.sql
new file mode 100644
index 0000000..2cb3bfd
--- /dev/null
+++ b/sources/wiki/pre.sql
@@ -0,0 +1,80 @@
+--
+-- Taginfo source: Wiki
+--
+-- pre.sql
+--
+
+.bail ON
+
+DROP TABLE IF EXISTS meta;
+
+CREATE TABLE meta (
+ source_id TEXT,
+ source_name TEXT,
+ update_start TEXT,
+ update_end TEXT,
+ data_until TEXT
+);
+
+INSERT INTO meta (source_id, source_name, update_start, data_until) SELECT 'wiki', 'Wiki', datetime('now'), datetime('now');
+
+DROP TABLE IF EXISTS stats;
+
+CREATE TABLE stats (
+ key TEXT,
+ value INT64
+);
+
+DROP TABLE IF EXISTS wikipages;
+
+CREATE TABLE wikipages (
+ lang VARCHAR,
+ tag VARCHAR,
+ key VARCHAR,
+ value VARCHAR,
+ title VARCHAR,
+ tgroup VARCHAR,
+ type VARCHAR,
+ has_templ INTEGER,
+ parsed INTEGER,
+ description VARCHAR,
+ image VARCHAR,
+ on_node INTEGER,
+ on_way INTEGER,
+ on_area INTEGER,
+ on_relation INTEGER,
+ tags_implies VARCHAR,
+ tags_combination VARCHAR,
+ tags_linked VARCHAR,
+ status VARCHAR
+);
+
+DROP TABLE IF EXISTS wikipages_keys;
+
+CREATE TABLE wikipages_keys (
+ key VARCHAR,
+ langs VARCHAR
+);
+
+DROP TABLE IF EXISTS wikipages_tags;
+
+CREATE TABLE wikipages_tags (
+ key VARCHAR,
+ value VARCHAR,
+ langs VARCHAR
+);
+
+DROP TABLE IF EXISTS wiki_languages;
+
+CREATE TABLE wiki_languages (
+ language VARCHAR,
+ count_pages INT
+);
+
+DROP TABLE IF EXISTS stats;
+
+CREATE TABLE stats (
+ key VARCHAR,
+ value INT64
+);
+
diff --git a/sources/wiki/update.sh b/sources/wiki/update.sh
new file mode 100755
index 0000000..b6f63a6
--- /dev/null
+++ b/sources/wiki/update.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+#
+# Taginfo source: Wiki
+#
+# update.sh DIR
+#
+
+set -e
+
+DIR=$1
+
+if [ "x" = "x$DIR" ]; then
+ echo "Usage: update.sh DIR"
+ exit 1
+fi
+
+echo -n "Start wiki: "; date
+
+DATABASE=$DIR/taginfo-wiki.db
+LOGFILE=$DIR/get_wiki_data.log
+
+rm -f $DIR/allpages.list
+rm -f $DIR/tagpages.list
+rm -f $LOGFILE
+rm -f $DATABASE
+
+echo "Running pre.sql..."
+sqlite3 $DATABASE <pre.sql
+
+echo "Getting page list..."
+./get_page_list.rb $DIR
+
+echo "Getting wiki data..."
+./get_wiki_data.rb $DIR >$LOGFILE
+
+echo "Running post.sql..."
+sqlite3 $DATABASE <post.sql
+
+echo -n "Done wiki: "; date
+