diff options
author | Jochen Topf <jochen@topf.org> | 2010-10-04 18:41:53 +0200 |
---|---|---|
committer | Jochen Topf <jochen@topf.org> | 2010-10-04 18:41:53 +0200 |
commit | 9918c2c4c266a29848ce39fe2496876c66c3a48e (patch) | |
tree | b49fe450d33dcb3c30b37f7bff68fbb475ecec66 /sources/master | |
download | taginfo-9918c2c4c266a29848ce39fe2496876c66c3a48e.tar taginfo-9918c2c4c266a29848ce39fe2496876c66c3a48e.tar.gz |
first commit
Diffstat (limited to 'sources/master')
-rw-r--r-- | sources/master/languages.sql | 50 | ||||
-rw-r--r-- | sources/master/master.sql | 90 | ||||
-rwxr-xr-x | sources/master/update.sh | 27 |
3 files changed, 167 insertions, 0 deletions
diff --git a/sources/master/languages.sql b/sources/master/languages.sql new file mode 100644 index 0000000..a04db33 --- /dev/null +++ b/sources/master/languages.sql @@ -0,0 +1,50 @@ +-- +-- Taginfo +-- +-- languages.sql +-- + +.bail ON + +DROP TABLE IF EXISTS languages; + +CREATE TABLE languages ( + code VARCHAR, + english_name VARCHAR, + native_name VARCHAR +); + +INSERT INTO languages VALUES ('ar', 'Arabic', 'العربية'); +INSERT INTO languages VALUES ('bg', 'Bulgarian', 'български език'); +INSERT INTO languages VALUES ('cz', 'Czech', 'česky'); +INSERT INTO languages VALUES ('da', 'Danish', 'dansk'); +INSERT INTO languages VALUES ('de', 'German', 'Deutsch'); +INSERT INTO languages VALUES ('en', 'English', 'English'); +INSERT INTO languages VALUES ('es', 'Spanish', 'español'); +INSERT INTO languages VALUES ('et', 'Estonian', 'eesti'); +INSERT INTO languages VALUES ('fi', 'Finish', 'suomi'); +INSERT INTO languages VALUES ('fr', 'French', 'français'); +INSERT INTO languages VALUES ('he', 'Hebrew', 'עברית'); +INSERT INTO languages VALUES ('hr', 'Croatian', 'hrvatski'); +INSERT INTO languages VALUES ('hu', 'Hungarian', 'Magyar'); +INSERT INTO languages VALUES ('it', 'Italian', 'Italiano'); +INSERT INTO languages VALUES ('ja', 'Japanese', '日本語'); +INSERT INTO languages VALUES ('nl', 'Dutch', 'Nederlands'); +INSERT INTO languages VALUES ('no', 'Norwegian', 'Norsk'); +INSERT INTO languages VALUES ('pl', 'Polish', 'polski'); +INSERT INTO languages VALUES ('pt', 'Portuguese', 'Português'); +INSERT INTO languages VALUES ('pt-br', 'Brazilian Portuguese', 'Português do Brasil'); +INSERT INTO languages VALUES ('ro', 'Romanian', 'română'); +INSERT INTO languages VALUES ('ro-md', 'Moldovan', 'română (Moldova)'); +INSERT INTO languages VALUES ('ru', 'Russian', 'русский язык'); +-- INSERT INTO languages VALUES ('sh', '', ''); -- not in ISO 639-1 +INSERT INTO languages VALUES ('sq', 'Albanian', 'Shqip'); +INSERT INTO languages VALUES ('sv', 'Swedish', 'svenska'); +INSERT INTO languages VALUES ('tr', 'Turkish', 'Türkçe'); +INSERT INTO languages VALUES ('uk', 'Ukrainian', 'українська'); +INSERT INTO languages VALUES ('zh', 'Chinese', '中文'); +INSERT INTO languages VALUES ('zh-hans', 'Chinese', '中文'); -- hans? +-- INSERT INTO languages VALUES ('', '', ''); + +ANALYZE; + diff --git a/sources/master/master.sql b/sources/master/master.sql new file mode 100644 index 0000000..c446ae7 --- /dev/null +++ b/sources/master/master.sql @@ -0,0 +1,90 @@ +-- +-- Taginfo +-- +-- master.sql +-- + +.bail ON + +ATTACH DATABASE '__DIR__/db/taginfo-db.db' AS db; +ATTACH DATABASE '__DIR__/wiki/taginfo-wiki.db' AS wiki; +ATTACH DATABASE '__DIR__/josm/taginfo-josm.db' AS josm; + +DROP TABLE IF EXISTS master_meta; + +CREATE TABLE master_meta ( + source_id TEXT, + source_name TEXT, + update_start TEXT, + update_end TEXT, + data_until TEXT +); + +INSERT INTO master_meta SELECT * FROM db.meta + UNION SELECT * FROM josm.meta + UNION SELECT * FROM wiki.meta; + +DROP TABLE IF EXISTS master_stats; + +CREATE TABLE master_stats ( + key TEXT, + value INT64 +); + +INSERT INTO master_stats SELECT * FROM db.stats + UNION SELECT * FROM josm.stats + UNION SELECT * FROM wiki.stats; + +DROP TABLE IF EXISTS popular_keys; + +CREATE TABLE popular_keys ( + key VARCHAR, + count INTEGER, + users INTEGER, + wikipages INTEGER DEFAULT 0, + in_wiki INTEGER DEFAULT 0, + in_josm INTEGER DEFAULT 0, + scale_count REAL, + scale_users REAL, + scale_wiki REAL, + scale_josm REAL, + scale_name REAL, + scale1 REAL, + scale2 REAL +); + +INSERT INTO popular_keys (key, count, users) + SELECT key, count_all, users_all FROM db.keys WHERE count_all > 1000 GROUP BY key; + +-- count number of wikipages for each key +UPDATE popular_keys SET wikipages = (SELECT count(*) FROM wiki.wikipages w WHERE w.key=popular_keys.key); + +UPDATE popular_keys SET in_wiki=1 WHERE key IN (SELECT key FROM wiki.wikipages); +UPDATE popular_keys SET in_josm=1 WHERE key IN (SELECT k FROM josm.josm_style_rules); + +DROP TABLE IF EXISTS popular_metadata; + +CREATE TABLE popular_metadata ( + keys INTEGER, + count_min INTEGER, + count_max INTEGER, + count_delta INTEGER, + users_min INTEGER, + users_max INTEGER, + users_delta INTEGER +); + +INSERT INTO popular_metadata (keys, count_min, count_max, count_delta, users_min, users_max, users_delta) + SELECT count(*), min(count), max(count), max(count) - min(count), min(users), max(users), max(users) - min(users) FROM popular_keys; + +UPDATE popular_keys SET scale_count = CAST (count - (SELECT count_min FROM popular_metadata) AS REAL) / (SELECT count_delta FROM popular_metadata); +UPDATE popular_keys SET scale_users = CAST (users - (SELECT users_min FROM popular_metadata) AS REAL) / (SELECT users_delta FROM popular_metadata); +UPDATE popular_keys SET scale_wiki = CAST (wikipages AS REAL) / (SELECT max(wikipages) FROM popular_keys); +UPDATE popular_keys SET scale_josm = in_josm; +UPDATE popular_keys SET scale_name = 1; +UPDATE popular_keys SET scale_name = 0 WHERE key LIKE '%:%'; + +UPDATE popular_keys SET scale1 = 10 * scale_count + 8 * scale_users + 2 * scale_wiki + 1 * scale_josm + 2 * scale_name; + +ANALYZE; + diff --git a/sources/master/update.sh b/sources/master/update.sh new file mode 100755 index 0000000..9b7040e --- /dev/null +++ b/sources/master/update.sh @@ -0,0 +1,27 @@ +#!/bin/sh +# +# Taginfo Master DB +# +# update.sh DIR +# + +set -e + +DIR=$1 + +if [ "x" = "x$DIR" ]; then + echo "Usage: update.sh DIR" + exit 1 +fi + +echo -n "Start master: "; date + +DATABASE=$DIR/taginfo-master.db + +rm -f $DATABASE + +perl -pe "s|__DIR__|$DIR|" master.sql | sqlite3 $DATABASE +sqlite3 $DATABASE <languages.sql + +echo -n "Done master: "; date + |