From 70f35022161f625e26dba93bcba6c6383e798623 Mon Sep 17 00:00:00 2001
From: Jochen Topf
Date: Thu, 19 Mar 2015 16:36:41 +0100
Subject: An attempt to classify all keys into 'good', 'bad', and 'unknown'.

Of course this is very rough. Could be used to show "bad" keys in editors etc.
---
 sources/db/post.sql        | 10 ++++++++++
 sources/db/post_grades.sql | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 sources/db/pre.sql         |  3 ++-
 sources/db/update.sh       |  3 +++
 4 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 sources/db/post_grades.sql

diff --git a/sources/db/post.sql b/sources/db/post.sql
index c321377..49cc634 100644
--- a/sources/db/post.sql
+++ b/sources/db/post.sql
@@ -38,6 +38,16 @@ INSERT INTO stats (key, value) SELECT 'characters_in_keys_space', count(*) FRO
 INSERT INTO stats (key, value) SELECT 'characters_in_keys_problem', count(*) FROM keys WHERE characters='problem';
 INSERT INTO stats (key, value) SELECT 'characters_in_keys_rest', count(*) FROM keys WHERE characters='rest';
 
+-- Number of keys per grade ('b' = bad, 'u' = unknown, 'g' = good).
+INSERT INTO stats (key, value) SELECT 'grade_bad', count(*) FROM keys WHERE grade='b';
+INSERT INTO stats (key, value) SELECT 'grade_unknown', count(*) FROM keys WHERE grade='u';
+INSERT INTO stats (key, value) SELECT 'grade_good', count(*) FROM keys WHERE grade='g';
+-- Summed usage (count_all) of the keys in each grade. sum() over zero rows is
+-- NULL, so fall back to 0 when a grade has no keys at all.
+INSERT INTO stats (key, value) SELECT 'grade_key_count_bad', COALESCE(sum(count_all), 0) FROM keys WHERE grade='b';
+INSERT INTO stats (key, value) SELECT 'grade_key_count_unknown', COALESCE(sum(count_all), 0) FROM keys WHERE grade='u';
+INSERT INTO stats (key, value) SELECT 'grade_key_count_good', COALESCE(sum(count_all), 0) FROM keys WHERE grade='g';
+
 INSERT INTO stats (key, value) VALUES ('objects', (SELECT sum(value) FROM stats WHERE key IN ('nodes', 'ways', 'relations')));
 INSERT INTO stats (key, value) VALUES ('object_tags', (SELECT sum(value) FROM stats WHERE key IN ('node_tags', 'way_tags', 'relation_tags')));
 
diff --git a/sources/db/post_grades.sql b/sources/db/post_grades.sql
new file mode 100644
index 0000000..980df01
--- /dev/null
+++ b/sources/db/post_grades.sql
@@ -0,0 +1,49 @@
+--
+-- Taginfo source: Database
+--
+-- post_grades.sql
+--
+
+.bail ON
+
+PRAGMA journal_mode = OFF;
+PRAGMA synchronous = OFF;
+PRAGMA temp_store = MEMORY;
+PRAGMA cache_size = 5000000;
+
+-- ============================================================================
+
+-- BAD KEYS (grade 'b'):
+
+-- All keys containing whitespace or other problematic characters.
+UPDATE keys SET grade='b' WHERE characters IN ('space', 'problem');
+
+-- All keys documented in the wiki but never used.
+UPDATE keys SET grade='b' WHERE characters IS NULL;
+
+-- All other keys used fewer than 10 times with strange characters in them.
+UPDATE keys SET grade='b' WHERE count_all < 10 AND characters='rest';
+
+-- ============================================================================
+
+-- GOOD KEYS (grade 'g'):
+
+-- Documented in the wiki and used at least once, or used more than 100 times,
+-- if they use letters, underscores and colons only.
+UPDATE keys SET grade='g' WHERE ((in_wiki=1 AND count_all > 0) OR (count_all > 100)) AND characters IN ('plain', 'colon', 'letters');
+
+-- Languages can contain '-' characters, so we have a few extra "good" keys.
+-- Keys containing whitespace or other problematic characters are excluded so
+-- they stay bad. Never-used keys (characters IS NULL) do not satisfy NOT IN
+-- either and keep their grade.
+UPDATE keys SET grade='g' WHERE key LIKE '%name:%-%' AND characters NOT IN ('space', 'problem');
+
+-- Everything used more than 1000 times is good. Of course that's not the case,
+-- but we avoid overwhelming users with stuff they think they need to fix.
+UPDATE keys SET grade='g' WHERE count_all > 1000;
+
+-- ============================================================================
+
+-- SELECT grade, count(*), sum(count_all) FROM keys GROUP BY grade;
+
+
diff --git a/sources/db/pre.sql b/sources/db/pre.sql
index eab3e47..9faf613 100644
--- a/sources/db/pre.sql
+++ b/sources/db/pre.sql
@@ -28,7 +28,8 @@ CREATE TABLE keys (
     cells_ways INTEGER DEFAULT 0,
     in_wiki INTEGER DEFAULT 0,
     in_projects INTEGER DEFAULT 0,
-    characters VARCHAR
+    characters VARCHAR,
+    grade CHAR DEFAULT 'u'
 );
 
 DROP TABLE IF EXISTS prevalent_values;
diff --git a/sources/db/update.sh b/sources/db/update.sh
index 44f1fa6..faa5cde 100755
--- a/sources/db/update.sh
+++ b/sources/db/update.sh
@@ -76,6 +76,9 @@ sqlite3 $DATABASE