aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJochen Topf <jochen@topf.org>2015-03-19 16:36:41 +0100
committerJochen Topf <jochen@topf.org>2015-03-19 16:36:41 +0100
commit70f35022161f625e26dba93bcba6c6383e798623 (patch)
treeed76e754e2313b285d2d3c9125f478dc4fc02b6f
parentc616b58a82011608e4632e9699aa7d195f6f3731 (diff)
downloadtaginfo-70f35022161f625e26dba93bcba6c6383e798623.tar
taginfo-70f35022161f625e26dba93bcba6c6383e798623.tar.gz
An attempt to classify all keys into 'good', 'bad', and 'unknown'.
Of course this is very rough. Could be used to show "bad" keys in editors etc.
-rw-r--r--sources/db/post.sql7
-rw-r--r--sources/db/post_grades.sql46
-rw-r--r--sources/db/pre.sql3
-rwxr-xr-xsources/db/update.sh3
4 files changed, 58 insertions, 1 deletions
diff --git a/sources/db/post.sql b/sources/db/post.sql
index c321377..49cc634 100644
--- a/sources/db/post.sql
+++ b/sources/db/post.sql
@@ -38,6 +38,13 @@ INSERT INTO stats (key, value) SELECT 'characters_in_keys_space', count(*) FRO
INSERT INTO stats (key, value) SELECT 'characters_in_keys_problem', count(*) FROM keys WHERE characters='problem';
INSERT INTO stats (key, value) SELECT 'characters_in_keys_rest', count(*) FROM keys WHERE characters='rest';
+-- Grade statistics: the number of keys per grade, and the summed usage count
+-- (count_all) of the keys in each grade. sum() yields NULL when no key has
+-- the grade, so coalesce() stores 0 instead, in line with what count(*) gives.
+INSERT INTO stats (key, value) SELECT 'grade_bad', count(*) FROM keys WHERE grade='b';
+INSERT INTO stats (key, value) SELECT 'grade_unknown', count(*) FROM keys WHERE grade='u';
+INSERT INTO stats (key, value) SELECT 'grade_good', count(*) FROM keys WHERE grade='g';
+INSERT INTO stats (key, value) SELECT 'grade_key_count_bad', coalesce(sum(count_all), 0) FROM keys WHERE grade='b';
+INSERT INTO stats (key, value) SELECT 'grade_key_count_unknown', coalesce(sum(count_all), 0) FROM keys WHERE grade='u';
+INSERT INTO stats (key, value) SELECT 'grade_key_count_good', coalesce(sum(count_all), 0) FROM keys WHERE grade='g';
+
INSERT INTO stats (key, value) VALUES ('objects', (SELECT sum(value) FROM stats WHERE key IN ('nodes', 'ways', 'relations')));
INSERT INTO stats (key, value) VALUES ('object_tags', (SELECT sum(value) FROM stats WHERE key IN ('node_tags', 'way_tags', 'relation_tags')));
diff --git a/sources/db/post_grades.sql b/sources/db/post_grades.sql
new file mode 100644
index 0000000..980df01
--- /dev/null
+++ b/sources/db/post_grades.sql
@@ -0,0 +1,46 @@
+--
+-- Taginfo source: Database
+--
+-- post_grades.sql
+--
+-- Grades the rows of the keys table: 'b' = bad, 'g' = good. The UPDATEs run
+-- in order and do not look at the current grade, so when several of them match
+-- the same key, the last one wins. Keys that nothing matches keep their grade.
+--
+
+-- Stop at the first error instead of carrying on with the remaining statements.
+.bail ON
+
+PRAGMA journal_mode = OFF;
+PRAGMA synchronous = OFF;
+PRAGMA temp_store = MEMORY;
+PRAGMA cache_size = 5000000;
+
+-- ============================================================================
+
+-- BAD KEYS:
+
+-- All keys containing whitespace or other problematic characters.
+UPDATE keys SET grade='b' WHERE characters IN ('space', 'problem');
+
+-- All keys whose character class was never set (characters IS NULL). These are
+-- described as the keys documented in the wiki but never used - TODO confirm
+-- that a NULL character class cannot occur for any other reason.
+UPDATE keys SET grade='b' WHERE characters IS NULL;
+
+-- All other keys with strange characters in them (class 'rest') that are used
+-- fewer than 10 times.
+UPDATE keys SET grade='b' WHERE count_all < 10 AND characters='rest';
+
+-- ============================================================================
+
+-- GOOD KEYS:
+
+-- Keys in the 'plain', 'colon' or 'letters' class (i.e. letters, underscores
+-- and colons only) that are either documented in the wiki and used at least
+-- once, or used more than 100 times.
+UPDATE keys SET grade='g' WHERE ((in_wiki=1 AND count_all > 0) OR (count_all > 100)) AND characters IN ('plain', 'colon', 'letters');
+
+-- Language codes can contain '-' characters, so we have a few extra "good"
+-- keys: every key containing 'name:' followed somewhere later by a '-'.
+-- NOTE(review): this has no character-class or usage condition, so it also
+-- re-grades keys marked bad above (e.g. ones with whitespace) - confirm intended.
+UPDATE keys SET grade='g' WHERE key LIKE '%name:%-%';
+
+-- Everything used more than 1000 times is good. Of course that's not really
+-- the case, but we avoid overwhelming users with stuff they think they need
+-- to fix. This overrides every "bad" rule above.
+UPDATE keys SET grade='g' WHERE count_all > 1000;
+
+-- ============================================================================
+
+-- Manual check of the resulting distribution:
+-- SELECT grade, count(*), sum(count_all) FROM keys GROUP BY grade;
+
+
diff --git a/sources/db/pre.sql b/sources/db/pre.sql
index eab3e47..9faf613 100644
--- a/sources/db/pre.sql
+++ b/sources/db/pre.sql
@@ -28,7 +28,8 @@ CREATE TABLE keys (
cells_ways INTEGER DEFAULT 0,
in_wiki INTEGER DEFAULT 0,
in_projects INTEGER DEFAULT 0,
- characters VARCHAR
+ characters VARCHAR,
+ grade CHAR DEFAULT 'u'
);
DROP TABLE IF EXISTS prevalent_values;
diff --git a/sources/db/update.sh b/sources/db/update.sh
index 44f1fa6..faa5cde 100755
--- a/sources/db/update.sh
+++ b/sources/db/update.sh
@@ -76,6 +76,9 @@ sqlite3 $DATABASE <post_similar_keys.sql
echo "`$DATECMD` Running update_characters... "
./update_characters.rb $DIR
+echo "`$DATECMD` Running post_grades.sql... "
+sqlite3 $DATABASE <post_grades.sql
+
echo "`$DATECMD` Running post_indexes.sql... "
sqlite3 $DATABASE <post_indexes.sql