diff options
Diffstat (limited to 'sources/wiki/classify_links.rb')
-rwxr-xr-x | sources/wiki/classify_links.rb | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/sources/wiki/classify_links.rb b/sources/wiki/classify_links.rb new file mode 100755 index 0000000..907eadc --- /dev/null +++ b/sources/wiki/classify_links.rb @@ -0,0 +1,127 @@ +#!/usr/bin/env ruby +#------------------------------------------------------------------------------ +# +# classify_links.rb [DIR] +# +#------------------------------------------------------------------------------ +# +# Read the links we got from get_links.rb, classify them, and add them to the +# taginfo-wiki.db database. +# +# Classification (link_class): +# +# category - From a Category: page +# how_to_map - From any "How to map" page +# import - From any "Import" page +# key_to_tag - From a Key to one of its Tags +# ktr - From any Key/Tag/Relation page +# map_features - From any "Map Features" page +# proposed - From any "Proposed" page +# rest - From anything else +# same - From one language variant to another of the same Key/Tag/Relation +# tag_to_key - From a Tag to its Key +# template - From any "Template:" page +# user - From any "User:" or "User talk:" page +# +#------------------------------------------------------------------------------ +# +# Copyright (C) 2015 Jochen Topf <jochen@remote.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
#
#------------------------------------------------------------------------------

require 'sqlite3'

# Regular expression matching Key/Tag/Relation pages in all languages.
# Captures: 1 = optional language prefix, 2 = page type, 3 = page name.
REGEXP_KTR = Regexp.new('^(?:(.*):)?(Key|Tag|Relation):(.*)$')

# Rules that classify a link purely by the title of the page it comes from.
# Listed in ascending priority: when several patterns match the same title,
# the LAST matching rule wins.
# NOTE(review): the how_to_map rule only matches titles ending in
# "How_to_map_a" - confirm this is the intended set of "How to map" pages.
TITLE_RULES = [
  [/^Category:/,                         'category'],
  [/^(([A-Za-z]+):)?Template(_talk)?:/,  'template'],
  [/Map_Features/i,                      'map_features'],
  [/Import/i,                            'import'],
  [/How_to_map_a$/,                      'how_to_map'],
  [/Proposed_features/i,                 'proposed'],
  [/^(([A-Za-z]+):)?User(_talk)?:/,      'user']
].freeze

SQL_INSERT_LINK = 'INSERT INTO wiki_links (link_class, from_title, from_lang, from_type, from_name, to_title, to_lang, to_type, to_name) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)'

# Returns the link class derived from the source page title alone, or 'rest'
# if no title rule matches.
def classify_by_title(from)
  rule = TITLE_RULES.reverse_each.find { |pattern, _link_class| from =~ pattern }
  rule ? rule.last : 'rest'
end

# Returns the link class for a link whose source and target are both
# Key/Tag/Relation pages. +fm+ and +tm+ are the REGEXP_KTR match data of the
# source and target titles.
def classify_ktr(fm, tm)
  from_type = fm[2]
  from_name = fm[3]
  to_type   = tm[2]
  to_name   = tm[3]

  if from_type == to_type && from_name == to_name
    # Same page type and name, so the titles can differ only in language prefix.
    'same'
  elsif from_type == 'Tag' && to_type == 'Key' && from_name.sub(/=.*/, '') == to_name
    'tag_to_key'
  elsif from_type == 'Key' && to_type == 'Tag' && to_name.sub(/=.*/, '') == from_name
    'key_to_tag'
  else
    'ktr'
  end
end

# Classifies one link and returns the values for one wiki_links row, in the
# column order used by SQL_INSERT_LINK:
#   [link_class, from_title, from_lang, from_type, from_name,
#                to_title,   to_lang,   to_type,   to_name]
# The lang/type/name entries are nil for titles that are not Key/Tag/Relation
# pages.
def classify_link(from, to)
  fm = REGEXP_KTR.match(from)
  tm = REGEXP_KTR.match(to)

  # Key/Tag/Relation-to-Key/Tag/Relation links take precedence over every
  # title-based rule.
  link_class = fm && tm ? classify_ktr(fm, tm) : classify_by_title(from)

  from_parts = fm ? fm.captures : [nil, nil, nil]
  to_parts   = tm ? tm.captures : [nil, nil, nil]

  [link_class, from, *from_parts, to, *to_parts]
end

dir = ARGV[0] || '.'

db = SQLite3::Database.new(dir + '/taginfo-wiki.db')
db.results_as_hash = true

db.transaction do
  File.open(dir + '/links.list') do |linkfile|
    linkfile.each do |line|
      from, to = line.chomp.split("\t")

      # A line without a tab (e.g. a blank line) has no link target; skip it
      # instead of aborting the whole transaction with a NoMethodError.
      unless from && to
        warn "#{dir}/links.list:#{linkfile.lineno}: skipping malformed line"
        next
      end

      row = classify_link(from, to)

      # Bind parameters are passed as one array; the varargs form is
      # deprecated by the sqlite3 gem.
      db.execute(SQL_INSERT_LINK, row)
      # puts row.join("\t")
    end
  end
end