summaryrefslogtreecommitdiff
path: root/sources/wiki/classify_links.rb
blob: 907eadc9b301f81190e759d591050835fad91a42 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env ruby
#------------------------------------------------------------------------------
#
#  classify_links.rb [DIR]
#
#------------------------------------------------------------------------------
#
#  Read the links we got from get_links.rb, classify them, and add them to the
#  taginfo-wiki.db database.
#
#  Classification (link_class):
#
#   category - From a Category: page
#   how_to_map - From any "How to map" page
#   import - From any "Import" page
#   key_to_tag - From a Key to one of its Tags
#   ktr - From any Key/Tag/Relation page
#   map_features - From any "Map Features" page
#   proposed - From any "Proposed" page
#   rest - From anything else
#   same - From one language variant to another of the same Key/Tag/Relation
#   tag_to_key - From a Tag to its Key
#   template - From any "Template:" page
#   user - From any "User:" or "User talk:" page
#
#------------------------------------------------------------------------------
#
#  Copyright (C) 2015  Jochen Topf <jochen@remote.org>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
#------------------------------------------------------------------------------

require 'sqlite3'

# Directory holding both the input file (links.list) and the database
# (taginfo-wiki.db); defaults to the current directory.
dir = ARGV[0] || '.'

db = SQLite3::Database.new(File.join(dir, 'taginfo-wiki.db'))
db.results_as_hash = true

# Regular expression matching Key/Tag/Relation pages in all languages.
# Captures: (1) optional prefix before the page type (stored as the language),
# (2) page type, (3) the key/tag/relation name.
regexp_ktr = Regexp.new('^(?:(.*):)?(Key|Tag|Relation):(.*)$')

# Classes derived from the title of the linking page alone. Every pattern is
# tried in order and a later match overrides an earlier one, so the list runs
# from lowest to highest priority.
source_classes = [
    [/^Category:/,                           'category'],
    [/^(([A-Za-z]+):)?Template(_talk)?:/,    'template'],
    [/Map_Features/i,                        'map_features'],
    [/Import/i,                              'import'],
    [/How_to_map_a$/,                        'how_to_map'],
    [/Proposed_features/i,                   'proposed'],
    [/^(([A-Za-z]+):)?User(_talk)?:/,        'user']
]

insert_sql = "INSERT INTO wiki_links (link_class, from_title, from_lang, from_type, from_name, to_title, to_lang, to_type, to_name) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"

# All rows are inserted inside a single transaction.
db.transaction do

    File.open(File.join(dir, 'links.list')) do |linkfile|
        linkfile.each do |line|
            line.chomp!

            # Each line is "<from title>\t<to title>".
            (from, to) = line.split("\t")

            # A blank line or a line without a tab has no usable pair of
            # titles. Report it and carry on instead of failing on nil.
            if from.nil? || to.nil?
                warn "Skipping malformed line #{linkfile.lineno} in links.list: #{line.inspect}"
                next
            end

            # Classification by source page title; 'rest' if nothing matches.
            link_class = 'rest'
            source_classes.each do |pattern, name|
                link_class = name if from =~ pattern
            end

            # Split both titles into language/type/name. All three parts stay
            # nil when the title is not a Key/Tag/Relation page.
            fm = from.match(regexp_ktr)
            from_lang, from_type, from_name = fm ? fm.captures : []

            tm = to.match(regexp_ktr)
            to_lang, to_type, to_name = tm ? tm.captures : []

            # Links between two Key/Tag/Relation pages get a more specific
            # class, which overrides any class chosen from the title above.
            if fm && tm
                if from_type == to_type && from_name == to_name
                    link_class = 'same'
                elsif from_type == 'Tag' && to_type == 'Key' && from_name.sub(/=.*/, '') == to_name
                    # Tag "key=value" linking to the page of its own key.
                    link_class = 'tag_to_key'
                elsif from_type == 'Key' && to_type == 'Tag' && to_name.sub(/=.*/, '') == from_name
                    # Key linking to the page of one of its "key=value" tags.
                    link_class = 'key_to_tag'
                else
                    link_class = 'ktr'
                end
            end

            # Bind values are passed as one array: passing them as separate
            # arguments is deprecated in the sqlite3 gem and not accepted by
            # its 2.x releases.
            db.execute(insert_sql, [
                link_class,
                from, from_lang, from_type, from_name,
                to, to_lang, to_type, to_name
            ])
#        puts "#{link_class}\t#{from}\t#{from_lang}\t#{from_type}\t#{from_name}\t#{to}\t#{to_lang}\t#{to_type}\t#{to_name}"
        end
    end
end