1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
#!/usr/bin/env ruby
#------------------------------------------------------------------------------
#
# classify_links.rb [DIR]
#
#------------------------------------------------------------------------------
#
# Read the links we got from get_links.rb, classify them, and add them to the
# taginfo-wiki.db database.
#
# Classification (link_class):
#
# category - From a Category: page
# how_to_map - From any "How to map" page
# import - From any "Import" page
# key_to_tag - From a Key to one of its Tags
# ktr - From any Key/Tag/Relation page
# map_features - From any "Map Features" page
# proposed - From any "Proposed" page
# rest - From anything else
# same - From one language variant to another of the same Key/Tag/Relation
# tag_to_key - From a Tag to its Key
# template - From any "Template:" page
# user - From any "User:" or "User talk:" page
#
#------------------------------------------------------------------------------
#
# Copyright (C) 2015 Jochen Topf <jochen@remote.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
#------------------------------------------------------------------------------
require 'sqlite3'
# Regular expression matching Key/Tag/Relation pages in all languages.
# Captures: 1 = optional language prefix, 2 = page type (Key/Tag/Relation),
# 3 = the rest of the title (the key, tag or relation name).
REGEXP_KTR = Regexp.new('^(?:(.*):)?(Key|Tag|Relation):(.*)$')

# Classification by the title of the linking page, in order of precedence:
# the first matching entry wins. The order is significant because a title can
# match several patterns (e.g. a "User:" page with "Import" in its name is
# classified as 'user').
# Note that the 'how_to_map' pattern only matches titles that end in
# "How_to_map_a".
PAGE_CLASSES = [
  ['user',         /^(([A-Za-z]+):)?User(_talk)?:/],
  ['proposed',     /Proposed_features/i],
  ['how_to_map',   /How_to_map_a$/],
  ['import',       /Import/i],
  ['map_features', /Map_Features/i],
  ['template',     /^(([A-Za-z]+):)?Template(_talk)?:/],
  ['category',     /^Category:/]
].freeze

INSERT_LINK_SQL = "INSERT INTO wiki_links (link_class, from_title, from_lang, from_type, from_name, to_title, to_lang, to_type, to_name) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"

# Returns the link class derived only from the title of the linking page,
# or 'rest' if none of the PAGE_CLASSES patterns match.
def page_link_class(from)
  entry = PAGE_CLASSES.find { |(_name, pattern)| from =~ pattern }
  entry ? entry.first : 'rest'
end

# Returns the link class for a link between two Key/Tag/Relation pages.
# fm and tm are the REGEXP_KTR MatchData objects of the source and target title.
def ktr_link_class(fm, tm)
  _from_lang, from_type, from_name = fm.captures
  _to_lang, to_type, to_name = tm.captures

  if from_type == to_type && from_name == to_name
    'same'
  elsif from_type == 'Tag' && to_type == 'Key' && from_name.sub(/=.*/, '') == to_name
    # A tag name is "key=value"; strip the value to compare with the key
    'tag_to_key'
  elsif from_type == 'Key' && to_type == 'Tag' && to_name.sub(/=.*/, '') == from_name
    'key_to_tag'
  else
    'ktr'
  end
end

# Classifies the link from page title `from` to page title `to`.
#
# Returns the Array of values for one wiki_links row, in the column order of
# INSERT_LINK_SQL. The lang/type/name entries are nil for a title that is not
# a Key/Tag/Relation page; lang is also nil when the title has no prefix.
def classify_link(from, to)
  fm = REGEXP_KTR.match(from)
  tm = REGEXP_KTR.match(to)

  # A link between two Key/Tag/Relation pages overrides any title-based class
  link_class = fm && tm ? ktr_link_class(fm, tm) : page_link_class(from)

  from_parts = fm ? fm.captures : [nil, nil, nil]
  to_parts = tm ? tm.captures : [nil, nil, nil]

  [link_class, from, *from_parts, to, *to_parts]
end

dir = ARGV[0] || '.'

db = SQLite3::Database.new(dir + '/taginfo-wiki.db')
db.results_as_hash = true

# All rows are inserted in a single transaction
db.transaction do
  File.open(dir + '/links.list') do |linkfile|
    linkfile.each_line.with_index(1) do |line, lineno|
      line.chomp!

      # Each line is "<from title>\t<to title>"
      from, to = line.split("\t")

      # split drops a trailing empty field, so a line without a tab or with an
      # empty target leaves `to` nil. Skip such lines instead of crashing.
      unless from && to
        warn "links.list:#{lineno}: skipping malformed line: #{line.inspect}"
        next
      end

      # Bind parameters are passed as one Array; passing them as separate
      # arguments is deprecated in the sqlite3 gem.
      db.execute(INSERT_LINK_SQL, classify_link(from, to))
    end
  end
end
|