summaryrefslogtreecommitdiff
path: root/sources/wiki/get_links.rb
blob: 19c06c814bbc9a284db2b05edc48cb54107448dc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env ruby
#------------------------------------------------------------------------------
#
#  get_links.rb [DIR]
#
#------------------------------------------------------------------------------
#
#  Get a list of pages linking to all Key/Tag/Relation pages from the OSM
#  wiki. This list will include links from other language versions of the
#  same Key/Tag/Relation, links from other Key/Tag/Relation pages and links
#  from all other wiki pages.
#
#  Output is on STDOUT, one link per line: the title of the page the link is
#  from, a TAB character, and the title of the page the link is to. The
#  underscore (_) is used where there are spaces in a title.
#
#------------------------------------------------------------------------------
#
#  Copyright (C) 2015  Jochen Topf <jochen@remote.org>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
#------------------------------------------------------------------------------

require 'net/http'
require 'uri'
require 'json'
require 'pp'

require './lib/mediawikiapi.rb'

#------------------------------------------------------------------------------

# Print every wiki page that links to the page +title+.
#
# Queries the MediaWiki "backlinks" list in batches of up to 500 entries and
# writes one line per backlink to STDOUT: the title of the linking page (with
# whitespace replaced by underscores), a TAB character, and +title+ exactly
# as it was passed in.
#
# Continuation is followed until the API reports no further results. Both the
# current continuation format (a top-level 'continue' object) and the legacy
# one ('query-continue', returned when raw continuation is in effect) are
# understood, so results beyond the first batch are not silently dropped.
#
# api   - object responding to query(options) and returning the decoded API
#         response as a Hash with String keys
# title - title of the page whose backlinks are wanted
#
# Returns nil.
def what_links_to(api, title)
    blcontinue = nil
    loop do
        options = { :action => 'query', :list => 'backlinks', :bltitle => title, :bllimit => 500 }
        options[:blcontinue] = blcontinue if blcontinue

        data = api.query(options)
        data['query']['backlinks'].each do |bl|
            # Build a new string instead of mutating the API response in place.
            puts "#{bl['title'].gsub(/\s/, '_')}\t#{title}"
        end

        modern = data['continue']
        legacy = data['query-continue']
        token =
            if modern && modern['blcontinue']
                modern['blcontinue']
            elsif legacy && legacy['backlinks']
                legacy['backlinks']['blcontinue']
            end
        return unless token

        # Whitespace in the token is replaced before it is sent back, as the
        # original code did.
        # NOTE(review): presumably needed because the request is not
        # URL-escaped -- confirm against MediaWikiAPI.
        blcontinue = token.gsub(/\s/, '_')
    end
end

#------------------------------------------------------------------------------

# Directory containing tagpages.list; defaults to the current directory when
# no argument is given.
dir = ARGV[0] || '.'

api = MediaWikiAPI::API.new('wiki.openstreetmap.org')

# Each line of tagpages.list holds TAB-separated fields (type, timestamp,
# namespace, title); only the title is needed to look up backlinks.
File.foreach(File.join(dir, 'tagpages.list')) do |line|
    _type, _timestamp, _namespace, title = line.chomp.split("\t")

    # A blank or truncated line has no title field; querying the API without
    # one is pointless, so report the line on STDERR and carry on.
    if title.nil? || title.empty?
        warn "Skipping line without a page title: #{line.inspect}"
        next
    end

    what_links_to(api, title)
end


#-- THE END -------------------------------------------------------------------