about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x sources/db/update_characters.rb 13
-rwxr-xr-x sources/josm/import_josm.rb 89
-rwxr-xr-x sources/languages/import_subtag_registry.rb 43
-rwxr-xr-x sources/languages/import_unicode_scripts.rb 57
-rwxr-xr-x sources/merkaartor/import_merkaartor.rb 45
-rwxr-xr-x sources/potlatch/import_potlatch.rb 67
-rwxr-xr-x sources/wiki/extract_words.rb 15
-rwxr-xr-x sources/wiki/get_image_info.rb 108
-rwxr-xr-x sources/wiki/get_page_list.rb 9
-rwxr-xr-x sources/wiki/get_wiki_data.rb 72
10 files changed, 251 insertions, 267 deletions
diff --git a/sources/db/update_characters.rb b/sources/db/update_characters.rb
index 671b38e..9d253d2 100755
--- a/sources/db/update_characters.rb
+++ b/sources/db/update_characters.rb
@@ -31,6 +31,8 @@ dir = ARGV[0] || '.'
db = SQLite3::Database.new(dir + '/taginfo-db.db')
db.results_as_hash = true
+#------------------------------------------------------------------------------
+
regexes = [
[ 'plain', %r{^[a-z]([a-z_]*[a-z])?$} ],
[ 'colon', %r{^[a-z][a-z_:]*[a-z]$} ],
@@ -50,11 +52,10 @@ db.execute("SELECT key FROM keys").map{ |row| row['key'] }.each do |key|
end
end
-db.execute('BEGIN TRANSACTION');
-
-keys.each do |key, type|
- db.execute("UPDATE keys SET characters=? WHERE key=?", type, key);
+db.transaction do |db|
+ keys.each do |key, type|
+ db.execute("UPDATE keys SET characters=? WHERE key=?", type, key);
+ end
end
-db.execute('COMMIT');
-
+#-- THE END -------------------------------------------------------------------
diff --git a/sources/josm/import_josm.rb b/sources/josm/import_josm.rb
index 51009ab..5ed7a38 100755
--- a/sources/josm/import_josm.rb
+++ b/sources/josm/import_josm.rb
@@ -7,7 +7,7 @@
#
#------------------------------------------------------------------------------
#
-# Copyright (C) 2012 Jochen Topf <jochen@remote.org>
+# Copyright (C) 2013 Jochen Topf <jochen@remote.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -25,8 +25,6 @@
#
#------------------------------------------------------------------------------
-require 'rubygems'
-
require 'find'
require 'pp'
require 'sqlite3'
@@ -65,62 +63,61 @@ class Rule
end
-dir = ARGV[0] || '.'
+#------------------------------------------------------------------------------
+dir = ARGV[0] || '.'
db = SQLite3::Database.new(dir + '/taginfo-josm.db')
-db.execute('BEGIN TRANSACTION');
-
-file = File.new(dir + '/elemstyles.xml')
-doc = REXML::Document.new(file)
+#------------------------------------------------------------------------------
-doc.elements.each('/rules/rule') do |rule_element|
- rule = Rule.new(rule_element.to_s)
- rule_element.elements.each do |element|
- case element.name
- when 'condition'
- rule.k = element.attributes['k']
- rule.v = element.attributes['v']
- rule.b = element.attributes['b']
- when 'scale_min'
- rule.scale_min = element.text
- when 'scale_max'
- rule.scale_max = element.text
- when 'icon'
- rule.icon_source = element.attributes['src']
- when 'area'
- rule.area_color = element.attributes['colour']
- when 'line'
- rule.line_color = element.attributes['colour']
- rule.line_width = element.attributes['width']
- rule.line_realwidth = element.attributes['realwidth']
+db.transaction do |db|
+ file = File.new(dir + '/elemstyles.xml')
+ doc = REXML::Document.new(file)
+
+ doc.elements.each('/rules/rule') do |rule_element|
+ rule = Rule.new(rule_element.to_s)
+ rule_element.elements.each do |element|
+ case element.name
+ when 'condition'
+ rule.k = element.attributes['k']
+ rule.v = element.attributes['v']
+ rule.b = element.attributes['b']
+ when 'scale_min'
+ rule.scale_min = element.text
+ when 'scale_max'
+ rule.scale_max = element.text
+ when 'icon'
+ rule.icon_source = element.attributes['src']
+ when 'area'
+ rule.area_color = element.attributes['colour']
+ when 'line'
+ rule.line_color = element.attributes['colour']
+ rule.line_width = element.attributes['width']
+ rule.line_realwidth = element.attributes['realwidth']
+ end
end
- end
# pp "rule #{rule.k} #{rule.v}"
- rule.insert(db)
+ rule.insert(db)
+ end
end
-db.execute('COMMIT');
-
-db.execute('BEGIN TRANSACTION');
-
-Dir.chdir(dir + '/svn-source') do
- Dir.foreach(dir + '/svn-source') do |style|
- Find.find(style) do |path|
- if FileTest.directory?(path) && File.basename(path) =~ /^\./
- Find.prune
- elsif FileTest.file?(path)
- File.open(path) do |file|
- png = file.read
- pathwostyle = path.sub(%r(^#{style}/), '')
- db.execute('INSERT INTO josm_style_images (style, path, png) VALUES (?, ?, ?)', style, pathwostyle, SQLite3::Blob.new(png))
+db.transaction do |db|
+ Dir.chdir(dir + '/svn-source') do
+ Dir.foreach(dir + '/svn-source') do |style|
+ Find.find(style) do |path|
+ if FileTest.directory?(path) && File.basename(path) =~ /^\./
+ Find.prune
+ elsif FileTest.file?(path)
+ File.open(path) do |file|
+ png = file.read
+ pathwostyle = path.sub(%r(^#{style}/), '')
+ db.execute('INSERT INTO josm_style_images (style, path, png) VALUES (?, ?, ?)', style, pathwostyle, SQLite3::Blob.new(png))
+ end
end
end
end
end
end
-db.execute('COMMIT');
-
#-- THE END -------------------------------------------------------------------
diff --git a/sources/languages/import_subtag_registry.rb b/sources/languages/import_subtag_registry.rb
index ac3d17b..300b0fb 100755
--- a/sources/languages/import_subtag_registry.rb
+++ b/sources/languages/import_subtag_registry.rb
@@ -25,8 +25,6 @@
#
#------------------------------------------------------------------------------
-require 'rubygems'
-
require 'sqlite3'
class Subtag
@@ -63,10 +61,13 @@ class Subtag
end
-dir = ARGV[0] || '.'
+#------------------------------------------------------------------------------
+dir = ARGV[0] || '.'
db = SQLite3::Database.new(dir + '/taginfo-languages.db')
+#------------------------------------------------------------------------------
+
registry_file = "#{dir}/language-subtag-registry"
file_date = nil
@@ -101,27 +102,25 @@ end
SUBTAG_TYPES = %w( language script region variant )
-db.execute('BEGIN TRANSACTION');
-
-Subtag.entries.each do |entry|
- if SUBTAG_TYPES.include?(entry.type) &&
- entry.description != 'Private use' &&
- (entry.type != 'language' || (entry.scope != 'special' && entry.scope != 'collection')) &&
- (entry.type != 'script' || !entry.subtag.match(%r{^Z}) ) &&
- (entry.type != 'region' || entry.subtag.match(%r{^[A-Z]{2}$}) )
- db.execute("INSERT INTO subtags (stype, subtag, added, suppress_script, scope, description, prefix) VALUES (?, ?, ?, ?, ?, ?, ?)",
- entry.type,
- entry.subtag,
- entry.added,
- entry.suppress_script,
- entry.scope,
- entry.description,
- entry.prefix
- )
+db.transaction do |db|
+ Subtag.entries.each do |entry|
+ if SUBTAG_TYPES.include?(entry.type) &&
+ entry.description != 'Private use' &&
+ (entry.type != 'language' || (entry.scope != 'special' && entry.scope != 'collection')) &&
+ (entry.type != 'script' || !entry.subtag.match(%r{^Z}) ) &&
+ (entry.type != 'region' || entry.subtag.match(%r{^[A-Z]{2}$}) )
+ db.execute("INSERT INTO subtags (stype, subtag, added, suppress_script, scope, description, prefix) VALUES (?, ?, ?, ?, ?, ?, ?)",
+ entry.type,
+ entry.subtag,
+ entry.added,
+ entry.suppress_script,
+ entry.scope,
+ entry.description,
+ entry.prefix
+ )
+ end
end
end
-db.execute('COMMIT');
-
#-- THE END -------------------------------------------------------------------
diff --git a/sources/languages/import_unicode_scripts.rb b/sources/languages/import_unicode_scripts.rb
index 7855a78..e946983 100755
--- a/sources/languages/import_unicode_scripts.rb
+++ b/sources/languages/import_unicode_scripts.rb
@@ -25,48 +25,47 @@
#
#------------------------------------------------------------------------------
-require 'rubygems'
-
require 'sqlite3'
dir = ARGV[0] || '.'
-
db = SQLite3::Database.new(dir + '/taginfo-languages.db')
+#------------------------------------------------------------------------------
+
property_value_alias_file = "#{dir}/PropertyValueAliases.txt"
codepoint_script_mapping_file = "#{dir}/Scripts.txt"
-db.execute('BEGIN TRANSACTION');
-
-open(property_value_alias_file) do |file|
- file.each do |line|
- line.chomp!
- if line.match(%r{^sc ;})
- (sc, script, name) = line.split(%r{\s*;\s*})
- db.execute("INSERT INTO unicode_scripts (script, name) VALUES (?, ?)", script, name)
+db.transaction do |db|
+ open(property_value_alias_file) do |file|
+ file.each do |line|
+ line.chomp!
+ if line.match(%r{^sc ;})
+ (sc, script, name) = line.split(%r{\s*;\s*})
+ db.execute("INSERT INTO unicode_scripts (script, name) VALUES (?, ?)", script, name)
+ end
end
end
-end
-open(codepoint_script_mapping_file) do |file|
- file.each do |line|
- line.chomp!
- line.sub!(%r{\s*#.*}, '')
- next if line.match(%r{^$})
- (codes, script) = line.split(%r{\s+;\s+})
- if codes.match(%r{^[0-9A-F]{4,5}$})
- from = codes
- to = codes
- elsif codes.match(%r{^([0-9A-F]{4,5})..([0-9A-F]{4,5})$})
- from = $1
- to = $2
- else
- puts "Line does not match: #{line}"
- next
+ open(codepoint_script_mapping_file) do |file|
+ file.each do |line|
+ line.chomp!
+ line.sub!(%r{\s*#.*}, '')
+ next if line.match(%r{^$})
+ (codes, script) = line.split(%r{\s+;\s+})
+ if codes.match(%r{^[0-9A-F]{4,5}$})
+ from = codes
+ to = codes
+ elsif codes.match(%r{^([0-9A-F]{4,5})..([0-9A-F]{4,5})$})
+ from = $1
+ to = $2
+ else
+ puts "Line does not match: #{line}"
+ next
+ end
+ db.execute("INSERT INTO unicode_codepoint_script_mapping (codepoint_from, codepoint_to, name) VALUES (?, ?, ?)", from, to, script)
end
- db.execute("INSERT INTO unicode_codepoint_script_mapping (codepoint_from, codepoint_to, name) VALUES (?, ?, ?)", from, to, script)
end
end
-db.execute('COMMIT');
+#-- THE END -------------------------------------------------------------------
diff --git a/sources/merkaartor/import_merkaartor.rb b/sources/merkaartor/import_merkaartor.rb
index 40572bd..4f0ad4b 100755
--- a/sources/merkaartor/import_merkaartor.rb
+++ b/sources/merkaartor/import_merkaartor.rb
@@ -7,7 +7,7 @@
#
#------------------------------------------------------------------------------
#
-# Copyright (C) 2012 Jochen Topf <jochen@remote.org>
+# Copyright (C) 2013 Jochen Topf <jochen@remote.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -25,44 +25,41 @@
#
#------------------------------------------------------------------------------
-require 'rubygems'
-
require 'pp'
require 'sqlite3'
require 'rexml/document'
dir = ARGV[0] || '.'
-
db = SQLite3::Database.new(dir + '/taginfo-merkaartor.db')
-db.execute('BEGIN TRANSACTION');
+#------------------------------------------------------------------------------
template = 'default'
-db.execute('INSERT INTO templates (name) VALUES (?)', template)
-file = File.new(dir + '/git-source/Templates/' + template + '.mat')
-doc = REXML::Document.new(file)
+db.transaction do |db|
+ db.execute('INSERT INTO templates (name) VALUES (?)', template)
-doc.elements.each('/templates/widgets/widget') do |widget|
- key = widget.attributes['tag']
- link = widget.elements['link'].attributes['src'] if widget.elements['link']
- selector = widget.elements['selector'].attributes['expr'] if widget.elements['selector']
- db.execute('INSERT INTO keys (template, key, tag_type, link, selector) VALUES (?, ?, ?, ?, ?)', template, key, widget.attributes['type'], link, selector)
- widget.elements.each('description') do |desc|
- db.execute('INSERT INTO key_descriptions (template, key, lang, description) VALUES (?, ?, ?, ?)', template, key, desc.attributes['locale'], desc.text)
- end
- widget.elements.each('value') do |valelement|
- value = valelement.attributes['tag']
- vlink = valelement.elements['link'].attributes['src'] if valelement.elements['link']
- db.execute('INSERT INTO tags (template, key, value, link) VALUES (?, ?, ?, ?)', template, key, value, vlink)
+ file = File.new(dir + '/git-source/Templates/' + template + '.mat')
+ doc = REXML::Document.new(file)
+
+ doc.elements.each('/templates/widgets/widget') do |widget|
+ key = widget.attributes['tag']
+ link = widget.elements['link'].attributes['src'] if widget.elements['link']
+ selector = widget.elements['selector'].attributes['expr'] if widget.elements['selector']
+ db.execute('INSERT INTO keys (template, key, tag_type, link, selector) VALUES (?, ?, ?, ?, ?)', template, key, widget.attributes['type'], link, selector)
widget.elements.each('description') do |desc|
- db.execute('INSERT INTO tag_descriptions (template, key, value, lang, description) VALUES (?, ?, ?, ?, ?)', template, key, value, desc.attributes['locale'], desc.text)
+ db.execute('INSERT INTO key_descriptions (template, key, lang, description) VALUES (?, ?, ?, ?)', template, key, desc.attributes['locale'], desc.text)
+ end
+ widget.elements.each('value') do |valelement|
+ value = valelement.attributes['tag']
+ vlink = valelement.elements['link'].attributes['src'] if valelement.elements['link']
+ db.execute('INSERT INTO tags (template, key, value, link) VALUES (?, ?, ?, ?)', template, key, value, vlink)
+ widget.elements.each('description') do |desc|
+ db.execute('INSERT INTO tag_descriptions (template, key, value, lang, description) VALUES (?, ?, ?, ?, ?)', template, key, value, desc.attributes['locale'], desc.text)
+ end
end
end
end
-db.execute('COMMIT');
-
-
#-- THE END -------------------------------------------------------------------
diff --git a/sources/potlatch/import_potlatch.rb b/sources/potlatch/import_potlatch.rb
index f99926a..c63670d 100755
--- a/sources/potlatch/import_potlatch.rb
+++ b/sources/potlatch/import_potlatch.rb
@@ -7,7 +7,7 @@
#
#------------------------------------------------------------------------------
#
-# Copyright (C) 2012 Jochen Topf <jochen@remote.org>
+# Copyright (C) 2013 Jochen Topf <jochen@remote.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -25,56 +25,53 @@
#
#------------------------------------------------------------------------------
-require 'rubygems'
-
-require 'pp'
require 'sqlite3'
require 'rexml/document'
dir = ARGV[0] || '.'
-
db = SQLite3::Database.new(dir + '/taginfo-potlatch.db')
-db.execute('BEGIN TRANSACTION');
+#------------------------------------------------------------------------------
-file = File.new(dir + '/git-source/resources/map_features.xml')
-doc = REXML::Document.new(file)
+db.transaction do |db|
+ file = File.new(dir + '/git-source/resources/map_features.xml')
+ doc = REXML::Document.new(file)
-doc.elements.each('/mapFeatures/category') do |category_element|
- db.execute('INSERT INTO categories (id, name) VALUES (?, ?)', category_element.attributes['id'], category_element.attributes['name'])
-end
+ doc.elements.each('/mapFeatures/category') do |category_element|
+ db.execute('INSERT INTO categories (id, name) VALUES (?, ?)', category_element.attributes['id'], category_element.attributes['name'])
+ end
-doc.elements.each('/mapFeatures/feature') do |feature_element|
- feature_name = feature_element.attributes['name']
+ doc.elements.each('/mapFeatures/feature') do |feature_element|
+ feature_name = feature_element.attributes['name']
- on = { :point => 0, :line => 0, :area => 0, :relation => 0 }
+ on = { :point => 0, :line => 0, :area => 0, :relation => 0 }
- fields = Hash.new
- feature_element.elements.each do |element|
- case element.name
- when 'tag'
- value = element.attributes['v'] == '*' ? nil : element.attributes['v']
- db.execute('INSERT INTO tags (key, value, feature_name) VALUES (?, ?, ?)', element.attributes['k'], value, feature_name)
- when /^(point|line|area|relation)$/
- on[$1.to_sym] = 1
- when /^(category|help)$/
- fields[element.name] = element.text.strip
- when 'icon'
- fields['icon_image'] = element.attributes['image']
- fields['icon_background'] = element.attributes['background']
- fields['icon_foreground'] = element.attributes['foreground']
+ fields = Hash.new
+ feature_element.elements.each do |element|
+ case element.name
+ when 'tag'
+ value = element.attributes['v'] == '*' ? nil : element.attributes['v']
+ db.execute('INSERT INTO tags (key, value, feature_name) VALUES (?, ?, ?)', element.attributes['k'], value, feature_name)
+ when /^(point|line|area|relation)$/
+ on[$1.to_sym] = 1
+ when /^(category|help)$/
+ fields[element.name] = element.text.strip
+ when 'icon'
+ fields['icon_image'] = element.attributes['image']
+ fields['icon_background'] = element.attributes['background']
+ fields['icon_foreground'] = element.attributes['foreground']
+ end
+ end
+
+ if on[:point] + on[:line] + on[:area] + on[:relation] == 0
+ on = { :point => 1, :line => 1, :area => 1, :relation => 1 }
end
- end
- if on[:point] + on[:line] + on[:area] + on[:relation] == 0
- on = { :point => 1, :line => 1, :area => 1, :relation => 1 }
+ db.execute('INSERT INTO features (name, category_id, help, on_point, on_line, on_area, on_relation, icon_image, icon_background, icon_foreground) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
+ feature_name, fields['category'], fields['help'], on[:point], on[:line], on[:area], on[:relation], fields['icon_image'], fields['icon_background'], fields['icon_foreground'])
end
- db.execute('INSERT INTO features (name, category_id, help, on_point, on_line, on_area, on_relation, icon_image, icon_background, icon_foreground) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
- feature_name, fields['category'], fields['help'], on[:point], on[:line], on[:area], on[:relation], fields['icon_image'], fields['icon_background'], fields['icon_foreground'])
end
-db.execute('COMMIT');
-
#-- THE END -------------------------------------------------------------------
diff --git a/sources/wiki/extract_words.rb b/sources/wiki/extract_words.rb
index 8b018d8..70c483d 100755
--- a/sources/wiki/extract_words.rb
+++ b/sources/wiki/extract_words.rb
@@ -9,7 +9,7 @@
#
#------------------------------------------------------------------------------
#
-# Copyright (C) 2012 Jochen Topf <jochen@remote.org>
+# Copyright (C) 2013 Jochen Topf <jochen@remote.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -27,8 +27,6 @@
#
#------------------------------------------------------------------------------
-require 'rubygems'
-
require 'sqlite3'
#------------------------------------------------------------------------------
@@ -126,6 +124,8 @@ dir = ARGV[0] || '.'
db = SQLite3::Database.new(dir + '/taginfo-wiki.db')
db.results_as_hash = true
+#------------------------------------------------------------------------------
+
words = Words.new
we = WordExtractor.new(words)
@@ -141,10 +141,11 @@ words.invert
# puts "#{key}=#{value}: #{words}"
#end
-db.execute('BEGIN TRANSACTION');
-words.dump do |key, value, words|
- db.execute('INSERT INTO words (key, value, words) VALUES (?, ?, ?)', key, value, words)
+db.transaction do |db|
+ words.dump do |key, value, words|
+ db.execute('INSERT INTO words (key, value, words) VALUES (?, ?, ?)', key, value, words)
+ end
end
-db.execute('COMMIT');
+
#-- THE END -------------------------------------------------------------------
diff --git a/sources/wiki/get_image_info.rb b/sources/wiki/get_image_info.rb
index 8430c49..6254208 100755
--- a/sources/wiki/get_image_info.rb
+++ b/sources/wiki/get_image_info.rb
@@ -34,8 +34,6 @@
#
#------------------------------------------------------------------------------
-require 'rubygems'
-
require 'pp'
require 'net/http'
@@ -48,79 +46,79 @@ require './lib/mediawikiapi.rb'
#------------------------------------------------------------------------------
dir = ARGV[0] || '.'
+db = SQLite3::Database.new(dir + '/taginfo-wiki.db')
+db.results_as_hash = true
+
+#------------------------------------------------------------------------------
api = MediaWikiAPI::API.new('wiki.openstreetmap.org')
-db = SQLite3::Database.new(dir + '/taginfo-wiki.db')
-db.results_as_hash = true
image_titles = db.execute("SELECT DISTINCT(image) AS title FROM wikipages WHERE image IS NOT NULL AND image != '' UNION SELECT DISTINCT(image) AS title FROM relation_pages WHERE image IS NOT NULL AND image != ''").
map{ |row| row['title'] }.
select{ |title| title.match(%r{^(file|image):}i) }
-db.execute('BEGIN TRANSACTION');
+db.transaction do |db|
+ puts "Found #{ image_titles.size } different image titles"
-puts "Found #{ image_titles.size } different image titles"
+ images_added = {}
-images_added = {}
+ until image_titles.empty?
+ some_titles = image_titles.slice!(0, 10)
+ puts "Get image info for: #{ some_titles.join(' ') }"
-until image_titles.empty?
- some_titles = image_titles.slice!(0, 10)
- puts "Get image info for: #{ some_titles.join(' ') }"
+ begin
+ data = api.query(:prop => 'imageinfo', :iiprop => 'url|size|mime', :titles => some_titles.join('|'), :iiurlwidth => 10, :iiurlheight => 10)
- begin
- data = api.query(:prop => 'imageinfo', :iiprop => 'url|size|mime', :titles => some_titles.join('|'), :iiurlwidth => 10, :iiurlheight => 10)
-
- if !data['query']
- puts "Wiki API call failed (no 'query' field):"
- pp data
- next
- end
+ if !data['query']
+ puts "Wiki API call failed (no 'query' field):"
+ pp data
+ next
+ end
- normalized = data['query']['normalized']
- if normalized
- normalized.each do |n|
- db.execute('UPDATE wikipages SET image=? WHERE image=?', n['to'], n['from'])
- db.execute('UPDATE relation_pages SET image=? WHERE image=?', n['to'], n['from'])
+ normalized = data['query']['normalized']
+ if normalized
+ normalized.each do |n|
+ db.execute('UPDATE wikipages SET image=? WHERE image=?', n['to'], n['from'])
+ db.execute('UPDATE relation_pages SET image=? WHERE image=?', n['to'], n['from'])
+ end
end
- end
- if !data['query']['pages']
- puts "Wiki API call failed (no 'pages' field):"
- pp data
- next
- end
+ if !data['query']['pages']
+ puts "Wiki API call failed (no 'pages' field):"
+ pp data
+ next
+ end
- data['query']['pages'].each do |k,v|
- if v['imageinfo'] && ! images_added[v['title']]
- info = v['imageinfo'][0]
- if info['thumburl'].match(%r{^(.*/)[0-9]{1,4}(px-.*)$})
- prefix = $1
- suffix = $2
- else
- prefix = nil
- suffix = nil
- puts "Wrong thumbnail format: '#{info['thumburl']}'"
+ data['query']['pages'].each do |k,v|
+ if v['imageinfo'] && ! images_added[v['title']]
+ info = v['imageinfo'][0]
+ if info['thumburl'].match(%r{^(.*/)[0-9]{1,4}(px-.*)$})
+ prefix = $1
+ suffix = $2
+ else
+ prefix = nil
+ suffix = nil
+ puts "Wrong thumbnail format: '#{info['thumburl']}'"
+ end
+ images_added[v['title']] = 1
+ db.execute("INSERT INTO wiki_images (image, width, height, size, mime, image_url, thumb_url_prefix, thumb_url_suffix) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
+ v['title'],
+ info['width'],
+ info['height'],
+ info['size'],
+ info['mime'],
+ info['url'],
+ prefix,
+ suffix
+ )
end
- images_added[v['title']] = 1
- db.execute("INSERT INTO wiki_images (image, width, height, size, mime, image_url, thumb_url_prefix, thumb_url_suffix) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
- v['title'],
- info['width'],
- info['height'],
- info['size'],
- info['mime'],
- info['url'],
- prefix,
- suffix
- )
end
+ rescue
+ puts "Wiki API call error:"
+ pp data
end
- rescue
- puts "Wiki API call error:"
- pp data
end
end
-db.execute('COMMIT');
-
#-- THE END -------------------------------------------------------------------
diff --git a/sources/wiki/get_page_list.rb b/sources/wiki/get_page_list.rb
index 3c4b219..05812c3 100755
--- a/sources/wiki/get_page_list.rb
+++ b/sources/wiki/get_page_list.rb
@@ -26,7 +26,7 @@
#
#------------------------------------------------------------------------------
#
-# Copyright (C) 2012 Jochen Topf <jochen@remote.org>
+# Copyright (C) 2013 Jochen Topf <jochen@remote.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -44,15 +44,10 @@
#
#------------------------------------------------------------------------------
-require 'rubygems'
-
-require 'pp'
-
require 'net/http'
require 'uri'
require 'json'
-
require './lib/mediawikiapi.rb'
#------------------------------------------------------------------------------
@@ -72,7 +67,6 @@ def get_page_list(api, namespaceid, options)
apfrom = ''
loop do
data = api.query(:generator => 'allpages', :gaplimit => 'max', :gapfrom => apfrom, :gapnamespace => namespaceid, :gapfilterredir => options[:redirect] ? 'redirects' : 'nonredirects', :prop => 'info')
-# pp data
data['query']['pages'].each do |k,v|
yield v['touched'], v['title'].gsub(/\s/, '_')
end
@@ -122,4 +116,5 @@ end
tagpages.close
allpages.close
+
#-- THE END -------------------------------------------------------------------
diff --git a/sources/wiki/get_wiki_data.rb b/sources/wiki/get_wiki_data.rb
index d593d9d..f90f4f5 100755
--- a/sources/wiki/get_wiki_data.rb
+++ b/sources/wiki/get_wiki_data.rb
@@ -35,10 +35,6 @@
#
#------------------------------------------------------------------------------
-require 'rubygems'
-
-require 'pp'
-
require 'json'
require 'net/http'
require 'uri'
@@ -210,6 +206,8 @@ class WikiPage
end
end
+#------------------------------------------------------------------------------
+
class KeyOrTagPage < WikiPage
def initialize(type, timestamp, namespace, title)
@@ -262,6 +260,8 @@ class KeyOrTagPage < WikiPage
end
+#------------------------------------------------------------------------------
+
class KeyPage < KeyOrTagPage
end
@@ -384,49 +384,49 @@ end
#------------------------------------------------------------------------------
dir = ARGV[0] || '.'
-
-api = MediaWikiAPI::API.new('wiki.openstreetmap.org', 80, '/w/index.php?')
-
db = SQLite3::Database.new(dir + '/taginfo-wiki.db')
db.results_as_hash = true
-cache = Cache.new(dir, db, api)
+#------------------------------------------------------------------------------
-db.execute('BEGIN TRANSACTION')
+api = MediaWikiAPI::API.new('wiki.openstreetmap.org', 80, '/w/index.php?')
-File.open(dir + '/tagpages.list') do |wikipages|
- wikipages.each do |line|
- line.chomp!
- (type, timestamp, namespace, title) = line.split("\t")
+cache = Cache.new(dir, db, api)
- if title =~ /(^|:)Key:/
- page = KeyPage.new(type, timestamp, namespace, title)
- elsif title =~ /(^|:)Tag:/
- page = TagPage.new(type, timestamp, namespace, title)
- elsif title =~ /(^|:)Relation:/
- page = RelationPage.new(type, timestamp, namespace, title)
- else
- puts "Wiki page has wrong format: '#{title}'"
- next
- end
+db.transaction do |db|
+
+ File.open(dir + '/tagpages.list') do |wikipages|
+ wikipages.each do |line|
+ line.chomp!
+ (type, timestamp, namespace, title) = line.split("\t")
+
+ if title =~ /(^|:)Key:/
+ page = KeyPage.new(type, timestamp, namespace, title)
+ elsif title =~ /(^|:)Tag:/
+ page = TagPage.new(type, timestamp, namespace, title)
+ elsif title =~ /(^|:)Relation:/
+ page = RelationPage.new(type, timestamp, namespace, title)
+ else
+ puts "Wiki page has wrong format: '#{title}'"
+ next
+ end
- puts "Parsing page: title='#{page.title}' type='#{page.type}' timestamp='#{page.timestamp}' namespace='#{page.namespace}'"
+ puts "Parsing page: title='#{page.title}' type='#{page.type}' timestamp='#{page.timestamp}' namespace='#{page.namespace}'"
- reason = page.check_title
- if reason == :ok
- cache.get_page(page)
- page.parse_content(db)
- page.insert(db)
- else
- puts "invalid page: #{reason} #{page.title}"
- db.execute('INSERT INTO invalid_page_titles (reason, title) VALUES (?, ?)', reason.to_s, page.title)
+ reason = page.check_title
+ if reason == :ok
+ cache.get_page(page)
+ page.parse_content(db)
+ page.insert(db)
+ else
+ puts "invalid page: #{reason} #{page.title}"
+ db.execute('INSERT INTO invalid_page_titles (reason, title) VALUES (?, ?)', reason.to_s, page.title)
+ end
end
end
-end
-cache.cleanup
-
-db.execute('COMMIT')
+ cache.cleanup
+end
#-- THE END -------------------------------------------------------------------