diff options
-rwxr-xr-x | sources/db/update_characters.rb | 13 | ||||
-rwxr-xr-x | sources/josm/import_josm.rb | 89 | ||||
-rwxr-xr-x | sources/languages/import_subtag_registry.rb | 43 | ||||
-rwxr-xr-x | sources/languages/import_unicode_scripts.rb | 57 | ||||
-rwxr-xr-x | sources/merkaartor/import_merkaartor.rb | 45 | ||||
-rwxr-xr-x | sources/potlatch/import_potlatch.rb | 67 | ||||
-rwxr-xr-x | sources/wiki/extract_words.rb | 15 | ||||
-rwxr-xr-x | sources/wiki/get_image_info.rb | 108 | ||||
-rwxr-xr-x | sources/wiki/get_page_list.rb | 9 | ||||
-rwxr-xr-x | sources/wiki/get_wiki_data.rb | 72 |
10 files changed, 251 insertions, 267 deletions
diff --git a/sources/db/update_characters.rb b/sources/db/update_characters.rb index 671b38e..9d253d2 100755 --- a/sources/db/update_characters.rb +++ b/sources/db/update_characters.rb @@ -31,6 +31,8 @@ dir = ARGV[0] || '.' db = SQLite3::Database.new(dir + '/taginfo-db.db') db.results_as_hash = true +#------------------------------------------------------------------------------ + regexes = [ [ 'plain', %r{^[a-z]([a-z_]*[a-z])?$} ], [ 'colon', %r{^[a-z][a-z_:]*[a-z]$} ], @@ -50,11 +52,10 @@ db.execute("SELECT key FROM keys").map{ |row| row['key'] }.each do |key| end end -db.execute('BEGIN TRANSACTION'); - -keys.each do |key, type| - db.execute("UPDATE keys SET characters=? WHERE key=?", type, key); +db.transaction do |db| + keys.each do |key, type| + db.execute("UPDATE keys SET characters=? WHERE key=?", type, key); + end end -db.execute('COMMIT'); - +#-- THE END ------------------------------------------------------------------- diff --git a/sources/josm/import_josm.rb b/sources/josm/import_josm.rb index 51009ab..5ed7a38 100755 --- a/sources/josm/import_josm.rb +++ b/sources/josm/import_josm.rb @@ -7,7 +7,7 @@ # #------------------------------------------------------------------------------ # -# Copyright (C) 2012 Jochen Topf <jochen@remote.org> +# Copyright (C) 2013 Jochen Topf <jochen@remote.org> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -25,8 +25,6 @@ # #------------------------------------------------------------------------------ -require 'rubygems' - require 'find' require 'pp' require 'sqlite3' @@ -65,62 +63,61 @@ class Rule end -dir = ARGV[0] || '.' +#------------------------------------------------------------------------------ +dir = ARGV[0] || '.' db = SQLite3::Database.new(dir + '/taginfo-josm.db') -db.execute('BEGIN TRANSACTION'); - -file = File.new(dir + '/elemstyles.xml') -doc = REXML::Document.new(file) +#------------------------------------------------------------------------------ -doc.elements.each('/rules/rule') do |rule_element| - rule = Rule.new(rule_element.to_s) - rule_element.elements.each do |element| - case element.name - when 'condition' - rule.k = element.attributes['k'] - rule.v = element.attributes['v'] - rule.b = element.attributes['b'] - when 'scale_min' - rule.scale_min = element.text - when 'scale_max' - rule.scale_max = element.text - when 'icon' - rule.icon_source = element.attributes['src'] - when 'area' - rule.area_color = element.attributes['colour'] - when 'line' - rule.line_color = element.attributes['colour'] - rule.line_width = element.attributes['width'] - rule.line_realwidth = element.attributes['realwidth'] +db.transaction do |db| + file = File.new(dir + '/elemstyles.xml') + doc = REXML::Document.new(file) + + doc.elements.each('/rules/rule') do |rule_element| + rule = Rule.new(rule_element.to_s) + rule_element.elements.each do |element| + case element.name + when 'condition' + rule.k = element.attributes['k'] + rule.v = element.attributes['v'] + rule.b = element.attributes['b'] + when 'scale_min' + rule.scale_min = element.text + when 'scale_max' + rule.scale_max = element.text + when 'icon' + rule.icon_source = element.attributes['src'] + when 'area' + rule.area_color = element.attributes['colour'] + when 'line' + rule.line_color = element.attributes['colour'] + rule.line_width = element.attributes['width'] + rule.line_realwidth = element.attributes['realwidth'] + end end - end # pp "rule #{rule.k} #{rule.v}" - rule.insert(db) + rule.insert(db) + end end -db.execute('COMMIT'); - -db.execute('BEGIN TRANSACTION'); - -Dir.chdir(dir + '/svn-source') do - Dir.foreach(dir + '/svn-source') do |style| - Find.find(style) do |path| - if FileTest.directory?(path) && File.basename(path) =~ /^\./ - Find.prune - elsif FileTest.file?(path) - File.open(path) do |file| - png = file.read - pathwostyle = path.sub(%r(^#{style}/), '') - db.execute('INSERT INTO josm_style_images (style, path, png) VALUES (?, ?, ?)', style, pathwostyle, SQLite3::Blob.new(png)) +db.transaction do |db| + Dir.chdir(dir + '/svn-source') do + Dir.foreach(dir + '/svn-source') do |style| + Find.find(style) do |path| + if FileTest.directory?(path) && File.basename(path) =~ /^\./ + Find.prune + elsif FileTest.file?(path) + File.open(path) do |file| + png = file.read + pathwostyle = path.sub(%r(^#{style}/), '') + db.execute('INSERT INTO josm_style_images (style, path, png) VALUES (?, ?, ?)', style, pathwostyle, SQLite3::Blob.new(png)) + end end end end end end -db.execute('COMMIT'); - #-- THE END ------------------------------------------------------------------- diff --git a/sources/languages/import_subtag_registry.rb b/sources/languages/import_subtag_registry.rb index ac3d17b..300b0fb 100755 --- a/sources/languages/import_subtag_registry.rb +++ b/sources/languages/import_subtag_registry.rb @@ -25,8 +25,6 @@ # #------------------------------------------------------------------------------ -require 'rubygems' - require 'sqlite3' class Subtag @@ -63,10 +61,13 @@ class Subtag end -dir = ARGV[0] || '.' +#------------------------------------------------------------------------------ +dir = ARGV[0] || '.' db = SQLite3::Database.new(dir + '/taginfo-languages.db') +#------------------------------------------------------------------------------ + registry_file = "#{dir}/language-subtag-registry" file_date = nil @@ -101,27 +102,25 @@ end SUBTAG_TYPES = %w( language script region variant ) -db.execute('BEGIN TRANSACTION'); - -Subtag.entries.each do |entry| - if SUBTAG_TYPES.include?(entry.type) && - entry.description != 'Private use' && - (entry.type != 'language' || (entry.scope != 'special' && entry.scope != 'collection')) && - (entry.type != 'script' || !entry.subtag.match(%r{^Z}) ) && - (entry.type != 'region' || entry.subtag.match(%r{^[A-Z]{2}$}) ) - db.execute("INSERT INTO subtags (stype, subtag, added, suppress_script, scope, description, prefix) VALUES (?, ?, ?, ?, ?, ?, ?)", - entry.type, - entry.subtag, - entry.added, - entry.suppress_script, - entry.scope, - entry.description, - entry.prefix - ) +db.transaction do |db| + Subtag.entries.each do |entry| + if SUBTAG_TYPES.include?(entry.type) && + entry.description != 'Private use' && + (entry.type != 'language' || (entry.scope != 'special' && entry.scope != 'collection')) && + (entry.type != 'script' || !entry.subtag.match(%r{^Z}) ) && + (entry.type != 'region' || entry.subtag.match(%r{^[A-Z]{2}$}) ) + db.execute("INSERT INTO subtags (stype, subtag, added, suppress_script, scope, description, prefix) VALUES (?, ?, ?, ?, ?, ?, ?)", + entry.type, + entry.subtag, + entry.added, + entry.suppress_script, + entry.scope, + entry.description, + entry.prefix + ) + end end end -db.execute('COMMIT'); - #-- THE END ------------------------------------------------------------------- diff --git a/sources/languages/import_unicode_scripts.rb b/sources/languages/import_unicode_scripts.rb index 7855a78..e946983 100755 --- a/sources/languages/import_unicode_scripts.rb +++ b/sources/languages/import_unicode_scripts.rb @@ -25,48 +25,47 @@ # #------------------------------------------------------------------------------ -require 'rubygems' - require 'sqlite3' dir = ARGV[0] || '.' - db = SQLite3::Database.new(dir + '/taginfo-languages.db') +#------------------------------------------------------------------------------ + property_value_alias_file = "#{dir}/PropertyValueAliases.txt" codepoint_script_mapping_file = "#{dir}/Scripts.txt" -db.execute('BEGIN TRANSACTION'); - -open(property_value_alias_file) do |file| - file.each do |line| - line.chomp! - if line.match(%r{^sc ;}) - (sc, script, name) = line.split(%r{\s*;\s*}) - db.execute("INSERT INTO unicode_scripts (script, name) VALUES (?, ?)", script, name) +db.transaction do |db| + open(property_value_alias_file) do |file| + file.each do |line| + line.chomp! + if line.match(%r{^sc ;}) + (sc, script, name) = line.split(%r{\s*;\s*}) + db.execute("INSERT INTO unicode_scripts (script, name) VALUES (?, ?)", script, name) + end end end -end -open(codepoint_script_mapping_file) do |file| - file.each do |line| - line.chomp! - line.sub!(%r{\s*#.*}, '') - next if line.match(%r{^$}) - (codes, script) = line.split(%r{\s+;\s+}) - if codes.match(%r{^[0-9A-F]{4,5}$}) - from = codes - to = codes - elsif codes.match(%r{^([0-9A-F]{4,5})..([0-9A-F]{4,5})$}) - from = $1 - to = $2 - else - puts "Line does not match: #{line}" - next + open(codepoint_script_mapping_file) do |file| + file.each do |line| + line.chomp! + line.sub!(%r{\s*#.*}, '') + next if line.match(%r{^$}) + (codes, script) = line.split(%r{\s+;\s+}) + if codes.match(%r{^[0-9A-F]{4,5}$}) + from = codes + to = codes + elsif codes.match(%r{^([0-9A-F]{4,5})..([0-9A-F]{4,5})$}) + from = $1 + to = $2 + else + puts "Line does not match: #{line}" + next + end + db.execute("INSERT INTO unicode_codepoint_script_mapping (codepoint_from, codepoint_to, name) VALUES (?, ?, ?)", from, to, script) end - db.execute("INSERT INTO unicode_codepoint_script_mapping (codepoint_from, codepoint_to, name) VALUES (?, ?, ?)", from, to, script) end end -db.execute('COMMIT'); +#-- THE END ------------------------------------------------------------------- diff --git a/sources/merkaartor/import_merkaartor.rb b/sources/merkaartor/import_merkaartor.rb index 40572bd..4f0ad4b 100755 --- a/sources/merkaartor/import_merkaartor.rb +++ b/sources/merkaartor/import_merkaartor.rb @@ -7,7 +7,7 @@ # #------------------------------------------------------------------------------ # -# Copyright (C) 2012 Jochen Topf <jochen@remote.org> +# Copyright (C) 2013 Jochen Topf <jochen@remote.org> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -25,44 +25,41 @@ # #------------------------------------------------------------------------------ -require 'rubygems' - require 'pp' require 'sqlite3' require 'rexml/document' dir = ARGV[0] || '.' - db = SQLite3::Database.new(dir + '/taginfo-merkaartor.db') -db.execute('BEGIN TRANSACTION'); +#------------------------------------------------------------------------------ template = 'default' -db.execute('INSERT INTO templates (name) VALUES (?)', template) -file = File.new(dir + '/git-source/Templates/' + template + '.mat') -doc = REXML::Document.new(file) +db.transaction do |db| + db.execute('INSERT INTO templates (name) VALUES (?)', template) -doc.elements.each('/templates/widgets/widget') do |widget| - key = widget.attributes['tag'] - link = widget.elements['link'].attributes['src'] if widget.elements['link'] - selector = widget.elements['selector'].attributes['expr'] if widget.elements['selector'] - db.execute('INSERT INTO keys (template, key, tag_type, link, selector) VALUES (?, ?, ?, ?, ?)', template, key, widget.attributes['type'], link, selector) - widget.elements.each('description') do |desc| - db.execute('INSERT INTO key_descriptions (template, key, lang, description) VALUES (?, ?, ?, ?)', template, key, desc.attributes['locale'], desc.text) - end - widget.elements.each('value') do |valelement| - value = valelement.attributes['tag'] - vlink = valelement.elements['link'].attributes['src'] if valelement.elements['link'] - db.execute('INSERT INTO tags (template, key, value, link) VALUES (?, ?, ?, ?)', template, key, value, vlink) + file = File.new(dir + '/git-source/Templates/' + template + '.mat') + doc = REXML::Document.new(file) + + doc.elements.each('/templates/widgets/widget') do |widget| + key = widget.attributes['tag'] + link = widget.elements['link'].attributes['src'] if widget.elements['link'] + selector = widget.elements['selector'].attributes['expr'] if widget.elements['selector'] + db.execute('INSERT INTO keys (template, key, tag_type, link, selector) VALUES (?, ?, ?, ?, ?)', template, key, widget.attributes['type'], link, selector) widget.elements.each('description') do |desc| - db.execute('INSERT INTO tag_descriptions (template, key, value, lang, description) VALUES (?, ?, ?, ?, ?)', template, key, value, desc.attributes['locale'], desc.text) + db.execute('INSERT INTO key_descriptions (template, key, lang, description) VALUES (?, ?, ?, ?)', template, key, desc.attributes['locale'], desc.text) + end + widget.elements.each('value') do |valelement| + value = valelement.attributes['tag'] + vlink = valelement.elements['link'].attributes['src'] if valelement.elements['link'] + db.execute('INSERT INTO tags (template, key, value, link) VALUES (?, ?, ?, ?)', template, key, value, vlink) + widget.elements.each('description') do |desc| + db.execute('INSERT INTO tag_descriptions (template, key, value, lang, description) VALUES (?, ?, ?, ?, ?)', template, key, value, desc.attributes['locale'], desc.text) + end end end end -db.execute('COMMIT'); - - #-- THE END ------------------------------------------------------------------- diff --git a/sources/potlatch/import_potlatch.rb b/sources/potlatch/import_potlatch.rb index f99926a..c63670d 100755 --- a/sources/potlatch/import_potlatch.rb +++ b/sources/potlatch/import_potlatch.rb @@ -7,7 +7,7 @@ # #------------------------------------------------------------------------------ # -# Copyright (C) 2012 Jochen Topf <jochen@remote.org> +# Copyright (C) 2013 Jochen Topf <jochen@remote.org> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -25,56 +25,53 @@ # #------------------------------------------------------------------------------ -require 'rubygems' - -require 'pp' require 'sqlite3' require 'rexml/document' dir = ARGV[0] || '.' - db = SQLite3::Database.new(dir + '/taginfo-potlatch.db') -db.execute('BEGIN TRANSACTION'); +#------------------------------------------------------------------------------ -file = File.new(dir + '/git-source/resources/map_features.xml') -doc = REXML::Document.new(file) +db.transaction do |db| + file = File.new(dir + '/git-source/resources/map_features.xml') + doc = REXML::Document.new(file) -doc.elements.each('/mapFeatures/category') do |category_element| - db.execute('INSERT INTO categories (id, name) VALUES (?, ?)', category_element.attributes['id'], category_element.attributes['name']) -end + doc.elements.each('/mapFeatures/category') do |category_element| + db.execute('INSERT INTO categories (id, name) VALUES (?, ?)', category_element.attributes['id'], category_element.attributes['name']) + end -doc.elements.each('/mapFeatures/feature') do |feature_element| - feature_name = feature_element.attributes['name'] + doc.elements.each('/mapFeatures/feature') do |feature_element| + feature_name = feature_element.attributes['name'] - on = { :point => 0, :line => 0, :area => 0, :relation => 0 } + on = { :point => 0, :line => 0, :area => 0, :relation => 0 } - fields = Hash.new - feature_element.elements.each do |element| - case element.name - when 'tag' - value = element.attributes['v'] == '*' ? nil : element.attributes['v'] - db.execute('INSERT INTO tags (key, value, feature_name) VALUES (?, ?, ?)', element.attributes['k'], value, feature_name) - when /^(point|line|area|relation)$/ - on[$1.to_sym] = 1 - when /^(category|help)$/ - fields[element.name] = element.text.strip - when 'icon' - fields['icon_image'] = element.attributes['image'] - fields['icon_background'] = element.attributes['background'] - fields['icon_foreground'] = element.attributes['foreground'] + fields = Hash.new + feature_element.elements.each do |element| + case element.name + when 'tag' + value = element.attributes['v'] == '*' ? nil : element.attributes['v'] + db.execute('INSERT INTO tags (key, value, feature_name) VALUES (?, ?, ?)', element.attributes['k'], value, feature_name) + when /^(point|line|area|relation)$/ + on[$1.to_sym] = 1 + when /^(category|help)$/ + fields[element.name] = element.text.strip + when 'icon' + fields['icon_image'] = element.attributes['image'] + fields['icon_background'] = element.attributes['background'] + fields['icon_foreground'] = element.attributes['foreground'] + end + end + + if on[:point] + on[:line] + on[:area] + on[:relation] == 0 + on = { :point => 1, :line => 1, :area => 1, :relation => 1 } end - end - if on[:point] + on[:line] + on[:area] + on[:relation] == 0 - on = { :point => 1, :line => 1, :area => 1, :relation => 1 } + db.execute('INSERT INTO features (name, category_id, help, on_point, on_line, on_area, on_relation, icon_image, icon_background, icon_foreground) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', + feature_name, fields['category'], fields['help'], on[:point], on[:line], on[:area], on[:relation], fields['icon_image'], fields['icon_background'], fields['icon_foreground']) end - db.execute('INSERT INTO features (name, category_id, help, on_point, on_line, on_area, on_relation, icon_image, icon_background, icon_foreground) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', - feature_name, fields['category'], fields['help'], on[:point], on[:line], on[:area], on[:relation], fields['icon_image'], fields['icon_background'], fields['icon_foreground']) end -db.execute('COMMIT'); - #-- THE END ------------------------------------------------------------------- diff --git a/sources/wiki/extract_words.rb b/sources/wiki/extract_words.rb index 8b018d8..70c483d 100755 --- a/sources/wiki/extract_words.rb +++ b/sources/wiki/extract_words.rb @@ -9,7 +9,7 @@ # #------------------------------------------------------------------------------ # -# Copyright (C) 2012 Jochen Topf <jochen@remote.org> +# Copyright (C) 2013 Jochen Topf <jochen@remote.org> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -27,8 +27,6 @@ # #------------------------------------------------------------------------------ -require 'rubygems' - require 'sqlite3' #------------------------------------------------------------------------------ @@ -126,6 +124,8 @@ dir = ARGV[0] || '.' db = SQLite3::Database.new(dir + '/taginfo-wiki.db') db.results_as_hash = true +#------------------------------------------------------------------------------ + words = Words.new we = WordExtractor.new(words) @@ -141,10 +141,11 @@ words.invert # puts "#{key}=#{value}: #{words}" #end -db.execute('BEGIN TRANSACTION'); -words.dump do |key, value, words| - db.execute('INSERT INTO words (key, value, words) VALUES (?, ?, ?)', key, value, words) +db.transaction do |db| + words.dump do |key, value, words| + db.execute('INSERT INTO words (key, value, words) VALUES (?, ?, ?)', key, value, words) + end end -db.execute('COMMIT'); + #-- THE END ------------------------------------------------------------------- diff --git a/sources/wiki/get_image_info.rb b/sources/wiki/get_image_info.rb index 8430c49..6254208 100755 --- a/sources/wiki/get_image_info.rb +++ b/sources/wiki/get_image_info.rb @@ -34,8 +34,6 @@ # #------------------------------------------------------------------------------ -require 'rubygems' - require 'pp' require 'net/http' @@ -48,79 +46,79 @@ require './lib/mediawikiapi.rb' #------------------------------------------------------------------------------ dir = ARGV[0] || '.' +db = SQLite3::Database.new(dir + '/taginfo-wiki.db') +db.results_as_hash = true + +#------------------------------------------------------------------------------ api = MediaWikiAPI::API.new('wiki.openstreetmap.org') -db = SQLite3::Database.new(dir + '/taginfo-wiki.db') -db.results_as_hash = true image_titles = db.execute("SELECT DISTINCT(image) AS title FROM wikipages WHERE image IS NOT NULL AND image != '' UNION SELECT DISTINCT(image) AS title FROM relation_pages WHERE image IS NOT NULL AND image != ''"). map{ |row| row['title'] }. select{ |title| title.match(%r{^(file|image):}i) } -db.execute('BEGIN TRANSACTION'); +db.transaction do |db| + puts "Found #{ image_titles.size } different image titles" -puts "Found #{ image_titles.size } different image titles" + images_added = {} -images_added = {} + until image_titles.empty? + some_titles = image_titles.slice!(0, 10) + puts "Get image info for: #{ some_titles.join(' ') }" -until image_titles.empty? - some_titles = image_titles.slice!(0, 10) - puts "Get image info for: #{ some_titles.join(' ') }" + begin + data = api.query(:prop => 'imageinfo', :iiprop => 'url|size|mime', :titles => some_titles.join('|'), :iiurlwidth => 10, :iiurlheight => 10) - begin - data = api.query(:prop => 'imageinfo', :iiprop => 'url|size|mime', :titles => some_titles.join('|'), :iiurlwidth => 10, :iiurlheight => 10) - - if !data['query'] - puts "Wiki API call failed (no 'query' field):" - pp data - next - end + if !data['query'] + puts "Wiki API call failed (no 'query' field):" + pp data + next + end - normalized = data['query']['normalized'] - if normalized - normalized.each do |n| - db.execute('UPDATE wikipages SET image=? WHERE image=?', n['to'], n['from']) - db.execute('UPDATE relation_pages SET image=? WHERE image=?', n['to'], n['from']) + normalized = data['query']['normalized'] + if normalized + normalized.each do |n| + db.execute('UPDATE wikipages SET image=? WHERE image=?', n['to'], n['from']) + db.execute('UPDATE relation_pages SET image=? WHERE image=?', n['to'], n['from']) + end end - end - if !data['query']['pages'] - puts "Wiki API call failed (no 'pages' field):" - pp data - next - end + if !data['query']['pages'] + puts "Wiki API call failed (no 'pages' field):" + pp data + next + end - data['query']['pages'].each do |k,v| - if v['imageinfo'] && ! images_added[v['title']] - info = v['imageinfo'][0] - if info['thumburl'].match(%r{^(.*/)[0-9]{1,4}(px-.*)$}) - prefix = $1 - suffix = $2 - else - prefix = nil - suffix = nil - puts "Wrong thumbnail format: '#{info['thumburl']}'" + data['query']['pages'].each do |k,v| + if v['imageinfo'] && ! images_added[v['title']] + info = v['imageinfo'][0] + if info['thumburl'].match(%r{^(.*/)[0-9]{1,4}(px-.*)$}) + prefix = $1 + suffix = $2 + else + prefix = nil + suffix = nil + puts "Wrong thumbnail format: '#{info['thumburl']}'" + end + images_added[v['title']] = 1 + db.execute("INSERT INTO wiki_images (image, width, height, size, mime, image_url, thumb_url_prefix, thumb_url_suffix) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + v['title'], + info['width'], + info['height'], + info['size'], + info['mime'], + info['url'], + prefix, + suffix + ) end - images_added[v['title']] = 1 - db.execute("INSERT INTO wiki_images (image, width, height, size, mime, image_url, thumb_url_prefix, thumb_url_suffix) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", - v['title'], - info['width'], - info['height'], - info['size'], - info['mime'], - info['url'], - prefix, - suffix - ) end + rescue + puts "Wiki API call error:" + pp data end - rescue - puts "Wiki API call error:" - pp data end end -db.execute('COMMIT'); - #-- THE END ------------------------------------------------------------------- diff --git a/sources/wiki/get_page_list.rb b/sources/wiki/get_page_list.rb index 3c4b219..05812c3 100755 --- a/sources/wiki/get_page_list.rb +++ b/sources/wiki/get_page_list.rb @@ -26,7 +26,7 @@ # #------------------------------------------------------------------------------ # -# Copyright (C) 2012 Jochen Topf <jochen@remote.org> +# Copyright (C) 2013 Jochen Topf <jochen@remote.org> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -44,15 +44,10 @@ # #------------------------------------------------------------------------------ -require 'rubygems' - -require 'pp' - require 'net/http' require 'uri' require 'json' - require './lib/mediawikiapi.rb' #------------------------------------------------------------------------------ @@ -72,7 +67,6 @@ def get_page_list(api, namespaceid, options) apfrom = '' loop do data = api.query(:generator => 'allpages', :gaplimit => 'max', :gapfrom => apfrom, :gapnamespace => namespaceid, :gapfilterredir => options[:redirect] ? 'redirects' : 'nonredirects', :prop => 'info') -# pp data data['query']['pages'].each do |k,v| yield v['touched'], v['title'].gsub(/\s/, '_') end @@ -122,4 +116,5 @@ end tagpages.close allpages.close + #-- THE END ------------------------------------------------------------------- diff --git a/sources/wiki/get_wiki_data.rb b/sources/wiki/get_wiki_data.rb index d593d9d..f90f4f5 100755 --- a/sources/wiki/get_wiki_data.rb +++ b/sources/wiki/get_wiki_data.rb @@ -35,10 +35,6 @@ # #------------------------------------------------------------------------------ -require 'rubygems' - -require 'pp' - require 'json' require 'net/http' require 'uri' @@ -210,6 +206,8 @@ class WikiPage end end +#------------------------------------------------------------------------------ + class KeyOrTagPage < WikiPage def initialize(type, timestamp, namespace, title) @@ -262,6 +260,8 @@ class KeyOrTagPage < WikiPage end +#------------------------------------------------------------------------------ + class KeyPage < KeyOrTagPage end @@ -384,49 +384,49 @@ end #------------------------------------------------------------------------------ dir = ARGV[0] || '.' - -api = MediaWikiAPI::API.new('wiki.openstreetmap.org', 80, '/w/index.php?') - db = SQLite3::Database.new(dir + '/taginfo-wiki.db') db.results_as_hash = true -cache = Cache.new(dir, db, api) +#------------------------------------------------------------------------------ -db.execute('BEGIN TRANSACTION') +api = MediaWikiAPI::API.new('wiki.openstreetmap.org', 80, '/w/index.php?') -File.open(dir + '/tagpages.list') do |wikipages| - wikipages.each do |line| - line.chomp! - (type, timestamp, namespace, title) = line.split("\t") +cache = Cache.new(dir, db, api) - if title =~ /(^|:)Key:/ - page = KeyPage.new(type, timestamp, namespace, title) - elsif title =~ /(^|:)Tag:/ - page = TagPage.new(type, timestamp, namespace, title) - elsif title =~ /(^|:)Relation:/ - page = RelationPage.new(type, timestamp, namespace, title) - else - puts "Wiki page has wrong format: '#{title}'" - next - end +db.transaction do |db| + + File.open(dir + '/tagpages.list') do |wikipages| + wikipages.each do |line| + line.chomp! + (type, timestamp, namespace, title) = line.split("\t") + + if title =~ /(^|:)Key:/ + page = KeyPage.new(type, timestamp, namespace, title) + elsif title =~ /(^|:)Tag:/ + page = TagPage.new(type, timestamp, namespace, title) + elsif title =~ /(^|:)Relation:/ + page = RelationPage.new(type, timestamp, namespace, title) + else + puts "Wiki page has wrong format: '#{title}'" + next + end - puts "Parsing page: title='#{page.title}' type='#{page.type}' timestamp='#{page.timestamp}' namespace='#{page.namespace}'" + puts "Parsing page: title='#{page.title}' type='#{page.type}' timestamp='#{page.timestamp}' namespace='#{page.namespace}'" - reason = page.check_title - if reason == :ok - cache.get_page(page) - page.parse_content(db) - page.insert(db) - else - puts "invalid page: #{reason} #{page.title}" - db.execute('INSERT INTO invalid_page_titles (reason, title) VALUES (?, ?)', reason.to_s, page.title) + reason = page.check_title + if reason == :ok + cache.get_page(page) + page.parse_content(db) + page.insert(db) + else + puts "invalid page: #{reason} #{page.title}" + db.execute('INSERT INTO invalid_page_titles (reason, title) VALUES (?, ?)', reason.to_s, page.title) + end end end -end -cache.cleanup - -db.execute('COMMIT') + cache.cleanup +end #-- THE END ------------------------------------------------------------------- |