diff options
author | Jochen Topf <jochen@topf.org> | 2013-01-09 19:18:55 +0100 |
---|---|---|
committer | Jochen Topf <jochen@topf.org> | 2013-01-09 19:18:55 +0100 |
commit | 64a047f622a5ed15dea94b5e52dd8c948fce9e95 (patch) | |
tree | 1dfeecec0fa0477c1a246ef6567dffc56592d325 | |
parent | 9cadfd89c12c9223e7c572646680d0bcce57310c (diff) | |
download | taginfo-64a047f622a5ed15dea94b5e52dd8c948fce9e95.tar taginfo-64a047f622a5ed15dea94b5e52dd8c948fce9e95.tar.gz |
Better support for wiki images.
Key and tag wiki pages can contain images. Until now we only got the titles of
those images. Now we also get the URL of the image, the URL of its thumbnails,
the width, height, and MIME type. This information is now exposed in the API and
is used to show the images in the Overview tab of the key and tag pages.
While we are changing the update process anyway, I also changed the program that
gets the list of all pages so that it outputs the time each page was last changed.
This information is currently not used, but it could be used to cache those
pages locally, making the update much faster and putting less strain on the
wiki server.
-rwxr-xr-x | sources/wiki/get_image_info.rb | 118 | ||||
-rwxr-xr-x | sources/wiki/get_page_list.rb | 16 | ||||
-rwxr-xr-x | sources/wiki/get_wiki_data.rb | 20 | ||||
-rw-r--r-- | sources/wiki/lib/mediawikiapi.rb | 5 | ||||
-rw-r--r-- | sources/wiki/post.sql | 2 | ||||
-rw-r--r-- | sources/wiki/pre.sql | 21 | ||||
-rw-r--r-- | web/lib/api/v4/key.rb | 12 | ||||
-rw-r--r-- | web/lib/api/v4/tag.rb | 12 | ||||
-rw-r--r-- | web/lib/ui/keys_tags.rb | 22 | ||||
-rw-r--r-- | web/lib/utils.rb | 10 | ||||
-rw-r--r-- | web/views/key.erb | 5 | ||||
-rw-r--r-- | web/views/tag.erb | 5 | ||||
-rw-r--r-- | web/viewsjs/key.js.erb | 2 | ||||
-rw-r--r-- | web/viewsjs/tag.js.erb | 2 |
14 files changed, 230 insertions, 22 deletions
diff --git a/sources/wiki/get_image_info.rb b/sources/wiki/get_image_info.rb new file mode 100755 index 0000000..06707ca --- /dev/null +++ b/sources/wiki/get_image_info.rb @@ -0,0 +1,118 @@ +#!/usr/bin/ruby +#------------------------------------------------------------------------------ +# +# get_image_info.rb [DIR] +# +#------------------------------------------------------------------------------ +# +# Gets meta information about images from the OSM wiki. +# +# Reads the list of all images used in Key: and Tag: pages from the local +# database and requests meta information (width, height, mime type, URL, ...) +# for those images. Writes this data into the wiki_images table. +# +# The database must be in DIR or in the current directory, if no directory +# was given on the command line. +# +#------------------------------------------------------------------------------ +# +# Copyright (C) 2013 Jochen Topf <jochen@remote.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +#------------------------------------------------------------------------------ + +require 'rubygems' + +require 'pp' + +require 'net/http' +require 'uri' +require 'json' +require 'sqlite3' + +require 'lib/mediawikiapi.rb' + +#------------------------------------------------------------------------------ + +dir = ARGV[0] || '.' 
+ +api = MediaWikiAPI::API.new('wiki.openstreetmap.org') +api.add_header('User-agent', 'taginfo/0.1 (jochen@remote.org)') + +db = SQLite3::Database.new(dir + '/taginfo-wiki.db') +db.results_as_hash = true +image_titles = db.execute("SELECT DISTINCT(image) AS title FROM wikipages").map{ |row| row['title'] }.select{ |title| !title.nil? && title.match(%r{^(file|image):}i) } + +db.execute('BEGIN TRANSACTION'); + +until image_titles.empty? + some_titles = image_titles.slice!(0, 10) +# puts some_titles.join(",") + "\n" + + begin + data = api.query(:prop => 'imageinfo', :iiprop => 'url|size|mime', :titles => some_titles.join('|'), :iiurlwidth => 200, :iiurlheight => 200) + + if !data['query'] + STDERR.puts "Wiki API call failed (no 'query' field):" + pp data + next + end + + normalized = data['query']['normalized'] + if normalized + normalized.each do |n| + db.execute('UPDATE wikipages SET image=? WHERE image=?', n['to'], n['from']) + end + end + + if !data['query']['pages'] + STDERR.puts "Wiki API call failed (no 'pages' field):" + pp data + next + end + + data['query']['pages'].each do |k,v| + if v['imageinfo'] + info = v['imageinfo'][0] + if info['thumburl'].match(%r{^(.*/)[0-9]{1,4}(px-.*)$}) + prefix = $1 + suffix = $2 + else + prefix = nil + suffix = nil + end + db.execute("INSERT INTO wiki_images (image, width, height, size, mime, image_url, thumb_url_prefix, thumb_url_suffix) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + v['title'], + info['width'], + info['height'], + info['size'], + info['mime'], + info['url'], + prefix, + suffix + ) + end + end + rescue + puts "Wiki API call error:" + pp data + end +end + +db.execute('COMMIT'); + + +#-- THE END ------------------------------------------------------------------- diff --git a/sources/wiki/get_page_list.rb b/sources/wiki/get_page_list.rb index 88afe10..7cdbafa 100755 --- a/sources/wiki/get_page_list.rb +++ b/sources/wiki/get_page_list.rb @@ -70,13 +70,13 @@ end def get_page_list(api, namespaceid, options) apfrom = '' loop 
do - data = api.query(:list => 'allpages', :aplimit => 'max', :apfrom => apfrom, :apnamespace => namespaceid, :apfilterredir => options[:redirect] ? 'redirects' : 'nonredirects') + data = api.query(:generator => 'allpages', :gaplimit => 'max', :gapfrom => apfrom, :gapnamespace => namespaceid, :gapfilterredir => options[:redirect] ? 'redirects' : 'nonredirects', :prop => 'info') # pp data - data['query']['allpages'].each do |h| - yield h['title'].gsub(/\s/, '_') + data['query']['pages'].each do |k,v| + yield v['touched'], v['title'].gsub(/\s/, '_') end if data['query-continue'] - apfrom = data['query-continue']['allpages']['apfrom'].gsub(/\s/, '_') + apfrom = data['query-continue']['allpages']['gapfrom'].gsub(/\s/, '_') # puts "apfrom=#{apfrom}" else return @@ -102,16 +102,16 @@ tagpages = File.open(dir + '/tagpages.list', 'w') namespaces.keys.sort.each do |namespace| id = namespaces[namespace] - get_page_list(api, id, :redirect => false) do |page| - line = ['page', namespace, page].join("\t") + get_page_list(api, id, :redirect => false) do |timestamp, page| + line = ['page', timestamp, namespace, page].join("\t") allpages.puts line if page =~ /^([^:]+:)?(Key|Tag):(.+)$/ tagpages.puts line end end - get_page_list(api, id, :redirect => true) do |page| - line = ['redirect', namespace, page].join("\t") + get_page_list(api, id, :redirect => true) do |timestamp, page| + line = ['redirect', timestamp, namespace, page].join("\t") allpages.puts line if page =~ /^([^:]+:)?(Key|Tag):(.+)$/ tagpages.puts line diff --git a/sources/wiki/get_wiki_data.rb b/sources/wiki/get_wiki_data.rb index 302db28..34d1182 100755 --- a/sources/wiki/get_wiki_data.rb +++ b/sources/wiki/get_wiki_data.rb @@ -54,10 +54,11 @@ class WikiPage attr_accessor :content attr_accessor :description, :image, :group, :onNode, :onWay, :onArea, :onRelation, :has_templ - attr_reader :type, :namespace, :title, :tag, :key, :value, :lang, :ttype, :tags_implies, :tags_combination, :tags_linked, :parsed + attr_reader 
:type, :timestamp, :namespace, :title, :tag, :key, :value, :lang, :ttype, :tags_implies, :tags_combination, :tags_linked, :parsed - def initialize(type, namespace, title) + def initialize(type, timestamp, namespace, title) @type = type # 'page' or 'redirect' + @timestamp = timestamp # page last touched @namespace = namespace # 'XX' (mediawiki namespace or '') @title = title # wiki page title @@ -129,7 +130,7 @@ class WikiPage content, group, type, - has_templ, + has_templ ? 1 : 0, parsed ? 1 : 0, description, image, @@ -239,8 +240,8 @@ File.open(dir + '/tagpages.list') do |wikipages| wikipages.each do |line| line.chomp! t = line.split("\t") - page = WikiPage.new(t[0], t[1], t[2]) - puts "page: (#{page.title}) (#{page.type}) (#{page.namespace}) (#{page.tag})" + page = WikiPage.new(t[0], t[1], t[2], t[3]) + puts "page: (#{page.title}) (#{page.type}) (#{page.timestamp}) (#{page.namespace}) (#{page.tag})" reason = page.check_title if reason == :ok @@ -271,7 +272,14 @@ File.open(dir + '/tagpages.list') do |wikipages| end end if template.named_parameters['image'] - page.image = template.named_parameters['image'][0] + ititle = template.named_parameters['image'][0] + if !ititle.nil? 
&& ititle.match(%r{^(file|image):(.*)$}i) + page.image = "File:#{$2}" + else + puts "invalid image: #{reason} #{page.title} #{ititle}" + db.execute('INSERT INTO invalid_image_titles (reason, page_title, image_title) VALUES (?, ?, ?)', reason, page.title, ititle) + page.image = '' + end end if template.named_parameters['group'] page.group = template.named_parameters['group'][0] diff --git a/sources/wiki/lib/mediawikiapi.rb b/sources/wiki/lib/mediawikiapi.rb index a231cee..293aa6f 100644 --- a/sources/wiki/lib/mediawikiapi.rb +++ b/sources/wiki/lib/mediawikiapi.rb @@ -26,6 +26,8 @@ # #------------------------------------------------------------------------------ +require 'cgi' + module MediaWikiAPI class API @@ -42,12 +44,13 @@ module MediaWikiAPI end def build_path(params) - @path + params.to_a.map{ |el| el.join('=') }.join('&') + @path + params.to_a.map{ |el| CGI::escape(el[0].to_s) + '=' + CGI::escape(el[1].to_s) }.join('&') end def get(params) path = build_path(params) http = Net::HTTP.start(@host, @port) +# puts "Getting path [#{path}]" http.get(path, @headers) end diff --git a/sources/wiki/post.sql b/sources/wiki/post.sql index 773a04d..99eb26f 100644 --- a/sources/wiki/post.sql +++ b/sources/wiki/post.sql @@ -13,6 +13,8 @@ UPDATE wikipages SET status='e' WHERE type='page' AND has_templ='true' AND parse CREATE INDEX wikipages_key_value_idx ON wikipages(key, value); +CREATE INDEX wiki_images_image ON wiki_images(image); + INSERT INTO wikipages_keys (key, langs, lang_count) SELECT key, group_concat(lang || ' ' || status), count(*) FROM wikipages WHERE value IS NULL GROUP BY key; INSERT INTO wikipages_tags (key, value, langs, lang_count) SELECT key, value, group_concat(lang || ' ' || status), count(*) FROM wikipages WHERE value IS NOT NULL GROUP BY key, value; diff --git a/sources/wiki/pre.sql b/sources/wiki/pre.sql index ebb80d3..8a515e7 100644 --- a/sources/wiki/pre.sql +++ b/sources/wiki/pre.sql @@ -33,6 +33,19 @@ CREATE TABLE wikipages ( status TEXT ); +DROP 
TABLE IF EXISTS wiki_images; + +CREATE TABLE wiki_images ( + image TEXT, + width INTEGER, + height INTEGER, + size INTEGER, + mime TEXT, + image_url TEXT, + thumb_url_prefix TEXT, + thumb_url_suffix TEXT +); + DROP TABLE IF EXISTS wikipages_keys; CREATE TABLE wikipages_keys ( @@ -64,6 +77,14 @@ CREATE TABLE invalid_page_titles ( title TEXT ); +DROP TABLE IF EXISTS invalid_image_titles; + +CREATE TABLE invalid_image_titles ( + reason TEXT, + page_title TEXT, + image_title TEXT +); + DROP TABLE IF EXISTS words; CREATE TABLE words ( diff --git a/web/lib/api/v4/key.rb b/web/lib/api/v4/key.rb index 7cc9532..420ef37 100644 --- a/web/lib/api/v4/key.rb +++ b/web/lib/api/v4/key.rb @@ -283,7 +283,15 @@ class Taginfo < Sinatra::Base [:language_en, :STRING, 'Language name in English.'], [:title, :STRING, 'Wiki page title.'], [:description, :STRING, 'Short description of key from wiki page.'], - [:image, :STRING, 'Wiki page title of associated image.'], + [:image, :HASH, 'Associated image.', [ + [:title, :STRING, 'Wiki page title of associated image.' ], + [:width, :INT, 'Width of image.' ], + [:height, :INT, 'Height of image.' ], + [:mime, :STRING, 'MIME type of image.' ], + [:image_url, :STRING, 'Image URL' ], + [:thumb_url_prefix, :STRING, 'Prefix of thumbnail URL.' ], + [:thumb_url_suffix, :STRING, 'Suffix of thumbnail URL.' ] + ]], [:on_node, :BOOL, 'Is this a key for nodes?'], [:on_way, :BOOL, 'Is this a key for ways?'], [:on_area, :BOOL, 'Is this a key for areas?'], @@ -297,7 +305,7 @@ class Taginfo < Sinatra::Base }) do key = params[:key] - res = @db.execute('SELECT * FROM wikipages WHERE value IS NULL AND key = ? ORDER BY lang', key) + res = @db.execute('SELECT * FROM wikipages LEFT OUTER JOIN wiki_images USING (image) WHERE value IS NULL AND key = ? 
ORDER BY lang', key) return get_wiki_result(res) end diff --git a/web/lib/api/v4/tag.rb b/web/lib/api/v4/tag.rb index 50e26c7..20a0559 100644 --- a/web/lib/api/v4/tag.rb +++ b/web/lib/api/v4/tag.rb @@ -177,7 +177,15 @@ class Taginfo < Sinatra::Base [:language_en, :STRING, 'Language name in English.'], [:title, :STRING, 'Wiki page title.'], [:description, :STRING, 'Short description of tag from wiki page.'], - [:image, :STRING, 'Wiki page title of associated image.'], + [:image, :HASH, 'Associated image.', [ + [:title, :STRING, 'Wiki page title of associated image.' ], + [:width, :INT, 'Width of image.' ], + [:height, :INT, 'Height of image.' ], + [:mime, :STRING, 'MIME type of image.' ], + [:image_url, :STRING, 'Image URL' ], + [:thumb_url_prefix, :STRING, 'Prefix of thumbnail URL.' ], + [:thumb_url_suffix, :STRING, 'Suffix of thumbnail URL.' ] + ]], [:on_node, :BOOL, 'Is this a tag for nodes?'], [:on_way, :BOOL, 'Is this a tag for ways?'], [:on_area, :BOOL, 'Is this a tag for areas?'], @@ -192,7 +200,7 @@ class Taginfo < Sinatra::Base key = params[:key] value = params[:value] - res = @db.execute('SELECT * FROM wikipages WHERE key = ? AND value = ? ORDER BY lang', key, value) + res = @db.execute('SELECT * FROM wikipages LEFT OUTER JOIN wiki_images USING (image) WHERE key = ? AND value = ? ORDER BY lang', key, value) return get_wiki_result(res) end diff --git a/web/lib/ui/keys_tags.rb b/web/lib/ui/keys_tags.rb index feaf90d..5e21c6e 100644 --- a/web/lib/ui/keys_tags.rb +++ b/web/lib/ui/keys_tags.rb @@ -1,6 +1,17 @@ # web/lib/ui/keys_tags.rb class Taginfo < Sinatra::Base + MAX_IMAGE_WIDTH = 300 + + def build_image_url(row) + w = row['width'].to_i + h = row['height'].to_i + if w > 0 && h > 0 + return "#{row['thumb_url_prefix']}#{ h <= w ? MAX_IMAGE_WIDTH : (MAX_IMAGE_WIDTH * w / h).to_i }#{ row['thumb_url_suffix'] }" + end + return nil + end + get %r{^/keys/(.*)} do |key| if params[:key].nil? 
@key = key @@ -28,6 +39,11 @@ class Taginfo < Sinatra::Base @desc = "<span title='#{ t.pages.key.description_from_wiki }' tipsy='w'>#{ @desc }</span" end + @db.select("SELECT width, height, thumb_url_prefix, thumb_url_suffix FROM wiki.wikipages LEFT OUTER JOIN wiki.wiki_images USING(image) WHERE lang=? AND key=? AND value IS NULL UNION SELECT width, height, thumb_url_prefix, thumb_url_suffix FROM wiki.wikipages LEFT OUTER JOIN wiki.wiki_images USING(image) WHERE lang='en' AND key=? AND value IS NULL LIMIT 1", r18n.locale.code, @key, @key). + execute() do |row| + @image_url = build_image_url(row) + end + @prevalent_values = @db.select("SELECT value, count_#{@filter_type} AS count FROM tags"). condition('key=?', @key). condition('count > ?', @count_all_values * 0.02). @@ -107,8 +123,14 @@ class Taginfo < Sinatra::Base @desc = "<span title='#{ t.pages.tag.description_from_wiki }' tipsy='w'>#{ @desc }</span" end + @db.select("SELECT width, height, thumb_url_prefix, thumb_url_suffix FROM wiki.wikipages LEFT OUTER JOIN wiki.wiki_images USING(image) WHERE lang=? AND key=? AND value=? UNION SELECT width, height, thumb_url_prefix, thumb_url_suffix FROM wiki.wikipages LEFT OUTER JOIN wiki.wiki_images USING(image) WHERE lang='en' AND key=? AND value=? LIMIT 1", r18n.locale.code, @key, @value, @key, @value). 
+ execute() do |row| + @image_url = build_image_url(row) + end + javascript "#{ r18n.locale.code }/tag" erb :tag end end + diff --git a/web/lib/utils.rb b/web/lib/utils.rb index 20a4dab..2549b57 100644 --- a/web/lib/utils.rb +++ b/web/lib/utils.rb @@ -184,7 +184,15 @@ def get_wiki_result(res) :language_en => h(::Language[row['lang']].english_name), :title => h(row['title']), :description => h(row['description']), - :image => h(row['image']), + :image => { + :title => h(row['image']), + :width => row['width'].to_i, + :height => row['height'].to_i, + :mime => h(row['mime']), + :image_url => h(row['image_url']), + :thumb_url_prefix => h(row['thumb_url_prefix']), + :thumb_url_suffix => h(row['thumb_url_suffix']) + }, :on_node => row['on_node'].to_i == 1, :on_way => row['on_way'].to_i == 1, :on_area => row['on_area'].to_i == 1, diff --git a/web/views/key.erb b/web/views/key.erb index 14f952a..c51af26 100644 --- a/web/views/key.erb +++ b/web/views/key.erb @@ -33,6 +33,11 @@ <p><%= t.pages.key.overview.distribution_of_values %></p> <div class="canvas" id="canvas-values"></div> </div> +<% if @image_url %> + <div style="float: right; padding: 0 20px 20px 20px;"> + <img src="<%= @image_url %>" style="border: 1px solid #a0a0a0; border-radius: 4px; padding: 1px;" alt=""/> + </div> +<% end %> <h2><%= t.taginfo.overview %></h2> <table id="grid-overview"> </table> diff --git a/web/views/tag.erb b/web/views/tag.erb index 70ebd8c..243954d 100644 --- a/web/views/tag.erb +++ b/web/views/tag.erb @@ -28,6 +28,11 @@ </ul> <div id="overview"> <h2><%= t.taginfo.overview %></h2> +<% if @image_url %> + <div style="float: right;"> + <img src="<%= @image_url %>" style="border: 1px solid #a0a0a0; border-radius: 4px; padding: 1px;" alt=""/> + </div> +<% end %> <table id="grid-overview"> </table> </div> diff --git a/web/viewsjs/key.js.erb b/web/viewsjs/key.js.erb index 8fc4a04..e2bdcd3 100644 --- a/web/viewsjs/key.js.erb +++ b/web/viewsjs/key.js.erb @@ -104,7 +104,7 @@ var create_flexigrid_for = 
{ print_language(row.lang, row.language, row.language_en), print_wiki_link(row.title), row.description, - row.image == '' ? empty('<%= misc.no_image %>') : hover_expand(print_wiki_link(row.image)), + row.image.title ? hover_expand(print_wiki_link(row.image.title)) : empty('<%= misc.no_image %>'), print_type_icon('node', row.on_node) + print_type_icon('way', row.on_way) + print_type_icon('area', row.on_area) + diff --git a/web/viewsjs/tag.js.erb b/web/viewsjs/tag.js.erb index 88e1e41..c98d7c5 100644 --- a/web/viewsjs/tag.js.erb +++ b/web/viewsjs/tag.js.erb @@ -76,7 +76,7 @@ var create_flexigrid_for = { print_language(row.lang, row.language, row.language_en), print_wiki_link(row.title), row.description, - row.image == '' ? empty('<%= misc.no_image %>') : hover_expand(print_wiki_link(row.image)), + row.image.title ? hover_expand(print_wiki_link(row.image.title)) : empty('<%= misc.no_image %>'), print_type_icon('node', row.on_node) + print_type_icon('way', row.on_way) + print_type_icon('area', row.on_area) + |