summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJochen Topf <jochen@topf.org>2013-01-09 19:18:55 +0100
committerJochen Topf <jochen@topf.org>2013-01-09 19:18:55 +0100
commit64a047f622a5ed15dea94b5e52dd8c948fce9e95 (patch)
tree1dfeecec0fa0477c1a246ef6567dffc56592d325
parent9cadfd89c12c9223e7c572646680d0bcce57310c (diff)
downloadtaginfo-64a047f622a5ed15dea94b5e52dd8c948fce9e95.tar
taginfo-64a047f622a5ed15dea94b5e52dd8c948fce9e95.tar.gz
Better support for wiki images.
Key and tag wiki pages can contain images. Until now we only got the titles of those images. Now we also get the URL of the image, the URL of its thumbnails, width, height, and MIME type. This information is now exposed in the API and is used to show the images in the Overview tab of the key and tag pages. While we are changing the update process anyway, I changed the program that gets the list of all pages to also output the time each of those pages was last changed. This information is currently not used, but it could be used to cache those pages locally, making the update much faster and putting less strain on the wiki server.
-rwxr-xr-xsources/wiki/get_image_info.rb118
-rwxr-xr-xsources/wiki/get_page_list.rb16
-rwxr-xr-xsources/wiki/get_wiki_data.rb20
-rw-r--r--sources/wiki/lib/mediawikiapi.rb5
-rw-r--r--sources/wiki/post.sql2
-rw-r--r--sources/wiki/pre.sql21
-rw-r--r--web/lib/api/v4/key.rb12
-rw-r--r--web/lib/api/v4/tag.rb12
-rw-r--r--web/lib/ui/keys_tags.rb22
-rw-r--r--web/lib/utils.rb10
-rw-r--r--web/views/key.erb5
-rw-r--r--web/views/tag.erb5
-rw-r--r--web/viewsjs/key.js.erb2
-rw-r--r--web/viewsjs/tag.js.erb2
14 files changed, 230 insertions, 22 deletions
diff --git a/sources/wiki/get_image_info.rb b/sources/wiki/get_image_info.rb
new file mode 100755
index 0000000..06707ca
--- /dev/null
+++ b/sources/wiki/get_image_info.rb
@@ -0,0 +1,118 @@
+#!/usr/bin/ruby
+#------------------------------------------------------------------------------
+#
+# get_image_info.rb [DIR]
+#
+#------------------------------------------------------------------------------
+#
+# Gets meta information about images from the OSM wiki.
+#
+# Reads the list of all images used in Key: and Tag: pages from the local
+# database and requests meta information (width, height, mime type, URL, ...)
+# for those images. Writes this data into the wiki_images table.
+#
+# The database must be in DIR or in the current directory, if no directory
+# was given on the command line.
+#
+#------------------------------------------------------------------------------
+#
+# Copyright (C) 2013 Jochen Topf <jochen@remote.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#------------------------------------------------------------------------------
+
+require 'rubygems'
+
+require 'pp'
+
+require 'net/http'
+require 'uri'
+require 'json'
+require 'sqlite3'
+
+require 'lib/mediawikiapi.rb'
+
+#------------------------------------------------------------------------------
+
+dir = ARGV[0] || '.'
+
+api = MediaWikiAPI::API.new('wiki.openstreetmap.org')
+api.add_header('User-agent', 'taginfo/0.1 (jochen@remote.org)')
+
+db = SQLite3::Database.new(dir + '/taginfo-wiki.db')
+db.results_as_hash = true
+image_titles = db.execute("SELECT DISTINCT(image) AS title FROM wikipages").map{ |row| row['title'] }.select{ |title| !title.nil? && title.match(%r{^(file|image):}i) }
+
+db.execute('BEGIN TRANSACTION');
+
+until image_titles.empty?
+ some_titles = image_titles.slice!(0, 10)
+# puts some_titles.join(",") + "\n"
+
+ begin
+ data = api.query(:prop => 'imageinfo', :iiprop => 'url|size|mime', :titles => some_titles.join('|'), :iiurlwidth => 200, :iiurlheight => 200)
+
+ if !data['query']
+ STDERR.puts "Wiki API call failed (no 'query' field):"
+ pp data
+ next
+ end
+
+ normalized = data['query']['normalized']
+ if normalized
+ normalized.each do |n|
+ db.execute('UPDATE wikipages SET image=? WHERE image=?', n['to'], n['from'])
+ end
+ end
+
+ if !data['query']['pages']
+ STDERR.puts "Wiki API call failed (no 'pages' field):"
+ pp data
+ next
+ end
+
+ data['query']['pages'].each do |k,v|
+ if v['imageinfo']
+ info = v['imageinfo'][0]
+ if info['thumburl'].match(%r{^(.*/)[0-9]{1,4}(px-.*)$})
+ prefix = $1
+ suffix = $2
+ else
+ prefix = nil
+ suffix = nil
+ end
+ db.execute("INSERT INTO wiki_images (image, width, height, size, mime, image_url, thumb_url_prefix, thumb_url_suffix) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
+ v['title'],
+ info['width'],
+ info['height'],
+ info['size'],
+ info['mime'],
+ info['url'],
+ prefix,
+ suffix
+ )
+ end
+ end
+ rescue
+ puts "Wiki API call error:"
+ pp data
+ end
+end
+
+db.execute('COMMIT');
+
+
+#-- THE END -------------------------------------------------------------------
diff --git a/sources/wiki/get_page_list.rb b/sources/wiki/get_page_list.rb
index 88afe10..7cdbafa 100755
--- a/sources/wiki/get_page_list.rb
+++ b/sources/wiki/get_page_list.rb
@@ -70,13 +70,13 @@ end
def get_page_list(api, namespaceid, options)
apfrom = ''
loop do
- data = api.query(:list => 'allpages', :aplimit => 'max', :apfrom => apfrom, :apnamespace => namespaceid, :apfilterredir => options[:redirect] ? 'redirects' : 'nonredirects')
+ data = api.query(:generator => 'allpages', :gaplimit => 'max', :gapfrom => apfrom, :gapnamespace => namespaceid, :gapfilterredir => options[:redirect] ? 'redirects' : 'nonredirects', :prop => 'info')
# pp data
- data['query']['allpages'].each do |h|
- yield h['title'].gsub(/\s/, '_')
+ data['query']['pages'].each do |k,v|
+ yield v['touched'], v['title'].gsub(/\s/, '_')
end
if data['query-continue']
- apfrom = data['query-continue']['allpages']['apfrom'].gsub(/\s/, '_')
+ apfrom = data['query-continue']['allpages']['gapfrom'].gsub(/\s/, '_')
# puts "apfrom=#{apfrom}"
else
return
@@ -102,16 +102,16 @@ tagpages = File.open(dir + '/tagpages.list', 'w')
namespaces.keys.sort.each do |namespace|
id = namespaces[namespace]
- get_page_list(api, id, :redirect => false) do |page|
- line = ['page', namespace, page].join("\t")
+ get_page_list(api, id, :redirect => false) do |timestamp, page|
+ line = ['page', timestamp, namespace, page].join("\t")
allpages.puts line
if page =~ /^([^:]+:)?(Key|Tag):(.+)$/
tagpages.puts line
end
end
- get_page_list(api, id, :redirect => true) do |page|
- line = ['redirect', namespace, page].join("\t")
+ get_page_list(api, id, :redirect => true) do |timestamp, page|
+ line = ['redirect', timestamp, namespace, page].join("\t")
allpages.puts line
if page =~ /^([^:]+:)?(Key|Tag):(.+)$/
tagpages.puts line
diff --git a/sources/wiki/get_wiki_data.rb b/sources/wiki/get_wiki_data.rb
index 302db28..34d1182 100755
--- a/sources/wiki/get_wiki_data.rb
+++ b/sources/wiki/get_wiki_data.rb
@@ -54,10 +54,11 @@ class WikiPage
attr_accessor :content
attr_accessor :description, :image, :group, :onNode, :onWay, :onArea, :onRelation, :has_templ
- attr_reader :type, :namespace, :title, :tag, :key, :value, :lang, :ttype, :tags_implies, :tags_combination, :tags_linked, :parsed
+ attr_reader :type, :timestamp, :namespace, :title, :tag, :key, :value, :lang, :ttype, :tags_implies, :tags_combination, :tags_linked, :parsed
- def initialize(type, namespace, title)
+ def initialize(type, timestamp, namespace, title)
@type = type # 'page' or 'redirect'
+ @timestamp = timestamp # page last touched
@namespace = namespace # 'XX' (mediawiki namespace or '')
@title = title # wiki page title
@@ -129,7 +130,7 @@ class WikiPage
content,
group,
type,
- has_templ,
+ has_templ ? 1 : 0,
parsed ? 1 : 0,
description,
image,
@@ -239,8 +240,8 @@ File.open(dir + '/tagpages.list') do |wikipages|
wikipages.each do |line|
line.chomp!
t = line.split("\t")
- page = WikiPage.new(t[0], t[1], t[2])
- puts "page: (#{page.title}) (#{page.type}) (#{page.namespace}) (#{page.tag})"
+ page = WikiPage.new(t[0], t[1], t[2], t[3])
+ puts "page: (#{page.title}) (#{page.type}) (#{page.timestamp}) (#{page.namespace}) (#{page.tag})"
reason = page.check_title
if reason == :ok
@@ -271,7 +272,14 @@ File.open(dir + '/tagpages.list') do |wikipages|
end
end
if template.named_parameters['image']
- page.image = template.named_parameters['image'][0]
+ ititle = template.named_parameters['image'][0]
+ if !ititle.nil? && ititle.match(%r{^(file|image):(.*)$}i)
+ page.image = "File:#{$2}"
+ else
+ puts "invalid image: #{reason} #{page.title} #{ititle}"
+ db.execute('INSERT INTO invalid_image_titles (reason, page_title, image_title) VALUES (?, ?, ?)', reason, page.title, ititle)
+ page.image = ''
+ end
end
if template.named_parameters['group']
page.group = template.named_parameters['group'][0]
diff --git a/sources/wiki/lib/mediawikiapi.rb b/sources/wiki/lib/mediawikiapi.rb
index a231cee..293aa6f 100644
--- a/sources/wiki/lib/mediawikiapi.rb
+++ b/sources/wiki/lib/mediawikiapi.rb
@@ -26,6 +26,8 @@
#
#------------------------------------------------------------------------------
+require 'cgi'
+
module MediaWikiAPI
class API
@@ -42,12 +44,13 @@ module MediaWikiAPI
end
def build_path(params)
- @path + params.to_a.map{ |el| el.join('=') }.join('&')
+ @path + params.to_a.map{ |el| CGI::escape(el[0].to_s) + '=' + CGI::escape(el[1].to_s) }.join('&')
end
def get(params)
path = build_path(params)
http = Net::HTTP.start(@host, @port)
+# puts "Getting path [#{path}]"
http.get(path, @headers)
end
diff --git a/sources/wiki/post.sql b/sources/wiki/post.sql
index 773a04d..99eb26f 100644
--- a/sources/wiki/post.sql
+++ b/sources/wiki/post.sql
@@ -13,6 +13,8 @@ UPDATE wikipages SET status='e' WHERE type='page' AND has_templ='true' AND parse
CREATE INDEX wikipages_key_value_idx ON wikipages(key, value);
+CREATE INDEX wiki_images_image ON wiki_images(image);
+
INSERT INTO wikipages_keys (key, langs, lang_count) SELECT key, group_concat(lang || ' ' || status), count(*) FROM wikipages WHERE value IS NULL GROUP BY key;
INSERT INTO wikipages_tags (key, value, langs, lang_count) SELECT key, value, group_concat(lang || ' ' || status), count(*) FROM wikipages WHERE value IS NOT NULL GROUP BY key, value;
diff --git a/sources/wiki/pre.sql b/sources/wiki/pre.sql
index ebb80d3..8a515e7 100644
--- a/sources/wiki/pre.sql
+++ b/sources/wiki/pre.sql
@@ -33,6 +33,19 @@ CREATE TABLE wikipages (
status TEXT
);
+DROP TABLE IF EXISTS wiki_images;
+
+CREATE TABLE wiki_images ( -- meta information about wiki images; rows are inserted by sources/wiki/get_image_info.rb
+ image TEXT, -- page title of the image as returned by the wiki API; joined against wikipages.image
+ width INTEGER, -- 'width' field of the API imageinfo entry (presumably pixels)
+ height INTEGER, -- 'height' field of the API imageinfo entry (presumably pixels)
+ size INTEGER, -- 'size' field of the API imageinfo entry (presumably bytes -- TODO confirm)
+ mime TEXT, -- 'mime' field of the API imageinfo entry
+ image_url TEXT, -- 'url' field of the API imageinfo entry
+ thumb_url_prefix TEXT, -- part of the thumbnail URL before the pixel width, up to and including the '/'; NULL if the URL did not match
+ thumb_url_suffix TEXT -- part of the thumbnail URL after the pixel width, starting with 'px-'; NULL if the URL did not match
+);
+
DROP TABLE IF EXISTS wikipages_keys;
CREATE TABLE wikipages_keys (
@@ -64,6 +77,14 @@ CREATE TABLE invalid_page_titles (
title TEXT
);
+DROP TABLE IF EXISTS invalid_image_titles;
+
+CREATE TABLE invalid_image_titles ( -- image parameters rejected by sources/wiki/get_wiki_data.rb because they do not start with "File:" or "Image:" (any case)
+ reason TEXT, -- value of the 'reason' variable at insert time; presumably the page.check_title result -- TODO confirm
+ page_title TEXT, -- title of the wiki page whose template carried the image parameter
+ image_title TEXT -- the rejected image parameter as found in the template (NULL if it was missing)
+);
+
DROP TABLE IF EXISTS words;
CREATE TABLE words (
diff --git a/web/lib/api/v4/key.rb b/web/lib/api/v4/key.rb
index 7cc9532..420ef37 100644
--- a/web/lib/api/v4/key.rb
+++ b/web/lib/api/v4/key.rb
@@ -283,7 +283,15 @@ class Taginfo < Sinatra::Base
[:language_en, :STRING, 'Language name in English.'],
[:title, :STRING, 'Wiki page title.'],
[:description, :STRING, 'Short description of key from wiki page.'],
- [:image, :STRING, 'Wiki page title of associated image.'],
+ [:image, :HASH, 'Associated image.', [
+ [:title, :STRING, 'Wiki page title of associated image.' ],
+ [:width, :INT, 'Width of image.' ],
+ [:height, :INT, 'Height of image.' ],
+ [:mime, :STRING, 'MIME type of image.' ],
+ [:image_url, :STRING, 'Image URL' ],
+ [:thumb_url_prefix, :STRING, 'Prefix of thumbnail URL.' ],
+ [:thumb_url_suffix, :STRING, 'Suffix of thumbnail URL.' ]
+ ]],
[:on_node, :BOOL, 'Is this a key for nodes?'],
[:on_way, :BOOL, 'Is this a key for ways?'],
[:on_area, :BOOL, 'Is this a key for areas?'],
@@ -297,7 +305,7 @@ class Taginfo < Sinatra::Base
}) do
key = params[:key]
- res = @db.execute('SELECT * FROM wikipages WHERE value IS NULL AND key = ? ORDER BY lang', key)
+ res = @db.execute('SELECT * FROM wikipages LEFT OUTER JOIN wiki_images USING (image) WHERE value IS NULL AND key = ? ORDER BY lang', key)
return get_wiki_result(res)
end
diff --git a/web/lib/api/v4/tag.rb b/web/lib/api/v4/tag.rb
index 50e26c7..20a0559 100644
--- a/web/lib/api/v4/tag.rb
+++ b/web/lib/api/v4/tag.rb
@@ -177,7 +177,15 @@ class Taginfo < Sinatra::Base
[:language_en, :STRING, 'Language name in English.'],
[:title, :STRING, 'Wiki page title.'],
[:description, :STRING, 'Short description of tag from wiki page.'],
- [:image, :STRING, 'Wiki page title of associated image.'],
+ [:image, :HASH, 'Associated image.', [
+ [:title, :STRING, 'Wiki page title of associated image.' ],
+ [:width, :INT, 'Width of image.' ],
+ [:height, :INT, 'Height of image.' ],
+ [:mime, :STRING, 'MIME type of image.' ],
+ [:image_url, :STRING, 'Image URL' ],
+ [:thumb_url_prefix, :STRING, 'Prefix of thumbnail URL.' ],
+ [:thumb_url_suffix, :STRING, 'Suffix of thumbnail URL.' ]
+ ]],
[:on_node, :BOOL, 'Is this a tag for nodes?'],
[:on_way, :BOOL, 'Is this a tag for ways?'],
[:on_area, :BOOL, 'Is this a tag for areas?'],
@@ -192,7 +200,7 @@ class Taginfo < Sinatra::Base
key = params[:key]
value = params[:value]
- res = @db.execute('SELECT * FROM wikipages WHERE key = ? AND value = ? ORDER BY lang', key, value)
+ res = @db.execute('SELECT * FROM wikipages LEFT OUTER JOIN wiki_images USING (image) WHERE key = ? AND value = ? ORDER BY lang', key, value)
return get_wiki_result(res)
end
diff --git a/web/lib/ui/keys_tags.rb b/web/lib/ui/keys_tags.rb
index feaf90d..5e21c6e 100644
--- a/web/lib/ui/keys_tags.rb
+++ b/web/lib/ui/keys_tags.rb
@@ -1,6 +1,17 @@
# web/lib/ui/keys_tags.rb
class Taginfo < Sinatra::Base
+ MAX_IMAGE_WIDTH = 300
+
+ def build_image_url(row)
+ w = row['width'].to_i
+ h = row['height'].to_i
+ if w > 0 && h > 0
+ return "#{row['thumb_url_prefix']}#{ h <= w ? MAX_IMAGE_WIDTH : (MAX_IMAGE_WIDTH * w / h).to_i }#{ row['thumb_url_suffix'] }"
+ end
+ return nil
+ end
+
get %r{^/keys/(.*)} do |key|
if params[:key].nil?
@key = key
@@ -28,6 +39,11 @@ class Taginfo < Sinatra::Base
@desc = "<span title='#{ t.pages.key.description_from_wiki }' tipsy='w'>#{ @desc }</span"
end
+ @db.select("SELECT width, height, thumb_url_prefix, thumb_url_suffix FROM wiki.wikipages LEFT OUTER JOIN wiki.wiki_images USING(image) WHERE lang=? AND key=? AND value IS NULL UNION SELECT width, height, thumb_url_prefix, thumb_url_suffix FROM wiki.wikipages LEFT OUTER JOIN wiki.wiki_images USING(image) WHERE lang='en' AND key=? AND value IS NULL LIMIT 1", r18n.locale.code, @key, @key).
+ execute() do |row|
+ @image_url = build_image_url(row)
+ end
+
@prevalent_values = @db.select("SELECT value, count_#{@filter_type} AS count FROM tags").
condition('key=?', @key).
condition('count > ?', @count_all_values * 0.02).
@@ -107,8 +123,14 @@ class Taginfo < Sinatra::Base
@desc = "<span title='#{ t.pages.tag.description_from_wiki }' tipsy='w'>#{ @desc }</span"
end
+ @db.select("SELECT width, height, thumb_url_prefix, thumb_url_suffix FROM wiki.wikipages LEFT OUTER JOIN wiki.wiki_images USING(image) WHERE lang=? AND key=? AND value=? UNION SELECT width, height, thumb_url_prefix, thumb_url_suffix FROM wiki.wikipages LEFT OUTER JOIN wiki.wiki_images USING(image) WHERE lang='en' AND key=? AND value=? LIMIT 1", r18n.locale.code, @key, @value, @key, @value).
+ execute() do |row|
+ @image_url = build_image_url(row)
+ end
+
javascript "#{ r18n.locale.code }/tag"
erb :tag
end
end
+
diff --git a/web/lib/utils.rb b/web/lib/utils.rb
index 20a4dab..2549b57 100644
--- a/web/lib/utils.rb
+++ b/web/lib/utils.rb
@@ -184,7 +184,15 @@ def get_wiki_result(res)
:language_en => h(::Language[row['lang']].english_name),
:title => h(row['title']),
:description => h(row['description']),
- :image => h(row['image']),
+ :image => {
+ :title => h(row['image']),
+ :width => row['width'].to_i,
+ :height => row['height'].to_i,
+ :mime => h(row['mime']),
+ :image_url => h(row['image_url']),
+ :thumb_url_prefix => h(row['thumb_url_prefix']),
+ :thumb_url_suffix => h(row['thumb_url_suffix'])
+ },
:on_node => row['on_node'].to_i == 1,
:on_way => row['on_way'].to_i == 1,
:on_area => row['on_area'].to_i == 1,
diff --git a/web/views/key.erb b/web/views/key.erb
index 14f952a..c51af26 100644
--- a/web/views/key.erb
+++ b/web/views/key.erb
@@ -33,6 +33,11 @@
<p><%= t.pages.key.overview.distribution_of_values %></p>
<div class="canvas" id="canvas-values"></div>
</div>
+<% if @image_url %>
+ <div style="float: right; padding: 0 20px 20px 20px;">
+ <img src="<%= @image_url %>" style="border: 1px solid #a0a0a0; border-radius: 4px; padding: 1px;" alt=""/>
+ </div>
+<% end %>
<h2><%= t.taginfo.overview %></h2>
<table id="grid-overview">
</table>
diff --git a/web/views/tag.erb b/web/views/tag.erb
index 70ebd8c..243954d 100644
--- a/web/views/tag.erb
+++ b/web/views/tag.erb
@@ -28,6 +28,11 @@
</ul>
<div id="overview">
<h2><%= t.taginfo.overview %></h2>
+<% if @image_url %>
+ <div style="float: right;">
+ <img src="<%= @image_url %>" style="border: 1px solid #a0a0a0; border-radius: 4px; padding: 1px;" alt=""/>
+ </div>
+<% end %>
<table id="grid-overview">
</table>
</div>
diff --git a/web/viewsjs/key.js.erb b/web/viewsjs/key.js.erb
index 8fc4a04..e2bdcd3 100644
--- a/web/viewsjs/key.js.erb
+++ b/web/viewsjs/key.js.erb
@@ -104,7 +104,7 @@ var create_flexigrid_for = {
print_language(row.lang, row.language, row.language_en),
print_wiki_link(row.title),
row.description,
- row.image == '' ? empty('<%= misc.no_image %>') : hover_expand(print_wiki_link(row.image)),
+ row.image.title ? hover_expand(print_wiki_link(row.image.title)) : empty('<%= misc.no_image %>'),
print_type_icon('node', row.on_node) +
print_type_icon('way', row.on_way) +
print_type_icon('area', row.on_area) +
diff --git a/web/viewsjs/tag.js.erb b/web/viewsjs/tag.js.erb
index 88e1e41..c98d7c5 100644
--- a/web/viewsjs/tag.js.erb
+++ b/web/viewsjs/tag.js.erb
@@ -76,7 +76,7 @@ var create_flexigrid_for = {
print_language(row.lang, row.language, row.language_en),
print_wiki_link(row.title),
row.description,
- row.image == '' ? empty('<%= misc.no_image %>') : hover_expand(print_wiki_link(row.image)),
+ row.image.title ? hover_expand(print_wiki_link(row.image.title)) : empty('<%= misc.no_image %>'),
print_type_icon('node', row.on_node) +
print_type_icon('way', row.on_way) +
print_type_icon('area', row.on_area) +